#! /usr/bin/perl # block-fast-flux -- Read details from servfail log, block fast flux DNS attacks. # Copyright (C) 2014 Nick Urbanik # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. use strict; use warnings; use Getopt::Long; use Readonly; use Data::Dumper; use Fcntl qw( :flock :seek ); use Net::Netmask; use Linux::Inotify2; use POSIX (); use Digest::MD5; use FindBin; use lib "$FindBin::Bin/../lib"; use Debug::OnTheFly qw( :debug ); $Data::Dumper::Indent = 1; $Data::Dumper::Quotekeys = 0; $Data::Dumper::Sortkeys = 1; $Data::Dumper::Deepcopy = 1; Readonly my $THRESHOLD => 200; Readonly my $MAXSIZE => 20_000_000; # Do not block domains with less than $MINDOTS in the domain name, # unless the domain is listed in the file blacklist. Readonly my $MINDOTS => 2; # Want to have the minimum of $FRACTION_MIN Readonly my $FRACTION_MIN => 0.5; Readonly my $MINDOT_FRACTION_MIN => 0.75; # $TAIL_TIME is the time we tail the query error log looking for reoffenders: Readonly my $TAIL_TIME => 20; # seconds Readonly my $ONE_MINUTE => 60; # $MIN_REAPPEARANCES is the default minimum number of reappearances of # a re-offender after unblocking it to decide to re-block it: Readonly my $MIN_REAPPEARANCES => 1; # If we haven't seen bad behaviour from a domain in more than $DEFAULT_MAX_MINS # then don't block it anymore. Readonly my $DEFAULT_MAX_MINS => 120; Readonly my $BAD_ZONES_CACHE_FILE => '/var/cache/bind/bad-zones'; Readonly my $BAD_ZONES_FILE => '/var/named/chroot/etc/named/bad-zones.conf'; Readonly my $FILE_TO_TOUCH_AFTER_SUCCESS => '/var/cache/bind/block-fast-flux-last-ran'; Readonly my @LOGFILES => qw( /var/named/chroot/var/log/queries/errors /var/named/chroot/var/log/queries/errors.0 ); sub usage { ( my $prog = $0 ) =~ s{.*/}{}; print < map { $_ => 1 } qw( org.uk me.uk ltd.uk pic.uk net.uk sch.uk ac.uk gov.uk mod.uk mil.uk nhs.uk police.uk 9msn.com.au akadns.net akamai.net akamaiedge.net akamaihd.net amazon.com amazonaws.com apple.com asn.au belkin.com bigpond.com bigpond.net bing.com bluecoat.com co.uk com.au doubleclick.net dropbox.com dsbl.org ebay.com edu.au facebook.com facebook.net fbcdn.net flurry.com gmail.com google-analytics.com google.com googleadservices.com googleapis.com googlesyndication.com gov.au gov.au gstatic.com hotmail.com httpbl.org id.au iinet.net.au in-addr.arpa incapdns.net instagram.com isc.org kaspersky.com kik.com live.com mail-abuse.com mail-abuse.org mcafee.com microsoft.com msftncsi.com msn.com net.au netgear.com news.com.au ninemsn.com.au njabl.org norton.com ntn.symantec.com ntp.org optus.com.au optus.net optus.net.au optusnet.com.au org.au paypal.com realestate.com.au root-servers.net samsung.com skype.com skype.net smh.com.au sonicwall.com sophosxl.com sophosxl.net sorbs.net spamcop.net spamhaus.org surbl.org symantec.com symantecliveupdate.com tpgi.com.au trendmicro.com twitter.com uribl.com viber.com windowsupdate.com yahoo.com yahooapis.com youtube.com ); sub slurp_file { my ( $filename ) = @_; open my $fh, '<', $filename or die "Cannot open '$filename': $!"; my $text = do { local $/; <$fh> }; close $fh or die "Cannot close '$filename': $!"; return $text; } # Assume the directory containing $filename exists. # Assume the file has changed and needs saving and checking in. # Assume we want the file to have a permission of 0644. # Assume we want owner to be root, group owner to be named. sub write_with_rcs { my ( $filename, $text ) = @_; my ( $dir ) = $filename =~ m{(.*)/.}; $dir = q{.} unless defined $dir; chomp( $dir = qx{ cd $dir; pwd } ); die "directory '$dir' does not exist\n" unless -d $dir; mkdir "$dir/RCS", 0700 or die "Cannot create $dir/RCS directory for $filename: $!" unless -d "$dir/RCS"; if ( -f $filename ) { my @cmd = ( 'ci', '-q', '-munmanaged changes', '-t-first checkin', '-l', $filename ); system( @cmd ) == 0 or die "Failed to do @cmd: $?"; } open my $fh, '>', "$filename.$$" or die "Cannot open file $filename.$$: $!"; print { $fh } $text or die "Cannot write to $filename.$$: $!"; close $fh or die "Cannot close file $filename.$$: $!"; rename "$filename.$$", $filename or die "Cannot rename $filename.$$, $filename: $!"; my @cmd = ( 'ci', '-q', "-mwritten by $0", '-t-first checkin', '-l', $filename ); system( @cmd ) == 0 or die "Failed to do @cmd: $?"; my $namedgroup = ( getgrnam 'named' )[ 2 ] or die "Cannot find the GID for group 'named': $!"; chown 0, $namedgroup, $filename or die "Cannot change group ownership of $filename to $namedgroup: $!"; chmod 0644, $filename or die "Cannot chmod 0644, $filename: $!"; return 1; } # Assume the directory that will contain the file exists. # Assume no RCS tags or other things that might change without # altering the meaningful content of the file. # Will write the contents to the file if they differ, and # use RCS to track the history of changes. sub text_install { my ( $filename, $text ) = @_; if ( ( -s $filename || 0 ) == length $text ) { my $orig = Digest::MD5->md5_hex( slurp_file $filename ); my $want = Digest::MD5->md5_hex( $text ); return if $orig eq $want; } return write_with_rcs $filename, $text; } sub find_baddies { my ( $logfh, $detail ) = @_; my $num = 0; my ( $client, $dom ); while ( <$logfh> ) { my ( $c, $d ) = m{ ^\S+ # date \s \S+ # time \s query-errors: \s client \s ([\d.]+)\#\d+: # client address#port: \s query \s failed \s \(SERVFAIL\) \s for \s+ ([^/]+) # domain /IN/ (\S+) #type \s at \s query.c:\d+ }xms; $client = $c if $c; $dom = $d if $d; my ( $domain, $type, $reason ) = m{ ^\S+ # date \s \S+ # time \s query-errors: \s fetch \s completed \s at \s resolver\.c:\d+ \s for \s+ ([^/]+) # domain / (\S+) # type \s in \s [\d.]+: \s (?:failure|timed \s out)/success \s \[ ([^\]]+) # reasons \] }xms; next unless $domain; $domain = lc $domain; my @comp = split m{\.}, $domain; shift @comp if @comp > 2; my $top_dom = join q{.}, @comp; ++$num; ++$detail->{$top_dom}{count}; ++$detail->{$top_dom}{client}{$client} if $dom and $dom eq $domain and $client; ++$detail->{$top_dom}{domain}{$domain}; ++$detail->{$top_dom}{type}{$type}; ++$detail->{$top_dom}{reason}{$reason}; } return $num; } sub top_servfails { my ( $logfiles, $max ) = @_; my @logs; for my $file ( @$logfiles ) { push @logs, $file; my $lastsize = -s $file; if ( $max and $lastsize > $max ) { last; } elsif ( $max ) { $max -= $lastsize; } } my $num = 0; my %count; note( Data::Dumper->Dump( [ \@logs ], [ qw( *logs ) ] ) ); while ( my $logfile = shift @logs ) { open my $logfh, '<', $logfile or warn "Cannot open '$logfile': $!" and last; if ( ! @logs and $max ) { seek $logfh, -$max, SEEK_END or warn "unable to seek to $max bytes before eof of '$logfile': $!\n"; } $num += find_baddies $logfh, \%count; close $logfh or die "Cannot close '$logfile': $!"; } note "\nHave $num records\n"; return \%count; } # Maintain a cache file /var/cache/bind/bad-zones # Has this format: # $domain\t$time\t$instances\t$numsubdomains\t$reasons\t$count_last_minute # $domain appears only on one line. # $time is the last time that the domain appeared as bad. # It is an ISO format date string so can compare lexically. # We build the bad-zones.conf file from this file where # $time is less than a threshold time away. # We do not remove domains from the cache file. # It could be a Berkeley DB file, but text is fine, and human friendly. # Both files are managed with RCS files. # @$baddies is a list of bad top domains. # %$detail is a hash, keyed by bad top domains, with details obtained from # reading the last 10^8 bytes of query_cache. # %$tail_count is also keyed by bad top domains with a value of a count # of instances found after tailing query-errors for one minute. # Both of these two hash references may be undefined, though we expect # one or the other is defined. sub update_cache { my $cache_text = shift; my ( $baddies, $detail, $tail_count, $bad_zones_cache, $dry_run ) = @_; my $summary = q{}; my $now = POSIX::strftime( '%F %T', localtime ); for my $bad ( @$baddies ) { my @entry = ( $bad, $now ); my $updated; my $desc; if ( $detail ) { my $domains = keys %{$detail->{$bad}{domain}}; my $reasons = keys %{$detail->{$bad}{reason}}; $desc = "$detail->{$bad}{count} instances of $domains subdomains " . "of $bad with $reasons reasons"; @entry[ 2, 3, 4 ] = ( $detail->{$bad}{count}, $domains, $reasons ); $updated = join "\t", @entry; } my @msg; if ( my ( $line ) = $cache_text =~ m{^(\Q$bad\E\t[^\n]+)}xms ) { @entry = split m{\t}, $line unless $detail; if ( $tail_count and exists $tail_count->{$bad} ) { $entry[ 5 ] = $tail_count->{$bad}; @msg = ( "Adding '$bad' back with $tail_count->{$bad} ", "instances after tailing for a minute" ); } @msg = ( "Updating $bad in $bad_zones_cache: $desc" ) if $detail; $updated = join "\t", @entry; $cache_text =~ s{^\Q$bad\E\t[^\n]+}{$updated}xms; } elsif ( not $detail ) { die "Cannot find $bad in the cache when it must be there"; } else { @msg = ( "Adding new zone $bad to cache: $desc" ); $cache_text .= "$updated\n"; } note @msg; $summary .= join q{}, @msg, "\n"; } text_install( $bad_zones_cache, $cache_text ) unless $dry_run; return wantarray ? ( $cache_text, $summary ) : $cache_text; } sub update_baddies_cache { my ( $detail, $tail_count, $baddies, $bad_zones_cache, $dry_run ) = @_; touch_file $bad_zones_cache unless -f $bad_zones_cache; open my $fh, '<', $bad_zones_cache or die "Cannot read '$bad_zones_cache': $!"; my $cache_text = do { local $/; <$fh> }; close $fh or die "Cannot close '$bad_zones_cache': $!"; ( $cache_text, my $summary ) = update_cache $cache_text, $baddies, $detail, $tail_count, $bad_zones_cache, $dry_run; return wantarray ? ( $cache_text, $summary ) : $cache_text; } sub build_bad_zones_conf { my $cache_text = shift; my ( $block_after, $bad_zones_file, $summary, $dry_run ) = @_; note "Block zones that are as recent as '$block_after'"; my $bad_conf_text = q{}; CACHE_ENTRY: for my $entry ( split m{\n}, $cache_text ) { next unless $entry =~ m{\S}; my ( $bad, $time, $instances, $domains, $reasons ) = split m{\t}, $entry; if ( not $block_after or $time ge $block_after ) { $bad_conf_text .= <watch( $logfile, IN_MODIFY | $LOG_ROTATE_EVENT ) or die "Could not watch $logfile"; while ( 1 ) { # $inotify->read blocks till the log file is modified. my @events = $inotify->read; unless ( @events ) { warn "inotify read error: $!"; last; } debug sprintf( "mask\t%d\n", $_->mask ) for @events; if ( grep { $_->mask & $LOG_ROTATE_EVENT } @events ) { note "It seems $logfile has been rotated; reopening"; last; } seek $log_fh, 0, SEEK_CUR or die "Cannot reset eof $logfile: $!"; while ( 1 ) { last unless defined( my $line = <$log_fh> ); if ( my ( $bad ) = $line =~ m{SERVFAIL.*\b($regexp)\b} ) { debug "tail_for_recent(): found $bad"; ++$tail_count{$bad}; } } } } alarm 0; }; if ( $@ ) { die $@ unless $@ eq "alarm\n"; note "tail_for_recent(): Time's up."; } return \%tail_count; } # This is a list of domains that may have only one dot. Currently we have # a problem with the domain 9lieyan.com. # The file contains one domain per line, and may have lines that are comments # only. # Domains in this file will be blocked if they show signs of being attacks. sub get_blacklist { my @BLACKLIST_FILES = qw( /var/cache/bind/blacklist /home/bind/var/blacklist /opt/bind/var/blacklist ); my %blacklist; for my $file ( @BLACKLIST_FILES ) { if ( -f $file ) { open my $fh, '<', $file or die "Cannot open '$file': $!"; while ( <$fh> ) { chomp; s{\s}{}g; # domains have no whitespace. next if m{^#}; next if m{^$}; next unless m{^\S+\.\S{2,}$}; ++$blacklist{$_}; } close $fh or die "Cannot close '$file': $!"; } } return %blacklist; } # $block_after is also an ISO format date string, or undef or false. # If false, then we block all zones in the cache, regardless of age. sub blacklist { my ( $detail, $baddies, $bad_zones_cache, $block_after, $bad_zones_file, $dry_run ) = @_; my ( $cache_text, $summary ) = update_baddies_cache $detail, undef, $baddies, $bad_zones_cache, $dry_run; $summary = build_bad_zones_conf $cache_text, $block_after, $bad_zones_file, $summary, $dry_run; return wantarray ? ( $cache_text, $summary ) : $summary; } # There is a problem where the query-errors log does not show anything for # domains that are blocked, even though they are still evil. # After two hours, we open ourselves to a flood again. # To deal with that: # 1. Read the bad zones cache, and find all zones that are between 2 hours and # 2.5 hours old. # 2. tail the query-error log for a limited time (set an alarm, and count # a limited number of lines), looking for instances of any of the zones # found in step 1. # 3. If the number of any of these domains appears more than a threshold, # a. Update the time in the cache entry for it. # b. Rebuild the bad-zones.conf file. sub get_recently_unblocked { my $cache_text = shift; my ( $earliest, $latest ) = @_ or return; my @recent = map { $_->[ 0 ]; } grep { $_->[ 1 ] and $_->[ 1 ] ge $earliest and $_->[ 1 ] le $latest; } map { my ( $top, $time ) = m{^([^\t]+)\t([^\t]+)\t}; [ $top, $time ]; } split m{\n}, $cache_text; return \@recent; } Readonly my $INDENT => q{ } x 4; sub format_domains { my ( $domain_ref, $brief ) = @_; return ( scalar keys %$domain_ref ) . " domains.\n" if $brief; my @domains = sort keys %$domain_ref; my $domain_str = q{}; while ( @domains ) { my @args = splice @domains, 0, 5; $domain_str .= sprintf $INDENT . ( "%35s " x ( scalar @args ) ) . "\n", @args; } return $domain_str; } sub sort_ip_blocks { my @blocks = @_; my @sorted = map { $_->[ 0 ] } sort { $a->[ 1 ] cmp $b->[ 1 ] } map { ( my $ip = $_ ) =~ s{/.*}{}; [ $_, sprintf( '%03i%03i%03i%03i', split /\./, $ip ) ] } @blocks; return @sorted; } sub format_clients { my ( $client_ref, $brief ) = @_; return ( scalar keys %$client_ref ) . " clients.\n" if $brief; my @clients = sort_ip_blocks( keys %$client_ref ); my $client_str = q{}; while ( @clients ) { my @args = splice @clients, 0, 10; $client_str .= sprintf $INDENT . ( "%15s " x ( scalar @args ) ) . "\n", @args; } return $client_str; } sub format_reasons_2_at_time { my ( $reasons_ref, $brief ) = @_; return ( scalar keys %$reasons_ref ) . " reasons.\n" if $brief; my @reasons = sort keys %$reasons_ref; my $reason_str = q{}; while ( @reasons ) { $reason_str .= $INDENT . join( q{, }, splice @reasons, 0, 2 ) . "\n"; } $reason_str .= "\n" unless $reason_str =~ m{\n\z}; return $reason_str; } sub format_reasons { my ( $reasons_ref, $brief ) = @_; return ( scalar keys %$reasons_ref ) . " reasons.\n" if $brief; return $INDENT . join( "\n$INDENT", sort keys %$reasons_ref ) . "\n"; } sub report_domain { my ( $top_dom, $detail, $brief ) = @_; my $types = join q{, }, sort keys %{$detail->{$top_dom}{type}}; my $clients = format_clients $detail->{$top_dom}{client}, $brief; my $reasons = format_reasons $detail->{$top_dom}{reason}, $brief; my $domains = format_domains $detail->{$top_dom}{domain}, $brief; note sprintf "%6i %-40s %s\n", $detail->{$top_dom}{count}, $top_dom, $types; note "$reasons"; note "$clients"; note "$domains\n"; } sub get_reoffenders { my ( $threshold, $tail_count ) = @_; my @reoffend = grep { $tail_count->{$_} > $threshold } keys %$tail_count; my $plural = @reoffend == 1 ? q{} : 's'; my $naughties = @reoffend == 0 ? "$plural." : "$plural: @reoffend"; my %reoffender; @reoffender{@reoffend} = @{$tail_count}{@reoffend}; $naughties .= "; count$plural: @reoffender{@reoffend}" if @reoffend; note "get_reoffenders() found ", scalar @reoffend, " reoffender$naughties"; return ( \@reoffend, \%reoffender ); } open my $self, '<', $0 or die "$0: cannot open my program code: $!"; flock $self, LOCK_EX | LOCK_NB or die "$0 already running: $!"; note "$0 starting with options: @ARGV"; my $threshold = $THRESHOLD; my $blacklist = 1; my $max_mins = $DEFAULT_MAX_MINS; GetOptions( debug => \my $debug, 'threshold=i' => \$threshold, 'max-age-mins=i' => $max_mins, brief => \my $brief, 'maxlog=i' => \my $maxlog, 'blacklist!' => $blacklist, 'dry-run' => \my $dry_run, 'bad-zones-file=s' => \my $bad_zones_file, 'min-fraction=f' => \my $fraction_min, 'skip-if-mindots' => \my $skip_if_mindots, 'bad-zones-cache-file=s' => \my $bad_zones_cache, 'min-count=i' => \my $min_count, help => \&usage, ) or usage; if_debug; my @logs = @ARGV; @logs = @LOGFILES unless @ARGV; $maxlog ||= $MAXSIZE; # Read $MAXSIZE MB of logs by default. $bad_zones_cache ||= $BAD_ZONES_CACHE_FILE; $bad_zones_file ||= $BAD_ZONES_FILE; # Minimum number of instances of a reoffender appearing in a minute of tailing: $min_count ||= $MIN_REAPPEARANCES; $fraction_min ||= $FRACTION_MIN; my $block_after; my $start; if ( $max_mins ) { $block_after = POSIX::strftime( '%F %T', localtime( time - $max_mins * $ONE_MINUTE ) ); $start = POSIX::strftime( '%F %T', localtime( time - ( $max_mins + 30 ) * $ONE_MINUTE ) ); } my $detail = top_servfails \@logs, $maxlog; for my $top_dom ( keys %$detail ) { delete $detail->{$top_dom} unless $detail->{$top_dom}{count} >= $threshold; } debug( Data::Dumper->Dump( [ $detail ], [ qw( *detail ) ] ) ) if $debug; sub by_count { return $detail->{$a}{count} <=> $detail->{$b}{count}; } my $summary; # Rules to block the domain and its subdomains: # 1. It must have SERVFAILS for A record lookups # 2. It must not be in the %WHITELIST # 3. There must have been at least $threshold SERVFAILS within that domain # 4. The number of distinct subdomains must be more than $fraction_min of the total number of servfails. # 5. If the parent domain is listed in the blacklist, block it. # 6. If the parent domain has less than $MINDOTS dots in it then require # a higher percentage of unique subdomain requests from all requests. if ( $blacklist ) { my %blacklist = get_blacklist; debug( Data::Dumper->Dump( [ \%blacklist ], [ qw( *blacklist ) ] ) ); my @bad_domains; for my $top_dom ( sort by_count keys %$detail ) { next unless exists $detail->{$top_dom}{type}{A}; next if exists $WHITELIST{$top_dom}; my $numdomains = keys %{$detail->{$top_dom}{domain}}; next unless exists $blacklist{$top_dom} or $numdomains > $fraction_min * $detail->{$top_dom}{count}; if ( not exists $blacklist{$top_dom} ) { # If we have less then $MINDOTS dot(s) in the parent domain, # i.e., $MINDOTS + 1 domain components, then we have a # more stringent requirement for percentage of unique subdomains my $ndots; ++$ndots while $top_dom =~ m{\.}g; if ( $ndots < $MINDOTS ) { warn "Skipping '$top_dom' because it has less than $MINDOTS dots" and next unless $numdomains > $MINDOT_FRACTION_MIN * $detail->{$top_dom}{count} and not $skip_if_mindots; # Do not block any top level domains! next unless $ndots; } } push @bad_domains, $top_dom; } my ( $cache_text, $summary ) = blacklist $detail, \@bad_domains, $bad_zones_cache, $block_after, $bad_zones_file, $dry_run; my $end = $block_after; my $recently_unblocked = get_recently_unblocked $cache_text, $start, $end; if ( $recently_unblocked and @$recently_unblocked ) { note "Looking for recently unblocked domains: @$recently_unblocked"; my $tail_count = tail_for_recent $recently_unblocked, $LOGFILES[ 0 ], $TAIL_TIME; ( my $baddies, $tail_count ) = get_reoffenders $min_count, $tail_count; ( $cache_text, my $extra_summary ) = update_cache $cache_text, $baddies, undef, $tail_count, $bad_zones_cache, $dry_run; $extra_summary = build_bad_zones_conf $cache_text, $block_after, $bad_zones_file, $extra_summary, $dry_run; $summary .= $extra_summary; } else { note "No domains were recently unblocked"; } } for my $top_dom ( sort by_count keys %$detail ) { report_domain $top_dom, $detail, $brief; } if ( not $brief ) { for my $top_dom ( sort by_count keys %$detail ) { report_domain $top_dom, $detail, 1; } } note "\n$summary" if $summary; touch_file $FILE_TO_TOUCH_AFTER_SUCCESS; note "$0 finished"; __END__