1#!/usr/local/bin/perl 2#$Id: crawler.pl 4843 2013-08-14 12:17:58Z pro $ $URL: svn://svn.setun.net/search/trunk/crawler.pl $ 3 4=copyright 5PRO-search crawler 6Copyright (C) 2003-2011 Oleg Alexeenkov http://pro.setun.net/search/ proler@gmail.com 7 8This program is free software: you can redistribute it and/or modify 9it under the terms of the GNU General Public License as published by 10the Free Software Foundation, either version 3 of the License, or 11(at your option) any later version. 12 13This program is distributed in the hope that it will be useful, 14but WITHOUT ANY WARRANTY; without even the implied warranty of 15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16GNU General Public License for more details. 17 18You should have received a copy of the GNU General Public License 19along with this program. If not, see <http://www.gnu.org/licenses/>. 20=cut 21 22package pscrawler; 23use strict; 24no warnings qw(uninitialized); 25no if $] >= 5.017011, warnings => 'experimental::smartmatch'; 26use utf8; 27use Time::Local; 28use Time::HiRes qw(time sleep); 29use Encode; 30use Net::FTP; 31use Data::Dumper; #dev only 32$Data::Dumper::Sortkeys = $Data::Dumper::Useqq = $Data::Dumper::Indent = $Data::Dumper::Terse = 1; 33exit if defined $ENV{'REMOTE_ADDR'}; # cgi not allowed 34#use lib './dcppp', './lib'; 35our ( %config, $param, %work, %stat, %static ); 36our ( %processor, %program ); #, %human 37#*pscrawler::work = *main::work; 38#*pscrawler::stat = *main::stat; 39#*pscrawler::config = *main::config; 40#*pscrawler::program = *main::program; 41*config = *main::config; 42#=z 43#use vars 44my ( $tq, $rq, $vq ); 45our ( @ISA, @EXPORT, @EXPORT_OK ); 46require Exporter; 47@ISA = qw(Exporter); 48@EXPORT = qw( %config $param %work %stat %static ); 49#=cut 50#$0 =~ m|^(.+)[/\\].+?$|; #v0 51#our $root_path ||= $1 . '/' if $1; 52#$root_path =~ s|\\|/|g; 53use lib::abs qw(./ ./lib); 54#eval "use lib '$root_path'" if $root_path; 55#eval "use lib '$root_path./lib'; 56# use psmisc; use pssql;"; #use Net::DirectConnect; 57use psmisc qw(:all); 58use pssql; 59psmisc::use_try('Net::DirectConnect'); 60#'$root_path./dcppp', 61print($@), exit if $@; 62#use lib ; 63my (%ipstt); 64#our was here 65$work{'start_timer'} = psmisc::timer(); 66use sigtrap qw(handler safedie normal-signals error-signals INFO); 67 68sub safedie { 69 printlog( 'info', $0, make_stat() ), return if $_[0] eq 'INFO'; 70 printlog( 71 'info', 'Signal ', @_, 72 'recieved, flushing buffer, reloading conf.', 73 ( %stat ? ( 'Stat: ', map { "$_=$stat{$_};" } keys %stat ) : () ) 74 ), 75 #$processor{'out'}{'array'}->() 76 $static{'db'}->flush_insert(), psmisc::config_read(), psmisc::config_reload(), return 77 if $_[0] eq 'HUP' and ( $config{'win_hup'} or $config{'system'} ne 'win' ); 78 printlog( 'info', 'Signal', @_, 'recieved, ignoring, not fatal?' ), return if grep { $_[0] eq $_ } qw(PIPE SEGV); 79 printlog( 'info', 'Signal', @_, 'recieved, exiting' ); 80 #{ 81 #local $config{'log_all'} = 1; #DEV 82 #printlog( 'info', 'executing on_interrupt', int(time) ); 83 $work{'on_interrupt'}->(@_) if ( !$work{'die'} and $work{'on_interrupt'} ); 84 #} 85 $static{'db'}->{'die'} = $work{'die'} = 1; 86 exit; 87} 88 89sub on_interrupt { 90 $work{'on_interrupt'} = undef, return unless @_; 91 $work{'on_interrupt'} = $_[0] if $_[0]; 92} 93 94sub allowscan { 95 my ( $by, $table, $tim, $period ) = @_; 96 return 0 unless %$by; 97 return 1 if $config{'force'}; 98 if ( #!$config{'sql _use_dbi'} or 99 $static{'banned'}->{ join_url($by) } or $static{'banned'}->{ $by->{'host'} } or $static{'banned'}->{ $by->{'range'} } 100 ) 101 { 102 103=c 104printlog('dbg','allowscan:banned:', join_url($by),'ra=',$by->{'range'} , 'reason:', 105"sud: ! $config{'sql _use_dbi'} 106 banby: or ",$static{'banned'}->{ join_url($by) }," 107 ho or ", $static{'banned'}->{ $by->{'host'} }, " 108 ra or ", $static{'banned'}->{ $by->{'range'} } , " 109 ra h", %{$static{'banned'}->{ $by->{'range'} } } 110 111); 112=cut 113 114 $static{'db'}->update( $table, undef, { %$by, 'scan' => int( time() ) + $config{'banned_time'} } ) 115 if $table and $config{'banned_time'}; 116 return 0; 117 } 118 #my ( $tq, $rq, $vq ) = $static{'db'}->quotes(); 119 my $where = join( 120 ' AND ', 121 map( "$rq$_$rq = " . $static{'db'}->quote( $by->{$_} ), 122 grep { %{ $config{'sql'}{'table'}{$table}{$_} or {} } and $config{'sql'}{'table'}{$table}{$_}{'primary'} } keys %$by ) 123 ); 124 #MAKE QUERY 125 #printlog('dev', "tim($tim) per($period)", ); 126 my $base = 127 ( $table 128 and %$by 129 and ( !$tim or !$period ) 130 ? $static{'db'}->line("SELECT ${rq}scan$rq, ${rq}period$rq FROM $tq$static{'db'}->{'table_prefix'}$table$tq WHERE $where") 131 : {} ); 132 #printlog('dev', 'btim, bper = ', $base->{'scan'}, $base->{'period'}); 133 #printlog('dev', 'skiptime',join_url($by), $tim); 134 $tim ||= $base->{'scan'}; 135 $work{'skiptime'}{ join_url($by) } = $tim; 136 $period ||= ( $base->{'period'} or hconfig( 'period', $by->{'host'}, $by->{'prot'} ) ); 137 $work{'skipperiod'}{ join_url($by) } = $period; 138 if ( !$tim or ( $tim < time - $period ) ) { 139 local $config{'scan_timeout'} = $period / 2 if $config{'scan_timeout'} >= $period; 140 $static{'db'}->update( 141 $table, undef, { 142 $static{'db'}->filter_row( $table, 'primary', $by ), ( 143 #$period ne $base->{'period'} ? ( 'period' => $period ) : (), 144 $config{'noscan'} ? () : ( 'scan' => int( time() ) - $config{'scan_timeout'} ) 145 ) 146 } 147 ) if $table; 148 #printlog('dbg','allowscan', join_url($by), psmisc::human('time_period',time - $tim), psmisc::human('time_period',$period)); 149 return 1; 150 } 151#else { printlog('dbg','NOT allowscan', join_url($by), psmisc::human('time_period',time - $tim), psmisc::human('time_period',$period)); } 152 return 0; 153} 154 155sub resetcounters { 156 ( $work{'sizec'}, $work{'filec'}, $work{'dirc'} ) = 157 ( counter( $stat{'size'} ), counter( $stat{'files'} ), counter( $stat{'dirs'} ) ); 158} 159 160sub uni_get { 161 my ($what) = @_; 162 my ($prot) = $what =~ m|^\s*(\w+):|; 163 return $processor{'prot'}{$prot}{'get'}->($what) if $prot and ref $processor{'prot'}{$prot}{'get'} eq 'CODE'; 164 return $what; 165} 166 167sub scan_stop () { 168 local $_ = $work{'die'} || $config{'periodic_stop'} || ( !$config{'no_stop'} and -f $config{'stop_file'} ); 169 printlog( 'info', "stopping: fl=$config{'periodic_stop'} file=$config{'stop_file'}" ) if $_; 170 $_; 171} 172 173sub oneliner { 174 my ( $oneline, $content ) = @_; 175 #if ( $content =~ /^GLOB/ ) { 176 if ( ref $content eq 'GLOB' ) { 177 while (<$content>) { $oneline->(); if ($work{'interrupt'} ){delete $work{'interrupt'}; close $content; last; }; } 178 #} elsif ( $content =~ /^ARRAY/ ) { 179 } elsif ( ref $content eq 'ARRAY' ) { 180 while ( defined( $_ = shift @$content ) ) { $oneline->(); } 181 } else { 182 for ( split( "\n", $content ) ) { $oneline->(); } 183 } 184} 185$processor{'desc'}{'m3u'}{'func'} ||= sub { 186 my ( $file, $data ) = @_; 187 return unless $file; 188 my ( $curdesc, %detectcp ); 189 $file = Encode::encode hconfig( 190 'cp_shell' #, $host, $prot 191 ), $file, Encode::FB_DEFAULT; 192 open( DESC, '<', $file ) or return (); 193 while (<DESC>) { 194 cp_detect_trans( $_, \%detectcp, $config{'cp_int'}, $config{'cp_m3u'} ); 195 if (/^#EXTINF:(\d+),(.+)/i) { 196 #$curdesc = '[' . $1 . 's] ' . $2; 197 $curdesc = $2; 198 $data->{$_}{'meta'} .= " len=$1"; 199 $curdesc =~ s/[\x0d\x0a\x00-\x1F]+/ /g; 200 } elsif (/^(.+\..+)$/) { 201 tr/A-Z/a-z/; 202 s/[\x0d\x0a\x00-\x1F]//g; 203 s/^\s+|\s+$//g; 204 $data->{$_}{'desc'} .= ' ' . $curdesc; 205 } 206 } 207 close(DESC); 208 return $data; 209}; 210$processor{'desc'}{'bbs'}{'func'} ||= sub { 211 my ( $file, $data ) = @_; 212 #printlog "bbs[( $file, )]", Dumper $data; 213 return unless $file; 214 my $curfile = ''; 215 my %detectcp; 216 $file = Encode::encode hconfig( 217 'cp_shell' #, $host, $prot 218 ), $file, Encode::FB_DEFAULT; 219 open( DESC, '<', $file ) or return; #(printlog('dev', "cant open [$file]"),return ()); 220 while (<DESC>) { 221 #printlog Dumper "bbs[$_]"; 222 if (/^-(.*)/) { next; } 223 elsif (/^(\w\S*)\s*(\[\s*(\d*)\]\s*)?(.*)\s*/) { 224 $curfile = $1; 225 $curfile =~ tr/A-Z/a-z/; 226 $curfile =~ s/\.$//g; 227 my $desc = $4; 228 cp_detect_trans( $desc, \%detectcp, $config{'cp_int'}, $config{'cp_desc'} ); 229 #!!! TODO UTF:# $desc =~ s/[\x0d\x0a\x00-\x1F]+/ /g; 230 $data->{$curfile}{'desc'} .= ' ' . $desc unless ( $desc =~ /^\s*$config{'nodesc'}\s*$/i ); 231 } else { 232 next unless $curfile; 233 next unless /^\s*(.+)/; 234 next if length( $data->{$curfile}{'desc'} ) > 250; 235 my $tmp = $1; 236 next unless $tmp; 237 cp_detect_trans( $tmp, \%detectcp, $config{'cp_int'}, $config{'cp_desc'} ); 238 #!!! TODO UTF:# $tmp =~ s/[\x0d\x0a\x00-\x1F]//g; 239 chomp($tmp); 240 next unless $tmp; 241 $data->{$curfile}{'desc'} .= '\\\\n' . $tmp; 242 } 243 #params load $data{$curfile}{desc} = dos2win($data{$curfile}{desc}) if ( exists($data{$curfile}{desc}) ); 244 #if ( exists($data{$curfile}{desc}) ) { #!! 245 #while ($data{$curfile}{desc} =~ s/^\s*file(\S*)\=(\S*)\s*//i) { 246 #$data{$curfile}{$1} = $2 if $1 and $2; 247 #} 248 #} 249 } 250 close(DESC); 251 return $data; 252}; 253 254sub fileparser { # $file, $prot, $full, $size, \%files # $host, 255 for my $dsk ( keys %{ $processor{'desc'} } ) { 256 #printlog("$config{'desc'}{$dsk}{'mask'} and $_[0] 2:$_[2] 3:$_[3] 4:$_[4] "); 257 if ( $config{'desc'}{$dsk}{'mask'} and $_[0] =~ /$config{'desc'}{$dsk}{'mask'}/i ) { 258 #printlog("matched![$dsk][m:$config{'desc'}{$dsk}{'max'}][1:$_[1]]"); 259 psmisc::code_run( $processor{'desc'}{$dsk}{'func'}, psmisc::code_run($processor{'prot'}{ $_[1] }{'get'}, $_[2] ), $_[4] ) 260 if !$config{'desc'}{$dsk}{'max'} 261 or $_[3] <= $config{'desc'}{$dsk}{'max'}; 262 } 263 } 264} 265 266sub filesave (@) { 267 for (@_) { 268 for ( values %$_ ) { 269 $_->{type} = $config{ext_to_type}{ $_->{ext} }; 270 $_->{type} = 1 if !$_->{size}; 271 } 272 #printlog 'ins', Dumper $_; 273 $static{'db'}->insert_hash_hash( $config{'sql_tfile'}, $_ ); 274 } 275} 276 277sub update_to_root (\%\%;$$) { 278 my ( $res, $core, $local, $url_orig ) = @_; 279 my $path_ = ''; #'/'; 280 #/ /a /a/b /a/b/c 281 for my $d ( 282 !length $res->{'path'} ? '' : split '/', 283 #'/' . 284 $local ? $url_orig : $res->{'path'} 285 ) 286 { 287 #printlog( 'dev', 'cd', $res->{'path'}, 'd:',$d, 'p:',$path_ ); 288 $core->{'path'} = $path_ || '/'; 289 $core->{'name'} = $d; 290 #printlog('dev', $res->{'path'}, $path_, Dumper $core); 291 $static{'db'}->update( $config{'sql_tfile'}, undef, $core ); 292 $core->{'desc'} = ''; 293 $path_ .= '/' . $d if length $d; 294 #dmp('dsc!'), 295 $core->{'desc'} = $res->{'desc'} if $res->{'path'} eq $path_; 296 } 297 #=cut 298} 299 300=cu 301 302$processor{'out'}{'mysql'} ||= sub { 303 return $static{'db'}->do(@_) if $config{'sql_use_dbi'}; 304 305 cut 306 my @ret; 307 for my $st (@_) { 308 my $str = $st; # bugg 309 my $q = ( $str =~ /\"/ ? "'" : '"' ); 310 $str =~ s/$q//g; 311 $str =~ s/\`/\\`/g; 312 printlog( 'dmp', "via mysql cmdline query {$str}" ); 313 my $up = 314 ( $config{'sql_user'} ? "-u$config{'sql_user'} " : '' ) 315 . ( $config{'sql_pass'} ? "-p$config{'sql_pass'} " : '' ) 316 . ( $config{'sql_port'} ? "-P$config{'sql_port'} " : '' ); 317 push( @ret, `$config{'mysql'} $up $config{'sql_mysqlopt'} -e $q$str$q $config{'sql_base'}` ); 318 } 319 return @ret; 320 321}; 322=cut 323 324=TODO 325$processor{'out'}{'mysql_file'} ||= sub { 326 printprog("$config{'mysql'} $config{'sql_mysqlopt'} $config{'sql_base'} < $_") for @_; 327}; 328 329{ 330 my $gt = timer(); 331 $processor{'out'}{'sql'} ||= sub { 332 my $ret; 333 for my $str (@_) { 334 next unless $str; 335 if ( $config{'sql_use_dbi'} ) { 336 my $t = timer() if $config{'debug_time'}; 337 $ret += $static{'db'}->do($str); 338 printlog( 339 'time', 340 'SQL', 341 psmisc::human( 'time_period', $t->() ), 342 ',by', 343 $ret, 344 ( $t->() > 1 ? '=' . psmisc::human( 'float', $ret / $t->() ) . 'fps' : () ), 345 ',SCAN=', 346 psmisc::human( 'time_period', $gt->() ), 347 ',', 348 ( $gt->() > 1 ? '=' . psmisc::human( 'float', $ret / $gt->() ) . 'fps' : () ) 349 ) if $config{'debug_time'} and $config{'debug_time'} <= $ret; 350 $gt = timer() if $config{'debug_time'}; 351 } else { 352 open_out_file('hosts') unless $work{'current_name_work'}; 353 print I$str; 354 ++$ret; 355 } 356 } 357 return $ret; 358 }; 359} 360 361=cut 362 363$processor{'format'}{'dir'} ||= sub { 364 my $scan = int( time() ); 365 my $curent_process = shift(@_); 366 $curent_process =~ s/^(\S+:)?(\/*[^\/]+\/[^\/]+)\/(.+)$/$1$2/; 367 #printlog('dev', 'cp', $curent_process); 368 return unless $curent_process; 369 $curent_process =~ tr/\\/\//; 370 #printlog('dev', 'cp', $curent_process); 371 my $path = $3; 372 $path = $curent_process . '/' . $path; 373 $path =~ s/[\\\/]+$//g; 374 my $local; 375 $local = 1 if $curent_process =~ m{^(?:\w:|/)}; 376 $local = 0 if $curent_process =~ m{^//}; 377 my $prot = 'file'; 378 $prot = $config{'local_prot'} if $local and $config{'local_prot'}; 379 my $errors = 0; 380 my %files = (); 381BUG: for my $content (@_) { 382 my ( $host, $dir_ ) = 383 $local ? ( $config{'local_host'}, $curent_process ) : $curent_process =~ /^\/+([^\/]+)\/+([^\/]+)(.*)/; 384 #printlog('dev', 'cp', $curent_process, "hd[$host, $dir_]"); 385 #$host = $config{'local_host'} if $local; 386 387=z 388 if ($host) { 389 $files{''} = { 390 'prot' => $prot, 391 'host' => $host, 392 'scan' => $scan, 393 ( hconfig( 'unknown_time', $host ) ne '' ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $host ) ) : () ), 394 }; 395 396 $files{' '} = { 397 'prot' => $prot, 398 'host' => $host, 399 'path' => '/', 400 ( hconfig( 'unknown_time', $host ) ne '' ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $host ) ) : () ), 401 'scan' => $scan, 402 }; 403#=cut 404 405#if (length $dir_) { 406 my $path_ = ''; #'/'; 407#/ /a /a/b /a/b/c 408 for my $d ( split '/', '/' . $dir_ ) { 409 #printlog('dev','cd', $d, $path_); 410 $files{ ' ' . $path_ . $d } = { 411 'prot' => $prot, 412 'host' => $host, 413 'scan' => $scan, 414 'path' => $path_ || '/', 415 'name' => $d, 416 ( hconfig( 'unknown_time', $host ) ne '' ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $host ) ) : () ), 417 }; 418 $path_ .= '/' . $d if length $d; 419 } 420#} 421 } 422=cut 423 424 my $oneline = sub { 425 #printlog('dev', 'G0', $_); 426 my ( $mday, $mon, $year, $hours, $min, $size, $file, $ext, $desc, $utime ); 427 if (/^ (Directory of|Содержимое папки) (.+)\s*$/) { 428 #printlog('dbg', 'ins1', Dumper(\%files)); 429 #$static{'db'}->insert_hash_hash( $config{'sql_tfile'}, \%files ); 430 filesave( \%files ); 431 %files = (); 432 $path = $2; 433 $path =~ tr/\\/\//; 434 $path =~ s/[\x0a\x0d\x00-\x1F]+//g; 435 chomp($path); 436 #$path = cp_trans(hconfig('cp_share', $host), hconfig('cp_db', $host), $path); 437 #$path = cp_trans( hconfig( 'cp_res', $host, $prot ), hconfig( 'cp_db', $host, $prot ), $path ); 438 $config{'local_path'}->($path) if $local and ref $config{'local_path'} eq 'CODE'; 439 #printlog('dev', 'r0', $path), 440 return; 441 } elsif ( hconfig( 'skip_path_mask', $host ) and $path =~ /$config{'skip_path_mask'}/i ) { 442 ++$stat{'skipped'}; 443 #printlog('dev', 'r1'), 444 return; 445 } elsif (/^\s*Total Files Listed/) { 446 #printlog('dev', 'n2'), 447 next BUG; 448 } elsif ( /^Имя папки (.+) слишком длинно./ and ++$errors > hconfig( 'max_errors', $host, $prot ) ) { 449 printlog( 'err', 'max errors reached:', $errors, $_ ); 450 #printlog('dev', 'l3'), 451 last; 452 } 453 ( $mday, $mon, $year, $hours, $min, $size, $file ) = /(\d+)\.(\d+)\.(\d+)\s+(\d+)\:(\d+)\s+(\d+|\<DIR\>)\s+(.+)\s*/i 454 or return; 455 return if $size eq '0'; 456 $size = 0 if $size =~ /DIR/i; 457 return if $file =~ /^\.+$/; 458 $file =~ s/[\x0a\x0d\x00-\x1F]//g; 459 $file =~ s/\.$//g; 460 chomp($file); 461 return if hconfig( 'skip_hidden', $host ) and ( $file =~ /^\./ or $path =~ m;(^|/)\.; ); 462 fileparser( $file, $prot, $path . '/' . $file, $size, \%files ); 463 #$file = cp_trans( hconfig( 'cp_res', $host, $prot ), hconfig( 'cp_db', $host, $prot ), $file ); 464 my $lfile = $file; 465 $lfile =~ tr/A-Z/a-z/; 466 $files{$lfile}{'name'} = $file; 467 $year += 2000 if length($year) == 2; 468 $year = 1990 if ( ( $year > 2020 ) or ( $year < 1980 ) ); #timelocal BUG 469 $files{$lfile}{'time'} = timelocal( 0, $min, $hours, $mday, --$mon, $year ); 470 $files{$lfile}{'time'} = 640000000 if ( $files{$lfile}{'time'} > ( $scan + 2592000 ) ); 471 $files{$lfile}{'fullname'} = $prot . ':' . $path . '/' . $file; 472 $files{$lfile}{'prot'} = $prot; 473 ( $files{$lfile}{'host'}, $files{$lfile}{'path'} ) = $path =~ /^?\/?\/?([^\/]+)(.+)/; 474 $files{$lfile}{'ext'} = ( $files{$lfile}{'name'} =~ s/\.([^\.]+)$// ? $1 : '' ); 475 $files{$lfile}{'scan'} = $scan; 476 $files{$lfile}{'size'} = $size; 477 #printlog('dbg', 'ins', Dumper(\%files)); 478 }; 479 oneliner( $oneline, $content ); 480 #printlog('dbg', 'ins0', Dumper(\%files)); 481 #exit; 482 #$static{'db'}->insert_hash_hash( $config{'sql_tfile'}, \%files ); 483 #} continue { 484 #printlog('dbg', 'ins1', Dumper(\%files)); 485 } 486 #$static{'db'}->insert_hash_hash( $config{'sql_tfile'}, \%files ); 487 filesave( \%files ); 488 #%files = (); 489}; 490$processor{'format'}{'smb-ls'} ||= sub { 491 my $scan = int( time() ); 492 my $curent_process = shift(@_); 493 $curent_process =~ s/^(\S+:)?(\/*[^\/]+\/[^\/]+)\/(.+)$/$1$2/; 494 return unless $curent_process; 495 $curent_process =~ tr/\\/\//; 496 $curent_process =~ s{/+$}{}; 497 my $path = $3; 498 $path = $curent_process . '/' . $path; 499 $path =~ s/[\\\/]+$//g; 500 my $prot = 'file'; 501 502 for my $content (@_) { 503 my %data = (); 504 my %files = (); 505 my $root = 0; 506 my ( $host, $dir_ ) = $curent_process =~ /^\/+([^\/]+)\/+([^\/]+)(.*)/; 507 508=z 509 if ($host) { 510 $files{''} = { 511 'prot' => $prot, 512 'host' => $host, 513 ( hconfig( 'unknown_time', $host ) ne '' ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $host ) ) : () ), 514 'scan' => $scan 515 }; 516 $files{' '} = { 517 'prot' => $prot, 518 'host' => $host, 519 'path' => '/', 520 ( hconfig( 'unknown_time', $host ) ne '' ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $host ) ) : () ), 521 'scan' => $scan 522 }; 523 $files{' '} = { 524 'prot' => $prot, 525 'host' => $host, 526 'path' => '/', 527 'name' => $dir_, 528 ( hconfig( 'unknown_time', $host ) ne '' ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $host ) ) : () ), 529 'scan' => $scan 530 } 531 if $dir_; 532 533 534 535 536 537 } 538=cut 539 540 my $oneline = sub { 541 my ( $mday, $mon, $year, $hours, $min, $sec, $size, $file, $ext, $desc, $utime, $attr ); 542 if (/^(\\.*)\s*$/) { 543 #$static{'db'}->insert_hash_hash( $config{'sql_tfile'}, \%files ); 544 filesave( \%files ); 545 %files = (); 546 ( $path = $1 ) =~ s/\\+/\\/g; 547 #$path = cp_trans( hconfig( 'cp_res', $host, $prot ), hconfig( 'cp_db', $host, $prot ), $path ); 548 $path = $curent_process . $path; 549 $path =~ tr/\\/\//; 550 $path =~ s/[\x0a\x0d\x00-\x1F]//g; 551 chomp($path); 552 return; 553 } elsif ( hconfig( 'skip_path_mask', $host ) and $path =~ /$config{'skip_path_mask'}/i ) { 554 ++$stat{'skipped'}; 555 return; 556 } elsif ( hconfig( 'interrupt_path_re', $host ) and $path =~ $config{'interrupt_path_re'} ) { 557 printlog('warn', "found interrupt path [$path]"); 558 ++$work{'interrupt'}; 559 return; 560 } 561 ( $size, $mon, $mday, $hours, $min, $sec, $year ) = /\s+(\d+)\s+\w+\s+(\w+)\s+(\d+)\s+(\d+)\:(\d+)\:(\d+)\s+(\d+)\s*$/ 562 or return; 563 $file = $`; 564 $file =~ s/\s+([ADHSR]+)$//; 565 $attr = $1; 566 $file =~ s/(\s*$)|(^\s*)//g; 567 return if $file =~ /^\.+\s*$/; 568 return if hconfig( 'skip_hidden', $host ) and $attr =~ /H/; 569 return if $size eq '0' and !$attr =~ /D/; 570 $file =~ s/[\x0a\x0d\x00-\x1F]//g; 571 $file =~ s/\.$//g; 572 $file =~ s/\s*$//; 573 chomp($file); 574 return if hconfig( 'skip_hidden', $host ) and ( $file =~ /^\./ or $path =~ m;(^|/)\.; ); 575 #$file = cp_trans( hconfig( 'cp_res', $host, $prot ), hconfig( 'cp_db', $host, $prot ), $file ); 576 fileparser( $file, $prot, $path . hconfig( 'slash_perl', $host ) . $file, $size, \%files ); 577 my $lfile = $file; 578 $lfile =~ tr/A-Z/a-z/; 579 $files{$lfile}{'name'} = $file; 580 $year += 2000 if length($year) == 2; 581 $year = 1990 if ( ( $year > 2020 ) or ( $year < 1980 ) ); #timelocal BUG 582 $mon = $config{'lng'}{ $config{'lng_smbls'} }{'month_table'}{$mon} unless $mon =~ /^\d+$/; 583 $files{$lfile}{'time'} = timelocal( $sec, $min, $hours, $mday, $mon, $year ); 584 $files{$lfile}{'time'} = 640000000 if ( $files{$lfile}{'time'} > ( $scan + 2592000 ) ); 585 $files{$lfile}{'fullname'} = $prot . ':' . $path . '/' . $file; 586 $files{$lfile}{'prot'} = $prot; 587 ( $files{$lfile}{'host'}, $files{$lfile}{'path'} ) = $path =~ /^?\/?\/?([^\/]+)(.+)/; 588 $files{$lfile}{'ext'} = ( $files{$lfile}{'name'} =~ s/\.([^\.]+)$// ? $1 : '' ); 589 $files{$lfile}{'scan'} = $scan; 590 $files{$lfile}{'size'} = $size; 591 }; 592 oneliner( $oneline, $content ); 593 #$static{'db'}->insert_hash_hash( $config{'sql_tfile'}, \%files ); 594 filesave( \%files ); 595 } 596}; 597$processor{'prot'}{'file'}{'toplevel'} ||= sub { 598 return '' unless $config{'smbclient'} or $config{'net_view'}; 599 my ($host) = @_; 600 shelldata($host); 601 return unless $host =~ /\S/; 602 my $prot = 'file'; 603 #--user=username[%password] 604 #printlog("dev", 'opening'); 605 my $h = openproc( 606 '-|' . ':encoding(' . hconfig( 'cp_res', $host, $prot ) . ')', 607 qq{$config{'starter'} $config{'net_view'} $config{'smbclient'} } 608 . hconfig( 'smb_opt', $host ) 609 . qq{ $config{'view_opt'} "$config{'slash_net'}$host" $config{'stderr_redirect'}} 610 ) or return ''; 611 #printlog("dev", 'H:',$h); 612 while (<$h>) { 613 #printlog("dev", 'proc:',$_); 614 #next; 615 next unless /^\s*(.+)\s+((Disk)|(Диск)|(IPC))\s+(\w\:)?\s+(.*)\s*$/i; 616 my ( $share, $desc ) = ( $1, $7 ); 617 $share =~ s/\s*$//g; 618 next if hconfig( 'skip_hidden', $host ) and $share =~ /\$$/; 619 $processor{'prot'}{'file'}{'scan'}->( '//' . $host . '/' . $share ); 620 #?if ( $desc =~ /\S/ ) { 621 #$share = cp_trans( hconfig( 'cp_res', $host, $prot ), hconfig( 'cp_db', $host, $prot ), $share ); 622 $desc =~ s/\s+$//; 623 #$desc = cp_trans( hconfig( 'cp_res', $host, $prot ), hconfig( 'cp_db', $host, $prot ), $desc ); 624 #$static{'db'}->insert_hash_hash( 625 #printlog('dev',"ins [$share]"); 626 $static{'db'}->insert_hash( 627 $config{'sql_tfile'}, { 628 #'' => { 629 'prot' => $prot, 630 'host' => $host, 631 'path' => '/', 632 'name' => $share, 633 ( hconfig( 'unknown_time', $host ) ne '' ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $host ) ) : () ), 634 'desc' => $desc, 635 'scan' => int( time() ) 636 #} 637 } 638 ); 639 #} 640 } 641 close($h); 642 return ''; 643}; 644$processor{'prot'}{'file'}{'scan'} ||= sub { 645 my $prot = 'file'; 646 for (@_) { 647 my $where = $_; 648 next if $where =~ /^\s*$/; 649 printlog( 'smb', "$where : scanning..." ); 650 $where =~ s/\\|\//$config{'slash_dir'}/g; #RO VALUE SOMETIMES 651 $where =~ s/[\x0d\x0a\"]//g; 652 my ($host) = $where =~ /([\w.]+)/; 653 #print('dev', "wbef[$where]"); 654 my $wheres = $where; #cp_trans( hconfig( 'cp_res', $host, $prot ), hconfig( 'cp_shell', $host, $prot ), $where ); 655 $where = Encode::encode hconfig( 'cp_shell', $host, $prot ), $where, Encode::FB_DEFAULT; 656 #print('dev', "waft[$wheres]"); 657 my ($dir); 658 ( undef, $where, $dir ) = $where =~ /^(\S+:)?(\/*[^\/]+\/[^\/]+)?\/?(.+)?\/?$/; 659 $dir =~ s/\/+$//; 660 $dir = '-D ' . $dir if $config{'smbclient'} and $dir; 661 shelldata( $where, $dir ); 662 $where = qq|"$where"| if $where; 663 $dir = qq|"$dir"| if $dir; 664 #--user=username[%password] 665 my $h = openproc( 666 '-|' . ':encoding(' . hconfig( 'cp_res', $host, $prot ) . ')', 667 qq{$config{'starter'} $config{'smbclient'} } 668 . hconfig( 'smb_opt', $host ) 669 . qq{ $config{'dir_cmd'} $config{'smb_ls_opt'} $dir $where $config{'stderr_redirect'}} 670 ) or next; 671 #$wheres = cp_trans( hconfig( 'cp_shell', $host ), hconfig( 'cp_db', $host ), $wheres ); 672 $processor{'format'}{ hconfig( 'share_list_fmt', $host ) }->( $wheres, $h ); 673 #$processor{'format'}{ hconfig( 'share_list_fmt', $host ) }->( $where, $h ); 674 close($h); 675 } 676}; 677#== 678#$config{'scanner'}{ 'local' }{'no_res_stat'} =1; 679$processor{'prot'}{'local'}{'func'} ||= sub { 680 my $prot = 'local'; 681 for my $wheres (@_) { 682 #printlog('dev' , "prot'}{'local'}{'func",$wheres); 683 my %url = split_url($wheres); 684 $wheres =~ s|^$prot://||i; 685 $processor{'prot'}{$prot}{'scan'}->( ( 686 $wheres 687 #( $wheres =~ m|[^/]+/[^/]+| or $wheres =~ m{^(\.|/)} ) 688 #? ( ( $wheres =~ m{^(\.|/)} ? '' : '//' ) . $wheres ) 689 #: ( ( $wheres =~ /^\w\:/ ) ? $wheres : $processor{'prot'}{$prot}{'toplevel'}->($wheres) ) 690 ) 691 ); 692 purge_res( \%url ) if $config{'purge_every_res'}; 693 } 694}; 695$processor{'prot'}{'local'}{'get'} ||= sub { return $_[0]; }; 696$processor{'prot'}{'local'}{'scan'} ||= sub { 697 my $prot = $config{'local_prot'} || 'local'; 698 for my $where (@_) { 699 #my $where = $_; 700 next if $where =~ /^\s*$/; 701 printlog( 'local', "$where : scanning..." ); 702 $where =~ s/\\|\//$config{'slash_dir'}/g; 703 $where =~ s/[\x0d\x0a\"]//g; 704 my ($host) = $config{'local_host'}; #$where =~ /([\w.]+)/; 705 my $wheres = $where; # = cp_trans( hconfig( 'cp_res', $host, $prot ), hconfig( 'cp_shell', $host, $prot ), $where ); 706 $where = Encode::encode hconfig( 'cp_shell', $host, $prot ), $where, Encode::FB_DEFAULT; 707 my ($dir); 708 #( $where, $dir ) = $where =~ /^((?:\S+:)?\/*[^\/]+\/[^\/]+)?\/?(.+)?\/?$/; 709 #$dir =~ s/\/+$//; 710 #$dir = '-D ' . $dir if $config{'smbclient'} and $dir; 711 shelldata($where); 712 $where = qq|"$where"| if $where; 713 #$dir = qq|"$dir"| if $dir; 714 #--user=username[%password] 715 my $h = openproc( 716 '-|' . ':encoding(' . hconfig( 'cp_res', $host, $prot ) . ')', 717#"$config{'starter'} $config{'smbclient'} $config{'smb_opt'} $config{'dir_cmd'} $config{'smb_ls_opt'} $dir $where $config{'stderr_redirect'} |" 718 qq{$config{'starter'} $config{'local_cmd'} $config{'local_opt'} $dir $where $config{'stderr_redirect'}} 719 ) or next; 720 #$wheres = cp_trans( hconfig( 'cp_shell', $host ), hconfig( 'cp_db', $host ), $wheres ); 721 #printlog('dev', 'parsef', hconfig( 'local_list_fmt', $host )); 722 $processor{'format'}{ hconfig( 'local_list_fmt', $host ) }->( $wheres, $h ); 723 close($h); 724 } 725}; 726 727sub purge_res { 728 local $config{'purge'} = ( $config{'purge_res_time'} or $config{'period'} * 2 ); 729 psmisc::program_one( 'purge', undef, $config{'sql_tfile'}, 730 'AND ' . $static{'db'}->where_body( $_[0], undef, $config{'sql_tfile'} ) ) 731 if $config{'purge_every_res'}; 732} 733$processor{'prot'}{'file'}{'func'} ||= sub { 734 my $prot = 'file'; 735 for my $wheres (@_) { 736 my %url = split_url($wheres); 737 $wheres =~ s|^$prot://||i; 738 $processor{'prot'}{$prot}{'scan'}->( ( 739 ( $wheres =~ m|[^/]+/[^/]+| or $wheres =~ m{^(\.|/)} ) 740 ? ( ( $wheres =~ m{^(\.|/)} ? '' : '//' ) . $wheres ) 741 : ( ( $wheres =~ /^\w\:/ ) ? $wheres : $processor{'prot'}{$prot}{'toplevel'}->($wheres) ) 742 ) 743 ); 744 purge_res( \%url ) if $config{'purge_every_res'}; 745 } 746}; 747$processor{'prot'}{'file'}{'get'} ||= sub { 748 return $_[0] if $config{'system'} =~ /win/i; 749 my ( $file, $service, $save ) = @_; 750 my $prot = 'file'; 751 $file =~ s/\|\>\<\"//g; 752 ( undef, $service, $file ) = $file =~ /^(\S+:)?(\/*[^\/]+\/[^\/]+)\/(.+)$/; 753 $save = encode_url( $prot . ':' . $service . '/' . $file, $config{'encode_url_file_mask'} ); 754 $file =~ s/\/+/\\\\/g; 755 #$file =~ s/\'/\'\'/g; bbb u u ggg ' symol in file name 756 #$file =~ s/\'/\\\'\\\'/g; bbb u u ggg !!!TODO test with " 757 #$file =~ s/\'/\\\'/g; bbb uuu ggg 758 return '' if $file =~ /\'/; 759 my ($host) = $service =~ /([\w.]+)/; 760 #$file = cp_trans( hconfig( 'cp_db', $host, $prot ), hconfig( 'cp_res', $host, $prot ), $file ); 761 $file = Encode::encode hconfig( 'cp_shell', $host, $prot ), $file, Encode::FB_DEFAULT; 762 $save = hconfig('tmpdir') . hconfig( 'slash_sys', $host ) . $save; 763 return $save if -e $save and time - $^T + 86400 * -M $save < hconfig( 'purge', $host, $prot ); 764 shelldata( $file, $save, $service ); 765 printprog( 766 qq{$config{'starter'} $config{'smbclient'} } 767 . hconfig( 'smb_opt', $host ) 768 . qq{ $config{'get_opt'} -c 'get "$file" "$save"' "$service"}, 769 1 770 ); 771 return $save; 772}; 773{ 774 my ($ftp); 775 $processor{'prot'}{'ftp'}{'get'} ||= sub { 776 my ( $file, $save ) = @_; 777 my %url = split_url($file); 778 $save = encode_url( $file, $config{'encode_url_file_mask'} ); 779 $file = join_url( { 'path' => $url{'path'}, 'name' => $url{'name'}, 'ext' => $url{'ext'} } ); 780 $save = hconfig( 'tmpdir', $url{'host'} ) . hconfig( 'slash_sys', $url{'host'} ) . $save; 781 $save =~ s/^(.{240}).*/$1/; 782 return $save if -e $save and time - $^T + 86400 * -M $save < hconfig( 'purge', $url{'host'}, $url{'prot'} ); 783 $ftp = 784 $work{'ftp'} 785 { join_url( { 'user' => $url{'user'}, 'pass' => $url{'pass'}, 'host' => $url{'host'}, 'port' => $url{'port'} } ) }; 786 #printlog('dev' ,"1:$ftp;", %url); 787 return if ftp_keep( $$ftp, %url ); 788 #printlog('dev' ,"2:$ftp;$$ftp;$$ftp;"); 789 eval { $$$ftp->get( $file, $save ); }; 790 return $save; 791 }; 792 END { $$$ftp->close() if !ftp_check($$ftp); } 793} 794$processor{'prot'}{'rsync'}{'toplevel'} ||= sub { 795 return '' unless $config{'rsync'}; 796 my ($host) = @_; 797 #my $prot = 'rsync'; 798 shelldata($host); 799 my %url = split_url($host); 800 return unless $host =~ /\S/; 801 my $h = openproc( '-|' . ':encoding(' . hconfig( 'cp_res', $url{'host'}, $url{'prot'} ) . ')', 802 "$config{'starter'} $config{'rsync'} $config{'rsync_param'} \"$host\" $config{'stderr_redirect'}" ) 803 or return ''; 804 my %scanned; 805 while (<$h>) { 806 next if /^rsync(?: error)?:/i; 807 next unless /(\S+)\s+(.*)\s*/; 808 my ( $share, $desc ) = ( $1, $2 ); 809 next if $scanned{$share}++; 810 $processor{'prot'}{'rsync'}{'scan'}->( $host . '/' . $share ); 811 if ( $desc =~ /\S/ ) { 812 #$share = cp_trans( hconfig( 'cp_rsync', $host ), hconfig( 'cp_db', $host ), $share ); 813 $desc =~ s/\s+$//; 814 #$desc = cp_trans( hconfig( 'cp_rsync', $host ), hconfig( 'cp_db', $host ), $desc ); 815 #$static{'db'}->insert_hash_hash( $config{'sql_tfile'}, 816 filesave( { 817 '' => { 818 %url, 819 'path' => '/', 820 'name' => $share, 821 ( hconfig( 'unknown_time', $host ) ne '' ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $host ) ) : () ), 822 'desc' => $desc, 823 'scan' => int( time() ) 824 } 825 } 826 ); 827 } 828 } 829 close($h); 830 return ''; 831}; 832$processor{'prot'}{'rsync'}{'scan'} ||= sub { 833 for (@_) { 834 my $where = $_; 835 next if $where =~ /^\s*$/; 836 printlog( 'rsync', "$where : scanning..." ); 837 shelldata($where); 838 my %url = split_url($where); 839 #$where = cp_trans( hconfig( 'cp_rsync', $url{'host'} ), hconfig( 'cp_shell', $url{'host'} ), $where ); 840 my $wheres = Encode::encode hconfig( 'cp_shell', $url{'host'}, $url{'prot'} ), $where, Encode::FB_DEFAULT; 841 my $h = openproc( '-|' . ':encoding(' . hconfig( 'cp_res', $url{'host'}, $url{'prot'} ) . ')', 842 "$config{'starter'} $config{'rsync'} $config{'rsync_param'} \"$wheres\" $config{'stderr_redirect'}" ) 843 or next; 844 #$where = cp_trans( hconfig( 'cp_shell', $url{'host'} ), hconfig( 'cp_db', $url{'host'} ), $where ); 845 $processor{'format'}{'rsync'}->( $where, $h ); 846 close($h); 847 } 848}; 849$processor{'prot'}{'rsync'}{'get'} ||= sub { 850 #TODO 851}; 852$processor{'prot'}{'rsync'}{'func'} ||= sub { 853 for my $wheres (@_) { 854 my %url = split_url($wheres); 855 $processor{'prot'}{'rsync'}{'scan'}->( ( ( $url{'path'} and $url{'path'} ne '/' ) or $url{'name'} ) 856 ? $wheres 857 : $processor{'prot'}{'rsync'}{'toplevel'}->($wheres) ); 858 purge_res( \%url ) if $config{'purge_every_res'}; 859 } 860}; 861$processor{'format'}{'rsync'} ||= sub { 862 my $url = shift(@_); 863 $url .= '/' unless $url =~ m|/$|; 864 my %url = split_url($url); 865 my $scan = int( time() ); 866 my $path = $url{'path'}; 867 $path =~ s/[\\\/]+$//g; 868 my $lastpath = $path; 869 for my $content (@_) { 870 my %data = (); 871 my %files = (); 872 873=z 874 if ( $url{'host'} ) { # ?? 875 my ($dir_) = $url{'path'} =~ /([^\/]+)/; 876 $files{''} = { 877 'prot' => $url{'prot'}, 878 'host' => $url{'host'}, ( 879 hconfig( 'unknown_time', $url{'host'} ) ne '' 880 ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $url{'host'} ) ) 881 : () 882 ), 883 'scan' => $scan 884 }; 885 $files{' '} = { 886 'prot' => $url{'prot'}, 887 'host' => $url{'host'}, 888 'path' => '/', ( 889 hconfig( 'unknown_time', $url{'host'} ) ne '' 890 ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $url{'host'} ) ) 891 : () 892 ), 893 'scan' => $scan 894 }; 895 $files{' '} = { 896 'prot' => $url{'prot'}, 897 'host' => $url{'host'}, 898 'path' => '/', 899 'name' => $dir_, ( 900 hconfig( 'unknown_time', $url{'host'} ) ne '' 901 ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $url{'host'} ) ) 902 : () 903 ), 904 'scan' => $scan 905 } 906 if $dir_; 907 } 908=cut 909 910 my $oneline = sub { 911 chomp; 912 s/[\x0a\x0d\x00-\x1F]//g; 913 #printlog( 'rsyncdmp', 'ol', $_ ); 914 my ( $mday, $mon, $year, $hours, $min, $sec, $utime, $attr ); #$size, $name ,$ext, $desc, 915 my (%file); 916 #warn $_; 917 ( $attr, $file{'size'}, $year, $mon, $mday, $hours, $min, $sec, $file{'path'}, $file{'name'}, $file{'ext'} ) = 918 m|^(\S+)\s+(\d+)\s+(\d+)/(\d+)/(\d+)\s+(\d+):(\d+):(\d+)\s+(?:(.+)/)?(.*?)(?:\.([^\.]+))?\s*$| 919 or return; 920 $file{'path'} = $path . ( $file{'path'} ? '/' : '' ) . $file{'path'}; 921 $file{'path'} =~ s/[\x0a\x0d\x00-\x1F]//g; 922 if ( $lastpath ne $file{'path'} ) { 923 #printlog( 'rsyncdbg', 'path change from', $lastpath, $file{'path'}, 'uploaded', scalar %files ); 924 #$static{'db'}->insert_hash_hash( $config{'sql_tfile'}, \%files ); 925 filesave( \%files ); 926 %files = (); 927 $lastpath = $file{'path'}; 928 } 929 return if $file{'name'} =~ /^\.+\s*$/; 930 return if $file{'size'} == 0 and !$attr =~ /^d/; 931 $file{'size'} = 0 if $attr =~ /^d/; 932 $file{'file'} = $file{'name'} . ( $file{'ext'} ? '.' : '' ) . $file{'ext'}; 933 return if hconfig( 'skip_hidden', $url{'host'} ) and ( $file{'file'} =~ /^\./ or $file{'path'} =~ m;(^|/)\.; ); 934 #$file = cp_trans(hconfig('cp_rsync', $url{'host'}), hconfig('cp_db', $url{'host'}), $file); 935 fileparser( $file{'file'}, $url{'prot'}, 936 $url{'prot'} . '://' . $file{'path'} . hconfig( 'slash_perl', $url{'host'} ) . $file{'file'}, 937 $file{'size'}, \%files ); 938 my $lfile = $file{'file'}; 939 $lfile =~ tr/A-Z/a-z/; 940 ( $file{'prot'}, $file{'host'}, $file{'scan'} ) = ( $url{'prot'}, $url{'host'}, $scan ); 941 $year = 1990 if ( ( $year > 2020 ) or ( $year < 1980 ) ); #timelocal BUG 942 $file{'time'} = 640000000 943 if ( ( $file{'time'} = timelocal( $sec, $min, $hours, $mday, --$mon, $year ) ) > ( $scan + 2592000 ) ); 944 %{ $files{$lfile} } = %file; 945 }; 946 oneliner( $oneline, $content ); 947 #$static{'db'}->insert_hash_hash( $config{'sql_tfile'}, \%files ); 948 filesave( \%files ); 949 } 950}; 951 952$processor{'prot'}{'nfs'}{'get'} ||= sub { return $_[0]; }; 953 954$processor{'prot'}{'nfs'}{'func'} ||= sub { 955 for my $wheres (@_) { 956 my %url = split_url($wheres); 957 $processor{'prot'}{'nfs'}{'scan'}->( ( ( $url{'path'} and $url{'path'} ne '/' ) or $url{'name'} ) 958 ? $wheres 959 : $processor{'prot'}{'nfs'}{'toplevel'}->($wheres) ); 960 purge_res( \%url ) if $config{'purge_every_res'}; 961 } 962}; 963$processor{'prot'}{'nfs'}{'toplevel'} //= sub { 964 #return '' unless $config{'rsync'}; 965 my ($host) = @_; 966 #my $prot = 'rsync'; 967 shelldata($host); 968 my %url = split_url($host); 969 return unless $host =~ /\S/; 970 #printlog('nfs', 'mount', ` $config{'showmount'} $url{host}`); 971 #printlog('nfs', 'mount-a', ` $config{'showmount'} -a $url{host}`); 972 #printlog('nfs', 'mount-e', ` $config{'showmount'} -e $url{host}`); 973 #printlog('nfs', 'mount-d', ` $config{'showmount'} -d $url{host}`); 974 #printlog( 'nfs', 'mount-e', ` $config{'showmount'} -e $url{host}` ); 975 my $h = openproc( '-|' . ':encoding(' . hconfig( 'cp_res', $url{'host'}, $url{'prot'} ) . ')', 976 "$config{'starter'} $config{'showmount'} $config{'showmount_param'} $url{host} $config{'stderr_redirect'}" ) 977 or return ''; 978 my %scanned; 979 my $line; 980 while (<$h>) { 981 printlog( 'nfs', $config{'showmount'}, $config{'showmount_param'}, $_ ); 982 next unless $line++; 983 #next if m{(?:can't do mountdump rpc|Unable to receive)}i; #' 984 #printlog( 'nfs', '-a:', $_ ); 985 next if /^(?:showmount|RPC):/; 986 #next unless /^(\S+):(\S.*)\s*/; 987 next unless /^\s*(\S+)(?:\s+(\S.*))?\s*/; 988 my ( $share, $desc ) = ( $1, $2 ); 989 #my ( $hostname, $share ) = ( $1, $2); 990 #printlog('nfs', 'parsed:', ( $hostname, $share )); 991 next if $scanned{$share}++; 992 $processor{'prot'}{'nfs'}{'scan'}->( $host . $share ); 993 #if ( $desc =~ /\S/ ) { 994 if ( $share =~ /\S/ ) { 995 #$share =~ s{^/}{}; 996 #$share .= '/'; 997 #$share = cp_trans( hconfig( 'cp_rsync', $host ), hconfig( 'cp_db', $host ), $share ); 998 #$desc =~ s/\s+$//; 999 #$desc = cp_trans( hconfig( 'cp_rsync', $host ), hconfig( 'cp_db', $host ), $desc ); 1000 #$static{'db'}->insert_hash_hash( $config{'sql_tfile'}, 1001 #=no 1002 my ( $path, $name ) = $share =~ m{^(.+)?/([^/]+)$}; 1003 filesave( { 1004 '' => { 1005 %url, 1006 'path' => $path, 1007 'name' => $name, 1008 ( hconfig( 'unknown_time', $host ) ne '' ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $host ) ) : () ), 1009 'meta' => "mountpoint=1", 1010 'desc' => $desc, 1011 'scan' => int( time() ) 1012 } 1013 } 1014 ); 1015 #=cut 1016 #update_to_root( %url, %url, $local, $url_orig ); 1017 my %res = ( 1018 %url, 1019 'path' => $share, #. '/', 1020 #desc=>'desc!', 1021 ); 1022 local %_ = ( 1023 %res, 1024 'path' => '', 1025 'name' => '', 1026 'ext' => '', 1027 #'desc' => $work{'desc'}{ $url{'host'} }{ $url{'prot'} }, 1028 'scan' => int( time() ) 1029 ); 1030 update_to_root( %res, %_ ); 1031 } 1032 } 1033 close($h); 1034 return ''; 1035}; 1036$processor{'prot'}{'nfs'}{'scan'} //= sub { 1037 1038=later 1039 1040point= pointroot/host/path 1041 1042mount -t nfs -o soft,intr,retrans=10,retrycnt=2,timeout=10 1043 1044ls 1045 1046 local $config{'local_prot'} = 'nfs'; 1047 $processor{'prot'}{'nfs'}{'scan'}->() 1048 1049 1050 1051umount 1052 1053 1054 1055=cut 1056 1057 for my $where (@_) { 1058 #my $where = $_; 1059 next if $where =~ /^\s*$/; 1060 printlog( 'nfs', "$where : scanning..." ); 1061 shelldata($where); 1062 my %url = split_url($where); 1063 #dmp \%url; 1064 my $path = join_url( { path => $url{'path'}, name => $url{'name'} } ); 1065 #(my $point_name = $where) =~ s/\W/_/g; 1066 #$point_name = $path; 1067 my $point_host = "$config{'nfs_root'}/$url{host}"; 1068 my $point = "$point_host$path"; 1069 my $exists = -d $point; 1070 my @dirs = mkdir_rec $point; 1071 my ($mount) = "$url{'host'}:" . $path; 1072 #$where = cp_trans( hconfig( 'cp_rsync', $url{'host'} ), hconfig( 'cp_shell', $url{'host'} ), $where ); 1073 my $wheres = Encode::encode hconfig( 'cp_shell', $url{'host'}, $url{'prot'} ), $where, Encode::FB_DEFAULT; 1074 #printprog("mount -t nfs -o soft,intr,retrans=10,retrycnt=2,timeout=10 $mount $point"); # unless $exists; 1075 printprog("$config{'starter'} $config{'mount'} $config{'mount_param'} $mount $point"); 1076 printprog("$config{'starter'} $config{'df'} $config{'df_param'} $point"); 1077 local $config{'local_root_path'} = $point_host; 1078 local $config{'local_prot'} = 'nfs'; 1079 local $config{'local_host'} = $url{'host'}; 1080 $point .= '/'; 1081 $processor{'prot'}{'local'}{'scan'}->($point); 1082 printprog("$config{'starter'} $config{'umount'} $config{'umount_param'} $point") if $point; 1083 rmdir for reverse @dirs; 1084 #unlink $point; 1085 } 1086}; 1087$processor{'format'}{'ls'} ||= sub { 1088 my $url = shift @_; 1089 $url =~ tr/\\/\//; 1090 #printlog('dev', 'ls', $url); 1091 my $local; 1092 $local = 1 if $url =~ m{^(?:\w:|/)}; 1093 my ( $user, $pass, $host, $port, $path ) = @{ split_url($url) }{ 'user', 'pass', 'host', 'port', 'path' }; 1094 my $prot = 'ftp'; 1095 $prot = $config{'local_prot'} if $local and $config{'local_prot'}; 1096 $host = $config{'local_host'} if $local and $config{'local_host'}; 1097 my $path_cut = $config{'local_root_path'}; 1098 $path_cut =~ s{^(/)}{$1?}; 1099 $path =~ s{^$path_cut/?}{} if $path_cut; 1100 my (%files); 1101 1102 for my $content (@_) { 1103 my $scan = int( time() ); 1104 my $lfile; 1105 my $oneline = sub { 1106 s/[\x0a\x0d]//g; 1107 #printlog('dev', 'lsdmp', $_); 1108 my ( $mday, $mon, $year, $hours, $min, $size, $file, $ext ); 1109 if (/^\.?\/?(.*)\s*:\s*$/) { 1110 $path = $1; 1111 $path =~ s{^$path_cut/?}{} if $path_cut; 1112 #$static{'db'}->insert_hash_hash( $config{'sql_tfile'}, \%files ); 1113 filesave( \%files ); 1114 %files = (); 1115 return; 1116 } elsif ( $config{'skip_path_mask'} and $path =~ /$config{'skip_path_mask'}/i ) { 1117 ++$stat{'skipped'}; 1118 return; 1119 } elsif (/^\s*[rd\-]/) { 1120 ( $size, $mon, $mday, $hours, $min, $file ) = 1121 /^\s*\S+\s+\d*\s*\S+\s*\S*\s+(\d+)\s+(\S+)\s+(\d+)\s+(\d+):?(\d+)?\s+(.+)\s*$/; 1122 return if $file =~ /^\.+\s*$/; 1123 return if $size == 0 and !/^d/; 1124 $size = 0 if /^d/; 1125 $file =~ s/[\x0a\x0d\x00-\x1F]//g; 1126 $file =~ s/(\.|\s)*$//g; 1127 chomp($file); 1128 return if hconfig( 'skip_hidden', $host ) and ( $file =~ /^\./ or $path =~ m;(^|/)\.; ); 1129 $lfile = $file; 1130 $lfile =~ tr/A-Z/a-z/; 1131 $files{$lfile}{'name'} = $file; 1132 $files{$lfile}{'size'} = $size; 1133 $mon = $config{'lng'}{ $config{'lng_ls'} }{'month_table'}{$mon}; 1134 1135 if ( $min eq '' ) { 1136 $year = $hours; 1137 $hours = $min = 0; 1138 } else { 1139 $year = ( localtime(time) )[5] + 1900; #$curyear; 1140 --$year if $mon > ( localtime(time) )[4]; 1141 } 1142 $year = 1990 if $year > 2020 or $year < 1980; #timelocal BUG 1143 $mday = 28 if ( $mon == 1 and $mday > 28 ) or !$mday; #Another bug 8( 1144 eval { #no more bugs 8) 1145 $files{$lfile}{'time'} = timelocal( 0, $min, $hours, $mday, $mon, $year ); 1146 }; 1147 $files{$lfile}{'time'} = 640000000 if ( $files{$lfile}{'time'} > ( $scan + 86400 ) ); # 2592000 1148 } else { 1149 return; 1150 } 1151 fileparser( 1152 $file, $prot, 1153 join_url( { 1154 'prot' => $prot, 1155 'user' => $user, 1156 'pass' => $pass, 1157 'host' => $host, 1158 'port' => $port, 1159 'path' => $path, 1160 'name' => $file 1161 } 1162 ), 1163 $size, 1164 \%files 1165 ); 1166 $files{$lfile}{'fullname'} = join_url( { 1167 'prot' => $prot, 1168 'user' => $user, 1169 'pass' => $pass, 1170 'host' => $host, 1171 'port' => $port, 1172 'path' => $path, 1173 'name' => $file 1174 } 1175 ); 1176 $files{$lfile}{'fullname'} =~ s/[\x00-\x1F]//g; 1177 #printlog"cpdet", $host, Dumper $work{$host}{'detectcp'}, $_ , hconfig( 'cp_res', $host, $prot ), 1178 #$files{$lfile}{'desc'} = "cp=$_ " . $files{$lfile}{'desc'} 1179 $files{$lfile}{'meta'} .= " cp=$_" 1180 if $_ = cp_detect_trans( $files{$lfile}{'fullname'}, \%{ $work{$host}{'detectcp'} }, undef, undef, $prot, $host ) 1181 and $_ ne hconfig( 'cp_res', $host, $prot ); 1182 ( 1183 $files{$lfile}{'prot'}, $files{$lfile}{'user'}, $files{$lfile}{'pass'}, $files{$lfile}{'host'}, 1184 $files{$lfile}{'port'}, $files{$lfile}{'path'}, $files{$lfile}{'name'}, $files{$lfile}{'ext'} 1185 ) = @{ split_url( $files{$lfile}{'fullname'} ) }{ 'prot', 'user', 'pass', 'host', 'port', 'path', 'name', 'ext' }; 1186 $files{$lfile}{'host'} = full_host( { 'host' => $host, 'user' => $user, 'pass' => $pass, 'port' => $port } ); 1187 $files{$lfile}{'path'} ||= '/'; 1188 $files{$lfile}{'scan'} = $scan; 1189 }; 1190 oneliner( $oneline, $content ); 1191 } 1192 filesave( \%files ); 1193 # $static{'db'}->insert_hash_hash( $config{'sql_tfile'}, \%files ); 1194}; 1195 1196sub ftp_reconnect { 1197 my ( $ftp, %url ) = @_; 1198 $$ftp->close() if defined $$ftp; 1199 $work{'ftp_connected'} = 0; 1200 printlog( 1201 'hint', 1202 "FTP RECONNECTS1: host=$url{'host'} ftp_reconnects=$stat{'ftp_reconnects'} ftp_reconnect_max=", 1203 hconfig( 'ftp_reconnect_max', $url{'host'} ), 1204 'f=', 1205 $work{'filec'}->( $stat{'files'} ), 1206 'd=', 1207 $work{'dirc'}->( $stat{'dirs'} ), 1208 's=', 1209 $work{'sizec'}->( $stat{'size'} ) 1210 ), 1211 return 3 1212 if ++$stat{'ftp_reconnects'} >= hconfig( 'ftp_reconnect_max', $url{'host'} ); 1213 eval { 1214 $$ftp = Net::FTP->new( 1215 $url{'host'}, 1216 'Timeout' => hconfig( 'timeout', $url{'host'}, $url{'prot'} ), 1217 ( hconfig( 'passive', $url{'host'} ) ? ( 'Passive' => 1 ) : () ), ( $url{'port'} ? ( 'Port' => $url{'port'} ) : () ), 1218 %{ hconfig( 'sockopts', $url{'host'}, $url{'prot'} ) || {} } 1219 ); 1220 }; 1221 printlog( 'warn', 'ftp connect err', $@ ) if $@; 1222 return 1 if !defined $$ftp; 1223 my $message = $$ftp->message(); 1224 #printlog('ftp', "FTP: message: $message"); 1225 $message =~ s/\s+$//; 1226 if ( 1227 !hconfig( 'ftp_recurse_force', $url{'host'} ) and !defined( $config{'fine'}{ $url{'host'} }{'ftp_recurse_force'} ) and ( 1228 $message =~ /PureFTPd/i || $message =~ /Pure-FTPd/i # PureFTPd sends only 2000 lines of "ls -lR" 1229 || $message =~ /CesarFTP/i # CesarFTP does not print "<DIRECTORY>:" 1230 || $message =~ /GuildFTPd/i # GuildFTPd gets crazy when attempting to use "ls -lR" 1231 || $message =~ /Microsoft/i # ??? can't parse output of "ls -lR" of Microsoft FTP server 1232 || $message =~ /Serv-U/i # Serv-U's "ls -lR" does not work well in all cases 1233 || $message =~ /Gene6 FTP Server/i 1234 ) 1235 ) 1236 { 1237 $config{'fine'}{ $url{'host'} }{'ftp_recurse_force'} = 1; 1238 printlog( 'ftp', "ftp_recurse_force=1 for $url{'host'}" ); 1239 } 1240 if ( 1241 !$$ftp->login( 1242 ( $url{'user'} or hconfig( 'anonymous_user', $url{'host'} ) ), 1243 ( $url{'pass'} or hconfig( 'anonymous_pass', $url{'host'} ) ) 1244 ) 1245 ) 1246 { 1247 $$ftp->close() if defined $$ftp; 1248 printlog( 'ftp', "FTP: cannot login", join_url( \%url ) ); 1249 return 2; 1250 } 1251 psmisc::cp_detect_trans($message); 1252 printlog( 'ftp', 1253 "FTP: connected ($stat{'ftp_reconnects'}/" 1254 . hconfig( 'ftp_reconnect_max', $url{'host'} ) . ") to " 1255 . join_url( \%url ) 1256 . " pasv=" 1257 . hconfig( 'passive', $url{'host'} ) 1258 . "; [$message]" ); 1259 $work{'desc'}{ $url{'host'} }{'ftp'} = $message; 1260 ++$work{'ftp_connected'}; 1261 $work{'ftp'} 1262 { join_url( { 'user' => $url{'user'}, 'pass' => $url{'pass'}, 'host' => $url{'host'}, 'port' => $url{'port'} } ) } = \$ftp; 1263 #$ftp; 1264 #printlog("ftpis", "$ftp, $$ftp;"); 1265 return 0; 1266} 1267 1268sub ftp_reconnect_while { 1269 my ( $ftp, %url ) = @_; 1270 my $retry = 0; 1271 do { 1272 return 0 unless ftp_reconnect( $ftp, %url ); # !!!??? 1273 return 2 unless $work{'ftp_connected'}; 1274 printlog( 1275 'hint', 1276 "FTP RECONNECTS2: host=$url{'host'} ftp_reconnects=$stat{'ftp_reconnects'} ftp_reconnect_max=", 1277 hconfig( 'ftp_reconnect_max', $url{'host'} ) 1278 ), 1279 return 3 1280 if $stat{'ftp_reconnects'} >= hconfig( 'ftp_reconnect_max', $url{'host'} ); 1281 mysleep($_) if ( $_ = hconfig( 'ftp_reconnect_sleep', $url{'host'} ) ); 1282 printlog( 'ftp', 1283 "FTP $url{'host'} reconnecting $retry ($stat{'ftp_reconnects'}/" 1284 . hconfig( 'ftp_reconnect_max', $url{'host'} ) 1285 . ") [$@]" ); 1286 } while ( $retry++ <= hconfig( 'ftp_reconnect_retry', $url{'host'} ) ); 1287 return 1; 1288} 1289 1290sub ftp_check { 1291 my ($ftp) = @_; 1292 return 1 unless ( defined $ftp and defined $$ftp and $ftp and $$ftp ); 1293 my $code = $$ftp->code(); 1294 return 2 if grep( $code eq $_, ( '599', '000', '200', '425', '150' ) ); 1295 return 3 unless ( defined $ftp and defined $$ftp and $ftp and $$ftp ); 1296 return 0; 1297} 1298 1299sub ftp_keep { 1300 my ( $ftp, %url ) = @_; 1301 #printlog('dev', 'ftp_keep', $ftp); 1302 return ftp_reconnect_while( $ftp, %url ) if ftp_check($ftp); 1303 return 0; 1304} 1305 1306sub _list_cmd_ { #from NET:FTP 1307 my $ftp = shift; 1308 my $cmd = 'LIST'; 1309 my $res = pop; 1310 delete ${*$ftp}{'net_ftp_port'}; 1311 delete ${*$ftp}{'net_ftp_pasv'}; 1312 my $data; 1313 eval { $data = $ftp->_data_cmd( $cmd, @_ ) }; 1314 printlog( 'warn', "ftp LIST cmd failed", $@ ), return if $@; 1315 return unless ( defined $data ); 1316 require Net::FTP::A; 1317 bless $data, "Net::FTP::A"; # Force ASCII mode 1318 my $databuf = ''; 1319 my $buf = ''; 1320 @_ = (); 1321 my $blksize = ${*$ftp}{'net_ftp_blksize'}; 1322 1323 while ( $data->read( $databuf, $blksize ) ) { 1324 $buf .= $databuf; 1325 for my $line ( split( /\n/, $buf ) ) { 1326 $processor{'format'}{'ls'}->( $res, \@_ ), @_ = () if @_ and $line =~ /\:\s*$/ and !$_[$#_]; 1327 push @_, $line; #if $line; 1328 } 1329 $buf = ( $buf !~ /\n$/ ? "\n" . pop @_ : '' ); 1330 } 1331 $processor{'format'}{'ls'}->( $res, \@_ ) if @_; 1332 $data->close(); 1333} 1334{ 1335 my ( @toscan, $toscanfile ); 1336 1337 sub savearray { 1338 my ( $file, $arr ) = @_; 1339 return unless ( $file and $arr and open( TSO, '>', $file ) ); 1340 while ( defined( $_ = shift @$arr ) ) { print TSO$_ . "\n" if $_; } 1341 close(TSO); 1342 } 1343 $processor{'prot'}{'ftp'}{'func'} ||= sub { 1344 #printlog( 'dbg', "prot'}{'ftp'}{'func" ); 1345 foreach my $url (@_) { 1346 $url .= '/' if $url !~ m|/$|; 1347 my %url = split_url($url); 1348 my $ftp; 1349 $stat{'ftp_reconnects'} = $work{'ftp_connected'} = 0; 1350 next if ftp_keep( \$ftp, %url ); 1351 my ($content); 1352 my $tryrec = 1; 1353 $stat{'ftp_errors'} = 0; 1354 1355 unless ( hconfig( 'ftp_recurse_force', $url{'host'} ) ) { 1356 _list_cmd_( 1357 $ftp, 1358 "$config{'ls_opt'} $url{'path'}", 1359 join_url( { 1360 'user' => $url{'user'}, 1361 'pass' => $url{'pass'}, 1362 'host' => $url{'host'}, 1363 'port' => $url{'port'}, 1364 'path' => $url{'path'} 1365 } 1366 ) 1367 ); 1368 $tryrec = 0 1369 if ( $work{'filec'}->( $stat{'files'} ) > hconfig( 'ftp_recurse_threshold', $url{'host'} ) ) 1370 and !ftp_check( \$ftp ); 1371 if ( $config{'old_ftp_recurse'} and $tryrec ) { #delete block 1372 printlog( 'dbg', "Old recurse" ); 1373 $content = ''; 1374 eval { $content = $ftp->dir("-lRGn $url{'path'}") }; 1375 if ($content) { 1376 $tryrec = 0 if ( scalar @$content > hconfig( 'ftp_recurse_threshold', $url{'host'} ) ) and !ftp_check( \$ftp ); 1377 $processor{'format'}{'ls'}->( join_url( \%url ), $content ); 1378 } 1379 } 1380 } 1381 my $passive = 0; 1382 if ($tryrec) { 1383 printlog( 'ftp', "Trying recurse mode" ) unless hconfig( 'ftp_recurse_force', $url{'host'} ); 1384 my $dir; 1385 @toscan = (); 1386 $toscanfile = 1387 $config{'datadir'} 1388 . $config{'slash_sys'} 1389 . encode_url( 1390 join_url( { 1391 'prot' => $url{'prot'}, 1392 'user' => $url{'user'}, 1393 'pass' => $url{'pass'}, 1394 'host' => $url{'host'}, 1395 'port' => $url{'port'} 1396 } 1397 ) 1398 ) . '.toscan'; 1399 my $toscanold = time - $^T + 86400 * -M $toscanfile; 1400#printlog('warn', ".toscan too old, skipping, days=", -M $toscanfile, ' period=', hconfig( 'period', $url{'host'}, $url{'prot'} ), $toscanold, 'C', -C $toscanfile, 'A', -A $toscanfile) ; 1401 $toscanold = undef if $toscanold < hconfig( 'period', $url{'host'}, $url{'prot'} ); 1402 printlog( 1403 'info', 1404 ".toscan too old, skipping, days=", 1405 int -M $toscanfile, 1406 ' period=', hconfig( 'period', $url{'host'}, $url{'prot'} ) / 86400 1407 ) if $toscanold; 1408 if ( !$toscanold 1409 and -e $toscanfile 1410 and rename( $toscanfile, $toscanfile . $config{'work_ext'} ) 1411 and open( TSO, '<', $toscanfile . $config{'work_ext'} ) ) 1412 { 1413 printlog( 'dbg', "Loading", $toscanfile, "path=[$url{'path'}]" ); 1414 while (<TSO>) { 1415 s/^\s*//; 1416 s/\s*$//; 1417 my ($name) = m|/([^/]+)$|; 1418 printlog( 'dbg', 'toscan load skip path:', $_, $name ), next 1419 if ( $config{'skip_path_mask'} and $name =~ /^$config{'skip_path_mask'}$/i ) 1420 or ( $url{'path'} and !m{^$url{'path'}} ); 1421 push( @toscan, $_ ); 1422 } 1423 close(TSO); 1424 unlink( $toscanfile . $config{'work_ext'} ); 1425 my $where = 1426 $static{'db'}->where( { %url, ( $url{'path'} eq '/' ? ( 'path' => '' ) : () ) }, undef, $config{'sql_tresource'} ); 1427 #my ( $tq, $rq, $vq ) = $static{'db'}->quotes(); 1428 my $res = 1429 $static{'db'}->line("SELECT * FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq $where LIMIT 1"); 1430 printlog( 'dev', 'ftptoscan stat: ', %$res ); 1431 $stat{'dirs'} += $res->{'dirs'}; 1432 $stat{'files'} += $res->{'files'}; 1433 $stat{'size'} += $res->{'size'}; 1434 printlog( 'dev', 'ftptoscan s: ', $stat{'dirs'}, $stat{'files'}, $stat{'size'} ); 1435 } else { 1436 $dir = ( $url{'path'} or '/' ); 1437 push( @toscan, $dir ); 1438 } 1439 my %scanned; 1440 FTPSCAN: while (@toscan) { 1441 FTPTOSCAN: while ( $dir = pop(@toscan) ) { #v1:shift v2:pop 1442 @toscan = (), 1443 printlog( 1444 'hint', 1445 "MAX ERRORS REACHED: host=$url{'host'} ftp_errors=$stat{'ftp_errors'} max_errors=", 1446 hconfig( 'max_errors', $url{'host'}, $url{'prot'} ), 1447 'f=', 1448 $work{'filec'}->( $stat{'files'} ), 1449 'd=', 1450 $work{'dirc'}->( $stat{'dirs'} ), 1451 's=', 1452 $work{'sizec'}->( $stat{'size'} ), 1453 $_ 1454 ), 1455 printlog( 'warn', 'MAX ERRORS REACHED at', $url{'host'}, $dir, $_ ), last 1456 if $stat{'ftp_errors'} > hconfig( 'max_errors', $url{'host'}, $url{'prot'}, ); 1457 #printlog( 'ftpdbg', scalar @toscan, $dir ); 1458 ++$stat{'ftp_errors'}, printlog( 'dbg', 'TOO LONG PATH', $dir, $_ ), next 1459 if length($dir) > hconfig( 'ftp_recurse_path_max', $url{'host'} ); 1460 for my $retry ( 0 .. 3 ) { 1461 last if ftp_keep( \$ftp, %url ) or $work{'die'}; 1462 #printlog( 'ftpdbg', "trying change to [$dir]" ); 1463 $content = ''; 1464 if ( $work{'die'} ) { last; } 1465 elsif ( hconfig( 'ftp_use_cwd', $url{'host'} ) ) { 1466 my $ret; 1467 eval { $ret = $ftp->cwd($dir); }; 1468 if ($ret) { 1469 eval { $content = $ftp->dir(); }; 1470 } else { 1471 printlog( 'dbg', "Cannot change to $dir" ); 1472 last; 1473 } 1474 } else { 1475 eval { $content = $ftp->dir($dir); }; 1476 } 1477 local $_ = $ftp->pwd(); 1478 s|/+$||; 1479 $_ = '/' . $_ if $_ and $_ !~ m|^/|; 1480 printlog( 'err', "CWD failed or PWD bug mustbe:", $dir, "pwd:", $_ ), $dir = $_, if ( $dir ne $_ ) and $_ and !m|^/\w\:|; 1481 printlog( 'err', "loop scan [$dir] $scanned{$dir} times"), next FTPTOSCAN if $scanned{$dir}++ > 2; 1482 $passive = 9, last if ( !ftp_check( \$ftp ) and $content ) or $work{'die'}; 1483 printlog( 'dbg', "FTP getdir retry $retry " ); #, ++$passive, next if ftp_check(\$ftp); 1484 printlog( 'dbg', "Trying passive mode [$passive]" ), $config{'fine'}{ $url{'host'} }{'passive'} = 1 1485 if ( ++$passive == 1 ) 1486 and !$config{'fine'}{ $url{'host'} }{'passive'}; 1487 $ftp->pasv() if $passive == 1; 1488 ftp_reconnect( \$ftp, %url ) if $passive == 2 or $retry == 2; 1489 delete $config{'fine'}{ $url{'host'} }{'passive'} if $passive == 3; 1490 } 1491 ( $work{'ftp_connected'} ? next : last ) unless $content; 1492 my ( @thisdir, $listed_dirs, $num ); 1493 for (@$content) { 1494 printlog( 'dmp', $_ ); 1495 ++$stat{'ftp_errors'}, printlog( 'dbg', 'BUGGY FTPD ', $dir, $_ ), @thisdir = (), last 1496 if /^ftpd: .*: No such file or directory/i; 1497 ++$stat{'ftp_errors'}, printlog( 'dbg', 'BAD RECURSION SKIPPED [second listed dir]:', $dir, $1 ), @thisdir = (), 1498 last 1499 if /^\.?\/?(.+)\s*:\s*$/ and $listed_dirs++; 1500 next 1501 if !(/^\s*d\S+\s+\d*\s*\S+\s*\S*\s+(\d+)\s+(\S+)\s+(\d+)\s+(\d+):?(\d+)?\s+(.+)\s*$/) 1502 or ( $6 =~ /^\.+[\s\xFF]*$/ ) 1503 or ( $6 =~ /^[\s\xFF]*$/ ); 1504 my $name = $6; 1505 $name =~ s/(^\s*)|(\s*$)//g; 1506 #printlog( 'dev', 'name:', $name ); 1507 printlog( 'dbg', 'skip path:', $name ), next 1508 if $config{'skip_path_mask'} and $name =~ /^$config{'skip_path_mask'}$/i; 1509 my $add = $dir . ( $dir eq '/' ? '' : '/' ) . $name; 1510 #printlog( 'dev', "DIR:$dir; name:$name; add:$add;\n$_" ); 1511 ++$stat{'ftp_errors'}, printlog( 'dbg', 'BAD RECURSION SKIPPED [path dupes]:', $dir, $add ), next 1512 if $add =~ m|(/\Q$name\E){$config{'ftp_rec_path_dupes'},}$|; 1513 push( @thisdir, $add ); #v1:unshift v2:push 1514 } 1515 push( @toscan, @thisdir ); 1516 unshift( @$content, $dir . ':' ); 1517 $processor{'format'}{'ls'}->( join_url( \%url ), $content ); 1518 $content = ''; 1519 #!!! DIRS FILES !!! 1520 @toscan = (), 1521 printlog( 1522 'warn', 1523 "! recurse detected: $url{'host'} :", 1524 $work{'sizec'}->( $stat{'size'} ), 1525 $work{'filec'}->( $stat{'files'} ) 1526 ) 1527 if $work{'filec'}->( $stat{'files'} ) > hconfig( 'ftp_rec_max_number', $url{'host'} ) 1528 and $work{'sizec'}->( $stat{'size'} ) / $work{'filec'}->( $stat{'files'} ) < 1529 hconfig( 'ftp_rec_min_size', $url{'host'} ); 1530 last unless $work{'ftp_connected'}; 1531 } 1532 last unless $work{'ftp_connected'}; 1533 } 1534 if (@toscan) { 1535 ++$work{'scan_unfinished'}; 1536 savearray( $toscanfile, \@toscan ); 1537 } 1538 } 1539 $ftp->quit() if defined $ftp; 1540 purge_res( \%url ) if $config{'purge_every_res'}; 1541 } 1542 }; 1543 END { savearray( $toscanfile, \@toscan ) if @toscan and $toscanfile; } 1544} 1545$processor{'format'}{'html'} ||= sub { 1546 my %outh; 1547 my $host = shift(@_); 1548 my $scan = int( time() ); 1549 my $prot = 'http'; 1550 my $desc; 1551 for my $content (@_) { 1552 my $oneline = sub { 1553 my @l = split /href/i; 1554 for (@l) { 1555 if (/^=([\"\'])((\w|[\.=\/:?&%\-])+)\1/i) { #??? 1556 #my $url = decode_url($2); 1557 my $url = $2; 1558 #printlog('dev', 'hol:',$_); 1559 if ( $url =~ /^(http\:\/\/)/i ) { 1560 if ( $url =~ /^http\:\/\/$host/i ) #\w*\.? 1561 { 1562 if ( $url =~ /^http\:\/\/$host\/((\w|[\.\-])+)\/?/i ) { #\w*\.? 1563 my $file = decode_url($1); 1564 ++$outh{$host}{$file}{'c'}; 1565 #$outh{$host}{$file}{'desc'} = $desc; 1566 #$outh{$host}{$1}{'desc'} = cp_detect_trans(\$desc, \%detectcp, undef, $prot, $host); 1567 } 1568 } 1569 } elsif ( $url =~ /^(\S+\:\/\/)/i ) { 1570 } elsif ( $url =~ m{(?:^|/)((\w*[\.\-\%]*)+)(?:$|/)} ) { 1571 #} elsif ($url =~ m{((\w*|[.\-%]*)+)}){ 1572 #printlog('dev', $host," url1:[$url] -> [$1]"); 1573 my $file = decode_url($1); 1574 #printlog('dev', $host,' url2:',$url, " : $file"); 1575 ++$outh{$host}{$file}{'c'}; 1576 #$outh{$host}{$file}{'desc'} = $desc; 1577 #$outh{$host}{$1}{'desc'} = cp_detect_trans(\$desc, \%detectcp, undef, $prot, $host); 1578 } 1579 } 1580 } 1581 }; 1582 oneliner( $oneline, $content ); 1583 } 1584 for my $host ( keys %outh ) { 1585 for my $rr ( keys %{ $outh{$host} } ) { 1586 next if $rr =~ /cgi-bin/i; 1587 my %url = split_url( $prot . '://' . $host . '/' . $rr ); 1588 #$static{'db'}->insert_hash_hash($config{'sql_tfile'}, 1589 filesave( { 1590 '' => { 1591 %url, 1592 'size' => ( $url{'ext'} ? 1 : 0 ), ( 1593 hconfig( 'unknown_time', $url{'host'} ) ne '' 1594 ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $url{'host'} ) ) 1595 : () 1596 ), 1597 'desc' => $outh{ $url{'host'} }{$rr}{'desc'}, 1598 'scan' => $scan 1599 } 1600 } 1601 ); 1602 } 1603 } 1604}; 1605$processor{'prot'}{'http'}{'get'} ||= $processor{'prot'}{'https'}{'get'} ||= sub { 1606 local $config{'cachedir'} = $config{'tmpdir'}; 1607 return http_get( $_[0], 1 ); 1608}; 1609$processor{'prot'}{'http'}{'func'} ||= sub { 1610 if ( $config{'use_old_http'} ) { 1611 my $scan = int( time() ); 1612 SKIP0: for my $url (@_) { 1613 next unless $url =~ /\S/; 1614 $url =~ s/\/+$//; 1615 my %url = split_url($url); 1616 my @content = ( http_get($url) ); 1617 my %detectcp; 1618 if ( $content[0] =~ /\<.+\>/ ) { 1619 for my $skipmask ( @{ $config{'http_skip'} } ) { next SKIP0 if $content[0] =~ /$skipmask/i; } 1620 my ($desc) = $content[0] =~ /\<title.*\>\s*(.+)\s*\<\/title\s*\>/is; 1621 cp_detect_trans( $desc, \%detectcp, undef, undef, $url{'prot'}, $url{'host'} ); 1622 $work{'desc'}{ $url{'host'} }{ $url{'prot'} } = $desc; 1623 $processor{'format'}{'html'}->( $url{'host'}, \@content ); 1624 $stat{'dirs'} += 1; 1625 $stat{'files'} += 1; 1626 $stat{'size'} += 1; 1627 } 1628 } 1629 } else { 1630 my $scan = int( time() ); 1631 SKIP0: for my $url (@_) { 1632 next unless $url =~ /\S/; 1633 $url =~ s/\/+$//; 1634 http_dl( 1635 $url, 1636 sub { 1637 my $url = $_[0]; 1638 my %url = split_url($url); 1639 #my @content = ( http_get($url) ); 1640 my @content = ( $_[1] ); 1641 my %detectcp; 1642 if ( $content[0] =~ /\<.+\>/ ) { 1643 for my $skipmask ( @{ $config{'http_skip'} } ) { next SKIP0 if $content[0] =~ /$skipmask/i; } 1644 my ( $desc, $content ); 1645 ($desc) = $content[0] =~ /\<title.*\>\s*(.+?)\s*\<\/title\s*\>/is; 1646 #if (psmisc::is_hash_size $config{'sql'}{'table'}{ $config{'sql_tfile'} }{'content'}) { 1647 #$content = striplinks($content[0]); 1648 #} else { 1649 $content = psmisc::html_strip( $config{'http_no_strip'} ? $content[0] : striplinks( $content[0] ) ); 1650 $content =~ s/&\w+;/ /g; 1651 $content =~ s/\s+/ /g; 1652 #} 1653 cp_detect_trans( $desc, \%detectcp, undef, undef, $url{'prot'}, $url{'host'} ); 1654 cp_detect_trans( $content, \%detectcp, undef, undef, $url{'prot'}, $url{'host'} ); 1655 #dmp \%detectcp; 1656 #printlog('dev', "desc:[$desc]"); 1657 #$work{'desc'}{ $url{'host'} }{ $url{'prot'} } = $desc; 1658 #$processor{'format'}{'html'}->( $url{'host'}, \@content ); 1659 $static{'db'}->insert_hash( 1660 $config{'sql_tfile'}, { 1661 %url, ( 1662 keys %{ $config{'sql'}{'table'}{ $config{'sql_tfile'} }{'param'} } ? () 1663 : ( 'ext' => $url{'ext'} . ( $url{'param'} ? '?' . $url{'param'} : '' ) ) 1664 ), 1665 'size' => length $content[0], ( 1666 hconfig( 'unknown_time', $url{'host'} ) ne '' 1667 ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $url{'host'} ) ) 1668 : () 1669 ), 1670 'content' => $content, 1671 'desc' => $desc, 1672 'scan' => $scan, 1673 } 1674 ); 1675 #$stat{'dirs'} += 1; 1676 $stat{'files'} += 1; 1677 $stat{'size'} += length $content[0]; 1678 } 1679 }, 1680 sub { 1681 my $url = $_[0]; 1682 my %url = split_url($url); 1683 #my @content = ( http_get($url) ); 1684 $static{'db'}->insert_hash( 1685 $config{'sql_tfile'}, { 1686 %url, ( 1687 keys %{ $config{'sql'}{'table'}{ $config{'sql_tfile'} }{'param'} } ? () 1688 : ( 'ext' => $url{'ext'} . ( $url{'param'} ? '?' . $url{'param'} : '' ) ) 1689 ), 1690 'size' => 1, ( 1691 hconfig( 'unknown_time', $url{'host'} ) ne '' 1692 ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $url{'host'} ) ) 1693 : () 1694 ), 1695 #'desc' => $desc, 1696 'scan' => $scan 1697 } 1698 ); 1699 #$stat{'dirs'} += 1; 1700 $stat{'files'} += 1; 1701 $stat{'size'} += 1; 1702 } 1703 ); 1704 } 1705 } 1706}; 1707{ 1708 my $state; 1709 1710 sub http_get2 { 1711 my ( $what, $asfile, $lwpopt, $method ) = @_; 1712 printlog( 'dev', 'http_get', @_ ); 1713 my %url = split_url($what); 1714 use LWP::UserAgent; 1715 use URI::URL; 1716 my $ua = LWP::UserAgent->new( 1717 #$config{lwp} = { timeout => 10, max_size => 1000000, max_redirect => 1 }; 1718 max_size => hconfig( 'max_size', $url{host}, $url{prot} ) || 1000000, 1719 max_redirect => hconfig( 'max_redirect', $url{host}, $url{prot} ) || 1, 1720 timeout => hconfig( 'timeout', $url{host}, $url{prot} ) || 10, 1721 %{ $config{lwp} || {} }, %{ $lwpopt || {} } 1722 ); 1723 if ( ref $config{'proxy'} eq 'ARRAY' ) { 1724 local @_ = @{ psmisc::shuffle( $config{proxy} )->[0] }; 1725 $ua->proxy(@_); 1726 } elsif ( $config{'proxy'} ) { 1727 $ua->proxy( 'http', $config{'proxy'} ); 1728 } 1729 $method ||= 'GET'; 1730 #my $resp = $method eq 'HEAD' ? $ua->head($what) : 1731 my $resp = ( 1732 $ua->request( 1733 new HTTP::Request( $method, new URI::URL($what), new HTTP::Headers( 'User-Agent' => $config{'crawler_name'} ) ) 1734 ) 1735 ); 1736 $stat{'bytes'} += length( $resp->content ); 1737 #my $type = lc( ( split /[\s;,]/, $resp->header('Content-Type') )[0] ); 1738 my $type = http_type( undef, $resp->header('Content-Type') ); 1739 #printlog ("Type",$type); 1740 #printlog ("hdr",$resp->as_string()); 1741 $stat{'bytes_skipped'} += length( $resp->content ), printlog( 'skip', $what, "type1=[$type]" ), 1742 $state->{callback_skip}->( $what, $type ), return ( wantarray ? ( undef, $resp ) : undef ) 1743 unless $config{http_avail_types}{$type}; 1744 #print "[H:",Dumper($resp); 1745 #print "[H:",$resp->code(); 1746 if ( $resp->is_success ) { 1747 ++$stat{'urls'}; 1748 return wantarray ? ( $resp->as_string(), $resp ) : $resp->as_string(); 1749 } else { 1750 printlog( 'warn', 'http getfail', $what, $resp->message, ); #Dumper($resp) 1751 #return $asfile ? undef: $resp->message; 1752 return wantarray ? ( undef, $resp ) : undef; 1753 } 1754 } 1755 1756 sub http_type_full { 1757 return $_[1] if $_[1]; 1758 return $state->{type}{ $_[0] } if defined $state->{type}{ $_[0] }; 1759 return $state->{type}{ $_[0] } = ( ( http_get2( $_[0], undef, undef, 'HEAD' ) )[1]->header('Content-Type') ) || ''; 1760 } 1761 sub http_type { return lc( ( split /[\s;,]/, http_type_full(@_) )[0] ); } 1762 1763 sub http_parse { 1764 my ( $url, $dir ) = @_[ 1 .. 2 ]; #$state, 1765 $dir =~ s{[?#].*}{}g; 1766 $dir =~ s{[^/]+?$}{}; 1767 #for ( $_[0] =~ m{<a.+?href=["']?([^'"\s>]+)['"]?.*?>}ig ) { 1768 for ( 1769 $_[0] =~ 1770 #m{<(?:a|frame).+?(?:href|src)=["']?([^'"\s>]+)['"]?.*?>}ig 1771 m{<(?:a|frame)[^>]+(?:href|src)=["']?([^'"\s>]+)['"]?[^>]*>}ig 1772 ) 1773 { 1774 #dmp ("[c=$_]\n"), 1775 #$_ = $c, next unless defined $_; 1776 #print ("set $_ = $c]\n"), 1777 s/\s//g; 1778 #printlog( 'dev', "link0 [$_]" ); 1779 #printlog( 'warn', "not protocol [$_]" ), 1780 next if m{^\w+:(?:[^/]|$)}; 1781 my $no_check = 0; 1782 ++$no_check if s/#.*//g; 1783 s/&/&/g; 1784 s{^/*\.\.?/*}{}g; 1785 next unless length $_; 1786 $_ = ( s{^\.*/}{}g ? $state->{root} : $dir ) . $_ unless m{^\w+://}; 1787 s{/+\./+}{/}g; 1788 #s{/[^/]+/\.\.(/|$)}{$1}g; 1789 #s{[^/]+/\.\./?}{}g; 1790 #s{[^/]+/+\.\.}{}g; 1791 s{[^/]+/+\.\.}{}g while m{[^/]+/+\.\.}; 1792 s{/+\.$}{}g; 1793 #s{(?<!(?:^\w:))/{2,}}{/}; 1794 s{^(\w+://)}{}; 1795 my $prot = $1; 1796 s{/{2,}}{/}g; 1797 $_ = $prot . $_; 1798 #printlog( 'dev', "link1 [$_]" ); 1799 if (m{^$state->{root}}) { 1800 my ( $filename, $script ) = m{/([^/]*?)(?:\?(.*))?$}; 1801 my $ext = lc( ( $filename =~ m{\.([^\.]+)$} )[0] ); 1802 #printlog('dev', "$_ => filename=[$filename] ext=[$ext] script=[$script] $force_download"); 1803 printlog( 'skip', $_, "ext=[$ext] filename=[$filename]" ), $state->{callback_skip}->( $_, $ext ), next 1804 if $config{http_ext_skip}{$ext}; 1805 ++$no_check if !$ext or !$filename or $script or $config{http_ext_force}{$ext}; 1806 #if ($force_download) { 1807 $state->{int}{$_} ||= { n => ++$state->{links}, check => !$no_check }; 1808 $state->{depth}{$_} = $state->{depth}{$url} + 1 1809 if !$state->{depth}{$_} 1810 or $state->{depth}{$_} > $state->{depth}{$url} + 1; 1811 #} 1812 } else { 1813 $state->{ext}{$_} ||= ++$state->{links}; 1814 } 1815 } 1816 } 1817 1818 sub http_page { 1819 return if $state->{download}{ $state->{current} }; 1820 { 1821 printlog( "warn", "max_depth reached on $state->{current} $state->{depth}{$state->{current}}" ), last 1822 if $state->{depth}{ $state->{current} } > hconfig( 'http_max_depth', $state->{current_url}{'host'} ); 1823 local @_; 1824 printlog( 'skip', $state->{current}, "robots.txt", @_ ), last 1825 if @_ = grep { $state->{current} =~ m{\w+://[^/]+\Q$_} ? $_ : () } keys %{ $state->{disallow} }; 1826 if ( $state->{int}{ $state->{current} }{check} ) { 1827 my $type = http_type( $state->{current} ); 1828 printlog( 'skip', $state->{current}, "type2=[$type]" ), $state->{callback_skip}->( $state->{current}, $type ), last 1829 unless $config{http_avail_types}{$type}; 1830 } 1831 my ( $content, $resp ) = http_get2( $state->{current} ); 1832 #CB! print { $state->{OUT} } $content; 1833 $state->{callback_page}->( $state->{current}, $content ); 1834 printlog( "info", 'downloaded', $state->{page_limit}, $state->{current}, 1835 "depth=$state->{depth}{$state->{current}}, size=", 1836 length($content), 'type=', $resp->header('Content-Type') ) 1837 if length($content); 1838 http_parse( $content, $state->{current}, $resp->base ); #$state, 1839 } 1840 ++$state->{download}{ $state->{current} }; 1841 } 1842 1843 sub http_dl { 1844 my ( $root, $callback_page, $callback_skip ) = @_; #, $state 1845 #local %config = %config; 1846 $root .= '/' unless $root =~ m{/$}; 1847 $root = 'http://' . $root unless $root =~ m{^\w+://}; 1848 $state = {}; 1849 $state->{callback_page} = $callback_page || sub { }; 1850 $state->{callback_skip} = $callback_skip || sub { }; 1851 $state->{root} = $root; 1852 $state->{current} ||= $root; 1853 1854 if ( !$state->{robots_ok} ) { 1855 local $_ = $root; 1856 #$state->{current} = ; 1857 s{(\w+://[^/]+).*$}{$1}; 1858 $_ .= '/robots.txt'; 1859 for ( split /\n+/, http_get($_) ) { 1860 #printlog('rob', $_); 1861 #++$state->{disallow}{$1}, printlog( 'rob', $1 ) if m{Disallow:\s*(.+)\s*$}i; 1862 if (m{Disallow:\s*(.+)\s*$}i) { 1863 $_ = $1; 1864 s/[\x0D\x0A]//g; 1865 s{/$}{}g; 1866 ++$state->{disallow}{$_}, printlog( 'rob', "[$_]" ); 1867 } 1868 } 1869 ++$state->{robots_ok}; 1870 } 1871 #$state->{int}{$root} = {}; 1872 $state->{out} = $root; 1873 $state->{out} =~ s{\w+://}{}; 1874 $state->{out} =~ s{/$}{}g; 1875 $state->{out} =~ s{/}{_}g; 1876 my %url = split_url($root); 1877 while ( $state->{current} ) { 1878 printlog( 'err', "pages limit reached on $state->{root}", $state->{page_limit}, ), last 1879 if ++$state->{page_limit} > hconfig( 'http_max_pages', $state->{current_url}{'host'} ); 1880 http_page($state); 1881 $state->{current} = ( ( 1882 hconfig( 'http_next_selector', $state->{current_url}{'host'} ) 1883 ->( grep { !$state->{download}{$_} } keys %{ $state->{int} } ) 1884 )[0] 1885 ); 1886 #printlog('dev' ,'selected', $state->{current},'from', grep { !$state->{download}{$_} } keys %{ $state->{int} }); 1887 $state->{current_url} = split_url( $state->{current} ); 1888 sleep( hconfig( 'http_page_sleep', $state->{current_url}{'host'} ) ); 1889 } 1890 #close( $state->{OUT} ); 1891 $state->{finished} = 1; 1892 #save_ext(); 1893 } 1894} 1895$config{'select_weight'} //= 0.1; 1896$config{'select_around'} //= 1; 1897$config{'ret_field'} //= 'block'; #'text' 1898$config{'ret_sep'} //= ' '; 1899 1900sub striplinks { # $Id: crawler.pl 4843 2013-08-14 12:17:58Z pro $ 1901 my $file = shift; 1902 # print "[$file]"; 1903 #my @blocks = split m{</?a} ; 1904 #my @blocks = split m{(<a.*?>)} ; 1905 $file =~ s{<$_.*?>.*?</$_.*?>}{}gsi for qw(style script); 1906 $file =~ s{<!--.*?-->}{}gs; 1907 my @blocks = split m{((?:<a.*?>.*?</a.*?>)|(?:<(?:td).*?>))}is, $file; 1908 my @flags; 1909 my $n = 0; 1910 my ( $depth, %avg, %avgn, %min ); 1911 for my $block (@blocks) { 1912 $flags[$n]{'n'} = $n; 1913 $flags[$n]{'link'} = 1 if $block =~ /^<a/i; 1914 my $blockstrip = $block; 1915 $blockstrip =~ s{</?$_.*?>}{}gsi for qw(p br); 1916 #print "\nbs:[$blockstrip]\n"; 1917 $flags[$n]{'blocklen'} = length $blockstrip; 1918 #print "BLOCK[$block]\n"; 1919 ++$depth, 1920 #print("+[$_]") 1921 for $block =~ m{<\w+.*?>}gs; 1922 --$depth, 1923 #print("-[$_]") 1924 for $block =~ m{</\w+.*?>}gs; 1925 --$depth, 1926 #print("-![$_]") 1927 for $block =~ m{<\w+.*?/>}gs; 1928 #$depth += $block =~ m{<\w+>}g; 1929 #print "DEP:". join ':', ($block =~ m{</?\w+>}g); 1930 # print "DEP:" . ( $flags[$n]{'depth'} = $depth ); 1931 # my $text = HTML::Entities::decode($block); 1932 my $text = HTML::Entities::decode_entities($block); 1933 $text =~ tr/\xAB\xBB\xA0/<> /; 1934 # $text =~ s/ / /g; 1935 # $text =~ s/&\w+;/ /g; 1936 #unless ($flags[$n]{'link'}) 1937 { 1938 $text =~ s{<.*?>}{}gs; 1939 $text =~ s/\W/ /g; 1940 $text =~ s/\d/ /g; 1941 $text =~ s/\s+/ /g; 1942 $text =~ s/\s+$//g; 1943 $text =~ s/^\s+//g; 1944 $flags[$n]{'textlen'} = length $text; 1945 } # else {$text= '' ;} 1946 $flags[$n]{'bt'} = $flags[$n]{'blocklen'} / $flags[$n]{'textlen'} if $flags[$n]{'textlen'}; 1947 $avg{bt} += $flags[$n]{'bt'}; 1948 ++$avgn{bt} if $flags[$n]{'bt'}; 1949 $min{bt} = $n if $flags[$n]{'bt'} and $flags[$n]{'bt'} < $flags[ $min{bt} ]{'bt'}; 1950 local $Data::Dumper::Indent = 0; 1951 # print Dumper( $flags[$n] ) . " [$text]\n"; #$block 1952 $flags[$n]{text} = $text; 1953 $flags[$n]{block} = $block; 1954 } continue { 1955 ++$n; 1956 } 1957 $avg{$_} /= $avgn{$_} for keys %avg; 1958 #print join ":", @blocks; 1959 # print 'avg:', join ':', map { "$_=$avg{$_}" } keys %avg; 1960 # print "\nminbt:", Dumper( $flags[ $min{bt} ] ); 1961 # print "\n\nSelecting\n\n"; 1962 my @a = grep { $_->{textlen} } @flags; 1963 my $step = 1 / ( @a || 1 ); 1964 my $n = 0; 1965 $_->{weight_textlen} = $n += $step for sort { 1966 # $b->{bt} <=> $a->{bt} 1967 $a->{textlen} <=> $b->{textlen} 1968 } @a; 1969 # $step = 1 / @flags; 1970 $n = 0; 1971 # @a = grep { $_->{textlen} } @flags; 1972 $_->{weight_bt} = $n += $step for sort { $b->{bt} <=> $a->{bt} } @a; 1973 $_->{weight} = $_->{weight_bt} + $_->{weight_textlen} for @a; 1974 my $max_weight; 1975 #print( Dumper($_) . "\n" ), 1976 $max_weight = $_->{weight} for sort { 1977 # $b->{bt} <=> $a->{bt} 1978 # $a->{textlen} <=> $b->{textlen} 1979 $a->{weight} <=> $b->{weight} 1980 } 1981 # grep { $_->{bt} and $_->{bt} < $avg{bt}} 1982 grep { !$_->{link} } @a; 1983 for my $n ( grep { $_->{weight} > $max_weight - $config{'select_weight'} } @flags ) { 1984 # $n->{selected} = 1; 1985 $flags[ $n->{n} + $_ ]{selected} = 1 for ( -$config{'select_around'} .. $config{'select_around'} ); 1986 } 1987 #print "\n\nResult:\n\n"; 1988 my $ret; 1989 for my $n ( grep { $_->{selected} } @flags ) { 1990 #print "$n->{text} "; 1991 $ret .= $n->{ $config{ret_field} } . $config{ret_sep}; 1992 } 1993 return $ret; 1994} 1995 1996sub updateuser { 1997 my ( $dc, $user, $params ) = @_; 1998 my $peerlist = $dc->{'adc'} ? $dc->{'peers'} : $dc->{'NickList'}; 1999#dmp $user, $params; 2000#printlog( 'dbg', 'updateuser: skipping user', $user, 'with share =', $peerlist->{$user}{INF}{SS}, $peerlist->{$user}{'sharesize'} , caller, Dumper $params ), 2001 return if ( !$peerlist->{$user}{'sharesize'} and !$peerlist->{$user}{INF}{SS} ) and length $user; 2002 if ($user) { 2003 $params->{'ip'} ||= $peerlist->{$user}{INF}{I4} || $peerlist->{$user}{INF}{I6} || $peerlist->{$user}{'ip'}; 2004 $params->{'desc'} ||= $peerlist->{$user}{INF}{DE} 2005 || $peerlist->{$user}{'description'}; #if $peerlist->{$user}{'description'}; 2006 $params->{'cid'} ||= $peerlist->{$user}{INF}{ID}; 2007 } 2008 #printlog('dev', "userdesc1=[$params->{'desc'}] ($user)") if $params->{'desc'}; 2009 #$params->{'desc'} ||= $params->{'DE'}; 2010 $params->{'desc'} =~ s/^(?:\s*(?:\{\S*?\})?(?:\[\S*?\])?)+//g; 2011 #printlog('dev', "userdesc2=[$params->{'desc'}]") if $params->{'desc'}; 2012 #$params->{'desc'} =~ s/^\s*(?:\[\S+?\])\s*PeerWeb\s*DC\+\+\s*//ig; 2013 $params->{'desc'} =~ s/^\s*(PeerWeb|Apex)\s*DC\+\+\s*//ig; 2014 #printlog('dev', "userdesc3=[$params->{'desc'}]") if $params->{'desc'}; 2015 $params->{'prot'} ||= $dc->{'adc'} ? 'adc' : 'dchub'; 2016 $params->{'dcuser'} ||= $params->{NI} || $peerlist->{$user}{INF}{NI} || $user; 2017 #delete $params->{'port'} if $params->{'prot'} eq 'dchub' and $params->{'port'} == 411; 2018 $params->{'host'} = full_host( { 'prot' => $params->{'prot'}, %$dc } ); #, %$params 2019 #printlog('dev',' host= ',$params->{'host'},'from',$dc->{'host'} ,$dc->{'port'},'and',%$params); 2020 $params->{'time'} ||= ( $params->{'off'} ? int( time() ) - $config{'online_minutes'} * 120 : int( time() ) ) 2021 unless $params->{'no_time'}; 2022 #printlog('dev','timeup',$params->{'dcuser'}, $params->{'time'}, int(time())); 2023 #!$params->{'host'} = cp_trans( $config{ 'cp_' . $params->{'prot'} }, $config{'cp_db'}, $params->{'host'} ); 2024 #!$params->{'dcuser'} = cp_trans( $config{ 'cp_' . $params->{'prot'} }, $config{'cp_db'}, $params->{'dcuser'} ); 2025 #!$params->{'desc'} = cp_trans( $config{ 'cp_' . $params->{'prot'} }, $config{'cp_db'}, $params->{'desc'} ); 2026 $static{'db'}->update( $config{'sql_tresource'}, undef, { 'path' => '', %$params } ) unless $params->{'no_res'}; #, 1 2027 unless ( $params->{'no_file'} ) { 2028 $static{'db'}->update( $config{'sql_tfile'}, undef, { 'path' => $_, %$params } ) 2029 for ( '', '/' ); 2030 } 2031 $params->{'port'} ||= $peerlist->{$user}{INF}{U4} || $peerlist->{$user}{INF}{U6} || $peerlist->{$user}{'port'} 2032 if $user and $config{'allow_ping_user_port'}; 2033 delete $params->{'port'} if $params->{'port'} > 65535; 2034 #printlog('dev', 'update', %$params); 2035 $static{'db'}->update( $config{'sql_thost'}, undef, $params ); #, 1 'ip' => $ip, 2036} 2037$processor{'prot'}{'dchub'}{'func'} ||= sub { 2038 my ($filelists) = $stat{'filelists'}; 2039 my $dc; 2040 foreach my $url (@_) { 2041 my %url = split_url($url); 2042 #printlog('split:', Dumper(\%url)); 2043 $url{'dcuser'} = $1 if $url{'host'} =~ s|/(.+)||; 2044 my $hub = join_url( { 'host' => $url{'host'}, 'port' => $url{'port'} } ); 2045 #next if $config{'dchub'}{$hub}{'disabled'}; 2046 next if hconfig( 'disabled', $url{'prot'}, $hub ); 2047 printlog( 2048 'dc', 2049 "Creating bot ver", 2050 ( $config{'version'} or 'r' . ( split( ' ', '$Revision: 4843 $' ) )[1] ), 2051 $Net::DirectConnect::VERSION, "to $url", 2052 ); 2053 local %_ = ( %{ $config{'fine'}{ $url{'prot'} } }, %{ $config{'fine'}{$hub} } ); 2054 $_{modules}{filelist} = 1 if $_{share}; 2055 $dc = Net::DirectConnect->new( 2056 #'host' => $url, 2057 'host' => join_url( { map { $_ => $url{$_} } qw(prot host) } ), 2058 'port' => $url{'port'}, 2059 'log' => sub { 2060 my $dc = shift; 2061 #printlog('ERR', $dc, @_), return unless ref $dc; 2062 printlog( shift, "[$dc->{'number'}]", @_, ); 2063 }, 2064 'no_charset_console' => 1, 2065 #%{ $config{'fine'}{'dchub'} }, 2066 'no_auto_share_downloaded' => 1, 2067 'sql' => $config{'sql'}, 2068 'db' => $static{'db'}, 2069 %_, 2070 'auto_connect' => 0, 2071 'auto_GetINFO' => 1, 2072 ); 2073 #printlog ('dev', 'SH1', Dumper $config{'fine'}{'dchub'}); 2074 $dc->{'sharesize'} = psmisc::human( 'number_k', $dc->{'sharesize'} ); 2075 #printlog ('dev', 'SH2', Dumper $dc); 2076 my $nicklistres = 1 if $url{'dcuser'}; 2077 $dc->{'handler'}{'NickList'} = sub { 2078 my $dcu = shift; 2079 updateuser( $dc, $_ ) for keys %{ $dc->{'NickList'} }; 2080 printlog( 'dc', "Nicklist recieved = ", scalar keys %{ $dc->{'NickList'} } ); 2081 $nicklistres = 1; 2082 }; 2083 $dc->{'handler'}{'MyINFO'} = sub { 2084 my $dcu = shift; 2085 ($_) = $_[0] =~ /\S+\s+(\S+)\s+(.*)/; 2086 updateuser( 2087 $dc, $_, ( 2088 hconfig( 'dcbot_size_myinfo', $hub ) 2089 && $dc->{'NickList'}->{$_}{'sharesize'} > 0 ? { ( 'size' => $dc->{'NickList'}->{$_}{'sharesize'} ) } : () 2090 ) 2091 ); 2092 }; 2093 $dc->{'handler'}{'chatline'} = sub { 2094 my $dcu = shift; 2095 ($_) = $_[0] =~ /^<(\S+)>/; 2096 updateuser( $dc, $_ ) if $dc->{'NickList'}->{$_} or $config{'dcbot_update_unknown'}; #Hub-Security 2097 }; 2098 $dc->{'handler'}{'welcome'} = sub { 2099 my $dcu = shift; 2100 printlog( 2101 'dc', 'welcome:', $_[0] #scalar cp_trans( $config{ 'cp_' . $url{'prot'} }, $config{'cp_log'}, $_[0] ) 2102 ); 2103 }; 2104 $dc->{'handler'}{'quit'} = sub { 2105 my $dcu = shift; 2106 updateuser( $dc, $_, { 'off' => 1 } ); 2107 }; 2108 $dc->{'handler'}{'BadPass'} = sub { 2109 my $dcu = shift; 2110 printlog( 'err', 'BadPassword' ); 2111 }; 2112 $dc->{'handler'}{'LogedIn'} = sub { 2113 my $dcu = shift; 2114 printlog( 'dbg', $_[0], 'is LogedIn' ); 2115 }; 2116 $dc->{'handler'}{'user_ip'} = sub { 2117 my $dcu = shift; 2118 #printlog('dev', 'hUserip', @_); 2119 updateuser( 2120 $dc, 2121 $_[0], { 2122 'ip' => $_[1], 2123 #( $_[2] != 411 ? ( 'port' => $_[2] ) : () ), 2124 'port' => $_[2], 2125 'no_res' => 1, 2126 'no_file' => 1, 2127 'no_time' => 1 2128 } 2129 ) if $_[1] ne name_to_ip( split_url( $url{'host'} )->{'host'} ); 2130 }; 2131 $dc->{'handler'}{'INF'} = sub { 2132 my $dcu = shift; 2133 #dmp @_; 2134 #($_) = $_[0] =~ /\S+\s+(\S+)\s+(.*)/; 2135 return if $_[0][-1] eq 'I'; 2136 updateuser( $dc, $_[-1]{ID} || $_[0][-1], $_[-1] ); 2137 # $dc, $_, ( 2138 # hconfig( 'dcbot_size_myinfo', $hub ) 2139 # && $dc->{'NickList'}->{$_}{'sharesize'} > 0 ? { ( 'size' => $dc->{'NickList'}->{$_}{'sharesize'} ) } : () 2140 # ) 2141 #); 2142 }; 2143 $dc->{'handler'}{'QUI'} = sub { 2144 my $dcu = shift; 2145 dmp \@_; 2146 updateuser( $dc, $_[1], { 'off' => 1 } ); 2147 }; 2148 $dc->{'auto_recv'} = 0 if $url{'dcuser'}; 2149 printlog( 'dc', "Connecting to", $url ); 2150 $dc->connect(); 2151 #printlog( 'dev', __LINE__); 2152 $dc->wait(); #for 1 .. 5; 2153 #printlog( 'dev', __LINE__, 'waited'); 2154 next unless $dc->{'socket'}; 2155 $dc->wait_connect(20); #for 1 .. 5; 2156 #printlog( 'dev', __LINE__, 'waited',20); 2157 #1 while !$dc->recv(1); 2158 printlog( 'warn', "connect timeout", $dc->{'status'} ), next unless $dc->{'status'} eq 'connected'; 2159 printlog( 'dbg', "connected", $dc->{'status'} ); 2160 if ( !$dc->{'adc'} and !$url{'dcuser'} and !$nicklistres and !$dc->{'auto_GetNickList'} ) { 2161 printlog( 'dbg', "Get nicks" ); 2162 $dc->cmd('GetNickList'); 2163 next unless $dc->{'socket'}; 2164 my $trynickres = hconfig( 'nicks_receive', $hub ) || 50; 2165 #$dc->recv(1), sleep(0.1) while --$trynickres > 0 and !$nicklistres; 2166 $dc->wait(5) while --$trynickres > 0 and !$nicklistres; 2167 } 2168 #printlog( 'dbg', "nicks:", $nicklistres, ); 2169 printlog( 'dbg', "Starting [$dc->{'host'} : $dc->{'port'}]" ); 2170 my $saveto = 2171 $config{'datadir'} 2172 . $config{'slash_sys'} 2173 . $dc->{'host'} 2174 . ( $dc->{'port'} != $config{'scanner'}{ $url{'prot'} }{'port'} ? '.' . $dc->{'port'} : '' ) . '.' 2175 . $url{'prot'}; #'dchub'; 2176 $dc->wait(5); 2177 next unless $dc->{'socket'}; 2178 my $peerlist = $dc->{'adc'} ? $dc->{'peers_cid'} : $dc->{'NickList'}; 2179 #my $peerlist = $dc->{'adc'} ? $dc->{'peers'} : $dc->{'NickList'}; 2180 printlog( 'dev', "work", hconfig( 'dcbot_wait_start', $hub ) ); 2181 $dc->wait( hconfig( 'dcbot_wait_start', $hub ) || 5 ); 2182 #psmisc::file_rewrite( 'dump', Dumper $dc); 2183 printlog( 'dbg', "Starting users", ( $url{'dcuser'} or scalar keys %$peerlist ) ); 2184 my $hubhost = $url{'host'}; 2185 my $hubport = $url{'port'}; 2186 my $n; 2187 my @todo = ( $url{'dcuser'} || ( sort keys %$peerlist ) ); 2188 printlog( 2189 'info', "todo users:", scalar @todo, #Dumper \@todo#, $dc 2190 ); 2191 2192 for my $user (@todo) { 2193 #printlog( 'dev', "looking at [$user]"); 2194 printlog( 'err', 'skipping bad user', "[$user]" ), next if !length $user or $user !~ /\S/; 2195 last if scan_stop; 2196 ++$n; 2197 my $get = 1; 2198 $url{'dcuser'} = $peerlist->{$user}{INF}{NI} || $user; 2199 $url{'host'} = full_host( { %url, 'host' => $hubhost, 'port' => $hubport } ); 2200 #printlog('split:', Dumper(\%url)); 2201 my %url_db = %url; 2202 #cp_trans_hash( $config{ 'cp_' . $url{'prot'} }, $config{'cp_db'}, \%url_db ); 2203 #my $file = ; 2204 my $file = encode_url( 2205 #scalar cp_trans( $config{ 'cp_' . $url{'prot'} }, $config{'cp_shell'}, 2206 Encode::encode( 2207 'utf-8', #hconfig( 'cp_shell' ), 2208 $dc->{'adc'} ? $peerlist->{$user}{INF}{NI} : $user, Encode::FB_DEFAULT 2209 ), 2210 $config{'encode_dc_user_mask'} 2211 ); 2212 my $saveas_noext = $saveto . $config{'slash_sys'} . $file; 2213 my $saveas = $saveas_noext . '.xml.bz2'; 2214#my $saveasshell = Encode::encode hconfig( 'cp_shell' ), $saveas; 2215#$saveas_noext = Encode::encode hconfig( 'cp_shell' ), $saveas_noext; 2216#printlog( 'dev', "1user=$url{'dcuser'}; host=$url{'host'}; get=$get; config{'no_dcbot_download'}=$config{'no_dcbot_download'}" ); 2217 my $sharesize = $peerlist->{$user}{INF}{SS} || $peerlist->{$user}{'sharesize'}; 2218 if ( !hconfig( 'no_dcbot_download', $hub, $user ) ) { 2219 printlog( 2220 'dbg', 'looking at', $url{'host'}, '/', $url{'dcuser'}, ' filelist:', -s $saveas, int -M $saveas, $n, '/', 2221 scalar keys %$peerlist, 'share:', $sharesize, 2222 #Dumper $peerlist->{$user} 2223 ); 2224 #printlog( 'dbg', "skip [$user] (filelist exists)" ), $get = 0 2225 #if 2226 #( !hconfig( 'forceforce', $hub ) and -s $saveto . $config{'slash_sys'} . $file . '.xml.bz2' ); 2227 printlog( 'dbg', "skip [$user] (self)" ), $get = 0 2228 if $user eq $dc->{'Nick'} 2229 or $dc->{'INF'}{'SID'} eq $user 2230 or $dc->{'INF'}{'CID'} eq $user; 2231 printlog( 'dbg', "skip [$user] (scanned)" ), $get = 0 2232 if ( 2233 !hconfig( 'forcedc', $hub, $user ) 2234 and (( -s $saveas and ( ( time - $^T + 86400 * -M $saveas ) < $config{'dcbot_client_period'} ) ) 2235 or ( -s $saveas . '.ok' and ( ( time - $^T + 86400 * -M $saveas . '.ok' ) < $config{'dcbot_client_period'} ) ) ) 2236 ) or !allowscan( \%url_db, $config{'sql_tresource'}, undef, $config{'dcbot_client_period'} ); 2237 my $already = dcscanned($user); 2238 #dmp 'already', $already; 2239 if ( 2240 ( $already->{tiger} or $already->{cid} ) and ( 2241 time - $already->{scan} < $config{'dcbot_client_period'} 2242 #or $scan < $already->{scan} 2243 ) 2244 ) 2245 { 2246 printlog( 'dbg', "skip [$user] (cid: already scanned)", $config{'dcbot_client_period'}, time - $already->{scan} ), 2247 $get = 0; 2248 } 2249 } 2250#$dc->cmd( 'GetINFO', $user ), $dc->work()if !$dc->{'adc'}and !hconfig( 'dcbot_no_getinfo', $hub, $user )and !$peerlist->{$user}{'info'}and grep { $_ ne 'INF' } keys %{ $peerlist->{$user} }; #, $dc->recv() 2251 if ( !hconfig( 'no_dcbot_download', $hub, $user ) ) { 2252 printlog( 'dbg', "skip [$user] (share $sharesize bytes)" ), $get = 0 2253 if !hconfig( 'forcedc', $hub, $user ) #!$config{'force'} 2254 and ( defined($sharesize) or hconfig( 'no_dcbot_download_unknown', $hub, $user ) ) and !$sharesize; 2255 printlog( 'dbg', "skip [$user] unable get from passive user" ), $get = 0 2256 if $dc->{'M'} eq 'P' and ( ( $dc->{adc} and !$peerlist->{$user}{INF}{I4} ) or $peerlist->{$user}{'M'} eq 'P' ); 2257 printlog( 'dbg', "skip [$user] no free user slots ($peerlist->{$user}{'S'})" ), $get = 0 if #!$config{'force'} 2258 !$dc->{'adc'} and !hconfig( 'forcedc', $hub, $user ) and $peerlist->{$user}{'S'} eq '0'; 2259 } 2260#printlog( 'dev', "2user=$url{'dcuser'}; host=$url{'host'}; get=$get; config{'no_dcbot_download'}=$config{'no_dcbot_download'}" ); 2261 if ( !hconfig( 'no_dcbot_download', $hub, $user ) and $get ) { 2262 printlog( 'dc', "Skip unknown user [$user]" ), next 2263 if !keys %{ $peerlist->{$user}{'INF'} } and !grep { $_ ne 'INF' } keys %{ $peerlist->{$user} }; 2264 printlog( 'dc', "Get filelist from [$user]" ); 2265 printlog( 'dbg', "get: [$saveas_noext] user=$user to[$saveto]" ); 2266 mkdir_rec( $saveto, 0777 ); 2267 $dc->get( $user, '', $saveas_noext ); #.get 2268 ++$stat{'filelists'}; 2269 #$dc->wait(1, hconfig( 'dcbot_pause', $hub )) and 2270 #$dc->wait(10, 0.1) for 0..5; 2271 #printlog( 'dev', "sleep", hconfig( 'dcbot_pause', $hub ) ); 2272 $dc->wait( hconfig( 'dcbot_pause', $hub ) ); 2273 #sleep hconfig( 'dcbot_pause', $hub ); 2274 #sleep $config{'dcbot_pause'}; 2275 } elsif ( $config{'force_ip_collect'} 2276 and $user ne $dc->{'Nick'} 2277 and $dc->{'INF'}{'SID'} ne $user 2278 and $dc->{'INF'}{'CID'} ne $user 2279 and $peerlist->{$user}{'M'} ne 'P' 2280 and !( $peerlist->{$user}{'INF'}{'I4'} or $peerlist->{$user}{'INF'}{'I6'} ) 2281 and ( ( $dc->{'M'} ne 'P' ) or $dc->{'allow_passive_ConnectToMe'} ) ) 2282 { #to do to dcpp lib 2283 printlog( 'dc', 'trying connect to us', $user ); 2284 $dc->cmd( 'Rev' . 'ConnectToMe', $user ) if !$dc->{'adc'}; #TODO for adc 2285 } 2286 #$dc->info(); 2287 #printlog( 'dev', "cycle end" ); 2288 printlog( 'err', 'hub inactive!' ), $dc->info(), last unless $dc->active(); #$dc->{'socket'}; 2289 } 2290 my $share; 2291 $share += $peerlist->{$_}{INF}{SS} || $peerlist->{$_}{'sharesize'} for keys %$peerlist; 2292 $share = psmisc::human( 'size', $share ); 2293 updateuser( 2294 $dc, '', { 2295 %$dc, 2296 'desc' => ( scalar keys %$peerlist or 0 ) . ' users ' . $share . ' ' . $dc->{'HubName'}, 2297 'scan' => int( time() ) 2298 } 2299 ); 2300 printlog( 'dbg', 'ended, aftersleep' ); 2301 #$dc->wait(100,1) for ( 0 .. 60 ); 2302 $dc->wait(10) if $url{'dcuser'}; 2303 $dc->wait_finish(); 2304 printlog( 'dc', 'Finished' ); 2305 $dc->destroy(); 2306 psmisc::file_rewrite( 'dcexit.dump', Dumper $dc ); 2307 #todo: hubstat 2308 #printlog('info', $dc->{'IpList'}); 2309 #printlog('info', $dc->{'NickList'}); 2310 #for( keys %{ $dc->{'IpList'} } ) { 2311##printlog('dev', "ip=$_, nick=",$dc->{'IpList'}->{$_}{'Nick'}, 'port=',$dc->{'IpList'}->{$_}->{'port'}); 2312 #printlog('dbg','updating info about user ip:', $dc->{'IpList'}->{$_}{'Nick'}, $_), 2313 #updateuser($dc, $dc->{'IpList'}->{$_}->{'Nick'}, {'ip' => $_, 2314 #'port' => $dc->{'IpList'}->{$_}->{'port'}, 2315 #'no_res' => 1, 'no_file' => 1, 'no_time' => 1}); 2316 #} 2317 $processor{'file'}{ $url{'prot'} }{'func'}->($saveto) unless hconfig( 'no_dcbot_upload', $hub ); 2318 } continue { 2319 #$dc = undef; 2320 $dc->destroy() if $dc; 2321 #printlog('dev', Dumper($dc)); 2322 #delete $dc->{$_}for keys %$dc; 2323 $dc = undef; 2324 #undef $dc; 2325 printlog( 'dev', "dcexit (fail)" ); 2326 } 2327 $work{'proc_scans'} -= $stat{'filelists'} - $filelists; 2328}; 2329$processor{'prot'}{'dchub'}{'get'} ||= sub { 2330 #TODO 2331}; 2332$processor{'prot'}{'adc'}{$_} ||= $processor{'prot'}{'dchub'}{$_}, 2333 $processor{'prot'}{'adcs'}{$_} ||= $processor{'prot'}{'dchub'}{$_}, 2334 for keys %{ $processor{'prot'}{'dchub'} }; 2335 2336sub nmap_host_name () { 2337 my $hostip = $+{ip}; 2338 my $hostname = $+{name}; 2339 $hostip ||= $hostname; 2340 $hostname ||= $hostip; 2341 my $host = ( ( !$config{'nmap_nores'} and $hostname ) ? $hostname : $hostip ); 2342 $host =~ s/\(|\)//g; 2343 return wantarray ? ( $host, $hostip, $hostname ) : $host; 2344} 2345$processor{'format'}{'nmap'} ||= sub { 2346 my $nping = shift @_; 2347 my ( $host, $hostip, $hostname ); 2348 my @resbuffer; 2349 my %nstat; 2350 $nping = $nping =~ /-P[0n]/; 2351 for my $content (@_) { 2352 my ( $hostup, $resolvs ); 2353 my $oneline = sub { 2354 chomp; 2355 s/[\x0d\x0a]+//g; 2356 #printlog( 'nmap', $_ ); 2357 if ( 2358 /Host\s+(?<name>\S*)\s+(:?\((?<ip>\S+)\)\s*)?appears to be up/io 2359 or /Host:\s*(?<ip>\S+)\s*(?<name>\S*)\s*(Status: Up|Ports:)/io 2360 or /Interesting ports on (?<name>\S*)\s*\((?<ip>\S+)\)/io 2361 or /Interesting ports on (?<ip>\S+):/io 2362 or /Discovered open port (?<port>\S+)\/tcp on (?<ip>\S+)/io 2363 or /Nmap scan report for (?<name>\S+)\s*(:?\((?<ip>\S+)\)\s*)?\s*(?<down>\[host down\])?/io 2364 #Nmap scan report for 24.117.235.52 [host down] 2365 ) 2366 { 2367 ( $host, $hostip, $hostname ) = nmap_host_name(); 2368 #$hostip = $+{ip}; 2369 #$hostname = $+{name}; 2370 #$hostip ||= $hostname; 2371 #$hostname ||= $hostip; 2372 #$host = ( ( !$config{'nmap_nores'} and $hostname ) ? $hostname : $hostip ); 2373 #$host =~ s/\(|\)//g; 2374 return if !$host or $host =~ /^\d$/; 2375 $hostup = 1 if !$nping; 2376 $hostup = 0 if $+{down}; 2377 #printlog ('up', $host, $hostup, Dumper \%+); 2378 #$resolvs = 0; 2379 #$nstat{$hostip} 2380 } elsif (/^\s*Host is up(?:\.| \((?<latency>\S+)s latency\))/) { 2381 $hostup = 1 unless $nping; 2382 $nstat{'host'}{$host}{$_} = $+{$_} for grep { length $+{$_} } qw(latency); 2383 } elsif ( 2384 /^\s*Nmap (?:done|finished): (?<total>\d+) IP addresses \((?<up>\d+) hosts up\) scanned in (?<seconds>\S+) seconds/) 2385 { 2386 # 2387 #Completed Connect Scan at 20:21, 7.66s elapsed (1024 total ports) 2388 #Scanning 256 hosts [4 ports/host] 2389 #$work{'range_size'} = $+{total}; 2390 $nstat{'up'} = $+{up}; 2391 $nstat{'per'} = $+{seconds}; 2392 $nstat{'size'} = $+{total}; 2393 } elsif (/^\s*Raw packets sent: (?<sp>\d+) \((?<sb>\S+)\) \| Rcvd: (?<rp>\d+) \((?<rb>\S+)\)/) { 2394 $nstat{$_} = $+{$_} for keys %+; 2395 #count traffic bytes? 2396 } elsif ( 2397/^\s*All (?<ports>\d+) scanned ports on (?<name>\S+)(?: \((?<ip>\S+)\))? are (?:closed|filtered)(?: \((?<closed>\S+)\) or filtered \((?<filtered>\S+)\))?/ 2398 ) 2399 { 2400 ( $host, $hostip, $hostname ) = nmap_host_name(); 2401 $hostup = 0 if $nping; 2402 #printlog ('down', $host, $hostup, Dumper \%+); 2403 $nstat{'host'}{$host}{$_} = $+{$_} for grep { length $+{$_} } qw(closed filtered); # ports 2404 } elsif (/Not shown: (?<closed>\d+) closed ports/) { 2405 $nstat{'host'}{$host}{$_} = $+{$_} for grep { length $+{$_} } qw(closed); 2406 } 2407 if ( m{(?<port>\S+)/tcp\s+(?<state>open|on|unfiltered)}io or m{Discovered open port (?<port>\S+)/tcp on (?<host>\S+)} ) { 2408 my ( $port, $prot ) = ( $1, $static{'port2prot'}{$1} ); 2409 return unless $host; 2410 return if $config{'scanner'}{$prot}{'disabled'}; 2411 $host = normalize_ip($host) if $config{'host_by_name_norm'}; # and !$resolvs++; 2412 printlog( 'warn', "invalid host" ), next unless $host; 2413 my $res = $prot . '://' . $host; 2414 return if $nstat{'scanned'}{$res}++; 2415 push( @resbuffer, $res ) unless $config{'nmap_only'}; 2416 2417 unless ( $static{'banned'}->{$res} or $static{'banned'}->{$host} ) { 2418 #warn "tryup"; 2419 #psmisc::dmp('tryup', $prot, $_->database), 2420 $_->update( $config{'sql_tresource'}, undef, { 'prot' => $prot, 'host' => $host } ) 2421 for grep { $_ } $static{'db'}, $static{'db_shard'}{$prot}; 2422 } 2423 $processor{'urls'}->(@resbuffer), @resbuffer = () if $#resbuffer >= $config{'start_by'} - 1; 2424 $hostup = 1 if $nping; 2425 ++$ipstt{'resalive'}{$host}; #remove 2426 ++$nstat{'resalive'}{$host}; 2427 $nstat{'prot'}{$prot}{$host} = 1; 2428 $nstat{'host'}{$host}{$prot} = 1; 2429 } 2430 if ($hostup) { 2431 $host = normalize_ip($host) if $config{'host_by_name_norm'}; 2432 #$static{'db'}->line("SELECT * FROM $tq$config{'sql_thost'}$tq WHERE host!=$vq$host$vq AND ip=$vq$ip$vq")->{'host'} 2433 $static{'db'}->update( 2434 $config{'sql_thost'}, 2435 undef, { 2436 'host' => $host, 2437 'ip' => $hostip, 2438 'time' => int( time() ), 2439 'meta' => join( ' ', map { "$_=$nstat{'host'}{$host}{$_}" } keys %{ $nstat{'host'}{$host} } ) 2440 } 2441 ); 2442 ++$ipstt{'alive'}{$host}; 2443 ++$ipstt{'ralive'}{$host}; # not optimal! 2444 #++$nstat{'up'}{$host}; 2445 $hostup = 0; 2446 } 2447 }; 2448 oneliner( $oneline, $content ); 2449 } 2450 for my $p ( keys %{ $nstat{'prot'} || {} } ) { $nstat{$p} = scalar keys %{ $nstat{'prot'}{$p} || {} }; } 2451 dmp( \%nstat ); 2452 $processor{'urls'}->(@resbuffer) if @resbuffer; 2453 return \%nstat; 2454}; 2455 2456sub speed ($$) { 2457 my ( $t, $c ) = @_; 2458 my $ps; 2459 $ps = $c / $t if $c and $t > 0; 2460 $ps = ' at ' . psmisc::human( 'float', $ps ) . 'ps' if $ps; 2461 return 'per ' . psmisc::human( 'time_period', $t ) . $ps; 2462} 2463$processor{'url'} ||= sub { 2464 my @rng = @_; 2465 #printlog('dev', '$processor{url}', @_); 2466 %ipstt = (); 2467 @rng = reverse(@rng) if $config{'ranges_reverse'}; 2468 shuffle( \@rng ) if $config{'ranges_shuffle'}; 2469 my $ranges = 0; 2470 for my $range (@rng) { 2471 ++$ranges; 2472 $range =~ s/^\s+|\s+$//g; 2473 next if !$range or $range =~ /^[;#]/; 2474 my ($desc) = $1 if $range =~ s/\s+(.+)//; 2475 $range = 'local://' . $range 2476 if ( $range =~ /^\w:/ and $config{'system'} =~ /win/ ) 2477 or $range =~ m{^([/\\])(!:\1)} and -d $range; 2478 $range = '\\\\' . $range if $range =~ /^(?:\.|\w\:[^\/\\]|\\[^\\])/; # |(\/[^\/])!!! |((\\)|(\/)$) 2479 $range =~ tr/\\/\// if $range =~ s/^\s*(?:\\\\|\/\/)/file:\/\//; 2480 my $tim = timer(); 2481 #todo $range = 'range://' unless ( $range =~ /\:\/\// ); 2482 #printlog('dev', '$processor{url}2', $range); 2483 if ( $range =~ /\:\/\// ) { $processor{'scan_proto_host'}->($range); next; } 2484 next 2485 #unless allowscan( { 'range' => $range, 'desc' => $desc }, $config{'sql_tranges'}, undef, $config{'range_period'} ); 2486 #unless allowscan( { 'range' => $range, 'desc' => $desc }, $config{'sql_tranges'}, undef, hconfig('range_period', $range) ); 2487 unless allowscan( { 'range' => $range, 'desc' => $desc }, $config{'sql_tranges'}, undef, 2488 hconfig( 'period', $range, 'range' ) ); 2489 next if $config{'noscan'}; 2490 ( $ipstt{'ralive'}, $ipstt{'resalive'} ) = (); 2491 printlog( 'nmap', "scanning $range [", $ranges, '/', $#rng + 1, ']' ); 2492 state( "scan:", $range ); 2493 my $nmap_myip; 2494 $nmap_myip = '-S ' . $config{'myip'} 2495 if $config{'myip'} 2496 and $config{'get_myip_mask'} 2497 and $range =~ /$config{'get_myip_mask'}/; 2498 my $port = join ',', 2499 map { $config{'scanner'}{$_}{'port'} } 2500 grep { $config{'scanner'}{$_}{'port'} and !$config{'scanner'}{$_}{'disabled'} and !$config{'scanner'}{$_}{'no_nmap'} } 2501 keys %{ $config{'scanner'} }; 2502 $port = $config{'nmap_param_port'} . ' ' . $port 2503 if $port and $config{'nmap_param_port'} and !$config{'pingonly'} and !$param->{'pingonly'}; # del $param 2504 my $scantype = $port ? $config{'nmap_scan'} : $config{'nmap_ping'}; 2505 shelldata($range); 2506 state( "scan range: $range", ); 2507 on_interrupt( 2508 sub { 2509 $static{'db'}->update( 2510 $config{'sql_tranges'}, 2511 undef, { 2512 'range' => $range, 2513 #'scan' => int( time() + hconfig('on_interrupt', $range) - hconfig('range_period', $range)), 2514 'scan' => int( time() + hconfig( 'on_interrupt', $range ) - hconfig( 'period', $range, 'range' ) ), 2515 } 2516 ); 2517 } 2518 ) if hconfig( 'on_interrupt', $range ); 2519 my $h = openproc( 2520"$config{'starter'} $config{'nmap'} $scantype $port $config{'nmap_param'} $config{'nmap_nores'} $nmap_myip $range $config{'stderr_redirect'} |" 2521 ) or next; 2522 my $nstat = $processor{'format'}{'nmap'}->( $config{'nmap_param'}, $h ) || {}; 2523 on_interrupt(); 2524 close($h); 2525 my $ips = ( 2**( 32 - $1 ) ) if !$nstat->{'size'} and $range =~ m|^\s*\d+\.\d+\.\d+\.\d+/(\d+)\s*$|; 2526 $ips = $nstat->{'size'} if $nstat->{'size'}; 2527 #$nstat->{'range_size'} = 0; 2528 %_ = ( 2529 'range' => $range, 2530 'scan' => int( time() ), 2531 'alive' => scalar( keys %{ $ipstt{'ralive'} } ), 2532 'size' => ( $ips or 1 ), 2533 'meta' => join( ' ', map { "$_=$nstat->{$_}" } grep { !ref $nstat->{$_} } keys %$nstat ) 2534 ); 2535 $_{'time'} = int( time() ) if scalar( keys %{ $ipstt{'ralive'} } ); 2536 $static{'db'}->update( $config{'sql_tranges'}, undef, \%_ ); 2537 my $t = $tim->(); 2538 #my $ipps = $ips / $t if $t > 0; 2539 #( $ipps = sprintf( "%.3f", $ipps ) ) .= ' ipps' if $ipps; 2540 #$ips = " [$ips] " if $ips; 2541 printlog( 2542 'nmap', "finished $range $ips alive:", scalar( keys %{ $ipstt{'ralive'} } ), ', resalive:', 2543 scalar( keys %{ $ipstt{'resalive'} } ), 2544 #' per', psmisc::human( 'time_period', $t ), $ipps 2545 speed( $t, $ips ), $_{meta}, 2546 ); 2547 ++$work{'scans'}; 2548 } 2549}; 2550 2551sub url_local2ext (\%) { 2552 my $url = \%{ $_[0] }; 2553 $url->{prot} = $config{'local_prot'}; 2554 $url->{host} = $config{'local_host'}; 2555 $config{'local_path'}->( $url->{path} ) if $config{'local_path'} eq 'CODE'; 2556 $url; 2557} 2558$processor{'prot'}{'range'}{'func'} ||= sub { printlog( 'dev', '$processor{range}', @_ ); }; 2559$processor{'scan_proto_host'} ||= sub { 2560 for my $url (@_) { 2561 #printlog('dev', '$processor{scan_proto_host}', $url); 2562 $url =~ s/\s+$//; 2563 $url =~ tr|\\|/|; 2564 $url .= '/' unless $url =~ m|/$|; 2565 my $url_orig = $url; 2566 my %url = split_url($url); 2567 #printlog('dev', '$processor{scan_proto_host}', %url); 2568 #printlog('dev', '$processor{scan_proto_host} pre norm', join_url(\%url)); 2569 $url{'host'} = normalize_ip( $url{'host'} ) 2570 if $url{'host'} 2571 and $url{'prot'} ne 'range' 2572 and hconfig( 'host_by_name_norm', $url{'host'}, $url{'prot'} ) 2573 and hconfig( 'host_by_name_norm_force', $url{'host'}, $url{'prot'} ) 2574 #and !( $config{'norm_skip_host'} and $url{'host'} =~ /$config{'norm_skip_host'}/i ) 2575 ; 2576 #printlog('warn', "host [$url{'host'}]", name_to_ip($url{'host'})); 2577 printlog( 'warn', "invalid host [$url]" ), next if !$url{'host'} and $url{'prot'} ne 'local'; 2578#printlog('dev', '$processor{scan_proto_host}3', %url, $config{'scanner'}{ $url{'prot'} }{'disabled'}, $config{'scanner'}{ $url{'prot'} }{'func'}); 2579#printlog('dev', '$processor{scan_proto_host} post norm', join_url(\%url)); 2580 next 2581 if $config{'scanner'}{ $url{'prot'} }{'disabled'} 2582 or ( !$url{'host'} and $url{'prot'} ne 'local' ) 2583 or !$processor{'prot'}{ $url{'prot'} }{'func'}; 2584 $url{'path'} = '' if $url{'path'} eq '/'; 2585 $url = join_url( \%url ); 2586 my $desc; #todo 2587 my $table = $url{'prot'} eq 'range' ? $config{'sql_tranges'} : $config{'sql_tresource'}; 2588 my %res = $url{'prot'} eq 'range' ? ( 'range' => $url, 'desc' => $desc ) : ( %url, 'host' => full_host( \%url ), ); 2589 #printlog('dev', '$processor{scan_proto_host}2', %res); 2590 printlog( 2591 'dbg', 2592 "Skipping $url (scanned", 2593 psmisc::human( 'time_period', int( time() ) - $work{'skiptime'}{$url} ), 2594 ' ago) period=', 2595 psmisc::human( 'time_period', $work{'skipperiod'}{$url} ), 2596 ), 2597 next 2598 if ( 2599 !allowscan( 2600 \%res, $table, undef, ( ( $url{'prot'} eq 'dchub' and $url{'host'} =~ m|/| ) ? $config{'dcbot_client_period'} : undef ) 2601 ) 2602 ); 2603 next if $config{'noscan'}; 2604 #} else { 2605 #if !$config{'noscan'}; 2606 #{ 2607 local $config{'force'} = 0 if $work{'scan_proto_host_reset_force'}; 2608 resetcounters(); 2609 $work{'scan_unfinished'} = 0; 2610 state( "scan:", join_url( \%url ) ); 2611 printlog( 'dbg', "Starting scan", join_url( \%url ) ); 2612 on_interrupt( 2613 sub { 2614 #$static{'db'}->flush(); 2615 #local $config{'log_dmp'} = 1; 2616 #printlog('dev', 'on_interrupt update'), 2617 $static{'db'}->update( $table, undef, 2618 { %url, 'scan' => int( time() + hconfig( 'on_interrupt', values %url ) - hconfig( 'period', values %url ) ), } ) 2619 if hconfig( 'on_interrupt', values %url ); 2620 } 2621 ); 2622 my $tim = timer(); 2623 #printlog('dev', '$processor{scan_proto_host} fu ', $url_orig,join_url(\%url)); 2624 $processor{'prot'}{ $url{'prot'} }{'func'}->( ( $url{'prot'} eq 'local' ) ? $url_orig : join_url( \%url ) ); 2625 on_interrupt(); 2626 ++$work{'scans'}; 2627 $static{'db'}->flush_insert() 2628 #$processor{'out'}{'array'}->() 2629 if hconfig( 'flush_sql_every_res', values %url ); 2630 next if $config{'scanner'}{ $url{'prot'} }{'no_res_stat'}; 2631 #printlog('dev', '$processor{scan_proto_host} fu0 ', $url_orig,join_url(\%url), join_url(\%res)); 2632 my $local; 2633 $local = 1 if $url{'prot'} eq 'local'; 2634 %url = %{ url_local2ext(%url) }, %res = %{ url_local2ext(%res) }, if $url{'prot'} eq 'local'; 2635 #printlog('dev', '$processor{scan_proto_host} fu ', $url_orig,join_url(\%url), join_url(\%res), "loc[$local]"); 2636 #local $config{'log_dmp'} = 1; 2637 #printlog('dev', 'on_finish update', 'files=',$work{'filec'}->( $stat{'files'} ), 'size=', $work{'sizec'}->( $stat{'size'} )); 2638 #dmp \%stat; 2639 local %_; 2640 if ( $work{'filec'}->( $stat{'files'} ) ) { 2641 %_ = ( 2642 %res, %{ $config{'clear_file'} || {} }, 2643 'files' => $work{'filec'}->( $stat{'files'} ), 2644 'dirs' => $work{'dirc'}->( $stat{'dirs'} ), 2645 'size' => $work{'sizec'}->( $stat{'size'} ) 2646 ); 2647 $_{'time'} = int( time() ); 2648 $_{'scan'} = int( time() ) unless $work{'scan_unfinished'}; 2649 $static{'db'}->update( $config{'sql_tresource'}, undef, \%_ ); 2650 } 2651 #%_ = ( %res, 'ip' => ip_to_name( $url{'host'} ), 'scan' => int( time() ) ); 2652 %_ = ( %res, 'ip' => name_to_ip( $url{'host'} ), 'scan' => int( time() ) ); 2653 $_{'time'} = int( time() ) if $work{'filec'}->( $stat{'files'} ); 2654 $static{'db'}->update( $config{'sql_thost'}, undef, \%_ ); 2655#printlog('dev', 'sizec', $work{'sizec'}->( $stat{'size'}),$work{'filec'}->( $stat{'files'}) ,$work{'filec'}->( $stat{'dirs'})); 2656 if ( $work{'sizec'}->( $stat{'size'} ) and ( $work{'filec'}->( $stat{'files'} ) or $work{'filec'}->( $stat{'dirs'} ) ) ) { 2657 %_ = ( 2658 %res, 2659 'path' => '', 2660 'name' => '', 2661 'ext' => '', 2662 'desc' => $work{'desc'}{ $url{'host'} }{ $url{'prot'} }, 2663 'scan' => int( time() ) 2664 ); 2665 $static{'db'}->update( $config{'sql_tfile'}, undef, \%_ ); 2666 #$_{'path'} = '/'; 2667 #$static{'db'}->update( $config{'sql_tfile'}, undef, \%_ ); 2668 #printlog( 'dev', 'cd0', $res{'path'}, $local ? $url_orig : $res{'path'}, Dumper \%res); 2669 #=z 2670 update_to_root( %res, %_, $local, $url_orig ); 2671 my $t = $tim->(); 2672 printlog( 2673 'stat', $url, 'size', $work{'sizec'}->( $stat{'size'} ), 2674 'files', $work{'filec'}->( $stat{'files'} ), 2675 'dirs', 2676 $work{'filec'}->( $stat{'dirs'} ), 2677 speed( $t, $work{'filec'}->( $stat{'files'} ) ) 2678 ); 2679 } 2680 #} 2681 } 2682}; 2683 2684=del 2685sub startme { 2686 my ($start) = @_; 2687 if ($start) { 2688#$processor{'out'}{'array'}->(); 2689 my $com = 2690#"$config{'starter'} $config{'spawn_prefix'} $config{'perl'} $config{'root_path'}crawler.pl $force $start $config{'spawn_postfix'}"; 2691"$config{'starter'} $config{'spawn_prefix'} $^X $0 $force $start $config{'spawn_postfix'}"; 2692 2693 printlog( 'dbg', "starting with $start:", $com ); 2694#printlog( 'dbg', $com ); 2695 system($com); 2696 } 2697} 2698=cut 2699 2700sub startself { 2701 my ($start) = @_; 2702 $static{'db'}->flush_insert(); 2703 #if (defined $start) { 2704 my $force = "--force=$config{'force'}" if $config{'force'}; 2705 return psmisc::startme( join ' ', $force, $start ); 2706 #} 2707} 2708$processor{'urls'} ||= sub { 2709 my ( $start, @urlproc ); 2710 for (@_) { 2711 next unless $_; 2712 if ( !$config{'noscan'} and $config{'spawn'} and /\:\/\// ) { $start .= " $_ "; } 2713 else { push( @urlproc, $_ ); } 2714 } 2715 startself($start); 2716 mysleep( $config{'scan_sleep'} ); 2717 $processor{'url'}->(@urlproc) if @urlproc; 2718}; 2719 2720=c 2721$processor{'file'}{'sql'}{'func'} ||= sub { 2722 my $tper = timer(); 2723 for my $datafile ( grep { $_ } @_ ) { 2724 my $filesize = ( -s ( $datafile or next ) or next ); 2725 printlog( 2726 'info', 2727 'Uploading ', 2728 $datafile, 2729 " ($filesize bytes) ...", ( 2730 $tper->() 2731 and $stat{'total_bytes'} 2732 ? " prognose:" . psmisc::human('time_period', ( $filesize * $tper->() ) / $stat{'total_bytes'} ) . 's' 2733 : '' 2734 ) 2735 ); 2736 my $per = timer(); 2737 rename( $datafile, $datafile . '.up' ); 2738 $processor{'out'}{'mysql_file'}->("$datafile.up"); 2739 rename( $datafile . '.up', $datafile . '.ok' ); 2740 $stat{'total_bytes'} += $filesize; 2741 printlog( 2742 'time', 2743 " done per ", 2744 $per->(), 2745 " sec ", 2746 $per->() ? 'at ' . ( $filesize / $per->() ) . ' b/s ' : '', 2747 "[total $stat{'total_bytes'} bytes per ", 2748 psmisc::human('time_period', $tper - () ), 2749 " sec ", 2750 ( $tper->() ? 'at ' . int( $stat{'total_bytes'} / $tper->() ) : '' ), 2751 ' b/s]', 2752 ); 2753 } 2754}; 2755=cut 2756 2757sub dcscanned { 2758 local %_ = ref $_[0] ? %{ $_[0] } : ( cid => $_[0] ); 2759 #$static{'db'}->line 2760 $static{'db'}->query( $config{'sql'}{'table'}{ $config{'sql_tresource'} }{'cid'} 2761 ? qq{SELECT * FROM $tq$config{'sql_tresource'}$tq WHERE ${rq}cid${rq}=} . $static{'db'}->quote( $_{'cid'} ) . qq{ LIMIT 1} 2762 : qq{SELECT * FROM $tq$config{'sql_tfile'}$tq WHERE ${rq}tiger${rq}=} 2763 . $static{'db'}->quote( $_{'cid'} ) 2764 . qq{ AND ${rq}path${rq}=${vq}${vq} LIMIT 1} )->[0]; 2765} 2766#!!! TODO : gz tgz 2767$processor{'file'}{'bz2'}{'func'} ||= sub { 2768 my $tper = timer(); 2769 $processor{'file'}{'xml'}{'func'}->(@_), return if $config{'direct_unzip'}; 2770 for my $datafile ( grep { $_ } @_ ) { 2771 #my $filesize = -s $datafile; 2772 my $filesize = -s ( $datafile || next ); 2773 unless ($filesize) { 2774 prinlog( 'no fs', $datafile ); 2775 rename( $datafile, $datafile . '.ok' ); 2776 next; 2777 } 2778 printlog( 2779 'info', 2780 'Extracting ', 2781 $datafile, 2782 " ($filesize bytes) ...", ( 2783 $tper->() and $stat{'total_bytes'} ? " prognose:" . int( ( $filesize * $tper->() ) / $stat{'total_bytes'} ) . 's' : '' 2784 ) 2785 ); 2786 my $per = timer(); 2787 shelldata($datafile); 2788 printprog( "$config{'bzip2'} $config{'bzip2_uncompress'} \"$datafile\"", 1 ); 2789 rename( $datafile, $datafile . '.ok' ); 2790 $stat{'total_bytes'} += $filesize; 2791 printlog( 2792 'time', 2793 " done per ", 2794 $per->(), 2795 " sec ", 2796 $per->() ? 'at ' . ( $filesize / $per->() ) . ' b/s ' : '', 2797 "[total", 2798 psmisc::human( 'size', $stat{'total_bytes'} ), 2799 "bytes per", 2800 psmisc::human( 'time_period', $tper->() ), 2801 ' ', 2802 ( $tper->() ? 'at ' . int( $stat{'total_bytes'} / $tper->() ) : '' ), 2803 ' b/s]', 2804 ); 2805 } 2806}; 2807$processor{'file'}{'xml'}{'func'} ||= sub { #dc++ 2808 our $tper ||= timer(); 2809 my ( %url, %files ); 2810 $url{'prot'} = 'dchub'; 2811 for my $datafile ( grep { $_ } @_ ) { 2812 last if scan_stop; 2813 my $datafilebug = $datafile; 2814 my $per = timer(); 2815 #my $filesize = -s $datafile; 2816 my $filesize = ( -s ( $datafile or next ) or next ); 2817 my $files; 2818 my $scan = int( $^T - 86400 * -M $datafile ); 2819 #int( time() ) 2820 printlog( 2821 'dc', 2822 'Parsing ', 2823 $datafile, 2824 " ($filesize bytes) ...", ( 2825 $tper->() and $stat{'total_bytes'} ? " prognose:" . int( ( $filesize * $tper->() ) / $stat{'total_bytes'} ) . 's' : '' 2826 ) 2827 #, 'scan=', $scan, ' now=', int(time()), ' mdf=' , ( -M $datafile ) * 86400 2828 ); 2829 rename( $datafile, $datafile . '.up' ); 2830 on_interrupt( 2831 sub { 2832 rename( $datafile . '.up', $datafile ); 2833 filesave( \%files ); 2834 %files = (); 2835 } 2836 ); 2837 #printlog('dev', "tryop[$datafile]"); 2838 #local @_ = ; 2839 my $cpdata = {}; 2840 my $fh; 2841 if ( 2842 #open ((),@_) 2843 $datafile =~ /\.xml\.bz2$/i 2844 ? open( $fh, '-|' . ':encoding(utf-8)', qq{$config{'bzip2'} $config{'bzip2_uncompress'}c "$datafile.up"} ) 2845 : open( $fh, '<' . ':encoding(utf-8)', "$datafile.up" ) 2846 #open($fh, ($datafile =~ /\.xml\.bz2$/i 2847 #? ('-|' . ':encoding(utf-8)', , qq{$config{'bzip2'} $config{'bzip2_uncompress'}c "$datafile.up"}) 2848 #: ('<' . ':encoding(utf-8)', "$datafile.up") )) 2849 ) 2850 { 2851 my @dir; 2852 my $path = '/'; 2853 %url = ( %url, 'port' => '', split_url( $work{'current_dchub'} ) ); #clean port of prev iteration 2854 #$url{'dcuser'} = cp_trans( $config{'cp_shell'}, $config{'cp_db'}, decode_url($datafile) ); 2855 #eval { 2856 $url{'dcuser'} = Encode::decode 'utf-8', decode_url($datafile), Encode::FB_DEFAULT; 2857 #}; 2858 printlog( 'err', 'cant decode ', $datafile, decode_url($datafile) ), rename( $datafile . '.up', $datafile . '.err' ), next 2859 if $@; 2860 $url{'dcuser'} =~ s/\.xml(\.bz2)?$//i; 2861 $url{'dcuser'} =~ s{^.+(\\|/)}{}i; 2862 my $url = join_url( \%url ); 2863 $url{'host'} = full_host( \%url ); 2864 my %time = 2865 ( hconfig( 'unknown_time', $url{'host'} ) ne '' 2866 ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $url{'host'} ) ) 2867 : () ); 2868 ( $work{'sizec'}, $work{'filec'}, $work{'dirc'} ) = 2869 ( counter( $stat{'size'} ), counter( $stat{'files'} ), counter( $stat{'dirs'} ) ); 2870 local $/ = '<'; 2871 my $decoder = sub { 2872 $_ = HTML::Entities::decode_entities($_); 2873 #printlog('decoder'); 2874 } 2875 if psmisc::use_try 'HTML::Entities'; 2876 my $cid; 2877 while (<$fh>) { 2878 last if $work{'die'}; 2879 #if ( my ( $file, $size, $tiger ) = /^File Name="([^"]+)" Size="(\d+)" TTH="([^"]+)"/i ) { 2880 local %_; 2881 #$_ = HTML::Entities::decode_entities_old $_; 2882 #$_ = HTML::Entities::decode_entities $_; 2883 psmisc::code_run $decoder; 2884 #psmisc::cp_detect_trans($_, $cpdata, 'utf-8', 'utf-8'); #some filelist not in utf 2885 s/^(\w+)\s//; 2886 my ($w) = lc($1); 2887 s/(\w+)="([^"]+)"/$_{lc$1}=$2;''/eg; #" 2888 #printlog 'dev', "s: [$w]", Dumper \%_; 2889 #if ( /^File Name="([^"]+)" Size="(\d+)" TTH="([^"]+)"/i ) { 2890 if ( $w eq 'file' and length $_{name} ) { 2891 #printlog('dev','1', $config{'cp_dc_list'}, $config{'cp_db'}, $_{name}); 2892 local $_ = $_{name}; # = cp_trans( $config{'cp_dc_list'}, $config{'cp_db'}, $file ); 2893 #printlog('dev','2', $config{'cp_dc_list'}, $config{'cp_db'}, $file); 2894 my ($ext) = ( $_{name} =~ s/\.([^\.]+)$// ? ($1) : ('') ); 2895 $_{meta} = join ' ', map { qq{$_="$_{$_}"} } grep { $_{$_} } qw(br hit); 2896 #my %meta; 2897 #my @k = sort grep {$_ ne qw(name size ) , ($config{'sql'}{'table'}{ $config{'sql_tfile'} }{'tiger'} ? () : ('tth'))} keys %_; 2898 $files{ $path . '/' . $_ } = { 2899 %url, %_, 2900 'path' => $path, 2901 #'name' => $_{name}, 2902 'ext' => $ext, 2903 'size' => int $_{size}, 2904 'time' => int $_{ts}, 2905 %time, ( 2906 $config{'sql'}{'table'}{ $config{'sql_tfile'} }{'tiger'} #{'array_insert'} 2907 ? ( 'tiger' => $_{tth} ) 2908 : ( 'desc' => 'tiger=' . $_{tth} ) 2909 ), 2910 'scan' => $scan 2911 }; 2912 ++$stat{'total_files'}; 2913 ++$files; 2914 #} elsif ( my ($file) = /^Directory Name="([^"]+)">/i ) { #" 2915 } elsif ( $w eq 'directory' and length $_{name} ) { 2916 local $_ = $_{name}; # = cp_trans( $config{'cp_dc_list'}, $config{'cp_db'}, $file ); 2917 push( @dir, $_{name} ); 2918 my ($ext) = ( $_{name} =~ s/\.([^\.]+)$// ? ($1) : ('') ); 2919 $files{ $path . '/' . $_ } = { %url, 'path' => $path, 'name' => $_{name}, 'ext' => $ext, %time, 'scan' => $scan }; 2920 $path = ( join( '/', @dir ) or '/' ); 2921 #printlog 'dev', 'dir start', "[$_]", $path, 'd=', @dir; 2922 } elsif (m|^/Directory>|i) { 2923 #$static{'db'}->insert_hash_hash( $config{'sql_tfile'}, \%files ); 2924 #printlog 'saving', Dumper \%files; 2925 filesave( \%files ); 2926 %files = (); 2927 #printlog 'dev', 'dir end', "[$_]",$path, 'd=', @dir; 2928 pop(@dir); 2929 $path = join( '/', @dir ) || '/'; 2930 #} elsif (/^FileListing .*Base="([^"]+)"/i) { #"mc 2931 } elsif ( $w eq 'filelisting' and length $_{base} ) { 2932 my $last; 2933 $cid //= $_{cid} if $_{cid}; 2934 my $already; 2935 if ( $_{cid} and $config{'use_dc'} and !$config{'use_dc_user_dupes'} ) { 2936 #printlog 'Cur', Dumper #$static{'db'}->select($config{'sql_tfile'}, {'tiger'=>$_{'cid'}, 'path'=>''}); 2937 $already = dcscanned( \%_ ); 2938 #printlog 'cid', $_{cid}, Dumper $already, $config{'dcbot_client_period'}, time - $already->{scan}, $scan ; 2939 #; 2940 if ( ( $already->{tiger} or $already->{cid} ) 2941 and ( time - $already->{scan} < $config{'dcbot_client_period'} or $scan < $already->{scan} ) ) 2942 { 2943 printlog( 2944 'info', 'skipping already scanned ', 2945 $url, 'as', join_url($already), 2946 psmisc::human( 'time_period', time - $already->{scan} ), 2947 $config{'dcbot_client_period'}, 2948 ); 2949 #printlog('warn', 'not uploading'); 2950 rename( $datafile . '.up', $datafile . '.ok' ); 2951 $last = 1; 2952 } 2953 } 2954 @dir = ( $_{base} eq '/' ? '' : $_{base} ); 2955 $path = join( '/', @dir ) || '/'; 2956 my $save = { 2957 %url, ( 2958 hconfig( 'unknown_time', $url{'host'} ) ne '' 2959 ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $url{'host'} ) ) 2960 : () 2961 ), 2962 'scan' => $scan 2963 }; 2964 #%_ = ( '' => { %$_, 'desc' => $_{'generator'}, 'tiger' => $_{'cid'}, }, ' ' => { %$_, 'path' => '/' }, ); 2965 #$static{'db'}->insert_hash_hash( $config{'sql_tfile'}, \%_ ); 2966 #filesave( \%_ ); 2967 my @replace = qw(prot host port dcuser); 2968 my %meta = ( 'meta' => 'link=' . encode_url( join_url($already) ) ); 2969 for ( 0 .. 1 ) { 2970 filesave( { 2971 '' => { %meta, %$save, 'desc' => $_{'generator'}, 'tiger' => ( $_{'cid'} || $_{'tiger'} ), }, 2972 ' ' => { %meta, %$save, 'path' => '/' }, 2973 } 2974 ); 2975 last unless psmisc::is_hash_size $already; 2976 %meta = (); 2977 printlog( 2978 'info', 'saving already scanned', 2979 $url, 'at', psmisc::human( 'time_period', time - $already->{'scan'} ), 2980 'as', join_url($already) 2981 ) if !$_ and !$last; 2982 $url{$_} = $save->{$_} = $already->{$_} for @replace; 2983 $url{'host'} = full_host( \%url ); 2984 } 2985 last if $last; 2986 } 2987 } 2988 close($fh); 2989 on_interrupt(); 2990 #filesave( \%files ); %files = (); 2991 %_ = ( 2992 %url, 2993 'scan' => $scan, 2994 'files' => $work{'filec'}->( $stat{'files'} ), 2995 'dirs' => $work{'dirc'}->( $stat{'dirs'} ), 2996 'size' => $work{'sizec'}->( $stat{'size'} ), 2997 ( !$cid ? () : ( 'cid' => $cid ) ), 2998 ); 2999 $_{'time'} = $scan if $work{'filec'}->( $stat{'files'} ); 3000 $static{'db'}->update( $config{'sql_tresource'}, undef, { %_, %{ $config{'clear_file'} || {} } } ); 3001 } 3002 rename( $datafilebug . '.up', $datafilebug . '.ok' ); 3003 $stat{'total_bytes'} += $filesize; 3004 printlog( 3005 'time', 3006 " done per ", 3007 psmisc::human( 'time_period', $per->() ), 3008 , 3009 $per->() 3010 ? 'at ' . psmisc::human( 'float', $filesize / $per->() ) . ' bps ' . psmisc::human( 'float', $files / $per->() ) . ' fps ' 3011 : '', 3012 "[total $stat{'total_bytes'} bytes $stat{'total_files'} files per ", 3013 psmisc::human( 'time_period', $tper->() ), 3014 ' ', ( 3015 $tper->() 3016 ? 'at ' . int( $stat{'total_bytes'} / $tper->() ) . ' bps ' . int( $stat{'total_files'} / $tper->() ) . ' fps' 3017 : '' 3018 ), 3019 ' ]', 3020 ); 3021 } 3022}; 3023$processor{'file'}{'toscan'}{'func'} ||= sub { 3024 local $config{'force'} = 1; 3025 for my $datafile ( grep { $_ } @_ ) { 3026 my $url = ( $datafile or next ); 3027 $url =~ s{^.+(\\|/)}{}i; 3028 $url =~ s/\.toscan$//i; 3029 $url = decode_url($url); 3030 local $config{'ftp_recurse_force'} = 1; 3031 resetcounters(); 3032 $processor{'prot'}{'ftp'}{'func'}->($url); 3033 } 3034}; 3035for ( keys %{ $processor{'format'} } ) { 3036 $processor{'file'}{$_}{'func'} ||= sub { 3037 for my $datafile ( grep { $_ } @_ ) { 3038 rename( $datafile, $datafile . '.up' ); 3039 open( F, '<', $datafile . '.up' ) or next; 3040 $datafile =~ s/.$_$//i; 3041 $processor{'format'}{$_}->( $datafile, \*F ); 3042 close(F); 3043 rename( $datafile . '.up', $datafile . '.ok' ); 3044 } 3045 }; 3046} 3047$processor{'file'}{'url'}{'func'} ||= sub { 3048 for my $datafile ( grep { $_ } @_ ) { 3049 #printlog('dev', "LOAD:$datafile"); 3050 open( my $fh, ( ( $datafile eq '-' ) ? ($datafile) : ( '<', $datafile ) ) ) or next; 3051 $processor{'url'}->(<$fh>); 3052 close($fh); 3053 } 3054}; 3055$processor{'file'}{'config'}{'func'} = $processor{'file'}{'hublist'}{'func'} ||= sub { 3056 local $config{'host_by_name_norm'} = 0; 3057 for my $datafile ( grep { $_ } @_ ) { 3058 $datafile = uni_get($datafile); 3059 next 3060 unless ( 3061 open( 3062 my $fh, ( 3063 ( $datafile eq '-' ) 3064 ? ($datafile) 3065 : ( $datafile =~ /\.xml\.bz2$/i ? "$config{'bzip2'} $config{'bzip2_uncompress'}c \"$datafile\"|" : '<' . $datafile ) 3066 ) 3067 ) 3068 ); 3069 printlog( 'dc', 'hublist: loading', $datafile ); 3070 while (<$fh>) { 3071 #printlog ('dev', $_); 3072 if (m|^\s*<Hub\s.*?Address="\s*(\w+://)?([^:/"]+):?(\d*)\s*"|i) { #"mc 3073 #printlog ('dev', $_); 3074 my %url = ( 'prot' => 'dchub' ); 3075 $processor{'url'}->( 3076 ( $1 || $url{'prot'} . '://' ) . $2 . ( $3 and $3 != $config{'scanner'}{ $url{'prot'} }{'port'} ? ':' . $3 : '' ) ); 3077 ++$stat{'hublist'}; 3078 } 3079 } 3080 close($fh); 3081 } 3082}; 3083$processor{'file'}{'list'}{'func'} ||= sub { 3084 local $config{'host_by_name_norm'} = 0; 3085 for my $datafile ( grep { $_ } @_ ) { 3086 $datafile = uni_get($datafile); 3087 next unless ( open( my $fh, ( ( $datafile eq '-' ) ? ($datafile) : ( '<', $datafile ) ) ) ); 3088 printlog( 'dc', 'hub .list: loading', $datafile ); 3089 while (<$fh>) { 3090 if (m{^\s*([^:/\|]+):?(\d*)\s*\|}i) { 3091 my %url = ( 'prot' => 'dchub' ); 3092 $processor{'url'}->( 'dchub://' . $1 . ( $2 and $2 != $config{'scanner'}{ $url{'prot'} }{'port'} ? ':' . $2 : '' ) ); 3093 ++$stat{'list'}; 3094 } 3095 } 3096 close($fh); 3097 } 3098}; 3099$processor{'file'}{'ok'}{'func'} ||= sub { unlink(@_) unless $config{'keep_ok'} or $config{'debug'}; }; 3100$processor{'file'}{'partial'}{'func'} ||= sub { 3101 #printlog( 'pt', "$_", 86400 * -M $_ ) for @_ 3102 unlink( grep { 3600 < 86400 * -M $_ } @_ ) unless $config{'keep_ok'} or $config{'debug'}; 3103}; 3104$processor{'file'}{'up'}{'func'} ||= sub { 3105 #printlog( 'pt', "$_", 86400 * -M $_ ) for @_ 3106 unlink( grep { 7 < -M $_ } @_ ) unless $config{'debug'}; 3107}; 3108$processor{'file'}{'dchub'}{'func'} ||= sub { 3109 for my $datadir ( grep { $_ and -d } @_ ) { 3110 $work{'current_dchub'} = $datadir; 3111 $work{'current_dchub'} =~ s{^.*[/\\]}{}ig; 3112 $work{'current_dchub'} = $1 . '://' . $work{'current_dchub'} if $work{'current_dchub'} =~ s{\.((?:dchub|adc))$}{}ig; 3113 $work{'current_dchub'} =~ s/\.(\d+)$/:$1/ 3114 if $work{'current_dchub'} =~ /(\d+\.){4}(\d+)$/ 3115 or $work{'current_dchub'} =~ /\D\.(\d+)$/; 3116 printlog( 'dbg', "dir:", $datadir, " hub:$work{'current_dchub'}" ); 3117 local $config{'del_empty_dir'} = $config{'del_empty_dcdir'}; 3118 $processor{'dir'}->($datadir); 3119 } 3120 #$processor{'out'}{'array'}->(); 3121 $static{'db'}->flush_insert(); 3122}; 3123$processor{'file'}{'adc'}{'func'} ||= $processor{'file'}{'dchub'}{'func'}; 3124$processor{'file'}{'adcs'}{'func'} ||= $processor{'file'}{'dchub'}{'func'}; 3125$processor{'file'}{'ldi'}{'func'} ||= sub { 3126 my $table = $config{'sql_tfile'}; 3127 for my $datafile ( grep { $_ } @_ ) { 3128 rename( $datafile, $datafile . '.up' ); 3129 my $mydatafile = $datafile; 3130 $mydatafile =~ s!\\+|/+!$config{'slash_mysql'}!g; 3131 next if !$mydatafile or !-s $mydatafile . '.up'; 3132 #my ( $tq, $rq, $vq ) = $static{'db'}->quotes(); 3133 $static{'db'}->do( 3134"LOAD DATA $static{'db'}->{'LOW_PRIORITY'} LOCAL INFILE '$mydatafile.up' REPLACE INTO TABLE $config{'sql_tfile'} FIELDS OPTIONALLY ENCLOSED BY \"'\" (" 3135 . join( 3136 ',', 3137 map( "$rq$_$rq", ( 3138 sort { $config{'sql'}{'table'}{$table}{$b}{'order'} <=> $config{'sql'}{'table'}{$table}{$a}{'order'} } 3139 grep( $config{'sql'}{'table'}{$table}{$_}{'array_insert'}, keys %{ $config{'sql'}{'table'}{$table} } ) 3140 ) ) 3141 ) 3142 . ")" 3143 ); 3144 rename( $datafile . '.up', $datafile . '.ok' ); 3145 } 3146}; 3147$processor{'dir'} ||= sub { 3148 for my $dir (@_) { 3149 printlog( 'info', "uploading [$dir]" ); 3150 $dir .= $config{'slash_perl'} if $dir and !( $dir =~ /[\/\\]$/ ); 3151 #printlog( 'dev', "dir: [$dir]:",<$dir*.$_>, <$dir.*.$_> ); 3152 #!!! READDIR HERE 3153 for my $type ( shuffle( keys %{ $processor{'file'} } ) ) { #!!!!SORT 3154 #$processor{'file'}{$_}{'func'}->( @{ shuffle( [ <$dir*.$_>, <$dir.*.$_> ] ) } ) 3155 next if ref $processor{'file'}{$type}{'func'} ne 'CODE' or $processor{'file'}{$type}{'disabled'}; #and @dir; 3156 last if scan_stop; 3157 my @files = shuffle( <$dir*.$type>, <$dir.*.$type> ); 3158 $processor{'file'}{$type}{'func'}->( 3159 grep { 3160#printlog('dev', 'dt', $_, int(time), int( $^T - 86400 * -M $_ ), psmisc::human('time_period', time - $^T + 86400 * -M $_ )); 3161 $_ and time - $^T + 86400 * -M $_ > $processor{'file'}{$type}{'purge'} || $config{'purge'} 3162 } @files 3163 ); 3164 unlink for grep { !-s } @files; 3165 #my $scan = int( $^T - ( -M $datafile ) * 86400 ); 3166 } 3167 my @dir = grep { !m{/\.\.?$} } <$dir*>, <$dir.*>; 3168 #printlog 'D', Dumper \@dir; 3169 printlog( 'dbg', "deleted empty $dir:", ( rmdir($dir) or $! ) ) if $config{'del_empty_dir'} and !@dir; 3170 } 3171 $static{'db'}->flush_insert(); 3172 #$processor{'out'}{'array'}->(); 3173}; 3174#{ 3175#my ( $current, $order ); 3176#sub newcurrent { $program{ $current = $_[0] }{'order'} ||= $order += 10; } #v2 3177#} 3178$program{ psmisc::program('init') }{'force'} = 1; 3179$program{ psmisc::program() }{'func'} ||= sub { 3180 printlog( 3181 'info', "Started with [", 3182 $^X, $work{'$0'}, join ' ', 3183 map( $_ . ( ( $param->{$_} ne '' and $param->{$_} != 1 ) ? "=$param->{$_}" : '' ), sort keys %$param ), 3184 "] version:", ( $config{'version'} or 'r' . ( split( ' ', '$Revision: 4843 $' ) )[1] ) 3185 ) 3186 if # $param and 3187 %$param and !$config{'nolog_startstop'}; 3188 mkdir_rec( $config{'datadir'}, 0777 ); 3189 mkdir_rec( $config{'tmpdir'}, 0777 ); 3190#later $config{'sql_mysqlopt'} = ( $config{'sql_host'} ? ' -h ' . $config{'sql_host'} : ' ' ) . ' ' . $config{'sql_mysqlopt'} . ' '; 3191 $static{'banned'} = { loadlist( $config{'file_banned'} ), %{ $config{'banned'} || {} } }; 3192#if ( ref $config{'sql_base_up'} eq 'HASH' ) { $config{'sql'}{$_} = $config{'sql_base_up'}{$_} for keys %{ $config{'sql_base_up'} }; 3193#} else { $config{'sql'}{'database'} = $config{'sql_base_up'}; } 3194 $work{'cp_input'} = $config{'cp_shell'}; 3195 psmisc::lib_init(); 3196 $config{'sql'}{'codepage'} ||= $config{'cp_db'} if $config{'cp_db'}; 3197 unless ( keys %$param ) { 3198 printlog( 'info', 3199 'Short usage: ' 3200 . ( $work{'$0'} || $0 ) 3201 . ' [install] [file.to.load] [ftp://res.to.scan] [proc[=3]] [--configparam=value] [--sql__configparam=value]' ); 3202 } 3203 return 0; 3204}; 3205$program{ psmisc::program('db') }{'force'} = 1; 3206$program{ psmisc::program() }{'func'} ||= sub { 3207 #END {printlog('WOW1', $static{'db'}->{'dbh'});}; 3208 #printlog 'dev', "user0:", $static{'db'}, $config{'sql'}{user}, $config{'sql_base_up'}{user}; 3209 #printlog 'dev', 'U', Dumper $config{'sql_base_up'}; 3210 $static{'db'} ||= pssql->new( 3211 #'codepage' => $config{'cp_db'}, 3212 'handler_insert0' => sub { 3213 my ( $self, $table, $hash ) = @_; 3214 #printlog('dev', 'handler_insert0', @_); 3215 if ( $table eq $config{'sql_tfile'} ) { #TODO CONFIGURABLE 3216 3217=old 3218 if ( !$self->{'use_dbi'} 3219 and $work{'current_output_file'} ne $hash->{$_}{'host'} 3220 and $hash->{'host'} ) 3221 { 3222 $work{'current_output_file'} = $hash->{'host'}; 3223 open_out_file( $work{'current_output_file'} ); 3224 } 3225=cut 3226 3227 if ( $hash->{'size'} ) { 3228 ++$stat{'files'}; 3229 $stat{'size'} += $hash->{'size'} if $hash->{'size'} < $config{'max_stat_file_size'}; 3230 } else { 3231 ++$stat{'dirs'}; 3232 } 3233 } 3234 }, 3235 'handler_insert2' => sub { 3236 my ( $self, $table, $hash ) = @_; 3237 #printlog('dev', 'handler_insert2', @_); 3238 $work{'upscanned'} = counter( $stat{'files'} ) unless $work{'upscanned'}; 3239 if ( $work{'filec'} 3240 and $work{'filec'}->( $stat{'files'} ) 3241 and $work{'upscanned'}->( $stat{'files'} ) > $config{'update_scan_every'} ) 3242 { 3243 #local $config{'log_dmp'} = 1; 3244 #printlog('dev', 'on_update update'), 3245 $work{'upscanned'} = counter( $stat{'files'} ); 3246 #my ($values) = values(%$hash); 3247 my ($values) = $hash; 3248 #printlog('dev', 'handler_insert2values:', $values); 3249 #sql_update( 3250 my %scantime = ( 'scan' => int( time() ), 'time' => int( time() ) ) if $values->{'prot'} ne 'dchub'; 3251 $self->update( 3252 $config{'sql_tresource'}, 3253 undef, { 3254 $self->filter_row( $config{'sql_tresource'}, 'primary', $values ), 3255 'path' => '', 3256 #'scan' => int( time() ), 3257 #'time' => int( time() ) 3258 %scantime 3259 } 3260 ); 3261#!!! TODO < BUG !!! 3262#printlog('dev', 'now=', 'files=',$work{'filec'}->( $stat{'files'} ), 'size=', $work{'sizec'}->( $stat{'size'} )); 3263#$self->select_log($config{'sql_tresource'}, {$self->filter_row( $config{'sql_tresource'}, 'primary', $values ), 'path' => '',}); 3264#printlog('dev', 'wherebody=',$self->where_body({'files' => $work{'filec'}->( $stat{'files'} ), 'files_mode'=>'g'}, undef, $config{'sql_tresource'}, )); 3265 $self->update( 3266 #sql_update( 3267 $config{'sql_tresource'}, 3268 undef, { 3269 $self->filter_row( $config{'sql_tresource'}, 'primary', $values ), 3270 'path' => '', 3271 'files' => $work{'filec'}->( $stat{'files'} ), 3272 'dirs' => $work{'dirc'}->( $stat{'dirs'} ), 3273 'size' => $work{'sizec'}->( $stat{'size'} ) 3274 }, 3275 $self->where_body( 3276 { 'files' => $work{'filec'}->( $stat{'files'} ), 'files_mode' => 'l' }, 3277 undef, $config{'sql_tresource'}, 3278 ) 3279 #"$self->{'rq'}files$self->{'rq'} < " . $self->{'vq'} . $work{'filec'}->( $stat{'files'} ) . $self->{'vq'} 3280 ); 3281 my $h = split_url( $values->{'host'} )->{'host'}; 3282 $self->update( 3283 #sql_update( 3284 $config{'sql_thost'}, 3285 undef, { 3286 $self->filter_row( $config{'sql_thost'}, 'primary', $values ), 3287 'host' => $h, 3288 #'ip' => ip_to_name($h), 3289 ( $values->{'prot'} eq 'dchub' ? () : ( 'ip' => name_to_ip($h) ) ), 3290 #'scan' => int( time() ), 3291 #'time' => int( time() ) 3292 %scantime 3293 } 3294 ); 3295 } 3296 }, 3297 %{ $config{'sql'} || {} }, 3298 ( ref $config{'sql_base_up'} eq 'HASH' ? %{ $config{'sql_base_up'} } : ( 'database' => $config{'sql_base_up'} ) ), 3299 ); 3300 #END {printlog('WOW2', $static{'db'}->{'dbh'});}; 3301 #printlog 'dev', Dumper $static{'db'}; 3302 #printlog 'dev', "user:", $static{'db'}{user}, $config{'sql'}{user}, $config{'sql_base_up'}{user}; 3303 #printlog 'dev', "u", %{ $config{'sql_base_up'} }; 3304 ( $tq, $rq, $vq ) = $static{'db'}->quotes(); 3305#$processor{'out'}{'array'}->(); 3306#my ( $tq, $rq, $vq ) = $static{'db'}->quotes(); 3307#$static{'db'}->query_log( 'SELECT * FROM ' . ( join ',', map { "$tq$_$tq" } sort keys %{ $config{'sql'}{'table'} } ) . ' WHERE 1 LIMIT 1' ) 3308#use Data::Dumper; 3309#printlog( 'dev', Dumper($static{'db'})); 3310 undef; 3311}; 3312$program{ psmisc::program('start') }{'func'} ||= sub { unlink( $config{'stop_file'} ); -f $config{'stop_file'}; }; 3313$program{ psmisc::program('check') }{'func'} ||= sub { 3314 printlog( 'info', 'checkset' ); 3315 #$config{'sql'}{'auto_repair'} = 1; 3316 #$config{'sql'}{'force_repair'} = 1; 3317 $static{'db'}->check_data(); 3318 # for my $table ( $_[1] or keys %{ $static{'db'}{'table'} } ) { 3319 # printlog ('check', $table,$static{'db'}->query_log("SELECT * FROM $table LIMIT 1")); 3320 # } 3321}; 3322$program{ psmisc::program('drop') }{'func'} ||= sub { 3323 @_ = keys %{ $config{'sql'}{'table'} } if !@_; 3324 printlog( 'info', "Dropping ", @_ ); 3325 #printlog( 'info', "Dropping $static{'db'}->{'table_prefix'} [$_]" ), $processor{'out'}{'mysql'}->( drop_table_sql($_) ) 3326 #for sort grep { $_ and $config{'sql'}{'table'}{$_} } @_; 3327 $static{'db'}->drop_tables( sort grep { $_ and $config{'sql'}{'table'}{$_} } @_ ); 3328 return 0; 3329}; 3330$program{ psmisc::program('dropdb') }{'func'} ||= sub { 3331 @_ = $config{'sql'}{'database'} if !@_; 3332 printlog( 'info', "Dropping ", @_ ); 3333 #printlog( 'info', "Dropping $static{'db'}->{'table_prefix'} [$_]" ), $processor{'out'}{'mysql'}->( drop_table_sql($_) ) 3334 #for sort grep { $_ and $config{'sql'}{'table'}{$_} } @_; 3335 $static{'db'}->drop_database( sort grep { $_ } @_ ); 3336 return 0; 3337}; 3338$program{ psmisc::program('install') }{'func'} ||= sub { 3339 my %bases; 3340 $bases{ $config{$_} } = $_ for grep { /^sql_base_/ } keys %config; 3341 $static{'db'}->install( grep { $_ } keys %bases ); 3342 3343=cu 3344 my ( $tdbi, $tb ) = ( $config{'sql_use_dbi'}, $config{'sql_base'} ); 3345 $config{'sql_use_dbi'} = 0; 3346 $config{'sql_base'} = ''; 3347 for ( grep $_, keys %bases ) { 3348 printlog( 'info', "Creating database '$_' [$bases{$_}]" ); 3349 $processor{'out'}{'mysql'}->( 3350 "CREATE DATABASE IF NOT EXISTS $_" 3351 . ( 3352 $config{'sql_default_character_set'} 3353 and $config{'sql'}{ $config{'sql_driver'} }{'DEFAULT CHARACTER SET'} 3354 ? " $config{'sql'}{ $config{'sql_driver'} }{'DEFAULT CHARACTER SET'} $config{'sql_default_character_set'}" 3355 : '' 3356 ) 3357 ); 3358 } 3359 ( $config{'sql_base'}, $config{'sql_use_dbi'} ) = ( $tb, $tdbi ); 3360 printlog( 'info', "Creating $static{'db'}->{'table_prefix'} [", ( keys %{ $config{'sql'}{'table'} } ), ']' ); 3361 $processor{'out'}{'mysql'}->( create_table_sql( %{ $config{'sql'}{'table'} } ) ); 3362 $processor{'out'}{'mysql'}->( create_index_sql( %{ $config{'sql'}{'table'} } ) ) 3363 unless $config{'sql'}{ $config{'sql_driver'} }{'index in create table'}; 3364=cut 3365 3366 printlog( 'info', 'tryweb', `$^X web/index.cgi` ); 3367 `chmod o-rwx tools/rsync_password`; 3368 return 0; 3369}; 3370$program{ psmisc::program('index_disable') }{'func'} ||= sub { 3371 $static{'db'}->index_disable( $config{'sql_tfile'} ); 3372 #my $tim = timer(); 3373 #printlog( 'info', 'Disabling indexes...' ); 3374 #printlog( 'err', 'ALTER TABLE ... DISABLE KEYS available in mysql >= 4' ), return if $config{'sql'}{'driver'} eq 'mysql3'; 3375 #$static{'db'}->query_log("ALTER TABLE $static{'db'}->{'table_prefix'}$config{'sql_tfile'} DISABLE KEYS"); 3376 #printlog( 'time', "Disable index per", psmisc::human( 'time_period', $tim->() ), "sec" ); 3377 return 0; 3378}; 3379$program{ psmisc::program('upgrade') }{'func'} ||= sub { 3380 local $config{'sql_base'} = $config{'sql_base_web'}; 3381 local $config{'sql_error_tries'} = 0; 3382 local $config{'sql_error_sleep'} = 0; 3383 printlog( 'info', "Upgrade from $_[0]. dont worry about errors" ); 3384 printlog( 'info', 'Removing very old resource.vote,votelast' ), 3385 $static{'db'}->do("ALTER TABLE resource DROP `vote`, DROP `votelast`, DROP INDEX votei, DROP INDEX votelasti") 3386 if $_[0] < 1905; 3387 printlog( 'info', 'Removing host.votelast' ), $static{'db'}->do("ALTER TABLE host DROP `votelast`, DROP INDEX votelasti"), 3388 printlog( 'info', 'Adding host voting' ), 3389 $static{'db'}->do( 3390"ALTER TABLE host ADD `voteu` INT NOT NULL DEFAULT 0, ADD `voted` INT NOT NULL DEFAULT 0, ADD INDEX voteui (`voteu`), ADD INDEX votedi (`voted`)" 3391 ), $static{'db'}->do("ALTER TABLE host ADD `lastip` VARCHAR (15) NOT NULL DEFAULT 0, ADD INDEX lastipi (`lastip`(15))"), 3392 printlog( 'info', 'Adding top query lastip' ), 3393 $static{'db'} 3394 ->do("ALTER TABLE $config{'sql_twordstat'} ADD `lastip` VARCHAR (15) NOT NULL DEFAULT 0, ADD INDEX lastipi (`lastip`(15))"), 3395 $static{'db'}->do( 3396 "ALTER TABLE $config{'sql_tquerystat'} ADD `lastip` VARCHAR (15) NOT NULL DEFAULT 0, ADD INDEX lastipi (`lastip`(15))") 3397 if $_[0] < 1933; 3398 printlog( 'info', '{2618} 0.15.18 -> 0.16.0 (adding port column into host table)' ), 3399 $static{'db'} 3400 ->do("ALTER TABLE $config{'sql_thost'} ADD `port` SMALLINT UNSIGNED NOT NULL DEFAULT 0, ADD INDEX porti (`port`)") 3401 if $_[0] < 2618; 3402 printlog( 'info', '{2970} 0.16.1 -> 0.16.2 (adding fulltext query index)' ), 3403 $static{'db'}->do( 3404"ALTER TABLE $config{'sql_tquerystat'} ADD FULLTEXT $config{'sql'}{'table'}{$config{'sql_tquerystat'}}{'query'}{'fulltext'} (`query`)" 3405 ) if $_[0] < 2970 and $config{'sql'}{'table'}{ $config{'sql_tquerystat'} }{'query'}{'fulltext'}; 3406 printlog( 'info', '{3010} 0.16.1 -> 0.16.2 (adding dcuser to filebase and resource)' ), 3407 $static{'db'} 3408 ->do("ALTER TABLE `$config{'sql_tfile'}` ADD `dcuser` VARCHAR( 30 ) NOT NULL AFTER `host`, ADD INDEX dcuseri (`dcuser`)"), 3409 $static{'db'}->do( 3410 "ALTER TABLE `$config{'sql_tresource'}` ADD `dcuser` VARCHAR( 30 ) NOT NULL AFTER `host`, ADD INDEX dcuseri (`dcuser`)"), 3411 $static{'db'}->do( 3412"ALTER TABLE `$config{'sql_tfile'}` DROP PRIMARY KEY , ADD PRIMARY KEY ( `prot` , `host` , `path` , `name` , `ext` , `dcuser` )" 3413 ), 3414 $static{'db'} 3415 ->do("ALTER TABLE `$config{'sql_tresource'}` DROP PRIMARY KEY , ADD PRIMARY KEY ( `prot` , `host` , `path` , `dcuser` )"), 3416 if $config{'use_dc'} and $_[0] < 3010; 3417 printlog( 'info', '{3120} 0.16.1 -> 0.16.2 (adding voting to filebase)' ), 3418 $static{'db'}->do( 3419"ALTER TABLE `$config{'sql_tfile'}` ADD `voteu` INT UNSIGNED NOT NULL DEFAULT '0' , ADD `voted` INT UNSIGNED NOT NULL DEFAULT '0' , ADD `lastip` VARCHAR (15) NOT NULL DEFAULT '' , ADD INDEX voteui (`voteu`), ADD INDEX votedi (`voted`), ADD INDEX lastipi (`lastip`(15))" 3420 ), 3421 if $_[0] < 3120; 3422 3423 if ( $_[0] < 3939 ) { 3424 printlog( 'info', '{3939} 0.17.2 -> 0.18.0 (adding added to filebase)' ), 3425 $static{'db'}->do("ALTER TABLE `$config{'sql_tfile'}` ADD `added` INT UNSIGNED NOT NULL DEFAULT '0'"); 3426 printlog( 'info', '{3939} 0.17.2 -> 0.18.0 (adding stem to filebase)' ), 3427 $static{'db'}->do("ALTER TABLE `$config{'sql_tfile'}` ADD `stem` VARCHAR (128) NOT NULL DEFAULT ''"), 3428 printlog( 'info', '{3939} 0.17.2 -> 0.18.0 (adding stem index to filebase)' ), 3429 $static{'db'}->do("ALTER TABLE `$config{'sql_tfile'}` ADD FULLTEXT `stemi` (`stem`)"), 3430 printlog( 'info', '{3939} 0.17.2 -> 0.18.0 (adding stem to querystat)' ), 3431 $static{'db'}->do("ALTER TABLE `$config{'sql_tquerystat'}` ADD `stem` VARCHAR (128) NOT NULL DEFAULT ''"), 3432 printlog( 'info', '{3939} 0.17.2 -> 0.18.0 (adding stem index to querystat)' ), 3433 $static{'db'}->do("ALTER TABLE `$config{'sql_tquerystat'}` ADD FULLTEXT `stemi` (`stem`)"), 3434 if $config{'use_stem'}; 3435 } 3436 # 3437 if ( $_[0] < 4617 ) { 3438 if ( $config{'use_dc'} ) { 3439 printlog( 'info', '{4617} 0.19.1 -> 0.19.2 (dc: CID on resource table)' ), $static{'db'} 3440 ->do("ALTER TABLE `$config{'sql_tresource'}` ADD `cid` VARCHAR (39) NOT NULL DEFAULT '', ADD INDEX `cid_i` (`cid`)"); 3441 } 3442 } 3443 if ( $_[0] < 4775 ) { 3444 printlog( 'info', '{4775} 0.19.1 -> 0.19.2 (ranges,host: more stat)' ); 3445 $static{'db'}->do( 3446"ALTER TABLE `$config{'sql_tranges'}` ADD `meta` VARCHAR ($config{'sql'}{'table'}{$config{'sql_tranges'}}{'meta'}{'length'}) NOT NULL DEFAULT ''" 3447 ); 3448 $static{'db'}->do( 3449"ALTER TABLE `$config{'sql_thost'}` ADD `meta` VARCHAR ($config{'sql'}{'table'}{$config{'sql_thost'}}{'meta'}{'length'}) NOT NULL DEFAULT ''" 3450 ); 3451 } 3452 # $Id: crawler.pl 4843 2013-08-14 12:17:58Z pro $# 3453 $program{$_}{'upgrade'}->(@_) for grep { ref $program{$_}{'upgrade'} eq 'CODE' } keys %program; 3454 psmisc::program_one('install'); 3455}; 3456$program{ psmisc::program('repair') }{'func'} ||= sub { 3457 printlog( 'warn', 'repair locked', @_ ), return if !psmisc::lock( 'repair_' . ( join '-', @_ ), { timeout => 1 } ); 3458 $static{'db'}->repair(@_); 3459}; 3460$program{ psmisc::program('purge') }{'func'} ||= sub { 3461 my ($purge) = ( $_[0] =~ /(\d+)/ ? $1 : $config{'purge_by'} ); 3462 my ( $cut, $timr, $total, $now ) = ( int( time() ), timer() ); 3463 printlog( 'info', "Purging tables (by $purge). cut =", psmisc::human( 'date_time', $cut -= $config{'purge'} ), $_[2] ); 3464 local $config{'nav_all'} = 1; 3465 for my $table ( $_[1] or grep { $config{'sql'}{'table_param'}{$_}{'purge'} } keys %{ $config{'sql'}{'table'} } ) { 3466 #printlog('dev', 'pg', $config{'sql'}{'table_param'}{$table}{'purge'}); 3467 for my $i ( 1 .. $config{'purges'} ) { 3468 my $timl = timer(); 3469 $total += ( 3470 $now = $static{'db'}->do( 3471 "DELETE $static{'db'}->{'LOW_PRIORITY'} FROM $tq$static{'db'}->{'table_prefix'}$table$tq " . $static{'db'}->where( { 3472 $config{'sql'}{'table_param'}{$table}{'purge'} . '' => $cut, 3473 $config{'sql'}{'table_param'}{$table}{'purge'} . '_mode' => '<', 3474 #'glueg1' => 'or', 3475 #$config{'sql'}{'table_param'}{$table}{'purge'} . '1' => 0, 3476 ( 3477 ( !$config{'sql'}{'table'}{$table}{'scan'} or $config{'sql'}{'table_param'}{$table}{'purge'} eq 'scan' ) 3478 ? () 3479 : ( 'scan' . '1' => 0, 'glueg1' => 'and', 'scan' . '_mode1' => '!', ) 3480 ), 3481 }, 3482 undef, 3483 $table 3484 ) 3485 . " $_[2]" 3486 . ( $static{'db'}->{'no_delete_limit'} ? () : " LIMIT $purge" ) 3487 ) 3488 ); 3489 local $_ = $timl->(); 3490 printlog( 3491 'info', 3492 "purge $table pass $i of $config{'purges'}, $now records purged ($total total) per", 3493 psmisc::human( 'time_period', $_ ), 3494 ',', psmisc::human( 'float', $now / ( $_ || 1 ) ), 'fps' 3495 ); 3496 last if $now < $purge; 3497 mysleep( $config{'purge_sleep'} ) if $config{'purge_sleep'}; 3498 } 3499 } 3500 printlog( 3501 'time', 3502 "Purged $total records per", 3503 psmisc::human( 'time_period', $timr->() ), 3504 ' (', ( $timr->() > 0 ? psmisc::human( 'float', $total / $timr->() ) : '?' ), 3505 ' fps)' 3506 ); 3507 return $total; 3508}; 3509$program{ psmisc::program('delhost') }{'func'} ||= sub { 3510 return unless $_[0]; 3511 my ($purge) = ( $config{'purge_by'} ); 3512 my ( $tfiles, $files, $rec ); 3513 my $timr = timer(); 3514 printlog( 'info', "delhost $_[0] (by $purge)." ); 3515 my $purges = ( $param->{'purges'} or 10000 ); 3516 for my $i ( 1 .. $purges ) { 3517 my $timl = timer(); 3518 $tfiles += ( 3519 $files = 3520 #$processor{'out'}{'mysql'} 3521 $static{'db'}->do( 3522"DELETE $static{'db'}->{'LOW_PRIORITY'} FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_tfile'}$tq WHERE ${rq}host${rq} LIKE ${vq}$_[0]${vq} LIMIT $purge" 3523 ) 3524 ); 3525 printlog( 'info', "pass $i/$purges, $files files deleted, (total $tfiles) per", psmisc::human( 'time_period', $timl->() ) ); 3526 mysleep( $config{'purge_sleep'} ) if $config{'purge_sleep'}; 3527 last if $files < $purge; 3528 } 3529 $tfiles += 3530 $static{'db'}->do( 3531 "DELETE $static{'db'}->{'LOW_PRIORITY'} FROM $tq$static{'db'}->{'table_prefix'}$_$tq WHERE ${rq}host${rq} = ${vq}$_[0]${vq}" 3532 ) for ( $config{'sql_thost'}, $config{'sql_tresource'} ); 3533 printlog( 3534 'time', 3535 "Deleted $tfiles files and $rec records per", 3536 psmisc::human( 'time_period', $timr->() ), 3537 'sec (', psmisc::human( 'float', $timr->() > 0 ? $tfiles / $timr->() : '?' ), 3538 ' fps)' 3539 ); 3540}; 3541$program{ psmisc::program('deldead') }{'func'} ||= sub { 3542 my $rec; 3543 $rec += 3544 $static{'db'}->do( 3545"DELETE $static{'db'}->{'LOW_PRIORITY'} FROM $tq$static{'db'}->{'table_prefix'}$_$tq WHERE ${rq}scan${rq} AND ! ${rq}time${rq} AND ! ${rq}files${rq} AND ! ${rq}dl${rq} AND ! ${rq}added${rq}" 3546 ) for ( $config{'sql_tresource'} ); #, $config{'sql_thost'} 3547 $rec += 3548 $static{'db'}->do( 3549"DELETE $static{'db'}->{'LOW_PRIORITY'} FROM $tq$static{'db'}->{'table_prefix'}$_$tq WHERE ! ${rq}files${rq} AND ! ${rq}dl${rq} AND ! ${rq}added${rq} AND ! ${rq}voteu${rq} AND ! ${rq}voted${rq} AND ! ${rq}lastip${rq}" 3550 ) # ${rq}scan${rq} AND ! ${rq}time${rq} AND 3551 for ( $config{'sql_thost'} ); 3552 printlog( 'info', "Deleted $rec records" ); 3553}; 3554$program{ psmisc::program('reset_dead') }{'func'} ||= sub { 3555 my $rec; 3556 $rec += 3557 $static{'db'}->do( 3558"UPDATE $tq$static{'db'}->{'table_prefix'}$_$tq SET ${rq}scan${rq} = ${vq}0${vq}, ${rq}time${rq} = ${vq}0${vq} WHERE ! ${rq}files${rq}" 3559 ) for ( $config{'sql_tresource'} ); #, $config{'sql_thost'} 3560 printlog( 'info', "Updated $rec records" ); 3561}; 3562$program{ psmisc::program('reset_res') }{'func'} ||= sub { 3563 local $_; 3564 printlog( 3565 'info', 3566 'reset_res:', 3567 $_ = $static{'db'}->do( 3568"UPDATE $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq SET ${rq}scan${rq} = ${vq}0${vq}, ${rq}time${rq} = ${vq}0${vq}" 3569 ) 3570 ); 3571 return $_; 3572}; 3573$program{ psmisc::program('reset_res_bug') }{'func'} ||= sub { 3574 printlog( 3575 'info', 3576 'reset_res:', 3577 $static{'db'}->do( 3578"UPDATE $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq SET ${rq}scan${rq} = ${vq}0${vq}, ${rq}time${rq} = ${vq}0${vq} WHERE ${rq}scan${rq} > $vq" 3579 . int( time() ) 3580 . $vq 3581 ) 3582 ); 3583}; 3584$program{ psmisc::program('reset_range') }{'func'} ||= sub { 3585 printlog( 3586 'info', 3587 'reset_range:', 3588 $static{'db'}->do( 3589"UPDATE $tq$static{'db'}->{'table_prefix'}$config{'sql_tranges'}$tq SET ${rq}scan${rq} = ${vq}0${vq}, ${rq}time${rq} = ${vq}0${vq}" 3590 ) 3591 ); 3592}; 3593$program{ psmisc::program('reset_range_bug') }{'func'} ||= sub { 3594 printlog( 3595 'info', 3596 'reset_range:', 3597 $static{'db'}->do( 3598"UPDATE $tq$static{'db'}->{'table_prefix'}$config{'sql_tranges'}$tq SET ${rq}scan${rq} = ${vq}0${vq}, ${rq}time${rq} = ${vq}0${vq} WHERE ${rq}scan${rq} > $vq" 3599 . int( time() ) 3600 . $vq 3601 ) 3602 ); 3603}; 3604$program{ psmisc::program('reset_period') }{'func'} ||= sub { 3605 my $rec; 3606 $rec += $static{'db'}->do("UPDATE $tq$static{'db'}->{'table_prefix'}$_$tq SET ${rq}period${rq} = ${vq}0${vq} ") 3607 for ( $config{'sql_tresource'}, $config{'sql_tranges'} ); #, $config{'sql_thost'} 3608 printlog( 'info', "Updated $rec records" ); 3609}; 3610$program{ psmisc::program('skip') }{'func'} ||= sub { $static{'banned'}->{ $_[0] } = 1; }; 3611$program{ psmisc::program('file') }{'func'} ||= sub { 3612 printlog( 'info', 'loading file', $_[0] ); 3613 $processor{'file'}{'url'}{'func'}->( uni_get( $_[0] ) ); 3614}; 3615$program{ psmisc::program('ipdig') }{'func'} ||= sub { 3616 my %dig; 3617 for my $file (@_) { 3618 printlog( 'info', 'founding ips in', $file ); 3619 open( IF, ( ( $file eq '-' ) ? ($file) : ( '<', uni_get($file) ) ) ) or printlog( 'err', 'unable to open' ), return; 3620 while (<IF>) { 3621 s/<.+?>//g if $file =~ /^http/; 3622 for my $name (m!(?:\W|^)($config{'dig_find_mask'})\W!g) { 3623 next unless $name; 3624 next if grep { $name =~ /$_$/ } @{ $config{'dig_not_domain'} }; 3625 my ( $ip, $err, $add ); 3626 my %url = split_url($name); 3627 ( $ip, $err ) = name_to_ip( $url{'host'} ); 3628 printlog( 'dbg', ' NOT ip', $name, $url{'host'}, $ip ), next if $err; 3629 printlog( 'dbg', ' ip not in mask:', $ip ), next if $ip !~ /$config{'dig_ip_mask'}/; 3630 unless ( $config{'dig_no_prot'} and !$url{'prot'} ) { 3631 next if !defined( $config{'scanner'}{ $url{'prot'} } ); 3632 next if $config{'scanner'}{ $url{'prot'} }{'no_dig'}; 3633 } 3634 $add = join_url( { 3635 %url, 3636 ( $config{'dig_as_ip'} ? ( 'host' => $ip ) : () ), 3637 ( ( $url{'prot'} eq $static{'port2prot'}{ $url{'port'} } ) ? ( 'port' => '' ) : () ), 3638 ( ( $config{'dig_prot_add'} and !$url{'prot'} ) ? ( 'prot' => $config{'dig_prot_add'} ) : () ), 3639 ( $config{'host_by_name_norm'} ? ( 'host' => normalize_ip( $url{'host'} ) ) : () ), 3640 ( ( $url{'user'} =~ /anonymous|ftp|none|\*+/i ) ? ( 'user' => '', 'pass' => '' ) : () ), 3641 ( ( $url{'pass'} =~ /none|\*+/i ) ? ( 'pass' => '' ) : () ), 3642 } 3643 ) unless $config{'dig_add_netmask'}; 3644 $add = $ip, $add =~ s/\d+$//, $add = $add . '0/' . $config{'dig_add_netmask'} if $config{'dig_add_netmask'}; 3645 printlog( 'dbg', ' digged', $name, '=', $ip, '->', $add ) if !$dig{$add}; 3646 $processor{'url'}->($add) if ( $config{'dig_add_every'} and !$dig{$add} ); 3647 ++$dig{$add}; 3648 } 3649 } 3650 close(IF); 3651 } 3652 $processor{'url'}->( keys %dig ) unless $config{'dig_add_every'}; 3653 printlog( 'info', ' founded', scalar keys %dig, sort keys %dig ); 3654}; 3655$program{ psmisc::program('ipdigf') }{'func'} ||= sub { 3656 for my $file (@_) { 3657 printlog( 'info', 'founding ips hrefs in', $file ); 3658 open( my $fh, ( ( $file eq '-' ) ? ($file) : ( '<', uni_get($file) ) ) ) or printlog( 'err', 'unable to open' ), return; 3659 while (<$fh>) { 3660 next if /^\s*[#;]/; 3661 s/^\s+//; 3662 s/\s+$//; 3663 psmisc::program_one( 'ipdig', $_ ) if $_; 3664 } 3665 close($fh); 3666 } 3667}; 3668$program{ psmisc::program('hublist') }{'func'} ||= sub { 3669 $_ = ( $_[0] or $config{'hublist'} ); 3670 $processor{'file'}{ /\.(?:list)|(?:config)$/i ? 'list' : 'hublist' }{'func'}->( uni_get($_) ); #/ 3671}; 3672$program{ psmisc::program('filelist') }{'func'} ||= sub { 3673 return unless psmisc::use_try 'Net::DirectConnect::filelist'; 3674 push @_, grep { -d } @ARGV; 3675 delete $program{default}; 3676 @_ = @{ $config{fine}{dchub}{share} || [] } unless @_; 3677 #printlog('params', Dumper $config{fine}{dchub}); 3678 Net::DirectConnect::filelist->new( 3679 log => sub (@) { 3680 my $dc = ref $_[0] ? shift : {}; 3681 psmisc::printlog shift(), "[$dc->{'number'}]", @_,; 3682 }, 3683 #'sql' => $config{'sql'}, 3684 'db' => $static{'db'}, 3685 , 3686 %{ $config{fine}{dchub} || {} } 3687 )->filelist_make(@_),; 3688 0; 3689}; 3690$program{ psmisc::program('default') }{'mask'} ||= '\S'; 3691$program{ psmisc::program() }{'multi'} ||= 1; 3692$program{ psmisc::program() }{'param_name'} ||= 1; 3693$program{ psmisc::program() }{'func'} ||= sub { 3694 return if scan_stop; 3695 for my $current ( keys %program ) { 3696 next if $current eq 'default'; 3697 return if $program{ psmisc::program() }{'mask'} and $_[0] =~ /$program{psmisc::program()}{'mask'}/i; 3698 } 3699 psmisc::program_one( ( -f $_ && -s $_ ? 'file' : 'scan' ), $_ ) for @_; 3700 #$processor{'url'}->(@_); 3701}; 3702$program{ psmisc::program('scan') }{'func'} ||= sub { 3703 #printlog('dev', 'scan', @_); 3704 $processor{'url'}->(@_); 3705}; 3706$program{ psmisc::program('upload') }{'func'} ||= sub { 3707 #close_out_file(); 3708 startn( $_[0], 'upload' ); 3709 $processor{'dir'}->( $config{'datadir'} ); 3710 #$static{'db'}->flush_insert(); 3711}; 3712$program{ psmisc::program('index_enable') }{'func'} ||= sub { 3713 $static{'db'}->index_enable( $config{'sql_tfile'} ); 3714 #my $tim = timer(); 3715 #printlog( 'info', 'Enabling indexes...' ); 3716 #$static{'db'}->query_log("ALTER TABLE $static{'db'}->{'table_prefix'}$config{'sql_tfile'} ENABLE KEYS"); 3717 #printlog( 'time', 'Enable index per ', psmisc::human( 'time_period', $tim->() ) ); 3718 return 0; 3719}; 3720$program{ psmisc::program('optimize') }{'func'} ||= sub { 3721 return $static{'db'}->optimize(@_); 3722 #@_ = keys %{ $config{'sql'}{'table'} } unless @_; 3723 #@_ = grep { $_ and $config{'sql'}{'table'}{$_} } @_; 3724 #printlog( 'info', 'Optimizing table...', @_ ); 3725 #$static{'db'}->query_log( "OPTIMIZE TABLE " . join( ',', map( "$static{'db'}->{'table_prefix'}$_", @_ ) ) ); 3726}; 3727$program{ psmisc::program('analyze') }{'func'} ||= sub { 3728 return $static{'db'}->analyze(@_); 3729 #@_ = keys %{ $config{'sql'}{'table'} } unless @_; 3730 #@_ = grep { $_ and $config{'sql'}{'table'}{$_} } @_; 3731 #printlog( 'info', 'Analyze table...', @_ ); 3732 #$static{'db'}->query_log( "ANALYZE TABLE " . join( ',', map( "$static{'db'}->{'table_prefix'}$_", @_ ) ) ); 3733}; 3734$program{ psmisc::program('flush') }{'func'} ||= sub { 3735 #printlog( 'dev', 'Flush'); 3736 return $static{'db'}->flush(@_); 3737 #my $tim = timer(); 3738 #my ( $tq, $rq, $vq ) = $static{'db'}->quotes(); 3739 #$static{'db'}->do( "FLUSH TABLES " . ( join ',', map { "$tq$_$tq" } sort keys %{ $config{'sql'}{'table'} } ) ); 3740 #printlog( 'time', 'Flush per', psmisc::human( 'time_period', $tim->() ) ); 3741 #0; 3742}; 3743$program{ psmisc::program('prof') }{'func'} ||= sub { 3744 my $tim = timer(); 3745 #my ( $tq, $rq, $vq ) = $static{'db'}->quotes(); 3746 @_ = grep { $_ ne $config{'sql_tfile'} } keys %{ $config{'sql'}{'table'} } unless @_; 3747 @_ = grep { $_ and $config{'sql'}{'table'}{$_} } @_; 3748 psmisc::program( $_, @_ ) for qw(repair optimize); 3749 printlog( 'time', 'Prof per', psmisc::human( 'time_period', $tim->() ) ); 3750 return 0; 3751}; 3752$program{ psmisc::program('res') }{'proc'} ||= 100; 3753$program{ psmisc::program() }{'func'} ||= sub { 3754 #my ( $tq, $rq, $vq ) = $static{'db'}->quotes(); 3755 #printlog('err', 'logtest'); 3756 my ( $join, $what, $where, $lock ) = ( 3757 "LEFT JOIN $tq$static{'db'}->{'table_prefix'}$config{'sql_thost'}$tq USING (${rq}host$rq)", 3758 ", $tq$static{'db'}->{'table_prefix'}$config{'sql_thost'}$tq.${rq}time$rq AS ${rq}live$rq", 3759" $tq$static{'db'}->{'table_prefix'}$config{'sql_thost'}$tq.${rq}time$rq IS NULL OR $tq$static{'db'}->{'table_prefix'}$config{'sql_thost'}$tq.${rq}time$rq = ${vq}0$vq OR $tq$static{'db'}->{'table_prefix'}$config{'sql_thost'}$tq.${rq}time$rq > $vq" 3760 . ( int( time() ) + $config{'timediff'} - $config{'online_minutes'} * 60 ) 3761 . $vq, 3762 ", $tq$static{'db'}->{'table_prefix'}$config{'sql_thost'}$tq WRITE" 3763 ) if $config{'scan_online_res'} and !$config{'scan_dead'}; 3764##### TODO !!!!!!!!!!!!! sql_where !!!!!!!!!!! 3765 #printlog( 'dev', 'PRELOCK' ); 3766 my $protos = join( ' OR ', 3767 map { " $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq.${rq}prot$rq = $vq$_$vq" } 3768 grep { $config{'scanner'}{$_}{'scanable'} and !$config{'scanner'}{$_}{'disabled'} and !$config{"no_$_"} } 3769 keys %{ $config{'scanner'} } ) 3770 or return $config{'nothing_scan_sleep'}; 3771 $static{'db'}->lock_tables("$tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq WRITE $lock"); 3772 local @_ = ( ( 3773 $config{'scan_dead'} ? () 3774 : " NOT $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq.${rq}time$rq OR $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq.${rq}size$rq > ${vq}0$vq " 3775 ), ( 3776 $config{'scanner'}{'dchub'}{'disabled'} ? () 3777 : ( !%{ $config{'sql'}{'table'}{ $config{'sql_tresource'} }{'dcuser'} or {} } 3778 ? " $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq.${rq}host$rq NOT LIKE $vq%/%$vq " 3779 : " $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq.${rq}dcuser$rq = $vq$vq " ) 3780 ), 3781 $protos, 3782 ( $where or () ) 3783 ); 3784 local $_ = grep( $_, @_ ) ? ' WHERE ( ' . join( ') AND (', grep( $_, @_ ) ) . ' ) ' : ''; 3785 my $res = 3786 $static{'db'}->line( 3787"SELECT $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq.*$what FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq $join $_ ORDER BY ($tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq.${rq}scan$rq" 3788 . ( $config{'res_fast'} ? '' : "+$tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq.${rq}period$rq" ) 3789 . ") LIMIT 1" ); 3790#printlog('dev', 'res selectedp:', ($res), ); 3791#printlog('dev', 'res selectedp:', join_url($res), Dumper $res); 3792#printlog('dev', 'res selected:',( join_url($res), psmisc::human('date_time',$res->{'scan'}),'p=', $res->{'period'},'=', psmisc::human('time_period',$res->{'period'}))); 3793 our ( $lastres, $lasttime ); 3794 my %origres = %$res; 3795 $res->{'period'} ||= hconfig( 'period', $res->{'host'}, $res->{'prot'} ); 3796 printlog( 3797 'dev', 3798 'res selected:', ( 3799 join_url($res), !!$res->{'scan'}, 3800 psmisc::human( 'time_period', time - $res->{'scan'} ), 3801 psmisc::human( 'time_period', time - $res->{'time'} ), 3802 'p=', $res->{'period'}, '=', psmisc::human( 'time_period', $res->{'period'} ) 3803 ) 3804 ); 3805 $_ = ( %origres and allowscan( $res, $config{'sql_tresource'}, $res->{'scan'} + 1, $res->{'period'} ) ); 3806#$_ = ( $static{'db'}->filter_row( $config{'sql_tresource'}, 'primary', $res ) and allowscan( $res, $config{'sql_tresource'}, $res->{'scan'} + 1, $res->{'period'} ) ); 3807#$_ = ( @{$res}{$static{'db'}->filter_row( $config{'sql_tresource'}, 'primary', $res )} and allowscan( $res, $config{'sql_tresource'}, $res->{'scan'} + 1, $res->{'period'} ) ); 3808#my $period = ($res->{'period'} or hconfig( 'period', $res->{'host'}, $res->{'prot'} )); 3809#$_ = ( %$res and allowscan( $res, $config{'sql_tresource'}, $res->{'scan'} + 1, $period ) ); 3810 $static{'db'}->unlock_tables(); 3811#printlog( 'dev', "resrun[$_]", %$res, 'fr:', @{$res}{ $static{'db'}->filter_row( $config{'sql_tresource'}, 'primary', $res ) } ); 3812 if ($_) { 3813 printlog( 'err', "resource table probably corrupted", "duplicate", $lastres ), sleep( 10 + rand(100) ), 3814 psmisc::program_one( 'repair', $config{'sql_tresource'}, $config{'sql_thost'} ), return 0, 3815 if $config{'resourse_breakable'} 3816 and $lastres 3817 and $lastres eq join_url($res) 3818 and $lasttime + $config{'resourse_breakable'} > time; 3819 $lastres = join_url($res); 3820 $lasttime = time; 3821 local $work{'scan_proto_host_reset_force'} = 1 unless $config{'force'}; 3822 local $config{'force'} = 1; 3823 $processor{'url'}->( join_url($res) ); 3824 return 0; 3825 } elsif (%$res) { 3826 my $wait = $res->{'period'} - ( $res->{'scan'} ? int( time() ) - $res->{'scan'} : 0 ); 3827 my $nextscan = ( $_ = $res->{'period'} + $res->{'scan'} - int( time() ) ) > 0 ? $_ : $config{'nothing_scan_sleep'}; 3828 $_ = min( max( $nextscan, 0 ), $config{'nothing_scan_sleep'}, max( $wait, 0 ) + 1 ); 3829 printlog( 3830 'dbg', 'Res: nothing to do (next:', 3831 join_url($res), ':', psmisc::human( 'time_period', $nextscan ), 3832 ') sl(', $_, ')' 3833 ); 3834 return $_; 3835 } else { 3836 printlog( 'dbg', 'all resources is scanned or dead or table is empty?' ); 3837 return $config{'nothing_scan_sleep'}; 3838 } 3839}; 3840$program{ psmisc::program('dcbot') }{'proc'} ||= 1; 3841$program{ psmisc::program() }{'func'} ||= sub { 3842 return $config{'nothing_scan_sleep'} if $config{'scanner'}{'dchub'}{'disabled'}; 3843 local $config{'scan_online_res'} = 0; 3844 local $config{'scan_dead'} = 1; 3845 local $config{'host_by_name_norm'} = 0; 3846 local $config{'scanner'}{'dchub'}{'scanable'} = 1; 3847 local $config{'scanner'}{'adc'}{'scanable'} = 1; 3848 local $config{'scanner'}{'adcs'}{'scanable'} = 1; 3849 local $config{'scanner'}{'ftp'}{'disabled'} = 1; 3850 local $config{'scanner'}{'http'}{'disabled'} = 1; 3851 local $config{'scanner'}{'file'}{'disabled'} = 1; 3852 local $config{'scanner'}{'rsync'}{'disabled'} = 1; 3853 return psmisc::program_one('res'); 3854}; 3855$program{ psmisc::program('range') }{'proc'} ||= 1; 3856$program{ psmisc::program() }{'func'} ||= sub { 3857 #my ( $tq, $rq, $vq ) = $static{'db'}->quotes(); 3858 my $where = $static{'db'}->where( { 3859 'scan' => int( time() ), 3860 'scan_mode' => '<', 3861 'glueg' => 'or', 3862 ( $config{'scan_dead'} ? () : ( 'time1' => 0, 'alive1' => 1, 'alive_mode1' => '>', 'gluel1' => 'or' ) ) 3863 }, 3864 undef, 3865 $config{'sql_tranges'} 3866 ); 3867 $static{'db'}->lock_tables("$tq$static{'db'}->{'table_prefix'}$config{'sql_tranges'}$tq WRITE"); 3868 #TODO MAKE QUERY HERE: 3869 my ( $rng, $tim, $period ) = @{ 3870 $static{'db'}->line( 3871"SELECT ${rq}range$rq, ${rq}scan$rq, ${rq}period$rq FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_tranges'}$tq $where ORDER BY (${rq}scan$rq+${rq}period$rq) LIMIT 1" 3872 ) 3873 }{ ( 'range', 'scan', 'period' ) }; 3874 #printlog('dev', 'range selected:',( $rng, psmisc::human('time_period',time-$tim), $period )); 3875 $static{'db'}->unlock_tables(), return $config{'nothing_scan_sleep'} unless $rng; 3876 #$period ||= hconfig( 'range_period', $rng ); 3877 $period ||= hconfig( 'period', $rng, 'range' ); 3878 local $config{'scan_timeout'} = $config{'scan_range_timeout'}; 3879 $_ = allowscan( { 'range' => $rng }, $config{'sql_tranges'}, $tim + 1, $period ); 3880 #printlog('dbg', 'range allowscan:', $_); 3881 $static{'db'}->unlock_tables(); 3882 if ($_) { 3883 local $config{'force'} = 1; 3884 $config{'nmap_only'} = 1 unless defined $config{'nmap_only'}; 3885 $processor{'url'}->($rng); 3886 return 0; 3887 } else { 3888 $period ||= hconfig( 'period', $rng ); 3889 my $nextscan = $period + $tim - int( time() ); 3890 $_ = min( max( $nextscan, 0 ), $config{'nothing_scan_sleep'}, $config{'nothing_scan_sleep'}, max( $period, 0 ) + 1 ); 3891 printlog( 3892 'dbg', 'range: nothing to do, sleeping ', 3893 $_, 'seconds (next:', 3894 $rng, ':', psmisc::human( 'time_period', $nextscan ), 3895 'period:', psmisc::human( 'time_period', $period ), 3896 'last:', psmisc::human( 'time_period', int( time() ) - $tim ), ')' 3897 ); 3898 return $_; 3899 } 3900}; 3901 3902sub startn { 3903 my ( $how, $from ) = (@_); 3904 #printlog('dev', 'startn', @_) ; 3905 #my $proc = $_[0]; 3906 #printlog('dev', "start1 proc=$how", @ARGV); 3907 s/\s*($from)=\d+\s*/$1/i for @ARGV; 3908 #printlog('dev', "start2 proc=$how", @ARGV); 3909 #printlog('dev', "proc=$how;", join( ' ', grep { /^(-|$from)/ } @ARGV )), 3910 startself( join( ' ', grep { /^(-|$from)/ } @ARGV ) ) while --$how > 0; 3911} 3912$program{ psmisc::program('proc') }{'func'} ||= sub { 3913 #return unless $config{'sql_use_dbi'}; 3914 return if scan_stop; 3915 my $only = $config{'scan'}; 3916 if ( int( $_[0] ) ) { startn( $_[0], 'proc' ) } 3917 elsif ( $program{ $_[0] } ) { $only ||= $_[0]; } 3918 my $min_sleep; 3919 #printlog( 'dmp', "gggg" ); 3920 while ( !scan_stop ) { 3921 my ( $runned, $sleep ) = ( 0, $config{'nothing_scan_sleep'} ); 3922 for my $prog ( 3923 $only 3924 or sort { $program{$a}{'order'} <=> $program{$b}{'order'} } 3925 grep { $program{$_}{'proc'} and !$config{ 'noscan_' . $_ } } keys %program 3926 ) 3927 { 3928 ++$runned; 3929 $work{'proc_scans'} = $program{$prog}{'proc'}; 3930 do { 3931 state("proc: $prog"); 3932 #printlog( 'dev', "proc $prog run" ); 3933 $_ = alarmed( $program{$prog}{'alarm'} || $config{'max_proc_time'}, $program{$prog}{'func'} ); 3934 printlog( 'dbg', 'proc', "prog $prog returned $_;", $work{'proc_scans'} ); 3935 printlog( 'info', "starting child and exit.." ), startself( join( ' ', grep { /^(-|proc)/ } @ARGV ) ), exit 3936 if ( $config{'reload_time'} and $work{'start_timer'}->() > $config{'reload_time'} ) 3937 or ( $config{'reload_every'} and $work{'scans'} >= $config{'reload_every'} ); 3938 } while --$work{'proc_scans'} > 0 and !$_ and !scan_stop; 3939 $sleep = min( $sleep, max( $_, 0 ) ); 3940 $min_sleep = 0 unless $_; 3941 #printlog( 'dbg', "sleep=$sleep, r=$_" ); 3942 } 3943 printlog( 'info', 'proc: nothing to do, exit' ), last 3944 if !$runned 3945 or ( $config{'once'} and $sleep == $config{'nothing_scan_sleep'} ); 3946 $sleep = min( $sleep, ( $min_sleep *= 2 ) ||= 5 ); 3947 $min_sleep = min( $min_sleep, $config{'nothing_scan_sleep'} ); 3948 state( "proc: sleep:", $sleep + $config{'scan_sleep'} ); 3949 printlog( 'dbg', 'sleep..', $sleep, ' + rand', $config{'scan_sleep'}, " min=$min_sleep max=$config{'nothing_scan_sleep'}" ); 3950 mysleep( $sleep + rand( $config{'scan_sleep'} ) ); 3951 } 3952}; 3953$program{ psmisc::program('stat') }{'func'} ||= sub { 3954 my ( $qq, $qn ); 3955 $static{'db'}{'auto_repair'} = 1; 3956 $static{'db'}{'force_repair'} = 1; 3957 my ( $tq, $rq, $vq ) = $static{'db'}->quotes(); 3958 $qn = "WHERE ${rq}scan${rq} = ${vq}0${vq}"; 3959 for ( 0 .. $_[0] ) { 3960 mysleep( $config{'stat_sleep'} or 1 ) if $_; 3961 unless ( $config{'stat_no_res'} ) { 3962 my $dq; 3963 $dq = " ${rq}dcuser${rq} LIKE $vq$vq " if keys %{ $static{'db'}{'table'}{ $config{'sql_tresource'} }{'dcuser'} }; 3964 $qq = " ${rq}scan${rq} < $vq" . ( int( time() ) - $config{'period'} ) . "$vq "; 3965 $qq .= " AND ( ${rq}time${rq}=${vq}0${vq} OR ${rq}size${rq}>${vq}0${vq} ) " unless $config{'scan_dead'}; 3966 #printlog( 'dch', keys %{$config{'table'}{$config{'sql_tresource'}}{'dcuser'}}); 3967 my $where = 'WHERE'; 3968 #$where = ' WHERE ' . join (' AND ', grep {$_}$dq, $qq) if $dq or $qq; 3969 printlog( 3970 'info', 3971 #'host=', $static{'db'}->{'host'}, 'db=', $static{'db'}->{'database'}, 3972 "Resources: to scan ", 3973 values %{ 3974 $static{'db'}->line( 3975 "SELECT COUNT(*) FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq " 3976 . ( $dq || $qq ? $where . join( ' AND ', grep { $_ } $dq, $qq ) : '' ) 3977 ) 3978 }, 3979 'from', 3980 values %{ 3981 $static{'db'}->line( 3982 "SELECT COUNT(*) FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq " . ( $dq ? $where . $dq : '' ) 3983 ) 3984 }, 3985 '; new', 3986 values %{ $static{'db'}->line("SELECT COUNT(*) FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq $qn") 3987 }, 3988 ); 3989 } 3990 unless ( $config{'stat_no_range'} ) { 3991 #$qq = 'WHERE ${rq}scan${rq} < \'' . ( int( time() ) - $config{'range_period'} ) . "' "; 3992 $qq = "WHERE ${rq}scan${rq} < $vq" . ( int( time() ) - hconfig( 'period', 'range' ) ) . "$vq "; 3993 $qq .= " AND ( ${rq}time${rq}=${vq}0${vq} OR ${rq}alive${rq}>${vq}0${vq} ) " unless $config{'scan_dead'}; 3994 printlog( 3995 'info', 3996 'ranges: to scan ', 3997 values %{ $static{'db'}->line("SELECT COUNT(*) FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_tranges'}$tq $qq ") 3998 }, 3999 'from', 4000 values %{ $static{'db'}->line("SELECT COUNT(*) FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_tranges'}$tq ") }, 4001 '; new', 4002 values %{ $static{'db'}->line("SELECT COUNT(*) FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_tranges'}$tq $qn") }, 4003 ); 4004 } 4005 our %total; 4006 my %totallast = %total; 4007 unless ( $config{'stat_no_total'} ) { 4008 printlog( 4009 'info', 4010 'totals:', 4011 map { 4012 ( 4013 $_, 4014 '=', 4015 $total{$_} = ( values %{ $static{'db'}->line("SELECT COUNT(*) FROM $tq$static{'db'}->{'table_prefix'}$_$tq ") } )[0] 4016 ), 4017 $totallast{$_} 4018 ? ( $totallast{$_} = $total{$_} - $totallast{$_} ) 4019 ? ( $totallast{$_} > 0 ? '+' : () ) . $totallast{$_} 4020 : () 4021 : () 4022 } keys %{ $config{'sql'}{'table'} } 4023 ); 4024 } 4025 } 4026}; 4027$program{ psmisc::program('statuniq') }{'func'} ||= sub { 4028 my $table = 'filebase'; 4029 #printlog Dumper 4030 my $indexes = $static{'db'}->query("show index from $table"); 4031 #printlog Dumper $bykey ; 4032 my $oldseq = 1; 4033 my $n = 0; 4034 for my $i (@$indexes) { 4035 $i->{"Key"} = ++$n if $i->{"Seq_in_index"} <= $oldseq; 4036 $oldseq = $i->{"Seq_in_index"}; 4037 } 4038 my $bykey = { map { $_->{Key} => $_ } @$indexes }; 4039 #printlog Dumper $indexes; 4040 my ( $run, $total ); 4041 for my $s (`$config{myisamchk} -v --description /var/db/storage/$config{sql_base_web}/$table`) { 4042 $total = $1 if $s =~ /Data records:\s*(\d+)/; 4043 ++$run if $s =~ /table description/; 4044 #printlog $s; 4045 next unless $run; 4046 #Key Start Len Index Type Rec/key Root Blocksize 4047 #1 2 8 unique ulonglong 1 422342656 1024 4048 $s =~ /^\s*(?<Key>\d+)\s+\d+\s+\d+.*?(?<reckey>\d+)/; 4049 next unless $+{Key}; 4050 $bykey->{ $+{Key} }{reckey} = $+{reckey} unless $bykey->{ $+{Key} }{reckey}; 4051 #printlog $s, Dumper \%+; 4052 } 4053 my ($size) = values %{ $static{'db'}->line("SELECT SUM(size) FROM resource WHERE files>0") }; 4054 printlog "totals: files: $total; size $size"; 4055 my %byname; 4056 for my $i ( values %$bykey ) { 4057 $i->{rec} = int( $total / $i->{reckey} ) if $i->{reckey}; 4058 next if $i->{rec} <= 1 or $i->{rec} == $total; 4059 $byname{ $i->{Column_name} } = $i; 4060 printlog "unique $i->{Column_name}'s: $i->{rec}"; #" $i->{reckey}"; 4061 } 4062 printlog 4063"averages: file copies: $byname{tiger}{reckey}, files per user: $byname{dcuser}{reckey}, files per hub: $byname{host}{reckey}, file names per hash:", 4064 ( $byname{name}{reckey} / $byname{tiger}{reckey} ), ", file size: ", int( $size / $total ) 4065 if $byname{tiger}; 4066 printlog "files per host: $byname{host}{reckey},", "file size:", int( $size / $total ) unless $byname{tiger}; 4067 #printlog Dumper $bykey; 4068 #warn "dddd"; 4069 return 0; 4070}; 4071$program{ psmisc::program('mrtg') }{'func'} ||= sub { 4072 my ( $first, $second ); 4073 4074=c 4075 local $config{'allow_auto_repair'} = 0; 4076 local $config{'sql_reconnect_tries'} = 0; 4077 local $config{'sql_reconnect_sleep'} = 0; 4078 local $config{'sql_error_tries'} = 0; 4079 local $config{'sql_error_sleep'} = 0; 4080=cut 4081 4082 $static{'db'}->retry_off(); 4083 if ( $_[0] eq 'online' ) { 4084 ( $first, $second ) = @{ 4085 $static{'db'}->line( 4086"SELECT SUM($static{'db'}->{'table_prefix'}$config{'sql_tresource'}.files) AS files, SUM($static{'db'}->{'table_prefix'}$config{'sql_tresource'}.size) AS size FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq LEFT JOIN $tq$static{'db'}->{'table_prefix'}$config{'sql_thost'}$tq USING (host) WHERE $static{'db'}->{'table_prefix'}$config{'sql_tresource'}.files > ${vq}0${vq} AND $static{'db'}->{'table_prefix'}$config{'sql_thost'}.time > '" 4087 . ( int( time() ) + $config{'timediff'} - $config{'online_minutes'} * 60 ) . "'" 4088 ) 4089 }{ ( 'files', 'size' ) }; 4090 $first /= $config{'mrtg_online_files'}, $first = int($first) if $config{'mrtg_online_files'}; 4091 $second /= $config{'mrtg_online_bytes'}, $second = int($second) if $config{'mrtg_online_bytes'}; 4092 } else { 4093 ($first) = @{ 4094 $static{'db'}->line( 4095"SELECT count(*) as shared FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq LEFT JOIN $tq$static{'db'}->{'table_prefix'}$config{'sql_thost'}$tq USING (host) WHERE $static{'db'}->{'table_prefix'}$config{'sql_tresource'}.files > ${vq}0${vq} AND $static{'db'}->{'table_prefix'}$config{'sql_thost'}.time > '" 4096 . ( int( time() ) + $config{'timediff'} - $config{'online_minutes'} * 60 ) . "'" 4097 ) 4098 }{ ('shared') }; 4099 ($second) = @{ 4100 $static{'db'}->line( 4101 "SELECT count(*) as now FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_thost'}$tq WHERE ${rq}time${rq}>$vq" 4102 . ( int( time() ) - $config{'online_minutes'} * 60 ) 4103 . $vq 4104 ) 4105 }{ ('now') }; 4106 } 4107 if ( $config{'mrtg_out'} and open( MF, '>', $config{'mrtg_out'} ) ) { 4108 print MF"$first\n$second\nz\nz\n"; 4109 close(MF); 4110 } else { 4111 print "$first\n$second\nz\nz\n"; 4112 } 4113 $static{'db'}->retry_on(); 4114 return 0; 4115}; 4116$program{ psmisc::program('cycle') }{'func'} ||= sub { 4117 if ( $config{'periodic_func'} ) { 4118 while ( !scan_stop ) { 4119 my $tim = timer(); 4120 $config{'periodic_func'}->() if $config{'periodic_func'}; 4121 printlog( 4122 'time', "Cycle done per", 4123 $tim->(), 4124 "sec; Sleeping $config{'periodic_time'} seconds to ", 4125 psmisc::human( 'date_time', int( time() ) + $config{'periodic_time'} ) 4126 ); 4127 last if $config{'cycle_once'}; 4128 mysleep( $config{'periodic_time'} ); 4129 } 4130 } 4131}; 4132$program{ psmisc::program('mystatus') }{'func'} ||= sub { 4133 $static{'db'}->query_log('SHOW FULL PROCESSLIST'); 4134 4135=c 4136 if ( $config{'my_dump_file'} and open( FM, '>>', $config{'my_dump_file'} ) ) { 4137 local $config{'sql_use_dbi'} = 0; 4138 print FM $static{'db'}->query('SHOW FULL PROCESSLIST'); 4139 close(FM); 4140 } 4141=cut 4142 4143}; 4144 4145=for SIG tests 4146 psmisc::program('dummy'); 4147 $program{psmisc::program()}{'func'} ||= sub { 4148 $_[0] = 10 if $_[0] eq 'dummy'; 4149 printlog('info', 'dummy', $_), sleep(1) for 0..$_[0]; 4150 }; 4151=cut 4152 4153$program{ psmisc::program('applyfilter') }{'func'} ||= sub { 4154 @_ = qw(query file word) unless @_; 4155 for my $table (@_) { 4156 #printlog "[$table],", $config{'sql'}{'table_param'}{ $config{ 'sql_t' . $table . 'stat' } }{'filter'}, ; 4157 next unless my $filter = $config{'sql'}{'table_param'}{ $config{ 'sql_t' . $table . 'stat' } }{'filter'}; 4158 my $by = 10000; 4159 #my $by = 5; 4160 printlog $table, $filter; 4161 my $now = 0; 4162 my $del = 0; 4163 while (1) { 4164 my @rows = $static{'db'}->query( "SELECT * from $tq" . $config{ 'sql_t' . $table . 'stat' } . "$tq LIMIT $now, $by" ); 4165 for my $row (@rows) { 4166 next unless ref $filter eq 'CODE' and $filter->( undef, $row ); 4167 $static{'db'}->do( "DELETE FROM $tq" 4168 . $config{ 'sql_t' . $table . 'stat' } 4169 . "$tq WHERE $table=" 4170 . $static{'db'}->quote( $row->{$table} ) ); 4171 ++$del; 4172 } 4173 $now += $by; 4174 last if scalar @rows < $by; 4175 } 4176 printlog "deleted $del"; 4177 } 4178}; 4179$config{'killer_time'} ||= 20; 4180$config{'killer_time_max'} ||= 40; 4181$config{'killer_sleep'} ||= 10; 4182@{ $config{'killer_state'} } = ( 'Sending data', 'Sorting result', 'FULLTEXT initialization', 'Waiting for table', 'Locked' ); 4183$config{'killer_commands'} ||= "select|update"; 4184$config{'killer_mask'} = qr{^\s*(?:/\*.*?\*/)?\s*(?:$config{'killer_commands'})}i; 4185$config{'log_kill'} ||= 0; 4186$program{ psmisc::program('killer') }{'func'} ||= sub { 4187 4188 for ( 1 .. $_[0] || 1 ) { 4189 sleep( $config{'killer_sleep'} ), next unless psmisc::lock( 'killer', timeout => 10, old => 5 * 60 ); 4190 $static{'db'} = pssql->new( %{ $config{'sql'} || {} }, 4191 ( ref $config{'sql_base_web'} eq 'HASH' ? %{ $config{'sql_base_web'} } : 'database' => $config{'sql_base_web'} ) ) 4192 if $static{'db'}{'database'} ne $config{'sql_base_web'}; 4193 local $static{'db'}->{'auto_repair'} = 0; 4194 local $static{'db'}->{'reconnect_tries'} = 1 unless $_[0]; 4195 local $static{'db'}->{'connect_tries'} = 1 unless $_[0]; 4196 local $static{'db'}->{'reconnect_sleep'} = 0 unless $_[0]; 4197 local $static{'db'}->{'error_tries'} = 0 unless $_[0]; 4198 local $static{'db'}->{'error_sleep'} = 0 unless $_[0]; 4199 local $config{'log_default'} = 'kill.log'; 4200 my $killed; 4201 4202 for my $row ( sort { $b->{'Time'} <=> $a->{'Time'} } @{ $static{'db'}->query('SHOW FULL PROCESSLIST') } ) { 4203 next unless $row->{'Id'} and $row->{'db'} and $row->{'Time'} and $row->{'State'}; 4204 printlog( 'kill', 'other base', $_, map( "[$_:$row->{$_}] ", keys %{$row} ) ), next 4205 unless grep { $row->{'db'} eq $_ } array( $config{'killer_base'} ), $config{'sql_base_web'}; 4206 printlog( 'kill', 'other state', $_, map( "[$_:$row->{$_}] ", keys %{$row} ) ), next 4207 unless grep { $row->{'State'} } @{ $config{'killer_state'} }; 4208 printlog( 'kill', 'no time', $_, map( "[$_:$row->{$_}] ", keys %{$row} ) ), next 4209 unless $row->{'Time'} > $config{'killer_time'}; 4210 printlog( 'kill', "not in mask [$row->{'Info'}]" ), next 4211 if $config{'killer_mask'} and !( $row->{'Info'} =~ $config{'killer_mask'} ); 4212 printlog( 'info', "Killing query:", map( "[$_:$row->{$_}] ", keys %{$row} ) ); 4213 $static{'db'}->do("KILL QUERY $row->{'Id'}"); 4214 ++$killed; 4215 last if $killed and $_[0] and $row->{'Time'} < $config{'killer_time_max'}; 4216 } 4217 psmisc::unlock('killer'); 4218 sleep $config{'killer_sleep'} if $_[0]; 4219 } 4220}; 4221$program{ psmisc::program('stop') }{'func'} ||= sub { 4222 psmisc::file_rewrite( $config{'stop_file'}, time ); 4223 0; 4224}; 4225#unless ( $config{'module_crawler'} ) { 4226psmisc::program_run($param) unless (caller); 4227 4228sub make_stat { 4229 return ( 4230 ( defined $work{'start_timer'} ? ( '[run time', psmisc::human( 'time_period', $work{'start_timer'}->() ), ']', ) : () ), 4231 ( 4232 $stat{'total_bytes'} 4233 ? " [total $stat{'total_bytes'} bytes per " 4234 . psmisc::human( 'time_period', $work{'start_timer'}->() ) . ' ' 4235 . ( $work{'start_timer'}->() ? 'at ' . int( $stat{'total_bytes'} / $work{'start_timer'}->() ) : '' ) . ' b/s]' 4236 : '' 4237 ), ( 4238 $stat{'files'} 4239 ? "$stat{'files'} files $stat{'dirs'} dirs scanned." 4240 . ( $stat{'skipped'} ? " ($stat{'skipped'} skipped)" : '' ) 4241 . ( 4242 $work{'start_timer'}->() ? '(' . psmisc::human( 'float', $stat{'files'} / $work{'start_timer'}->() ) . ' fps)' : '' ) 4243 : '' 4244 ), 4245 ( int( $stat{'ftp_reconnects'} ) > 1 ? " with $stat{'ftp_reconnects'} recon." : '' ), 4246 ( %stat ? ( 'Stat: ', map { "$_=$stat{$_};" } keys %stat ) : () ), 4247 , 4248 ( ref $static{'db'} ? ( '[', $static{'db'}->stat_string(), ']' ) : () ), 4249 times, 4250 ); 4251} 4252#END {printlog('WOW3', $static{'db'}->{'dbh'});}; 4253#sub 4254END { 4255 #printlog('dbh4.0=',$static{'db'}->{'dbh'}); 4256 #close_out_file(); 4257 #printlog('dbh4.1=',$static{'db'}->{'dbh'}); 4258 $static{'db'}->flush_insert() if $static{'db'}; 4259 printlog( 'info', 'Finished [', join ' ', map( { $_ . ( $param->{$_} ne '' ? "=$param->{$_}" : '' ) } keys %$param ), 4260 '] ', make_stat() ) 4261 if $param 4262 and %$param 4263 and !$config{'nolog_startstop'}; 4264 #$static{'db'}->log_stat(); 4265} 4266#print Dumper \%program; 4267#print Dumper \%config; 4268#print Dumper $param; 4269#print "\n\n param:\n";for (sort keys %$param) { print "$_ = $param->{$_}\n"; } # debug # 4270#print "\n\n config:\n";for (sort keys %config) { print "$_ = $config{$_}\n"; } # debug # 4271#print "\n\n config:\n", Dumper\%config; # debug # 4272#warn "\n\n configkeys:\n", scalar keys %config; # debug # 4273#print "\n\n INC:\n", Dumper(\%INC); # debug # 4274#$static{'db'}->query('error'); 4275#$static{'db'}->do('error'); 4276#http_dl('127.0.0.1', sub {print "cb:",$_[0], "\n"}); 42771; 4278