1#!/usr/local/bin/perl
2#$Id: crawler.pl 4843 2013-08-14 12:17:58Z pro $ $URL: svn://svn.setun.net/search/trunk/crawler.pl $
3
4=copyright
5PRO-search crawler
6Copyright (C) 2003-2011 Oleg Alexeenkov http://pro.setun.net/search/ proler@gmail.com
7
8This program is free software: you can redistribute it and/or modify
9it under the terms of the GNU General Public License as published by
10the Free Software Foundation, either version 3 of the License, or
11(at your option) any later version.
12
13This program is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with this program.  If not, see <http://www.gnu.org/licenses/>.
20=cut
21
22package pscrawler;
23use strict;
24no warnings qw(uninitialized);
25no if $] >= 5.017011, warnings => 'experimental::smartmatch';
26use utf8;
27use Time::Local;
28use Time::HiRes qw(time sleep);
29use Encode;
30use Net::FTP;
31use Data::Dumper;    #dev only
32$Data::Dumper::Sortkeys = $Data::Dumper::Useqq = $Data::Dumper::Indent = $Data::Dumper::Terse = 1;
33exit if defined $ENV{'REMOTE_ADDR'};    # cgi not allowed
34#use lib './dcppp', './lib';
35our ( %config, $param, %work, %stat, %static );
36our ( %processor, %program );           #, %human
37#*pscrawler::work = *main::work;
38#*pscrawler::stat = *main::stat;
39#*pscrawler::config = *main::config;
40#*pscrawler::program = *main::program;
41*config = *main::config;
42#=z
43#use vars
44my ( $tq, $rq, $vq );
45our ( @ISA, @EXPORT, @EXPORT_OK );
46require Exporter;
47@ISA    = qw(Exporter);
48@EXPORT = qw( %config $param %work %stat %static );
49#=cut
50#$0 =~ m|^(.+)[/\\].+?$|;                #v0
51#our $root_path ||= $1 . '/' if $1;
52#$root_path =~ s|\\|/|g;
53use lib::abs qw(./ ./lib);
54#eval "use lib '$root_path'" if $root_path;
55#eval "use lib '$root_path./lib';
56#      use psmisc; use pssql;";          #use Net::DirectConnect;
57use psmisc qw(:all);
58use pssql;
59psmisc::use_try('Net::DirectConnect');
60#'$root_path./dcppp',
61print($@), exit if $@;
62#use lib ;
63my (%ipstt);
64#our was here
65$work{'start_timer'} = psmisc::timer();
66use sigtrap qw(handler safedie normal-signals error-signals INFO);
67
68sub safedie {
69  printlog( 'info', $0, make_stat() ), return if $_[0] eq 'INFO';
70  printlog(
71    'info', 'Signal ', @_,
72    'recieved, flushing buffer, reloading conf.',
73    ( %stat ? ( 'Stat: ', map { "$_=$stat{$_};" } keys %stat ) : () )
74    ),
75    #$processor{'out'}{'array'}->()
76    $static{'db'}->flush_insert(), psmisc::config_read(), psmisc::config_reload(), return
77    if $_[0] eq 'HUP' and ( $config{'win_hup'} or $config{'system'} ne 'win' );
78  printlog( 'info', 'Signal', @_, 'recieved, ignoring, not fatal?' ), return if grep { $_[0] eq $_ } qw(PIPE SEGV);
79  printlog( 'info', 'Signal', @_, 'recieved, exiting' );
80  #{
81  #local $config{'log_all'} = 1; #DEV
82  #printlog( 'info', 'executing on_interrupt', int(time) );
83  $work{'on_interrupt'}->(@_) if ( !$work{'die'} and $work{'on_interrupt'} );
84  #}
85  $static{'db'}->{'die'} = $work{'die'} = 1;
86  exit;
87}
88
89sub on_interrupt {
90  $work{'on_interrupt'} = undef, return unless @_;
91  $work{'on_interrupt'} = $_[0] if $_[0];
92}
93
94sub allowscan {
95  my ( $by, $table, $tim, $period ) = @_;
96  return 0 unless %$by;
97  return 1 if $config{'force'};
98  if (    #!$config{'sql _use_dbi'}    or
99    $static{'banned'}->{ join_url($by) } or $static{'banned'}->{ $by->{'host'} } or $static{'banned'}->{ $by->{'range'} }
100    )
101  {
102
103=c
104printlog('dbg','allowscan:banned:', join_url($by),'ra=',$by->{'range'} , 'reason:',
105"sud: ! $config{'sql _use_dbi'}
106  banby:  or ",$static{'banned'}->{ join_url($by) },"
107  ho  or ", $static{'banned'}->{ $by->{'host'} }, "
108  ra  or ", $static{'banned'}->{ $by->{'range'} } , "
109  ra h", %{$static{'banned'}->{ $by->{'range'} } }
110
111);
112=cut
113
114    $static{'db'}->update( $table, undef, { %$by, 'scan' => int( time() ) + $config{'banned_time'} } )
115      if $table and $config{'banned_time'};
116    return 0;
117  }
118  #my ( $tq, $rq, $vq ) = $static{'db'}->quotes();
119  my $where = join(
120    ' AND ',
121    map( "$rq$_$rq = " . $static{'db'}->quote( $by->{$_} ),
122      grep { %{ $config{'sql'}{'table'}{$table}{$_} or {} } and $config{'sql'}{'table'}{$table}{$_}{'primary'} } keys %$by )
123  );
124  #MAKE QUERY
125  #printlog('dev', "tim($tim) per($period)", );
126  my $base =
127    ( $table
128      and %$by
129      and ( !$tim or !$period )
130    ? $static{'db'}->line("SELECT ${rq}scan$rq, ${rq}period$rq FROM $tq$static{'db'}->{'table_prefix'}$table$tq WHERE $where")
131    : {} );
132  #printlog('dev', 'btim, bper = ', $base->{'scan'}, $base->{'period'});
133  #printlog('dev', 'skiptime',join_url($by), $tim);
134  $tim ||= $base->{'scan'};
135  $work{'skiptime'}{ join_url($by) } = $tim;
136  $period ||= ( $base->{'period'} or hconfig( 'period', $by->{'host'}, $by->{'prot'} ) );
137  $work{'skipperiod'}{ join_url($by) } = $period;
138  if ( !$tim or ( $tim < time - $period ) ) {
139    local $config{'scan_timeout'} = $period / 2 if $config{'scan_timeout'} >= $period;
140    $static{'db'}->update(
141      $table, undef, {
142        $static{'db'}->filter_row( $table, 'primary', $by ), (
143          #$period ne $base->{'period'} ? ( 'period' => $period ) : (),
144          $config{'noscan'} ? () : ( 'scan' => int( time() ) - $config{'scan_timeout'} )
145        )
146      }
147    ) if $table;
148    #printlog('dbg','allowscan', join_url($by), psmisc::human('time_period',time - $tim), psmisc::human('time_period',$period));
149    return 1;
150  }
151#else { printlog('dbg','NOT allowscan', join_url($by), psmisc::human('time_period',time - $tim), psmisc::human('time_period',$period)); }
152  return 0;
153}
154
155sub resetcounters {
156  ( $work{'sizec'}, $work{'filec'}, $work{'dirc'} ) =
157    ( counter( $stat{'size'} ), counter( $stat{'files'} ), counter( $stat{'dirs'} ) );
158}
159
160sub uni_get {
161  my ($what) = @_;
162  my ($prot) = $what =~ m|^\s*(\w+):|;
163  return $processor{'prot'}{$prot}{'get'}->($what) if $prot and ref $processor{'prot'}{$prot}{'get'} eq 'CODE';
164  return $what;
165}
166
167sub scan_stop () {
168  local $_ = $work{'die'} || $config{'periodic_stop'} || ( !$config{'no_stop'} and -f $config{'stop_file'} );
169  printlog( 'info', "stopping: fl=$config{'periodic_stop'} file=$config{'stop_file'}" ) if $_;
170  $_;
171}
172
173sub oneliner {
174  my ( $oneline, $content ) = @_;
175  #if ( $content =~ /^GLOB/ ) {
176  if ( ref $content eq 'GLOB' ) {
177    while (<$content>) { $oneline->(); if ($work{'interrupt'} ){delete $work{'interrupt'}; close $content; last; }; }
178    #} elsif ( $content =~ /^ARRAY/ ) {
179  } elsif ( ref $content eq 'ARRAY' ) {
180    while ( defined( $_ = shift @$content ) ) { $oneline->(); }
181  } else {
182    for ( split( "\n", $content ) ) { $oneline->(); }
183  }
184}
185$processor{'desc'}{'m3u'}{'func'} ||= sub {
186  my ( $file, $data ) = @_;
187  return unless $file;
188  my ( $curdesc, %detectcp );
189  $file = Encode::encode hconfig(
190    'cp_shell'    #, $host, $prot
191  ), $file, Encode::FB_DEFAULT;
192  open( DESC, '<', $file ) or return ();
193  while (<DESC>) {
194    cp_detect_trans( $_, \%detectcp, $config{'cp_int'}, $config{'cp_m3u'} );
195    if (/^#EXTINF:(\d+),(.+)/i) {
196      #$curdesc = '[' . $1 . 's] ' . $2;
197      $curdesc = $2;
198      $data->{$_}{'meta'} .= " len=$1";
199      $curdesc =~ s/[\x0d\x0a\x00-\x1F]+/ /g;
200    } elsif (/^(.+\..+)$/) {
201      tr/A-Z/a-z/;
202      s/[\x0d\x0a\x00-\x1F]//g;
203      s/^\s+|\s+$//g;
204      $data->{$_}{'desc'} .= ' ' . $curdesc;
205    }
206  }
207  close(DESC);
208  return $data;
209};
210$processor{'desc'}{'bbs'}{'func'} ||= sub {
211  my ( $file, $data ) = @_;
212  #printlog "bbs[( $file,  )]", Dumper $data;
213  return unless $file;
214  my $curfile = '';
215  my %detectcp;
216  $file = Encode::encode hconfig(
217    'cp_shell'    #, $host, $prot
218  ), $file, Encode::FB_DEFAULT;
219  open( DESC, '<', $file ) or return;    #(printlog('dev', "cant open [$file]"),return ());
220  while (<DESC>) {
221    #printlog Dumper "bbs[$_]";
222    if (/^-(.*)/) { next; }
223    elsif (/^(\w\S*)\s*(\[\s*(\d*)\]\s*)?(.*)\s*/) {
224      $curfile = $1;
225      $curfile =~ tr/A-Z/a-z/;
226      $curfile =~ s/\.$//g;
227      my $desc = $4;
228      cp_detect_trans( $desc, \%detectcp, $config{'cp_int'}, $config{'cp_desc'} );
229      #!!! TODO UTF:# $desc =~ s/[\x0d\x0a\x00-\x1F]+/ /g;
230      $data->{$curfile}{'desc'} .= ' ' . $desc unless ( $desc =~ /^\s*$config{'nodesc'}\s*$/i );
231    } else {
232      next unless $curfile;
233      next unless /^\s*(.+)/;
234      next if length( $data->{$curfile}{'desc'} ) > 250;
235      my $tmp = $1;
236      next unless $tmp;
237      cp_detect_trans( $tmp, \%detectcp, $config{'cp_int'}, $config{'cp_desc'} );
238      #!!! TODO UTF:# $tmp =~ s/[\x0d\x0a\x00-\x1F]//g;
239      chomp($tmp);
240      next unless $tmp;
241      $data->{$curfile}{'desc'} .= '\\\\n' . $tmp;
242    }
243    #params load    $data{$curfile}{desc} = dos2win($data{$curfile}{desc}) if ( exists($data{$curfile}{desc}) );
244    #if ( exists($data{$curfile}{desc}) ) { #!!
245    #while ($data{$curfile}{desc} =~ s/^\s*file(\S*)\=(\S*)\s*//i) {
246    #$data{$curfile}{$1} = $2 if $1 and $2;
247    #}
248    #}
249  }
250  close(DESC);
251  return $data;
252};
253
254sub fileparser {    # $file, $prot, $full, $size, \%files    # $host,
255  for my $dsk ( keys %{ $processor{'desc'} } ) {
256    #printlog("$config{'desc'}{$dsk}{'mask'} and $_[0] 2:$_[2] 3:$_[3] 4:$_[4] ");
257    if ( $config{'desc'}{$dsk}{'mask'} and $_[0] =~ /$config{'desc'}{$dsk}{'mask'}/i ) {
258      #printlog("matched![$dsk][m:$config{'desc'}{$dsk}{'max'}][1:$_[1]]");
259      psmisc::code_run( $processor{'desc'}{$dsk}{'func'}, psmisc::code_run($processor{'prot'}{ $_[1] }{'get'}, $_[2] ), $_[4] )
260        if !$config{'desc'}{$dsk}{'max'}
261          or $_[3] <= $config{'desc'}{$dsk}{'max'};
262    }
263  }
264}
265
266sub filesave (@) {
267  for (@_) {
268    for ( values %$_ ) {
269      $_->{type} = $config{ext_to_type}{ $_->{ext} };
270      $_->{type} = 1 if !$_->{size};
271    }
272    #printlog 'ins', Dumper $_;
273    $static{'db'}->insert_hash_hash( $config{'sql_tfile'}, $_ );
274  }
275}
276
277sub update_to_root (\%\%;$$) {
278  my ( $res, $core, $local, $url_orig ) = @_;
279  my $path_ = '';    #'/';
280  #/ /a /a/b /a/b/c
281  for my $d (
282    !length $res->{'path'} ? '' : split '/',
283    #'/' .
284    $local ? $url_orig : $res->{'path'}
285    )
286  {
287    #printlog( 'dev', 'cd', $res->{'path'}, 'd:',$d, 'p:',$path_ );
288    $core->{'path'} = $path_ || '/';
289    $core->{'name'} = $d;
290    #printlog('dev',  $res->{'path'}, $path_, Dumper $core);
291    $static{'db'}->update( $config{'sql_tfile'}, undef, $core );
292    $core->{'desc'} = '';
293    $path_ .= '/' . $d if length $d;
294    #dmp('dsc!'),
295    $core->{'desc'} = $res->{'desc'} if $res->{'path'} eq $path_;
296  }
297  #=cut
298}
299
300=cu
301
302$processor{'out'}{'mysql'} ||= sub {
303  return $static{'db'}->do(@_) if $config{'sql_use_dbi'};
304
305  cut
306  my @ret;
307  for my $st (@_) {
308    my $str = $st;                          # bugg
309    my $q = ( $str =~ /\"/ ? "'" : '"' );
310    $str =~ s/$q//g;
311    $str =~ s/\`/\\`/g;
312    printlog( 'dmp', "via mysql cmdline query {$str}" );
313    my $up =
314        ( $config{'sql_user'} ? "-u$config{'sql_user'} " : '' )
315      . ( $config{'sql_pass'} ? "-p$config{'sql_pass'} " : '' )
316      . ( $config{'sql_port'} ? "-P$config{'sql_port'} " : '' );
317    push( @ret, `$config{'mysql'} $up $config{'sql_mysqlopt'} -e $q$str$q $config{'sql_base'}` );
318  }
319  return @ret;
320
321};
322=cut
323
324=TODO
325$processor{'out'}{'mysql_file'} ||= sub {
326  printprog("$config{'mysql'} $config{'sql_mysqlopt'} $config{'sql_base'} < $_") for @_;
327};
328
329{
330  my $gt = timer();
331  $processor{'out'}{'sql'} ||= sub {
332    my $ret;
333    for my $str (@_) {
334      next unless $str;
335      if ( $config{'sql_use_dbi'} ) {
336        my $t = timer() if $config{'debug_time'};
337        $ret += $static{'db'}->do($str);
338        printlog(
339          'time',
340          'SQL',
341          psmisc::human( 'time_period', $t->() ),
342          ',by',
343          $ret,
344          ( $t->() > 1 ? '=' . psmisc::human( 'float', $ret / $t->() ) . 'fps' : () ),
345          ',SCAN=',
346          psmisc::human( 'time_period', $gt->() ),
347          ',',
348          ( $gt->() > 1 ? '=' . psmisc::human( 'float', $ret / $gt->() ) . 'fps' : () )
349        ) if $config{'debug_time'} and $config{'debug_time'} <= $ret;
350        $gt = timer() if $config{'debug_time'};
351      } else {
352        open_out_file('hosts') unless $work{'current_name_work'};
353        print I$str;
354        ++$ret;
355      }
356    }
357    return $ret;
358  };
359}
360
361=cut
362
363$processor{'format'}{'dir'} ||= sub {
364  my $scan           = int( time() );
365  my $curent_process = shift(@_);
366  $curent_process =~ s/^(\S+:)?(\/*[^\/]+\/[^\/]+)\/(.+)$/$1$2/;
367  #printlog('dev', 'cp', $curent_process);
368  return unless $curent_process;
369  $curent_process =~ tr/\\/\//;
370  #printlog('dev', 'cp', $curent_process);
371  my $path = $3;
372  $path = $curent_process . '/' . $path;
373  $path =~ s/[\\\/]+$//g;
374  my $local;
375  $local = 1 if $curent_process =~ m{^(?:\w:|/)};
376  $local = 0 if $curent_process =~ m{^//};
377  my $prot = 'file';
378  $prot = $config{'local_prot'} if $local and $config{'local_prot'};
379  my $errors = 0;
380  my %files  = ();
381BUG: for my $content (@_) {
382    my ( $host, $dir_ ) =
383      $local ? ( $config{'local_host'}, $curent_process ) : $curent_process =~ /^\/+([^\/]+)\/+([^\/]+)(.*)/;
384    #printlog('dev', 'cp', $curent_process, "hd[$host, $dir_]");
385    #$host = $config{'local_host'} if $local;
386
387=z
388    if ($host) {
389      $files{''} = {
390        'prot' => $prot,
391        'host' => $host,
392        'scan' => $scan,
393        ( hconfig( 'unknown_time', $host ) ne '' ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $host ) ) : () ),
394      };
395
396      $files{' '} = {
397        'prot' => $prot,
398        'host' => $host,
399        'path' => '/',
400        ( hconfig( 'unknown_time', $host ) ne '' ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $host ) ) : () ),
401        'scan' => $scan,
402      };
403#=cut
404
405#if (length $dir_) {
406      my $path_ = '';    #'/';
407#/ /a /a/b /a/b/c
408      for my $d ( split '/', '/' . $dir_ ) {
409        #printlog('dev','cd', $d, $path_);
410        $files{ '  ' . $path_ . $d } = {
411          'prot' => $prot,
412          'host' => $host,
413          'scan' => $scan,
414          'path' => $path_ || '/',
415          'name' => $d,
416          ( hconfig( 'unknown_time', $host ) ne '' ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $host ) ) : () ),
417        };
418        $path_ .= '/' . $d if length $d;
419      }
420#}
421    }
422=cut
423
424    my $oneline = sub {
425      #printlog('dev', 'G0', $_);
426      my ( $mday, $mon, $year, $hours, $min, $size, $file, $ext, $desc, $utime );
427      if (/^ (Directory of|Содержимое папки) (.+)\s*$/) {
428        #printlog('dbg', 'ins1', Dumper(\%files));
429        #$static{'db'}->insert_hash_hash( $config{'sql_tfile'}, \%files );
430        filesave( \%files );
431        %files = ();
432        $path  = $2;
433        $path =~ tr/\\/\//;
434        $path =~ s/[\x0a\x0d\x00-\x1F]+//g;
435        chomp($path);
436        #$path = cp_trans(hconfig('cp_share', $host), hconfig('cp_db', $host), $path);
437        #$path = cp_trans( hconfig( 'cp_res', $host, $prot ), hconfig( 'cp_db', $host, $prot ), $path );
438        $config{'local_path'}->($path) if $local and ref $config{'local_path'} eq 'CODE';
439        #printlog('dev', 'r0', $path),
440        return;
441      } elsif ( hconfig( 'skip_path_mask', $host ) and $path =~ /$config{'skip_path_mask'}/i ) {
442        ++$stat{'skipped'};
443        #printlog('dev', 'r1'),
444        return;
445      } elsif (/^\s*Total Files Listed/) {
446        #printlog('dev', 'n2'),
447        next BUG;
448      } elsif ( /^Имя папки (.+) слишком длинно./ and ++$errors > hconfig( 'max_errors', $host, $prot ) ) {
449        printlog( 'err', 'max errors reached:', $errors, $_ );
450        #printlog('dev', 'l3'),
451        last;
452      }
453      ( $mday, $mon, $year, $hours, $min, $size, $file ) = /(\d+)\.(\d+)\.(\d+)\s+(\d+)\:(\d+)\s+(\d+|\<DIR\>)\s+(.+)\s*/i
454        or return;
455      return if $size eq '0';
456      $size = 0 if $size =~ /DIR/i;
457      return if $file =~ /^\.+$/;
458      $file =~ s/[\x0a\x0d\x00-\x1F]//g;
459      $file =~ s/\.$//g;
460      chomp($file);
461      return if hconfig( 'skip_hidden', $host ) and ( $file =~ /^\./ or $path =~ m;(^|/)\.; );
462      fileparser( $file, $prot, $path . '/' . $file, $size, \%files );
463      #$file = cp_trans( hconfig( 'cp_res', $host, $prot ), hconfig( 'cp_db', $host, $prot ), $file );
464      my $lfile = $file;
465      $lfile =~ tr/A-Z/a-z/;
466      $files{$lfile}{'name'} = $file;
467      $year += 2000 if length($year) == 2;
468      $year = 1990 if ( ( $year > 2020 ) or ( $year < 1980 ) );    #timelocal BUG
469      $files{$lfile}{'time'} = timelocal( 0, $min, $hours, $mday, --$mon, $year );
470      $files{$lfile}{'time'}     = 640000000 if ( $files{$lfile}{'time'} > ( $scan + 2592000 ) );
471      $files{$lfile}{'fullname'} = $prot . ':' . $path . '/' . $file;
472      $files{$lfile}{'prot'}     = $prot;
473      ( $files{$lfile}{'host'}, $files{$lfile}{'path'} ) = $path =~ /^?\/?\/?([^\/]+)(.+)/;
474      $files{$lfile}{'ext'}  = ( $files{$lfile}{'name'} =~ s/\.([^\.]+)$// ? $1 : '' );
475      $files{$lfile}{'scan'} = $scan;
476      $files{$lfile}{'size'} = $size;
477      #printlog('dbg', 'ins', Dumper(\%files));
478    };
479    oneliner( $oneline, $content );
480    #printlog('dbg', 'ins0', Dumper(\%files));
481    #exit;
482    #$static{'db'}->insert_hash_hash( $config{'sql_tfile'}, \%files );
483    #} continue {
484    #printlog('dbg', 'ins1', Dumper(\%files));
485  }
486  #$static{'db'}->insert_hash_hash( $config{'sql_tfile'}, \%files );
487  filesave( \%files );
488  #%files = ();
489};
490$processor{'format'}{'smb-ls'} ||= sub {
491  my $scan           = int( time() );
492  my $curent_process = shift(@_);
493  $curent_process =~ s/^(\S+:)?(\/*[^\/]+\/[^\/]+)\/(.+)$/$1$2/;
494  return unless $curent_process;
495  $curent_process =~ tr/\\/\//;
496  $curent_process =~ s{/+$}{};
497  my $path = $3;
498  $path = $curent_process . '/' . $path;
499  $path =~ s/[\\\/]+$//g;
500  my $prot = 'file';
501
502  for my $content (@_) {
503    my %data  = ();
504    my %files = ();
505    my $root  = 0;
506    my ( $host, $dir_ ) = $curent_process =~ /^\/+([^\/]+)\/+([^\/]+)(.*)/;
507
508=z
509    if ($host) {
510      $files{''} = {
511        'prot' => $prot,
512        'host' => $host,
513        ( hconfig( 'unknown_time', $host ) ne '' ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $host ) ) : () ),
514        'scan' => $scan
515      };
516      $files{' '} = {
517        'prot' => $prot,
518        'host' => $host,
519        'path' => '/',
520        ( hconfig( 'unknown_time', $host ) ne '' ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $host ) ) : () ),
521        'scan' => $scan
522      };
523      $files{'  '} = {
524        'prot' => $prot,
525        'host' => $host,
526        'path' => '/',
527        'name' => $dir_,
528        ( hconfig( 'unknown_time', $host ) ne '' ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $host ) ) : () ),
529        'scan' => $scan
530        }
531        if $dir_;
532
533
534
535
536
537    }
538=cut
539
540    my $oneline = sub {
541      my ( $mday, $mon, $year, $hours, $min, $sec, $size, $file, $ext, $desc, $utime, $attr );
542      if (/^(\\.*)\s*$/) {
543        #$static{'db'}->insert_hash_hash( $config{'sql_tfile'}, \%files );
544        filesave( \%files );
545        %files = ();
546        ( $path = $1 ) =~ s/\\+/\\/g;
547        #$path = cp_trans( hconfig( 'cp_res', $host, $prot ), hconfig( 'cp_db', $host, $prot ), $path );
548        $path = $curent_process . $path;
549        $path =~ tr/\\/\//;
550        $path =~ s/[\x0a\x0d\x00-\x1F]//g;
551        chomp($path);
552        return;
553      } elsif ( hconfig( 'skip_path_mask', $host ) and $path =~ /$config{'skip_path_mask'}/i ) {
554        ++$stat{'skipped'};
555        return;
556      } elsif ( hconfig( 'interrupt_path_re', $host ) and $path =~ $config{'interrupt_path_re'} ) {
557	printlog('warn', "found interrupt path [$path]");
558        ++$work{'interrupt'};
559        return;
560      }
561      ( $size, $mon, $mday, $hours, $min, $sec, $year ) = /\s+(\d+)\s+\w+\s+(\w+)\s+(\d+)\s+(\d+)\:(\d+)\:(\d+)\s+(\d+)\s*$/
562        or return;
563      $file = $`;
564      $file =~ s/\s+([ADHSR]+)$//;
565      $attr = $1;
566      $file =~ s/(\s*$)|(^\s*)//g;
567      return if $file =~ /^\.+\s*$/;
568      return if hconfig( 'skip_hidden', $host ) and $attr =~ /H/;
569      return if $size eq '0' and !$attr =~ /D/;
570      $file =~ s/[\x0a\x0d\x00-\x1F]//g;
571      $file =~ s/\.$//g;
572      $file =~ s/\s*$//;
573      chomp($file);
574      return if hconfig( 'skip_hidden', $host ) and ( $file =~ /^\./ or $path =~ m;(^|/)\.; );
575      #$file = cp_trans( hconfig( 'cp_res', $host, $prot ), hconfig( 'cp_db', $host, $prot ), $file );
576      fileparser( $file, $prot, $path . hconfig( 'slash_perl', $host ) . $file, $size, \%files );
577      my $lfile = $file;
578      $lfile =~ tr/A-Z/a-z/;
579      $files{$lfile}{'name'} = $file;
580      $year += 2000 if length($year) == 2;
581      $year = 1990 if ( ( $year > 2020 ) or ( $year < 1980 ) );    #timelocal BUG
582      $mon = $config{'lng'}{ $config{'lng_smbls'} }{'month_table'}{$mon} unless $mon =~ /^\d+$/;
583      $files{$lfile}{'time'} = timelocal( $sec, $min, $hours, $mday, $mon, $year );
584      $files{$lfile}{'time'}     = 640000000 if ( $files{$lfile}{'time'} > ( $scan + 2592000 ) );
585      $files{$lfile}{'fullname'} = $prot . ':' . $path . '/' . $file;
586      $files{$lfile}{'prot'}     = $prot;
587      ( $files{$lfile}{'host'}, $files{$lfile}{'path'} ) = $path =~ /^?\/?\/?([^\/]+)(.+)/;
588      $files{$lfile}{'ext'}  = ( $files{$lfile}{'name'} =~ s/\.([^\.]+)$// ? $1 : '' );
589      $files{$lfile}{'scan'} = $scan;
590      $files{$lfile}{'size'} = $size;
591    };
592    oneliner( $oneline, $content );
593    #$static{'db'}->insert_hash_hash( $config{'sql_tfile'}, \%files );
594    filesave( \%files );
595  }
596};
597$processor{'prot'}{'file'}{'toplevel'} ||= sub {
598  return '' unless $config{'smbclient'} or $config{'net_view'};
599  my ($host) = @_;
600  shelldata($host);
601  return unless $host =~ /\S/;
602  my $prot = 'file';
603  #--user=username[%password]
604  #printlog("dev", 'opening');
605  my $h = openproc(
606    '-|' . ':encoding(' . hconfig( 'cp_res', $host, $prot ) . ')',
607    qq{$config{'starter'} $config{'net_view'} $config{'smbclient'} }
608      . hconfig( 'smb_opt', $host )
609      . qq{ $config{'view_opt'} "$config{'slash_net'}$host" $config{'stderr_redirect'}}
610  ) or return '';
611  #printlog("dev", 'H:',$h);
612  while (<$h>) {
613    #printlog("dev", 'proc:',$_);
614    #next;
615    next unless /^\s*(.+)\s+((Disk)|(Диск)|(IPC))\s+(\w\:)?\s+(.*)\s*$/i;
616    my ( $share, $desc ) = ( $1, $7 );
617    $share =~ s/\s*$//g;
618    next if hconfig( 'skip_hidden', $host ) and $share =~ /\$$/;
619    $processor{'prot'}{'file'}{'scan'}->( '//' . $host . '/' . $share );
620    #?if ( $desc =~ /\S/ ) {
621    #$share = cp_trans( hconfig( 'cp_res', $host, $prot ), hconfig( 'cp_db', $host, $prot ), $share );
622    $desc =~ s/\s+$//;
623    #$desc = cp_trans( hconfig( 'cp_res', $host, $prot ), hconfig( 'cp_db', $host, $prot ), $desc );
624    #$static{'db'}->insert_hash_hash(
625    #printlog('dev',"ins [$share]");
626    $static{'db'}->insert_hash(
627      $config{'sql_tfile'}, {
628        #'' => {
629        'prot' => $prot,
630        'host' => $host,
631        'path' => '/',
632        'name' => $share,
633        ( hconfig( 'unknown_time', $host ) ne '' ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $host ) ) : () ),
634        'desc' => $desc,
635        'scan' => int( time() )
636          #}
637      }
638    );
639    #}
640  }
641  close($h);
642  return '';
643};
644$processor{'prot'}{'file'}{'scan'} ||= sub {
645  my $prot = 'file';
646  for (@_) {
647    my $where = $_;
648    next if $where =~ /^\s*$/;
649    printlog( 'smb', "$where : scanning..." );
650    $where =~ s/\\|\//$config{'slash_dir'}/g;    #RO VALUE SOMETIMES
651    $where =~ s/[\x0d\x0a\"]//g;
652    my ($host) = $where =~ /([\w.]+)/;
653    #print('dev', "wbef[$where]");
654    my $wheres = $where;    #cp_trans( hconfig( 'cp_res', $host, $prot ), hconfig( 'cp_shell', $host, $prot ), $where );
655    $where = Encode::encode hconfig( 'cp_shell', $host, $prot ), $where, Encode::FB_DEFAULT;
656    #print('dev', "waft[$wheres]");
657    my ($dir);
658    ( undef, $where, $dir ) = $where =~ /^(\S+:)?(\/*[^\/]+\/[^\/]+)?\/?(.+)?\/?$/;
659    $dir =~ s/\/+$//;
660    $dir = '-D ' . $dir if $config{'smbclient'} and $dir;
661    shelldata( $where, $dir );
662    $where = qq|"$where"| if $where;
663    $dir   = qq|"$dir"|   if $dir;
664    #--user=username[%password]
665    my $h = openproc(
666      '-|' . ':encoding(' . hconfig( 'cp_res', $host, $prot ) . ')',
667      qq{$config{'starter'} $config{'smbclient'} }
668        . hconfig( 'smb_opt', $host )
669        . qq{ $config{'dir_cmd'} $config{'smb_ls_opt'} $dir $where $config{'stderr_redirect'}}
670    ) or next;
671    #$wheres = cp_trans( hconfig( 'cp_shell', $host ), hconfig( 'cp_db', $host ), $wheres );
672    $processor{'format'}{ hconfig( 'share_list_fmt', $host ) }->( $wheres, $h );
673    #$processor{'format'}{ hconfig( 'share_list_fmt', $host ) }->( $where, $h );
674    close($h);
675  }
676};
677#==
678#$config{'scanner'}{ 'local' }{'no_res_stat'} =1;
679$processor{'prot'}{'local'}{'func'} ||= sub {
680  my $prot = 'local';
681  for my $wheres (@_) {
682    #printlog('dev' , "prot'}{'local'}{'func",$wheres);
683    my %url = split_url($wheres);
684    $wheres =~ s|^$prot://||i;
685    $processor{'prot'}{$prot}{'scan'}->( (
686        $wheres
687          #( $wheres =~ m|[^/]+/[^/]+| or $wheres =~ m{^(\.|/)} )
688          #? ( ( $wheres =~ m{^(\.|/)} ? '' : '//' ) . $wheres )
689          #: ( ( $wheres =~ /^\w\:/ ) ? $wheres : $processor{'prot'}{$prot}{'toplevel'}->($wheres) )
690      )
691    );
692    purge_res( \%url ) if $config{'purge_every_res'};
693  }
694};
695$processor{'prot'}{'local'}{'get'} ||= sub { return $_[0]; };
696$processor{'prot'}{'local'}{'scan'} ||= sub {
697  my $prot = $config{'local_prot'} || 'local';
698  for my $where (@_) {
699    #my $where = $_;
700    next if $where =~ /^\s*$/;
701    printlog( 'local', "$where : scanning..." );
702    $where =~ s/\\|\//$config{'slash_dir'}/g;
703    $where =~ s/[\x0d\x0a\"]//g;
704    my ($host) = $config{'local_host'};    #$where =~ /([\w.]+)/;
705    my $wheres = $where;    # = cp_trans( hconfig( 'cp_res', $host, $prot ), hconfig( 'cp_shell', $host, $prot ), $where );
706    $where = Encode::encode hconfig( 'cp_shell', $host, $prot ), $where, Encode::FB_DEFAULT;
707    my ($dir);
708    #( $where, $dir ) = $where =~ /^((?:\S+:)?\/*[^\/]+\/[^\/]+)?\/?(.+)?\/?$/;
709    #$dir =~ s/\/+$//;
710    #$dir = '-D ' . $dir if $config{'smbclient'} and $dir;
711    shelldata($where);
712    $where = qq|"$where"| if $where;
713    #$dir   = qq|"$dir"|   if $dir;
714    #--user=username[%password]
715    my $h = openproc(
716      '-|' . ':encoding(' . hconfig( 'cp_res', $host, $prot ) . ')',
717#"$config{'starter'} $config{'smbclient'} $config{'smb_opt'} $config{'dir_cmd'} $config{'smb_ls_opt'} $dir $where $config{'stderr_redirect'} |"
718      qq{$config{'starter'} $config{'local_cmd'} $config{'local_opt'} $dir $where $config{'stderr_redirect'}}
719    ) or next;
720    #$wheres = cp_trans( hconfig( 'cp_shell', $host ), hconfig( 'cp_db', $host ), $wheres );
721    #printlog('dev', 'parsef', hconfig( 'local_list_fmt', $host ));
722    $processor{'format'}{ hconfig( 'local_list_fmt', $host ) }->( $wheres, $h );
723    close($h);
724  }
725};
726
727sub purge_res {
728  local $config{'purge'} = ( $config{'purge_res_time'} or $config{'period'} * 2 );
729  psmisc::program_one( 'purge', undef, $config{'sql_tfile'},
730    'AND ' . $static{'db'}->where_body( $_[0], undef, $config{'sql_tfile'} ) )
731    if $config{'purge_every_res'};
732}
733$processor{'prot'}{'file'}{'func'} ||= sub {
734  my $prot = 'file';
735  for my $wheres (@_) {
736    my %url = split_url($wheres);
737    $wheres =~ s|^$prot://||i;
738    $processor{'prot'}{$prot}{'scan'}->( (
739          ( $wheres =~ m|[^/]+/[^/]+| or $wheres =~ m{^(\.|/)} )
740        ? ( ( $wheres =~ m{^(\.|/)} ? '' : '//' ) . $wheres )
741        : ( ( $wheres =~ /^\w\:/ ) ? $wheres : $processor{'prot'}{$prot}{'toplevel'}->($wheres) )
742      )
743    );
744    purge_res( \%url ) if $config{'purge_every_res'};
745  }
746};
747$processor{'prot'}{'file'}{'get'} ||= sub {
748  return $_[0] if $config{'system'} =~ /win/i;
749  my ( $file, $service, $save ) = @_;
750  my $prot = 'file';
751  $file =~ s/\|\>\<\"//g;
752  ( undef, $service, $file ) = $file =~ /^(\S+:)?(\/*[^\/]+\/[^\/]+)\/(.+)$/;
753  $save = encode_url( $prot . ':' . $service . '/' . $file, $config{'encode_url_file_mask'} );
754  $file =~ s/\/+/\\\\/g;
755  #$file =~ s/\'/\'\'/g;       bbb u u ggg   ' symol in file name
756  #$file =~ s/\'/\\\'\\\'/g;   bbb u u ggg       !!!TODO test with "
757  #$file =~ s/\'/\\\'/g;       bbb uuu ggg
758  return '' if $file =~ /\'/;
759  my ($host) = $service =~ /([\w.]+)/;
760  #$file = cp_trans( hconfig( 'cp_db', $host, $prot ), hconfig( 'cp_res', $host, $prot ), $file );
761  $file = Encode::encode hconfig( 'cp_shell', $host, $prot ), $file, Encode::FB_DEFAULT;
762  $save = hconfig('tmpdir') . hconfig( 'slash_sys', $host ) . $save;
763  return $save if -e $save and time - $^T + 86400 * -M $save < hconfig( 'purge', $host, $prot );
764  shelldata( $file, $save, $service );
765  printprog(
766    qq{$config{'starter'} $config{'smbclient'} }
767      . hconfig( 'smb_opt', $host )
768      . qq{ $config{'get_opt'} -c 'get "$file" "$save"' "$service"},
769    1
770  );
771  return $save;
772};
773{
774  my ($ftp);
775  $processor{'prot'}{'ftp'}{'get'} ||= sub {
776    my ( $file, $save ) = @_;
777    my %url = split_url($file);
778    $save = encode_url( $file, $config{'encode_url_file_mask'} );
779    $file = join_url( { 'path' => $url{'path'}, 'name' => $url{'name'}, 'ext' => $url{'ext'} } );
780    $save = hconfig( 'tmpdir', $url{'host'} ) . hconfig( 'slash_sys', $url{'host'} ) . $save;
781    $save =~ s/^(.{240}).*/$1/;
782    return $save if -e $save and time - $^T + 86400 * -M $save < hconfig( 'purge', $url{'host'}, $url{'prot'} );
783    $ftp =
784      $work{'ftp'}
785      { join_url( { 'user' => $url{'user'}, 'pass' => $url{'pass'}, 'host' => $url{'host'}, 'port' => $url{'port'} } ) };
786    #printlog('dev' ,"1:$ftp;", %url);
787    return if ftp_keep( $$ftp, %url );
788    #printlog('dev' ,"2:$ftp;$$ftp;$$ftp;");
789    eval { $$$ftp->get( $file, $save ); };
790    return $save;
791  };
792  END { $$$ftp->close() if !ftp_check($$ftp); }
793}
794$processor{'prot'}{'rsync'}{'toplevel'} ||= sub {
795  return '' unless $config{'rsync'};
796  my ($host) = @_;
797  #my $prot = 'rsync';
798  shelldata($host);
799  my %url = split_url($host);
800  return unless $host =~ /\S/;
801  my $h = openproc( '-|' . ':encoding(' . hconfig( 'cp_res', $url{'host'}, $url{'prot'} ) . ')',
802    "$config{'starter'} $config{'rsync'} $config{'rsync_param'} \"$host\" $config{'stderr_redirect'}" )
803    or return '';
804  my %scanned;
805  while (<$h>) {
806    next if /^rsync(?: error)?:/i;
807    next unless /(\S+)\s+(.*)\s*/;
808    my ( $share, $desc ) = ( $1, $2 );
809    next if $scanned{$share}++;
810    $processor{'prot'}{'rsync'}{'scan'}->( $host . '/' . $share );
811    if ( $desc =~ /\S/ ) {
812      #$share = cp_trans( hconfig( 'cp_rsync', $host ), hconfig( 'cp_db', $host ), $share );
813      $desc =~ s/\s+$//;
814      #$desc = cp_trans( hconfig( 'cp_rsync', $host ), hconfig( 'cp_db', $host ), $desc );
815      #$static{'db'}->insert_hash_hash( $config{'sql_tfile'},
816      filesave( {
817          '' => {
818            %url,
819            'path' => '/',
820            'name' => $share,
821            ( hconfig( 'unknown_time', $host ) ne '' ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $host ) ) : () ),
822            'desc' => $desc,
823            'scan' => int( time() )
824          }
825        }
826      );
827    }
828  }
829  close($h);
830  return '';
831};
832$processor{'prot'}{'rsync'}{'scan'} ||= sub {
833  for (@_) {
834    my $where = $_;
835    next if $where =~ /^\s*$/;
836    printlog( 'rsync', "$where : scanning..." );
837    shelldata($where);
838    my %url = split_url($where);
839    #$where = cp_trans( hconfig( 'cp_rsync', $url{'host'} ), hconfig( 'cp_shell', $url{'host'} ), $where );
840    my $wheres = Encode::encode hconfig( 'cp_shell', $url{'host'}, $url{'prot'} ), $where, Encode::FB_DEFAULT;
841    my $h = openproc( '-|' . ':encoding(' . hconfig( 'cp_res', $url{'host'}, $url{'prot'} ) . ')',
842      "$config{'starter'} $config{'rsync'} $config{'rsync_param'} \"$wheres\" $config{'stderr_redirect'}" )
843      or next;
844    #$where = cp_trans( hconfig( 'cp_shell', $url{'host'} ), hconfig( 'cp_db', $url{'host'} ), $where );
845    $processor{'format'}{'rsync'}->( $where, $h );
846    close($h);
847  }
848};
849$processor{'prot'}{'rsync'}{'get'} ||= sub {
850  #TODO
851};
852$processor{'prot'}{'rsync'}{'func'} ||= sub {
853  for my $wheres (@_) {
854    my %url = split_url($wheres);
855    $processor{'prot'}{'rsync'}{'scan'}->( ( ( $url{'path'} and $url{'path'} ne '/' ) or $url{'name'} )
856      ? $wheres
857      : $processor{'prot'}{'rsync'}{'toplevel'}->($wheres) );
858    purge_res( \%url ) if $config{'purge_every_res'};
859  }
860};
861$processor{'format'}{'rsync'} ||= sub {
862  my $url = shift(@_);
863  $url .= '/' unless $url =~ m|/$|;
864  my %url  = split_url($url);
865  my $scan = int( time() );
866  my $path = $url{'path'};
867  $path =~ s/[\\\/]+$//g;
868  my $lastpath = $path;
869  for my $content (@_) {
870    my %data  = ();
871    my %files = ();
872
873=z
874    if ( $url{'host'} ) {    # ??
875      my ($dir_) = $url{'path'} =~ /([^\/]+)/;
876      $files{''} = {
877        'prot' => $url{'prot'},
878        'host' => $url{'host'}, (
879          hconfig( 'unknown_time', $url{'host'} ) ne ''
880          ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $url{'host'} ) )
881          : ()
882        ),
883        'scan' => $scan
884      };
885      $files{' '} = {
886        'prot' => $url{'prot'},
887        'host' => $url{'host'},
888        'path' => '/', (
889          hconfig( 'unknown_time', $url{'host'} ) ne ''
890          ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $url{'host'} ) )
891          : ()
892        ),
893        'scan' => $scan
894      };
895      $files{'  '} = {
896        'prot' => $url{'prot'},
897        'host' => $url{'host'},
898        'path' => '/',
899        'name' => $dir_, (
900          hconfig( 'unknown_time', $url{'host'} ) ne ''
901          ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $url{'host'} ) )
902          : ()
903        ),
904        'scan' => $scan
905        }
906        if $dir_;
907    }
908=cut
909
910    my $oneline = sub {
911      chomp;
912      s/[\x0a\x0d\x00-\x1F]//g;
913      #printlog( 'rsyncdmp', 'ol', $_ );
914      my ( $mday, $mon, $year, $hours, $min, $sec, $utime, $attr );    #$size, $name ,$ext, $desc,
915      my (%file);
916      #warn $_;
917      ( $attr, $file{'size'}, $year, $mon, $mday, $hours, $min, $sec, $file{'path'}, $file{'name'}, $file{'ext'} ) =
918        m|^(\S+)\s+(\d+)\s+(\d+)/(\d+)/(\d+)\s+(\d+):(\d+):(\d+)\s+(?:(.+)/)?(.*?)(?:\.([^\.]+))?\s*$|
919        or return;
920      $file{'path'} = $path . ( $file{'path'} ? '/' : '' ) . $file{'path'};
921      $file{'path'} =~ s/[\x0a\x0d\x00-\x1F]//g;
922      if ( $lastpath ne $file{'path'} ) {
923        #printlog( 'rsyncdbg', 'path change from', $lastpath, $file{'path'}, 'uploaded', scalar %files );
924        #$static{'db'}->insert_hash_hash( $config{'sql_tfile'}, \%files );
925        filesave( \%files );
926        %files    = ();
927        $lastpath = $file{'path'};
928      }
929      return if $file{'name'} =~ /^\.+\s*$/;
930      return if $file{'size'} == 0 and !$attr =~ /^d/;
931      $file{'size'} = 0 if $attr =~ /^d/;
932      $file{'file'} = $file{'name'} . ( $file{'ext'} ? '.' : '' ) . $file{'ext'};
933      return if hconfig( 'skip_hidden', $url{'host'} ) and ( $file{'file'} =~ /^\./ or $file{'path'} =~ m;(^|/)\.; );
934      #$file = cp_trans(hconfig('cp_rsync', $url{'host'}), hconfig('cp_db', $url{'host'}), $file);
935      fileparser( $file{'file'}, $url{'prot'},
936        $url{'prot'} . '://' . $file{'path'} . hconfig( 'slash_perl', $url{'host'} ) . $file{'file'},
937        $file{'size'}, \%files );
938      my $lfile = $file{'file'};
939      $lfile =~ tr/A-Z/a-z/;
940      ( $file{'prot'}, $file{'host'}, $file{'scan'} ) = ( $url{'prot'}, $url{'host'}, $scan );
941      $year = 1990 if ( ( $year > 2020 ) or ( $year < 1980 ) );    #timelocal BUG
942      $file{'time'} = 640000000
943        if ( ( $file{'time'} = timelocal( $sec, $min, $hours, $mday, --$mon, $year ) ) > ( $scan + 2592000 ) );
944      %{ $files{$lfile} } = %file;
945    };
946    oneliner( $oneline, $content );
947    #$static{'db'}->insert_hash_hash( $config{'sql_tfile'}, \%files );
948    filesave( \%files );
949  }
950};
951
952$processor{'prot'}{'nfs'}{'get'} ||= sub { return $_[0]; };
953
954$processor{'prot'}{'nfs'}{'func'} ||= sub {
955  for my $wheres (@_) {
956    my %url = split_url($wheres);
957    $processor{'prot'}{'nfs'}{'scan'}->( ( ( $url{'path'} and $url{'path'} ne '/' ) or $url{'name'} )
958      ? $wheres
959      : $processor{'prot'}{'nfs'}{'toplevel'}->($wheres) );
960    purge_res( \%url ) if $config{'purge_every_res'};
961  }
962};
963$processor{'prot'}{'nfs'}{'toplevel'} //= sub {
964  #return '' unless $config{'rsync'};
965  my ($host) = @_;
966  #my $prot = 'rsync';
967  shelldata($host);
968  my %url = split_url($host);
969  return unless $host =~ /\S/;
970  #printlog('nfs', 'mount', ` $config{'showmount'} $url{host}`);
971  #printlog('nfs', 'mount-a', ` $config{'showmount'} -a $url{host}`);
972  #printlog('nfs', 'mount-e', ` $config{'showmount'} -e $url{host}`);
973  #printlog('nfs', 'mount-d', ` $config{'showmount'} -d $url{host}`);
974  #printlog( 'nfs', 'mount-e', ` $config{'showmount'} -e $url{host}` );
975  my $h = openproc( '-|' . ':encoding(' . hconfig( 'cp_res', $url{'host'}, $url{'prot'} ) . ')',
976    "$config{'starter'} $config{'showmount'} $config{'showmount_param'} $url{host} $config{'stderr_redirect'}" )
977    or return '';
978  my %scanned;
979  my $line;
980  while (<$h>) {
981    printlog( 'nfs', $config{'showmount'}, $config{'showmount_param'}, $_ );
982    next unless $line++;
983    #next if m{(?:can't do mountdump rpc|Unable to receive)}i; #'
984    #printlog( 'nfs', '-a:', $_ );
985    next if /^(?:showmount|RPC):/;
986    #next unless /^(\S+):(\S.*)\s*/;
987    next unless /^\s*(\S+)(?:\s+(\S.*))?\s*/;
988    my ( $share, $desc ) = ( $1, $2 );
989    #my ( $hostname, $share ) = ( $1, $2);
990    #printlog('nfs', 'parsed:', ( $hostname, $share ));
991    next if $scanned{$share}++;
992    $processor{'prot'}{'nfs'}{'scan'}->( $host . $share );
993    #if ( $desc =~ /\S/ ) {
994    if ( $share =~ /\S/ ) {
995      #$share =~ s{^/}{};
996      #$share .= '/';
997      #$share = cp_trans( hconfig( 'cp_rsync', $host ), hconfig( 'cp_db', $host ), $share );
998      #$desc =~ s/\s+$//;
999      #$desc = cp_trans( hconfig( 'cp_rsync', $host ), hconfig( 'cp_db', $host ), $desc );
1000      #$static{'db'}->insert_hash_hash( $config{'sql_tfile'},
1001      #=no
1002      my ( $path, $name ) = $share =~ m{^(.+)?/([^/]+)$};
1003      filesave( {
1004          '' => {
1005            %url,
1006            'path' => $path,
1007            'name' => $name,
1008            ( hconfig( 'unknown_time', $host ) ne '' ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $host ) ) : () ),
1009            'meta' => "mountpoint=1",
1010            'desc' => $desc,
1011            'scan' => int( time() )
1012          }
1013        }
1014      );
1015      #=cut
1016      #update_to_root( %url, %url, $local, $url_orig );
1017      my %res = (
1018        %url,
1019        'path' => $share,    #. '/',
1020        #desc=>'desc!',
1021      );
1022      local %_ = (
1023        %res,
1024        'path' => '',
1025        'name' => '',
1026        'ext'  => '',
1027        #'desc' => $work{'desc'}{ $url{'host'} }{ $url{'prot'} },
1028        'scan' => int( time() )
1029      );
1030      update_to_root( %res, %_ );
1031    }
1032  }
1033  close($h);
1034  return '';
1035};
1036$processor{'prot'}{'nfs'}{'scan'} //= sub {
1037
1038=later
1039
1040point= pointroot/host/path
1041
1042mount -t nfs -o soft,intr,retrans=10,retrycnt=2,timeout=10
1043
1044ls
1045
1046  local $config{'local_prot'} = 'nfs';
1047  $processor{'prot'}{'nfs'}{'scan'}->()
1048
1049
1050
1051umount
1052
1053
1054
1055=cut
1056
1057  for my $where (@_) {
1058    #my $where = $_;
1059    next if $where =~ /^\s*$/;
1060    printlog( 'nfs', "$where : scanning..." );
1061    shelldata($where);
1062    my %url = split_url($where);
1063    #dmp \%url;
1064    my $path = join_url( { path => $url{'path'}, name => $url{'name'} } );
1065    #(my $point_name = $where) =~ s/\W/_/g;
1066    #$point_name = $path;
1067    my $point_host = "$config{'nfs_root'}/$url{host}";
1068    my $point      = "$point_host$path";
1069    my $exists     = -d $point;
1070    my @dirs       = mkdir_rec $point;
1071    my ($mount)    = "$url{'host'}:" . $path;
1072    #$where = cp_trans( hconfig( 'cp_rsync', $url{'host'} ), hconfig( 'cp_shell', $url{'host'} ), $where );
1073    my $wheres = Encode::encode hconfig( 'cp_shell', $url{'host'}, $url{'prot'} ), $where, Encode::FB_DEFAULT;
1074    #printprog("mount -t nfs -o soft,intr,retrans=10,retrycnt=2,timeout=10 $mount $point");    # unless $exists;
1075    printprog("$config{'starter'} $config{'mount'} $config{'mount_param'} $mount $point");
1076    printprog("$config{'starter'} $config{'df'} $config{'df_param'} $point");
1077    local $config{'local_root_path'} = $point_host;
1078    local $config{'local_prot'}      = 'nfs';
1079    local $config{'local_host'}      = $url{'host'};
1080    $point .= '/';
1081    $processor{'prot'}{'local'}{'scan'}->($point);
1082    printprog("$config{'starter'} $config{'umount'} $config{'umount_param'} $point") if $point;
1083    rmdir for reverse @dirs;
1084    #unlink $point;
1085  }
1086};
1087$processor{'format'}{'ls'} ||= sub {
1088  my $url = shift @_;
1089  $url =~ tr/\\/\//;
1090  #printlog('dev', 'ls', $url);
1091  my $local;
1092  $local = 1 if $url =~ m{^(?:\w:|/)};
1093  my ( $user, $pass, $host, $port, $path ) = @{ split_url($url) }{ 'user', 'pass', 'host', 'port', 'path' };
1094  my $prot = 'ftp';
1095  $prot = $config{'local_prot'} if $local and $config{'local_prot'};
1096  $host = $config{'local_host'} if $local and $config{'local_host'};
1097  my $path_cut = $config{'local_root_path'};
1098  $path_cut =~ s{^(/)}{$1?};
1099  $path =~ s{^$path_cut/?}{} if $path_cut;
1100  my (%files);
1101
1102  for my $content (@_) {
1103    my $scan = int( time() );
1104    my $lfile;
1105    my $oneline = sub {
1106      s/[\x0a\x0d]//g;
1107      #printlog('dev', 'lsdmp', $_);
1108      my ( $mday, $mon, $year, $hours, $min, $size, $file, $ext );
1109      if (/^\.?\/?(.*)\s*:\s*$/) {
1110        $path = $1;
1111        $path =~ s{^$path_cut/?}{} if $path_cut;
1112        #$static{'db'}->insert_hash_hash( $config{'sql_tfile'}, \%files );
1113        filesave( \%files );
1114        %files = ();
1115        return;
1116      } elsif ( $config{'skip_path_mask'} and $path =~ /$config{'skip_path_mask'}/i ) {
1117        ++$stat{'skipped'};
1118        return;
1119      } elsif (/^\s*[rd\-]/) {
1120        ( $size, $mon, $mday, $hours, $min, $file ) =
1121          /^\s*\S+\s+\d*\s*\S+\s*\S*\s+(\d+)\s+(\S+)\s+(\d+)\s+(\d+):?(\d+)?\s+(.+)\s*$/;
1122        return if $file =~ /^\.+\s*$/;
1123        return if $size == 0 and !/^d/;
1124        $size = 0 if /^d/;
1125        $file =~ s/[\x0a\x0d\x00-\x1F]//g;
1126        $file =~ s/(\.|\s)*$//g;
1127        chomp($file);
1128        return if hconfig( 'skip_hidden', $host ) and ( $file =~ /^\./ or $path =~ m;(^|/)\.; );
1129        $lfile = $file;
1130        $lfile =~ tr/A-Z/a-z/;
1131        $files{$lfile}{'name'} = $file;
1132        $files{$lfile}{'size'} = $size;
1133        $mon                   = $config{'lng'}{ $config{'lng_ls'} }{'month_table'}{$mon};
1134
1135        if ( $min eq '' ) {
1136          $year = $hours;
1137          $hours = $min = 0;
1138        } else {
1139          $year = ( localtime(time) )[5] + 1900;    #$curyear;
1140          --$year if $mon > ( localtime(time) )[4];
1141        }
1142        $year = 1990 if $year > 2020 or $year < 1980;    #timelocal BUG
1143        $mday = 28 if ( $mon == 1 and $mday > 28 ) or !$mday;    #Another bug 8(
1144        eval {                                                   #no more bugs 8)
1145          $files{$lfile}{'time'} = timelocal( 0, $min, $hours, $mday, $mon, $year );
1146        };
1147        $files{$lfile}{'time'} = 640000000 if ( $files{$lfile}{'time'} > ( $scan + 86400 ) );    # 2592000
1148      } else {
1149        return;
1150      }
1151      fileparser(
1152        $file, $prot,
1153        join_url( {
1154            'prot' => $prot,
1155            'user' => $user,
1156            'pass' => $pass,
1157            'host' => $host,
1158            'port' => $port,
1159            'path' => $path,
1160            'name' => $file
1161          }
1162        ),
1163        $size,
1164        \%files
1165      );
1166      $files{$lfile}{'fullname'} = join_url( {
1167          'prot' => $prot,
1168          'user' => $user,
1169          'pass' => $pass,
1170          'host' => $host,
1171          'port' => $port,
1172          'path' => $path,
1173          'name' => $file
1174        }
1175      );
1176      $files{$lfile}{'fullname'} =~ s/[\x00-\x1F]//g;
1177      #printlog"cpdet", $host, Dumper $work{$host}{'detectcp'}, $_ , hconfig( 'cp_res', $host, $prot ),
1178      #$files{$lfile}{'desc'} = "cp=$_ " . $files{$lfile}{'desc'}
1179      $files{$lfile}{'meta'} .= " cp=$_"
1180        if $_ = cp_detect_trans( $files{$lfile}{'fullname'}, \%{ $work{$host}{'detectcp'} }, undef, undef, $prot, $host )
1181          and $_ ne hconfig( 'cp_res', $host, $prot );
1182      (
1183        $files{$lfile}{'prot'}, $files{$lfile}{'user'}, $files{$lfile}{'pass'}, $files{$lfile}{'host'},
1184        $files{$lfile}{'port'}, $files{$lfile}{'path'}, $files{$lfile}{'name'}, $files{$lfile}{'ext'}
1185      ) = @{ split_url( $files{$lfile}{'fullname'} ) }{ 'prot', 'user', 'pass', 'host', 'port', 'path', 'name', 'ext' };
1186      $files{$lfile}{'host'} = full_host( { 'host' => $host, 'user' => $user, 'pass' => $pass, 'port' => $port } );
1187      $files{$lfile}{'path'} ||= '/';
1188      $files{$lfile}{'scan'} = $scan;
1189    };
1190    oneliner( $oneline, $content );
1191  }
1192  filesave( \%files );
1193  #  $static{'db'}->insert_hash_hash( $config{'sql_tfile'}, \%files );
1194};
1195
1196sub ftp_reconnect {
1197  my ( $ftp, %url ) = @_;
1198  $$ftp->close() if defined $$ftp;
1199  $work{'ftp_connected'} = 0;
1200  printlog(
1201    'hint',
1202    "FTP RECONNECTS1: host=$url{'host'} ftp_reconnects=$stat{'ftp_reconnects'} ftp_reconnect_max=",
1203    hconfig( 'ftp_reconnect_max', $url{'host'} ),
1204    'f=',
1205    $work{'filec'}->( $stat{'files'} ),
1206    'd=',
1207    $work{'dirc'}->( $stat{'dirs'} ),
1208    's=',
1209    $work{'sizec'}->( $stat{'size'} )
1210    ),
1211    return 3
1212    if ++$stat{'ftp_reconnects'} >= hconfig( 'ftp_reconnect_max', $url{'host'} );
1213  eval {
1214    $$ftp = Net::FTP->new(
1215      $url{'host'},
1216      'Timeout' => hconfig( 'timeout', $url{'host'}, $url{'prot'} ),
1217      ( hconfig( 'passive', $url{'host'} ) ? ( 'Passive' => 1 ) : () ), ( $url{'port'} ? ( 'Port' => $url{'port'} ) : () ),
1218      %{ hconfig( 'sockopts', $url{'host'}, $url{'prot'} ) || {} }
1219    );
1220  };
1221  printlog( 'warn', 'ftp connect err', $@ ) if $@;
1222  return 1 if !defined $$ftp;
1223  my $message = $$ftp->message();
1224  #printlog('ftp', "FTP: message: $message");
1225  $message =~ s/\s+$//;
1226  if (
1227    !hconfig( 'ftp_recurse_force', $url{'host'} ) and !defined( $config{'fine'}{ $url{'host'} }{'ftp_recurse_force'} ) and (
1228      $message =~ /PureFTPd/i || $message =~ /Pure-FTPd/i    # PureFTPd sends only 2000 lines of "ls -lR"
1229      || $message =~ /CesarFTP/i                             # CesarFTP does not print "<DIRECTORY>:"
1230      || $message =~ /GuildFTPd/i                            # GuildFTPd gets crazy when attempting to use "ls -lR"
1231      || $message =~ /Microsoft/i                            # ??? can't parse output of "ls -lR" of Microsoft FTP server
1232      || $message =~ /Serv-U/i                               # Serv-U's "ls -lR" does not work well in all cases
1233      || $message =~ /Gene6 FTP Server/i
1234    )
1235    )
1236  {
1237    $config{'fine'}{ $url{'host'} }{'ftp_recurse_force'} = 1;
1238    printlog( 'ftp', "ftp_recurse_force=1 for $url{'host'}" );
1239  }
1240  if (
1241    !$$ftp->login(
1242      ( $url{'user'} or hconfig( 'anonymous_user', $url{'host'} ) ),
1243      ( $url{'pass'} or hconfig( 'anonymous_pass', $url{'host'} ) )
1244    )
1245    )
1246  {
1247    $$ftp->close() if defined $$ftp;
1248    printlog( 'ftp', "FTP: cannot login", join_url( \%url ) );
1249    return 2;
1250  }
1251  psmisc::cp_detect_trans($message);
1252  printlog( 'ftp',
1253        "FTP: connected ($stat{'ftp_reconnects'}/"
1254      . hconfig( 'ftp_reconnect_max', $url{'host'} ) . ") to "
1255      . join_url( \%url )
1256      . " pasv="
1257      . hconfig( 'passive', $url{'host'} )
1258      . "; [$message]" );
1259  $work{'desc'}{ $url{'host'} }{'ftp'} = $message;
1260  ++$work{'ftp_connected'};
1261  $work{'ftp'}
1262    { join_url( { 'user' => $url{'user'}, 'pass' => $url{'pass'}, 'host' => $url{'host'}, 'port' => $url{'port'} } ) } = \$ftp;
1263  #$ftp;
1264  #printlog("ftpis", "$ftp, $$ftp;");
1265  return 0;
1266}
1267
1268sub ftp_reconnect_while {
1269  my ( $ftp, %url ) = @_;
1270  my $retry = 0;
1271  do {
1272    return 0 unless ftp_reconnect( $ftp, %url );    # !!!???
1273    return 2 unless $work{'ftp_connected'};
1274    printlog(
1275      'hint',
1276      "FTP RECONNECTS2: host=$url{'host'} ftp_reconnects=$stat{'ftp_reconnects'} ftp_reconnect_max=",
1277      hconfig( 'ftp_reconnect_max', $url{'host'} )
1278      ),
1279      return 3
1280      if $stat{'ftp_reconnects'} >= hconfig( 'ftp_reconnect_max', $url{'host'} );
1281    mysleep($_) if ( $_ = hconfig( 'ftp_reconnect_sleep', $url{'host'} ) );
1282    printlog( 'ftp',
1283          "FTP $url{'host'} reconnecting $retry ($stat{'ftp_reconnects'}/"
1284        . hconfig( 'ftp_reconnect_max', $url{'host'} )
1285        . ") [$@]" );
1286  } while ( $retry++ <= hconfig( 'ftp_reconnect_retry', $url{'host'} ) );
1287  return 1;
1288}
1289
1290sub ftp_check {
1291  my ($ftp) = @_;
1292  return 1 unless ( defined $ftp and defined $$ftp and $ftp and $$ftp );
1293  my $code = $$ftp->code();
1294  return 2 if grep( $code eq $_, ( '599', '000', '200', '425', '150' ) );
1295  return 3 unless ( defined $ftp and defined $$ftp and $ftp and $$ftp );
1296  return 0;
1297}
1298
1299sub ftp_keep {
1300  my ( $ftp, %url ) = @_;
1301  #printlog('dev', 'ftp_keep', $ftp);
1302  return ftp_reconnect_while( $ftp, %url ) if ftp_check($ftp);
1303  return 0;
1304}
1305
1306sub _list_cmd_ {    #from NET:FTP
1307  my $ftp = shift;
1308  my $cmd = 'LIST';
1309  my $res = pop;
1310  delete ${*$ftp}{'net_ftp_port'};
1311  delete ${*$ftp}{'net_ftp_pasv'};
1312  my $data;
1313  eval { $data = $ftp->_data_cmd( $cmd, @_ ) };
1314  printlog( 'warn', "ftp LIST cmd failed", $@ ), return if $@;
1315  return unless ( defined $data );
1316  require Net::FTP::A;
1317  bless $data, "Net::FTP::A";    # Force ASCII mode
1318  my $databuf = '';
1319  my $buf     = '';
1320  @_ = ();
1321  my $blksize = ${*$ftp}{'net_ftp_blksize'};
1322
1323  while ( $data->read( $databuf, $blksize ) ) {
1324    $buf .= $databuf;
1325    for my $line ( split( /\n/, $buf ) ) {
1326      $processor{'format'}{'ls'}->( $res, \@_ ), @_ = () if @_ and $line =~ /\:\s*$/ and !$_[$#_];
1327      push @_, $line;            #if $line;
1328    }
1329    $buf = ( $buf !~ /\n$/ ? "\n" . pop @_ : '' );
1330  }
1331  $processor{'format'}{'ls'}->( $res, \@_ ) if @_;
1332  $data->close();
1333}
1334{
1335  my ( @toscan, $toscanfile );
1336
1337  sub savearray {
1338    my ( $file, $arr ) = @_;
1339    return unless ( $file and $arr and open( TSO, '>', $file ) );
1340    while ( defined( $_ = shift @$arr ) ) { print TSO$_ . "\n" if $_; }
1341    close(TSO);
1342  }
1343  $processor{'prot'}{'ftp'}{'func'} ||= sub {
1344    #printlog( 'dbg', "prot'}{'ftp'}{'func" );
1345    foreach my $url (@_) {
1346      $url .= '/' if $url !~ m|/$|;
1347      my %url = split_url($url);
1348      my $ftp;
1349      $stat{'ftp_reconnects'} = $work{'ftp_connected'} = 0;
1350      next if ftp_keep( \$ftp, %url );
1351      my ($content);
1352      my $tryrec = 1;
1353      $stat{'ftp_errors'} = 0;
1354
1355      unless ( hconfig( 'ftp_recurse_force', $url{'host'} ) ) {
1356        _list_cmd_(
1357          $ftp,
1358          "$config{'ls_opt'} $url{'path'}",
1359          join_url( {
1360              'user' => $url{'user'},
1361              'pass' => $url{'pass'},
1362              'host' => $url{'host'},
1363              'port' => $url{'port'},
1364              'path' => $url{'path'}
1365            }
1366          )
1367        );
1368        $tryrec = 0
1369          if ( $work{'filec'}->( $stat{'files'} ) > hconfig( 'ftp_recurse_threshold', $url{'host'} ) )
1370          and !ftp_check( \$ftp );
1371        if ( $config{'old_ftp_recurse'} and $tryrec ) {    #delete block
1372          printlog( 'dbg', "Old recurse" );
1373          $content = '';
1374          eval { $content = $ftp->dir("-lRGn $url{'path'}") };
1375          if ($content) {
1376            $tryrec = 0 if ( scalar @$content > hconfig( 'ftp_recurse_threshold', $url{'host'} ) ) and !ftp_check( \$ftp );
1377            $processor{'format'}{'ls'}->( join_url( \%url ), $content );
1378          }
1379        }
1380      }
1381      my $passive = 0;
1382      if ($tryrec) {
1383        printlog( 'ftp', "Trying recurse mode" ) unless hconfig( 'ftp_recurse_force', $url{'host'} );
1384        my $dir;
1385        @toscan = ();
1386        $toscanfile =
1387            $config{'datadir'}
1388          . $config{'slash_sys'}
1389          . encode_url(
1390          join_url( {
1391              'prot' => $url{'prot'},
1392              'user' => $url{'user'},
1393              'pass' => $url{'pass'},
1394              'host' => $url{'host'},
1395              'port' => $url{'port'}
1396            }
1397          )
1398          ) . '.toscan';
1399        my $toscanold = time - $^T + 86400 * -M $toscanfile;
1400#printlog('warn', ".toscan too old, skipping, days=",  -M $toscanfile, ' period=', hconfig( 'period', $url{'host'}, $url{'prot'} ), $toscanold, 'C', -C $toscanfile, 'A', -A $toscanfile) ;
1401        $toscanold = undef if $toscanold < hconfig( 'period', $url{'host'}, $url{'prot'} );
1402        printlog(
1403          'info',
1404          ".toscan too old, skipping, days=",
1405          int -M $toscanfile,
1406          ' period=', hconfig( 'period', $url{'host'}, $url{'prot'} ) / 86400
1407        ) if $toscanold;
1408        if (  !$toscanold
1409          and -e $toscanfile
1410          and rename( $toscanfile, $toscanfile . $config{'work_ext'} )
1411          and open( TSO, '<', $toscanfile . $config{'work_ext'} ) )
1412        {
1413          printlog( 'dbg', "Loading", $toscanfile, "path=[$url{'path'}]" );
1414          while (<TSO>) {
1415            s/^\s*//;
1416            s/\s*$//;
1417            my ($name) = m|/([^/]+)$|;
1418            printlog( 'dbg', 'toscan load skip path:', $_, $name ), next
1419              if ( $config{'skip_path_mask'} and $name =~ /^$config{'skip_path_mask'}$/i )
1420              or ( $url{'path'} and !m{^$url{'path'}} );
1421            push( @toscan, $_ );
1422          }
1423          close(TSO);
1424          unlink( $toscanfile . $config{'work_ext'} );
1425          my $where =
1426            $static{'db'}->where( { %url, ( $url{'path'} eq '/' ? ( 'path' => '' ) : () ) }, undef, $config{'sql_tresource'} );
1427          #my ( $tq, $rq, $vq ) = $static{'db'}->quotes();
1428          my $res =
1429            $static{'db'}->line("SELECT * FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq $where LIMIT 1");
1430          printlog( 'dev', 'ftptoscan stat: ', %$res );
1431          $stat{'dirs'}  += $res->{'dirs'};
1432          $stat{'files'} += $res->{'files'};
1433          $stat{'size'}  += $res->{'size'};
1434          printlog( 'dev', 'ftptoscan s: ', $stat{'dirs'}, $stat{'files'}, $stat{'size'} );
1435        } else {
1436          $dir = ( $url{'path'} or '/' );
1437          push( @toscan, $dir );
1438        }
1439        my %scanned;
1440        FTPSCAN: while (@toscan) {
1441          FTPTOSCAN: while ( $dir = pop(@toscan) ) {    #v1:shift v2:pop
1442            @toscan = (),
1443              printlog(
1444              'hint',
1445              "MAX ERRORS REACHED: host=$url{'host'} ftp_errors=$stat{'ftp_errors'} max_errors=",
1446              hconfig( 'max_errors', $url{'host'}, $url{'prot'} ),
1447              'f=',
1448              $work{'filec'}->( $stat{'files'} ),
1449              'd=',
1450              $work{'dirc'}->( $stat{'dirs'} ),
1451              's=',
1452              $work{'sizec'}->( $stat{'size'} ),
1453              $_
1454              ),
1455              printlog( 'warn', 'MAX ERRORS REACHED at', $url{'host'}, $dir, $_ ), last
1456              if $stat{'ftp_errors'} > hconfig( 'max_errors', $url{'host'}, $url{'prot'}, );
1457            #printlog( 'ftpdbg', scalar @toscan, $dir );
1458            ++$stat{'ftp_errors'}, printlog( 'dbg', 'TOO LONG PATH', $dir, $_ ), next
1459              if length($dir) > hconfig( 'ftp_recurse_path_max', $url{'host'} );
1460            for my $retry ( 0 .. 3 ) {
1461              last if ftp_keep( \$ftp, %url ) or $work{'die'};
1462              #printlog( 'ftpdbg', "trying change  to [$dir]" );
1463              $content = '';
1464              if ( $work{'die'} ) { last; }
1465              elsif ( hconfig( 'ftp_use_cwd', $url{'host'} ) ) {
1466                my $ret;
1467                eval { $ret = $ftp->cwd($dir); };
1468                if ($ret) {
1469                  eval { $content = $ftp->dir(); };
1470                } else {
1471                  printlog( 'dbg', "Cannot change to $dir" );
1472                  last;
1473                }
1474              } else {
1475                eval { $content = $ftp->dir($dir); };
1476              }
1477              local $_ = $ftp->pwd();
1478              s|/+$||;
1479              $_ = '/' . $_ if $_ and $_ !~ m|^/|;
1480              printlog( 'err', "CWD failed or PWD bug  mustbe:", $dir, "pwd:", $_ ), $dir = $_, if ( $dir ne $_ ) and $_ and !m|^/\w\:|;
1481              printlog( 'err', "loop scan [$dir] $scanned{$dir} times"), next FTPTOSCAN if $scanned{$dir}++ > 2;
1482              $passive = 9, last if ( !ftp_check( \$ftp ) and $content ) or $work{'die'};
1483              printlog( 'dbg', "FTP getdir retry $retry " );    #, ++$passive, next if ftp_check(\$ftp);
1484              printlog( 'dbg', "Trying passive mode [$passive]" ), $config{'fine'}{ $url{'host'} }{'passive'} = 1
1485                if ( ++$passive == 1 )
1486                and !$config{'fine'}{ $url{'host'} }{'passive'};
1487              $ftp->pasv() if $passive == 1;
1488              ftp_reconnect( \$ftp, %url ) if $passive == 2 or $retry == 2;
1489              delete $config{'fine'}{ $url{'host'} }{'passive'} if $passive == 3;
1490            }
1491            ( $work{'ftp_connected'} ? next : last ) unless $content;
1492            my ( @thisdir, $listed_dirs, $num );
1493            for (@$content) {
1494              printlog( 'dmp', $_ );
1495              ++$stat{'ftp_errors'}, printlog( 'dbg', 'BUGGY FTPD ', $dir, $_ ), @thisdir = (), last
1496                if /^ftpd: .*: No such file or directory/i;
1497              ++$stat{'ftp_errors'}, printlog( 'dbg', 'BAD RECURSION SKIPPED [second listed dir]:', $dir, $1 ), @thisdir = (),
1498                last
1499                if /^\.?\/?(.+)\s*:\s*$/ and $listed_dirs++;
1500              next
1501                if !(/^\s*d\S+\s+\d*\s*\S+\s*\S*\s+(\d+)\s+(\S+)\s+(\d+)\s+(\d+):?(\d+)?\s+(.+)\s*$/)
1502                  or ( $6 =~ /^\.+[\s\xFF]*$/ )
1503                  or ( $6 =~ /^[\s\xFF]*$/ );
1504              my $name = $6;
1505              $name =~ s/(^\s*)|(\s*$)//g;
1506              #printlog( 'dev', 'name:', $name );
1507              printlog( 'dbg', 'skip path:', $name ), next
1508                if $config{'skip_path_mask'} and $name =~ /^$config{'skip_path_mask'}$/i;
1509              my $add = $dir . ( $dir eq '/' ? '' : '/' ) . $name;
1510              #printlog( 'dev', "DIR:$dir; name:$name; add:$add;\n$_" );
1511              ++$stat{'ftp_errors'}, printlog( 'dbg', 'BAD RECURSION SKIPPED [path dupes]:', $dir, $add ), next
1512                if $add =~ m|(/\Q$name\E){$config{'ftp_rec_path_dupes'},}$|;
1513              push( @thisdir, $add );    #v1:unshift v2:push
1514            }
1515            push( @toscan, @thisdir );
1516            unshift( @$content, $dir . ':' );
1517            $processor{'format'}{'ls'}->( join_url( \%url ), $content );
1518            $content = '';
1519            #!!! DIRS FILES !!!
1520            @toscan = (),
1521              printlog(
1522              'warn',
1523              "! recurse detected: $url{'host'} :",
1524              $work{'sizec'}->( $stat{'size'} ),
1525              $work{'filec'}->( $stat{'files'} )
1526              )
1527              if $work{'filec'}->( $stat{'files'} ) > hconfig( 'ftp_rec_max_number', $url{'host'} )
1528                and $work{'sizec'}->( $stat{'size'} ) / $work{'filec'}->( $stat{'files'} ) <
1529                hconfig( 'ftp_rec_min_size', $url{'host'} );
1530            last unless $work{'ftp_connected'};
1531          }
1532          last unless $work{'ftp_connected'};
1533        }
1534        if (@toscan) {
1535          ++$work{'scan_unfinished'};
1536          savearray( $toscanfile, \@toscan );
1537        }
1538      }
1539      $ftp->quit() if defined $ftp;
1540      purge_res( \%url ) if $config{'purge_every_res'};
1541    }
1542  };
1543  END { savearray( $toscanfile, \@toscan ) if @toscan and $toscanfile; }
1544}
1545$processor{'format'}{'html'} ||= sub {
1546  my %outh;
1547  my $host = shift(@_);
1548  my $scan = int( time() );
1549  my $prot = 'http';
1550  my $desc;
1551  for my $content (@_) {
1552    my $oneline = sub {
1553      my @l = split /href/i;
1554      for (@l) {
1555        if (/^=([\"\'])((\w|[\.=\/:?&%\-])+)\1/i) {    #???
1556          #my $url = decode_url($2);
1557          my $url = $2;
1558          #printlog('dev', 'hol:',$_);
1559          if ( $url =~ /^(http\:\/\/)/i ) {
1560            if ( $url =~ /^http\:\/\/$host/i )         #\w*\.?
1561            {
1562              if ( $url =~ /^http\:\/\/$host\/((\w|[\.\-])+)\/?/i ) {    #\w*\.?
1563                my $file = decode_url($1);
1564                ++$outh{$host}{$file}{'c'};
1565                #$outh{$host}{$file}{'desc'} = $desc;
1566                #$outh{$host}{$1}{'desc'} = cp_detect_trans(\$desc, \%detectcp, undef, $prot, $host);
1567              }
1568            }
1569          } elsif ( $url =~ /^(\S+\:\/\/)/i ) {
1570          } elsif ( $url =~ m{(?:^|/)((\w*[\.\-\%]*)+)(?:$|/)} ) {
1571            #} elsif ($url =~ m{((\w*|[.\-%]*)+)}){
1572            #printlog('dev', $host," url1:[$url] -> [$1]");
1573            my $file = decode_url($1);
1574            #printlog('dev', $host,' url2:',$url, " : $file");
1575            ++$outh{$host}{$file}{'c'};
1576            #$outh{$host}{$file}{'desc'} = $desc;
1577            #$outh{$host}{$1}{'desc'} = cp_detect_trans(\$desc, \%detectcp, undef, $prot, $host);
1578          }
1579        }
1580      }
1581    };
1582    oneliner( $oneline, $content );
1583  }
1584  for my $host ( keys %outh ) {
1585    for my $rr ( keys %{ $outh{$host} } ) {
1586      next if $rr =~ /cgi-bin/i;
1587      my %url = split_url( $prot . '://' . $host . '/' . $rr );
1588      #$static{'db'}->insert_hash_hash($config{'sql_tfile'},
1589      filesave( {
1590          '' => {
1591            %url,
1592            'size' => ( $url{'ext'} ? 1 : 0 ), (
1593              hconfig( 'unknown_time', $url{'host'} ) ne ''
1594              ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $url{'host'} ) )
1595              : ()
1596            ),
1597            'desc' => $outh{ $url{'host'} }{$rr}{'desc'},
1598            'scan' => $scan
1599          }
1600        }
1601      );
1602    }
1603  }
1604};
1605$processor{'prot'}{'http'}{'get'} ||= $processor{'prot'}{'https'}{'get'} ||= sub {
1606  local $config{'cachedir'} = $config{'tmpdir'};
1607  return http_get( $_[0], 1 );
1608};
1609$processor{'prot'}{'http'}{'func'} ||= sub {
1610  if ( $config{'use_old_http'} ) {
1611    my $scan = int( time() );
1612  SKIP0: for my $url (@_) {
1613      next unless $url =~ /\S/;
1614      $url =~ s/\/+$//;
1615      my %url     = split_url($url);
1616      my @content = ( http_get($url) );
1617      my %detectcp;
1618      if ( $content[0] =~ /\<.+\>/ ) {
1619        for my $skipmask ( @{ $config{'http_skip'} } ) { next SKIP0 if $content[0] =~ /$skipmask/i; }
1620        my ($desc) = $content[0] =~ /\<title.*\>\s*(.+)\s*\<\/title\s*\>/is;
1621        cp_detect_trans( $desc, \%detectcp, undef, undef, $url{'prot'}, $url{'host'} );
1622        $work{'desc'}{ $url{'host'} }{ $url{'prot'} } = $desc;
1623        $processor{'format'}{'html'}->( $url{'host'}, \@content );
1624        $stat{'dirs'}  += 1;
1625        $stat{'files'} += 1;
1626        $stat{'size'}  += 1;
1627      }
1628    }
1629  } else {
1630    my $scan = int( time() );
1631  SKIP0: for my $url (@_) {
1632      next unless $url =~ /\S/;
1633      $url =~ s/\/+$//;
1634      http_dl(
1635        $url,
1636        sub {
1637          my $url = $_[0];
1638          my %url = split_url($url);
1639          #my @content = ( http_get($url) );
1640          my @content = ( $_[1] );
1641          my %detectcp;
1642          if ( $content[0] =~ /\<.+\>/ ) {
1643            for my $skipmask ( @{ $config{'http_skip'} } ) { next SKIP0 if $content[0] =~ /$skipmask/i; }
1644            my ( $desc, $content );
1645            ($desc) = $content[0] =~ /\<title.*\>\s*(.+?)\s*\<\/title\s*\>/is;
1646            #if (psmisc::is_hash_size $config{'sql'}{'table'}{ $config{'sql_tfile'} }{'content'}) {
1647            #$content = striplinks($content[0]);
1648            #} else {
1649            $content = psmisc::html_strip( $config{'http_no_strip'} ? $content[0] : striplinks( $content[0] ) );
1650            $content =~ s/&\w+;/ /g;
1651            $content =~ s/\s+/ /g;
1652            #}
1653            cp_detect_trans( $desc,    \%detectcp, undef, undef, $url{'prot'}, $url{'host'} );
1654            cp_detect_trans( $content, \%detectcp, undef, undef, $url{'prot'}, $url{'host'} );
1655            #dmp \%detectcp;
1656            #printlog('dev', "desc:[$desc]");
1657            #$work{'desc'}{ $url{'host'} }{ $url{'prot'} } = $desc;
1658            #$processor{'format'}{'html'}->( $url{'host'}, \@content );
1659            $static{'db'}->insert_hash(
1660              $config{'sql_tfile'}, {
1661                %url, (
1662                  keys %{ $config{'sql'}{'table'}{ $config{'sql_tfile'} }{'param'} } ? ()
1663                  : ( 'ext' => $url{'ext'} . ( $url{'param'} ? '?' . $url{'param'} : '' ) )
1664                ),
1665                'size' => length $content[0], (
1666                  hconfig( 'unknown_time', $url{'host'} ) ne ''
1667                  ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $url{'host'} ) )
1668                  : ()
1669                ),
1670                'content' => $content,
1671                'desc'    => $desc,
1672                'scan'    => $scan,
1673              }
1674            );
1675            #$stat{'dirs'}  += 1;
1676            $stat{'files'} += 1;
1677            $stat{'size'}  += length $content[0];
1678          }
1679        },
1680        sub {
1681          my $url = $_[0];
1682          my %url = split_url($url);
1683          #my @content = ( http_get($url) );
1684          $static{'db'}->insert_hash(
1685            $config{'sql_tfile'}, {
1686              %url, (
1687                keys %{ $config{'sql'}{'table'}{ $config{'sql_tfile'} }{'param'} } ? ()
1688                : ( 'ext' => $url{'ext'} . ( $url{'param'} ? '?' . $url{'param'} : '' ) )
1689              ),
1690              'size' => 1, (
1691                hconfig( 'unknown_time', $url{'host'} ) ne ''
1692                ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $url{'host'} ) )
1693                : ()
1694              ),
1695              #'desc' => $desc,
1696              'scan' => $scan
1697            }
1698          );
1699          #$stat{'dirs'}  += 1;
1700          $stat{'files'} += 1;
1701          $stat{'size'}  += 1;
1702        }
1703      );
1704    }
1705  }
1706};
1707{
1708  my $state;
1709
1710  sub http_get2 {
1711    my ( $what, $asfile, $lwpopt, $method ) = @_;
1712    printlog( 'dev', 'http_get', @_ );
1713    my %url = split_url($what);
1714    use LWP::UserAgent;
1715    use URI::URL;
1716    my $ua = LWP::UserAgent->new(
1717      #$config{lwp}          = { timeout => 10, max_size => 1000000, max_redirect => 1 };
1718      max_size     => hconfig( 'max_size',     $url{host}, $url{prot} ) || 1000000,
1719      max_redirect => hconfig( 'max_redirect', $url{host}, $url{prot} ) || 1,
1720      timeout      => hconfig( 'timeout',      $url{host}, $url{prot} ) || 10,
1721      %{ $config{lwp} || {} }, %{ $lwpopt || {} }
1722    );
1723    if ( ref $config{'proxy'} eq 'ARRAY' ) {
1724      local @_ = @{ psmisc::shuffle( $config{proxy} )->[0] };
1725      $ua->proxy(@_);
1726    } elsif ( $config{'proxy'} ) {
1727      $ua->proxy( 'http', $config{'proxy'} );
1728    }
1729    $method ||= 'GET';
1730    #my $resp = $method eq 'HEAD' ? $ua->head($what) :
1731    my $resp = (
1732      $ua->request(
1733        new HTTP::Request( $method, new URI::URL($what), new HTTP::Headers( 'User-Agent' => $config{'crawler_name'} ) )
1734      )
1735    );
1736    $stat{'bytes'} += length( $resp->content );
1737    #my $type = lc( ( split /[\s;,]/, $resp->header('Content-Type') )[0] );
1738    my $type = http_type( undef, $resp->header('Content-Type') );
1739    #printlog ("Type",$type);
1740    #printlog ("hdr",$resp->as_string());
1741    $stat{'bytes_skipped'} += length( $resp->content ), printlog( 'skip', $what, "type1=[$type]" ),
1742      $state->{callback_skip}->( $what, $type ), return ( wantarray ? ( undef, $resp ) : undef )
1743      unless $config{http_avail_types}{$type};
1744    #print "[H:",Dumper($resp);
1745    #print "[H:",$resp->code();
1746    if ( $resp->is_success ) {
1747      ++$stat{'urls'};
1748      return wantarray ? ( $resp->as_string(), $resp ) : $resp->as_string();
1749    } else {
1750      printlog( 'warn', 'http getfail', $what, $resp->message, );    #Dumper($resp)
1751      #return $asfile ? undef: $resp->message;
1752      return wantarray ? ( undef, $resp ) : undef;
1753    }
1754  }
1755
1756  sub http_type_full {
1757    return $_[1] if $_[1];
1758    return $state->{type}{ $_[0] } if defined $state->{type}{ $_[0] };
1759    return $state->{type}{ $_[0] } = ( ( http_get2( $_[0], undef, undef, 'HEAD' ) )[1]->header('Content-Type') ) || '';
1760  }
1761  sub http_type { return lc( ( split /[\s;,]/, http_type_full(@_) )[0] ); }
1762
1763  sub http_parse {
1764    my ( $url, $dir ) = @_[ 1 .. 2 ];    #$state,
1765    $dir =~ s{[?#].*}{}g;
1766    $dir =~ s{[^/]+?$}{};
1767    #for ( $_[0] =~ m{<a.+?href=["']?([^'"\s>]+)['"]?.*?>}ig ) {
1768    for (
1769      $_[0] =~
1770      #m{<(?:a|frame).+?(?:href|src)=["']?([^'"\s>]+)['"]?.*?>}ig
1771      m{<(?:a|frame)[^>]+(?:href|src)=["']?([^'"\s>]+)['"]?[^>]*>}ig
1772      )
1773    {
1774      #dmp ("[c=$_]\n"),
1775      #$_ = $c, next unless defined $_;
1776      #print ("set $_ = $c]\n"),
1777      s/\s//g;
1778      #printlog( 'dev', "link0 [$_]" );
1779      #printlog( 'warn', "not protocol [$_]" ),
1780      next if m{^\w+:(?:[^/]|$)};
1781      my $no_check = 0;
1782      ++$no_check if s/#.*//g;
1783      s/&amp;/&/g;
1784      s{^/*\.\.?/*}{}g;
1785      next unless length $_;
1786      $_ = ( s{^\.*/}{}g ? $state->{root} : $dir ) . $_ unless m{^\w+://};
1787      s{/+\./+}{/}g;
1788      #s{/[^/]+/\.\.(/|$)}{$1}g;
1789      #s{[^/]+/\.\./?}{}g;
1790      #s{[^/]+/+\.\.}{}g;
1791      s{[^/]+/+\.\.}{}g while m{[^/]+/+\.\.};
1792      s{/+\.$}{}g;
1793      #s{(?<!(?:^\w:))/{2,}}{/};
1794      s{^(\w+://)}{};
1795      my $prot = $1;
1796      s{/{2,}}{/}g;
1797      $_ = $prot . $_;
1798      #printlog( 'dev', "link1 [$_]" );
1799      if (m{^$state->{root}}) {
1800        my ( $filename, $script ) = m{/([^/]*?)(?:\?(.*))?$};
1801        my $ext = lc( ( $filename =~ m{\.([^\.]+)$} )[0] );
1802        #printlog('dev', "$_ => filename=[$filename] ext=[$ext] script=[$script] $force_download");
1803        printlog( 'skip', $_, "ext=[$ext] filename=[$filename]" ), $state->{callback_skip}->( $_, $ext ), next
1804          if $config{http_ext_skip}{$ext};
1805        ++$no_check if !$ext or !$filename or $script or $config{http_ext_force}{$ext};
1806        #if ($force_download) {
1807        $state->{int}{$_} ||= { n => ++$state->{links}, check => !$no_check };
1808        $state->{depth}{$_} = $state->{depth}{$url} + 1
1809          if !$state->{depth}{$_}
1810            or $state->{depth}{$_} > $state->{depth}{$url} + 1;
1811        #}
1812      } else {
1813        $state->{ext}{$_} ||= ++$state->{links};
1814      }
1815    }
1816  }
1817
1818  sub http_page {
1819    return if $state->{download}{ $state->{current} };
1820    {
1821      printlog( "warn", "max_depth reached on $state->{current} $state->{depth}{$state->{current}}" ), last
1822        if $state->{depth}{ $state->{current} } > hconfig( 'http_max_depth', $state->{current_url}{'host'} );
1823      local @_;
1824      printlog( 'skip', $state->{current}, "robots.txt", @_ ), last
1825        if @_ = grep { $state->{current} =~ m{\w+://[^/]+\Q$_} ? $_ : () } keys %{ $state->{disallow} };
1826      if ( $state->{int}{ $state->{current} }{check} ) {
1827        my $type = http_type( $state->{current} );
1828        printlog( 'skip', $state->{current}, "type2=[$type]" ), $state->{callback_skip}->( $state->{current}, $type ), last
1829          unless $config{http_avail_types}{$type};
1830      }
1831      my ( $content, $resp ) = http_get2( $state->{current} );
1832      #CB!     print { $state->{OUT} }     $content;
1833      $state->{callback_page}->( $state->{current}, $content );
1834      printlog( "info", 'downloaded', $state->{page_limit}, $state->{current},
1835        "depth=$state->{depth}{$state->{current}}, size=",
1836        length($content), 'type=', $resp->header('Content-Type') )
1837        if length($content);
1838      http_parse( $content, $state->{current}, $resp->base );    #$state,
1839    }
1840    ++$state->{download}{ $state->{current} };
1841  }
1842
1843  sub http_dl {
1844    my ( $root, $callback_page, $callback_skip ) = @_;           #, $state
1845    #local %config = %config;
1846    $root .= '/' unless $root =~ m{/$};
1847    $root = 'http://' . $root unless $root =~ m{^\w+://};
1848    $state = {};
1849    $state->{callback_page} = $callback_page || sub { };
1850    $state->{callback_skip} = $callback_skip || sub { };
1851    $state->{root}          = $root;
1852    $state->{current} ||= $root;
1853
1854    if ( !$state->{robots_ok} ) {
1855      local $_ = $root;
1856      #$state->{current} = ;
1857      s{(\w+://[^/]+).*$}{$1};
1858      $_ .= '/robots.txt';
1859      for ( split /\n+/, http_get($_) ) {
1860        #printlog('rob', $_);
1861        #++$state->{disallow}{$1}, printlog( 'rob', $1 ) if m{Disallow:\s*(.+)\s*$}i;
1862        if (m{Disallow:\s*(.+)\s*$}i) {
1863          $_ = $1;
1864          s/[\x0D\x0A]//g;
1865          s{/$}{}g;
1866          ++$state->{disallow}{$_}, printlog( 'rob', "[$_]" );
1867        }
1868      }
1869      ++$state->{robots_ok};
1870    }
1871    #$state->{int}{$root} = {};
1872    $state->{out} = $root;
1873    $state->{out} =~ s{\w+://}{};
1874    $state->{out} =~ s{/$}{}g;
1875    $state->{out} =~ s{/}{_}g;
1876    my %url = split_url($root);
1877    while ( $state->{current} ) {
1878      printlog( 'err', "pages limit reached on $state->{root}", $state->{page_limit}, ), last
1879        if ++$state->{page_limit} > hconfig( 'http_max_pages', $state->{current_url}{'host'} );
1880      http_page($state);
1881      $state->{current} = ( (
1882          hconfig( 'http_next_selector', $state->{current_url}{'host'} )
1883            ->( grep { !$state->{download}{$_} } keys %{ $state->{int} } )
1884        )[0]
1885      );
1886      #printlog('dev' ,'selected', $state->{current},'from', grep { !$state->{download}{$_} } keys %{ $state->{int} });
1887      $state->{current_url} = split_url( $state->{current} );
1888      sleep( hconfig( 'http_page_sleep', $state->{current_url}{'host'} ) );
1889    }
1890    #close( $state->{OUT} );
1891    $state->{finished} = 1;
1892    #save_ext();
1893  }
1894}
1895$config{'select_weight'} //= 0.1;
1896$config{'select_around'} //= 1;
1897$config{'ret_field'}     //= 'block';    #'text'
1898$config{'ret_sep'}       //= ' ';
1899
1900sub striplinks {                         # $Id: crawler.pl 4843 2013-08-14 12:17:58Z pro $
1901  my $file = shift;
1902  #  print "[$file]";
1903  #my @blocks = split m{</?a} ;
1904  #my @blocks = split m{(<a.*?>)} ;
1905  $file =~ s{<$_.*?>.*?</$_.*?>}{}gsi for qw(style script);
1906  $file =~ s{<!--.*?-->}{}gs;
1907  my @blocks = split m{((?:<a.*?>.*?</a.*?>)|(?:<(?:td).*?>))}is, $file;
1908  my @flags;
1909  my $n = 0;
1910  my ( $depth, %avg, %avgn, %min );
1911  for my $block (@blocks) {
1912    $flags[$n]{'n'} = $n;
1913    $flags[$n]{'link'} = 1 if $block =~ /^<a/i;
1914    my $blockstrip = $block;
1915    $blockstrip =~ s{</?$_.*?>}{}gsi for qw(p br);
1916    #print "\nbs:[$blockstrip]\n";
1917    $flags[$n]{'blocklen'} = length $blockstrip;
1918    #print "BLOCK[$block]\n";
1919    ++$depth,
1920      #print("+[$_]")
1921      for $block =~ m{<\w+.*?>}gs;
1922    --$depth,
1923      #print("-[$_]")
1924      for $block =~ m{</\w+.*?>}gs;
1925    --$depth,
1926      #print("-![$_]")
1927      for $block =~ m{<\w+.*?/>}gs;
1928    #$depth += $block =~ m{<\w+>}g;
1929    #print "DEP:". join ':', ($block =~ m{</?\w+>}g);
1930    #    print "DEP:" . ( $flags[$n]{'depth'} = $depth );
1931    #    my $text = HTML::Entities::decode($block);
1932    my $text = HTML::Entities::decode_entities($block);
1933    $text =~ tr/\xAB\xBB\xA0/<> /;
1934    #    $text =~ s/&nbsp;/ /g;
1935    #    $text =~ s/&\w+;/ /g;
1936    #unless ($flags[$n]{'link'})
1937    {
1938      $text =~ s{<.*?>}{}gs;
1939      $text =~ s/\W/ /g;
1940      $text =~ s/\d/ /g;
1941      $text =~ s/\s+/ /g;
1942      $text =~ s/\s+$//g;
1943      $text =~ s/^\s+//g;
1944      $flags[$n]{'textlen'} = length $text;
1945    }    # else {$text= '' ;}
1946    $flags[$n]{'bt'} = $flags[$n]{'blocklen'} / $flags[$n]{'textlen'} if $flags[$n]{'textlen'};
1947    $avg{bt} += $flags[$n]{'bt'};
1948    ++$avgn{bt} if $flags[$n]{'bt'};
1949    $min{bt} = $n if $flags[$n]{'bt'} and $flags[$n]{'bt'} < $flags[ $min{bt} ]{'bt'};
1950    local $Data::Dumper::Indent = 0;
1951    #    print Dumper( $flags[$n] ) . " [$text]\n";    #$block
1952    $flags[$n]{text}  = $text;
1953    $flags[$n]{block} = $block;
1954  } continue {
1955    ++$n;
1956  }
1957  $avg{$_} /= $avgn{$_} for keys %avg;
1958  #print join ":", @blocks;
1959  #  print 'avg:', join ':', map { "$_=$avg{$_}" } keys %avg;
1960  #  print "\nminbt:", Dumper( $flags[ $min{bt} ] );
1961  #  print "\n\nSelecting\n\n";
1962  my @a = grep { $_->{textlen} } @flags;
1963  my $step = 1 / ( @a || 1 );
1964  my $n = 0;
1965  $_->{weight_textlen} = $n += $step for sort {
1966    #    $b->{bt} <=> $a->{bt}
1967    $a->{textlen} <=> $b->{textlen}
1968  } @a;
1969  #  $step = 1 / @flags;
1970  $n = 0;
1971  #   @a    = grep { $_->{textlen} } @flags;
1972  $_->{weight_bt} = $n += $step for sort { $b->{bt} <=> $a->{bt} } @a;
1973  $_->{weight} = $_->{weight_bt} + $_->{weight_textlen} for @a;
1974  my $max_weight;
1975  #print( Dumper($_) . "\n" ),
1976  $max_weight = $_->{weight} for sort {
1977    #    $b->{bt} <=> $a->{bt}
1978    #    $a->{textlen} <=> $b->{textlen}
1979    $a->{weight} <=> $b->{weight}
1980    }
1981    #    grep {    $_->{bt} and $_->{bt} < $avg{bt}}
1982    grep { !$_->{link} } @a;
1983  for my $n ( grep { $_->{weight} > $max_weight - $config{'select_weight'} } @flags ) {
1984    #   $n->{selected} = 1;
1985    $flags[ $n->{n} + $_ ]{selected} = 1 for ( -$config{'select_around'} .. $config{'select_around'} );
1986  }
1987  #print "\n\nResult:\n\n";
1988  my $ret;
1989  for my $n ( grep { $_->{selected} } @flags ) {
1990    #print "$n->{text} ";
1991    $ret .= $n->{ $config{ret_field} } . $config{ret_sep};
1992  }
1993  return $ret;
1994}
1995
1996sub updateuser {
1997  my ( $dc, $user, $params ) = @_;
1998  my $peerlist = $dc->{'adc'} ? $dc->{'peers'} : $dc->{'NickList'};
1999#dmp $user, $params;
2000#printlog( 'dbg', 'updateuser: skipping user', $user, 'with share =', $peerlist->{$user}{INF}{SS}, $peerlist->{$user}{'sharesize'} , caller, Dumper $params   ),
2001  return if ( !$peerlist->{$user}{'sharesize'} and !$peerlist->{$user}{INF}{SS} ) and length $user;
2002  if ($user) {
2003    $params->{'ip'} ||= $peerlist->{$user}{INF}{I4} || $peerlist->{$user}{INF}{I6} || $peerlist->{$user}{'ip'};
2004    $params->{'desc'} ||= $peerlist->{$user}{INF}{DE}
2005      || $peerlist->{$user}{'description'};    #if $peerlist->{$user}{'description'};
2006    $params->{'cid'} ||= $peerlist->{$user}{INF}{ID};
2007  }
2008  #printlog('dev', "userdesc1=[$params->{'desc'}] ($user)") if $params->{'desc'};
2009  #$params->{'desc'} ||= $params->{'DE'};
2010  $params->{'desc'} =~ s/^(?:\s*(?:\{\S*?\})?(?:\[\S*?\])?)+//g;
2011  #printlog('dev', "userdesc2=[$params->{'desc'}]") if $params->{'desc'};
2012  #$params->{'desc'} =~ s/^\s*(?:\[\S+?\])\s*PeerWeb\s*DC\+\+\s*//ig;
2013  $params->{'desc'} =~ s/^\s*(PeerWeb|Apex)\s*DC\+\+\s*//ig;
2014  #printlog('dev', "userdesc3=[$params->{'desc'}]") if $params->{'desc'};
2015  $params->{'prot'} ||= $dc->{'adc'} ? 'adc' : 'dchub';
2016  $params->{'dcuser'} ||= $params->{NI} || $peerlist->{$user}{INF}{NI} || $user;
2017  #delete $params->{'port'} if $params->{'prot'} eq 'dchub' and $params->{'port'} == 411;
2018  $params->{'host'} = full_host( { 'prot' => $params->{'prot'}, %$dc } );    #, %$params
2019  #printlog('dev',' host= ',$params->{'host'},'from',$dc->{'host'} ,$dc->{'port'},'and',%$params);
2020  $params->{'time'} ||= ( $params->{'off'} ? int( time() ) - $config{'online_minutes'} * 120 : int( time() ) )
2021    unless $params->{'no_time'};
2022  #printlog('dev','timeup',$params->{'dcuser'}, $params->{'time'}, int(time()));
2023  #!$params->{'host'}   = cp_trans( $config{ 'cp_' . $params->{'prot'} }, $config{'cp_db'}, $params->{'host'} );
2024  #!$params->{'dcuser'} = cp_trans( $config{ 'cp_' . $params->{'prot'} }, $config{'cp_db'}, $params->{'dcuser'} );
2025  #!$params->{'desc'}   = cp_trans( $config{ 'cp_' . $params->{'prot'} }, $config{'cp_db'}, $params->{'desc'} );
2026  $static{'db'}->update( $config{'sql_tresource'}, undef, { 'path' => '', %$params } ) unless $params->{'no_res'};    #, 1
2027  unless ( $params->{'no_file'} ) {
2028    $static{'db'}->update( $config{'sql_tfile'}, undef, { 'path' => $_, %$params } )
2029      for ( '', '/' );
2030  }
2031  $params->{'port'} ||= $peerlist->{$user}{INF}{U4} || $peerlist->{$user}{INF}{U6} || $peerlist->{$user}{'port'}
2032    if $user and $config{'allow_ping_user_port'};
2033  delete $params->{'port'} if $params->{'port'} > 65535;
2034  #printlog('dev', 'update', %$params);
2035  $static{'db'}->update( $config{'sql_thost'}, undef, $params );    #, 1 'ip' => $ip,
2036}
2037$processor{'prot'}{'dchub'}{'func'} ||= sub {
2038  my ($filelists) = $stat{'filelists'};
2039  my $dc;
2040  foreach my $url (@_) {
2041    my %url = split_url($url);
2042    #printlog('split:', Dumper(\%url));
2043    $url{'dcuser'} = $1 if $url{'host'} =~ s|/(.+)||;
2044    my $hub = join_url( { 'host' => $url{'host'}, 'port' => $url{'port'} } );
2045    #next if $config{'dchub'}{$hub}{'disabled'};
2046    next if hconfig( 'disabled', $url{'prot'}, $hub );
2047    printlog(
2048      'dc',
2049      "Creating bot ver",
2050      ( $config{'version'} or 'r' . ( split( ' ', '$Revision: 4843 $' ) )[1] ),
2051      $Net::DirectConnect::VERSION, "to $url",
2052    );
2053    local %_ = ( %{ $config{'fine'}{ $url{'prot'} } }, %{ $config{'fine'}{$hub} } );
2054    $_{modules}{filelist} = 1 if $_{share};
2055    $dc = Net::DirectConnect->new(
2056      #'host' => $url,
2057      'host' => join_url( { map { $_ => $url{$_} } qw(prot host) } ),
2058      'port' => $url{'port'},
2059      'log'  => sub {
2060        my $dc = shift;
2061        #printlog('ERR', $dc, @_), return unless ref $dc;
2062        printlog( shift, "[$dc->{'number'}]", @_, );
2063      },
2064      'no_charset_console' => 1,
2065      #%{ $config{'fine'}{'dchub'} },
2066      'no_auto_share_downloaded' => 1,
2067      'sql'                      => $config{'sql'},
2068      'db'                       => $static{'db'},
2069      %_,
2070      'auto_connect' => 0,
2071      'auto_GetINFO' => 1,
2072    );
2073    #printlog ('dev', 'SH1', Dumper $config{'fine'}{'dchub'});
2074    $dc->{'sharesize'} = psmisc::human( 'number_k', $dc->{'sharesize'} );
2075    #printlog ('dev', 'SH2', Dumper $dc);
2076    my $nicklistres = 1 if $url{'dcuser'};
2077    $dc->{'handler'}{'NickList'} = sub {
2078      my $dcu = shift;
2079      updateuser( $dc, $_ ) for keys %{ $dc->{'NickList'} };
2080      printlog( 'dc', "Nicklist recieved = ", scalar keys %{ $dc->{'NickList'} } );
2081      $nicklistres = 1;
2082    };
2083    $dc->{'handler'}{'MyINFO'} = sub {
2084      my $dcu = shift;
2085      ($_) = $_[0] =~ /\S+\s+(\S+)\s+(.*)/;
2086      updateuser(
2087        $dc, $_, (
2088          hconfig( 'dcbot_size_myinfo', $hub )
2089            && $dc->{'NickList'}->{$_}{'sharesize'} > 0 ? { ( 'size' => $dc->{'NickList'}->{$_}{'sharesize'} ) } : ()
2090        )
2091      );
2092    };
2093    $dc->{'handler'}{'chatline'} = sub {
2094      my $dcu = shift;
2095      ($_) = $_[0] =~ /^<(\S+)>/;
2096      updateuser( $dc, $_ ) if $dc->{'NickList'}->{$_} or $config{'dcbot_update_unknown'};    #Hub-Security
2097    };
2098    $dc->{'handler'}{'welcome'} = sub {
2099      my $dcu = shift;
2100      printlog(
2101        'dc', 'welcome:', $_[0]    #scalar cp_trans( $config{ 'cp_' . $url{'prot'} }, $config{'cp_log'}, $_[0] )
2102      );
2103    };
2104    $dc->{'handler'}{'quit'} = sub {
2105      my $dcu = shift;
2106      updateuser( $dc, $_, { 'off' => 1 } );
2107    };
2108    $dc->{'handler'}{'BadPass'} = sub {
2109      my $dcu = shift;
2110      printlog( 'err', 'BadPassword' );
2111    };
2112    $dc->{'handler'}{'LogedIn'} = sub {
2113      my $dcu = shift;
2114      printlog( 'dbg', $_[0], 'is LogedIn' );
2115    };
2116    $dc->{'handler'}{'user_ip'} = sub {
2117      my $dcu = shift;
2118      #printlog('dev', 'hUserip', @_);
2119      updateuser(
2120        $dc,
2121        $_[0], {
2122          'ip' => $_[1],
2123          #( $_[2] != 411 ? ( 'port' => $_[2] ) : () ),
2124          'port'    => $_[2],
2125          'no_res'  => 1,
2126          'no_file' => 1,
2127          'no_time' => 1
2128        }
2129      ) if $_[1] ne name_to_ip( split_url( $url{'host'} )->{'host'} );
2130    };
2131    $dc->{'handler'}{'INF'} = sub {
2132      my $dcu = shift;
2133      #dmp @_;
2134      #($_) = $_[0] =~ /\S+\s+(\S+)\s+(.*)/;
2135      return if $_[0][-1] eq 'I';
2136      updateuser( $dc, $_[-1]{ID} || $_[0][-1], $_[-1] );
2137      #  $dc, $_, (
2138      #    hconfig( 'dcbot_size_myinfo', $hub )
2139      #      && $dc->{'NickList'}->{$_}{'sharesize'} > 0 ? { ( 'size' => $dc->{'NickList'}->{$_}{'sharesize'} ) } : ()
2140      #  )
2141      #);
2142    };
2143    $dc->{'handler'}{'QUI'} = sub {
2144      my $dcu = shift;
2145      dmp \@_;
2146      updateuser( $dc, $_[1], { 'off' => 1 } );
2147    };
2148    $dc->{'auto_recv'} = 0 if $url{'dcuser'};
2149    printlog( 'dc', "Connecting to", $url );
2150    $dc->connect();
2151    #printlog( 'dev', __LINE__);
2152    $dc->wait();    #for 1 .. 5;
2153    #printlog( 'dev', __LINE__, 'waited');
2154    next unless $dc->{'socket'};
2155    $dc->wait_connect(20);    #for 1 .. 5;
2156    #printlog( 'dev', __LINE__, 'waited',20);
2157    #1    while !$dc->recv(1);
2158    printlog( 'warn', "connect timeout", $dc->{'status'} ), next unless $dc->{'status'} eq 'connected';
2159    printlog( 'dbg', "connected", $dc->{'status'} );
2160    if ( !$dc->{'adc'} and !$url{'dcuser'} and !$nicklistres and !$dc->{'auto_GetNickList'} ) {
2161      printlog( 'dbg', "Get nicks" );
2162      $dc->cmd('GetNickList');
2163      next unless $dc->{'socket'};
2164      my $trynickres = hconfig( 'nicks_receive', $hub ) || 50;
2165      #$dc->recv(1), sleep(0.1) while --$trynickres > 0 and !$nicklistres;
2166      $dc->wait(5) while --$trynickres > 0 and !$nicklistres;
2167    }
2168    #printlog( 'dbg', "nicks:", $nicklistres, );
2169    printlog( 'dbg', "Starting [$dc->{'host'} : $dc->{'port'}]" );
2170    my $saveto =
2171        $config{'datadir'}
2172      . $config{'slash_sys'}
2173      . $dc->{'host'}
2174      . ( $dc->{'port'} != $config{'scanner'}{ $url{'prot'} }{'port'} ? '.' . $dc->{'port'} : '' ) . '.'
2175      . $url{'prot'};    #'dchub';
2176    $dc->wait(5);
2177    next unless $dc->{'socket'};
2178    my $peerlist = $dc->{'adc'} ? $dc->{'peers_cid'} : $dc->{'NickList'};
2179    #my $peerlist = $dc->{'adc'} ? $dc->{'peers'} : $dc->{'NickList'};
2180    printlog( 'dev', "work", hconfig( 'dcbot_wait_start', $hub ) );
2181    $dc->wait( hconfig( 'dcbot_wait_start', $hub ) || 5 );
2182    #psmisc::file_rewrite( 'dump', Dumper $dc);
2183    printlog( 'dbg', "Starting users", ( $url{'dcuser'} or scalar keys %$peerlist ) );
2184    my $hubhost = $url{'host'};
2185    my $hubport = $url{'port'};
2186    my $n;
2187    my @todo = ( $url{'dcuser'} || ( sort keys %$peerlist ) );
2188    printlog(
2189      'info', "todo users:", scalar @todo,    #Dumper \@todo#, $dc
2190    );
2191
2192    for my $user (@todo) {
2193      #printlog( 'dev', "looking at [$user]");
2194      printlog( 'err', 'skipping bad user', "[$user]" ), next if !length $user or $user !~ /\S/;
2195      last if scan_stop;
2196      ++$n;
2197      my $get = 1;
2198      $url{'dcuser'} = $peerlist->{$user}{INF}{NI} || $user;
2199      $url{'host'} = full_host( { %url, 'host' => $hubhost, 'port' => $hubport } );
2200      #printlog('split:', Dumper(\%url));
2201      my %url_db = %url;
2202      #cp_trans_hash( $config{ 'cp_' . $url{'prot'} }, $config{'cp_db'}, \%url_db );
2203      #my $file =           ;
2204      my $file = encode_url(
2205        #scalar cp_trans( $config{ 'cp_' . $url{'prot'} }, $config{'cp_shell'},
2206        Encode::encode(
2207          'utf-8',    #hconfig( 'cp_shell' ),
2208          $dc->{'adc'} ? $peerlist->{$user}{INF}{NI} : $user, Encode::FB_DEFAULT
2209        ),
2210        $config{'encode_dc_user_mask'}
2211      );
2212      my $saveas_noext = $saveto . $config{'slash_sys'} . $file;
2213      my $saveas       = $saveas_noext . '.xml.bz2';
2214#my $saveasshell = Encode::encode hconfig( 'cp_shell' ), $saveas;
2215#$saveas_noext = Encode::encode hconfig( 'cp_shell' ), $saveas_noext;
2216#printlog( 'dev', "1user=$url{'dcuser'}; host=$url{'host'}; get=$get; config{'no_dcbot_download'}=$config{'no_dcbot_download'}" );
2217      my $sharesize = $peerlist->{$user}{INF}{SS} || $peerlist->{$user}{'sharesize'};
2218      if ( !hconfig( 'no_dcbot_download', $hub, $user ) ) {
2219        printlog(
2220          'dbg', 'looking at', $url{'host'}, '/', $url{'dcuser'}, ' filelist:', -s $saveas, int -M $saveas, $n, '/',
2221          scalar keys %$peerlist, 'share:', $sharesize,
2222          #Dumper $peerlist->{$user}
2223        );
2224        #printlog( 'dbg', "skip [$user] (filelist exists)" ), $get = 0
2225        #if
2226        #( !hconfig( 'forceforce', $hub ) and -s $saveto . $config{'slash_sys'} . $file . '.xml.bz2' );
2227        printlog( 'dbg', "skip [$user] (self)" ), $get = 0
2228          if $user eq $dc->{'Nick'}
2229            or $dc->{'INF'}{'SID'} eq $user
2230            or $dc->{'INF'}{'CID'} eq $user;
2231        printlog( 'dbg', "skip [$user] (scanned)" ), $get = 0
2232          if (
2233          !hconfig( 'forcedc', $hub, $user )
2234          and (( -s $saveas and ( ( time - $^T + 86400 * -M $saveas ) < $config{'dcbot_client_period'} ) )
2235            or ( -s $saveas . '.ok' and ( ( time - $^T + 86400 * -M $saveas . '.ok' ) < $config{'dcbot_client_period'} ) ) )
2236          ) or !allowscan( \%url_db, $config{'sql_tresource'}, undef, $config{'dcbot_client_period'} );
2237        my $already = dcscanned($user);
2238        #dmp 'already', $already;
2239        if (
2240          ( $already->{tiger} or $already->{cid} ) and (
2241            time - $already->{scan} < $config{'dcbot_client_period'}
2242            #or $scan < $already->{scan}
2243          )
2244          )
2245        {
2246          printlog( 'dbg', "skip [$user] (cid: already scanned)", $config{'dcbot_client_period'}, time - $already->{scan} ),
2247            $get = 0;
2248        }
2249      }
2250#$dc->cmd( 'GetINFO', $user ), $dc->work()if !$dc->{'adc'}and !hconfig( 'dcbot_no_getinfo', $hub, $user )and !$peerlist->{$user}{'info'}and grep { $_ ne 'INF' } keys %{ $peerlist->{$user} };    #, $dc->recv()
2251      if ( !hconfig( 'no_dcbot_download', $hub, $user ) ) {
2252        printlog( 'dbg', "skip [$user] (share $sharesize bytes)" ), $get = 0
2253          if !hconfig( 'forcedc', $hub, $user )    #!$config{'force'}
2254            and ( defined($sharesize) or hconfig( 'no_dcbot_download_unknown', $hub, $user ) ) and !$sharesize;
2255        printlog( 'dbg', "skip [$user] unable get from passive user" ), $get = 0
2256          if $dc->{'M'} eq 'P' and ( ( $dc->{adc} and !$peerlist->{$user}{INF}{I4} ) or $peerlist->{$user}{'M'} eq 'P' );
2257        printlog( 'dbg', "skip [$user] no free user slots ($peerlist->{$user}{'S'})" ), $get = 0 if    #!$config{'force'}
2258          !$dc->{'adc'} and !hconfig( 'forcedc', $hub, $user ) and $peerlist->{$user}{'S'} eq '0';
2259      }
2260#printlog( 'dev', "2user=$url{'dcuser'}; host=$url{'host'}; get=$get; config{'no_dcbot_download'}=$config{'no_dcbot_download'}" );
2261      if ( !hconfig( 'no_dcbot_download', $hub, $user ) and $get ) {
2262        printlog( 'dc', "Skip unknown user [$user]" ), next
2263          if !keys %{ $peerlist->{$user}{'INF'} } and !grep { $_ ne 'INF' } keys %{ $peerlist->{$user} };
2264        printlog( 'dc',  "Get filelist from [$user]" );
2265        printlog( 'dbg', "get: [$saveas_noext] user=$user to[$saveto]" );
2266        mkdir_rec( $saveto, 0777 );
2267        $dc->get( $user, '', $saveas_noext );                                                          #.get
2268        ++$stat{'filelists'};
2269        #$dc->wait(1, hconfig( 'dcbot_pause',  $hub )) and
2270        #$dc->wait(10, 0.1) for 0..5;
2271        #printlog( 'dev',  "sleep", hconfig( 'dcbot_pause',  $hub ) );
2272        $dc->wait( hconfig( 'dcbot_pause', $hub ) );
2273        #sleep hconfig( 'dcbot_pause',  $hub );
2274        #sleep $config{'dcbot_pause'};
2275      } elsif ( $config{'force_ip_collect'}
2276        and $user                   ne $dc->{'Nick'}
2277        and $dc->{'INF'}{'SID'}     ne $user
2278        and $dc->{'INF'}{'CID'}     ne $user
2279        and $peerlist->{$user}{'M'} ne 'P'
2280        and !( $peerlist->{$user}{'INF'}{'I4'} or $peerlist->{$user}{'INF'}{'I6'} )
2281        and ( ( $dc->{'M'} ne 'P' ) or $dc->{'allow_passive_ConnectToMe'} ) )
2282      {    #to do to dcpp lib
2283        printlog( 'dc', 'trying connect to us', $user );
2284        $dc->cmd( 'Rev' . 'ConnectToMe', $user ) if !$dc->{'adc'};    #TODO for adc
2285      }
2286      #$dc->info();
2287      #printlog( 'dev',  "cycle end" );
2288      printlog( 'err', 'hub inactive!' ), $dc->info(), last unless $dc->active();    #$dc->{'socket'};
2289    }
2290    my $share;
2291    $share += $peerlist->{$_}{INF}{SS} || $peerlist->{$_}{'sharesize'} for keys %$peerlist;
2292    $share = psmisc::human( 'size', $share );
2293    updateuser(
2294      $dc, '', {
2295        %$dc,
2296        'desc' => ( scalar keys %$peerlist or 0 ) . ' users ' . $share . ' ' . $dc->{'HubName'},
2297        'scan' => int( time() )
2298      }
2299    );
2300    printlog( 'dbg', 'ended, aftersleep' );
2301    #$dc->wait(100,1) for ( 0 .. 60 );
2302    $dc->wait(10) if $url{'dcuser'};
2303    $dc->wait_finish();
2304    printlog( 'dc', 'Finished' );
2305    $dc->destroy();
2306    psmisc::file_rewrite( 'dcexit.dump', Dumper $dc );
2307    #todo: hubstat
2308    #printlog('info', $dc->{'IpList'});
2309    #printlog('info', $dc->{'NickList'});
2310    #for( keys %{ $dc->{'IpList'} } ) {
2311##printlog('dev', "ip=$_, nick=",$dc->{'IpList'}->{$_}{'Nick'}, 'port=',$dc->{'IpList'}->{$_}->{'port'});
2312    #printlog('dbg','updating info about user ip:', $dc->{'IpList'}->{$_}{'Nick'}, $_),
2313    #updateuser($dc, $dc->{'IpList'}->{$_}->{'Nick'}, {'ip' => $_,
2314    #'port' => $dc->{'IpList'}->{$_}->{'port'},
2315    #'no_res' => 1, 'no_file' => 1, 'no_time' => 1});
2316    #}
2317    $processor{'file'}{ $url{'prot'} }{'func'}->($saveto) unless hconfig( 'no_dcbot_upload', $hub );
2318  } continue {
2319    #$dc = undef;
2320    $dc->destroy() if $dc;
2321    #printlog('dev', Dumper($dc));
2322    #delete $dc->{$_}for keys %$dc;
2323    $dc = undef;
2324    #undef $dc;
2325    printlog( 'dev', "dcexit (fail)" );
2326  }
2327  $work{'proc_scans'} -= $stat{'filelists'} - $filelists;
2328};
2329$processor{'prot'}{'dchub'}{'get'} ||= sub {
2330  #TODO
2331};
2332$processor{'prot'}{'adc'}{$_}    ||= $processor{'prot'}{'dchub'}{$_},
2333  $processor{'prot'}{'adcs'}{$_} ||= $processor{'prot'}{'dchub'}{$_},
2334  for keys %{ $processor{'prot'}{'dchub'} };
2335
2336sub nmap_host_name () {
2337  my $hostip   = $+{ip};
2338  my $hostname = $+{name};
2339  $hostip   ||= $hostname;
2340  $hostname ||= $hostip;
2341  my $host = ( ( !$config{'nmap_nores'} and $hostname ) ? $hostname : $hostip );
2342  $host =~ s/\(|\)//g;
2343  return wantarray ? ( $host, $hostip, $hostname ) : $host;
2344}
2345$processor{'format'}{'nmap'} ||= sub {
2346  my $nping = shift @_;
2347  my ( $host, $hostip, $hostname );
2348  my @resbuffer;
2349  my %nstat;
2350  $nping = $nping =~ /-P[0n]/;
2351  for my $content (@_) {
2352    my ( $hostup, $resolvs );
2353    my $oneline = sub {
2354      chomp;
2355      s/[\x0d\x0a]+//g;
2356      #printlog( 'nmap', $_ );
2357      if (
2358           /Host\s+(?<name>\S*)\s+(:?\((?<ip>\S+)\)\s*)?appears to be up/io
2359        or /Host:\s*(?<ip>\S+)\s*(?<name>\S*)\s*(Status: Up|Ports:)/io
2360        or /Interesting ports on (?<name>\S*)\s*\((?<ip>\S+)\)/io
2361        or /Interesting ports on (?<ip>\S+):/io
2362        or /Discovered open port (?<port>\S+)\/tcp on (?<ip>\S+)/io
2363        or /Nmap scan report for (?<name>\S+)\s*(:?\((?<ip>\S+)\)\s*)?\s*(?<down>\[host down\])?/io
2364        #Nmap scan report for 24.117.235.52 [host down]
2365        )
2366      {
2367        ( $host, $hostip, $hostname ) = nmap_host_name();
2368        #$hostip = $+{ip};
2369        #$hostname = $+{name};
2370        #$hostip   ||= $hostname;
2371        #$hostname ||= $hostip;
2372        #$host = ( ( !$config{'nmap_nores'} and $hostname ) ? $hostname : $hostip );
2373        #$host =~ s/\(|\)//g;
2374        return if !$host or $host =~ /^\d$/;
2375        $hostup = 1 if !$nping;
2376        $hostup = 0 if $+{down};
2377        #printlog ('up', $host, $hostup, Dumper \%+);
2378        #$resolvs = 0;
2379        #$nstat{$hostip}
2380      } elsif (/^\s*Host is up(?:\.| \((?<latency>\S+)s latency\))/) {
2381        $hostup = 1 unless $nping;
2382        $nstat{'host'}{$host}{$_} = $+{$_} for grep { length $+{$_} } qw(latency);
2383      } elsif (
2384        /^\s*Nmap (?:done|finished): (?<total>\d+) IP addresses \((?<up>\d+) hosts up\) scanned in (?<seconds>\S+) seconds/)
2385      {
2386        #
2387        #Completed Connect Scan at 20:21, 7.66s elapsed (1024 total ports)
2388        #Scanning 256 hosts [4 ports/host]
2389        #$work{'range_size'} = $+{total};
2390        $nstat{'up'}   = $+{up};
2391        $nstat{'per'}  = $+{seconds};
2392        $nstat{'size'} = $+{total};
2393      } elsif (/^\s*Raw packets sent: (?<sp>\d+) \((?<sb>\S+)\) \| Rcvd: (?<rp>\d+) \((?<rb>\S+)\)/) {
2394        $nstat{$_} = $+{$_} for keys %+;
2395        #count traffic bytes?
2396      } elsif (
2397/^\s*All (?<ports>\d+) scanned ports on (?<name>\S+)(?: \((?<ip>\S+)\))? are (?:closed|filtered)(?: \((?<closed>\S+)\) or filtered \((?<filtered>\S+)\))?/
2398        )
2399      {
2400        ( $host, $hostip, $hostname ) = nmap_host_name();
2401        $hostup = 0 if $nping;
2402        #printlog ('down', $host, $hostup, Dumper \%+);
2403        $nstat{'host'}{$host}{$_} = $+{$_} for grep { length $+{$_} } qw(closed filtered);    # ports
2404      } elsif (/Not shown: (?<closed>\d+) closed ports/) {
2405        $nstat{'host'}{$host}{$_} = $+{$_} for grep { length $+{$_} } qw(closed);
2406      }
2407      if ( m{(?<port>\S+)/tcp\s+(?<state>open|on|unfiltered)}io or m{Discovered open port (?<port>\S+)/tcp on (?<host>\S+)} ) {
2408        my ( $port, $prot ) = ( $1, $static{'port2prot'}{$1} );
2409        return unless $host;
2410        return if $config{'scanner'}{$prot}{'disabled'};
2411        $host = normalize_ip($host) if $config{'host_by_name_norm'};                          # and !$resolvs++;
2412        printlog( 'warn', "invalid host" ), next unless $host;
2413        my $res = $prot . '://' . $host;
2414        return if $nstat{'scanned'}{$res}++;
2415        push( @resbuffer, $res ) unless $config{'nmap_only'};
2416
2417        unless ( $static{'banned'}->{$res} or $static{'banned'}->{$host} ) {
2418          #warn "tryup";
2419          #psmisc::dmp('tryup', $prot, $_->database),
2420          $_->update( $config{'sql_tresource'}, undef, { 'prot' => $prot, 'host' => $host } )
2421            for grep { $_ } $static{'db'}, $static{'db_shard'}{$prot};
2422        }
2423        $processor{'urls'}->(@resbuffer), @resbuffer = () if $#resbuffer >= $config{'start_by'} - 1;
2424        $hostup = 1 if $nping;
2425        ++$ipstt{'resalive'}{$host};    #remove
2426        ++$nstat{'resalive'}{$host};
2427        $nstat{'prot'}{$prot}{$host} = 1;
2428        $nstat{'host'}{$host}{$prot} = 1;
2429      }
2430      if ($hostup) {
2431        $host = normalize_ip($host) if $config{'host_by_name_norm'};
2432        #$static{'db'}->line("SELECT * FROM $tq$config{'sql_thost'}$tq WHERE host!=$vq$host$vq AND ip=$vq$ip$vq")->{'host'}
2433        $static{'db'}->update(
2434          $config{'sql_thost'},
2435          undef, {
2436            'host' => $host,
2437            'ip'   => $hostip,
2438            'time' => int( time() ),
2439            'meta' => join( ' ', map { "$_=$nstat{'host'}{$host}{$_}" } keys %{ $nstat{'host'}{$host} } )
2440          }
2441        );
2442        ++$ipstt{'alive'}{$host};
2443        ++$ipstt{'ralive'}{$host};    # not optimal!
2444        #++$nstat{'up'}{$host};
2445        $hostup = 0;
2446      }
2447    };
2448    oneliner( $oneline, $content );
2449  }
2450  for my $p ( keys %{ $nstat{'prot'} || {} } ) { $nstat{$p} = scalar keys %{ $nstat{'prot'}{$p} || {} }; }
2451  dmp( \%nstat );
2452  $processor{'urls'}->(@resbuffer) if @resbuffer;
2453  return \%nstat;
2454};
2455
2456sub speed ($$) {
2457  my ( $t, $c ) = @_;
2458  my $ps;
2459  $ps = $c / $t if $c and $t > 0;
2460  $ps = ' at ' . psmisc::human( 'float', $ps ) . 'ps' if $ps;
2461  return 'per ' . psmisc::human( 'time_period', $t ) . $ps;
2462}
2463$processor{'url'} ||= sub {
2464  my @rng = @_;
2465  #printlog('dev', '$processor{url}', @_);
2466  %ipstt = ();
2467  @rng = reverse(@rng) if $config{'ranges_reverse'};
2468  shuffle( \@rng ) if $config{'ranges_shuffle'};
2469  my $ranges = 0;
2470  for my $range (@rng) {
2471    ++$ranges;
2472    $range =~ s/^\s+|\s+$//g;
2473    next if !$range or $range =~ /^[;#]/;
2474    my ($desc) = $1 if $range =~ s/\s+(.+)//;
2475    $range = 'local://' . $range
2476      if ( $range =~ /^\w:/ and $config{'system'} =~ /win/ )
2477      or $range =~ m{^([/\\])(!:\1)} and -d $range;
2478    $range = '\\\\' . $range if $range =~ /^(?:\.|\w\:[^\/\\]|\\[^\\])/;    #    |(\/[^\/])!!!   |((\\)|(\/)$)
2479    $range =~ tr/\\/\// if $range =~ s/^\s*(?:\\\\|\/\/)/file:\/\//;
2480    my $tim = timer();
2481    #todo $range = 'range://'    unless ( $range =~ /\:\/\// );
2482    #printlog('dev', '$processor{url}2', $range);
2483    if ( $range =~ /\:\/\// ) { $processor{'scan_proto_host'}->($range); next; }
2484    next
2485    #unless allowscan( { 'range' => $range, 'desc' => $desc }, $config{'sql_tranges'}, undef, $config{'range_period'} );
2486    #unless allowscan( { 'range' => $range, 'desc' => $desc }, $config{'sql_tranges'}, undef, hconfig('range_period', $range) );
2487      unless allowscan( { 'range' => $range, 'desc' => $desc }, $config{'sql_tranges'}, undef,
2488          hconfig( 'period', $range, 'range' ) );
2489    next if $config{'noscan'};
2490    ( $ipstt{'ralive'}, $ipstt{'resalive'} ) = ();
2491    printlog( 'nmap', "scanning $range [", $ranges, '/', $#rng + 1, ']' );
2492    state( "scan:", $range );
2493    my $nmap_myip;
2494    $nmap_myip = '-S ' . $config{'myip'}
2495      if $config{'myip'}
2496        and $config{'get_myip_mask'}
2497        and $range =~ /$config{'get_myip_mask'}/;
2498    my $port = join ',',
2499      map { $config{'scanner'}{$_}{'port'} }
2500      grep { $config{'scanner'}{$_}{'port'} and !$config{'scanner'}{$_}{'disabled'} and !$config{'scanner'}{$_}{'no_nmap'} }
2501      keys %{ $config{'scanner'} };
2502    $port = $config{'nmap_param_port'} . ' ' . $port
2503      if $port and $config{'nmap_param_port'} and !$config{'pingonly'} and !$param->{'pingonly'};    # del $param
2504    my $scantype = $port ? $config{'nmap_scan'} : $config{'nmap_ping'};
2505    shelldata($range);
2506    state( "scan range: $range", );
2507    on_interrupt(
2508      sub {
2509        $static{'db'}->update(
2510          $config{'sql_tranges'},
2511          undef, {
2512            'range' => $range,
2513            #'scan'  => int( time() + hconfig('on_interrupt', $range) - hconfig('range_period', $range)),
2514            'scan' => int( time() + hconfig( 'on_interrupt', $range ) - hconfig( 'period', $range, 'range' ) ),
2515          }
2516        );
2517      }
2518    ) if hconfig( 'on_interrupt', $range );
2519    my $h = openproc(
2520"$config{'starter'} $config{'nmap'} $scantype $port $config{'nmap_param'} $config{'nmap_nores'} $nmap_myip $range $config{'stderr_redirect'} |"
2521    ) or next;
2522    my $nstat = $processor{'format'}{'nmap'}->( $config{'nmap_param'}, $h ) || {};
2523    on_interrupt();
2524    close($h);
2525    my $ips = ( 2**( 32 - $1 ) ) if !$nstat->{'size'} and $range =~ m|^\s*\d+\.\d+\.\d+\.\d+/(\d+)\s*$|;
2526    $ips = $nstat->{'size'} if $nstat->{'size'};
2527    #$nstat->{'range_size'} = 0;
2528    %_ = (
2529      'range' => $range,
2530      'scan'  => int( time() ),
2531      'alive' => scalar( keys %{ $ipstt{'ralive'} } ),
2532      'size'  => ( $ips or 1 ),
2533      'meta'  => join( ' ', map { "$_=$nstat->{$_}" } grep { !ref $nstat->{$_} } keys %$nstat )
2534    );
2535    $_{'time'} = int( time() ) if scalar( keys %{ $ipstt{'ralive'} } );
2536    $static{'db'}->update( $config{'sql_tranges'}, undef, \%_ );
2537    my $t = $tim->();
2538    #my $ipps = $ips / $t if $t > 0;
2539    #( $ipps = sprintf( "%.3f", $ipps ) ) .= ' ipps' if $ipps;
2540    #$ips = " [$ips] " if $ips;
2541    printlog(
2542      'nmap', "finished $range $ips alive:", scalar( keys %{ $ipstt{'ralive'} } ), ', resalive:',
2543      scalar( keys %{ $ipstt{'resalive'} } ),
2544      #' per', psmisc::human( 'time_period', $t ), $ipps
2545      speed( $t, $ips ), $_{meta},
2546    );
2547    ++$work{'scans'};
2548  }
2549};
2550
2551sub url_local2ext (\%) {
2552  my $url = \%{ $_[0] };
2553  $url->{prot} = $config{'local_prot'};
2554  $url->{host} = $config{'local_host'};
2555  $config{'local_path'}->( $url->{path} ) if $config{'local_path'} eq 'CODE';
2556  $url;
2557}
2558$processor{'prot'}{'range'}{'func'} ||= sub { printlog( 'dev', '$processor{range}', @_ ); };
2559$processor{'scan_proto_host'} ||= sub {
2560  for my $url (@_) {
2561    #printlog('dev', '$processor{scan_proto_host}', $url);
2562    $url =~ s/\s+$//;
2563    $url =~ tr|\\|/|;
2564    $url .= '/' unless $url =~ m|/$|;
2565    my $url_orig = $url;
2566    my %url      = split_url($url);
2567    #printlog('dev', '$processor{scan_proto_host}', %url);
2568    #printlog('dev', '$processor{scan_proto_host} pre norm', join_url(\%url));
2569    $url{'host'} = normalize_ip( $url{'host'} )
2570      if $url{'host'}
2571        and $url{'prot'} ne 'range'
2572        and hconfig( 'host_by_name_norm',       $url{'host'}, $url{'prot'} )
2573        and hconfig( 'host_by_name_norm_force', $url{'host'}, $url{'prot'} )
2574        #and !( $config{'norm_skip_host'} and $url{'host'} =~ /$config{'norm_skip_host'}/i )
2575    ;
2576    #printlog('warn', "host [$url{'host'}]", name_to_ip($url{'host'}));
2577    printlog( 'warn', "invalid host [$url]" ), next if !$url{'host'} and $url{'prot'} ne 'local';
2578#printlog('dev', '$processor{scan_proto_host}3', %url, $config{'scanner'}{ $url{'prot'} }{'disabled'}, $config{'scanner'}{ $url{'prot'} }{'func'});
2579#printlog('dev', '$processor{scan_proto_host} post norm', join_url(\%url));
2580    next
2581      if $config{'scanner'}{ $url{'prot'} }{'disabled'}
2582        or ( !$url{'host'} and $url{'prot'} ne 'local' )
2583        or !$processor{'prot'}{ $url{'prot'} }{'func'};
2584    $url{'path'} = '' if $url{'path'} eq '/';
2585    $url = join_url( \%url );
2586    my $desc;    #todo
2587    my $table = $url{'prot'} eq 'range' ? $config{'sql_tranges'} : $config{'sql_tresource'};
2588    my %res = $url{'prot'} eq 'range' ? ( 'range' => $url, 'desc' => $desc ) : ( %url, 'host' => full_host( \%url ), );
2589    #printlog('dev', '$processor{scan_proto_host}2', %res);
2590    printlog(
2591      'dbg',
2592      "Skipping $url (scanned",
2593      psmisc::human( 'time_period', int( time() ) - $work{'skiptime'}{$url} ),
2594      ' ago) period=',
2595      psmisc::human( 'time_period', $work{'skipperiod'}{$url} ),
2596      ),
2597      next
2598      if (
2599      !allowscan(
2600        \%res, $table, undef, ( ( $url{'prot'} eq 'dchub' and $url{'host'} =~ m|/| ) ? $config{'dcbot_client_period'} : undef )
2601      )
2602      );
2603    next if $config{'noscan'};
2604    #} else {
2605    #if !$config{'noscan'};
2606    #{
2607    local $config{'force'} = 0 if $work{'scan_proto_host_reset_force'};
2608    resetcounters();
2609    $work{'scan_unfinished'} = 0;
2610    state( "scan:", join_url( \%url ) );
2611    printlog( 'dbg', "Starting scan", join_url( \%url ) );
2612    on_interrupt(
2613      sub {
2614        #$static{'db'}->flush();
2615        #local $config{'log_dmp'} = 1;
2616        #printlog('dev', 'on_interrupt update'),
2617        $static{'db'}->update( $table, undef,
2618          { %url, 'scan' => int( time() + hconfig( 'on_interrupt', values %url ) - hconfig( 'period', values %url ) ), } )
2619          if hconfig( 'on_interrupt', values %url );
2620      }
2621    );
2622    my $tim = timer();
2623    #printlog('dev', '$processor{scan_proto_host} fu ', $url_orig,join_url(\%url));
2624    $processor{'prot'}{ $url{'prot'} }{'func'}->( ( $url{'prot'} eq 'local' ) ? $url_orig : join_url( \%url ) );
2625    on_interrupt();
2626    ++$work{'scans'};
2627    $static{'db'}->flush_insert()
2628      #$processor{'out'}{'array'}->()
2629      if hconfig( 'flush_sql_every_res', values %url );
2630    next if $config{'scanner'}{ $url{'prot'} }{'no_res_stat'};
2631    #printlog('dev', '$processor{scan_proto_host} fu0 ', $url_orig,join_url(\%url), join_url(\%res));
2632    my $local;
2633    $local = 1 if $url{'prot'} eq 'local';
2634    %url = %{ url_local2ext(%url) }, %res = %{ url_local2ext(%res) }, if $url{'prot'} eq 'local';
2635  #printlog('dev', '$processor{scan_proto_host} fu ', $url_orig,join_url(\%url), join_url(\%res), "loc[$local]");
2636  #local $config{'log_dmp'} = 1;
2637  #printlog('dev', 'on_finish update', 'files=',$work{'filec'}->( $stat{'files'} ), 'size=', $work{'sizec'}->( $stat{'size'} ));
2638  #dmp \%stat;
2639    local %_;
2640    if ( $work{'filec'}->( $stat{'files'} ) ) {
2641      %_ = (
2642        %res, %{ $config{'clear_file'} || {} },
2643        'files' => $work{'filec'}->( $stat{'files'} ),
2644        'dirs'  => $work{'dirc'}->( $stat{'dirs'} ),
2645        'size'  => $work{'sizec'}->( $stat{'size'} )
2646      );
2647      $_{'time'} = int( time() );
2648      $_{'scan'} = int( time() ) unless $work{'scan_unfinished'};
2649      $static{'db'}->update( $config{'sql_tresource'}, undef, \%_ );
2650    }
2651    #%_ = ( %res, 'ip' => ip_to_name( $url{'host'} ), 'scan' => int( time() ) );
2652    %_ = ( %res, 'ip' => name_to_ip( $url{'host'} ), 'scan' => int( time() ) );
2653    $_{'time'} = int( time() ) if $work{'filec'}->( $stat{'files'} );
2654    $static{'db'}->update( $config{'sql_thost'}, undef, \%_ );
2655#printlog('dev', 'sizec',  $work{'sizec'}->( $stat{'size'}),$work{'filec'}->( $stat{'files'})  ,$work{'filec'}->( $stat{'dirs'}));
2656    if ( $work{'sizec'}->( $stat{'size'} ) and ( $work{'filec'}->( $stat{'files'} ) or $work{'filec'}->( $stat{'dirs'} ) ) ) {
2657      %_ = (
2658        %res,
2659        'path' => '',
2660        'name' => '',
2661        'ext'  => '',
2662        'desc' => $work{'desc'}{ $url{'host'} }{ $url{'prot'} },
2663        'scan' => int( time() )
2664      );
2665      $static{'db'}->update( $config{'sql_tfile'}, undef, \%_ );
2666      #$_{'path'} = '/';
2667      #$static{'db'}->update( $config{'sql_tfile'}, undef, \%_ );
2668      #printlog( 'dev', 'cd0', $res{'path'}, $local ? $url_orig : $res{'path'}, Dumper \%res);
2669      #=z
2670      update_to_root( %res, %_, $local, $url_orig );
2671      my $t = $tim->();
2672      printlog(
2673        'stat', $url, 'size', $work{'sizec'}->( $stat{'size'} ),
2674        'files', $work{'filec'}->( $stat{'files'} ),
2675        'dirs',
2676        $work{'filec'}->( $stat{'dirs'} ),
2677        speed( $t, $work{'filec'}->( $stat{'files'} ) )
2678      );
2679    }
2680    #}
2681  }
2682};
2683
2684=del
2685sub startme {
2686  my ($start) = @_;
2687  if ($start) {
2688#$processor{'out'}{'array'}->();
2689    my $com =
2690#"$config{'starter'} $config{'spawn_prefix'} $config{'perl'} $config{'root_path'}crawler.pl $force $start $config{'spawn_postfix'}";
2691"$config{'starter'} $config{'spawn_prefix'} $^X $0 $force $start $config{'spawn_postfix'}";
2692
2693    printlog( 'dbg', "starting with $start:", $com );
2694#printlog( 'dbg', $com );
2695    system($com);
2696  }
2697}
2698=cut
2699
2700sub startself {
2701  my ($start) = @_;
2702  $static{'db'}->flush_insert();
2703  #if (defined $start) {
2704  my $force = "--force=$config{'force'}" if $config{'force'};
2705  return psmisc::startme( join ' ', $force, $start );
2706  #}
2707}
2708$processor{'urls'} ||= sub {
2709  my ( $start, @urlproc );
2710  for (@_) {
2711    next unless $_;
2712    if ( !$config{'noscan'} and $config{'spawn'} and /\:\/\// ) { $start .= " $_ "; }
2713    else                                                        { push( @urlproc, $_ ); }
2714  }
2715  startself($start);
2716  mysleep( $config{'scan_sleep'} );
2717  $processor{'url'}->(@urlproc) if @urlproc;
2718};
2719
2720=c
2721$processor{'file'}{'sql'}{'func'} ||= sub {
2722  my $tper = timer();
2723  for my $datafile ( grep { $_ } @_ ) {
2724    my $filesize = ( -s ( $datafile or next ) or next );
2725    printlog(
2726      'info',
2727      'Uploading ',
2728      $datafile,
2729      " ($filesize bytes) ...", (
2730        $tper->()
2731          and $stat{'total_bytes'}
2732        ? " prognose:" . psmisc::human('time_period', ( $filesize * $tper->() ) / $stat{'total_bytes'} ) . 's'
2733        : ''
2734      )
2735    );
2736    my $per = timer();
2737    rename( $datafile, $datafile . '.up' );
2738    $processor{'out'}{'mysql_file'}->("$datafile.up");
2739    rename( $datafile . '.up', $datafile . '.ok' );
2740    $stat{'total_bytes'} += $filesize;
2741    printlog(
2742      'time',
2743      " done per ",
2744      $per->(),
2745      " sec ",
2746      $per->() ? 'at ' . ( $filesize / $per->() ) . ' b/s ' : '',
2747      "[total $stat{'total_bytes'} bytes per ",
2748      psmisc::human('time_period', $tper - () ),
2749      " sec ",
2750      ( $tper->() ? 'at ' . int( $stat{'total_bytes'} / $tper->() ) : '' ),
2751      ' b/s]',
2752    );
2753  }
2754};
2755=cut
2756
2757sub dcscanned {
2758  local %_ = ref $_[0] ? %{ $_[0] } : ( cid => $_[0] );
2759  #$static{'db'}->line
2760  $static{'db'}->query( $config{'sql'}{'table'}{ $config{'sql_tresource'} }{'cid'}
2761    ? qq{SELECT * FROM $tq$config{'sql_tresource'}$tq WHERE ${rq}cid${rq}=} . $static{'db'}->quote( $_{'cid'} ) . qq{ LIMIT 1}
2762    : qq{SELECT * FROM $tq$config{'sql_tfile'}$tq WHERE ${rq}tiger${rq}=}
2763      . $static{'db'}->quote( $_{'cid'} )
2764      . qq{ AND ${rq}path${rq}=${vq}${vq} LIMIT 1} )->[0];
2765}
2766#!!! TODO : gz tgz
2767$processor{'file'}{'bz2'}{'func'} ||= sub {
2768  my $tper = timer();
2769  $processor{'file'}{'xml'}{'func'}->(@_), return if $config{'direct_unzip'};
2770  for my $datafile ( grep { $_ } @_ ) {
2771    #my $filesize = -s $datafile;
2772    my $filesize = -s ( $datafile || next );
2773    unless ($filesize) {
2774      prinlog( 'no fs', $datafile );
2775      rename( $datafile, $datafile . '.ok' );
2776      next;
2777    }
2778    printlog(
2779      'info',
2780      'Extracting ',
2781      $datafile,
2782      " ($filesize bytes) ...", (
2783        $tper->() and $stat{'total_bytes'} ? " prognose:" . int( ( $filesize * $tper->() ) / $stat{'total_bytes'} ) . 's' : ''
2784      )
2785    );
2786    my $per = timer();
2787    shelldata($datafile);
2788    printprog( "$config{'bzip2'} $config{'bzip2_uncompress'} \"$datafile\"", 1 );
2789    rename( $datafile, $datafile . '.ok' );
2790    $stat{'total_bytes'} += $filesize;
2791    printlog(
2792      'time',
2793      " done per ",
2794      $per->(),
2795      " sec ",
2796      $per->() ? 'at ' . ( $filesize / $per->() ) . ' b/s ' : '',
2797      "[total",
2798      psmisc::human( 'size', $stat{'total_bytes'} ),
2799      "bytes per",
2800      psmisc::human( 'time_period', $tper->() ),
2801      ' ',
2802      ( $tper->() ? 'at ' . int( $stat{'total_bytes'} / $tper->() ) : '' ),
2803      ' b/s]',
2804    );
2805  }
2806};
2807$processor{'file'}{'xml'}{'func'} ||= sub {    #dc++
2808  our $tper ||= timer();
2809  my ( %url, %files );
2810  $url{'prot'} = 'dchub';
2811  for my $datafile ( grep { $_ } @_ ) {
2812    last if scan_stop;
2813    my $datafilebug = $datafile;
2814    my $per         = timer();
2815    #my $filesize    = -s $datafile;
2816    my $filesize = ( -s ( $datafile or next ) or next );
2817    my $files;
2818    my $scan = int( $^T - 86400 * -M $datafile );
2819    #int( time() )
2820    printlog(
2821      'dc',
2822      'Parsing ',
2823      $datafile,
2824      " ($filesize bytes) ...", (
2825        $tper->() and $stat{'total_bytes'} ? " prognose:" . int( ( $filesize * $tper->() ) / $stat{'total_bytes'} ) . 's' : ''
2826        )
2827        #, 'scan=', $scan, ' now=', int(time()), ' mdf=' , ( -M $datafile ) * 86400
2828    );
2829    rename( $datafile, $datafile . '.up' );
2830    on_interrupt(
2831      sub {
2832        rename( $datafile . '.up', $datafile );
2833        filesave( \%files );
2834        %files = ();
2835      }
2836    );
2837    #printlog('dev', "tryop[$datafile]");
2838    #local @_ = ;
2839    my $cpdata = {};
2840    my $fh;
2841    if (
2842      #open ((),@_)
2843      $datafile =~ /\.xml\.bz2$/i
2844      ? open( $fh, '-|' . ':encoding(utf-8)', qq{$config{'bzip2'} $config{'bzip2_uncompress'}c "$datafile.up"} )
2845      : open( $fh, '<' . ':encoding(utf-8)',  "$datafile.up" )
2846      #open($fh, ($datafile =~ /\.xml\.bz2$/i
2847      #? ('-|' . ':encoding(utf-8)', , qq{$config{'bzip2'} $config{'bzip2_uncompress'}c "$datafile.up"})
2848      #: ('<' . ':encoding(utf-8)', "$datafile.up") ))
2849      )
2850    {
2851      my @dir;
2852      my $path = '/';
2853      %url = ( %url, 'port' => '', split_url( $work{'current_dchub'} ) );    #clean port of prev iteration
2854      #$url{'dcuser'} = cp_trans( $config{'cp_shell'}, $config{'cp_db'}, decode_url($datafile) );
2855      #eval {
2856      $url{'dcuser'} = Encode::decode 'utf-8', decode_url($datafile), Encode::FB_DEFAULT;
2857	#};
2858      printlog( 'err', 'cant decode ', $datafile, decode_url($datafile) ), rename( $datafile . '.up', $datafile . '.err' ), next
2859        if $@;
2860      $url{'dcuser'} =~ s/\.xml(\.bz2)?$//i;
2861      $url{'dcuser'} =~ s{^.+(\\|/)}{}i;
2862      my $url = join_url( \%url );
2863      $url{'host'} = full_host( \%url );
2864      my %time =
2865        ( hconfig( 'unknown_time', $url{'host'} ) ne ''
2866        ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $url{'host'} ) )
2867        : () );
2868      ( $work{'sizec'}, $work{'filec'}, $work{'dirc'} ) =
2869        ( counter( $stat{'size'} ), counter( $stat{'files'} ), counter( $stat{'dirs'} ) );
2870      local $/ = '<';
2871      my $decoder = sub {
2872        $_ = HTML::Entities::decode_entities($_);
2873        #printlog('decoder');
2874        }
2875        if psmisc::use_try 'HTML::Entities';
2876      my $cid;
2877      while (<$fh>) {
2878        last if $work{'die'};
2879        #if ( my ( $file, $size, $tiger ) = /^File Name="([^"]+)" Size="(\d+)" TTH="([^"]+)"/i ) {
2880        local %_;
2881        #$_ = HTML::Entities::decode_entities_old $_;
2882        #$_ = HTML::Entities::decode_entities $_;
2883        psmisc::code_run $decoder;
2884        #psmisc::cp_detect_trans($_, $cpdata, 'utf-8', 'utf-8'); #some filelist not in utf
2885        s/^(\w+)\s//;
2886        my ($w) = lc($1);
2887        s/(\w+)="([^"]+)"/$_{lc$1}=$2;''/eg;    #"
2888        #printlog 'dev', "s: [$w]", Dumper  \%_;
2889        #if ( /^File Name="([^"]+)" Size="(\d+)" TTH="([^"]+)"/i ) {
2890        if ( $w eq 'file' and length $_{name} ) {
2891          #printlog('dev','1', $config{'cp_dc_list'}, $config{'cp_db'}, $_{name});
2892          local $_ = $_{name};                  # = cp_trans( $config{'cp_dc_list'}, $config{'cp_db'}, $file );
2893          #printlog('dev','2', $config{'cp_dc_list'}, $config{'cp_db'}, $file);
2894          my ($ext) = ( $_{name} =~ s/\.([^\.]+)$// ? ($1) : ('') );
2895          $_{meta} = join ' ', map { qq{$_="$_{$_}"} } grep { $_{$_} } qw(br hit);
2896  #my %meta;
2897  #my @k = sort grep {$_ ne qw(name size ) , ($config{'sql'}{'table'}{ $config{'sql_tfile'} }{'tiger'} ? () : ('tth'))} keys %_;
2898          $files{ $path . '/' . $_ } = {
2899            %url, %_,
2900            'path' => $path,
2901            #'name' => $_{name},
2902            'ext'  => $ext,
2903            'size' => int $_{size},
2904            'time' => int $_{ts},
2905            %time, (
2906              $config{'sql'}{'table'}{ $config{'sql_tfile'} }{'tiger'}    #{'array_insert'}
2907              ? ( 'tiger' => $_{tth} )
2908              : ( 'desc' => 'tiger=' . $_{tth} )
2909            ),
2910            'scan' => $scan
2911          };
2912          ++$stat{'total_files'};
2913          ++$files;
2914          #} elsif ( my ($file) = /^Directory Name="([^"]+)">/i ) {    #"
2915        } elsif ( $w eq 'directory' and length $_{name} ) {
2916          local $_ = $_{name};    # = cp_trans( $config{'cp_dc_list'}, $config{'cp_db'}, $file );
2917          push( @dir, $_{name} );
2918          my ($ext) = ( $_{name} =~ s/\.([^\.]+)$// ? ($1) : ('') );
2919          $files{ $path . '/' . $_ } = { %url, 'path' => $path, 'name' => $_{name}, 'ext' => $ext, %time, 'scan' => $scan };
2920          $path = ( join( '/', @dir ) or '/' );
2921          #printlog 'dev', 'dir start', "[$_]", $path, 'd=', @dir;
2922        } elsif (m|^/Directory>|i) {
2923          #$static{'db'}->insert_hash_hash( $config{'sql_tfile'}, \%files );
2924          #printlog 'saving', Dumper \%files;
2925          filesave( \%files );
2926          %files = ();
2927          #printlog 'dev', 'dir end', "[$_]",$path,  'd=',  @dir;
2928          pop(@dir);
2929          $path = join( '/', @dir ) || '/';
2930          #} elsif (/^FileListing .*Base="([^"]+)"/i) {                #"mc
2931        } elsif ( $w eq 'filelisting' and length $_{base} ) {
2932          my $last;
2933          $cid //= $_{cid} if $_{cid};
2934          my $already;
2935          if ( $_{cid} and $config{'use_dc'} and !$config{'use_dc_user_dupes'} ) {
2936            #printlog 'Cur', Dumper #$static{'db'}->select($config{'sql_tfile'}, {'tiger'=>$_{'cid'}, 'path'=>''});
2937            $already = dcscanned( \%_ );
2938            #printlog 'cid', $_{cid}, Dumper $already, $config{'dcbot_client_period'}, time - $already->{scan}, $scan ;
2939            #;
2940            if (  ( $already->{tiger} or $already->{cid} )
2941              and ( time - $already->{scan} < $config{'dcbot_client_period'} or $scan < $already->{scan} ) )
2942            {
2943              printlog(
2944                'info', 'skipping already scanned ',
2945                $url, 'as', join_url($already),
2946                psmisc::human( 'time_period', time - $already->{scan} ),
2947                $config{'dcbot_client_period'},
2948              );
2949              #printlog('warn', 'not uploading');
2950              rename( $datafile . '.up', $datafile . '.ok' );
2951              $last = 1;
2952            }
2953          }
2954          @dir = ( $_{base} eq '/' ? '' : $_{base} );
2955          $path = join( '/', @dir ) || '/';
2956          my $save = {
2957            %url, (
2958              hconfig( 'unknown_time', $url{'host'} ) ne ''
2959              ? ( 'time' => int( time() ) - hconfig( 'unknown_time', $url{'host'} ) )
2960              : ()
2961            ),
2962            'scan' => $scan
2963          };
2964          #%_ = ( '' => { %$_, 'desc' => $_{'generator'}, 'tiger' => $_{'cid'}, }, ' ' => { %$_, 'path' => '/' }, );
2965          #$static{'db'}->insert_hash_hash( $config{'sql_tfile'}, \%_ );
2966          #filesave( \%_ );
2967          my @replace = qw(prot host port dcuser);
2968          my %meta = ( 'meta' => 'link=' . encode_url( join_url($already) ) );
2969          for ( 0 .. 1 ) {
2970            filesave( {
2971                '' => { %meta, %$save, 'desc' => $_{'generator'}, 'tiger' => ( $_{'cid'} || $_{'tiger'} ), },
2972                ' ' => { %meta, %$save, 'path' => '/' },
2973              }
2974            );
2975            last unless psmisc::is_hash_size $already;
2976            %meta = ();
2977            printlog(
2978              'info', 'saving already scanned',
2979              $url, 'at', psmisc::human( 'time_period', time - $already->{'scan'} ),
2980              'as', join_url($already)
2981            ) if !$_ and !$last;
2982            $url{$_} = $save->{$_} = $already->{$_} for @replace;
2983            $url{'host'} = full_host( \%url );
2984          }
2985          last if $last;
2986        }
2987      }
2988      close($fh);
2989      on_interrupt();
2990      #filesave( \%files ); %files = ();
2991      %_ = (
2992        %url,
2993        'scan'  => $scan,
2994        'files' => $work{'filec'}->( $stat{'files'} ),
2995        'dirs'  => $work{'dirc'}->( $stat{'dirs'} ),
2996        'size'  => $work{'sizec'}->( $stat{'size'} ),
2997        ( !$cid ? () : ( 'cid' => $cid ) ),
2998      );
2999      $_{'time'} = $scan if $work{'filec'}->( $stat{'files'} );
3000      $static{'db'}->update( $config{'sql_tresource'}, undef, { %_, %{ $config{'clear_file'} || {} } } );
3001    }
3002    rename( $datafilebug . '.up', $datafilebug . '.ok' );
3003    $stat{'total_bytes'} += $filesize;
3004    printlog(
3005      'time',
3006      " done per ",
3007      psmisc::human( 'time_period', $per->() ),
3008      ,
3009      $per->()
3010      ? 'at ' . psmisc::human( 'float', $filesize / $per->() ) . ' bps ' . psmisc::human( 'float', $files / $per->() ) . ' fps '
3011      : '',
3012      "[total $stat{'total_bytes'} bytes $stat{'total_files'} files per ",
3013      psmisc::human( 'time_period', $tper->() ),
3014      ' ', (
3015        $tper->()
3016        ? 'at ' . int( $stat{'total_bytes'} / $tper->() ) . ' bps  ' . int( $stat{'total_files'} / $tper->() ) . ' fps'
3017        : ''
3018      ),
3019      ' ]',
3020    );
3021  }
3022};
3023$processor{'file'}{'toscan'}{'func'} ||= sub {
3024  local $config{'force'} = 1;
3025  for my $datafile ( grep { $_ } @_ ) {
3026    my $url = ( $datafile or next );
3027    $url =~ s{^.+(\\|/)}{}i;
3028    $url =~ s/\.toscan$//i;
3029    $url = decode_url($url);
3030    local $config{'ftp_recurse_force'} = 1;
3031    resetcounters();
3032    $processor{'prot'}{'ftp'}{'func'}->($url);
3033  }
3034};
3035for ( keys %{ $processor{'format'} } ) {
3036  $processor{'file'}{$_}{'func'} ||= sub {
3037    for my $datafile ( grep { $_ } @_ ) {
3038      rename( $datafile, $datafile . '.up' );
3039      open( F, '<', $datafile . '.up' ) or next;
3040      $datafile =~ s/.$_$//i;
3041      $processor{'format'}{$_}->( $datafile, \*F );
3042      close(F);
3043      rename( $datafile . '.up', $datafile . '.ok' );
3044    }
3045  };
3046}
3047$processor{'file'}{'url'}{'func'} ||= sub {
3048  for my $datafile ( grep { $_ } @_ ) {
3049    #printlog('dev', "LOAD:$datafile");
3050    open( my $fh, ( ( $datafile eq '-' ) ? ($datafile) : ( '<', $datafile ) ) ) or next;
3051    $processor{'url'}->(<$fh>);
3052    close($fh);
3053  }
3054};
3055$processor{'file'}{'config'}{'func'} = $processor{'file'}{'hublist'}{'func'} ||= sub {
3056  local $config{'host_by_name_norm'} = 0;
3057  for my $datafile ( grep { $_ } @_ ) {
3058    $datafile = uni_get($datafile);
3059    next
3060      unless (
3061      open(
3062        my $fh, (
3063            ( $datafile eq '-' )
3064          ? ($datafile)
3065          : ( $datafile =~ /\.xml\.bz2$/i ? "$config{'bzip2'} $config{'bzip2_uncompress'}c \"$datafile\"|" : '<' . $datafile )
3066        )
3067      )
3068      );
3069    printlog( 'dc', 'hublist: loading', $datafile );
3070    while (<$fh>) {
3071      #printlog ('dev', $_);
3072      if (m|^\s*<Hub\s.*?Address="\s*(\w+://)?([^:/"]+):?(\d*)\s*"|i) {    #"mc
3073        #printlog ('dev', $_);
3074        my %url = ( 'prot' => 'dchub' );
3075        $processor{'url'}->(
3076          ( $1 || $url{'prot'} . '://' ) . $2 . ( $3 and $3 != $config{'scanner'}{ $url{'prot'} }{'port'} ? ':' . $3 : '' ) );
3077        ++$stat{'hublist'};
3078      }
3079    }
3080    close($fh);
3081  }
3082};
3083$processor{'file'}{'list'}{'func'} ||= sub {
3084  local $config{'host_by_name_norm'} = 0;
3085  for my $datafile ( grep { $_ } @_ ) {
3086    $datafile = uni_get($datafile);
3087    next unless ( open( my $fh, ( ( $datafile eq '-' ) ? ($datafile) : ( '<', $datafile ) ) ) );
3088    printlog( 'dc', 'hub .list: loading', $datafile );
3089    while (<$fh>) {
3090      if (m{^\s*([^:/\|]+):?(\d*)\s*\|}i) {
3091        my %url = ( 'prot' => 'dchub' );
3092        $processor{'url'}->( 'dchub://' . $1 . ( $2 and $2 != $config{'scanner'}{ $url{'prot'} }{'port'} ? ':' . $2 : '' ) );
3093        ++$stat{'list'};
3094      }
3095    }
3096    close($fh);
3097  }
3098};
3099$processor{'file'}{'ok'}{'func'} ||= sub { unlink(@_) unless $config{'keep_ok'} or $config{'debug'}; };
3100$processor{'file'}{'partial'}{'func'} ||= sub {
3101  #printlog( 'pt', "$_", 86400 * -M $_ ) for @_
3102  unlink( grep { 3600 < 86400 * -M $_ } @_ ) unless $config{'keep_ok'} or $config{'debug'};
3103};
3104$processor{'file'}{'up'}{'func'} ||= sub {
3105  #printlog( 'pt', "$_", 86400 * -M $_ ) for @_
3106  unlink( grep { 7 < -M $_ } @_ ) unless $config{'debug'};
3107};
3108$processor{'file'}{'dchub'}{'func'} ||= sub {
3109  for my $datadir ( grep { $_ and -d } @_ ) {
3110    $work{'current_dchub'} = $datadir;
3111    $work{'current_dchub'} =~ s{^.*[/\\]}{}ig;
3112    $work{'current_dchub'} = $1 . '://' . $work{'current_dchub'} if $work{'current_dchub'} =~ s{\.((?:dchub|adc))$}{}ig;
3113    $work{'current_dchub'} =~ s/\.(\d+)$/:$1/
3114      if $work{'current_dchub'} =~ /(\d+\.){4}(\d+)$/
3115        or $work{'current_dchub'} =~ /\D\.(\d+)$/;
3116    printlog( 'dbg', "dir:", $datadir, " hub:$work{'current_dchub'}" );
3117    local $config{'del_empty_dir'} = $config{'del_empty_dcdir'};
3118    $processor{'dir'}->($datadir);
3119  }
3120  #$processor{'out'}{'array'}->();
3121  $static{'db'}->flush_insert();
3122};
3123$processor{'file'}{'adc'}{'func'}  ||= $processor{'file'}{'dchub'}{'func'};
3124$processor{'file'}{'adcs'}{'func'} ||= $processor{'file'}{'dchub'}{'func'};
3125$processor{'file'}{'ldi'}{'func'}  ||= sub {
3126  my $table = $config{'sql_tfile'};
3127  for my $datafile ( grep { $_ } @_ ) {
3128    rename( $datafile, $datafile . '.up' );
3129    my $mydatafile = $datafile;
3130    $mydatafile =~ s!\\+|/+!$config{'slash_mysql'}!g;
3131    next if !$mydatafile or !-s $mydatafile . '.up';
3132    #my ( $tq, $rq, $vq ) = $static{'db'}->quotes();
3133    $static{'db'}->do(
3134"LOAD DATA $static{'db'}->{'LOW_PRIORITY'} LOCAL INFILE '$mydatafile.up' REPLACE INTO TABLE $config{'sql_tfile'} FIELDS OPTIONALLY ENCLOSED BY \"'\" ("
3135        . join(
3136        ',',
3137        map( "$rq$_$rq", (
3138            sort { $config{'sql'}{'table'}{$table}{$b}{'order'} <=> $config{'sql'}{'table'}{$table}{$a}{'order'} }
3139            grep( $config{'sql'}{'table'}{$table}{$_}{'array_insert'}, keys %{ $config{'sql'}{'table'}{$table} } )
3140          ) )
3141        )
3142        . ")"
3143    );
3144    rename( $datafile . '.up', $datafile . '.ok' );
3145  }
3146};
3147$processor{'dir'} ||= sub {
3148  for my $dir (@_) {
3149    printlog( 'info', "uploading [$dir]" );
3150    $dir .= $config{'slash_perl'} if $dir and !( $dir =~ /[\/\\]$/ );
3151    #printlog( 'dev', "dir: [$dir]:",<$dir*.$_>, <$dir.*.$_> );
3152    #!!! READDIR HERE
3153    for my $type ( shuffle( keys %{ $processor{'file'} } ) ) {    #!!!!SORT
3154      #$processor{'file'}{$_}{'func'}->( @{ shuffle( [ <$dir*.$_>, <$dir.*.$_> ] ) } )
3155      next if ref $processor{'file'}{$type}{'func'} ne 'CODE' or $processor{'file'}{$type}{'disabled'};    #and @dir;
3156      last if scan_stop;
3157      my @files = shuffle( <$dir*.$type>, <$dir.*.$type> );
3158      $processor{'file'}{$type}{'func'}->(
3159        grep {
3160#printlog('dev', 'dt', $_, int(time), int( $^T - 86400 *  -M $_   ), psmisc::human('time_period', time - $^T +  86400 * -M $_   ));
3161          $_ and time - $^T + 86400 * -M $_ > $processor{'file'}{$type}{'purge'} || $config{'purge'}
3162        } @files
3163      );
3164      unlink for grep { !-s } @files;
3165      #my $scan = int( $^T - ( -M $datafile ) * 86400 );
3166    }
3167    my @dir = grep { !m{/\.\.?$} } <$dir*>, <$dir.*>;
3168    #printlog 'D', Dumper \@dir;
3169    printlog( 'dbg', "deleted empty $dir:", ( rmdir($dir) or $! ) ) if $config{'del_empty_dir'} and !@dir;
3170  }
3171  $static{'db'}->flush_insert();
3172  #$processor{'out'}{'array'}->();
3173};
3174#{
3175#my ( $current, $order );
3176#sub newcurrent { $program{ $current = $_[0] }{'order'} ||= $order += 10; }            #v2
3177#}
3178$program{ psmisc::program('init') }{'force'} = 1;
3179$program{ psmisc::program() }{'func'} ||= sub {
3180  printlog(
3181    'info', "Started with [",
3182    $^X, $work{'$0'}, join ' ',
3183    map( $_ . ( ( $param->{$_} ne '' and $param->{$_} != 1 ) ? "=$param->{$_}" : '' ), sort keys %$param ),
3184    "] version:", ( $config{'version'} or 'r' . ( split( ' ', '$Revision: 4843 $' ) )[1] )
3185    )
3186    if    # $param and
3187    %$param and !$config{'nolog_startstop'};
3188  mkdir_rec( $config{'datadir'}, 0777 );
3189  mkdir_rec( $config{'tmpdir'},  0777 );
3190#later  $config{'sql_mysqlopt'} = ( $config{'sql_host'} ? ' -h ' . $config{'sql_host'} : ' ' ) . ' ' . $config{'sql_mysqlopt'} . ' ';
3191  $static{'banned'} = { loadlist( $config{'file_banned'} ), %{ $config{'banned'} || {} } };
3192#if ( ref $config{'sql_base_up'} eq 'HASH' ) {    $config{'sql'}{$_} = $config{'sql_base_up'}{$_}      for keys %{ $config{'sql_base_up'} };
3193#} else {    $config{'sql'}{'database'} = $config{'sql_base_up'};  }
3194  $work{'cp_input'} = $config{'cp_shell'};
3195  psmisc::lib_init();
3196  $config{'sql'}{'codepage'} ||= $config{'cp_db'} if $config{'cp_db'};
3197  unless ( keys %$param ) {
3198    printlog( 'info',
3199          'Short usage: '
3200        . ( $work{'$0'} || $0 )
3201        . ' [install] [file.to.load] [ftp://res.to.scan] [proc[=3]] [--configparam=value] [--sql__configparam=value]' );
3202  }
3203  return 0;
3204};
3205$program{ psmisc::program('db') }{'force'} = 1;
3206$program{ psmisc::program() }{'func'} ||= sub {
3207  #END {printlog('WOW1', $static{'db'}->{'dbh'});};
3208  #printlog 'dev', "user0:", $static{'db'}, $config{'sql'}{user}, $config{'sql_base_up'}{user};
3209  #printlog 'dev', 'U', Dumper $config{'sql_base_up'};
3210  $static{'db'} ||= pssql->new(
3211    #'codepage'        => $config{'cp_db'},
3212    'handler_insert0' => sub {
3213      my ( $self, $table, $hash ) = @_;
3214      #printlog('dev', 'handler_insert0', @_);
3215      if ( $table eq $config{'sql_tfile'} ) {    #TODO CONFIGURABLE
3216
3217=old
3218        if (  !$self->{'use_dbi'}
3219          and $work{'current_output_file'} ne $hash->{$_}{'host'}
3220          and $hash->{'host'} )
3221        {
3222          $work{'current_output_file'} = $hash->{'host'};
3223          open_out_file( $work{'current_output_file'} );
3224        }
3225=cut
3226
3227        if ( $hash->{'size'} ) {
3228          ++$stat{'files'};
3229          $stat{'size'} += $hash->{'size'} if $hash->{'size'} < $config{'max_stat_file_size'};
3230        } else {
3231          ++$stat{'dirs'};
3232        }
3233      }
3234    },
3235    'handler_insert2' => sub {
3236      my ( $self, $table, $hash ) = @_;
3237      #printlog('dev', 'handler_insert2', @_);
3238      $work{'upscanned'} = counter( $stat{'files'} ) unless $work{'upscanned'};
3239      if (  $work{'filec'}
3240        and $work{'filec'}->( $stat{'files'} )
3241        and $work{'upscanned'}->( $stat{'files'} ) > $config{'update_scan_every'} )
3242      {
3243        #local $config{'log_dmp'} = 1;
3244        #printlog('dev', 'on_update update'),
3245        $work{'upscanned'} = counter( $stat{'files'} );
3246        #my ($values) = values(%$hash);
3247        my ($values) = $hash;
3248        #printlog('dev', 'handler_insert2values:', $values);
3249        #sql_update(
3250        my %scantime = ( 'scan' => int( time() ), 'time' => int( time() ) ) if $values->{'prot'} ne 'dchub';
3251        $self->update(
3252          $config{'sql_tresource'},
3253          undef, {
3254            $self->filter_row( $config{'sql_tresource'}, 'primary', $values ),
3255            'path' => '',
3256            #'scan' => int( time() ),
3257            #'time' => int( time() )
3258            %scantime
3259          }
3260        );
3261#!!! TODO < BUG !!!
3262#printlog('dev', 'now=', 'files=',$work{'filec'}->( $stat{'files'} ), 'size=', $work{'sizec'}->( $stat{'size'} ));
3263#$self->select_log($config{'sql_tresource'}, {$self->filter_row( $config{'sql_tresource'}, 'primary', $values ), 'path'  => '',});
3264#printlog('dev', 'wherebody=',$self->where_body({'files' => $work{'filec'}->( $stat{'files'} ), 'files_mode'=>'g'}, undef, $config{'sql_tresource'}, ));
3265        $self->update(
3266          #sql_update(
3267          $config{'sql_tresource'},
3268          undef, {
3269            $self->filter_row( $config{'sql_tresource'}, 'primary', $values ),
3270            'path'  => '',
3271            'files' => $work{'filec'}->( $stat{'files'} ),
3272            'dirs'  => $work{'dirc'}->( $stat{'dirs'} ),
3273            'size'  => $work{'sizec'}->( $stat{'size'} )
3274          },
3275          $self->where_body(
3276            { 'files' => $work{'filec'}->( $stat{'files'} ), 'files_mode' => 'l' },
3277            undef, $config{'sql_tresource'},
3278            )
3279            #"$self->{'rq'}files$self->{'rq'} < " . $self->{'vq'} . $work{'filec'}->( $stat{'files'} ) . $self->{'vq'}
3280        );
3281        my $h = split_url( $values->{'host'} )->{'host'};
3282        $self->update(
3283          #sql_update(
3284          $config{'sql_thost'},
3285          undef, {
3286            $self->filter_row( $config{'sql_thost'}, 'primary', $values ),
3287            'host' => $h,
3288            #'ip'   => ip_to_name($h),
3289            ( $values->{'prot'} eq 'dchub' ? () : ( 'ip' => name_to_ip($h) ) ),
3290            #'scan' => int( time() ),
3291            #'time' => int( time() )
3292            %scantime
3293          }
3294        );
3295      }
3296    },
3297    %{ $config{'sql'} || {} },
3298    ( ref $config{'sql_base_up'} eq 'HASH' ? %{ $config{'sql_base_up'} } : ( 'database' => $config{'sql_base_up'} ) ),
3299  );
3300  #END {printlog('WOW2', $static{'db'}->{'dbh'});};
3301  #printlog 'dev', Dumper $static{'db'};
3302  #printlog 'dev', "user:", $static{'db'}{user}, $config{'sql'}{user}, $config{'sql_base_up'}{user};
3303  #printlog 'dev', "u", %{ $config{'sql_base_up'} };
3304  ( $tq, $rq, $vq ) = $static{'db'}->quotes();
3305#$processor{'out'}{'array'}->();
3306#my ( $tq, $rq, $vq ) = $static{'db'}->quotes();
3307#$static{'db'}->query_log( 'SELECT * FROM ' . ( join ',', map { "$tq$_$tq" } sort keys %{ $config{'sql'}{'table'} } ) . ' WHERE 1 LIMIT 1' )
3308#use Data::Dumper;
3309#printlog( 'dev', Dumper($static{'db'}));
3310  undef;
3311};
3312$program{ psmisc::program('start') }{'func'} ||= sub { unlink( $config{'stop_file'} ); -f $config{'stop_file'}; };
3313$program{ psmisc::program('check') }{'func'} ||= sub {
3314  printlog( 'info', 'checkset' );
3315  #$config{'sql'}{'auto_repair'}  = 1;
3316  #$config{'sql'}{'force_repair'} = 1;
3317  $static{'db'}->check_data();
3318  #  for my $table ( $_[1] or keys %{ $static{'db'}{'table'} } ) {
3319  #    printlog ('check', $table,$static{'db'}->query_log("SELECT * FROM $table LIMIT 1"));
3320  #  }
3321};
3322$program{ psmisc::program('drop') }{'func'} ||= sub {
3323  @_ = keys %{ $config{'sql'}{'table'} } if !@_;
3324  printlog( 'info', "Dropping ", @_ );
3325  #printlog( 'info', "Dropping $static{'db'}->{'table_prefix'} [$_]" ), $processor{'out'}{'mysql'}->( drop_table_sql($_) )
3326  #for sort grep { $_ and $config{'sql'}{'table'}{$_} } @_;
3327  $static{'db'}->drop_tables( sort grep { $_ and $config{'sql'}{'table'}{$_} } @_ );
3328  return 0;
3329};
3330$program{ psmisc::program('dropdb') }{'func'} ||= sub {
3331  @_ = $config{'sql'}{'database'} if !@_;
3332  printlog( 'info', "Dropping ", @_ );
3333  #printlog( 'info', "Dropping $static{'db'}->{'table_prefix'} [$_]" ), $processor{'out'}{'mysql'}->( drop_table_sql($_) )
3334  #for sort grep { $_ and $config{'sql'}{'table'}{$_} } @_;
3335  $static{'db'}->drop_database( sort grep { $_ } @_ );
3336  return 0;
3337};
3338$program{ psmisc::program('install') }{'func'} ||= sub {
3339  my %bases;
3340  $bases{ $config{$_} } = $_ for grep { /^sql_base_/ } keys %config;
3341  $static{'db'}->install( grep { $_ } keys %bases );
3342
3343=cu
3344  my ( $tdbi, $tb ) = ( $config{'sql_use_dbi'}, $config{'sql_base'} );
3345  $config{'sql_use_dbi'} = 0;
3346  $config{'sql_base'}    = '';
3347  for ( grep $_, keys %bases ) {
3348    printlog( 'info', "Creating database '$_' [$bases{$_}]" );
3349    $processor{'out'}{'mysql'}->(
3350      "CREATE DATABASE IF NOT EXISTS $_"
3351        . (
3352        $config{'sql_default_character_set'}
3353          and $config{'sql'}{ $config{'sql_driver'} }{'DEFAULT CHARACTER SET'}
3354        ? " $config{'sql'}{ $config{'sql_driver'} }{'DEFAULT CHARACTER SET'} $config{'sql_default_character_set'}"
3355        : ''
3356        )
3357    );
3358  }
3359  ( $config{'sql_base'}, $config{'sql_use_dbi'} ) = ( $tb, $tdbi );
3360  printlog( 'info', "Creating $static{'db'}->{'table_prefix'} [", ( keys %{ $config{'sql'}{'table'} } ), ']' );
3361  $processor{'out'}{'mysql'}->( create_table_sql( %{ $config{'sql'}{'table'} } ) );
3362  $processor{'out'}{'mysql'}->( create_index_sql( %{ $config{'sql'}{'table'} } ) )
3363    unless $config{'sql'}{ $config{'sql_driver'} }{'index in create table'};
3364=cut
3365
3366  printlog( 'info', 'tryweb', `$^X web/index.cgi` );
3367  `chmod o-rwx tools/rsync_password`;
3368  return 0;
3369};
3370$program{ psmisc::program('index_disable') }{'func'} ||= sub {
3371  $static{'db'}->index_disable( $config{'sql_tfile'} );
3372  #my $tim = timer();
3373  #printlog( 'info', 'Disabling indexes...' );
3374  #printlog( 'err', 'ALTER TABLE ... DISABLE KEYS available in mysql >= 4' ), return if $config{'sql'}{'driver'} eq 'mysql3';
3375  #$static{'db'}->query_log("ALTER TABLE $static{'db'}->{'table_prefix'}$config{'sql_tfile'} DISABLE KEYS");
3376  #printlog( 'time', "Disable index per", psmisc::human( 'time_period', $tim->() ), "sec" );
3377  return 0;
3378};
3379$program{ psmisc::program('upgrade') }{'func'} ||= sub {
3380  local $config{'sql_base'}        = $config{'sql_base_web'};
3381  local $config{'sql_error_tries'} = 0;
3382  local $config{'sql_error_sleep'} = 0;
3383  printlog( 'info', "Upgrade from $_[0]. dont worry about errors" );
3384  printlog( 'info', 'Removing very old resource.vote,votelast' ),
3385    $static{'db'}->do("ALTER TABLE resource DROP `vote`, DROP `votelast`, DROP INDEX votei, DROP INDEX votelasti")
3386    if $_[0] < 1905;
3387  printlog( 'info', 'Removing host.votelast' ), $static{'db'}->do("ALTER TABLE host DROP `votelast`, DROP INDEX votelasti"),
3388    printlog( 'info', 'Adding host voting' ),
3389    $static{'db'}->do(
3390"ALTER TABLE host ADD `voteu` INT  NOT NULL  DEFAULT 0, ADD `voted` INT  NOT NULL  DEFAULT 0, ADD INDEX voteui (`voteu`), ADD INDEX votedi (`voted`)"
3391    ), $static{'db'}->do("ALTER TABLE host ADD `lastip` VARCHAR (15) NOT NULL DEFAULT 0, ADD INDEX lastipi (`lastip`(15))"),
3392    printlog( 'info', 'Adding top query lastip' ),
3393    $static{'db'}
3394    ->do("ALTER TABLE $config{'sql_twordstat'} ADD `lastip` VARCHAR (15) NOT NULL DEFAULT 0, ADD INDEX lastipi (`lastip`(15))"),
3395    $static{'db'}->do(
3396    "ALTER TABLE $config{'sql_tquerystat'} ADD `lastip` VARCHAR (15)  NOT NULL  DEFAULT 0, ADD INDEX lastipi (`lastip`(15))")
3397    if $_[0] < 1933;
3398  printlog( 'info', '{2618} 0.15.18 -> 0.16.0 (adding port column into host table)' ),
3399    $static{'db'}
3400    ->do("ALTER TABLE $config{'sql_thost'} ADD `port` SMALLINT UNSIGNED NOT NULL  DEFAULT 0, ADD INDEX porti (`port`)")
3401    if $_[0] < 2618;
3402  printlog( 'info', '{2970} 0.16.1 -> 0.16.2 (adding fulltext query index)' ),
3403    $static{'db'}->do(
3404"ALTER TABLE $config{'sql_tquerystat'} ADD FULLTEXT $config{'sql'}{'table'}{$config{'sql_tquerystat'}}{'query'}{'fulltext'} (`query`)"
3405    ) if $_[0] < 2970 and $config{'sql'}{'table'}{ $config{'sql_tquerystat'} }{'query'}{'fulltext'};
3406  printlog( 'info', '{3010} 0.16.1 -> 0.16.2 (adding dcuser to filebase and resource)' ),
3407    $static{'db'}
3408    ->do("ALTER TABLE `$config{'sql_tfile'}` ADD `dcuser` VARCHAR( 30 ) NOT NULL AFTER `host`, ADD INDEX dcuseri (`dcuser`)"),
3409    $static{'db'}->do(
3410    "ALTER TABLE `$config{'sql_tresource'}` ADD `dcuser` VARCHAR( 30 ) NOT NULL AFTER `host`, ADD INDEX dcuseri (`dcuser`)"),
3411    $static{'db'}->do(
3412"ALTER TABLE `$config{'sql_tfile'}` DROP PRIMARY KEY , ADD PRIMARY KEY ( `prot` , `host` , `path` , `name` , `ext` , `dcuser` )"
3413    ),
3414    $static{'db'}
3415    ->do("ALTER TABLE `$config{'sql_tresource'}` DROP PRIMARY KEY , ADD PRIMARY KEY ( `prot` , `host` , `path` , `dcuser` )"),
3416    if $config{'use_dc'} and $_[0] < 3010;
3417  printlog( 'info', '{3120} 0.16.1 -> 0.16.2 (adding voting to filebase)' ),
3418    $static{'db'}->do(
3419"ALTER TABLE `$config{'sql_tfile'}` ADD `voteu` INT  UNSIGNED NOT NULL  DEFAULT '0' , ADD `voted` INT  UNSIGNED NOT NULL  DEFAULT '0' , ADD `lastip` VARCHAR (15)  NOT NULL  DEFAULT '' , ADD INDEX voteui (`voteu`), ADD INDEX votedi (`voted`), ADD INDEX lastipi (`lastip`(15))"
3420    ),
3421    if $_[0] < 3120;
3422
3423  if ( $_[0] < 3939 ) {
3424    printlog( 'info', '{3939} 0.17.2 -> 0.18.0 (adding added to filebase)' ),
3425      $static{'db'}->do("ALTER TABLE `$config{'sql_tfile'}` ADD `added` INT  UNSIGNED NOT NULL  DEFAULT '0'");
3426    printlog( 'info', '{3939} 0.17.2 -> 0.18.0 (adding stem to filebase)' ),
3427      $static{'db'}->do("ALTER TABLE `$config{'sql_tfile'}` ADD `stem` VARCHAR (128)  NOT NULL  DEFAULT ''"),
3428      printlog( 'info', '{3939} 0.17.2 -> 0.18.0 (adding stem index to filebase)' ),
3429      $static{'db'}->do("ALTER TABLE `$config{'sql_tfile'}` ADD FULLTEXT `stemi` (`stem`)"),
3430      printlog( 'info', '{3939} 0.17.2 -> 0.18.0 (adding stem to querystat)' ),
3431      $static{'db'}->do("ALTER TABLE `$config{'sql_tquerystat'}` ADD `stem` VARCHAR (128)  NOT NULL  DEFAULT ''"),
3432      printlog( 'info', '{3939} 0.17.2 -> 0.18.0 (adding stem index to querystat)' ),
3433      $static{'db'}->do("ALTER TABLE `$config{'sql_tquerystat'}` ADD FULLTEXT `stemi` (`stem`)"),
3434      if $config{'use_stem'};
3435  }
3436  #
3437  if ( $_[0] < 4617 ) {
3438    if ( $config{'use_dc'} ) {
3439      printlog( 'info', '{4617} 0.19.1 -> 0.19.2 (dc: CID on resource table)' ), $static{'db'}
3440        ->do("ALTER TABLE `$config{'sql_tresource'}` ADD  `cid` VARCHAR (39)  NOT NULL  DEFAULT '', ADD INDEX `cid_i` (`cid`)");
3441    }
3442  }
3443  if ( $_[0] < 4775 ) {
3444    printlog( 'info', '{4775} 0.19.1 -> 0.19.2 (ranges,host: more stat)' );
3445    $static{'db'}->do(
3446"ALTER TABLE `$config{'sql_tranges'}` ADD  `meta` VARCHAR ($config{'sql'}{'table'}{$config{'sql_tranges'}}{'meta'}{'length'})  NOT NULL  DEFAULT ''"
3447    );
3448    $static{'db'}->do(
3449"ALTER TABLE `$config{'sql_thost'}` ADD  `meta` VARCHAR ($config{'sql'}{'table'}{$config{'sql_thost'}}{'meta'}{'length'})  NOT NULL  DEFAULT ''"
3450    );
3451  }
3452  # $Id: crawler.pl 4843 2013-08-14 12:17:58Z pro $#
3453  $program{$_}{'upgrade'}->(@_) for grep { ref $program{$_}{'upgrade'} eq 'CODE' } keys %program;
3454  psmisc::program_one('install');
3455};
3456$program{ psmisc::program('repair') }{'func'} ||= sub {
3457  printlog( 'warn', 'repair locked', @_ ), return if !psmisc::lock( 'repair_' . ( join '-', @_ ), { timeout => 1 } );
3458  $static{'db'}->repair(@_);
3459};
3460$program{ psmisc::program('purge') }{'func'} ||= sub {
3461  my ($purge) = ( $_[0] =~ /(\d+)/ ? $1 : $config{'purge_by'} );
3462  my ( $cut, $timr, $total, $now ) = ( int( time() ), timer() );
3463  printlog( 'info', "Purging tables (by $purge). cut =", psmisc::human( 'date_time', $cut -= $config{'purge'} ), $_[2] );
3464  local $config{'nav_all'} = 1;
3465  for my $table ( $_[1] or grep { $config{'sql'}{'table_param'}{$_}{'purge'} } keys %{ $config{'sql'}{'table'} } ) {
3466    #printlog('dev', 'pg', $config{'sql'}{'table_param'}{$table}{'purge'});
3467    for my $i ( 1 .. $config{'purges'} ) {
3468      my $timl = timer();
3469      $total += (
3470        $now = $static{'db'}->do(
3471          "DELETE $static{'db'}->{'LOW_PRIORITY'} FROM $tq$static{'db'}->{'table_prefix'}$table$tq " . $static{'db'}->where( {
3472              $config{'sql'}{'table_param'}{$table}{'purge'} . ''      => $cut,
3473              $config{'sql'}{'table_param'}{$table}{'purge'} . '_mode' => '<',
3474              #'glueg1'                                                  => 'or',
3475              #$config{'sql'}{'table_param'}{$table}{'purge'} . '1'       => 0,
3476              (
3477                  ( !$config{'sql'}{'table'}{$table}{'scan'} or $config{'sql'}{'table_param'}{$table}{'purge'} eq 'scan' )
3478                ? ()
3479                : ( 'scan' . '1' => 0, 'glueg1' => 'and', 'scan' . '_mode1' => '!', )
3480              ),
3481            },
3482            undef,
3483            $table
3484            )
3485            . " $_[2]"
3486            . ( $static{'db'}->{'no_delete_limit'} ? () : " LIMIT $purge" )
3487        )
3488      );
3489      local $_ = $timl->();
3490      printlog(
3491        'info',
3492        "purge $table pass $i of $config{'purges'}, $now records purged ($total total) per",
3493        psmisc::human( 'time_period', $_ ),
3494        ',', psmisc::human( 'float', $now / ( $_ || 1 ) ), 'fps'
3495      );
3496      last if $now < $purge;
3497      mysleep( $config{'purge_sleep'} ) if $config{'purge_sleep'};
3498    }
3499  }
3500  printlog(
3501    'time',
3502    "Purged $total records per",
3503    psmisc::human( 'time_period', $timr->() ),
3504    ' (', ( $timr->() > 0 ? psmisc::human( 'float', $total / $timr->() ) : '?' ),
3505    ' fps)'
3506  );
3507  return $total;
3508};
3509$program{ psmisc::program('delhost') }{'func'} ||= sub {
3510  return unless $_[0];
3511  my ($purge) = ( $config{'purge_by'} );
3512  my ( $tfiles, $files, $rec );
3513  my $timr = timer();
3514  printlog( 'info', "delhost $_[0] (by $purge)." );
3515  my $purges = ( $param->{'purges'} or 10000 );
3516  for my $i ( 1 .. $purges ) {
3517    my $timl = timer();
3518    $tfiles += (
3519      $files =
3520        #$processor{'out'}{'mysql'}
3521        $static{'db'}->do(
3522"DELETE $static{'db'}->{'LOW_PRIORITY'} FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_tfile'}$tq WHERE ${rq}host${rq} LIKE ${vq}$_[0]${vq} LIMIT $purge"
3523        )
3524    );
3525    printlog( 'info', "pass $i/$purges, $files files deleted, (total $tfiles) per", psmisc::human( 'time_period', $timl->() ) );
3526    mysleep( $config{'purge_sleep'} ) if $config{'purge_sleep'};
3527    last if $files < $purge;
3528  }
3529  $tfiles +=
3530    $static{'db'}->do(
3531    "DELETE $static{'db'}->{'LOW_PRIORITY'} FROM $tq$static{'db'}->{'table_prefix'}$_$tq WHERE ${rq}host${rq} = ${vq}$_[0]${vq}"
3532    ) for ( $config{'sql_thost'}, $config{'sql_tresource'} );
3533  printlog(
3534    'time',
3535    "Deleted $tfiles files and $rec records per",
3536    psmisc::human( 'time_period', $timr->() ),
3537    'sec (', psmisc::human( 'float', $timr->() > 0 ? $tfiles / $timr->() : '?' ),
3538    ' fps)'
3539  );
3540};
3541$program{ psmisc::program('deldead') }{'func'} ||= sub {
3542  my $rec;
3543  $rec +=
3544    $static{'db'}->do(
3545"DELETE $static{'db'}->{'LOW_PRIORITY'} FROM $tq$static{'db'}->{'table_prefix'}$_$tq WHERE ${rq}scan${rq} AND ! ${rq}time${rq} AND ! ${rq}files${rq} AND ! ${rq}dl${rq} AND ! ${rq}added${rq}"
3546    ) for ( $config{'sql_tresource'} );    #, $config{'sql_thost'}
3547  $rec +=
3548    $static{'db'}->do(
3549"DELETE $static{'db'}->{'LOW_PRIORITY'} FROM $tq$static{'db'}->{'table_prefix'}$_$tq WHERE ! ${rq}files${rq} AND ! ${rq}dl${rq} AND ! ${rq}added${rq} AND ! ${rq}voteu${rq} AND ! ${rq}voted${rq} AND ! ${rq}lastip${rq}"
3550    )                                      # ${rq}scan${rq} AND ! ${rq}time${rq} AND
3551    for ( $config{'sql_thost'} );
3552  printlog( 'info', "Deleted  $rec records" );
3553};
3554$program{ psmisc::program('reset_dead') }{'func'} ||= sub {
3555  my $rec;
3556  $rec +=
3557    $static{'db'}->do(
3558"UPDATE $tq$static{'db'}->{'table_prefix'}$_$tq SET ${rq}scan${rq} = ${vq}0${vq}, ${rq}time${rq} = ${vq}0${vq} WHERE ! ${rq}files${rq}"
3559    ) for ( $config{'sql_tresource'} );    #, $config{'sql_thost'}
3560  printlog( 'info', "Updated $rec records" );
3561};
3562$program{ psmisc::program('reset_res') }{'func'} ||= sub {
3563  local $_;
3564  printlog(
3565    'info',
3566    'reset_res:',
3567    $_ = $static{'db'}->do(
3568"UPDATE $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq SET ${rq}scan${rq} = ${vq}0${vq}, ${rq}time${rq} = ${vq}0${vq}"
3569    )
3570  );
3571  return $_;
3572};
3573$program{ psmisc::program('reset_res_bug') }{'func'} ||= sub {
3574  printlog(
3575    'info',
3576    'reset_res:',
3577    $static{'db'}->do(
3578"UPDATE $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq SET ${rq}scan${rq} = ${vq}0${vq}, ${rq}time${rq} = ${vq}0${vq} WHERE ${rq}scan${rq} > $vq"
3579        . int( time() )
3580        . $vq
3581    )
3582  );
3583};
3584$program{ psmisc::program('reset_range') }{'func'} ||= sub {
3585  printlog(
3586    'info',
3587    'reset_range:',
3588    $static{'db'}->do(
3589"UPDATE $tq$static{'db'}->{'table_prefix'}$config{'sql_tranges'}$tq SET ${rq}scan${rq} = ${vq}0${vq}, ${rq}time${rq} = ${vq}0${vq}"
3590    )
3591  );
3592};
3593$program{ psmisc::program('reset_range_bug') }{'func'} ||= sub {
3594  printlog(
3595    'info',
3596    'reset_range:',
3597    $static{'db'}->do(
3598"UPDATE $tq$static{'db'}->{'table_prefix'}$config{'sql_tranges'}$tq SET ${rq}scan${rq} = ${vq}0${vq}, ${rq}time${rq} = ${vq}0${vq} WHERE ${rq}scan${rq} > $vq"
3599        . int( time() )
3600        . $vq
3601    )
3602  );
3603};
3604$program{ psmisc::program('reset_period') }{'func'} ||= sub {
3605  my $rec;
3606  $rec += $static{'db'}->do("UPDATE $tq$static{'db'}->{'table_prefix'}$_$tq SET ${rq}period${rq} = ${vq}0${vq} ")
3607    for ( $config{'sql_tresource'}, $config{'sql_tranges'} );    #, $config{'sql_thost'}
3608  printlog( 'info', "Updated $rec records" );
3609};
3610$program{ psmisc::program('skip') }{'func'} ||= sub { $static{'banned'}->{ $_[0] } = 1; };
3611$program{ psmisc::program('file') }{'func'} ||= sub {
3612  printlog( 'info', 'loading file', $_[0] );
3613  $processor{'file'}{'url'}{'func'}->( uni_get( $_[0] ) );
3614};
3615$program{ psmisc::program('ipdig') }{'func'} ||= sub {
3616  my %dig;
3617  for my $file (@_) {
3618    printlog( 'info', 'founding ips in', $file );
3619    open( IF, ( ( $file eq '-' ) ? ($file) : ( '<', uni_get($file) ) ) ) or printlog( 'err', 'unable to open' ), return;
3620    while (<IF>) {
3621      s/<.+?>//g if $file =~ /^http/;
3622      for my $name (m!(?:\W|^)($config{'dig_find_mask'})\W!g) {
3623        next unless $name;
3624        next if grep { $name =~ /$_$/ } @{ $config{'dig_not_domain'} };
3625        my ( $ip, $err, $add );
3626        my %url = split_url($name);
3627        ( $ip, $err ) = name_to_ip( $url{'host'} );
3628        printlog( 'dbg', ' NOT ip', $name, $url{'host'}, $ip ), next if $err;
3629        printlog( 'dbg', ' ip not in mask:', $ip ), next if $ip !~ /$config{'dig_ip_mask'}/;
3630        unless ( $config{'dig_no_prot'} and !$url{'prot'} ) {
3631          next if !defined( $config{'scanner'}{ $url{'prot'} } );
3632          next if $config{'scanner'}{ $url{'prot'} }{'no_dig'};
3633        }
3634        $add = join_url( {
3635            %url,
3636            ( $config{'dig_as_ip'} ? ( 'host' => $ip ) : () ),
3637            ( ( $url{'prot'} eq $static{'port2prot'}{ $url{'port'} } ) ? ( 'port' => '' ) : () ),
3638            ( ( $config{'dig_prot_add'} and !$url{'prot'} ) ? ( 'prot' => $config{'dig_prot_add'} ) : () ),
3639            ( $config{'host_by_name_norm'} ? ( 'host' => normalize_ip( $url{'host'} ) ) : () ),
3640            ( ( $url{'user'} =~ /anonymous|ftp|none|\*+/i ) ? ( 'user' => '', 'pass' => '' ) : () ),
3641            ( ( $url{'pass'} =~ /none|\*+/i ) ? ( 'pass' => '' ) : () ),
3642          }
3643        ) unless $config{'dig_add_netmask'};
3644        $add = $ip, $add =~ s/\d+$//, $add = $add . '0/' . $config{'dig_add_netmask'} if $config{'dig_add_netmask'};
3645        printlog( 'dbg', ' digged', $name, '=', $ip, '->', $add ) if !$dig{$add};
3646        $processor{'url'}->($add) if ( $config{'dig_add_every'} and !$dig{$add} );
3647        ++$dig{$add};
3648      }
3649    }
3650    close(IF);
3651  }
3652  $processor{'url'}->( keys %dig ) unless $config{'dig_add_every'};
3653  printlog( 'info', ' founded', scalar keys %dig, sort keys %dig );
3654};
3655$program{ psmisc::program('ipdigf') }{'func'} ||= sub {
3656  for my $file (@_) {
3657    printlog( 'info', 'founding ips hrefs in', $file );
3658    open( my $fh, ( ( $file eq '-' ) ? ($file) : ( '<', uni_get($file) ) ) ) or printlog( 'err', 'unable to open' ), return;
3659    while (<$fh>) {
3660      next if /^\s*[#;]/;
3661      s/^\s+//;
3662      s/\s+$//;
3663      psmisc::program_one( 'ipdig', $_ ) if $_;
3664    }
3665    close($fh);
3666  }
3667};
3668$program{ psmisc::program('hublist') }{'func'} ||= sub {
3669  $_ = ( $_[0] or $config{'hublist'} );
3670  $processor{'file'}{ /\.(?:list)|(?:config)$/i ? 'list' : 'hublist' }{'func'}->( uni_get($_) );    #/
3671};
3672$program{ psmisc::program('filelist') }{'func'} ||= sub {
3673  return unless psmisc::use_try 'Net::DirectConnect::filelist';
3674  push @_, grep { -d } @ARGV;
3675  delete $program{default};
3676  @_ = @{ $config{fine}{dchub}{share} || [] } unless @_;
3677  #printlog('params', Dumper $config{fine}{dchub});
3678  Net::DirectConnect::filelist->new(
3679    log => sub (@) {
3680      my $dc = ref $_[0] ? shift : {};
3681      psmisc::printlog shift(), "[$dc->{'number'}]", @_,;
3682    },
3683    #'sql' => $config{'sql'},
3684    'db' => $static{'db'},
3685    ,
3686    %{ $config{fine}{dchub} || {} }
3687  )->filelist_make(@_),;
3688  0;
3689};
3690$program{ psmisc::program('default') }{'mask'} ||= '\S';
3691$program{ psmisc::program() }{'multi'}         ||= 1;
3692$program{ psmisc::program() }{'param_name'}    ||= 1;
3693$program{ psmisc::program() }{'func'}          ||= sub {
3694  return if scan_stop;
3695  for my $current ( keys %program ) {
3696    next if $current eq 'default';
3697    return if $program{ psmisc::program() }{'mask'} and $_[0] =~ /$program{psmisc::program()}{'mask'}/i;
3698  }
3699  psmisc::program_one( ( -f $_ && -s $_ ? 'file' : 'scan' ), $_ ) for @_;
3700  #$processor{'url'}->(@_);
3701};
3702$program{ psmisc::program('scan') }{'func'} ||= sub {
3703  #printlog('dev', 'scan', @_);
3704  $processor{'url'}->(@_);
3705};
3706$program{ psmisc::program('upload') }{'func'} ||= sub {
3707  #close_out_file();
3708  startn( $_[0], 'upload' );
3709  $processor{'dir'}->( $config{'datadir'} );
3710  #$static{'db'}->flush_insert();
3711};
3712$program{ psmisc::program('index_enable') }{'func'} ||= sub {
3713  $static{'db'}->index_enable( $config{'sql_tfile'} );
3714  #my $tim = timer();
3715  #printlog( 'info', 'Enabling indexes...' );
3716  #$static{'db'}->query_log("ALTER TABLE $static{'db'}->{'table_prefix'}$config{'sql_tfile'} ENABLE KEYS");
3717  #printlog( 'time', 'Enable index per ', psmisc::human( 'time_period', $tim->() ) );
3718  return 0;
3719};
3720$program{ psmisc::program('optimize') }{'func'} ||= sub {
3721  return $static{'db'}->optimize(@_);
3722  #@_ = keys %{ $config{'sql'}{'table'} } unless @_;
3723  #@_ = grep { $_ and $config{'sql'}{'table'}{$_} } @_;
3724  #printlog( 'info', 'Optimizing table...', @_ );
3725  #$static{'db'}->query_log( "OPTIMIZE TABLE " . join( ',', map( "$static{'db'}->{'table_prefix'}$_", @_ ) ) );
3726};
3727$program{ psmisc::program('analyze') }{'func'} ||= sub {
3728  return $static{'db'}->analyze(@_);
3729  #@_ = keys %{ $config{'sql'}{'table'} } unless @_;
3730  #@_ = grep { $_ and $config{'sql'}{'table'}{$_} } @_;
3731  #printlog( 'info', 'Analyze table...', @_ );
3732  #$static{'db'}->query_log( "ANALYZE TABLE " . join( ',', map( "$static{'db'}->{'table_prefix'}$_", @_ ) ) );
3733};
3734$program{ psmisc::program('flush') }{'func'} ||= sub {
3735  #printlog( 'dev', 'Flush');
3736  return $static{'db'}->flush(@_);
3737  #my $tim = timer();
3738  #my ( $tq, $rq, $vq ) = $static{'db'}->quotes();
3739  #$static{'db'}->do( "FLUSH TABLES " . ( join ',', map { "$tq$_$tq" } sort keys %{ $config{'sql'}{'table'} } ) );
3740  #printlog( 'time', 'Flush per', psmisc::human( 'time_period', $tim->() ) );
3741  #0;
3742};
3743$program{ psmisc::program('prof') }{'func'} ||= sub {
3744  my $tim = timer();
3745  #my ( $tq, $rq, $vq ) = $static{'db'}->quotes();
3746  @_ = grep { $_ ne $config{'sql_tfile'} } keys %{ $config{'sql'}{'table'} } unless @_;
3747  @_ = grep { $_ and $config{'sql'}{'table'}{$_} } @_;
3748  psmisc::program( $_, @_ ) for qw(repair optimize);
3749  printlog( 'time', 'Prof per', psmisc::human( 'time_period', $tim->() ) );
3750  return 0;
3751};
3752$program{ psmisc::program('res') }{'proc'} ||= 100;
3753$program{ psmisc::program() }{'func'} ||= sub {
3754  #my ( $tq, $rq, $vq ) = $static{'db'}->quotes();
3755  #printlog('err', 'logtest');
3756  my ( $join, $what, $where, $lock ) = (
3757    "LEFT JOIN $tq$static{'db'}->{'table_prefix'}$config{'sql_thost'}$tq USING (${rq}host$rq)",
3758    ", $tq$static{'db'}->{'table_prefix'}$config{'sql_thost'}$tq.${rq}time$rq AS ${rq}live$rq",
3759" $tq$static{'db'}->{'table_prefix'}$config{'sql_thost'}$tq.${rq}time$rq IS NULL OR $tq$static{'db'}->{'table_prefix'}$config{'sql_thost'}$tq.${rq}time$rq = ${vq}0$vq OR $tq$static{'db'}->{'table_prefix'}$config{'sql_thost'}$tq.${rq}time$rq > $vq"
3760      . ( int( time() ) + $config{'timediff'} - $config{'online_minutes'} * 60 )
3761      . $vq,
3762    ", $tq$static{'db'}->{'table_prefix'}$config{'sql_thost'}$tq WRITE"
3763  ) if $config{'scan_online_res'} and !$config{'scan_dead'};
3764##### TODO !!!!!!!!!!!!! sql_where !!!!!!!!!!!
3765  #printlog( 'dev', 'PRELOCK' );
3766  my $protos = join( ' OR ',
3767    map { " $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq.${rq}prot$rq = $vq$_$vq" }
3768      grep { $config{'scanner'}{$_}{'scanable'} and !$config{'scanner'}{$_}{'disabled'} and !$config{"no_$_"} }
3769      keys %{ $config{'scanner'} } )
3770    or return $config{'nothing_scan_sleep'};
3771  $static{'db'}->lock_tables("$tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq WRITE $lock");
3772  local @_ = ( (
3773      $config{'scan_dead'} ? ()
3774      : " NOT $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq.${rq}time$rq OR $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq.${rq}size$rq > ${vq}0$vq "
3775    ), (
3776      $config{'scanner'}{'dchub'}{'disabled'} ? ()
3777      : ( !%{ $config{'sql'}{'table'}{ $config{'sql_tresource'} }{'dcuser'} or {} }
3778        ? " $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq.${rq}host$rq NOT LIKE $vq%/%$vq "
3779        : " $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq.${rq}dcuser$rq = $vq$vq " )
3780    ),
3781    $protos,
3782    ( $where or () )
3783  );
3784  local $_ = grep( $_, @_ ) ? ' WHERE ( ' . join( ') AND (', grep( $_, @_ ) ) . ' ) ' : '';
3785  my $res =
3786    $static{'db'}->line(
3787"SELECT $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq.*$what FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq $join $_ ORDER BY ($tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq.${rq}scan$rq"
3788      . ( $config{'res_fast'} ? '' : "+$tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq.${rq}period$rq" )
3789      . ") LIMIT 1" );
3790#printlog('dev', 'res selectedp:', ($res), );
3791#printlog('dev', 'res selectedp:', join_url($res), Dumper $res);
3792#printlog('dev', 'res selected:',( join_url($res), psmisc::human('date_time',$res->{'scan'}),'p=', $res->{'period'},'=', psmisc::human('time_period',$res->{'period'})));
3793  our ( $lastres, $lasttime );
3794  my %origres = %$res;
3795  $res->{'period'} ||= hconfig( 'period', $res->{'host'}, $res->{'prot'} );
3796  printlog(
3797    'dev',
3798    'res selected:', (
3799      join_url($res), !!$res->{'scan'},
3800      psmisc::human( 'time_period', time - $res->{'scan'} ),
3801      psmisc::human( 'time_period', time - $res->{'time'} ),
3802      'p=', $res->{'period'}, '=', psmisc::human( 'time_period', $res->{'period'} )
3803    )
3804  );
3805  $_ = ( %origres and allowscan( $res, $config{'sql_tresource'}, $res->{'scan'} + 1, $res->{'period'} ) );
3806#$_ = ( $static{'db'}->filter_row( $config{'sql_tresource'}, 'primary', $res ) and allowscan( $res, $config{'sql_tresource'}, $res->{'scan'} + 1, $res->{'period'} ) );
3807#$_ = ( @{$res}{$static{'db'}->filter_row( $config{'sql_tresource'}, 'primary', $res )} and allowscan( $res, $config{'sql_tresource'}, $res->{'scan'} + 1, $res->{'period'} ) );
3808#my $period = ($res->{'period'} or hconfig( 'period', $res->{'host'}, $res->{'prot'} ));
3809#$_ = ( %$res and allowscan( $res, $config{'sql_tresource'}, $res->{'scan'} + 1, $period ) );
3810  $static{'db'}->unlock_tables();
3811#printlog( 'dev', "resrun[$_]", %$res, 'fr:',   @{$res}{ $static{'db'}->filter_row( $config{'sql_tresource'}, 'primary', $res ) } );
3812  if ($_) {
3813    printlog( 'err', "resource table probably corrupted", "duplicate", $lastres ), sleep( 10 + rand(100) ),
3814      psmisc::program_one( 'repair', $config{'sql_tresource'}, $config{'sql_thost'} ), return 0,
3815      if $config{'resourse_breakable'}
3816        and $lastres
3817        and $lastres eq join_url($res)
3818        and $lasttime + $config{'resourse_breakable'} > time;
3819    $lastres  = join_url($res);
3820    $lasttime = time;
3821    local $work{'scan_proto_host_reset_force'} = 1 unless $config{'force'};
3822    local $config{'force'} = 1;
3823    $processor{'url'}->( join_url($res) );
3824    return 0;
3825  } elsif (%$res) {
3826    my $wait = $res->{'period'} - ( $res->{'scan'} ? int( time() ) - $res->{'scan'} : 0 );
3827    my $nextscan = ( $_ = $res->{'period'} + $res->{'scan'} - int( time() ) ) > 0 ? $_ : $config{'nothing_scan_sleep'};
3828    $_ = min( max( $nextscan, 0 ), $config{'nothing_scan_sleep'}, max( $wait, 0 ) + 1 );
3829    printlog(
3830      'dbg', 'Res: nothing to do (next:',
3831      join_url($res), ':', psmisc::human( 'time_period', $nextscan ),
3832      ') sl(', $_, ')'
3833    );
3834    return $_;
3835  } else {
3836    printlog( 'dbg', 'all resources is scanned or dead or table is empty?' );
3837    return $config{'nothing_scan_sleep'};
3838  }
3839};
3840$program{ psmisc::program('dcbot') }{'proc'} ||= 1;
3841$program{ psmisc::program() }{'func'} ||= sub {
3842  return $config{'nothing_scan_sleep'} if $config{'scanner'}{'dchub'}{'disabled'};
3843  local $config{'scan_online_res'}              = 0;
3844  local $config{'scan_dead'}                    = 1;
3845  local $config{'host_by_name_norm'}            = 0;
3846  local $config{'scanner'}{'dchub'}{'scanable'} = 1;
3847  local $config{'scanner'}{'adc'}{'scanable'}   = 1;
3848  local $config{'scanner'}{'adcs'}{'scanable'}  = 1;
3849  local $config{'scanner'}{'ftp'}{'disabled'}   = 1;
3850  local $config{'scanner'}{'http'}{'disabled'}  = 1;
3851  local $config{'scanner'}{'file'}{'disabled'}  = 1;
3852  local $config{'scanner'}{'rsync'}{'disabled'} = 1;
3853  return psmisc::program_one('res');
3854};
3855$program{ psmisc::program('range') }{'proc'} ||= 1;
3856$program{ psmisc::program() }{'func'} ||= sub {
3857  #my ( $tq, $rq, $vq ) = $static{'db'}->quotes();
3858  my $where = $static{'db'}->where( {
3859      'scan'      => int( time() ),
3860      'scan_mode' => '<',
3861      'glueg'     => 'or',
3862      ( $config{'scan_dead'} ? () : ( 'time1' => 0, 'alive1' => 1, 'alive_mode1' => '>', 'gluel1' => 'or' ) )
3863    },
3864    undef,
3865    $config{'sql_tranges'}
3866  );
3867  $static{'db'}->lock_tables("$tq$static{'db'}->{'table_prefix'}$config{'sql_tranges'}$tq WRITE");
3868  #TODO MAKE QUERY HERE:
3869  my ( $rng, $tim, $period ) = @{
3870    $static{'db'}->line(
3871"SELECT ${rq}range$rq, ${rq}scan$rq, ${rq}period$rq FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_tranges'}$tq $where ORDER BY (${rq}scan$rq+${rq}period$rq) LIMIT 1"
3872    )
3873    }{ ( 'range', 'scan', 'period' ) };
3874  #printlog('dev', 'range selected:',( $rng, psmisc::human('time_period',time-$tim), $period ));
3875  $static{'db'}->unlock_tables(), return $config{'nothing_scan_sleep'} unless $rng;
3876  #$period ||= hconfig( 'range_period', $rng );
3877  $period ||= hconfig( 'period', $rng, 'range' );
3878  local $config{'scan_timeout'} = $config{'scan_range_timeout'};
3879  $_ = allowscan( { 'range' => $rng }, $config{'sql_tranges'}, $tim + 1, $period );
3880  #printlog('dbg', 'range allowscan:', $_);
3881  $static{'db'}->unlock_tables();
3882  if ($_) {
3883    local $config{'force'} = 1;
3884    $config{'nmap_only'} = 1 unless defined $config{'nmap_only'};
3885    $processor{'url'}->($rng);
3886    return 0;
3887  } else {
3888    $period ||= hconfig( 'period', $rng );
3889    my $nextscan = $period + $tim - int( time() );
3890    $_ = min( max( $nextscan, 0 ), $config{'nothing_scan_sleep'}, $config{'nothing_scan_sleep'}, max( $period, 0 ) + 1 );
3891    printlog(
3892      'dbg', 'range: nothing to do, sleeping ',
3893      $_, 'seconds (next:',
3894      $rng, ':', psmisc::human( 'time_period', $nextscan ),
3895      'period:', psmisc::human( 'time_period', $period ),
3896      'last:', psmisc::human( 'time_period', int( time() ) - $tim ), ')'
3897    );
3898    return $_;
3899  }
3900};
3901
3902sub startn {
3903  my ( $how, $from ) = (@_);
3904  #printlog('dev', 'startn', @_) ;
3905  #my $proc = $_[0];
3906  #printlog('dev', "start1 proc=$how", @ARGV);
3907  s/\s*($from)=\d+\s*/$1/i for @ARGV;
3908  #printlog('dev', "start2 proc=$how", @ARGV);
3909  #printlog('dev', "proc=$how;", join( ' ', grep { /^(-|$from)/ } @ARGV )),
3910  startself( join( ' ', grep { /^(-|$from)/ } @ARGV ) ) while --$how > 0;
3911}
3912$program{ psmisc::program('proc') }{'func'} ||= sub {
3913  #return unless $config{'sql_use_dbi'};
3914  return if scan_stop;
3915  my $only = $config{'scan'};
3916  if ( int( $_[0] ) ) { startn( $_[0], 'proc' ) }
3917  elsif ( $program{ $_[0] } ) { $only ||= $_[0]; }
3918  my $min_sleep;
3919  #printlog( 'dmp', "gggg" );
3920  while ( !scan_stop ) {
3921    my ( $runned, $sleep ) = ( 0, $config{'nothing_scan_sleep'} );
3922    for my $prog (
3923      $only
3924      or sort { $program{$a}{'order'} <=> $program{$b}{'order'} }
3925      grep { $program{$_}{'proc'} and !$config{ 'noscan_' . $_ } } keys %program
3926      )
3927    {
3928      ++$runned;
3929      $work{'proc_scans'} = $program{$prog}{'proc'};
3930      do {
3931        state("proc: $prog");
3932        #printlog( 'dev', "proc $prog run" );
3933        $_ = alarmed( $program{$prog}{'alarm'} || $config{'max_proc_time'}, $program{$prog}{'func'} );
3934        printlog( 'dbg', 'proc', "prog $prog returned $_;", $work{'proc_scans'} );
3935        printlog( 'info', "starting child and exit.." ), startself( join( ' ', grep { /^(-|proc)/ } @ARGV ) ), exit
3936          if ( $config{'reload_time'} and $work{'start_timer'}->() > $config{'reload_time'} )
3937          or ( $config{'reload_every'} and $work{'scans'} >= $config{'reload_every'} );
3938      } while --$work{'proc_scans'} > 0 and !$_ and !scan_stop;
3939      $sleep = min( $sleep, max( $_, 0 ) );
3940      $min_sleep = 0 unless $_;
3941      #printlog( 'dbg', "sleep=$sleep, r=$_" );
3942    }
3943    printlog( 'info', 'proc: nothing to do, exit' ), last
3944      if !$runned
3945        or ( $config{'once'} and $sleep == $config{'nothing_scan_sleep'} );
3946    $sleep = min( $sleep, ( $min_sleep *= 2 ) ||= 5 );
3947    $min_sleep = min( $min_sleep, $config{'nothing_scan_sleep'} );
3948    state( "proc: sleep:", $sleep + $config{'scan_sleep'} );
3949    printlog( 'dbg', 'sleep..', $sleep, ' + rand', $config{'scan_sleep'}, " min=$min_sleep max=$config{'nothing_scan_sleep'}" );
3950    mysleep( $sleep + rand( $config{'scan_sleep'} ) );
3951  }
3952};
3953$program{ psmisc::program('stat') }{'func'} ||= sub {
3954  my ( $qq, $qn );
3955  $static{'db'}{'auto_repair'}  = 1;
3956  $static{'db'}{'force_repair'} = 1;
3957  my ( $tq, $rq, $vq ) = $static{'db'}->quotes();
3958  $qn = "WHERE ${rq}scan${rq} = ${vq}0${vq}";
3959  for ( 0 .. $_[0] ) {
3960    mysleep( $config{'stat_sleep'} or 1 ) if $_;
3961    unless ( $config{'stat_no_res'} ) {
3962      my $dq;
3963      $dq = "  ${rq}dcuser${rq} LIKE $vq$vq " if keys %{ $static{'db'}{'table'}{ $config{'sql_tresource'} }{'dcuser'} };
3964      $qq = " ${rq}scan${rq} < $vq" . ( int( time() ) - $config{'period'} ) . "$vq ";
3965      $qq .= " AND ( ${rq}time${rq}=${vq}0${vq} OR ${rq}size${rq}>${vq}0${vq} ) " unless $config{'scan_dead'};
3966      #printlog( 'dch', keys %{$config{'table'}{$config{'sql_tresource'}}{'dcuser'}});
3967      my $where = 'WHERE';
3968      #$where = ' WHERE ' . join (' AND ', grep {$_}$dq, $qq) if $dq or $qq;
3969      printlog(
3970        'info',
3971        #'host=', $static{'db'}->{'host'}, 'db=', $static{'db'}->{'database'},
3972        "Resources: to scan ",
3973        values %{
3974          $static{'db'}->line(
3975            "SELECT COUNT(*) FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq "
3976              . ( $dq || $qq ? $where . join( ' AND ', grep { $_ } $dq, $qq ) : '' )
3977          )
3978          },
3979        'from',
3980        values %{
3981          $static{'db'}->line(
3982            "SELECT COUNT(*) FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq " . ( $dq ? $where . $dq : '' )
3983          )
3984          },
3985        '; new',
3986        values %{ $static{'db'}->line("SELECT COUNT(*) FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq $qn")
3987          },
3988      );
3989    }
3990    unless ( $config{'stat_no_range'} ) {
3991      #$qq = 'WHERE ${rq}scan${rq} < \'' . ( int( time() ) - $config{'range_period'} ) . "' ";
3992      $qq = "WHERE ${rq}scan${rq} < $vq" . ( int( time() ) - hconfig( 'period', 'range' ) ) . "$vq ";
3993      $qq .= " AND ( ${rq}time${rq}=${vq}0${vq} OR ${rq}alive${rq}>${vq}0${vq} ) " unless $config{'scan_dead'};
3994      printlog(
3995        'info',
3996        'ranges: to scan ',
3997        values %{ $static{'db'}->line("SELECT COUNT(*) FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_tranges'}$tq $qq ")
3998          },
3999        'from',
4000        values %{ $static{'db'}->line("SELECT COUNT(*) FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_tranges'}$tq ") },
4001        '; new',
4002        values %{ $static{'db'}->line("SELECT COUNT(*) FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_tranges'}$tq $qn") },
4003      );
4004    }
4005    our %total;
4006    my %totallast = %total;
4007    unless ( $config{'stat_no_total'} ) {
4008      printlog(
4009        'info',
4010        'totals:',
4011        map {
4012          (
4013            $_,
4014            '=',
4015            $total{$_} = ( values %{ $static{'db'}->line("SELECT COUNT(*) FROM $tq$static{'db'}->{'table_prefix'}$_$tq ") } )[0]
4016            ),
4017            $totallast{$_}
4018            ? ( $totallast{$_} = $total{$_} - $totallast{$_} )
4019              ? ( $totallast{$_} > 0 ? '+' : () ) . $totallast{$_}
4020              : ()
4021            : ()
4022        } keys %{ $config{'sql'}{'table'} }
4023      );
4024    }
4025  }
4026};
4027$program{ psmisc::program('statuniq') }{'func'} ||= sub {
4028  my $table = 'filebase';
4029  #printlog Dumper
4030  my $indexes = $static{'db'}->query("show index from $table");
4031  #printlog Dumper $bykey     ;
4032  my $oldseq = 1;
4033  my $n      = 0;
4034  for my $i (@$indexes) {
4035    $i->{"Key"} = ++$n if $i->{"Seq_in_index"} <= $oldseq;
4036    $oldseq = $i->{"Seq_in_index"};
4037  }
4038  my $bykey = { map { $_->{Key} => $_ } @$indexes };
4039  #printlog Dumper $indexes;
4040  my ( $run, $total );
4041  for my $s (`$config{myisamchk} -v --description /var/db/storage/$config{sql_base_web}/$table`) {
4042    $total = $1 if $s =~ /Data records:\s*(\d+)/;
4043    ++$run if $s =~ /table description/;
4044    #printlog $s;
4045    next unless $run;
4046    #Key Start Len Index   Type                     Rec/key         Root  Blocksize
4047    #1   2     8   unique  ulonglong                      1    422342656       1024
4048    $s =~ /^\s*(?<Key>\d+)\s+\d+\s+\d+.*?(?<reckey>\d+)/;
4049    next unless $+{Key};
4050    $bykey->{ $+{Key} }{reckey} = $+{reckey} unless $bykey->{ $+{Key} }{reckey};
4051    #printlog $s, Dumper \%+;
4052  }
4053  my ($size) = values %{ $static{'db'}->line("SELECT SUM(size) FROM resource WHERE files>0") };
4054  printlog "totals: files: $total; size $size";
4055  my %byname;
4056  for my $i ( values %$bykey ) {
4057    $i->{rec} = int( $total / $i->{reckey} ) if $i->{reckey};
4058    next if $i->{rec} <= 1 or $i->{rec} == $total;
4059    $byname{ $i->{Column_name} } = $i;
4060    printlog "unique $i->{Column_name}'s: $i->{rec}";    #" $i->{reckey}";
4061  }
4062  printlog
4063"averages: file copies: $byname{tiger}{reckey}, files per user: $byname{dcuser}{reckey}, files per hub: $byname{host}{reckey}, file names per hash:",
4064    ( $byname{name}{reckey} / $byname{tiger}{reckey} ), ", file size: ", int( $size / $total )
4065    if $byname{tiger};
4066  printlog "files per host: $byname{host}{reckey},", "file size:", int( $size / $total ) unless $byname{tiger};
4067  #printlog Dumper $bykey;
4068  #warn "dddd";
4069  return 0;
4070};
4071$program{ psmisc::program('mrtg') }{'func'} ||= sub {
4072  my ( $first, $second );
4073
4074=c
4075  local $config{'allow_auto_repair'}   = 0;
4076  local $config{'sql_reconnect_tries'} = 0;
4077  local $config{'sql_reconnect_sleep'} = 0;
4078  local $config{'sql_error_tries'}     = 0;
4079  local $config{'sql_error_sleep'}     = 0;
4080=cut
4081
4082  $static{'db'}->retry_off();
4083  if ( $_[0] eq 'online' ) {
4084    ( $first, $second ) = @{
4085      $static{'db'}->line(
4086"SELECT SUM($static{'db'}->{'table_prefix'}$config{'sql_tresource'}.files) AS files, SUM($static{'db'}->{'table_prefix'}$config{'sql_tresource'}.size) AS size FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq LEFT JOIN $tq$static{'db'}->{'table_prefix'}$config{'sql_thost'}$tq USING (host) WHERE $static{'db'}->{'table_prefix'}$config{'sql_tresource'}.files > ${vq}0${vq} AND $static{'db'}->{'table_prefix'}$config{'sql_thost'}.time > '"
4087          . ( int( time() ) + $config{'timediff'} - $config{'online_minutes'} * 60 ) . "'"
4088      )
4089      }{ ( 'files', 'size' ) };
4090    $first  /= $config{'mrtg_online_files'}, $first  = int($first)  if $config{'mrtg_online_files'};
4091    $second /= $config{'mrtg_online_bytes'}, $second = int($second) if $config{'mrtg_online_bytes'};
4092  } else {
4093    ($first) = @{
4094      $static{'db'}->line(
4095"SELECT count(*) as shared FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_tresource'}$tq LEFT JOIN $tq$static{'db'}->{'table_prefix'}$config{'sql_thost'}$tq USING (host) WHERE $static{'db'}->{'table_prefix'}$config{'sql_tresource'}.files > ${vq}0${vq} AND $static{'db'}->{'table_prefix'}$config{'sql_thost'}.time > '"
4096          . ( int( time() ) + $config{'timediff'} - $config{'online_minutes'} * 60 ) . "'"
4097      )
4098      }{ ('shared') };
4099    ($second) = @{
4100      $static{'db'}->line(
4101            "SELECT count(*) as now   FROM $tq$static{'db'}->{'table_prefix'}$config{'sql_thost'}$tq WHERE ${rq}time${rq}>$vq"
4102          . ( int( time() ) - $config{'online_minutes'} * 60 )
4103          . $vq
4104      )
4105      }{ ('now') };
4106  }
4107  if ( $config{'mrtg_out'} and open( MF, '>', $config{'mrtg_out'} ) ) {
4108    print MF"$first\n$second\nz\nz\n";
4109    close(MF);
4110  } else {
4111    print "$first\n$second\nz\nz\n";
4112  }
4113  $static{'db'}->retry_on();
4114  return 0;
4115};
4116$program{ psmisc::program('cycle') }{'func'} ||= sub {
4117  if ( $config{'periodic_func'} ) {
4118    while ( !scan_stop ) {
4119      my $tim = timer();
4120      $config{'periodic_func'}->() if $config{'periodic_func'};
4121      printlog(
4122        'time', "Cycle done per",
4123        $tim->(),
4124        "sec; Sleeping $config{'periodic_time'} seconds to ",
4125        psmisc::human( 'date_time', int( time() ) + $config{'periodic_time'} )
4126      );
4127      last if $config{'cycle_once'};
4128      mysleep( $config{'periodic_time'} );
4129    }
4130  }
4131};
4132$program{ psmisc::program('mystatus') }{'func'} ||= sub {
4133  $static{'db'}->query_log('SHOW FULL PROCESSLIST');
4134
4135=c
4136  if ( $config{'my_dump_file'} and open( FM, '>>', $config{'my_dump_file'} ) ) {
4137    local $config{'sql_use_dbi'} = 0;
4138    print FM $static{'db'}->query('SHOW FULL PROCESSLIST');
4139    close(FM);
4140  }
4141=cut
4142
4143};
4144
4145=for SIG tests
4146  psmisc::program('dummy');
4147  $program{psmisc::program()}{'func'} ||= sub {
4148    $_[0] = 10 if $_[0] eq 'dummy';
4149    printlog('info', 'dummy', $_), sleep(1) for 0..$_[0];
4150  };
4151=cut
4152
4153$program{ psmisc::program('applyfilter') }{'func'} ||= sub {
4154  @_ = qw(query file word) unless @_;
4155  for my $table (@_) {
4156    #printlog "[$table],", $config{'sql'}{'table_param'}{ $config{ 'sql_t' . $table . 'stat' } }{'filter'}, ;
4157    next unless my $filter = $config{'sql'}{'table_param'}{ $config{ 'sql_t' . $table . 'stat' } }{'filter'};
4158    my $by = 10000;
4159    #my $by = 5;
4160    printlog $table, $filter;
4161    my $now = 0;
4162    my $del = 0;
4163    while (1) {
4164      my @rows = $static{'db'}->query( "SELECT * from $tq" . $config{ 'sql_t' . $table . 'stat' } . "$tq LIMIT $now, $by" );
4165      for my $row (@rows) {
4166        next unless ref $filter eq 'CODE' and $filter->( undef, $row );
4167        $static{'db'}->do( "DELETE FROM $tq"
4168            . $config{ 'sql_t' . $table . 'stat' }
4169            . "$tq WHERE $table="
4170            . $static{'db'}->quote( $row->{$table} ) );
4171        ++$del;
4172      }
4173      $now += $by;
4174      last if scalar @rows < $by;
4175    }
4176    printlog "deleted $del";
4177  }
4178};
4179$config{'killer_time'}     ||= 20;
4180$config{'killer_time_max'} ||= 40;
4181$config{'killer_sleep'}    ||= 10;
4182@{ $config{'killer_state'} } = ( 'Sending data', 'Sorting result', 'FULLTEXT initialization', 'Waiting for table', 'Locked' );
4183$config{'killer_commands'} ||= "select|update";
4184$config{'killer_mask'} = qr{^\s*(?:/\*.*?\*/)?\s*(?:$config{'killer_commands'})}i;
4185$config{'log_kill'} ||= 0;
4186$program{ psmisc::program('killer') }{'func'} ||= sub {
4187
4188  for ( 1 .. $_[0] || 1 ) {
4189    sleep( $config{'killer_sleep'} ), next unless psmisc::lock( 'killer', timeout => 10, old => 5 * 60 );
4190    $static{'db'} = pssql->new( %{ $config{'sql'} || {} },
4191      ( ref $config{'sql_base_web'} eq 'HASH' ? %{ $config{'sql_base_web'} } : 'database' => $config{'sql_base_web'} ) )
4192      if $static{'db'}{'database'} ne $config{'sql_base_web'};
4193    local $static{'db'}->{'auto_repair'}     = 0;
4194    local $static{'db'}->{'reconnect_tries'} = 1 unless $_[0];
4195    local $static{'db'}->{'connect_tries'}   = 1 unless $_[0];
4196    local $static{'db'}->{'reconnect_sleep'} = 0 unless $_[0];
4197    local $static{'db'}->{'error_tries'}     = 0 unless $_[0];
4198    local $static{'db'}->{'error_sleep'}     = 0 unless $_[0];
4199    local $config{'log_default'}             = 'kill.log';
4200    my $killed;
4201
4202    for my $row ( sort { $b->{'Time'} <=> $a->{'Time'} } @{ $static{'db'}->query('SHOW FULL PROCESSLIST') } ) {
4203      next unless $row->{'Id'} and $row->{'db'} and $row->{'Time'} and $row->{'State'};
4204      printlog( 'kill', 'other base', $_, map( "[$_:$row->{$_}] ", keys %{$row} ) ), next
4205        unless grep { $row->{'db'} eq $_ } array( $config{'killer_base'} ), $config{'sql_base_web'};
4206      printlog( 'kill', 'other state', $_, map( "[$_:$row->{$_}] ", keys %{$row} ) ), next
4207        unless grep { $row->{'State'} } @{ $config{'killer_state'} };
4208      printlog( 'kill', 'no time', $_, map( "[$_:$row->{$_}] ", keys %{$row} ) ), next
4209        unless $row->{'Time'} > $config{'killer_time'};
4210      printlog( 'kill', "not in mask [$row->{'Info'}]" ), next
4211        if $config{'killer_mask'} and !( $row->{'Info'} =~ $config{'killer_mask'} );
4212      printlog( 'info', "Killing query:", map( "[$_:$row->{$_}] ", keys %{$row} ) );
4213      $static{'db'}->do("KILL QUERY $row->{'Id'}");
4214      ++$killed;
4215      last if $killed and $_[0] and $row->{'Time'} < $config{'killer_time_max'};
4216    }
4217    psmisc::unlock('killer');
4218    sleep $config{'killer_sleep'} if $_[0];
4219  }
4220};
4221$program{ psmisc::program('stop') }{'func'} ||= sub {
4222  psmisc::file_rewrite( $config{'stop_file'}, time );
4223  0;
4224};
4225#unless ( $config{'module_crawler'} ) {
4226psmisc::program_run($param) unless (caller);
4227
4228sub make_stat {
4229  return (
4230    ( defined $work{'start_timer'} ? ( '[run time', psmisc::human( 'time_period', $work{'start_timer'}->() ), ']', ) : () ),
4231    (
4232      $stat{'total_bytes'}
4233      ? " [total $stat{'total_bytes'} bytes per "
4234        . psmisc::human( 'time_period', $work{'start_timer'}->() ) . ' '
4235        . ( $work{'start_timer'}->() ? 'at ' . int( $stat{'total_bytes'} / $work{'start_timer'}->() ) : '' ) . ' b/s]'
4236      : ''
4237    ), (
4238      $stat{'files'}
4239      ? "$stat{'files'} files $stat{'dirs'} dirs scanned."
4240        . ( $stat{'skipped'} ? " ($stat{'skipped'} skipped)" : '' )
4241        . (
4242        $work{'start_timer'}->() ? '(' . psmisc::human( 'float', $stat{'files'} / $work{'start_timer'}->() ) . ' fps)' : '' )
4243      : ''
4244    ),
4245    ( int( $stat{'ftp_reconnects'} ) > 1 ? " with $stat{'ftp_reconnects'} recon." : '' ),
4246    ( %stat ? ( 'Stat: ', map { "$_=$stat{$_};" } keys %stat ) : () ),
4247    ,
4248    ( ref $static{'db'} ? ( '[', $static{'db'}->stat_string(), ']' ) : () ),
4249    times,
4250  );
4251}
4252#END {printlog('WOW3', $static{'db'}->{'dbh'});};
4253#sub
4254END {
4255  #printlog('dbh4.0=',$static{'db'}->{'dbh'});
4256  #close_out_file();
4257  #printlog('dbh4.1=',$static{'db'}->{'dbh'});
4258  $static{'db'}->flush_insert() if $static{'db'};
4259  printlog( 'info', 'Finished [', join ' ', map( { $_ . ( $param->{$_} ne '' ? "=$param->{$_}" : '' ) } keys %$param ),
4260    '] ', make_stat() )
4261    if $param
4262      and %$param
4263      and !$config{'nolog_startstop'};
4264  #$static{'db'}->log_stat();
4265}
4266#print Dumper  \%program;
4267#print Dumper  \%config;
4268#print Dumper  $param;
4269#print "\n\n param:\n";for (sort keys %$param) { print "$_ = $param->{$_}\n"; } # debug #
4270#print "\n\n config:\n";for (sort keys %config) { print "$_ = $config{$_}\n"; } # debug #
4271#print "\n\n config:\n", Dumper\%config; # debug #
4272#warn "\n\n configkeys:\n", scalar keys %config; # debug #
4273#print "\n\n INC:\n", Dumper(\%INC); # debug #
4274#$static{'db'}->query('error');
4275#$static{'db'}->do('error');
4276#http_dl('127.0.0.1', sub {print "cb:",$_[0], "\n"});
42771;
4278