1package Net::Whois::Raw::Common;
2$Net::Whois::Raw::Common::VERSION = '2.99032';
3# ABSTRACT: Helper for Net::Whois::Raw.
4
5use Encode;
6use warnings;
7use strict;
8use Regexp::IPv6 qw($IPv6_re);
9use Net::Whois::Raw::Data ();
10use Net::Whois::Raw ();
11
12use utf8;
13
14# func prototype
15sub untaint(\$);
16
17# get whois from cache
18sub get_from_cache {
19    my ($query, $cache_dir, $cache_time) = @_;
20
21    return undef unless $cache_dir;
22    mkdir $cache_dir unless -d $cache_dir;
23
24    my $now = time;
25    # clear the cache
26    foreach my $fn ( glob("$cache_dir/*") ) {
27        my $mtime = ( stat($fn) )[9] or next;
28        my $elapsed = $now - $mtime;
29        untaint $fn; untaint $elapsed;
30        unlink $fn if ( $elapsed / 60 >= $cache_time );
31    }
32
33    my $result;
34    if ( -e "$cache_dir/$query.00" ) {
35        my $level = 0;
36        while ( open( my $cache_fh, '<', "$cache_dir/$query.".sprintf( "%02d", $level ) ) ) {
37            $result->[$level]->{srv} = <$cache_fh>;
38            chomp $result->[$level]->{srv};
39            $result->[$level]->{text} = join "", <$cache_fh>;
40            if ( !$result->[$level]->{text} and $Net::Whois::Raw::CHECK_FAIL ) {
41                $result->[$level]->{text} = undef ;
42            }
43            else {
44                $result->[$level]->{text} = decode_utf8( $result->[$level]->{text} );
45            }
46            $level++;
47            close $cache_fh;
48        }
49    }
50
51    return $result;
52}
53
54# write whois to cache
55sub write_to_cache {
56    my ($query, $result, $cache_dir) = @_;
57
58    return unless $cache_dir && $result;
59    mkdir $cache_dir unless -d $cache_dir;
60
61    untaint $query; untaint $cache_dir;
62
63    my $level = 0;
64    foreach my $res ( @{$result} ) {
65        local $res->{text} = $res->{whois} if not exists $res->{text};
66
67        next if defined $res->{text} && !$res->{text} || !defined $res->{text};
68        my $enc_text = $res->{text};
69        utf8::encode( $enc_text );
70        my $postfix = sprintf("%02d", $level);
71        if ( open( my $cache_fh, '>', "$cache_dir/$query.$postfix" ) ) {
72            print $cache_fh $res->{srv} ? $res->{srv} :
73                ( $res->{server} ? $res->{server} : '')
74                , "\n";
75
76            print $cache_fh $enc_text ? $enc_text : '';
77
78            close $cache_fh;
79            chmod 0666, "$cache_dir/$query.$postfix";
80        }
81        $level++;
82    }
83
84}
85
86# remove copyright messages, check for existance
87sub process_whois {
88    my ( $query, $server, $whois, $CHECK_FAIL, $OMIT_MSG, $CHECK_EXCEED ) = @_;
89
90    $server = lc $server;
91    my ( $name, $tld ) = split_domain( $query );
92
93    # use string as is
94    no utf8;
95
96    if ( $CHECK_EXCEED ) {
97        my $exceed = $Net::Whois::Raw::Data::exceed{ $server };
98
99        if ( $exceed && $whois =~ /$exceed/s) {
100            return $whois, 'Connection rate exceeded';
101        }
102    }
103
104    $whois = _strip_trailer_lines( $whois )  if $OMIT_MSG;
105
106    if ( $CHECK_FAIL || $OMIT_MSG ) {
107
108        my $notfound = $Net::Whois::Raw::Data::notfound{ $server };
109        my $strip = $Net::Whois::Raw::Data::strip{ $server };
110        my @strip = $strip ? @$strip : ();
111        my @lines;
112
113        MAIN:
114        for ( split /\n/, $whois ) {
115            if ( $CHECK_FAIL && $notfound && /$notfound/ ) {
116                return undef, "Not found";
117            }
118
119            if ( $OMIT_MSG ) {
120                for my $re ( @strip ) {
121                    next MAIN  if /$re/;
122                }
123            }
124
125            push @lines, $_;
126        }
127
128        $whois = join "\n", @lines, '';
129
130        if ( $OMIT_MSG ) {
131            $whois =~ s/(?:\s*\n)+$/\n/s;
132            $whois =~ s/^\n+//s;
133            $whois =~ s|\n{3,}|\n\n|sg;
134        }
135    }
136
137    if ( defined $Net::Whois::Raw::Data::postprocess{ $server } ) {
138        $whois = $Net::Whois::Raw::Data::postprocess{ $server }->( $whois );
139    }
140
141    if ( defined $Net::Whois::Raw::POSTPROCESS{ $server } ) {
142        $whois = $Net::Whois::Raw::POSTPROCESS{ $server }->( $whois );
143    }
144
145    if ( defined $Net::Whois::Raw::Data::codepages{ $server } ) {
146        $whois = decode( $Net::Whois::Raw::Data::codepages{ $server }, $whois );
147    }
148    else {
149        utf8::decode( $whois );
150    }
151
152    return $whois, undef;
153}
154
155# Tries to strip trailer lines of whois
156sub _strip_trailer_lines {
157    my ( $whois ) = @_;
158
159    for my $re ( @Net::Whois::Raw::Data::strip_regexps ) {
160        $whois =~ s/$re//;
161    }
162
163    return $whois;
164}
165
166# get whois-server for domain / tld
167sub get_server {
168    my ( $dom, $is_ns, $tld ) = @_;
169
170    $tld ||= get_dom_tld( $dom );
171    $tld = uc $tld;
172
173    if ( grep { $_ eq $tld } @Net::Whois::Raw::Data::www_whois ) {
174        return 'www_whois';
175    }
176
177    if ( $is_ns ) {
178        return $Net::Whois::Raw::Data::servers{ $tld . '.NS' }
179            || $Net::Whois::Raw::Data::servers{ 'NS' };
180    }
181
182    return lc( $Net::Whois::Raw::Data::servers{ $tld } || "whois.nic.$tld" );
183}
184
185sub get_real_whois_query{
186    my ( $whoisquery, $srv, $is_ns ) = @_;
187
188    $srv .= '.ns'  if $is_ns;
189
190    if ( $srv eq 'whois.crsnic.net' && domain_level( $whoisquery ) == 2 ) {
191        return "domain $whoisquery";
192    }
193    elsif ( $Net::Whois::Raw::Data::query_prefix{ $srv } ) {
194        return $Net::Whois::Raw::Data::query_prefix{ $srv } . $whoisquery;
195    }
196
197    return $whoisquery;
198}
199
200# get domain TLD
201sub get_dom_tld {
202    my ($dom) = @_;
203
204    my $tld;
205    if ( is_ipaddr($dom) || is_ip6addr($dom) ) {
206        $tld = "IP";
207    }
208    elsif ( domain_level($dom) == 1 ) {
209        $tld = "NOTLD";
210    }
211    else {
212        my @tokens = split( /\./, $dom );
213
214        # try to get the longest known tld for this domain
215        for my $i ( 1..$#tokens ) {
216            my $tld_try = join '.', @tokens[$i..$#tokens];
217            if ( exists $Net::Whois::Raw::Data::servers{ uc $tld_try } ) {
218                $tld = $tld_try;
219                last;
220            }
221        }
222
223        $tld = $tokens[-1] unless $tld;
224    }
225
226    return $tld;
227}
228
229# get URL for query via HTTP
230# %param: domain*
231sub get_http_query_url {
232    my ($domain) = @_;
233
234    my ($name, $tld) = split_domain($domain);
235    my @http_query_data;
236    # my ($url, %form);
237
238    if ($tld eq 'ru' || $tld eq 'su') {
239        my $data = {
240            url  => "http://www.nic.ru/whois/?domain=$name.$tld",
241            form => '',
242        };
243        push @http_query_data, $data;
244    }
245    elsif ($tld eq 'ip') {
246        my $data = {
247            url  => "http://www.nic.ru/whois/?ip=$name",
248            form => '',
249        };
250        push @http_query_data, $data;
251    }
252    elsif ($tld eq 'ws') {
253        my $data = {
254            url  => "http://worldsite.ws/utilities/lookup.dhtml?domain=$name&tld=$tld",
255            form => '',
256        };
257        push @http_query_data, $data;
258    }
259    elsif ($tld eq 'kz') {
260        my $data = {
261            url  => "http://www.nic.kz/cgi-bin/whois?query=$name.$tld&x=0&y=0",
262            form => '',
263        };
264        push @http_query_data, $data;
265    }
266    elsif ($tld eq 'vn') {
267        # VN doesn't have web whois at the moment...
268        my $data = {
269            url  => "http://www.tenmien.vn/jsp/jsp/tracuudomain1.jsp",
270            form => {
271                cap2        => ".$tld",
272                referer     => 'http://www.vnnic.vn/english/',
273                domainname1 => $name,
274            },
275        };
276        push @http_query_data, $data;
277    }
278    elsif ($tld eq 'ac') {
279        my $data = {
280            url  => "http://nic.ac/cgi-bin/whois?query=$name.$tld",
281            form => '',
282        };
283        push @http_query_data, $data;
284    }
285    elsif ($tld eq 'bz') {
286        my $data = {
287            url  => "http://www.test.bz/Whois/index.php?query=$name&output=nice&dotname=.$tld&whois=Search",
288        };
289        push @http_query_data, $data;
290    }
291    elsif ($tld eq 'tj') {
292        #my $data = {
293        #    url  => "http://get.tj/whois/?lang=en&domain=$domain",
294        #    from => '',
295        #};
296        #push @http_query_data, $data;
297
298        # first level on nic.tj
299        #$data = {
300        #    url  => "http://www.nic.tj/cgi/lookup2?domain=$name",
301        #    from => '',
302        #};
303        #push @http_query_data, $data;
304
305        # second level on nic.tj
306        my $data = {
307            url  => "http://www.nic.tj/cgi/whois?domain=$name",
308            from => '',
309        };
310        push @http_query_data, $data;
311
312        #$data = {
313        #    url  => "http://ns1.nic.tj/cgi/whois?domain=$name",
314        #    from => '',
315        #};
316        #push @http_query_data, $data;
317
318        #$data = {
319        #    url  => "http://62.122.137.16/cgi/whois?domain=$name",
320        #    from => '',
321        #};
322        #push @http_query_data, $data;
323    }
324
325    # return $url, %form;
326    return \@http_query_data;
327}
328
329sub have_reserve_url {
330    my ( $tld ) = @_;
331
332    my %tld_list = (
333        'tj' => 1,
334    );
335
336    return defined $tld_list{$tld};
337}
338
339# Parse content received from HTTP server
340# %param: resp*, tld*
341sub parse_www_content {
342    my ($resp, $tld, $url, $CHECK_EXCEED) = @_;
343
344    chomp $resp;
345    $resp =~ s/\r//g;
346
347    my $ishtml;
348
349    if ( $tld eq 'ru' || $tld eq 'su' ) {
350
351        $resp = decode( 'koi8-r', $resp );
352
353        (undef, $resp) = split('<script>.*?</script>',$resp);
354        ($resp) = split('</td></tr></table>', $resp);
355        $resp =~ s/&nbsp;/ /gi;
356        $resp =~ s/<([^>]|\n)*>//gi;
357
358        return 0 if $resp=~ m/Доменное имя .*? не зарегистрировано/i;
359
360        $resp = 'ERROR' if $resp =~ m/Error:/i || $resp !~ m/Информация о домене .+? \(по данным WHOIS.RIPN.NET\):/;
361        #TODO: errors
362
363    }
364    elsif ($tld eq 'ip') {
365
366        $resp = decode_utf8( $resp );
367
368        return 0 unless $resp =~ m|<p ID="whois">(.+?)</p>|s;
369
370        $resp = $1;
371
372        $resp =~ s|<a.+?>||g;
373        $resp =~ s|</a>||g;
374        $resp =~ s|<br>||g;
375        $resp =~ s|&nbsp;| |g;
376
377    }
378    elsif ($tld eq 'ws') {
379
380        $resp = decode_utf8( $resp );
381
382        if ($resp =~ /Whois information for .+?:(.+?)<table>/s) {
383            $resp = $1;
384            $resp =~ s|<font.+?>||isg;
385            $resp =~ s|</font>||isg;
386
387            $ishtml = 1;
388        }
389        else {
390            return 0;
391        }
392
393    }
394    elsif ($tld eq 'kz') {
395
396        $resp = decode_utf8( $resp );
397
398        if ($resp =~ /Domain Name\.{10}/s && $resp =~ /<pre>(.+?)<\/pre>/s) {
399            $resp = $1;
400        }
401        else {
402            return 0;
403        }
404    }
405    elsif ($tld eq 'vn') {
406
407        $resp = decode_utf8( $resp );
408
409        if ($resp =~ /\(\s*?(Domain .+?:\s*registered)\s*?\)/i )  {
410            $resp = $1;
411        }
412        else {
413            return 0;
414        }
415
416        #
417        # if ($resp =~/#ENGLISH.*?<\/tr>(.+?)<\/table>/si) {
418        #    $resp = $1;
419        #    $resp =~ s|</?font.*?>||ig;
420        #    $resp =~ s|&nbsp;||ig;
421        #    $resp =~ s|<br>|\n|ig;
422        #    $resp =~ s|<tr>\s*<td.*?>\s*(.*?)\s*</td>\s*<td.*?>\s*(.*?)\s*</td>\s*</tr>|$1 $2\n|isg;
423        #    $resp =~ s|^\s*||mg;
424        #
425    }
426    elsif ($tld eq 'ac') {
427
428        $resp = decode_utf8( $resp );
429
430        if ($CHECK_EXCEED && $resp =~ /too many requests/is) {
431            die "Connection rate exceeded";
432        }
433        elsif ($resp =~ /<!--- Start \/ Domain Info --->(.+?)<!--- End \/ Domain Info --->/is) {
434            $resp = $1;
435            $resp =~ s|</?table.*?>||ig;
436            $resp =~ s|</?b>||ig;
437            $resp =~ s|</?font.*?>||ig;
438            $resp =~ s|<tr.*?>\s*<td.*?>\s*(.*?)\s*</td>\s*<td.*?>\s*(.*?)\s*</td>\s*</tr>|$1 $2\n|isg;
439            $resp =~ s|</?tr>||ig;
440            $resp =~ s|</?td>||ig;
441            $resp =~ s|^\s*||mg;
442        }
443        else {
444            return 0;
445        }
446
447    }
448    elsif ($tld eq 'bz') {
449
450        $resp = decode_utf8( $resp );
451
452        if ( $resp =~ m{
453                <blockquote>
454                (.+)
455                </blockquote>
456            }xms )
457        {
458            $resp = $1;
459            if ( $resp =~ /NOT\s+FOUND/ || $resp =~ /No\s+Domain/ ) {
460                # Whois info not found
461                return 0;
462            }
463
464            $resp =~ s|<[^<>]+>||ig;
465        }
466        else {
467            return 0;
468        }
469    }
470    elsif ( $tld eq 'tj' && $url =~ m|^http\://get\.tj| ) {
471        $resp = decode_utf8( $resp );
472
473        if ($resp =~ m|<!-- Content //-->\n(.+?)<!-- End Content //-->|s ) {
474            $resp = $1;
475            $resp =~ s|<[^<>]+>||ig;
476            $resp =~ s|Whois\n|\n|s;
477
478            return 0 if $resp =~ m|Domain \S+ is free|s;
479
480            $resp =~ s|Domain \S+ is already taken\.\n|\n|s;
481            $resp =~ s|&nbsp;| |ig;
482            $resp =~ s|&laquo;|"|ig;
483            $resp =~ s|&raquo;|"|ig;
484            $resp =~ s|\n\s+|\n|sg;
485            $resp =~ s|\s+\n|\n|sg;
486            $resp =~ s|\n\n|\n|sg;
487        }
488        else {
489            return 0;
490        }
491
492    }
493    elsif ( $tld eq 'tj' && $url =~ m|\.nic\.tj/cgi/lookup| ) {
494
495        $resp = decode_utf8( $resp );
496
497        if ($resp =~ m|<div[0-9a-z=\" ]*>\n?(.+?)\n?</div>|s) {
498            $resp = $1;
499
500            return 0 if $resp =~ m|may be available|s;
501
502            $resp =~ s|\n\s+|\n|sg;
503            $resp =~ s|\s+\n|\n|sg;
504            $resp =~ s|\n\n|\n|sg;
505            $resp =~ s|<br.+||si;
506        }
507        else {
508            return 0;
509        }
510
511    }
512    elsif ( $tld eq 'tj' && $url =~ m|\.nic\.tj/cgi/whois| || $url =~ m|62\.122\.137\.16| ) {
513        $resp = decode_utf8( $resp );
514
515        if ( $resp =~ m{ <table [^>]*? > (.+) (:? </table> ) }sxmi ) {
516            $resp = $1;
517            $resp =~ s|</?tr>||ig;
518            $resp =~ s|<td>| |ig;
519            $resp =~ s|</?td[0-9a-z=\" ]*>||ig;
520            $resp =~ s|</?col[0-9a-z=\" ]*>||ig;
521            $resp =~ s|&laquo;|"|ig;
522            $resp =~ s|&raquo;|"|ig;
523            $resp =~ s|&nbsp;| |ig;
524            $resp =~ s|\n\s+|\n|sg;
525            $resp =~ s|\s+\n|\n|sg;
526            $resp =~ s|\n\n|\n|sg;
527        }
528        else {
529            return 0;
530        }
531
532    }
533    else {
534        return 0;
535    }
536
537    return $resp;
538}
539
540# check, if it's IP-address?
541sub is_ipaddr {
542    $_[0] =~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/;
543}
544
545# check, if it's IPv6-address?
546sub is_ip6addr {
547    my ( $ip ) = @_;
548
549    return 0 unless defined $ip;
550
551    return $ip =~ /^$IPv6_re$/;
552}
553
554# get domain level
555sub domain_level {
556    my ($str) = @_;
557
558    my $dotcount = $str =~ tr/././;
559
560    return $dotcount + 1;
561}
562
563# split domain on name and TLD
564sub split_domain {
565    my ($dom) = @_;
566
567    my $tld = get_dom_tld( $dom );
568
569    my $name;
570    if (uc $tld eq 'IP' || $tld eq 'NOTLD') {
571        $name = $dom;
572    }
573    else {
574        $name = substr( $dom, 0, length($dom) - length($tld) - 1 );
575    }
576
577    return ($name, $tld);
578}
579
580#
581sub dlen {
582    my ($str) = @_;
583
584    return length($str) * domain_level($str);
585}
586
587# clear the data's taintedness
588sub untaint (\$) {
589    my ($str) = @_;
590
591    $$str =~ m/^(.*)$/;
592    $$str = $1;
593}
594
5951;
596
597__END__
598
599=pod
600
601=encoding UTF-8
602
603=head1 NAME
604
605Net::Whois::Raw::Common - Helper for Net::Whois::Raw.
606
607=head1 VERSION
608
609version 2.99032
610
611=head1 AUTHOR
612
613Alexander Nalobin <alexander@nalobin.ru>
614
615=head1 COPYRIGHT AND LICENSE
616
617This software is copyright (c) 2002-2021 by Alexander Nalobin.
618
619This is free software; you can redistribute it and/or modify it under
620the same terms as the Perl 5 programming language system itself.
621
622=cut
623