1package FEAR::API::Agent; 2use strict; 3use Spiffy -base; 4use FEAR::API::SourceFilter; 5use WWW::Mechanize; 6use Carp; 7our @ISA = qw(WWW::Mechanize); 8 9sub new() { 10 my $self = super; 11 $self->agent_alias('Windows IE 6'); 12 $self->cookie_jar({ file => "/tmp/fear-".time()."-cookies.txt" }); 13 $self->max_redirect(3); 14 $self; 15} 16 17use Encode; 18use HTML::Encoding 'encoding_from_http_message'; 19sub _convert_to_utf8 { 20 if($self->res){ 21# use Data::Dumper; 22# print Dumper $self->res; 23 eval { 24 my $enc = encoding_from_http_message($self->res); 25 $self->{content} = decode($enc => $self->content); 26 } 27 } 28} 29 30sub force_content_type { 31 $self->{forced_ct} = shift; 32} 33 34sub get_content { 35 use Text::Iconv; 36 my $url = shift; 37 $self->get($url); 38 if( $self->res->is_success ){ 39# print $self->res->content_type,$/; 40 if( $self->res->content_type =~ /text/o){ 41 $self->{ct} = $self->{forced_ct} if $self->{forced_ct}; 42 $self->_convert_to_utf8; 43 # Since document is translated to UTF-8, so links MUST be re-extracted 44 $self->_extract_links(); 45 } 46 return $self->content; 47 } 48} 49 50my %name_to_number = qw(url 0 51 text 1 52 name 2 53 tag 3 54 base 4 55 attr 5 56 referer 6 57 ); 58 59sub links { 60 foreach my $link (@{$self->{links}}){ 61 $link->[0] = $link->url_abs()->as_string; 62 $link->[6] = $self->uri; # referer 63 } 64 super; 65} 66 67 68chain_sub sort_links { 69 @{$self->{links}} = 70 sort { 71 $_[0] ? 72 ( 73 ref($_[0]) eq 'CODE' ? 74 $_[0]->($a, $b) 75 : 76 $a->[$name_to_number{$_[0]}] cmp $b->[$name_to_number{$_[0]}] 77 ) 78 : 79 $a->[0] cmp $b->[0] 80 } $self->links; 81} 82 83 84chain_sub keep_links { 85 my ($filter, $field); 86 if(@_ == 2){ 87 $filter = $_[1]; 88 $field = $name_to_number{$_[0]}; 89 } 90 else { 91 $filter = $_[0]; 92 $field = 0; 93 } 94 @{$self->{links}} = grep { 95 ref $filter eq 'CODE' ? $filter->($field) : $_->[$field] =~ /$filter/; 96 } $self->links; 97} 98 99chain_sub remove_links { 100 my ($filter, $field); 101 if(@_ == 2){ 102 $filter = $_[1]; 103 $field = $name_to_number{$_[0]}; 104 } 105 else { 106 $filter = $_[0]; 107 $field = 0; 108 } 109 @{$self->{links}} = grep { 110 not ( 111 ref $filter eq 'CODE' ? $filter->($field) : $_->[$field] =~ /$filter/ 112 ); 113 } $self->links; 114} 115 116 117 118 1191; 120__END__ 121