1package FEAR::API::Agent;
2use strict;
3use Spiffy -base;
4use FEAR::API::SourceFilter;
5use WWW::Mechanize;
6use Carp;
7our @ISA = qw(WWW::Mechanize);
8
9sub new() {
10  my $self = super;
11  $self->agent_alias('Windows IE 6');
12  $self->cookie_jar({ file => "/tmp/fear-".time()."-cookies.txt" });
13  $self->max_redirect(3);
14  $self;
15}
16
17use Encode;
18use HTML::Encoding 'encoding_from_http_message';
19sub _convert_to_utf8 {
20    if($self->res){
21#	use Data::Dumper;
22#	print Dumper $self->res;
23	eval {
24	    my $enc = encoding_from_http_message($self->res);
25	    $self->{content} = decode($enc => $self->content);
26	}
27    }
28}
29
30sub force_content_type {
31    $self->{forced_ct} = shift;
32}
33
34sub get_content {
35  use Text::Iconv;
36  my $url = shift;
37  $self->get($url);
38  if( $self->res->is_success ){
39#      print $self->res->content_type,$/;
40    if( $self->res->content_type =~ /text/o){
41      $self->{ct} = $self->{forced_ct} if $self->{forced_ct};
42      $self->_convert_to_utf8;
43      # Since document is translated to UTF-8, so links MUST be re-extracted
44      $self->_extract_links();
45    }
46    return $self->content;
47  }
48}
49
50my %name_to_number = qw(url 0
51			text 1
52			name 2
53			tag 3
54			base 4
55			attr 5
56			referer 6
57		       );
58
59sub links {
60  foreach my $link (@{$self->{links}}){
61    $link->[0] = $link->url_abs()->as_string;
62    $link->[6] = $self->uri; # referer
63  }
64  super;
65}
66
67
68chain_sub sort_links {
69  @{$self->{links}} =
70      sort {
71	  $_[0] ?
72	      (
73	       ref($_[0]) eq 'CODE' ?
74	       $_[0]->($a, $b)
75	       :
76	       $a->[$name_to_number{$_[0]}] cmp $b->[$name_to_number{$_[0]}]
77	       )
78	      :
79	      $a->[0] cmp $b->[0]
80	  } $self->links;
81}
82
83
84chain_sub keep_links {
85  my ($filter, $field);
86  if(@_ == 2){
87    $filter = $_[1];
88    $field = $name_to_number{$_[0]};
89  }
90  else {
91    $filter = $_[0];
92    $field = 0;
93  }
94  @{$self->{links}} = grep {
95      ref $filter eq 'CODE' ? $filter->($field) : $_->[$field] =~ /$filter/;
96  } $self->links;
97}
98
99chain_sub remove_links {
100  my ($filter, $field);
101  if(@_ == 2){
102    $filter = $_[1];
103    $field = $name_to_number{$_[0]};
104  }
105  else {
106    $filter = $_[0];
107    $field = 0;
108  }
109  @{$self->{links}} = grep {
110      not (
111	   ref $filter eq 'CODE' ? $filter->($field) : $_->[$field] =~ /$filter/
112	   );
113  } $self->links;
114}
115
116
117
118
1191;
120__END__
121