#!/usr/bin/perl
#$Revision: 1.4 $$Date: 2007-11-11 20:26:22 $$Author: boumenot $
#######################################################################
# FIXME: this script screen scrapes the web to build the ItemSearch
# validate classes.  Unfortunately, this breaks too frequently.  A
# better way needs to be found.
#######################################################################

package main;
require 5.008_001;

use strict;
use warnings;

# FindBin must be loaded before "use lib" below, otherwise
# $FindBin::Bin is still undefined when the library path is built.
use FindBin;
use Getopt::Long;
use IO::File;
use Pod::Usage;
use LWP::Simple;
use Text::Template;
use Data::Dumper;
use File::Path;
use lib "$FindBin::Bin/../lib";
use HTML::TreeBuilder::XPath;
use Net::Amazon ();

# Base URL of the online developer guide.  Evaluated at call time, so
# it reflects the current value of $Net::Amazon::WSDL_DATE.
sub AWS4_ONLINE_HTML {
    return 'http://docs.amazonwebservices.com/AWSECommerceService/'
         . $Net::Amazon::WSDL_DATE . '/DG/';
}

# Locale => page (relative to AWS4_ONLINE_HTML) listing the ItemSearch
# parameters per SearchIndex.  Only the enabled locales are processed.
use constant AWS4_LOCALE_HTML => {
    'us' => 'USSearchIndexParamForItemsearch.html',
#   'de' => 'DESearchIndexParamForItemsearch.html',
#   'es' => 'ESSearchIndexParamForItemsearch.html',
#   'jp' => 'JPSearchIndexParamForItemsearch.html',
#   'it' => 'ITSearchIndexParamForItemsearch.html',
#   'uk' => 'UKSearchIndexParamForItemsearch.html',
#   'fr' => 'FRSearchIndexParamForItemsearch.html',
#   'ca' => 'CASearchIndexParamForItemsearch.html',
};

my $Opt_Debug     = 0;
my $Opt_Dest      = "../lib/Net/Amazon/Validate/ItemSearch";
my $Opt_Overwrite = 0;

GetOptions(
    "help|h"    => \&usage,
    "version|V" => \&version,
    "debug|D"   => \$Opt_Debug,
    "dest=s"    => \$Opt_Dest,
    "overwrite" => \$Opt_Overwrite,
    "<>"        => \&parameter,     # any non-option argument is an error
) or usage();

## main #########################################

unless (-d $Opt_Dest) {
    die "The directory $Opt_Dest does not exist!\n";
}

for my $locale (sort keys %{ AWS4_LOCALE_HTML() }) {
    my $link = AWS4_ONLINE_HTML() . AWS4_LOCALE_HTML->{$locale};
    print "fetching $link ...\n" if $Opt_Debug;

    # LWP::Simple::get() returns undef on failure; stop here rather
    # than generating libraries from an empty document.
    my $html = get($link);
    defined $html
        or die "Failed to fetch $link!\n";

    my $tree = HTML::TreeBuilder::XPath->new();
    $tree->parse($html);
    $tree->eof();

    # Every <h2> inside a "section" div is a candidate SearchIndex heading.
    my @search_indicies =
        map { $_->as_text } $tree->findnodes('//div[@class="section"]//h2');

    my %depts;      # SearchIndex name => [ parameter names, in page order ]
    my %upc;        # names offered for the UPC/EAN library
    my %keywords;   # locale => { SearchIndex names that accept "Keywords" }

    for my $search_index (@search_indicies) {
        my ($search_index_name) = $search_index =~ /SearchIndex:\s+(\w+)/;

        # Skip headings that are not "SearchIndex: <name>" at all.
        next unless defined $search_index_name;
        next if $search_index_name eq 'All';

        print $search_index_name."\n";
        $upc{$search_index_name}++;

        # NOTE(review): the heading text is interpolated into the XPath
        # string literal; a heading containing a double quote would
        # produce an invalid expression.
        my @parameters = map { $_->as_text } $tree->findnodes(
            qq{//div[\@class="section"]//h2[contains(text(),"$search_index")]/../../../..//li/p}
        );

        for my $parameter (@parameters) {
            print " -> $parameter\n";
            push @{ $depts{$search_index_name} }, $parameter;
            $keywords{$locale}{$search_index_name}++ if $parameter eq "Keywords";
        }
    }

    # Everything needed has been copied out as plain strings; release
    # the parse tree explicitly.
    $tree->delete;

    for my $dept (sort keys %depts) {
        dump_library($depts{$dept}, $locale, $dept);
        upc_add(\%upc, $depts{$dept});
    }

    # Sorted so the generated option lists (and therefore the fallback
    # default picked from them) are reproducible between runs.
    for my $kw_locale (sort keys %keywords) {
        my @indices = sort keys %{ $keywords{$kw_locale} };
        dump_library(\@indices, $kw_locale, "Keywords");
    }

    my @upc_names = sort keys %upc;
    my $type = ($locale eq 'us') ? 'UPC' : 'EAN';
    dump_library(\@upc_names, $locale, $type);
}

## subs #########################################

# Print the revision banner followed by the full POD, then exit.
# pod2usage() terminates the program itself with status 2.
sub usage {
    print '$Revision: 1.4 $$Date: 2007-11-11 20:26:22 $$Author: boumenot $ ', "\n";
    pod2usage(-verbose => 2, -exitval => 2);
}

# Print the revision banner and exit successfully.
sub version {
    print '$Revision: 1.4 $$Date: 2007-11-11 20:26:22 $$Author: boumenot $ ', "\n";
    exit(0);
}

# Getopt::Long "<>" handler: the script accepts no positional arguments.
sub parameter {
    my $param = shift;
    die "%Error: Unknown parameter: $param\n";
}

##################################################

# Attempt to pick a "favored" default for the different types of
# ItemSearch'es.  The favored list is returned in order of preference.
# The most preferred is Books because that was the default for AWS3.
# As Books is not available for all types of ItemSearch'es use other
# "favored" defaults.  They are Music, DVD, Software, etc. in that
# order.
# If none of those are a possible default then use the first
# item in the list of acceptable values.
#
# Takes a reference to the list of acceptable values and returns the
# chosen default (undef when the list is empty).

sub select_default {
    my ($aref) = @_;

    my %is_acceptable = map { $_ => 1 } @$aref;

    for my $favored_default (qw(Books Music DVD Software Title Keyword Keywords)) {
        return $favored_default if exists $is_acceptable{$favored_default};
    }

    return $aref->[0];
}

# Increment the counter in %$href for every name listed in @$aref.
sub upc_add {
    my ($href, $aref) = @_;
    $href->{$_}++ for @$aref;
}

# Render aws4-itemsearch.tmpl for one locale/department pair and write
# the result to "$Opt_Dest/$locale/$dept.pm".
#
#   $aref   - reference to the list of acceptable option names
#   $locale - locale code; also the sub-directory under $Opt_Dest
#   $dept   - department / ItemSearch type; also the module base name
#
# An existing file is left untouched (with a warning) unless
# $Opt_Overwrite is set.  Dies when the directory, the template or the
# output file cannot be created or written.
sub dump_library {
    my ($aref, $locale, $dept) = @_;

    my $dn = "$Opt_Dest/$locale";
    my $fn = "$dn/$dept.pm";

    unless (-d $dn) {
        mkpath($dn) or die "Failed to create '$dn'!\n";
    }

    if (-f $fn && !$Opt_Overwrite) {
        warn "The file $fn already exists, skipping!\n";
        return;
    }

    # The template path is relative to the current working directory.
    # new() returns undef (and sets $Text::Template::ERROR) when the
    # template cannot be loaded, so check it before calling fill_in().
    my $template = Text::Template->new(
        TYPE       => 'FILE',
        SOURCE     => 'aws4-itemsearch.tmpl',
        DELIMITERS => [ '[%--', '--%]', ],
    );
    unless ($template) {
        my $reason = defined $Text::Template::ERROR
                   ? $Text::Template::ERROR
                   : 'unknown error';
        die "Failed to load the template 'aws4-itemsearch.tmpl': $reason\n";
    }

    my $hash = {
        'MODULE_NAME'    => "${locale}::${dept}",
        'DEFAULT_OPTION' => select_default($aref),
        'LOCALE'         => $locale,
        'ITEM_SEARCH'    => $dept,
        'options'        => $aref,
    };

    my $text = $template->fill_in(HASH => $hash);
    unless ($text) {
        die "Failed to fill in the text template for $locale/$dept!\n";
    }

    # Pass the mode separately so nothing in the file name can be
    # interpreted as part of the open mode.
    my $fh = IO::File->new($fn, '>') or
        die "$! '$fn'!\n";

    # Buffered write errors may only surface at close; check both.
    print {$fh} $text
        or die "Failed to write '$fn': $!\n";

    $fh->close()
        or die "Failed to close '$fn': $!\n";

    return 1;
}


##################################################
__END__

=pod

=head1 NAME

B<aws4-itemsearch> - convert Amazon's HTML data to Perl libraries to pick ItemSearch
defaults.

=head1 SYNOPSIS

B<aws4-itemsearch> [I<OPTION>]...

=head1 DESCRIPTION

B<aws4-itemsearch> converts the data stored in Amazon's HTML pages for
AWS4 into Perl libraries.  These libraries are used by Net::Amazon to
validate user input, and select default entries for ItemSearch
operations.


=head1 ARGUMENTS

=over 4

=item -h, --help

Displays this message and program version and exits.

=item -V, --version

Displays the program's version and exits.

=item -D, --debug

Prints debug information.

=item --overwrite

Overwrite any libraries if they already exist.

=item --dest E<lt>directoryE<gt>

Specify the destination where the files should be written.  The
directory must already exist.  Defaults to
F<../lib/Net/Amazon/Validate/ItemSearch>.

=back

=head1 AUTHORS

Written by Christopher Boumenot.

=head1 REPORTING BUGS

Report bugs to <boumenot@gmail.com>.

=head1 SEE ALSO

L<Net::Amazon>

=cut