1#!/usr/bin/perl
2#$Revision: 1.4 $$Date: 2007-11-11 20:26:22 $$Author: boumenot $
3#######################################################################
# FIXME: this script screen scrapes the web to build the ItemSearch
5# validate classes.  Unfortunately, this breaks too frequently.  A
6# better way needs to be found.
7#######################################################################
8
9package main;
10require 5.008_001;
11
use strict;
use warnings;

use Getopt::Long;
use IO::File;
use Pod::Usage;
use LWP::Simple;
use Text::Template;
use Data::Dumper;
use File::Path;
# FindBin must be loaded before "use lib" below, otherwise $FindBin::Bin
# is still undefined when the library path is built.
use FindBin;
use lib "$FindBin::Bin/../lib";
use HTML::TreeBuilder::XPath;
use Net::Amazon ();
25
# Base URL of the online developer guide for the WSDL revision that
# Net::Amazon is built against ($Net::Amazon::WSDL_DATE).  The result
# always ends in a slash so a page name can be appended directly.
sub AWS4_ONLINE_HTML {
    my $wsdl_date = $Net::Amazon::WSDL_DATE;

    return join q{},
        'http://docs.amazonwebservices.com/AWSECommerceService/',
        $wsdl_date,
        '/DG/';
}
29
# Maps a locale code to the documentation page (relative to the URL
# returned by AWS4_ONLINE_HTML) that lists the ItemSearch parameters for
# that locale.  Only 'us' is enabled; the remaining locales are kept
# here, commented out, for reference.
use constant AWS4_LOCALE_HTML => {
    us => 'USSearchIndexParamForItemsearch.html',
    # de => 'DESearchIndexParamForItemsearch.html',
    # es => 'ESSearchIndexParamForItemsearch.html',
    # jp => 'JPSearchIndexParamForItemsearch.html',
    # it => 'ITSearchIndexParamForItemsearch.html',
    # uk => 'UKSearchIndexParamForItemsearch.html',
    # fr => 'FRSearchIndexParamForItemsearch.html',
    # ca => 'CASearchIndexParamForItemsearch.html',
};
40
# Command-line settings and their defaults.
my $Opt_Debug     = 0;    # --debug: print progress information
my $Opt_Dest      = "../lib/Net/Amazon/Validate/ItemSearch";    # --dest
my $Opt_Overwrite = 0;    # --overwrite: replace existing libraries

# --help and --version are handled by callbacks that exit; any
# non-option argument is routed to parameter(), which dies.
my $options_parsed = GetOptions(
    "help|h"    => \&usage,
    "version|V" => \&version,
    "debug|D"   => \$Opt_Debug,
    "dest=s"    => \$Opt_Dest,
    "overwrite" => \$Opt_Overwrite,
    "<>"        => \&parameter,
);

# Show the usage text when option parsing failed.
usage() unless $options_parsed;
55
56## main #########################################
57
# The destination root must already exist; the per-locale directories
# below it are created on demand by dump_library().
unless (-d $Opt_Dest) {
    die "The directory $Opt_Dest does not exist!\n";
}

# For every configured locale: fetch the documentation page, collect the
# parameters listed under each "SearchIndex: <name>" heading, and write
# one library per search index plus the Keywords and UPC/EAN libraries.
for my $locale (sort keys %{ AWS4_LOCALE_HTML() }) {
    my $link = AWS4_ONLINE_HTML() . AWS4_LOCALE_HTML()->{$locale};
    print "fetching $link ...\n" if $Opt_Debug;

    # LWP::Simple::get() returns undef on failure.  Stop here rather
    # than parsing nothing and writing (or overwriting) empty libraries.
    my $html = get($link);
    die "Failed to fetch $link!\n" unless defined $html;

    my $tree = HTML::TreeBuilder::XPath->new();
    $tree->parse($html);
    $tree->eof();

    my @search_indicies = map { $_->as_text } $tree->findnodes("//div[\@class=\"section\"]//h2");
    my %depts;       # search index name => [ parameter names ]
    my %upc;         # names offered by the UPC/EAN library
    my %keywords;    # locale => { search index names listing "Keywords" }

    for my $search_index (@search_indicies) {

        my ($search_index_name) = $search_index =~ /SearchIndex:\s+(\w+)/;

        # Skip headings that are not search-index sections at all, and
        # the catch-all "All" index.
        next unless defined $search_index_name;
        next if $search_index_name eq 'All';

        print "$search_index_name\n";
        $upc{$search_index_name}++;

        # NOTE(review): the heading text is interpolated into the XPath
        # string; a heading containing a double quote would break the
        # expression.
        my @parameters = map { $_->as_text } $tree->findnodes("//div[\@class=\"section\"]//h2[contains(text(),\"$search_index\")]/../../../..//li/p");

        for my $parameter (@parameters) {
            print "  -> $parameter\n";
            push @{ $depts{$search_index_name} }, $parameter;
            $keywords{$locale}{$search_index_name}++ if $parameter eq "Keywords";
        }

    }

    # Sorted so that the processing order is the same on every run.
    for my $dept (sort keys %depts) {
        dump_library($depts{$dept}, $locale, $dept);

        # NOTE(review): this folds the parameter names of every search
        # index into %upc next to the search-index names themselves --
        # confirm that this is intended.
        upc_add(\%upc, $depts{$dept});
    }

    # %keywords only ever holds the current locale; the loop variable has
    # its own name so that it no longer shadows $locale.
    for my $keyword_locale (sort keys %keywords) {
        my @keyword_indexes = sort keys %{ $keywords{$keyword_locale} };
        dump_library(\@keyword_indexes, $keyword_locale, "Keywords");
    }

    # Hash order is arbitrary, so sort to keep the generated file (and
    # the fallback default picked from its first entry) reproducible.
    my @upc_options = sort keys %upc;
    my $type = ($locale eq 'us') ? 'UPC' : 'EAN';
    dump_library(\@upc_options, $locale, $type);
}
108
109## subs #########################################
110
# Print the revision banner followed by the full POD of this script.
# pod2usage() is asked to exit with status 2; the exit(1) after it is
# only a fallback should it ever return.
sub usage {
    my $banner = '$Revision: 1.4 $$Date: 2007-11-11 20:26:22 $$Author: boumenot $ ';

    print $banner, "\n";
    pod2usage(-exitval => 2, -verbose => 2);
    exit(1);
}
116
# Print the revision banner and terminate the script with exit status 1.
sub version {
    my $banner = '$Revision: 1.4 $$Date: 2007-11-11 20:26:22 $$Author: boumenot $ ';

    print $banner, "\n";
    exit(1);
}
121
# Getopt::Long "<>" handler: this script accepts no non-option
# arguments, so every one of them is reported as a fatal error.
sub parameter {
    my ($unknown) = @_;

    die "%Error: Unknown parameter: $unknown\n";
}
126
127##################################################
128
# Choose the default entry for a generated library.
#
# Takes a reference to the list of acceptable values and returns the
# first name from a fixed preference list that is present in it.  Books
# leads the list because it was the AWS3 default; Music, DVD, Software,
# Title, Keyword and Keywords follow in that order.  If none of them is
# acceptable, the first element of the supplied list is returned (undef
# for an empty list).

sub select_default {
    my $aref = shift;

    # Lookup table of the acceptable values.
    my %is_acceptable;
    @is_acceptable{@$aref} = (1) x @$aref;

    # grep keeps the preference order, so the first hit is the winner.
    my ($preferred) =
        grep { $is_acceptable{$_} }
        qw(Books Music DVD Software Title Keyword Keywords);

    return defined $preferred ? $preferred : $aref->[0];
}
148
# Increment the counter stored in %$href for every element of @$aref,
# creating entries that do not exist yet.
sub upc_add {
    my ($href, $aref) = @_;

    foreach my $name (@$aref) {
        $href->{$name}++;
    }
}
153
# Write one generated library, "$Opt_Dest/$locale/$dept.pm", by filling
# in the aws4-itemsearch.tmpl template.
#
#   $aref   - reference to the list of options written into the library
#   $locale - locale code; used as subdirectory and module name prefix
#   $dept   - library name; used as file name and module name suffix
#
# An existing file is left alone (with a warning) unless --overwrite was
# given.  Dies when the directory cannot be created, the template cannot
# be loaded or filled in, or the file cannot be written.  Returns
# nothing when the file was skipped and 1 when it was written.
sub dump_library {
    my ($aref, $locale, $dept) = @_;

    my $dn = "$Opt_Dest/$locale";
    my $fn = "$dn/$dept.pm";

    unless (-d $dn) {
        mkpath($dn) or die "Failed to create '$dn'!\n";
    }

    if (-f $fn && !$Opt_Overwrite) {
        warn "The file $fn already exists, skipping!\n";
        return;
    }

    # The template is looked up relative to the current directory.
    my $template_file = 'aws4-itemsearch.tmpl';

    # new() returns undef (with the reason in $Text::Template::ERROR)
    # when the template cannot be loaded; report that here instead of
    # failing later with a method call on an undefined value.
    my $template = Text::Template->new(
        TYPE       => 'FILE',
        SOURCE     => $template_file,
        DELIMITERS => [ '[%--', '--%]', ],
    ) or die "Failed to load the text template '$template_file': $Text::Template::ERROR\n";

    my $hash = {
        'MODULE_NAME'    => join('::', $locale, $dept),
        'DEFAULT_OPTION' => select_default($aref),
        'LOCALE'         => $locale,
        'ITEM_SEARCH'    => $dept,
        'options'        => $aref,
    };

    my $text = $template->fill_in(HASH => $hash);
    unless ($text) {
        die "Failed to fill in the text template for $locale/$dept!\n";
    }

    # An explicit mode keeps the file name from being parsed as part of
    # the open mode.
    my $fout = IO::File->new($fn, 'w')
        or die "$! '$fn'!\n";

    # Buffered write errors may only show up at close time, so both the
    # print and the close are checked.
    print {$fout} $text
        or die "Failed to write '$fn': $!\n";

    $fout->close()
        or die "Failed to close '$fn': $!\n";

    return 1;
}
194
195
196##################################################
197__END__
198
199=pod
200
=head1 aws4-itemsearch

B<aws4-itemsearch> - convert Amazon's HTML data to Perl libraries to pick ItemSearch
defaults.
205
206=head1 SYNOPSIS
207
B<aws4-itemsearch> [I<OPTION>]...
209
210=head1 DESCRIPTION
211
B<aws4-itemsearch> converts the data stored in Amazon's HTML pages for
AWS4 into Perl libraries.  These libraries are used by Net::Amazon to
validate user input, and select default entries for ItemSearch
operations.
216
217=head1 ARGUMENTS
218
219=over 4
220
221=item -h, --help
222
223Displays this message and program version and exits.
224
225=item -V, --version
226
227Displays the program's version and exits.
228
229=item -D, --debug
230
231Prints debug information.
232
233=item --overwrite
234
235Overwrite any libraries if they already exist.
236
237=item --dest E<lt>directoryE<gt>
238
239Specify the destination where the files should be written.
240
241=back
242
243=head1 AUTHORS
244
245Written by Christopher Boumenot.
246
247=head1 REPORTING BUGS
248
249Report bugs to <boumenot@gmail.com>.
250
251=head1 SEE ALSO
252
253=cut
254
255