1package Astro::ADS::Query;
2
3# ---------------------------------------------------------------------------
4
5#+
6#  Name:
7#    Astro::ADS::Query
8
9#  Purposes:
10#    Perl wrapper for the ADS database
11
12#  Language:
13#    Perl module
14
15#  Description:
16#    This module wraps the ADS online database.
17
18#  Authors:
19#    Alasdair Allan (aa@astro.ex.ac.uk)
20
21#  Revision:
22#     $Id: Query.pm,v 1.24 2011/07/01 bjd Exp $
23
24#  Copyright:
25#     Copyright (C) 2001 University of Exeter. All Rights Reserved.
26
27#-
28
29# ---------------------------------------------------------------------------
30
31=head1 NAME
32
33Astro::ADS::Query - Object definining an prospective ADS query.
34
35=head1 SYNOPSIS
36
37  $query = new Astro::ADS::Query( Authors     => \@authors,
38                                  AuthorLogic => $aut_logic,
39                                  Objects     => \@objects,
40                                  ObjectLogic => $obj_logic,
41                                  Bibcode     => $bibcode,
42                                  Proxy       => $proxy,
43                                  Timeout     => $timeout,
44                                  URL         => $url );
45
46  my $results = $query->querydb();
47
48=head1 DESCRIPTION
49
50Stores information about an prospective ADS query and allows the query to
51be made, returning an Astro::ADS::Result object.
52
53The object will by default pick up the proxy information from the HTTP_PROXY
54and NO_PROXY environment variables, see the LWP::UserAgent documentation for
55details.
56
57=cut
58
59# L O A D   M O D U L E S --------------------------------------------------
60
61use strict;
62use warnings;
63use vars qw/ $VERSION /;
64
65use LWP::UserAgent;
66use Astro::ADS::Result;
67use Astro::ADS::Result::Paper;
68use Net::Domain qw(hostname hostdomain);
69use Carp;
70
71'$Revision: 1.26 $ ' =~ /.*:\s(.*)\s\$/ && ($VERSION = $1);
72
73# C L A S S   A T T R I B U T E S ------------------------------------------
74{
75	my $_ads_mirror = 'cdsads.u-strasbg.fr';	# this is the default mirror site
76	sub ads_mirror {
77		my ($class, $new_mirror) = @_;
78		$_ads_mirror = $new_mirror if @_ > 1;
79		return $_ads_mirror;
80	}
81}
82
83# C O N S T R U C T O R ----------------------------------------------------
84
85=head1 REVISION
86
87$Id: Query.pm,v 1.25 2013/08/06 bjd Exp $
88$Id: Query.pm,v 1.24 2009/07/01 bjd Exp $
89$Id: Query.pm,v 1.22 2009/05/01 bjd Exp $
90$Id: Query.pm,v 1.21 2002/09/23 21:07:49 aa Exp $
91
92=head1 METHODS
93
94=head2 Constructor
95
96=over 4
97
98=item B<new>
99
100Create a new instance from a hash of options
101
102  $query = new Astro::ADS::Query( Authors     => \@authors,
103                                  AuthorLogic => $aut_logic,
104                                  Objects     => \@objects,
105                                  ObjectLogic => $obj_logic,
106                                  Bibcode     => $bibcode,
107                                  Proxy       => $proxy,
108                                  Timeout     => $timeout,
109                                  URL         => $url );
110
111returns a reference to an ADS query object.
112
113=cut
114
115sub new {
116  my $proto = shift;
117  my $class = ref($proto) || $proto;
118
119  # bless the query hash into the class
120  my $block = bless { OPTIONS   => {},
121                      URL       => undef,
122                      QUERY     => undef,
123                      FOLLOWUP  => undef,
124                      USERAGENT => undef,
125                      BUFFER    => undef }, $class;
126
127  # Configure the object
128  # does nothing if no arguments supplied
129  $block->configure( @_ );
130
131  return $block;
132
133}
134
135# Q U E R Y  M E T H O D S ------------------------------------------------
136
137=back
138
139=head2 Accessor Methods
140
141=over 4
142
143=item B<querydb>
144
145Returns an Astro::ADS::Result object for an inital ADS query
146
147   $results = $query->querydb();
148
149=cut
150
151sub querydb {
152  my $self = shift;
153
154  # call the private method to make the actual ADS query
155  $self->_make_query();
156
157  # check for failed connect
158  return unless defined $self->{BUFFER};
159
160  # return an Astro::ADS::Result object
161  return $self->_parse_query();
162
163}
164
165=item B<followup>
166
167Returns an Astro::ADS::Result object for a followup query, e.g. CITATIONS,
168normally called using accessor methods from an Astro::ADS::Paper object, but
169can be called directly.
170
171   $results = $query->followup( $bibcode, $link_type );
172
173returns undef if no arguements passed. Possible $link_type values are AR,
174CITATIONS, REFERENCES and TOC.
175
176=cut
177
178sub followup {
179  my $self = shift;
180
181  # return unless we have arguments
182  return unless @_;
183
184  my $bibcode = shift;
185  my $link_type = shift;
186
187  # call the private method to make the actual ADS query
188  $self->_make_followup( $bibcode, $link_type );
189
190  # check for failed connect
191  return unless defined $self->{BUFFER};
192
193  # return an Astro::ADS::Result object
194  return $self->_parse_query();
195
196}
197
198=item B<proxy>
199
200Return (or set) the current proxy for the ADS request.
201
202   $query->proxy( 'http://wwwcache.ex.ac.uk:8080/' );
203   $proxy_url = $query->proxy();
204
205=cut
206
207sub proxy {
208   my $self = shift;
209
210   # grab local reference to user agent
211   my $ua = $self->{USERAGENT};
212
213   if (@_) {
214      my $proxy_url = shift;
215      $ua->proxy('http', $proxy_url );
216   }
217
218   # return the current proxy
219   return $ua->proxy('http');
220
221}
222
223=item B<timeout>
224
225Return (or set) the current timeout in seconds for the ADS request.
226
227   $query->timeout( 30 );
228   $proxy_timeout = $query->timeout();
229
230=cut
231
232sub timeout {
233   my $self = shift;
234
235   # grab local reference to user agent
236   my $ua = $self->{USERAGENT};
237
238   if (@_) {
239      my $time = shift;
240      $ua->timeout( $time );
241   }
242
243   # return the current timeout
244   return $ua->timeout();
245
246}
247
248=item B<url>
249
250Return (or set) the current base URL for the ADS query.
251
252   $url = $query->url();
253   $query->url( "adsabs.harvard.edu" );
254
255if not defined the default URL is cdsads.u-strasbg.fr
256
257As of v1.24, this method sets a class attribute to keep it
258consistant across all objects.  Not terribly thread safe, but
259at least you know where your query is going.
260
261=cut
262
263sub url {
264  my $self = shift;
265  my $class = ref($self);	# now re-implemented as a class attribute
266
267  # SETTING URL
268  if (@_) {
269
270    # set the url option
271    my $base_url = shift;
272    $class->ads_mirror( $base_url );
273    if( defined $base_url ) {
274       $self->{QUERY} = "http://$base_url/cgi-bin/nph-abs_connect?";
275       $self->{FOLLOWUP} = "http://$base_url/cgi-bin/nph-ref_query?";
276    }
277  }
278
279  # RETURNING URL
280  return $class->ads_mirror();
281}
282
283=item B<agent>
284
285Returns the user agent tag sent by the module to the ADS server.
286
287   $agent_tag = $query->agent();
288
289=cut
290
291sub agent {
292  my $self = shift;
293  my $string = shift;
294  if (defined $string) {
295	my $agent = $self->{USERAGENT}->agent();
296	$agent =~ s/(\d+)\s(\[.*\]\s*)?\(/$1 [$string] (/;
297	return $self->{USERAGENT}->agent($agent);
298  }
299  else {
300    return $self->{USERAGENT}->agent();
301  }
302}
303
304# O T H E R   M E T H O D S ------------------------------------------------
305
306=item B<Authors>
307
308Return (or set) the current authors defined for the ADS query.
309
310   @authors = $query->authors();
311   $first_author = $query->authors();
312   $query->authors( \@authors );
313
314if called in a scalar context it will return the first author.
315
316=cut
317
318sub authors {
319  my $self = shift;
320
321  # SETTING AUTHORS
322  if (@_) {
323
324    # clear the current author list
325    ${$self->{OPTIONS}}{"author"} = "";
326
327    # grab the new list from the arguements
328    my $author_ref = shift;
329
330    # make a local copy to use for regular expressions
331    my @author_list = @$author_ref;
332
333    # mutilate it and stuff it into the author list OPTION
334    for my $i ( 0 ... $#author_list ) {
335       $author_list[$i] =~ s/\s/\+/g;
336
337       if ( $i eq 0 ) {
338          ${$self->{OPTIONS}}{"author"} = $author_list[$i];
339       } else {
340          ${$self->{OPTIONS}}{"author"} =
341               ${$self->{OPTIONS}}{"author"} . ";" . $author_list[$i];
342       }
343    }
344  }
345
346  # RETURNING AUTHORS
347  my $author_line =  ${$self->{OPTIONS}}{"author"};
348  $author_line =~ s/\+/ /g;
349  my @authors = split(/;/, $author_line);
350
351  return wantarray ? @authors : $authors[0];
352}
353
354=item B<AuthorLogic>
355
356Return (or set) the logic when dealing with multiple authors for a search,
357possible values for this parameter are OR, AND, SIMPLE, BOOL and FULLMATCH.
358
359   $author_logic = $query->authorlogic();
360   $query->authorlogic( "AND" );
361
362if called with no arguements, or invalid arguements, then the method will
363return the current logic.
364
365=cut
366
367sub authorlogic {
368  my $self = shift;
369
370  if (@_) {
371
372     my $logic = shift;
373     if ( $logic eq "OR"   || $logic eq "AND" || $logic eq "SIMPLE" ||
374          $logic eq "BOOL" || $logic eq "FULLMATCH" ) {
375
376        # set the new logic
377        ${$self->{OPTIONS}}{"aut_logic"} = $logic;
378     }
379  }
380
381  return ${$self->{OPTIONS}}{"aut_logic"};
382}
383
384=item B<Objects>
385
386Return (or set) the current objects defined for the ADS query.
387
388   @objects = $query->objects();
389   $query->objects( \@objects );
390
391=cut
392
393sub objects {
394  my $self = shift;
395
396  # SETTING AUTHORS
397  if (@_) {
398
399    # clear the current object list
400    ${$self->{OPTIONS}}{"object"} = "";
401
402    # grab the new list from the arguements
403    my $object_ref = shift;
404
405    # make a local copy to use for regular expressions
406    my @object_list = @$object_ref;
407
408    # mutilate it and stuff it into the object list OPTION
409    for my $i ( 0 ... $#object_list ) {
410       $object_list[$i] =~ s/\s/\+/g;
411
412       if ( $i eq 0 ) {
413          ${$self->{OPTIONS}}{"object"} = $object_list[$i];
414       } else {
415          ${$self->{OPTIONS}}{"object"} =
416               ${$self->{OPTIONS}}{"object"} . ";" . $object_list[$i];
417       }
418    }
419  }
420
421  # RETURNING OBJECTS
422  my $object_line =  ${$self->{OPTIONS}}{"object"};
423  $object_line =~ s/\+/ /g;
424  my @objects = split(/;/, $object_line);
425
426  return @objects;
427
428}
429
430=item B<ObjectLogic>
431
432Return (or set) the logic when dealing with multiple objects in a search,
433possible values for this parameter are OR, AND, SIMPLE, BOOL and FULLMATCH.
434
435   $obj_logic = $query->objectlogic();
436   $query->objectlogic( "AND" );
437
438if called with no arguements, or invalid arguements, then the method will
439return the current logic.
440
441=cut
442
443sub objectlogic {
444  my $self = shift;
445
446  if (@_) {
447
448     my $logic = shift;
449     if ( $logic eq "OR"   || $logic eq "AND" || $logic eq "SIMPLE" ||
450          $logic eq "BOOL" || $logic eq "FULLMATCH" ) {
451
452        # set the new logic
453        ${$self->{OPTIONS}}{"obj_logic"} = $logic;
454     }
455  }
456
457  return ${$self->{OPTIONS}}{"obj_logic"};
458}
459
460=item B<Bibcode>
461
462Return (or set) the current bibcode used for the ADS query.
463
464   $bibcode = $query->bibcode();
465   $query->bibcode( "1996PhDT........42J" );
466
467=cut
468
469sub bibcode {
470  my $self = shift;
471
472  # SETTING BIBCODE
473  if (@_) {
474
475    # set the bibcode option
476    ${$self->{OPTIONS}}{"bibcode"} = shift;
477  }
478
479  # RETURNING BIBCODE
480  return ${$self->{OPTIONS}}{"bibcode"};
481}
482
483
484=item B<startmonth>
485
486Return (or set) the current starting month of the ADS query.
487
488   $start_month = $query->startmonth();
489   $query->startmonth( "01" );
490
491=cut
492
493sub startmonth {
494  my $self = shift;
495
496  # SETTING STARTING MONTH
497  if (@_) {
498
499    # set the starting month option
500    ${$self->{OPTIONS}}{"start_mon"} = shift;
501  }
502
503  # RETURNING STARTING MONTH
504  return ${$self->{OPTIONS}}{"start_mon"};
505
506}
507
508=item B<endmonth>
509
510Return (or set) the current end month of the ADS query.
511
512   $end_month = $query->endmonth();
513   $query->endmonth( "12" );
514
515=cut
516
517sub endmonth {
518  my $self = shift;
519
520  # SETTING END MONTH
521  if (@_) {
522
523    # set the end month option
524    ${$self->{OPTIONS}}{"end_mon"} = shift;
525  }
526
527  # RETURNING END MONTH
528  return ${$self->{OPTIONS}}{"end_mon"};
529
530}
531
532=item B<startyear>
533
534Return (or set) the current starting year of the ADS query.
535
536   $start_year = $query->startyear();
537   $query->start_year( "2001" );
538
539=cut
540
541sub startyear {
542  my $self = shift;
543
544  # SETTING START YEAR
545  if (@_) {
546
547    # set the starting year option
548    ${$self->{OPTIONS}}{"start_year"} = shift;
549  }
550
551  # RETURNING START YEAR
552  return ${$self->{OPTIONS}}{"start_year"};
553
554}
555
556=item B<endyear>
557
558Return (or set) the current end year of the ADS query.
559
560   $end_year = $query->endyear();
561   $query->end_year( "2002" );
562
563=cut
564
565sub endyear {
566  my $self = shift;
567
568  # SETTING END YEAR
569  if (@_) {
570
571    # set the end year option
572    ${$self->{OPTIONS}}{"end_year"} = shift;
573  }
574
575  # RETURNING END YEAR
576  return ${$self->{OPTIONS}}{"end_year"};
577
578}
579
580=item B<journal>
581
582Return (or set) whether refereed, non-refereed (OTHER) or all bibilographic sources (ALL) are returned.
583
584   $query->journal( "REFEREED" );
585   $query->journal( "OTHER" );
586   $query->journal( "ALL" );
587
588   $journals = $query->journal();
589
590the default is ALL bibilographic sources
591
592=cut
593
594sub journal {
595  my $self = shift;
596
597  # SETTING END YEAR
598  if (@_) {
599
600    my $source = shift;
601
602    if ( $source eq "REFEREED" ) {
603       ${$self->{OPTIONS}}{"jou_pick"} = "NO";
604    } elsif ( $source eq "OTHER" ) {
605       ${$self->{OPTIONS}}{"jou_pick"} = "EXCL";
606    } else {
607       ${$self->{OPTIONS}}{"jou_pick"} = "ALL";
608    }
609
610  }
611
612  # RETURNING END YEAR
613  return ${$self->{OPTIONS}}{"jou_pick"};
614
615}
616
617# C O N F I G U R E -------------------------------------------------------
618
619=back
620
621=head2 General Methods
622
623=over 4
624
625=item B<configure>
626
627Configures the object, takes an options hash as an argument
628
629  $query->configure( %options );
630
631Does nothing if the array is not supplied.
632
633=cut
634
635sub configure {
636  my $self = shift;
637  my $class = ref($self);
638
639  # CONFIGURE DEFAULTS
640  # ------------------
641
642  # define the default base URL
643  my $default_url = $class->ads_mirror();
644
645  # define the query URLs
646  $self->{QUERY} = "http://$default_url/cgi-bin/nph-abs_connect?";
647  $self->{FOLLOWUP} = "http://$default_url/cgi-bin/nph-ref_query?";
648
649
650  # Setup the LWP::UserAgent
651  my $HOST = hostname();
652  my $DOMAIN = hostdomain();
653  $self->{USERAGENT} = new LWP::UserAgent( timeout => 30 );
654  $self->{USERAGENT}->agent("Astro::ADS/$VERSION ($HOST.$DOMAIN)");
655
656  # Grab Proxy details from local environment
657  $self->{USERAGENT}->env_proxy();
658
659  # configure the default options
660  ${$self->{OPTIONS}}{"db_key"}           = "AST";
661  ${$self->{OPTIONS}}{"sim_query"}        = "YES";
662  ${$self->{OPTIONS}}{"aut_xct"}          = "NO";
663  ${$self->{OPTIONS}}{"aut_logic"}        = "OR";
664  ${$self->{OPTIONS}}{"obj_logic"}        = "OR";
665  ${$self->{OPTIONS}}{"author"}           = "";
666  ${$self->{OPTIONS}}{"object"}           = "";
667  ${$self->{OPTIONS}}{"keyword"}          = "";
668  ${$self->{OPTIONS}}{"start_mon"}        = "";
669  ${$self->{OPTIONS}}{"start_year"}       = "";
670  ${$self->{OPTIONS}}{"end_mon"}          = "";
671  ${$self->{OPTIONS}}{"end_year"}         = "";
672  ${$self->{OPTIONS}}{"ttl_logic"}        = "OR";
673  ${$self->{OPTIONS}}{"title"}            = "";
674  ${$self->{OPTIONS}}{"txt_logic"}        = "OR";
675  ${$self->{OPTIONS}}{"text"}             = "";
676  ${$self->{OPTIONS}}{"nr_to_return"}     = "100";
677  ${$self->{OPTIONS}}{"start_nr"}         = "1";
678  ${$self->{OPTIONS}}{"start_entry_day"}  = "";
679  ${$self->{OPTIONS}}{"start_entry_mon"}  = "";
680  ${$self->{OPTIONS}}{"start_entry_year"} = "";
681  ${$self->{OPTIONS}}{"min_score"}        = "";
682  ${$self->{OPTIONS}}{"jou_pick"}         = "ALL";
683  ${$self->{OPTIONS}}{"ref_stems"}        = "";
684  ${$self->{OPTIONS}}{"data_and"}         = "ALL";
685  ${$self->{OPTIONS}}{"group_and"}        = "ALL";
686  ${$self->{OPTIONS}}{"sort"}             = "SCORE";
687  ${$self->{OPTIONS}}{"aut_syn"}          = "YES";
688  ${$self->{OPTIONS}}{"ttl_syn"}          = "YES";
689  ${$self->{OPTIONS}}{"txt_syn"}          = "YES";
690  ${$self->{OPTIONS}}{"aut_wt"}           = "1.0";
691  ${$self->{OPTIONS}}{"obj_wt"}           = "1.0";
692  ${$self->{OPTIONS}}{"ttl_wt"}           = "0.3";
693  ${$self->{OPTIONS}}{"txt_wt"}           = "3.0";
694  ${$self->{OPTIONS}}{"aut_wgt"}          = "YES";
695  ${$self->{OPTIONS}}{"obj_wgt"}          = "YES";
696  ${$self->{OPTIONS}}{"ttl_wgt"}          = "YES";
697  ${$self->{OPTIONS}}{"txt_wgt"}          = "YES";
698  ${$self->{OPTIONS}}{"ttl_sco"}          = "YES";
699  ${$self->{OPTIONS}}{"txt_sco"}          = "YES";
700  ${$self->{OPTIONS}}{"version"}          = "1";
701  ${$self->{OPTIONS}}{"bibcode"}          = "";
702
703  # Set the data_type option to PORTABLE so our regular expressions work!
704  # Set the return format to LONG so we get full abstracts!
705  ${$self->{OPTIONS}}{"data_type"}        = "PORTABLE";
706  ${$self->{OPTIONS}}{"return_fmt"}       = "LONG";
707
708  # CONFIGURE FROM ARGUEMENTS
709  # -------------------------
710
711  # return unless we have arguments
712  return unless @_;
713
714  # grab the argument list
715  my %args = @_;
716
717  # Loop over the allowed keys and modify the default query options
718  for my $key (qw / Authors AuthorLogic Objects ObjectLogic Bibcode
719                    StartMonth EndMonth StartYear EndYear Journal
720                    Proxy Timeout URL/ ) {
721      my $method = lc($key);
722      $self->$method( $args{$key} ) if exists $args{$key};
723  }
724
725}
726
727# T I M E   A T   T H E   B A R  --------------------------------------------
728
729=back
730
731=begin __PRIVATE_METHODS__
732
733=head2 Private methods
734
735These methods are for internal use only.
736
737=over 4
738
739=item B<_make_query>
740
741Private function used to make an ADS query. Should not be called directly,
742since it does not parse the results. Instead use the querydb() assessor method.
743
744=cut
745
746sub _make_query {
747   my $self = shift;
748
749   # grab the user agent
750   my $ua = $self->{USERAGENT};
751
752   # clean out the buffer
753   $self->{BUFFER} = "";
754
755   # grab the base URL
756   my $URL = $self->{QUERY};
757   my $options = "";
758
759   # loop round all the options keys and build the query
760   foreach my $key ( keys %{$self->{OPTIONS}} ) {
761      # some bibcodes have & and needs to be made "web safe"
762      my $websafe_option = ${$self->{OPTIONS}}{$key};
763      $websafe_option =~ s/&/%26/g;
764      $options = $options . "&$key=$websafe_option";
765
766   }
767
768   # build final query URL
769   $URL = $URL . $options;
770
771   # build request
772   my $request = new HTTP::Request('GET', $URL);
773
774   # grab page from web
775   my $reply = $ua->request($request);
776
777   if ( ${$reply}{"_rc"} eq 200 ) {
778
779      # stuff the page contents into the buffer
780      $self->{BUFFER} = ${$reply}{"_content"};
781
782   } elsif ( ${$reply}{"_rc"} eq 500 ) {
783
784      # we may have a network unreachable, or we may have a no reference
785      # selected error returned by ADS (go figure)
786
787      $self->{BUFFER} = ${$reply}{"_content"};
788      my @buffer = split( /\n/,$self->{BUFFER});
789      chomp @buffer;
790
791      # assume we have an error unless we can prove otherwise
792      my $error_flag = 1;
793
794      foreach my $line ( 0 ... $#buffer ) {
795          if( $buffer[$line] =~ "No reference selected" ) {
796
797             # increment the counter and drop out of the loop
798             $line = $#buffer;
799             $error_flag = 0;
800          }
801      }
802
803      # we definately have an error
804      if( $error_flag ) {
805         $self->{BUFFER} = undef;
806		 my $proxy_string = undef;
807		 if ($proxy_string = $ua->proxy('http')) { substr($proxy_string, 0, 0) = ' using proxy '; }
808		 else { $proxy_string = ' (no proxy)'; }
809         croak("Error ${$reply}{_rc}: Failed to establish network connection to $URL",
810				$proxy_string, "\n");
811      }
812
813   } else {
814      $self->{BUFFER} = undef;
815	  my $proxy_string = undef;
816	  if ($proxy_string = $ua->proxy('http')) { substr($proxy_string, 0, 0) = ' using proxy '; }
817	  else { $proxy_string = ' (no proxy)'; }
818      croak("Error ${$reply}{_rc}: Failed to establish network connection to $URL",
819				$proxy_string, "\n");
820   }
821
822
823}
824
825=item B<_make_followup>
826
827Private function used to make a followup ADS query, e.g. REFERNCES, called
828from the followup() assessor method. Should not be called directly.
829
830=cut
831
832sub _make_followup {
833   my $self = shift;
834
835   # grab the user agent
836   my $ua = $self->{USERAGENT};
837
838   # clean out the buffer
839   $self->{BUFFER} = "";
840
841   # grab the base URL
842   my $URL = $self->{FOLLOWUP};
843
844   # which paper?
845   my $bibcode = shift;
846   $bibcode =~ s/&/%26/g;	# make ampersands websafe
847
848   # which followup?
849   my $refs = shift;
850
851   # which database?
852   my $db_key = ${$self->{OPTIONS}}{"db_key"};
853   my $data_type = ${$self->{OPTIONS}}{"data_type"};
854   my $fmt = ${$self->{OPTIONS}}{"return_fmt"};
855
856   # build the final query URL
857   $URL = $URL . "bibcode=$bibcode&refs=$refs&db_key=$db_key&data_type=$data_type&return_fmt=$fmt";
858
859   # build request
860   my $request = new HTTP::Request('GET', $URL);
861
862   # grab page from web
863   my $reply = $ua->request($request);
864
865   if ( ${$reply}{"_rc"} eq 200 ) {
866      # stuff the page contents into the buffer
867      $self->{BUFFER} = ${$reply}{"_content"};
868   } else {
869      $self->{BUFFER} = undef;
870	  my $proxy_string = undef;
871	  if ($proxy_string = $ua->proxy('http')) { substr($proxy_string, 0, 0) = ' using proxy '; }
872	  else { $proxy_string = ' (no proxy) '; }
873      croak("Error ${$reply}{_rc}: Failed to establish network connection to $URL" .
874			$proxy_string . $self->{BUFFER} ."\n");
875   }
876}
877
878=item B<_parse_query>
879
880Private function used to parse the results returned in an ADS query. Should
881not be called directly. Instead use the querydb() assessor method to make and
882parse the results.
883
884=cut
885
886sub _parse_query {
887  my $self = shift;
888
889  # get a local copy of the current BUFFER
890  my @buffer = split( /\n/,$self->{BUFFER});
891  chomp @buffer;
892
893  # create an Astro::ADS::Result object to hold the search results
894  my $result = new Astro::ADS::Result();
895
896  # create a temporary object to hold papers
897  my $paper;
898
899  # loop round the returned buffer and stuff the contents into Paper objects
900  my ( $next, $counter );
901  $next = $counter = 0;
902  foreach my $line ( 0 ... $#buffer ) {
903
904     #     R     Bibcode
905     #     T     Title
906     #     A     Author List
907     #     F     Affiliations
908     #     J     Journal Reference
909     #     D     Publication Date
910     #     K     Keywords
911     #     G     Origin
912     #     I     Outbound Links
913     #     U     Document URL
914     #     O     Object name
915     #     B     Abstract
916     #     S     Score
917
918     # NO ABSTRACTS
919     if( $buffer[$line] =~ "Retrieved 0 abstracts" ) {
920
921        # increment the counter and drop out of the loop
922        $line = $#buffer;
923
924     }
925
926     # NO ABSTRACT (HTML version)
927     if( $buffer[$line] =~ "No reference selected" ) {
928
929       # increment the counter and drop out of the loop
930        $line = $#buffer;
931     }
932
933     # NEW PAPER
934     if( substr( $buffer[$line], 0, 2 ) eq "%R" ) {
935
936        $counter = $line;
937        my $tag = substr( $buffer[$counter], 1, 1 );
938
939        # grab the bibcode
940        my $bibcode = substr( $buffer[$counter], 2 );
941        $bibcode =~ s/\s+//g;
942
943        # New Astro::ADS::Result::Paper object
944        $paper = new Astro::ADS::Result::Paper( Bibcode => $bibcode );
945
946        $counter++;
947
948        # LOOP THROUGH PAPER
949        my ( @title, @authors, @affil, @journal, @pubdate, @keywords,
950             @origin, @links, @url, @object, @abstract, @score );
951        while ( $counter <= $#buffer &&
952                substr( $buffer[$counter], 0, 2 ) ne "%R" ) {
953
954
955           # grab the tags
956           if( substr( $buffer[$counter], 0, 1 ) eq "%" ) {
957              $tag = substr( $buffer[$counter], 1, 1 );
958           }
959
960           # ckeck for each tag and stuff the contents into the paper object
961
962           # TITLE
963           # -----
964           if( $tag eq "T" ) {
965
966              #do we have the start of an title block?
967              if ( substr( $buffer[$counter], 0, 1 ) eq "%") {
968
969                 # push the end of line substring onto array
970                 push ( @title, substr( $buffer[$counter], 3 ) );
971
972              } else {
973
974                 # push the entire line onto the array
975                 push (@title, $buffer[$counter] );
976
977              }
978           }
979
980           # AUTHORS
981           # -------
982           if( $tag eq "A" ) {
983
984              #do we have the start of an author block?
985              if ( substr( $buffer[$counter], 0, 1 ) eq "%") {
986
987                 # push the end of line substring onto array
988                 push ( @authors, substr( $buffer[$counter], 3 ) );
989
990              } else {
991
992                 # push the entire line onto the array
993                 push (@authors, $buffer[$counter] );
994
995              }
996           }
997
998           # AFFILIATION
999           # -----------
1000           if( $tag eq "F" ) {
1001
1002              #do we have the start of an affil block?
1003              if ( substr( $buffer[$counter], 0, 1 ) eq "%") {
1004
1005                 # push the end of line substring onto array
1006                 push ( @affil, substr( $buffer[$counter], 3 ) );
1007
1008              } else {
1009
1010                 # push the entire line onto the array
1011                 push (@affil, $buffer[$counter] );
1012
1013              }
1014           }
1015
1016           # JOURNAL REF
1017           # -----------
1018           if( $tag eq "J" ) {
1019
1020              #do we have the start of an journal block?
1021              if ( substr( $buffer[$counter], 0, 1 ) eq "%") {
1022
1023                 # push the end of line substring onto array
1024                 push ( @journal, substr( $buffer[$counter], 3 ) );
1025
1026              } else {
1027
1028                 # push the entire line onto the array
1029                 push (@journal, $buffer[$counter] );
1030
1031              }
1032           }
1033
1034           # PUBLICATION DATE
1035           # ----------------
1036           if( $tag eq "D" ) {
1037
1038              #do we have the start of an publication date block?
1039              if ( substr( $buffer[$counter], 0, 1 ) eq "%") {
1040
1041                 # push the end of line substring onto array
1042                 push ( @pubdate, substr( $buffer[$counter], 3 ) );
1043
1044              } else {
1045
1046                 # push the entire line onto the array
1047                 push (@pubdate, $buffer[$counter] );
1048
1049              }
1050           }
1051
1052           # KEYWORDS
1053           # --------
1054           if( $tag eq "K" ) {
1055
1056              #do we have the start of an keyword block?
1057              if ( substr( $buffer[$counter], 0, 1 ) eq "%") {
1058
1059                 # push the end of line substring onto array
1060                 push ( @keywords, substr( $buffer[$counter], 3 ) );
1061
1062              } else {
1063
1064                 # push the entire line onto the array
1065                 push (@keywords, $buffer[$counter] );
1066
1067              }
1068           }
1069
1070           # ORIGIN
1071           # ------
1072           if( $tag eq "G" ) {
1073
1074              #do we have the start of an origin block?
1075              if ( substr( $buffer[$counter], 0, 1 ) eq "%") {
1076
1077                 # push the end of line substring onto array
1078                 push ( @origin, substr( $buffer[$counter], 3 ) );
1079
1080              } else {
1081
1082                 # push the entire line onto the array
1083                 push (@origin, $buffer[$counter] );
1084
1085              }
1086           }
1087
1088           # LINKS
1089           # -----
1090           if( $tag eq "I" ) {
1091
1092              #do we have the start of an author block?
1093              if ( substr( $buffer[$counter], 0, 1 ) eq "%") {
1094
1095                 # push the end of line substring onto array
1096                 push ( @links, substr( $buffer[$counter], 3 ) );
1097
1098              } else {
1099
1100                 # push the entire line onto the array
1101                 push (@links, $buffer[$counter] );
1102
1103              }
1104           }
1105
1106           # URL
1107           # ---
1108           if( $tag eq "U" ) {
1109
1110              #do we have the start of an URL block?
1111              if ( substr( $buffer[$counter], 0, 1 ) eq "%") {
1112
1113                 # push the end of line substring onto array
1114                 push ( @url, substr( $buffer[$counter], 3 ) );
1115
1116              } else {
1117
1118                 # push the entire line onto the array
1119                 push (@url, $buffer[$counter] );
1120
1121              }
1122           }
1123
1124           # OBJECT
1125           # ------
1126           if( $tag eq "O" ) {
1127
1128              #do we have the start of an title block?
1129              if ( substr( $buffer[$counter], 0, 1 ) eq "%") {
1130
1131                 # push the end of line substring onto array
1132                 push ( @object, substr( $buffer[$counter], 3 ) );
1133
1134              } else {
1135
1136                 # push the entire line onto the array
1137                 push (@object, $buffer[$counter] );
1138
1139              }
1140           }
1141
1142           # ABSTRACT
1143           # --------
1144           if( $tag eq "B" ) {
1145
1146              #do we have the start of an title block?
1147              if ( substr( $buffer[$counter], 0, 1 ) eq "%") {
1148
1149                 # push the end of line substring onto array
1150                 push ( @abstract, substr( $buffer[$counter], 3 ) );
1151
1152              } else {
1153
1154                 # push the entire line onto the array
1155                 push (@abstract, $buffer[$counter] );
1156
1157              }
1158           }
1159
1160           # SCORE
1161           # -----
1162           if( $tag eq "S" ) {
1163
1164              #do we have the start of an title block?
1165              if ( substr( $buffer[$counter], 0, 1 ) eq "%") {
1166
1167                 # push the end of line substring onto array
1168                 push ( @score, substr( $buffer[$counter], 3 ) );
1169
1170              } else {
1171
1172                 # push the entire line onto the array
1173                 push (@score, $buffer[$counter] );
1174
1175              }
1176           }
1177
1178
1179           # set the next paper increment
1180           $next = $counter;
1181           # increment the line counter
1182           $counter++;
1183
1184        }
1185
1186        # PUSH TITLE INTO PAPER OBJECT
1187        # ----------------------------
1188        chomp @title;
1189        my $title_line = "";
1190        for my $i ( 0 ... $#title ) {
1191           # drop it onto one line
1192           $title_line = $title_line . $title[$i];
1193        }
1194        $paper->title( $title_line ) if defined $title[0];
1195
1196        # PUSH AUTHORS INTO PAPER OBJECT
1197        # ------------------------------
1198        chomp @authors;
1199        my $author_line = "";
1200        for my $i ( 0 ... $#authors ) {
1201           # drop it onto one line
1202           $author_line = $author_line . $authors[$i];
1203        }
1204        # get rid of leading spaces before author names
1205        $author_line =~ s/;\s+/;/g;
1206
1207        my @paper_authors = split( /;/, $author_line );
1208        $paper->authors( \@paper_authors ) if defined $authors[0];
1209
1210        # PUSH AFFILIATION INTO PAPER OBJECT
1211        # ----------------------------------
1212        chomp @affil;
1213        my $affil_line = "";
1214        for my $i ( 0 ... $#affil ) {
1215           # drop it onto one line
1216           $affil_line = $affil_line . $affil[$i];
1217        }
1218        # grab each affiliation from its brackets
1219        $affil_line =~ s/\w\w\(//g;
1220
1221        my @paper_affil = split( /\), /, $affil_line );
1222        $paper->affil( \@paper_affil ) if defined $affil[0];
1223
1224        # PUSH JOURNAL INTO PAPER OBJECT
1225        # ------------------------------
1226        chomp @journal;
1227        my $journal_ref = "";
1228        for my $i ( 0 ... $#journal ) {
1229           # drop it onto one line
1230           $journal_ref = $journal_ref . $journal[$i];
1231        }
1232        $paper->journal( $journal_ref ) if defined $journal[0];
1233
1234        # PUSH PUB DATE INTO PAPER OBJECT
1235        # -------------------------------
1236        chomp @pubdate;
1237        my $pub_date = "";
1238        for my $i ( 0 ... $#pubdate ) {
1239           # drop it onto one line
1240           $pub_date = $pub_date . $pubdate[$i];
1241        }
1242        $paper->published( $pub_date ) if defined $pubdate[0];
1243
1244        # PUSH KEYWORDS INTO PAPER OBJECT
1245        # -------------------------------
1246        chomp @keywords;
1247        my $key_line = "";
1248        for my $i ( 0 ... $#keywords ) {
1249           # drop it onto one line
1250           $key_line = $key_line . $keywords[$i];
1251        }
1252        # get rid of excess spaces
1253        $key_line =~ s/, /,/g;
1254
1255        my @paper_keys = split( /,/, $key_line );
1256        $paper->keywords( \@paper_keys ) if defined $keywords[0];
1257
1258        # PUSH ORIGIN INTO PAPER OBJECT
1259        # -----------------------------
1260        chomp @origin;
1261        my $origin_line = "";
1262        for my $i ( 0 ... $#origin) {
1263           # drop it onto one line
1264           $origin_line = $origin_line . $origin[$i];
1265        }
1266        $paper->origin( $origin_line ) if defined $origin[0];
1267
1268        # PUSH LINKS INTO PAPER OBJECT
1269        # ----------------------------
1270        chomp @links;
1271        my $links_line = "";
1272        for my $i ( 0 ... $#links ) {
1273           # drop it onto one line
1274           $links_line = $links_line . $links[$i];
1275        }
1276        # annoying complex reg exp to get rid of formatting
1277        $links_line =~ s/:.*?;\s*/;/g;
1278
1279        my @paper_links = split( /;/, $links_line );
1280        $paper->links( \@paper_links ) if defined $links[0];
1281
1282        # PUSH URL INTO PAPER OBJECT
1283        # --------------------------
1284        chomp @url;
1285        my $url_line = "";
1286        for my $i ( 0 ... $#url ) {
1287           # drop it onto one line
1288           $url_line = $url_line . $url[$i];
1289        }
1290        # get rid of trailing spaces
1291        $url_line =~ s/\s+$//;
1292        $paper->url( $url_line ) if defined $url[0];
1293
1294        # PUSH OBJECT INTO PAPER OBJECT
1295        # -----------------------------
1296        chomp @object;
1297        my $object_line = "";
1298        for my $i ( 0 ... $#object ) {
1299           # drop it onto one line
1300           $object_line = $object_line . $object[$i];
1301        }
1302        $paper->object( $object_line ) if defined $object[0];
1303
1304        # PUSH ABSTRACT INTO PAPER OBJECT
1305        # -------------------------------
1306        chomp @abstract;
1307        for my $i ( 0 ... $#abstract ) {
1308           # get rid of trailing spaces
1309           $abstract[$i] =~ s/\s+$//;
1310        }
1311        $paper->abstract( \@abstract ) if defined $abstract[0];
1312
1313        # PUSH SCORE INTO PAPER OBJECT
1314        # ----------------------------
1315        chomp @score;
1316        my $score_line = "";
1317        for my $i ( 0 ... $#score ) {
1318           # drop it onto one line
1319           $score_line = $score_line . $score[$i];
1320        }
1321        $paper->score( $score_line ) if defined $score[0];
1322
1323
1324     }
1325
1326     # Increment the line counter to the correct index for the next paper
1327     $line += $next;
1328
1329     # Push the new paper onto the Astro::ADS::Result object
1330     # -----------------------------------------------------
1331     $result->pushpaper($paper) if defined $paper;
1332     $paper = undef;
1333
1334   }
1335
1336   # return an Astro::ADS::Result object, or undef if no abstracts returned
1337   return $result;
1338
1339}
1340
1341=item B<_dump_raw>
1342
1343Private function for debugging and other testing purposes. It will return
1344the raw output of the last ADS query made using querydb().
1345
1346=cut
1347
1348sub _dump_raw {
1349   my $self = shift;
1350
1351   # split the BUFFER into an array
1352   my @portable = split( /\n/,$self->{BUFFER});
1353   chomp @portable;
1354
1355   return @portable;
1356}
1357
1358=item B<_dump_options>
1359
1360Private function for debugging and other testing purposes. It will return
1361the current query options as a hash.
1362
1363=cut
1364
1365sub _dump_options {
1366   my $self = shift;
1367
1368   return %{$self->{OPTIONS}};
1369}
1370
1371=back
1372
1373=end __PRIVATE_METHODS__
1374
1375=head1 BUGS
1376
1377=over
1378
1379=item #35645 filed at rt.cpan.org (Ampersands)
1380
1381Older versions can't handle ampersands in the bibcode, such as A&A for Astronomy & Astrophysics.
1382Fixed for queries in 1.22 - 5/2009.
1383Fixed for references in 1.23 - Boyd Duffee E<lt>b dot duffee at isc dot keele dot ac dot ukE<gt>, 7/2011.
1384
1385=back
1386
1387
1388=head1 COPYRIGHT
1389
1390Copyright (C) 2001 University of Exeter. All Rights Reserved.
1391
1392This program was written as part of the eSTAR project and is free software;
1393you can redistribute it and/or modify it under the terms of the GNU Public
1394License.
1395
1396=head1 AUTHORS
1397
1398Alasdair Allan E<lt>aa@astro.ex.ac.ukE<gt>,
1399
1400=cut
1401
1402# L A S T  O R D E R S ------------------------------------------------------
1403
14041;
1405