1package Astro::ADS::Query; 2 3# --------------------------------------------------------------------------- 4 5#+ 6# Name: 7# Astro::ADS::Query 8 9# Purposes: 10# Perl wrapper for the ADS database 11 12# Language: 13# Perl module 14 15# Description: 16# This module wraps the ADS online database. 17 18# Authors: 19# Alasdair Allan (aa@astro.ex.ac.uk) 20 21# Revision: 22# $Id: Query.pm,v 1.24 2011/07/01 bjd Exp $ 23 24# Copyright: 25# Copyright (C) 2001 University of Exeter. All Rights Reserved. 26 27#- 28 29# --------------------------------------------------------------------------- 30 31=head1 NAME 32 33Astro::ADS::Query - Object definining an prospective ADS query. 34 35=head1 SYNOPSIS 36 37 $query = new Astro::ADS::Query( Authors => \@authors, 38 AuthorLogic => $aut_logic, 39 Objects => \@objects, 40 ObjectLogic => $obj_logic, 41 Bibcode => $bibcode, 42 Proxy => $proxy, 43 Timeout => $timeout, 44 URL => $url ); 45 46 my $results = $query->querydb(); 47 48=head1 DESCRIPTION 49 50Stores information about an prospective ADS query and allows the query to 51be made, returning an Astro::ADS::Result object. 52 53The object will by default pick up the proxy information from the HTTP_PROXY 54and NO_PROXY environment variables, see the LWP::UserAgent documentation for 55details. 56 57=cut 58 59# L O A D M O D U L E S -------------------------------------------------- 60 61use strict; 62use warnings; 63use vars qw/ $VERSION /; 64 65use LWP::UserAgent; 66use Astro::ADS::Result; 67use Astro::ADS::Result::Paper; 68use Net::Domain qw(hostname hostdomain); 69use Carp; 70 71'$Revision: 1.26 $ ' =~ /.*:\s(.*)\s\$/ && ($VERSION = $1); 72 73# C L A S S A T T R I B U T E S ------------------------------------------ 74{ 75 my $_ads_mirror = 'cdsads.u-strasbg.fr'; # this is the default mirror site 76 sub ads_mirror { 77 my ($class, $new_mirror) = @_; 78 $_ads_mirror = $new_mirror if @_ > 1; 79 return $_ads_mirror; 80 } 81} 82 83# C O N S T R U C T O R ---------------------------------------------------- 84 85=head1 REVISION 86 87$Id: Query.pm,v 1.25 2013/08/06 bjd Exp $ 88$Id: Query.pm,v 1.24 2009/07/01 bjd Exp $ 89$Id: Query.pm,v 1.22 2009/05/01 bjd Exp $ 90$Id: Query.pm,v 1.21 2002/09/23 21:07:49 aa Exp $ 91 92=head1 METHODS 93 94=head2 Constructor 95 96=over 4 97 98=item B<new> 99 100Create a new instance from a hash of options 101 102 $query = new Astro::ADS::Query( Authors => \@authors, 103 AuthorLogic => $aut_logic, 104 Objects => \@objects, 105 ObjectLogic => $obj_logic, 106 Bibcode => $bibcode, 107 Proxy => $proxy, 108 Timeout => $timeout, 109 URL => $url ); 110 111returns a reference to an ADS query object. 112 113=cut 114 115sub new { 116 my $proto = shift; 117 my $class = ref($proto) || $proto; 118 119 # bless the query hash into the class 120 my $block = bless { OPTIONS => {}, 121 URL => undef, 122 QUERY => undef, 123 FOLLOWUP => undef, 124 USERAGENT => undef, 125 BUFFER => undef }, $class; 126 127 # Configure the object 128 # does nothing if no arguments supplied 129 $block->configure( @_ ); 130 131 return $block; 132 133} 134 135# Q U E R Y M E T H O D S ------------------------------------------------ 136 137=back 138 139=head2 Accessor Methods 140 141=over 4 142 143=item B<querydb> 144 145Returns an Astro::ADS::Result object for an inital ADS query 146 147 $results = $query->querydb(); 148 149=cut 150 151sub querydb { 152 my $self = shift; 153 154 # call the private method to make the actual ADS query 155 $self->_make_query(); 156 157 # check for failed connect 158 return unless defined $self->{BUFFER}; 159 160 # return an Astro::ADS::Result object 161 return $self->_parse_query(); 162 163} 164 165=item B<followup> 166 167Returns an Astro::ADS::Result object for a followup query, e.g. CITATIONS, 168normally called using accessor methods from an Astro::ADS::Paper object, but 169can be called directly. 170 171 $results = $query->followup( $bibcode, $link_type ); 172 173returns undef if no arguements passed. Possible $link_type values are AR, 174CITATIONS, REFERENCES and TOC. 175 176=cut 177 178sub followup { 179 my $self = shift; 180 181 # return unless we have arguments 182 return unless @_; 183 184 my $bibcode = shift; 185 my $link_type = shift; 186 187 # call the private method to make the actual ADS query 188 $self->_make_followup( $bibcode, $link_type ); 189 190 # check for failed connect 191 return unless defined $self->{BUFFER}; 192 193 # return an Astro::ADS::Result object 194 return $self->_parse_query(); 195 196} 197 198=item B<proxy> 199 200Return (or set) the current proxy for the ADS request. 201 202 $query->proxy( 'http://wwwcache.ex.ac.uk:8080/' ); 203 $proxy_url = $query->proxy(); 204 205=cut 206 207sub proxy { 208 my $self = shift; 209 210 # grab local reference to user agent 211 my $ua = $self->{USERAGENT}; 212 213 if (@_) { 214 my $proxy_url = shift; 215 $ua->proxy('http', $proxy_url ); 216 } 217 218 # return the current proxy 219 return $ua->proxy('http'); 220 221} 222 223=item B<timeout> 224 225Return (or set) the current timeout in seconds for the ADS request. 226 227 $query->timeout( 30 ); 228 $proxy_timeout = $query->timeout(); 229 230=cut 231 232sub timeout { 233 my $self = shift; 234 235 # grab local reference to user agent 236 my $ua = $self->{USERAGENT}; 237 238 if (@_) { 239 my $time = shift; 240 $ua->timeout( $time ); 241 } 242 243 # return the current timeout 244 return $ua->timeout(); 245 246} 247 248=item B<url> 249 250Return (or set) the current base URL for the ADS query. 251 252 $url = $query->url(); 253 $query->url( "adsabs.harvard.edu" ); 254 255if not defined the default URL is cdsads.u-strasbg.fr 256 257As of v1.24, this method sets a class attribute to keep it 258consistant across all objects. Not terribly thread safe, but 259at least you know where your query is going. 260 261=cut 262 263sub url { 264 my $self = shift; 265 my $class = ref($self); # now re-implemented as a class attribute 266 267 # SETTING URL 268 if (@_) { 269 270 # set the url option 271 my $base_url = shift; 272 $class->ads_mirror( $base_url ); 273 if( defined $base_url ) { 274 $self->{QUERY} = "http://$base_url/cgi-bin/nph-abs_connect?"; 275 $self->{FOLLOWUP} = "http://$base_url/cgi-bin/nph-ref_query?"; 276 } 277 } 278 279 # RETURNING URL 280 return $class->ads_mirror(); 281} 282 283=item B<agent> 284 285Returns the user agent tag sent by the module to the ADS server. 286 287 $agent_tag = $query->agent(); 288 289=cut 290 291sub agent { 292 my $self = shift; 293 my $string = shift; 294 if (defined $string) { 295 my $agent = $self->{USERAGENT}->agent(); 296 $agent =~ s/(\d+)\s(\[.*\]\s*)?\(/$1 [$string] (/; 297 return $self->{USERAGENT}->agent($agent); 298 } 299 else { 300 return $self->{USERAGENT}->agent(); 301 } 302} 303 304# O T H E R M E T H O D S ------------------------------------------------ 305 306=item B<Authors> 307 308Return (or set) the current authors defined for the ADS query. 309 310 @authors = $query->authors(); 311 $first_author = $query->authors(); 312 $query->authors( \@authors ); 313 314if called in a scalar context it will return the first author. 315 316=cut 317 318sub authors { 319 my $self = shift; 320 321 # SETTING AUTHORS 322 if (@_) { 323 324 # clear the current author list 325 ${$self->{OPTIONS}}{"author"} = ""; 326 327 # grab the new list from the arguements 328 my $author_ref = shift; 329 330 # make a local copy to use for regular expressions 331 my @author_list = @$author_ref; 332 333 # mutilate it and stuff it into the author list OPTION 334 for my $i ( 0 ... $#author_list ) { 335 $author_list[$i] =~ s/\s/\+/g; 336 337 if ( $i eq 0 ) { 338 ${$self->{OPTIONS}}{"author"} = $author_list[$i]; 339 } else { 340 ${$self->{OPTIONS}}{"author"} = 341 ${$self->{OPTIONS}}{"author"} . ";" . $author_list[$i]; 342 } 343 } 344 } 345 346 # RETURNING AUTHORS 347 my $author_line = ${$self->{OPTIONS}}{"author"}; 348 $author_line =~ s/\+/ /g; 349 my @authors = split(/;/, $author_line); 350 351 return wantarray ? @authors : $authors[0]; 352} 353 354=item B<AuthorLogic> 355 356Return (or set) the logic when dealing with multiple authors for a search, 357possible values for this parameter are OR, AND, SIMPLE, BOOL and FULLMATCH. 358 359 $author_logic = $query->authorlogic(); 360 $query->authorlogic( "AND" ); 361 362if called with no arguements, or invalid arguements, then the method will 363return the current logic. 364 365=cut 366 367sub authorlogic { 368 my $self = shift; 369 370 if (@_) { 371 372 my $logic = shift; 373 if ( $logic eq "OR" || $logic eq "AND" || $logic eq "SIMPLE" || 374 $logic eq "BOOL" || $logic eq "FULLMATCH" ) { 375 376 # set the new logic 377 ${$self->{OPTIONS}}{"aut_logic"} = $logic; 378 } 379 } 380 381 return ${$self->{OPTIONS}}{"aut_logic"}; 382} 383 384=item B<Objects> 385 386Return (or set) the current objects defined for the ADS query. 387 388 @objects = $query->objects(); 389 $query->objects( \@objects ); 390 391=cut 392 393sub objects { 394 my $self = shift; 395 396 # SETTING AUTHORS 397 if (@_) { 398 399 # clear the current object list 400 ${$self->{OPTIONS}}{"object"} = ""; 401 402 # grab the new list from the arguements 403 my $object_ref = shift; 404 405 # make a local copy to use for regular expressions 406 my @object_list = @$object_ref; 407 408 # mutilate it and stuff it into the object list OPTION 409 for my $i ( 0 ... $#object_list ) { 410 $object_list[$i] =~ s/\s/\+/g; 411 412 if ( $i eq 0 ) { 413 ${$self->{OPTIONS}}{"object"} = $object_list[$i]; 414 } else { 415 ${$self->{OPTIONS}}{"object"} = 416 ${$self->{OPTIONS}}{"object"} . ";" . $object_list[$i]; 417 } 418 } 419 } 420 421 # RETURNING OBJECTS 422 my $object_line = ${$self->{OPTIONS}}{"object"}; 423 $object_line =~ s/\+/ /g; 424 my @objects = split(/;/, $object_line); 425 426 return @objects; 427 428} 429 430=item B<ObjectLogic> 431 432Return (or set) the logic when dealing with multiple objects in a search, 433possible values for this parameter are OR, AND, SIMPLE, BOOL and FULLMATCH. 434 435 $obj_logic = $query->objectlogic(); 436 $query->objectlogic( "AND" ); 437 438if called with no arguements, or invalid arguements, then the method will 439return the current logic. 440 441=cut 442 443sub objectlogic { 444 my $self = shift; 445 446 if (@_) { 447 448 my $logic = shift; 449 if ( $logic eq "OR" || $logic eq "AND" || $logic eq "SIMPLE" || 450 $logic eq "BOOL" || $logic eq "FULLMATCH" ) { 451 452 # set the new logic 453 ${$self->{OPTIONS}}{"obj_logic"} = $logic; 454 } 455 } 456 457 return ${$self->{OPTIONS}}{"obj_logic"}; 458} 459 460=item B<Bibcode> 461 462Return (or set) the current bibcode used for the ADS query. 463 464 $bibcode = $query->bibcode(); 465 $query->bibcode( "1996PhDT........42J" ); 466 467=cut 468 469sub bibcode { 470 my $self = shift; 471 472 # SETTING BIBCODE 473 if (@_) { 474 475 # set the bibcode option 476 ${$self->{OPTIONS}}{"bibcode"} = shift; 477 } 478 479 # RETURNING BIBCODE 480 return ${$self->{OPTIONS}}{"bibcode"}; 481} 482 483 484=item B<startmonth> 485 486Return (or set) the current starting month of the ADS query. 487 488 $start_month = $query->startmonth(); 489 $query->startmonth( "01" ); 490 491=cut 492 493sub startmonth { 494 my $self = shift; 495 496 # SETTING STARTING MONTH 497 if (@_) { 498 499 # set the starting month option 500 ${$self->{OPTIONS}}{"start_mon"} = shift; 501 } 502 503 # RETURNING STARTING MONTH 504 return ${$self->{OPTIONS}}{"start_mon"}; 505 506} 507 508=item B<endmonth> 509 510Return (or set) the current end month of the ADS query. 511 512 $end_month = $query->endmonth(); 513 $query->endmonth( "12" ); 514 515=cut 516 517sub endmonth { 518 my $self = shift; 519 520 # SETTING END MONTH 521 if (@_) { 522 523 # set the end month option 524 ${$self->{OPTIONS}}{"end_mon"} = shift; 525 } 526 527 # RETURNING END MONTH 528 return ${$self->{OPTIONS}}{"end_mon"}; 529 530} 531 532=item B<startyear> 533 534Return (or set) the current starting year of the ADS query. 535 536 $start_year = $query->startyear(); 537 $query->start_year( "2001" ); 538 539=cut 540 541sub startyear { 542 my $self = shift; 543 544 # SETTING START YEAR 545 if (@_) { 546 547 # set the starting year option 548 ${$self->{OPTIONS}}{"start_year"} = shift; 549 } 550 551 # RETURNING START YEAR 552 return ${$self->{OPTIONS}}{"start_year"}; 553 554} 555 556=item B<endyear> 557 558Return (or set) the current end year of the ADS query. 559 560 $end_year = $query->endyear(); 561 $query->end_year( "2002" ); 562 563=cut 564 565sub endyear { 566 my $self = shift; 567 568 # SETTING END YEAR 569 if (@_) { 570 571 # set the end year option 572 ${$self->{OPTIONS}}{"end_year"} = shift; 573 } 574 575 # RETURNING END YEAR 576 return ${$self->{OPTIONS}}{"end_year"}; 577 578} 579 580=item B<journal> 581 582Return (or set) whether refereed, non-refereed (OTHER) or all bibilographic sources (ALL) are returned. 583 584 $query->journal( "REFEREED" ); 585 $query->journal( "OTHER" ); 586 $query->journal( "ALL" ); 587 588 $journals = $query->journal(); 589 590the default is ALL bibilographic sources 591 592=cut 593 594sub journal { 595 my $self = shift; 596 597 # SETTING END YEAR 598 if (@_) { 599 600 my $source = shift; 601 602 if ( $source eq "REFEREED" ) { 603 ${$self->{OPTIONS}}{"jou_pick"} = "NO"; 604 } elsif ( $source eq "OTHER" ) { 605 ${$self->{OPTIONS}}{"jou_pick"} = "EXCL"; 606 } else { 607 ${$self->{OPTIONS}}{"jou_pick"} = "ALL"; 608 } 609 610 } 611 612 # RETURNING END YEAR 613 return ${$self->{OPTIONS}}{"jou_pick"}; 614 615} 616 617# C O N F I G U R E ------------------------------------------------------- 618 619=back 620 621=head2 General Methods 622 623=over 4 624 625=item B<configure> 626 627Configures the object, takes an options hash as an argument 628 629 $query->configure( %options ); 630 631Does nothing if the array is not supplied. 632 633=cut 634 635sub configure { 636 my $self = shift; 637 my $class = ref($self); 638 639 # CONFIGURE DEFAULTS 640 # ------------------ 641 642 # define the default base URL 643 my $default_url = $class->ads_mirror(); 644 645 # define the query URLs 646 $self->{QUERY} = "http://$default_url/cgi-bin/nph-abs_connect?"; 647 $self->{FOLLOWUP} = "http://$default_url/cgi-bin/nph-ref_query?"; 648 649 650 # Setup the LWP::UserAgent 651 my $HOST = hostname(); 652 my $DOMAIN = hostdomain(); 653 $self->{USERAGENT} = new LWP::UserAgent( timeout => 30 ); 654 $self->{USERAGENT}->agent("Astro::ADS/$VERSION ($HOST.$DOMAIN)"); 655 656 # Grab Proxy details from local environment 657 $self->{USERAGENT}->env_proxy(); 658 659 # configure the default options 660 ${$self->{OPTIONS}}{"db_key"} = "AST"; 661 ${$self->{OPTIONS}}{"sim_query"} = "YES"; 662 ${$self->{OPTIONS}}{"aut_xct"} = "NO"; 663 ${$self->{OPTIONS}}{"aut_logic"} = "OR"; 664 ${$self->{OPTIONS}}{"obj_logic"} = "OR"; 665 ${$self->{OPTIONS}}{"author"} = ""; 666 ${$self->{OPTIONS}}{"object"} = ""; 667 ${$self->{OPTIONS}}{"keyword"} = ""; 668 ${$self->{OPTIONS}}{"start_mon"} = ""; 669 ${$self->{OPTIONS}}{"start_year"} = ""; 670 ${$self->{OPTIONS}}{"end_mon"} = ""; 671 ${$self->{OPTIONS}}{"end_year"} = ""; 672 ${$self->{OPTIONS}}{"ttl_logic"} = "OR"; 673 ${$self->{OPTIONS}}{"title"} = ""; 674 ${$self->{OPTIONS}}{"txt_logic"} = "OR"; 675 ${$self->{OPTIONS}}{"text"} = ""; 676 ${$self->{OPTIONS}}{"nr_to_return"} = "100"; 677 ${$self->{OPTIONS}}{"start_nr"} = "1"; 678 ${$self->{OPTIONS}}{"start_entry_day"} = ""; 679 ${$self->{OPTIONS}}{"start_entry_mon"} = ""; 680 ${$self->{OPTIONS}}{"start_entry_year"} = ""; 681 ${$self->{OPTIONS}}{"min_score"} = ""; 682 ${$self->{OPTIONS}}{"jou_pick"} = "ALL"; 683 ${$self->{OPTIONS}}{"ref_stems"} = ""; 684 ${$self->{OPTIONS}}{"data_and"} = "ALL"; 685 ${$self->{OPTIONS}}{"group_and"} = "ALL"; 686 ${$self->{OPTIONS}}{"sort"} = "SCORE"; 687 ${$self->{OPTIONS}}{"aut_syn"} = "YES"; 688 ${$self->{OPTIONS}}{"ttl_syn"} = "YES"; 689 ${$self->{OPTIONS}}{"txt_syn"} = "YES"; 690 ${$self->{OPTIONS}}{"aut_wt"} = "1.0"; 691 ${$self->{OPTIONS}}{"obj_wt"} = "1.0"; 692 ${$self->{OPTIONS}}{"ttl_wt"} = "0.3"; 693 ${$self->{OPTIONS}}{"txt_wt"} = "3.0"; 694 ${$self->{OPTIONS}}{"aut_wgt"} = "YES"; 695 ${$self->{OPTIONS}}{"obj_wgt"} = "YES"; 696 ${$self->{OPTIONS}}{"ttl_wgt"} = "YES"; 697 ${$self->{OPTIONS}}{"txt_wgt"} = "YES"; 698 ${$self->{OPTIONS}}{"ttl_sco"} = "YES"; 699 ${$self->{OPTIONS}}{"txt_sco"} = "YES"; 700 ${$self->{OPTIONS}}{"version"} = "1"; 701 ${$self->{OPTIONS}}{"bibcode"} = ""; 702 703 # Set the data_type option to PORTABLE so our regular expressions work! 704 # Set the return format to LONG so we get full abstracts! 705 ${$self->{OPTIONS}}{"data_type"} = "PORTABLE"; 706 ${$self->{OPTIONS}}{"return_fmt"} = "LONG"; 707 708 # CONFIGURE FROM ARGUEMENTS 709 # ------------------------- 710 711 # return unless we have arguments 712 return unless @_; 713 714 # grab the argument list 715 my %args = @_; 716 717 # Loop over the allowed keys and modify the default query options 718 for my $key (qw / Authors AuthorLogic Objects ObjectLogic Bibcode 719 StartMonth EndMonth StartYear EndYear Journal 720 Proxy Timeout URL/ ) { 721 my $method = lc($key); 722 $self->$method( $args{$key} ) if exists $args{$key}; 723 } 724 725} 726 727# T I M E A T T H E B A R -------------------------------------------- 728 729=back 730 731=begin __PRIVATE_METHODS__ 732 733=head2 Private methods 734 735These methods are for internal use only. 736 737=over 4 738 739=item B<_make_query> 740 741Private function used to make an ADS query. Should not be called directly, 742since it does not parse the results. Instead use the querydb() assessor method. 743 744=cut 745 746sub _make_query { 747 my $self = shift; 748 749 # grab the user agent 750 my $ua = $self->{USERAGENT}; 751 752 # clean out the buffer 753 $self->{BUFFER} = ""; 754 755 # grab the base URL 756 my $URL = $self->{QUERY}; 757 my $options = ""; 758 759 # loop round all the options keys and build the query 760 foreach my $key ( keys %{$self->{OPTIONS}} ) { 761 # some bibcodes have & and needs to be made "web safe" 762 my $websafe_option = ${$self->{OPTIONS}}{$key}; 763 $websafe_option =~ s/&/%26/g; 764 $options = $options . "&$key=$websafe_option"; 765 766 } 767 768 # build final query URL 769 $URL = $URL . $options; 770 771 # build request 772 my $request = new HTTP::Request('GET', $URL); 773 774 # grab page from web 775 my $reply = $ua->request($request); 776 777 if ( ${$reply}{"_rc"} eq 200 ) { 778 779 # stuff the page contents into the buffer 780 $self->{BUFFER} = ${$reply}{"_content"}; 781 782 } elsif ( ${$reply}{"_rc"} eq 500 ) { 783 784 # we may have a network unreachable, or we may have a no reference 785 # selected error returned by ADS (go figure) 786 787 $self->{BUFFER} = ${$reply}{"_content"}; 788 my @buffer = split( /\n/,$self->{BUFFER}); 789 chomp @buffer; 790 791 # assume we have an error unless we can prove otherwise 792 my $error_flag = 1; 793 794 foreach my $line ( 0 ... $#buffer ) { 795 if( $buffer[$line] =~ "No reference selected" ) { 796 797 # increment the counter and drop out of the loop 798 $line = $#buffer; 799 $error_flag = 0; 800 } 801 } 802 803 # we definately have an error 804 if( $error_flag ) { 805 $self->{BUFFER} = undef; 806 my $proxy_string = undef; 807 if ($proxy_string = $ua->proxy('http')) { substr($proxy_string, 0, 0) = ' using proxy '; } 808 else { $proxy_string = ' (no proxy)'; } 809 croak("Error ${$reply}{_rc}: Failed to establish network connection to $URL", 810 $proxy_string, "\n"); 811 } 812 813 } else { 814 $self->{BUFFER} = undef; 815 my $proxy_string = undef; 816 if ($proxy_string = $ua->proxy('http')) { substr($proxy_string, 0, 0) = ' using proxy '; } 817 else { $proxy_string = ' (no proxy)'; } 818 croak("Error ${$reply}{_rc}: Failed to establish network connection to $URL", 819 $proxy_string, "\n"); 820 } 821 822 823} 824 825=item B<_make_followup> 826 827Private function used to make a followup ADS query, e.g. REFERNCES, called 828from the followup() assessor method. Should not be called directly. 829 830=cut 831 832sub _make_followup { 833 my $self = shift; 834 835 # grab the user agent 836 my $ua = $self->{USERAGENT}; 837 838 # clean out the buffer 839 $self->{BUFFER} = ""; 840 841 # grab the base URL 842 my $URL = $self->{FOLLOWUP}; 843 844 # which paper? 845 my $bibcode = shift; 846 $bibcode =~ s/&/%26/g; # make ampersands websafe 847 848 # which followup? 849 my $refs = shift; 850 851 # which database? 852 my $db_key = ${$self->{OPTIONS}}{"db_key"}; 853 my $data_type = ${$self->{OPTIONS}}{"data_type"}; 854 my $fmt = ${$self->{OPTIONS}}{"return_fmt"}; 855 856 # build the final query URL 857 $URL = $URL . "bibcode=$bibcode&refs=$refs&db_key=$db_key&data_type=$data_type&return_fmt=$fmt"; 858 859 # build request 860 my $request = new HTTP::Request('GET', $URL); 861 862 # grab page from web 863 my $reply = $ua->request($request); 864 865 if ( ${$reply}{"_rc"} eq 200 ) { 866 # stuff the page contents into the buffer 867 $self->{BUFFER} = ${$reply}{"_content"}; 868 } else { 869 $self->{BUFFER} = undef; 870 my $proxy_string = undef; 871 if ($proxy_string = $ua->proxy('http')) { substr($proxy_string, 0, 0) = ' using proxy '; } 872 else { $proxy_string = ' (no proxy) '; } 873 croak("Error ${$reply}{_rc}: Failed to establish network connection to $URL" . 874 $proxy_string . $self->{BUFFER} ."\n"); 875 } 876} 877 878=item B<_parse_query> 879 880Private function used to parse the results returned in an ADS query. Should 881not be called directly. Instead use the querydb() assessor method to make and 882parse the results. 883 884=cut 885 886sub _parse_query { 887 my $self = shift; 888 889 # get a local copy of the current BUFFER 890 my @buffer = split( /\n/,$self->{BUFFER}); 891 chomp @buffer; 892 893 # create an Astro::ADS::Result object to hold the search results 894 my $result = new Astro::ADS::Result(); 895 896 # create a temporary object to hold papers 897 my $paper; 898 899 # loop round the returned buffer and stuff the contents into Paper objects 900 my ( $next, $counter ); 901 $next = $counter = 0; 902 foreach my $line ( 0 ... $#buffer ) { 903 904 # R Bibcode 905 # T Title 906 # A Author List 907 # F Affiliations 908 # J Journal Reference 909 # D Publication Date 910 # K Keywords 911 # G Origin 912 # I Outbound Links 913 # U Document URL 914 # O Object name 915 # B Abstract 916 # S Score 917 918 # NO ABSTRACTS 919 if( $buffer[$line] =~ "Retrieved 0 abstracts" ) { 920 921 # increment the counter and drop out of the loop 922 $line = $#buffer; 923 924 } 925 926 # NO ABSTRACT (HTML version) 927 if( $buffer[$line] =~ "No reference selected" ) { 928 929 # increment the counter and drop out of the loop 930 $line = $#buffer; 931 } 932 933 # NEW PAPER 934 if( substr( $buffer[$line], 0, 2 ) eq "%R" ) { 935 936 $counter = $line; 937 my $tag = substr( $buffer[$counter], 1, 1 ); 938 939 # grab the bibcode 940 my $bibcode = substr( $buffer[$counter], 2 ); 941 $bibcode =~ s/\s+//g; 942 943 # New Astro::ADS::Result::Paper object 944 $paper = new Astro::ADS::Result::Paper( Bibcode => $bibcode ); 945 946 $counter++; 947 948 # LOOP THROUGH PAPER 949 my ( @title, @authors, @affil, @journal, @pubdate, @keywords, 950 @origin, @links, @url, @object, @abstract, @score ); 951 while ( $counter <= $#buffer && 952 substr( $buffer[$counter], 0, 2 ) ne "%R" ) { 953 954 955 # grab the tags 956 if( substr( $buffer[$counter], 0, 1 ) eq "%" ) { 957 $tag = substr( $buffer[$counter], 1, 1 ); 958 } 959 960 # ckeck for each tag and stuff the contents into the paper object 961 962 # TITLE 963 # ----- 964 if( $tag eq "T" ) { 965 966 #do we have the start of an title block? 967 if ( substr( $buffer[$counter], 0, 1 ) eq "%") { 968 969 # push the end of line substring onto array 970 push ( @title, substr( $buffer[$counter], 3 ) ); 971 972 } else { 973 974 # push the entire line onto the array 975 push (@title, $buffer[$counter] ); 976 977 } 978 } 979 980 # AUTHORS 981 # ------- 982 if( $tag eq "A" ) { 983 984 #do we have the start of an author block? 985 if ( substr( $buffer[$counter], 0, 1 ) eq "%") { 986 987 # push the end of line substring onto array 988 push ( @authors, substr( $buffer[$counter], 3 ) ); 989 990 } else { 991 992 # push the entire line onto the array 993 push (@authors, $buffer[$counter] ); 994 995 } 996 } 997 998 # AFFILIATION 999 # ----------- 1000 if( $tag eq "F" ) { 1001 1002 #do we have the start of an affil block? 1003 if ( substr( $buffer[$counter], 0, 1 ) eq "%") { 1004 1005 # push the end of line substring onto array 1006 push ( @affil, substr( $buffer[$counter], 3 ) ); 1007 1008 } else { 1009 1010 # push the entire line onto the array 1011 push (@affil, $buffer[$counter] ); 1012 1013 } 1014 } 1015 1016 # JOURNAL REF 1017 # ----------- 1018 if( $tag eq "J" ) { 1019 1020 #do we have the start of an journal block? 1021 if ( substr( $buffer[$counter], 0, 1 ) eq "%") { 1022 1023 # push the end of line substring onto array 1024 push ( @journal, substr( $buffer[$counter], 3 ) ); 1025 1026 } else { 1027 1028 # push the entire line onto the array 1029 push (@journal, $buffer[$counter] ); 1030 1031 } 1032 } 1033 1034 # PUBLICATION DATE 1035 # ---------------- 1036 if( $tag eq "D" ) { 1037 1038 #do we have the start of an publication date block? 1039 if ( substr( $buffer[$counter], 0, 1 ) eq "%") { 1040 1041 # push the end of line substring onto array 1042 push ( @pubdate, substr( $buffer[$counter], 3 ) ); 1043 1044 } else { 1045 1046 # push the entire line onto the array 1047 push (@pubdate, $buffer[$counter] ); 1048 1049 } 1050 } 1051 1052 # KEYWORDS 1053 # -------- 1054 if( $tag eq "K" ) { 1055 1056 #do we have the start of an keyword block? 1057 if ( substr( $buffer[$counter], 0, 1 ) eq "%") { 1058 1059 # push the end of line substring onto array 1060 push ( @keywords, substr( $buffer[$counter], 3 ) ); 1061 1062 } else { 1063 1064 # push the entire line onto the array 1065 push (@keywords, $buffer[$counter] ); 1066 1067 } 1068 } 1069 1070 # ORIGIN 1071 # ------ 1072 if( $tag eq "G" ) { 1073 1074 #do we have the start of an origin block? 1075 if ( substr( $buffer[$counter], 0, 1 ) eq "%") { 1076 1077 # push the end of line substring onto array 1078 push ( @origin, substr( $buffer[$counter], 3 ) ); 1079 1080 } else { 1081 1082 # push the entire line onto the array 1083 push (@origin, $buffer[$counter] ); 1084 1085 } 1086 } 1087 1088 # LINKS 1089 # ----- 1090 if( $tag eq "I" ) { 1091 1092 #do we have the start of an author block? 1093 if ( substr( $buffer[$counter], 0, 1 ) eq "%") { 1094 1095 # push the end of line substring onto array 1096 push ( @links, substr( $buffer[$counter], 3 ) ); 1097 1098 } else { 1099 1100 # push the entire line onto the array 1101 push (@links, $buffer[$counter] ); 1102 1103 } 1104 } 1105 1106 # URL 1107 # --- 1108 if( $tag eq "U" ) { 1109 1110 #do we have the start of an URL block? 1111 if ( substr( $buffer[$counter], 0, 1 ) eq "%") { 1112 1113 # push the end of line substring onto array 1114 push ( @url, substr( $buffer[$counter], 3 ) ); 1115 1116 } else { 1117 1118 # push the entire line onto the array 1119 push (@url, $buffer[$counter] ); 1120 1121 } 1122 } 1123 1124 # OBJECT 1125 # ------ 1126 if( $tag eq "O" ) { 1127 1128 #do we have the start of an title block? 1129 if ( substr( $buffer[$counter], 0, 1 ) eq "%") { 1130 1131 # push the end of line substring onto array 1132 push ( @object, substr( $buffer[$counter], 3 ) ); 1133 1134 } else { 1135 1136 # push the entire line onto the array 1137 push (@object, $buffer[$counter] ); 1138 1139 } 1140 } 1141 1142 # ABSTRACT 1143 # -------- 1144 if( $tag eq "B" ) { 1145 1146 #do we have the start of an title block? 1147 if ( substr( $buffer[$counter], 0, 1 ) eq "%") { 1148 1149 # push the end of line substring onto array 1150 push ( @abstract, substr( $buffer[$counter], 3 ) ); 1151 1152 } else { 1153 1154 # push the entire line onto the array 1155 push (@abstract, $buffer[$counter] ); 1156 1157 } 1158 } 1159 1160 # SCORE 1161 # ----- 1162 if( $tag eq "S" ) { 1163 1164 #do we have the start of an title block? 1165 if ( substr( $buffer[$counter], 0, 1 ) eq "%") { 1166 1167 # push the end of line substring onto array 1168 push ( @score, substr( $buffer[$counter], 3 ) ); 1169 1170 } else { 1171 1172 # push the entire line onto the array 1173 push (@score, $buffer[$counter] ); 1174 1175 } 1176 } 1177 1178 1179 # set the next paper increment 1180 $next = $counter; 1181 # increment the line counter 1182 $counter++; 1183 1184 } 1185 1186 # PUSH TITLE INTO PAPER OBJECT 1187 # ---------------------------- 1188 chomp @title; 1189 my $title_line = ""; 1190 for my $i ( 0 ... $#title ) { 1191 # drop it onto one line 1192 $title_line = $title_line . $title[$i]; 1193 } 1194 $paper->title( $title_line ) if defined $title[0]; 1195 1196 # PUSH AUTHORS INTO PAPER OBJECT 1197 # ------------------------------ 1198 chomp @authors; 1199 my $author_line = ""; 1200 for my $i ( 0 ... $#authors ) { 1201 # drop it onto one line 1202 $author_line = $author_line . $authors[$i]; 1203 } 1204 # get rid of leading spaces before author names 1205 $author_line =~ s/;\s+/;/g; 1206 1207 my @paper_authors = split( /;/, $author_line ); 1208 $paper->authors( \@paper_authors ) if defined $authors[0]; 1209 1210 # PUSH AFFILIATION INTO PAPER OBJECT 1211 # ---------------------------------- 1212 chomp @affil; 1213 my $affil_line = ""; 1214 for my $i ( 0 ... $#affil ) { 1215 # drop it onto one line 1216 $affil_line = $affil_line . $affil[$i]; 1217 } 1218 # grab each affiliation from its brackets 1219 $affil_line =~ s/\w\w\(//g; 1220 1221 my @paper_affil = split( /\), /, $affil_line ); 1222 $paper->affil( \@paper_affil ) if defined $affil[0]; 1223 1224 # PUSH JOURNAL INTO PAPER OBJECT 1225 # ------------------------------ 1226 chomp @journal; 1227 my $journal_ref = ""; 1228 for my $i ( 0 ... $#journal ) { 1229 # drop it onto one line 1230 $journal_ref = $journal_ref . $journal[$i]; 1231 } 1232 $paper->journal( $journal_ref ) if defined $journal[0]; 1233 1234 # PUSH PUB DATE INTO PAPER OBJECT 1235 # ------------------------------- 1236 chomp @pubdate; 1237 my $pub_date = ""; 1238 for my $i ( 0 ... $#pubdate ) { 1239 # drop it onto one line 1240 $pub_date = $pub_date . $pubdate[$i]; 1241 } 1242 $paper->published( $pub_date ) if defined $pubdate[0]; 1243 1244 # PUSH KEYWORDS INTO PAPER OBJECT 1245 # ------------------------------- 1246 chomp @keywords; 1247 my $key_line = ""; 1248 for my $i ( 0 ... $#keywords ) { 1249 # drop it onto one line 1250 $key_line = $key_line . $keywords[$i]; 1251 } 1252 # get rid of excess spaces 1253 $key_line =~ s/, /,/g; 1254 1255 my @paper_keys = split( /,/, $key_line ); 1256 $paper->keywords( \@paper_keys ) if defined $keywords[0]; 1257 1258 # PUSH ORIGIN INTO PAPER OBJECT 1259 # ----------------------------- 1260 chomp @origin; 1261 my $origin_line = ""; 1262 for my $i ( 0 ... $#origin) { 1263 # drop it onto one line 1264 $origin_line = $origin_line . $origin[$i]; 1265 } 1266 $paper->origin( $origin_line ) if defined $origin[0]; 1267 1268 # PUSH LINKS INTO PAPER OBJECT 1269 # ---------------------------- 1270 chomp @links; 1271 my $links_line = ""; 1272 for my $i ( 0 ... $#links ) { 1273 # drop it onto one line 1274 $links_line = $links_line . $links[$i]; 1275 } 1276 # annoying complex reg exp to get rid of formatting 1277 $links_line =~ s/:.*?;\s*/;/g; 1278 1279 my @paper_links = split( /;/, $links_line ); 1280 $paper->links( \@paper_links ) if defined $links[0]; 1281 1282 # PUSH URL INTO PAPER OBJECT 1283 # -------------------------- 1284 chomp @url; 1285 my $url_line = ""; 1286 for my $i ( 0 ... $#url ) { 1287 # drop it onto one line 1288 $url_line = $url_line . $url[$i]; 1289 } 1290 # get rid of trailing spaces 1291 $url_line =~ s/\s+$//; 1292 $paper->url( $url_line ) if defined $url[0]; 1293 1294 # PUSH OBJECT INTO PAPER OBJECT 1295 # ----------------------------- 1296 chomp @object; 1297 my $object_line = ""; 1298 for my $i ( 0 ... $#object ) { 1299 # drop it onto one line 1300 $object_line = $object_line . $object[$i]; 1301 } 1302 $paper->object( $object_line ) if defined $object[0]; 1303 1304 # PUSH ABSTRACT INTO PAPER OBJECT 1305 # ------------------------------- 1306 chomp @abstract; 1307 for my $i ( 0 ... $#abstract ) { 1308 # get rid of trailing spaces 1309 $abstract[$i] =~ s/\s+$//; 1310 } 1311 $paper->abstract( \@abstract ) if defined $abstract[0]; 1312 1313 # PUSH SCORE INTO PAPER OBJECT 1314 # ---------------------------- 1315 chomp @score; 1316 my $score_line = ""; 1317 for my $i ( 0 ... $#score ) { 1318 # drop it onto one line 1319 $score_line = $score_line . $score[$i]; 1320 } 1321 $paper->score( $score_line ) if defined $score[0]; 1322 1323 1324 } 1325 1326 # Increment the line counter to the correct index for the next paper 1327 $line += $next; 1328 1329 # Push the new paper onto the Astro::ADS::Result object 1330 # ----------------------------------------------------- 1331 $result->pushpaper($paper) if defined $paper; 1332 $paper = undef; 1333 1334 } 1335 1336 # return an Astro::ADS::Result object, or undef if no abstracts returned 1337 return $result; 1338 1339} 1340 1341=item B<_dump_raw> 1342 1343Private function for debugging and other testing purposes. It will return 1344the raw output of the last ADS query made using querydb(). 1345 1346=cut 1347 1348sub _dump_raw { 1349 my $self = shift; 1350 1351 # split the BUFFER into an array 1352 my @portable = split( /\n/,$self->{BUFFER}); 1353 chomp @portable; 1354 1355 return @portable; 1356} 1357 1358=item B<_dump_options> 1359 1360Private function for debugging and other testing purposes. It will return 1361the current query options as a hash. 1362 1363=cut 1364 1365sub _dump_options { 1366 my $self = shift; 1367 1368 return %{$self->{OPTIONS}}; 1369} 1370 1371=back 1372 1373=end __PRIVATE_METHODS__ 1374 1375=head1 BUGS 1376 1377=over 1378 1379=item #35645 filed at rt.cpan.org (Ampersands) 1380 1381Older versions can't handle ampersands in the bibcode, such as A&A for Astronomy & Astrophysics. 1382Fixed for queries in 1.22 - 5/2009. 1383Fixed for references in 1.23 - Boyd Duffee E<lt>b dot duffee at isc dot keele dot ac dot ukE<gt>, 7/2011. 1384 1385=back 1386 1387 1388=head1 COPYRIGHT 1389 1390Copyright (C) 2001 University of Exeter. All Rights Reserved. 1391 1392This program was written as part of the eSTAR project and is free software; 1393you can redistribute it and/or modify it under the terms of the GNU Public 1394License. 1395 1396=head1 AUTHORS 1397 1398Alasdair Allan E<lt>aa@astro.ex.ac.ukE<gt>, 1399 1400=cut 1401 1402# L A S T O R D E R S ------------------------------------------------------ 1403 14041; 1405