1#!/usr/bin/perl -w 2# 3# Checks the online sources for the values of the constants 4# Boyd Duffee, Mar 2020 5# 6# hard coded to run from top directory and uses only data/PhysicalConstants.xml 7 8use strict; 9use autodie; 10use Modern::Perl; 11use XML::LibXML; 12use LWP::Simple; 13use List::Util qw/shuffle/; 14use HTML::Parser; 15 16#die "Usage: $0 infile outfile" unless @ARGV == 1; 17 18my $TESTING = 0; 19my $ONLINE = 1; 20my $SLEEP = 0; 21 22my ($n, @values_parsed, @uncertainties_parsed, ); 23my ($td_flag, $font_flag, $text_flag, $uncertainty_flag) = 0; 24 25my $xml = XML::LibXML->load_xml(location => 'data/PhysicalConstants.xml'); 26my $nist_parser = HTML::Parser->new( 27 start_h => [\&start_nist, "self, tagname, attr"], 28 end_h => [\&end_nist, "tagname, attr"], 29 text_h => [\&text, "text"], 30); 31 32configure_parsers(); 33 34for my $constant ( $xml->getElementsByTagName('PhysicalConstant') ) { 35 my ($long_name, $old_value, ) = undef; 36 37 for my $name ( $constant->getChildrenByTagName('name') ) { 38 $long_name = $name->textContent() if $name->getAttribute('type') eq 'long'; 39 } 40 41 my $description = $constant->getChildrenByTagName('description')->shift()->textContent(); 42 for my $value ( $constant->getChildrenByTagName('value') ) { 43 if ( $value->hasAttribute('system') ) { 44 $old_value = $value->textContent() if $value->getAttribute('system') eq 'MKS'; 45 } 46 else { 47 $old_value = $value->textContent(); 48 next; 49 } 50 $old_value =~ tr/_//; 51 } 52 53 my $precision = $constant->getChildrenByTagName('uncertainty')->shift(); 54 my $precision_type = $precision->getAttribute('type'); 55 $precision = $precision->textContent(); 56 my $source = $constant->getChildrenByTagName('source')->shift(); 57 my $source_url = $source->getAttribute('url'); 58 59 say <<CONST; 60$long_name\t$old_value\t$precision\t$precision_type 61$description 62$source_url 63CONST 64 #next unless $source_url =~ /physics\.nist\.gov/; 65 next if $source_url =~ /wikipedia|jupiterfact/; 66 67 print "Fetch page? [Ynq] "; 68 my $ans = <STDIN>; 69 next if $ans =~ /n/i; 70 last if $ans =~ /q/i; 71 72 my ($new_value, $new_uncertainty) = get_constant_value($source); 73 if ( ! defined $new_value ) { 74 warn "Couldn't get value for $long_name from $source_url"; 75 } 76 elsif ( $new_value == $old_value ) { 77 say "No change"; 78 } 79 else { 80 $new_uncertainty //= ''; 81 print <<"E"; 82UPDATE: $old_value \t($precision) 83 TO: $new_value \t($new_uncertainty) 84E 85 } 86 last if $TESTING && $TESTING <= ++$n; 87 sleep $SLEEP if $SLEEP; 88} 89 90 91exit; 92 93#### 94 95sub get_constant_value { 96 my ($source) = @_; 97 (@values_parsed, @uncertainties_parsed ) = (); 98 99 my $url = $source->getAttribute('url'); 100 my $selector = $source->getAttribute('selector'); 101 if ( $url =~ /\.pdf$/ ) { 102 warn "Can't scrape PDF documents yet\n"; 103 return; 104 } 105 say "Getting $url"; 106 return 0 unless $ONLINE; 107 my $content = get($url); 108 return unless $content; 109 110 if ( $url =~ /oeis\.org/ ) { 111 my ($value) = $content =~ /\%e \s \w+ \s (\d+\.?\d*)/x; 112 print /(\%e.+)/ if /\%e/ && $TESTING; 113 return $value; 114 } 115 elsif ( $url =~ /nist\.gov/ ) { 116 $nist_parser->parse($content); 117 return extract_value( @values_parsed ), 118 extract_value( @uncertainties_parsed ); 119 } 120 else { 121 print $content; 122 } 123} 124 125sub start_nist { 126 my ($self, $tag, $attr) = @_; 127 $td_flag = 1 if $tag eq 'td'; 128 $font_flag = 1 if $tag eq 'font'; 129 return unless $td_flag && $font_flag; 130 return if $font_flag && ! $attr->{color} || $attr->{color} ne 'red'; 131 $text_flag = 1; 132} 133 134sub end_nist { 135 my ($tag) = @_; 136 if ($tag eq 'tr') { 137 $uncertainty_flag = 0; 138 } 139 return unless $tag eq 'td' || $tag eq 'font'; 140 $td_flag = 0 if $tag eq 'font'; 141 $font_flag = 0 if $tag eq 'font'; 142 $text_flag = 0; 143} 144 145sub text { 146 my ($text, $attr) = @_; 147 if ($text_flag) { 148 push @values_parsed, $text; 149 say "I found $text" if $TESTING; 150 } 151 if ($uncertainty_flag) { 152 push @uncertainties_parsed, $text; 153 } 154 elsif (@values_parsed && $text =~ /Relative standard uncertainty/) { 155 $uncertainty_flag = 1; 156 } 157 if ($uncertainty_flag) { 158 say "TG: $text"; 159 } 160} 161 162sub configure_parsers { 163 $nist_parser->ignore_tags('tt', 'b', 'sup'); 164} 165 166sub extract_value { 167 my ($digits, $power, $units) = grep /\w/, @_; 168 169 $power //= ''; $units //= ''; 170 $digits =~ s/(?: |\s+)//g; 171 $power =~ s/(?: |\s+)//g; 172 $units =~ s/(?: |\s+)//g; 173 print "From $digits, $power, $units, " if $TESTING; 174 $digits =~ s/\.{3,}//; # remove ellipsis pertaining to irrational values 175 176 if ( $digits =~ /exact/ || $power =~ /exact/ || $units =~ /exact/ ) { 177 say "Returning 0" if $TESTING; 178 say "exact value"; 179 return 0; 180 } 181 elsif ( $digits =~ s/x10// ) { 182 my $scinotation = join 'e', $digits, $power; 183 say "Extracting $scinotation" if $TESTING; 184 return $scinotation; 185 } 186 else { 187 say "Extracting $digits" if $TESTING; 188 $digits =~ s/^(-?\d+\.?\d*).*/$1/; # removed units for non-scinotation 189 return $digits; 190 } 191} 192