1#!/usr/bin/perl -w
2#
3# Checks the online sources for the values of the constants
4# Boyd Duffee, Mar 2020
5#
6# hard coded to run from top directory and uses only data/PhysicalConstants.xml
7
8use strict;
9use autodie;
10use Modern::Perl;
11use XML::LibXML;
12use LWP::Simple;
13use List::Util qw/shuffle/;
14use HTML::Parser;
15
16#die "Usage: $0 infile outfile" unless @ARGV == 1;
17
18my $TESTING = 0;
19my $ONLINE = 1;
20my $SLEEP = 0;
21
22my ($n, @values_parsed, @uncertainties_parsed, );
23my ($td_flag, $font_flag, $text_flag, $uncertainty_flag) = 0;
24
25my $xml = XML::LibXML->load_xml(location => 'data/PhysicalConstants.xml');
26my $nist_parser = HTML::Parser->new(
27	start_h => [\&start_nist, "self, tagname, attr"],
28	end_h => [\&end_nist, "tagname, attr"],
29	text_h => [\&text, "text"],
30);
31
32configure_parsers();
33
34for my $constant ( $xml->getElementsByTagName('PhysicalConstant') ) {
35	my ($long_name, $old_value, ) = undef;
36
37	for my $name ( $constant->getChildrenByTagName('name') ) {
38		$long_name = $name->textContent() if $name->getAttribute('type') eq 'long';
39	}
40
41	my $description = $constant->getChildrenByTagName('description')->shift()->textContent();
42	for my $value ( $constant->getChildrenByTagName('value') ) {
43		if ( $value->hasAttribute('system') ) {
44			$old_value = $value->textContent() if $value->getAttribute('system') eq 'MKS';
45		}
46		else {
47			$old_value = $value->textContent();
48			next;
49		}
50		$old_value =~ tr/_//;
51	}
52
53	my $precision = $constant->getChildrenByTagName('uncertainty')->shift();
54	my $precision_type = $precision->getAttribute('type');
55	$precision = $precision->textContent();
56	my $source = $constant->getChildrenByTagName('source')->shift();
57	my $source_url = $source->getAttribute('url');
58
59	say <<CONST;
60$long_name\t$old_value\t$precision\t$precision_type
61$description
62$source_url
63CONST
64	#next unless $source_url =~ /physics\.nist\.gov/;
65	next if $source_url =~ /wikipedia|jupiterfact/;
66
67	print "Fetch page? [Ynq] ";
68	my $ans = <STDIN>;
69	next if $ans =~ /n/i;
70	last if $ans =~ /q/i;
71
72	my ($new_value, $new_uncertainty) = get_constant_value($source);
73	if ( ! defined $new_value ) {
74		warn "Couldn't get value for $long_name from $source_url";
75	}
76	elsif ( $new_value == $old_value ) {
77			say "No change";
78	}
79	else {
80    	$new_uncertainty //= '';
81		print <<"E";
82UPDATE: $old_value \t($precision)
83    TO: $new_value \t($new_uncertainty)
84E
85	}
86	last if $TESTING && $TESTING <= ++$n;
87	sleep $SLEEP if $SLEEP;
88}
89
90
91exit;
92
93####
94
95sub get_constant_value {
96	my ($source) = @_;
97	(@values_parsed, @uncertainties_parsed ) = ();
98
99	my $url = $source->getAttribute('url');
100	my $selector = $source->getAttribute('selector');
101	if ( $url =~ /\.pdf$/ ) {
102		warn "Can't scrape PDF documents yet\n";
103		return;
104	}
105	say "Getting $url";
106	return 0 unless $ONLINE;
107	my $content = get($url);
108	return unless $content;
109
110	if ( $url =~ /oeis\.org/ ) {
111		my ($value) = $content =~ /\%e \s \w+ \s (\d+\.?\d*)/x;
112		print /(\%e.+)/ if /\%e/ && $TESTING;
113		return $value;
114	}
115	elsif ( $url =~ /nist\.gov/ ) {
116		$nist_parser->parse($content);
117		return extract_value( @values_parsed ),
118			extract_value( @uncertainties_parsed );
119	}
120	else {
121		print $content;
122	}
123}
124
125sub start_nist {
126	my ($self, $tag, $attr) = @_;
127	$td_flag = 1 if $tag eq 'td';
128	$font_flag = 1 if $tag eq 'font';
129	return unless $td_flag && $font_flag;
130	return if $font_flag && ! $attr->{color} || $attr->{color} ne 'red';
131	$text_flag = 1;
132}
133
134sub end_nist {
135	my ($tag) = @_;
136	if ($tag eq 'tr') {
137		$uncertainty_flag = 0;
138	}
139	return unless $tag eq 'td' || $tag eq 'font';
140	$td_flag = 0 if $tag eq 'font';
141	$font_flag = 0 if $tag eq 'font';
142	$text_flag = 0;
143}
144
145sub text {
146	my ($text, $attr) = @_;
147	if ($text_flag) {
148		push @values_parsed, $text;
149		say "I found $text" if $TESTING;
150	}
151	if ($uncertainty_flag) {
152		push @uncertainties_parsed, $text;
153	}
154	elsif (@values_parsed && $text =~ /Relative standard uncertainty/) {
155		$uncertainty_flag = 1;
156	}
157	if ($uncertainty_flag) {
158		say "TG: $text";
159	}
160}
161
162sub configure_parsers {
163	$nist_parser->ignore_tags('tt', 'b', 'sup');
164}
165
166sub extract_value {
167	my ($digits, $power, $units) = grep /\w/, @_;
168
169	$power //= ''; $units //= '';
170	$digits =~ s/(?:&nbsp;|\s+)//g;
171	$power =~ s/(?:&nbsp;|\s+)//g;
172	$units =~ s/(?:&nbsp;|\s+)//g;
173	print "From $digits, $power, $units, " if $TESTING;
174	$digits =~ s/\.{3,}//;	# remove ellipsis pertaining to irrational values
175
176	if ( $digits =~ /exact/ || $power =~ /exact/ || $units =~ /exact/ ) {
177		say "Returning 0" if $TESTING;
178		say "exact value";
179		return 0;
180	}
181	elsif ( $digits =~ s/x10// ) {
182		my $scinotation = join 'e', $digits, $power;
183		say "Extracting $scinotation" if $TESTING;
184		return $scinotation;
185	}
186	else {
187		say "Extracting $digits" if $TESTING;
188		$digits =~ s/^(-?\d+\.?\d*).*/$1/;	# removed units for non-scinotation
189		return $digits;
190	}
191}
192