1#!/usr/local/bin/perl
2
3###############################################################################
4#
5# Portions Copyright (C) 2000-2019 The Xastir Group
6#
7# Script to convert a GeoPDF file with included neatline into a collar-stripped
8# geotiff in EPSG:4326 projection (WGS84 equidistant cylindrical) in 8-bit
9# color.
10#
11# You must have installed GDAL/OGR, configured to use python in order to use
12# this script.  This is because the script attempts to run rgb2pct.py, which
13# is only installed if you've built GDAL with python support.
14#
15# This also depends on the -cutline and -crop_to_cutline features of
16# gdalwarp, which are only present in versions of GDAL after 1.8.1.
17#
18# Last Edit:$Date$
19# Revision:$Revision$
20# Last Edited By: $Author$
21###############################################################################
22
23use Getopt::Long;
24
25my $fudgeNeatline;
26$result=GetOptions("fixneatline|f"=>\$fudgeNeatline);
27
28# Input file is a geopdf:
29if ($#ARGV<0)
30{
31    print STDERR "Usage: $0 <geopdfname>\n";
32    exit 1;
33}
34
35my $inputPDF=$ARGV[0];
36if (! -e $inputPDF)
37{
38    print STDERR "File $inputPDF does not exist.\n";
39    exit 1;
40}
41
42open GDALINFO, "gdalinfo $inputPDF |" or die "can't fork $!";
43my $readingCoordSys=0;
44my $coordSys="";
45my $readingMetadata=0;
46my $neatlinePoly="";
47my $minLat=360,$maxLat=-360,$minLon=360,$maxLon=-360,$templat,$templon;
48
49while (<GDALINFO>)
50{
51  if (/^Driver: (.*)$/)
52  {
53      if ($1 ne "PDF/Geospatial PDF")
54      {
55          print STDERR "This script is intended to run only on GeoPDF input files.\n";
56          exit 1;
57      }
58      next;
59  }
60  if (/^Coordinate System is:/)
61  {
62      $readingCoordSys=1;
63      next;
64  }
65  if ($readingCoordSys == 1)
66  {
67      # This is tricky --- finding the end of the coordinate system
68      # means looking for the first line that starts in column 1 that isn't
69      # the first line of the coordinate system.  All lines of the coordinate
70      # system other than the first start with space.
71      if (/^[^ ]/ && $coordSys ne "")
72      {
73          $readingCoordSys=0;
74          next;
75      }
76      else
77      {
78          chomp($_);
79          $coordSys .= $_;
80          next;
81      }
82  }
83  if (/^Metadata:/)
84  {
85      $readingMetadata=1;
86      next;
87  }
88  if ($readingMetadata==1)
89  {
90      if (/^Corner Coordinates:/)
91      {
92          $readingMetadata=0;
93          next;
94      }
95      if (/^ *NEATLINE=(.*)$/)
96      {
97          $neatlinePoly=$1;
98          next;
99      }
100  }
101  if (/^Band ([0-9]+).*Type=([^,]*)/)
102  {
103      $bandinfo[$#bandinfo+1]=$2;
104      next;
105  }
106  if (/^(Upper|Lower) (Left|Right)/)
107  {
108      s/(Upper|Lower) (Left|Right) *\([^)]*\) *//;
109      /\(([^,]*), *([^\)]*)\)/;
110      $templonstr=$1;
111      $templatstr=$2;
112      $templonstr =~ /([0-9]*)d( *[0-9]*)'( *[0-9.]*)"([EW])/;
113      $deg=$1; $min=$2; $sec=$3; $hem=$4;
114      $templon=$deg+$min/60+$sec/3600;
115      $templon *= -1 if ($hem eq "W");
116      $minLon=$templon if ($templon<$minLon);
117      $maxLon=$templon if ($templon>$maxLon);
118
119      $templatstr =~ /([0-9]*)d( *[0-9]*)'( *[0-9.]*)"([NS])/;
120      $deg=$1; $min=$2; $sec=$3; $hem=$4;
121      $templat=$deg+$min/60+$sec/3600;
122      $templat *= -1 if ($hem eq "S");
123      $minLat=$templat if ($templat<$minLat);
124      $maxLat=$templat if ($templat>$maxLat);
125
126  }
127
128}
129close GDALINFO;
130
131if (! $fudgeNeatline)
132{
133# Create a CSV for the neatline.  OGR will recognize this geometry
134    open CSVFILE, ">$inputPDF.csv";
135    print CSVFILE "foo,WKT\n";
136    print CSVFILE "bar,\"$neatlinePoly\"\n";
137    close CSVFILE;
138
139# Unfortunately, there's no easy way to attach a spatial reference system
140# (SRS) to the csv file so that gdalwarp will recognize it.  So make a
141# virtual layer out of the CSV with the reference system in it.
142    open VRTFILE, ">$inputPDF.vrt";
143    print VRTFILE "<OGRVRTDataSource>\n";
144    print VRTFILE " <OGRVRTLayer name=\"$inputPDF\">\n";
145    print VRTFILE "   <LayerSRS>$coordSys</LayerSRS>\n";
146    print VRTFILE "   <SrcDataSource>$inputPDF.csv</SrcDataSource>\n";
147    print VRTFILE "   <GeometryType>wkbPolygon</GeometryType>\n";
148    print VRTFILE "   <GeometryField>WKT</GeometryField>\n";
149    print VRTFILE " </OGRVRTLayer>\n";
150    print VRTFILE "</OGRVRTDataSource>\n";
151    close VRTFILE;
152}
153else
154{
155    print "User asked us to fudge the neatline, finding nearest 7.5 minute quad boundaries\n";
156
157
158    # Instead of using the neatline specified, round it to nearest 7.5'
159    # quad boundary
160    $left=(int((abs($minLon)-int(abs($minLon)))/.125+.5)*.125+int(abs($minLon)))*($minLon/abs($minLon));
161    $right=(int((abs($maxLon)-int(abs($maxLon)))/.125+.5)*.125+int(abs($maxLon)))*($maxLon/abs($maxLon));
162    if ($maxLon<0)
163    {
164        $temp=$left;
165        $left=$right;
166        $right=$temp;
167    }
168    $bottom=(int((abs($minLat)-int(abs($minLat)))/.125+.5)*.125+int(abs($minLat)))*($minLat/abs($minLat));
169    $top=(int((abs($maxLat)-int(abs($maxLat)))/.125+.5)*.125+int(abs($maxLat)))*($maxLat/abs($maxLat));
170    if ($maxLat<0)
171    {
172        $temp=$top;
173        $top=$bottom;
174        $bottom=$temp;
175    }
176
177
178    open CSVFILE, ">$inputPDF.csv";
179    print CSVFILE "foo,WKT\n";
180    print CSVFILE "bar,\"POLYGON(($left $top,$right $top, $right $bottom, $left $bottom, $left $top))\"\n";
181    close CSVFILE;
182
183# Unfortunately, there's no easy way to attach a spatial reference system
184# (SRS) to the csv file so that gdalwarp will recognize it.  So make a
185# virtual layer out of the CSV with the reference system in it.
186    open VRTFILE, ">$inputPDF.vrt";
187    print VRTFILE "<OGRVRTDataSource>\n";
188    print VRTFILE " <OGRVRTLayer name=\"$inputPDF\">\n";
189    print VRTFILE "   <LayerSRS>EPSG:4326</LayerSRS>\n";
190    print VRTFILE "   <SrcDataSource>$inputPDF.csv</SrcDataSource>\n";
191    print VRTFILE "   <GeometryType>wkbPolygon</GeometryType>\n";
192    print VRTFILE "   <GeometryField>WKT</GeometryField>\n";
193    print VRTFILE " </OGRVRTLayer>\n";
194    print VRTFILE "</OGRVRTDataSource>\n";
195    close VRTFILE;
196}
197
198# The VRT virtual source will now appear to gdal warp as a complete
199# specification of the neatline, with both coordinates *AND* description of
200# the coordinate system.  This will enable gdalwarp to do the clipping of the
201# neatline *AND* conversion to a different coordinate system in the same
202# operation.  Had we not done the vrt, gdalwarp would have assumed the
203# neatline to be in the coordinates of the DESTINATION raster, not the
204# source raster.
205
206$outputTif=$inputPDF;
207$outputTif =~ s/pdf/tif/i;
208
209# We will warp from whatever the coordinate system is into EPSG:4326, the
210# coordinate system that requires the least work and involves the fewest
211# approximations from Xastir.
212#$theGdalWarp="gdalwarp -cutline $inputPDF.vrt -crop_to_cutline -t_srs EPSG:4326  -co \"COMPRESS=PACKBITS\" $inputPDF $outputTif";
213$theGdalWarp="gdalwarp -cutline $inputPDF.vrt -crop_to_cutline -t_srs EPSG:4326  $inputPDF $outputTif";
214
215system ($theGdalWarp) == 0 or die "System $theGdalWarp failed: $?";
216
217
218if ($#bandinfo>0)
219{
220    print "This is a multi-band raster, dithering...\n";
221    system ("mv $outputTif $$.tif");
222#    $theRGB2PCT="rgb2pct.py $$.tif $$-2.tif";
223    $theRGB2PCT="rgb2pct.py $$.tif $outputTif";
224    system ($theRGB2PCT) == 0 or die "Could not run rgb2pct: $?";
225#    system("gdal_translate -co \"COMPRESS=PACKBITS\" $$-2.tif $outputTif") == 0 or die "Could not gdal_translate: $?";
226    system("rm -f $$.tif $$-2.tif");
227}
228
229# now clean up our mess:
230system ("rm $inputPDF.csv $inputPDF.vrt");
231