1## Copyright (C) 2014-2020 Philip Nienhuis
2##
3## This program is free software; you can redistribute it and/or modify it
4## under the terms of the GNU General Public License as published by
5## the Free Software Foundation; either version 3 of the License, or
6## (at your option) any later version.
7##
8## This program is distributed in the hope that it will be useful,
9## but WITHOUT ANY WARRANTY; without even the implied warranty of
10## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11## GNU General Public License for more details.
12##
13## You should have received a copy of the GNU General Public License
14## along with this program.  If not, see <http://www.gnu.org/licenses/>.
15
16## -*- texinfo -*-
17## @deftypefn {Function File}  [@var{outstruct} ] = shaperead (@var{shp_filename})
18## @deftypefnx {Function File} [@var{outstruct} ] = shaperead (@var{shp_filename}, @var{outstyle})
19## @deftypefnx {Function File} [@var{outstruct} ] = shaperead (@var{shp_filename}, @var{outstyle}, @var{opts})
20## @deftypefnx {Function File} [@var{outstruct}, @var{atts} ] = shaperead (@var{shp_filename}, ...)
21## Read an ArcGis shapefile set (shp, shx and dbf).
22##
23## Depending on the value of @var{outstyle} some different output formats
24## will be returned:
25##
26## @table @code
27## @item  0 (numeric)
28## @itemx  ml (case-insensitive)
29## @itemx  m
30## Return a Matlab-compatible M X 1 struct with a separate entry for each shape
31## feature in the shape file.  Each struct element contains fields "Geometry"
32## (shape type), "BoundingBox" ([minX minY ; maxX maxY]), X, Y (coordinates of
33## points in the shape item as row vectors).  For multi-part items, the
34## coordinates of each part are separated by NaNs.  This output format supports
35## neither M and Z type nor MultiPatch shape features.  For M and Z type shape
36## features the M and Z values will simply be ignored.
37## The struct is augmented with attributes found in the accompanying .dbf file,
38## if found.
39##
40## For ML-style output, if only one output argument is requested the attributes
41## in the .dbf file will be augmented to that struct.  If two output arguments
42## are requested, the attributes will be returned separately in output struct
43## @var{atts}.
44##
45## @item  1 (numeric)
46## @itemx  ext (case-insensitive)
47## @itemx  e
48## Same as 1 but M and Z type and MultiPatch shape features are accepted.  The
49## resulting output struct is no more ML-compatible.  If the shapefile contains
50## M and/or Z type shape features the mapstruct or geostruct has extra fields M
51## and -optionally- Z.  Note that MultiPatch shape features may not have
52## M-values even if Z-values are present.  For MultiPatch shapes another field
53## Parts is added, a Px2 array with zero-based indices to the first vertex of
54## each subfeature in the XYZ fields in column 1 and the type of each
55## subfeature in column 2; P is the number of shape feature parts.
56##
57## @item  2 (numeric)
58## @itemx  oct (case-insensitive)
59## @itemx  o
60## Return a struct containing a N X 6 double array "vals" containing the X, Y,
61## and Z coordinates, M-values, record nr. and type of each point in the shape
62## file.  If no M or Z values were present the relevant columns contain
63## NaNs.  Individual shape features and shape parts are separated by a row of
64## NaN values.  The field "idx" contains 1-based pointers into field vals to
65## the first vertex of each shape feature.
66## Field "bbox" contains an 8 X M double array of XYZ coordinates of the
67## bounding boxes and min/max M-values corresponding to the M items found in
68## the .shp file; for point shapes these contain NaNs.
69## Field "npt" contains a 1 X M array of the number of points for each item.
70## Field "npr" contains a 1 X M cell array containing a row of P part indices
71## (zero-based) for each Polyline, Polygon or MultiPatch part in the shape
72## file; for multipatch each cell contains another row with the part types;
73## for other item types (point etc.) the cell array contains empty rows.
74## A separate field "shpbox" contains the overall bounding box  X, Y and Z
75## coordinates and min/max M-values in a 4 X 2 double array.  If the shape file
76## contains no Z or M values the corresponding columns are filled with NaNs.
77##
78## The struct field "fields" contains a cellstr array with names of the columns.
79## If a corresponding .dbf file was read, the struct array also contains
80## a field for each attribute found in the .dbf file with the corresponding
81## field name, each containing a 1 X M array of attribute values matching the
82## M items in the .shp file.  These arrays can be double, char or logical,
83## depending on the type found in the .dbf file.
84##
85## @item  3 (numeric)
86## @itemx  dat (case-insensitive)
87## @itemx  d
88## Same as OCT or 0 but without a row of NaN values between each shape
89## file item in the VALS array.
90## @end table
91##
92## If a character option is given, just one character will suffice. The default
93## for @var{outstyle} is "ml".
94##
95## The output of 'shaperead' can be influenced by property-value pairs. The
96## following properties are recognized (of which only the first three
97## characters are significant, case doesn't matter):
98##
99## @table @code
100## @item Attributes
101## Normally all attributes associated with the shape features will be read
102## and returned in the output struct(s).  To limit this to just some
103## attributes, enter a value consisting of a cell array of attribute names to
104## be read.  To have no attributes read at all, specify @{@}, an empty cell
105## array.
106##
107## @item BoundingBox
108## Select only those shape items (features) whose bounding box lies within, or
109## intersets in at least one point with the limits of the BoundingBox value (a
110## 2 X 2 double array [Minx, MinY; MaxX, MaxY]).
111## No intersection or clipping with the BoundingBox value will be done by
112## default!
113##
114## @item Clip
115## (only useful in conjuction with the BoundingBox property) If a value of 1
116## or true is supplied, clip all shapes to the bounding box limits.  This
117## option may take quite a bit of processing time.  If a value of "0" or false
118## is given, do not perform clipping.  The default value is 0.
119## Clipping is merely meant to be performed in the XY plane.  Clipping 3D
120## shapes is supported but may lead to unexpected results.
121## For Z and M type polylines and polygons including MultiPatch and
122## Longitude/Latitude/Height types, Z (Height) and M values for each vertex in
123## the clipped shape feature are simply copied over from the nearest vertex in
124## the original shape feature.  This implies that Z and M values of new
125## vertices created on the bounding box edges may be less optimal.
126##
127## For clipping polylines and polygons the Octave-Forge geometry package needs
128## to be installed and loaded.
129##
130## @item Debug
131## If a value of 'true' or 1 is given, shaperead echoes the current record
132## number while reading.  Can be useful for very big shapefiles.  The default
133## value is 0 (no feedback).  If a Matlab-compatible output structarray is
134## requested and the Bounding Box property is specified, the extracted shape
135## feature indices are added to the field "___Shape_feature_nr___".
136##
137## @item RecordNumbers
138## Select only those records whose numbers are listed as integer values in
139## an array following RecordNumbers property. Neither the size nor the class of
140## the array matters as long as it is a numeric array.
141##
142## @item UseGeoCoords
143## (Only applicable if a Matlab-style output struct is requested). If a value
144## of 'true' (or 1) is supplied, return a geostruct rather than a mapstruct.
145## If a value of 0 or false is given, return a mapstruct.
146## The mere difference is that in a geostruct the fields 'X' and 'Y' (and
147## optionally 'Z') are replaced by 'Long' and 'Lat' (and 'Hght').  The
148## default value is 'false' (return a mapstruct').
149## @end table
150##
151## Ref: http://www.esri.com/library/whitepapers/pdfs/shapefile.pdf
152##
153## @seealso{geoshow, mapshow, shapedraw, shapeinfo}
154## @end deftypefn
155
156## Author: Philip Nienhuis <prnienhuis@users.sf.net>
157## Created: 2014-11-07
158
159function [ outs, oatt ] = shaperead (fname, varargin);
160
161## FIXME Implementation needed of these ML input arguments:
162##       - Selector  (supposedly difficult. I'd prefer myself to leave this
163##                   outside of shaperead, it needlessly complicates and slows
164##                   this function /PRN)
165
166  ## Check input
167  if (nargin < 1)
168    print_usage ();
169  endif
170
171  ## Check file name
172  [pth, fnm, ext] = fileparts (fname);
173  if (isempty (ext))
174    bname = fname;
175    fname = [fname ".shp"];
176  elseif (isempty (pth))
177    ## Later on bname.shx and bname.dbf will be read
178    bname = fnm;
179  else
180    ## Later on bname.shx and bname.dbf will be read
181    bname = [pth filesep fnm];
182  endif
183
184  ## Find out what args have been supplied
185  if (nargin == 1)
186    ## Only filename supplied. Set "ml" (Matlab) type as default
187    outopts = 0;
188  elseif (nargin == 2)
189    ## Assume filename + outopts was supplied
190    if (isempty (varargin{1}))
191      ## Assume ML-style output
192      outopts = 0;
193    else
194      outopts = varargin{1};
195    endif
196    varargin = {};
197  elseif (rem (nargin, 2) == 0)
198    ## Even number of input args => Outopts & at least one pair of varargin
199    outopts = varargin{1};
200    varargin(1) = [];
201  else
202    ## Odd nr; maybe only filename and prop/val(s) supplied, outstyle skipped
203    if (ischar (varargin{1}))
204      ## Check arg#2, must be a property name then
205      if (! ismember (lower (varargin{1}(1:min(3, numel (varargin{1})))), ...
206                          {"att", "bou", "cli", "deb", "rec", "sel", "use"}))
207        error ("shaperead: property name expected for arg. #2");
208      endif
209    else
210      ## no outstyle or property => wrong input
211      print_usage ();
212    endif
213    outopts = 0;
214  endif
215
216  ## Check output type arg
217  if (isnumeric (outopts))
218    if (outopts < 0 || outopts > 3)
219      error ("shaperead: arg. #2 integer value out of range 0-3\n");
220    endif
221  elseif (ischar (outopts))
222    outopts = lower (outopts);
223    if (! any (strncmp (outopts, {"ml", "ext", "oct", "dat"}, 1)))
224      error ("shaperead: arg. #2 char value should be one of 'ml', 'ext', \
225'oct' or 'dat'\n");
226    endif
227    switch outopts
228      case {"ml", "m"}
229        outopts = 0;
230      case {"ext", "e"}
231        outopts = 1;
232      case {"oct", "o"}
233        outopts = 2;
234      case {"dat", "d"}
235        outopts = 3;
236      otherwise
237        error ("shaperead: illegal value for arg. #2: '%s' - expected 'ml', \
238'ext', 'oct' of 'dat'", ...
239               outopts);
240    endswitch
241  else
242    error ("shaperead: numeric or character type expected for arg. #2\n");
243  endif
244
245  ## Check .shp file existence
246  fidp = fopen (fname, "r");
247  ## Postpone file opening error until after other provisional input validation
248  if (fidp > 0)
249    ## Temporarily close to avoid file handle leaks during further input checks
250    fclose (fidp);
251  endif
252  ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
253  ## Open .shx file to help speed up seeks to next records. We need this info
254  ## for s_recs check below
255  have_shx = 0;
256  fidx = fopen ([bname ".shx"], "r");
257  if (fidx < 0)
258    s_recs = [];
259  else
260    fseek (fidx, 24, "bof");
261    fxlng = fread (fidx, 1, "int32", 72, "ieee-be");
262    nrec = (fxlng - 50) / 4;
263    fseek (fidx, 100, "bof");
264    ## Get record start positions & -lengths in 16-bit words
265    ridx = reshape (fread (fidx, nrec*2, "int32", 0, "ieee-be"), 2, [])';
266    fclose (fidx);
267    ## Get indices & lengths in bytes
268    ridx *= 2;
269    have_shx = 1;
270    s_recs = [1 : size(ridx, 1)];
271  endif
272
273  ## Parse options; first set defaults
274  clip = 0;
275  dbug   = 0;
276  s_atts = [];
277  s_bbox = [];
278  s_geo  = 0;
279  ## Init collection of records that meet BB criteria
280  bb_union = [];
281  ## Process input args
282  for ii = 1:2:length (varargin)
283    if (! ischar (varargin{ii}))
284      error ("shaperead: property %d: property name expected but got a %s value", ...
285              (ii+1)/2, class (varargin{ii}));
286    elseif (numel (varargin{ii}) < 3)
287      warning ("shaperead: unknown option '%s' - ignored\n", varargin{ii});
288    else
289      switch (lower (varargin{ii})(1:3))
290        case "att"
291          ## Select records based on attribute values
292          s_atts = varargin{ii+1};
293        case "bou"
294          ## Select whether record/shape features partly lie inside or outside limits
295          try
296            s_bbox = double (varargin{ii+1});
297            if (numel (s_bbox) != 4)
298              error ("shaperead: 2 X 2 numeric array expected for BoundingBox\n");
299            endif
300          catch
301            error ("shaperead: numeric 2 X 2 array expected for BoundingBox\n");
302          end_try_catch
303          ## Initialize supplementary polygon array here
304          sbox = [s_bbox(1) s_bbox(3); s_bbox(2) s_bbox(3); s_bbox(2) s_bbox(4); ...
305                  s_bbox(1) s_bbox(4); s_bbox(1) s_bbox(3)];
306        case "cli"
307          ## Clip polygons to requested BoundingBox
308          try
309            clip = logical (varargin{ii+1});
310          catch
311            error ("numeric or logical value expected for 'Clip'\n");
312          end_try_catch
313        case "deb"
314          ## Set verbose output (count records)
315          try
316            dbug = logical (varargin{ii+1});
317          catch
318            error ("numeric or logical value expected for 'Debug'\n");
319          end_try_catch
320        case "rec"
321          ## Select record nrs directly. Check for proper type & clean up
322          try
323            s_recs = sort (unique (double (varargin{ii+1}(:))));
324            if (have_shx && any (s_recs > nrec))
325              printf ("shaperead: requ. record nos. > nr. of records (%d) ignored\n", ...
326                      nrec);
327              s_recs (find (srecs > nrec)) = [];
328            endif
329          catch
330            error ("shaperead: numeric value or array expected for RecordNumbers\n");
331          end_try_catch
332        case "sel"
333          ## A hard one, to be implemented later?
334          printf ("shaperead: 'Selector' option not implemented, option ignored\n");
335        case "use"
336          ## Return a geostruct or a mapstruct (default). Only for ML-structs
337          if (outopts != 0)
338            error ("shaperead: UseGeoCoords only valid for Matlab-style output\n");
339          endif
340          try
341            s_geo = logical (varargin{ii+1});
342          catch
343            error ("shaperead: logical value type expected for 'UseGeoCoords'\n");
344          end_try_catch
345        otherwise
346          warning ("shaperead: unknown option '%s' - ignored\n", varargin{ii});
347      endswitch
348    endif
349  endfor
350  ## Post-processing
351  if (clip)
352    if (isempty (s_bbox))
353      warning ("shaperead: no BoundingBox supplied => Clip option ignored.\n");
354      clip = 0;
355    endif
356    if (isempty (which ("clipPolygon_clipper")))
357      ## No OF geometry package?
358      printf  ("shaperead: function 'clipPolygon' not found. Clip option ignored\n");
359      warning ("           (OF geometry package installed and loaded?)\n");
360      clip = 0;
361    endif
362    if (isempty (which ("distancePoints")))
363      ## No OF geometry package?
364      printf  ("shaperead: function 'distancePoints' not found. Clip option ignored\n");
365      warning ("           (OF geometry package installed and loaded?)\n");
366      clip = 0;
367    endif
368  endif
369
370  if (fidp < 0)
371    ## Only now convey file open error message
372    error ("shaperead: can't open file %s\n", fname);
373  else
374    ## Open .shp file
375    fidp = fopen (fname, "r");
376  endif
377  if (fidx < 0)
378    warning ("shaperead: index file %s not found\n", [fnm ".shx"]);
379  endif
380
381  ## ============= Preparations done, now we can start reading ============
382  ## ---------------------- 2. Read .shp file proper ----------------------
383
384  ## Start reading header
385  fseek (fidp, 0, "bof");
386
387  ## Read & check file code
388  fcode = fread (fidp, 1, "int32", 20, "ieee-be");
389  if (fcode != 9994)
390    error ("%s is not a valid shapefile\n", fname);
391  endif
392  flngt = fread (fidp, 1, "int32", 0, "ieee-be") * 2;
393  fvsn  = fread (fidp, 1, "int32", 0, "ieee-le");
394  shpt  = fread (fidp, 1, "int32");                         ## Shape file type
395
396  shpbox.X(1) = fread (fidp, 1, "double");
397  shpbox.Y(1) = fread (fidp, 1, "double");
398  shpbox.X(2) = fread (fidp, 1, "double");
399  shpbox.Y(2) = fread (fidp, 1, "double");
400  shpbox.Z(1) = fread (fidp, 1, "double");
401  shpbox.Z(2) = fread (fidp, 1, "double");
402  shpbox.M(1) = fread (fidp, 1, "double");
403  shpbox.M(2) = fread (fidp, 1, "double");
404
405  ## FIXME: scan shp file in advance to assess nr of XY points, to be able to
406  ##        preallocate rec array. May be difficult, .shp is not favorable for
407  ##        it. Initial tries showed no significant speed advantage yet over
408  ##        the incremental allocation scheme implemented in Ls. 611+ & 631+
409  ##        for oct/plt style output. For ml-style it is much harder as
410  ##        we'd need to preallocate a potentially very heterogeneous struct
411  ##        array.
412
413  ## Prepare for unsupported (in ML output) shape types
414  unsupp = 0;
415  ## Echo warning if dbug was set
416  ign_mz = (outopts == 0) && dbug;
417  ## Buffer for record data
418  BUFSIZE = 10000;
419  ## Read records, 1 by 1. Initialize final array
420  vals = npt = npr = bbox = idx = nullsh = [];
421  ## Init nr. of shapes read
422  nshp = 0;
423  ## Temp pointer to keep track of array size and increase it w. BUFSIZE rows
424  ivals = 1;
425  ## Provisionally assume file has M (measure) values
426  has_M = true;
427  ## Record index number (equals struct element number)
428  ir = 1;
429  ## Init output struct
430  outs = repmat (struct ("Geometry", ""), 0, 1);
431
432  do
433    ## If the .shx file is there, skip directly to requested record
434    if (have_shx)
435      fseek (fidp, ridx(s_recs(ir), 1), "bof");
436    endif
437    if (dbug)
438      printf ("Reading record %d ...\r", ir);
439    endif
440    val    = NaN(1, 6);
441    ## Read record index number & record length
442    val(5) = fread (fidp, 1, "int32", 0, "ieee-be");
443    rlen   = fread (fidp, 1, "int32", 0, "ieee-be");
444
445    ## Here we decide if the record need be read at all, based on s_recs
446    if (isempty (s_recs) || ismember (double (val(5)), s_recs))
447      ## => this record # has been desired; proceed. Read shape feature type
448      val(6)  = fread (fidp, 1, "int32");
449      rincl = 1;                                            ## see s_bbox, below
450
451      ## Init values for this shape item
452      tbbox  = NaN(1, 8);
453
454      switch val(6)
455        case 1                                              ## Point
456          val(1:2) = fread (fidp, 2, "double");
457          val(3:4) = NaN;
458          tnpt     = 1;
459          tnpr     = 0;
460
461        case 11                                             ## PointZ
462          val(1:4) = fread (fidp, 4, "double");
463          tnpt     = 1;
464          tnpr     = 0;
465
466        case 21                                             ## PointM
467          val(1:2) = fread (fidp, 2, "double");
468          ## "Z"
469          val(3)   = 0;
470          ## M
471          val(4)   = fread (fidp, 1, "double");
472          tnpt     = 1;
473          tnpr     = 0;
474
475        case 8                                              ## Multipoint
476          ## Read bounding box
477          tbbox(1:4)       = fread (fidp, 4, "double");
478          tnpt             = fread (fidp, 1, "int32");
479          val(1:tnpt, 1:2) = reshape (fread (fidp, tnpt*2, "double"), 2, [])';
480          val(1:tnpt, 3:4) = NaN;
481          ## Copy rec index & type down
482          val(2:tnpt, 5:6) = repmat (val(1, 5:6), tnpt-1, 1);
483          tnpr             = 0;
484
485        case 18                                             ## MultipointZ
486          tbbox(1:4)       = fread (fidp, 4, "double");
487          tnpt             = fread (fidp, 1, "int32");
488          val(1:tnpt, 1:2) = reshape (fread (fidp, tnpt*2, "double"), 2, [])';
489          ## Z min & max values
490          tbbox(5:6)       = fread (fidp, 2, "double");
491          ## Augment val array with Z values
492          val(1:tnpt, 3)   = fread(fidp, tnpt, "double")';
493          ## M min & max values
494          tbbox(7:8)       = fread (fidp, 2, "double");
495          if (val(1, 5) == 1)
496            has_M = checkM (fidp, val(1, 6), shpbox);
497          endif
498          if (has_M)
499            ## Augment val array with M values
500            val(1:tnpt, 4) = fread(fidp, tnpt, "double")';
501            ## Copy rec index & type down
502            val(2:tnpt, 5:6) = repmat (val(1, 5:6), tnpt-1, 1);
503            tnpr           = 0;
504          endif
505
506        case 28                                             ## MultipointM
507          tbbox(1:4)       = fread (fidp, 4, "double");
508          tnpt             = fread (fidp, 1, "int32");
509          val(1:tnpt, 1:2) = reshape (fread (fidp, tnpt*2, "double"), 2, [])';
510          ## Insert empty column for Z
511          val(1:tnpt, 3:4) = NaN;
512          if (val(1, 5) == 1)
513            has_M = checkM (fidp, val(1, 6), shpbox);
514          endif
515          if (has_M)
516            ## M min & max values
517            tbbox(7:8)     = fread (fidp, 2, "double");
518            ## Augment val array with M values
519            val(1:tnpt, 4) = fread(fidp, tnpt, "double")';
520          endif
521          ## Copy rec index & type down
522          val(2:tnpt, 5:6) = repmat (val(1, 5:6), tnpt-1, 1);
523          tnpr             = 0;
524
525        case {3, 5}                                         ## Polyline/-gon
526          tbbox(1:4)       = fread (fidp, 4, "double");
527          ## Read nparts, npoints, nparts pointers
528          nparts           = fread (fidp, 1, "int32");
529          tnpt             = fread (fidp, 1, "int32");
530          tnpr             = fread (fidp, nparts, "int32")';
531          ## Read XY point coordinates
532          val(1:tnpt, 1:2) = reshape (fread (fidp, tnpt*2, "double"), 2, [])';
533          ## No Z or M data
534          val(1:tnpt, 3:4) = NaN;
535          ## Copy rec index and type down
536          val(2:tnpt, 5:6) = repmat (val(1, 5:6), tnpt-1, 1);
537
538        case {13, 15}                                       ## Polyline/-gonZ
539          tbbox(1:4)       = fread (fidp, 4, "double");
540          ## Read nparts, npoints, nparts pointers
541          nparts           = fread (fidp, 1, "int32");
542          tnpt             = fread (fidp, 1, "int32");
543          tnpr             = fread (fidp, nparts, "int32")';
544          ## Read XY point coordinates
545          val(1:tnpt, 1:2) = reshape (fread (fidp, tnpt*2, "double"), 2, [])';
546          ## Z min & max values + data
547          tbbox(5:6)       = fread (fidp, 2, "double");
548          val(1:tnpt, 3)   = fread(fidp, tnpt, "double")';
549          if (val(1, 5) == 1)
550            has_M = checkM (fidp, val(1, 6), shpbox);
551          endif
552          if (has_M)
553            ## M min & max values + data
554            tbbox(7:8)       = fread (fidp, 2, "double");
555            val(1:tnpt, 4)   = fread(fidp, tnpt, "double")';
556            ## Copy rec index and type down
557            val(2:tnpt, 5:6) = repmat (val(1, 5:6), tnpt-1, 1);
558          endif
559
560        case {23, 25}                                       ## Polyline/-gonM
561          tbbox(1:4)       = fread (fidp, 4, "double");
562          ## Read nparts, npoints, nparts pointers
563          nparts           = fread (fidp, 1, "int32");
564          tnpt             = fread (fidp, 1, "int32");
565          tnpr             = fread (fidp, nparts, "int32")';
566          ## Read XY point coordinates
567          val(1:tnpt, 1:2) = reshape (fread (fidp, tnpt*2, "double"), 2, [])';
568          ## No Z data
569          val(1:tnpt, 3)   = NaN;
570          if (val(1, 5) == 1)
571            has_M = checkM (fidp, val(1, 6), shpbox);
572          endif
573          if (has_M)
574            ## M min & max values + data
575            tbbox(7:8)       = fread (fidp, 2, "double");
576            val(1:tnpt, 4)   = fread(fidp, tnpt, "double")';
577            ## Copy rec index and type down
578            val(2:tnpt, 5:6) = repmat (val(1, 5:6), tnpt-1, 1);
579          endif
580
581        case 31                                             ## Multipatch
582          tbbox(1:4)       = fread (fidp, 4, "double");
583          ## Read nparts, npoints, nparts pointers, npart types
584          nparts           = fread (fidp, 1, "int32");
585          tnpt             = fread (fidp, 1, "int32");
586          ## Npart types is just another row under npart pointers => read both.
587          ## Provisionally transpose, this is later on reset after NaN insertion
588          tnpr             = reshape (fread (fidp, nparts*2, "int32")', [], 2)';
589          ## Read XY point coordinates
590          val(1:tnpt, 1:2) = reshape (fread (fidp, tnpt*2, "double"), 2, [])';
591          ## Z min & max values + data. Watch out for incomplete .shp file
592          EOF = (ftell (fidp) > flngt-2);
593          if (! EOF)
594            tbbox(5:6)       = fread (fidp, 2, "double");
595            val(1:tnpt, 3)   = fread(fidp, tnpt, "double")';
596          endif
597          fptr = ftell (fidp);
598          EOF = (fptr > flngt-2);
599          if (! EOF && has_M)
600            has_M = checkM (fidp, val(1, 6), shpbox);
601          endif
602          ## M min & max values + data. Watch out for incomplete .shp file
603          if (! EOF && has_M)
604            tbbox(7:8)       = fread (fidp, 2, "double");
605            val(1:tnpt, 4)   = fread(fidp, tnpt, "double")';
606          endif
607          ## Copy rec index and type down
608          val(2:tnpt, 5:6) = repmat (val(1, 5:6), tnpt-1, 1);
609
610        otherwise                                           ## E.g., null shape (0)
611          ## Keep track of null shapes to avoid reading associated attributes
612          if (val(1, 6) == 0)
613            nullsh = [ nullsh ir ];
614            rincl = 0;
615          elseif (abs(val(1, 6)) > 31)
616            error ("shaperead: unexpected shapetype value %f for feature # %d\n\
617       Looks like a faulty shape file.", val(1, 6), ir);
618          endif
619
620      endswitch
621
622      ## Check if (X, Y) are valid coordinates
623      if (any (abs (val(:, 1:2)) > 1.797e308))
624        ## Probably +/- Inf
625        rincl = 0;
626        if (dbug)
627          printf ("shape# %d has no finite XY coordinates, skipped\n", ir);
628        endif
629
630      ## Detect if shape lies (partly) within or completely out of BoundingBox.
631      ## Null shapes are automatically skipped
632      elseif (! isempty (s_bbox))
633        ## Just check if any shape feature bounding box corner lies in s_bbox
634        tbox = [tbbox(1) tbbox(2); tbbox(1) tbbox(4); ...
635                tbbox(3) tbbox(4); tbbox(3) tbbox(2); tbbox(1) tbbox(2)];
636        rincl = 0;
637        ## For polygons, polylines & multipatches:
638        if (ismember (val(1, 6), [3, 13, 23, 5, 15, 25, 31])) ## Polygon/line
639          ## To avoid undue CPU time for large shapes we take a shortcut:
640          ## Check if shape feature lies in BoundingBox...
641          [a, b] = inpolygon (tbox(:, 1), tbox(:, 2), sbox(:, 1), sbox(:, 2));
642          ## ...or BoundingBox lies in polyline/gon. Faster than indiv. points
643          [c, d] = inpolygon (sbox(:, 1), sbox(:, 2), tbox(:, 1), tbox(:, 2));
644          if (any ([a; b; c; d]))
645            ## At least one of the shape item bbox corners lies within s_bbox
646            rincl = 1;
647            bb_union = unique ([bb_union val(1, 5)]);
648            ## FIXME still the case of polygon edges intersecting bounding box
649            ##       w/o vertices inside bounding box. Clipping required for that
650          endif
651        elseif (ismember (val(1, 6), [1, 11, 21]))          ## (Multi-)Point
652          ## Simply select all points within or on boundaries
653          [a, b] = inpolygon (val(:, 1), val(:, 2), sbox(:, 1), sbox(:, 2));
654          val = val(find(a), :);
655          tnpt = size (val, 1);
656          if (tnpt)
657            rincl = 1;
658          endif
659        endif
660        ## If clipping has been selected, clip all parts of the shape feature
661        if (rincl && clip)
662          ## What to do depends on shape type. Null and MultiPatch aren't clipped
663          switch val(1, 6)
664            case {3, 13, 23, 5, 15, 25, 31}                   ## Polyline/gon, Multipatch
665              ## Temporarily silence Octave a bit, then call clippln
666              warning ("off",  "Octave:broadcast", "local");
667              [val, tnpt, tnpr] = clippln (val, tnpt, tnpr, sbox, val(1, 6));
668            otherwise
669              warning ("shaperead: unknown shape type found (%d) - ignored\n", ...
670                       val(1, 6));
671              rincl = 0;
672              val = [];
673          endswitch
674          if (isempty (val))
675            ## Don't include it and remove last added bb_union entry
676            bb_union(end) = [];
677            rincl = 0;
678          else
679            ## Update bounding box
680            tbbox(1) = min (val(:, 1));
681            tbbox(2) = min (val(:, 2));
682            tbbox(3) = max (val(:, 1));
683            tbbox(4) = max (val(:, 2));
684          endif
685        endif
686      endif
687
688      ## What to do with the val data, if to be included
689      if (rincl)
690        ## Keep track of nr of shape features read
691        ++nshp;
692
693        ## M-values < -1e39 really mean absent values
694        im = find (val(:, 4) < -1e39);
695        val(im, 4) = NaN;
696
697        if (outopts < 3)
698          ## Prepare an Octave or Matlab style struct optimized for fast
699          ## plotting by inserting a NaN row after each polyline/-gon part
700          nn = size (tnpr, 2);
701          valt = NaN(tnpt + nn - 1, 6);
702          ipt = 1;
703          ttnpr = [tnpr(1, :) tnpt];
704          dtnpr = diff (ttnpr);
705          for ii=2:numel (ttnpr)
706            valt(ipt:ipt+dtnpr(ii-1)-1, :) = val(ttnpr(ii-1)+1:ttnpr(ii), :);
707            ipt += dtnpr(ii-1) + 1;
708          endfor
709          val = valt;
710          tnpr(1, :) = (tnpr(1, :) + [0:numel(dtnpr)-1]);
711        endif
712
713        ## Shape either included by default or it lies in requested BoundingBox
714        switch outopts
715          case {0, 1}
716            ## Return a ML compatible mapstruct. Attributes will be added later
717            if (ign_mz && val(1, 6) >= 10)
718              printf ("shaperead: M and Z values ignored for ml-style output\n");
719              ign_mz = 0;
720            endif
721            switch val(1, 6)
722              case {1, 11, 21}                              ## Point
723                outs(end+1, 1).Geometry = "Point";
724              case {8, 18, 28}                              ## Multipoint
725                outs(end+1, 1).Geometry = "MultiPoint";
726              case {3, 13, 23}                              ## Polyline
727                outs(end+1, 1).Geometry = "Line";
728              case {5, 15, 25}                              ## Polygon
729                outs(end+1, 1).Geometry = "Polygon";
730              otherwise
731                if (outopts == 1)
732                  ## "Extended" ML-style output struct
733                  if (val(1, 6) == 31)                      ## MultiPatch
734                    outs(end+1, 1).Geometry = "MultiPatch";
735                    outs(end, 1).Parts = tnpr;
736                  endif
737                else
738                  if (! unsupp)
739                    warning ("shaperead: shapefile contains unsupported shape \
740types\n");
741                    outs = oatt = [];
742                    return
743                  endif
744                  outs(end+1, 1).Geometry = val(1, 6);
745                endif
746            endswitch
747            ## Omit BoundingBox for Point
748            if (all ([1, 11, 21] - val(1, 6)))
749              outs(end).BoundingBox = reshape (tbbox(1:4), 2, [])';
750            endif
751            if (s_geo)
752              outs(end, 1).Lon = val(:, 1)';
753              outs(end, 1).Lat = val(:, 2)';
754            else
755              outs(end, 1).X = val(:, 1)';
756              outs(end, 1).Y = val(:, 2)';
757            endif
758            ## (ML-incompatible) add Z- and optional M-values, if any
759            if (outopts == 1 && any (isfinite (val(:, 4))))
760              outs(end, 1).M = val(:, 4)';
761              ## FIXME Decision needed if field Geometry should reflect the type
762              ##  outs{end}.Geometry = [ outs{end}.Geometry "M"];
763            endif
764            if (outopts == 1 && any (isfinite (val(:, 3))))
765              outs(end, 1).Z = val(:, 3)';
766              ## FIXME Decision needed if field Geometry should reflect the type
767              ## outs{end}.Geometry = [ outs{end}.Geometry "Z"];
768            endif
769            ## (ML-incompatible) add Z- and optional M-values, if any
770            if (dbug)
771              ## Add a field with shape feature identifier for boundingbox
772              outs(end, 1).___Shape_feature_nr___ = val(1, 5);
773            endif
774
775          case {2}
776            ## Return an Octave style struct.
777            ## Append to vals array; keep track of appended nr. of rows
778            lvals = size (vals, 1);
779            lval = size (val, 1);
780            if ((size (vals, 1) - ivals) < lval)
781              ## Increase size of vals
782              vals = [ vals; NaN(BUFSIZE, 6) ];
783            endif
784            vals(ivals:ivals+lval-1, :) = val;
785            idx = [idx ; ivals];
786            ivals += lval;
787            ## Add a row of NaNs
788            vals(ivals, :) = NaN(1,6);
789            ++ivals;
790            ## Append the other arrays
791            npt = [npt; tnpt];
792            if (isempty (npr))
793              npr = {tnpr};
794            else
795              npr = [npr; {tnpr}];
796            endif
797            bbox = [bbox; tbbox];
798
799          case {3}
800            ## Return a compressed Octave style struct.
801            ## Simply append to vals array; keep track of appended nr. of rows
802            lvals = size (vals, 1);
803            lval = size (val, 1);
804            if ((size (vals, 1) - ivals) < lval)
805              ## Increase size of vals
806              vals = [ vals; NaN(BUFSIZE, 6) ];
807            endif
808            vals(ivals:ivals+lval-1, :) = val;
809            idx = [idx ; ivals];
810            ivals += lval;
811            ## Append the other arrays
812            npt = [npt; tnpt];
813            if (isempty (npr))
814              npr = {tnpr};
815            else
816              npr = [npr; {tnpr}];
817            endif
818            bbox = [bbox; tbbox];
819
820          otherwise
821        endswitch
822      endif
823
824    else
825      ## Record not in s_recs list, skip rest of record
826      fread (fidp, (rlen*2), "char=>char");
827
828    endif
829
830    ## Next .shp record
831    ++ir;
832
833  until (ftell (fidp) > flngt - 3 ||
834         (have_shx && ir > numel (s_recs)));                ## i.e., within last
835                                                            ## file bytes or all
836                                                            ## req. records read
837  fclose (fidp);
838
839  ## If no shape was read, or none fitted within BoundingBox, return empty
840  if (nshp < 1)
841    outs = oatt = {};
842    return;
843  endif
844
845  ## ------------------------ Post-processing ------------------------
846  if (outopts > 1)
847    ## Octave-style outstruct. Truncate vals to proper length
848    vals = vals(1:ivals-1, :);
849    if (outopts == 2 && ! isempty (vals))
850      ## Remove last NaN row
851      vals(end, :) = [];
852    endif
853    if (isempty (vals))
854      s_recs = 0;
855    endif
856  endif
857
858  if (dbug)
859    if (outopts <= 1)
860      ii = numel (outs);
861    else
862      ii = numel (npt);
863    endif
864    printf ("\n%d records read.            \n", ii);
865  endif
866
867  ## For Octave style output, add separate arrays to output struct
868  if (outopts >= 2)
869    outs.shpbox = shpbox;
870    outs.vals   = vals;
871    outs.bbox   = bbox;
872    outs.npt    = npt;
873    outs.npr    = npr;
874    outs.idx    = idx;
875    ## Clear memory (for very large shape files)
876    clear shpbox vals bbox npt npr idx val;
877    outs = setfield (outs, "fields", {"shpbox", "vals", "bbox", "npt", "npr"});
878  endif
879
880  ## Clean up s_recs and bb_union
881  if (! isempty (s_bbox))
882    s_recs = sort (unique (bb_union));
883  else
884    s_recs = sort (unique (s_recs));
885  endif
886
887  ## Weed out any null shape records to prevent reading their attributes
888  if (! isempty (nullsh))
889    if (! isempty (s_recs))
890      s_recs = [1:numel (npt)];
891    endif
892    s_recs (ismember (nullsh, s_recs)) = [];
893  endif
894
895  ## ---------------------- 3. .dbf ----------------------
896
897  if (iscell (s_atts) && isempty (s_atts))
898    ## {} indicates no attributes to be read. Morph into ""
899    return;
900  endif
901  ## Check if dbfread is available
902  if (isempty (which ("dbfread")))
903    printf ("shaperead: dbfread function not found. No attributes will be added.\n");
904    printf ("             (io package installed and loaded?)\n");
905    oatt = {};
906
907  else
908    ## Try to read the .dbf file
909    try
910      atts = dbfread ([ bname ".dbf" ], s_recs, s_atts);
911      if (outopts < 2)
912        ## First check if any attributes match fieldnames; if so, pre-/append "_"
913        tags = {"Geometry", "BoundingBox", "X", "Y", "Lat", "Lon"};
914        for ii=1:numel (tags)
915          idx = find (strcmp (tags{ii}, atts(1, :)));
916          if (! isempty (idx))
917            atts(1, idx) = ["_" atts{1, idx} "_"];
918          endif
919        endfor
920        ## Matlab style map-/geostruct. Divide attribute values over struct elems
921        if (nargout < 2)
922          ## Attributes appended to outs struct
923          for ii=1:size (atts, 2)
924            [outs.(atts{1, ii})] = deal (atts(2:end, ii){:});
925          endfor
926          oatt = [];
927        else
928          ## Attributes separately in oatt struct
929          oatt(size (atts, 1) - 1).(atts{1, 1}) = [];
930          for ii=1:size (atts, 2)
931            [oatt.(atts{1, ii})] = deal (atts(2:end, ii){:});
932          endfor
933          oatt = oatt';
934        endif
935      else
936        ## Octave output struct. Add attributes columns as struct fields
937        ## Check if any attributes match fieldnames; if so, pre-/append "_"
938        tags = {"shpbox", "vals", "bbox", "npt", "npr", "idx", "Geometry", ...
939                "BoundingBox", "X", "Y", "Lat", "Lon"};
940        for ii=1:size (atts, 2)
941          outs.fields(end+1) = atts{1, ii};
942          if (islogical (atts{2, ii}) || isnumeric (atts{2, ii}))
943            outs = setfield (outs, atts{1, ii}, cell2mat (atts(2:end, ii)));
944          else
945           outs = setfield (outs, atts{1, ii}, atts(2:end, ii));
946         endif
947        endfor
948        atts = [];
949      endif
950    catch
951      printf ("shaperead: file %s couldn't be read;\n", [ bname ".dbf" ]);
952      printf ("           no attributes appended\n");
953    end_try_catch
954  endif
955
956endfunction
957
958
959function has_M = checkM (fidp, ft, shpbox)
960
961  has_M = true;
962  ## Check if we do have M-values. If so, the next 3 4-byte words
963  ## comprise the next record nr, rec length and shape type
964  fptr   = ftell (fidp);
965  tmp    = fread (fidp, 2, "int32", 0, "ieee-be");
966  tmp(3) = fread (fidp, 1, "int32");
967  if (tmp(1) == 2 && tmp(3) == ft)
968    ## Looks like next record header + next shape feature type => no M
969    has_M = false;
970    shpbox.M(1) = 0;
971    shpbox.M(2) = 0;
972  endif
973  fseek (fidp, fptr, "bof");
974
975endfunction
976
977
978## Only check input validation. I/O is tested in shapewrite.m
979%!error <arg. #2 char value should be one of> shaperead ('tst.shp', 'j');
980%!error <shaperead: arg. #2 integer value out of range> shaperead ('tst.shp', 7);
981%!error <illegal value for arg. #2:> shaperead ('tst.shp', 'deb')
982%!error < property name expected> shaperead ('tst.shp', "ml", []);
983%!error <numeric or logical value expected> shaperead ('tst.shp', 'deb', {});
984
985