1## Copyright (C) 2015-2021 Philip Nienhuis
2## Copyright (C) 2018-2020 Matthew Parkan
3##
4## This program is free software; you can redistribute it and/or modify it
5## under the terms of the GNU General Public License as published by
6## the Free Software Foundation; either version 3 of the License, or
7## (at your option) any later version.
8##
9## This program is distributed in the hope that it will be useful,
10## but WITHOUT ANY WARRANTY; without even the implied warranty of
11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12## GNU General Public License for more details.
13##
14## You should have received a copy of the GNU General Public License
15## along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
17## -*- texinfo -*-
18## @deftypefn  {Function File} [@var{status}] = dbfwrite (@var{fname}, @var{data})
19## Write data in a cell array to a dbf (xBase) file, provisionally dBase III+.
20##
21## @var{fname} must be a valid file name, optionally with '.dbf' suffix.
22## @var{data} should be a cell array of which the top row contains column
23## names (character strings, each max. 10 characters; longer column names will
24## be truncated).  Each column must contain only one class of data, except of
25## course the top entry (the column header).  Integers interspersed in double
26## type colums will be written as doubles.  Data types that can be written are
27## character (text string), numeric (integer and float, the latter with 6
28## decimal places), and logical.
29##
30## Output argument @var{status} is 1 if the file was written successfully, -1 if
31## one or more data columns were skipped, 0 otherwise.  If 0 the incomplete file
32## will be deleted as well.
33##
34## Provisionally only dBase v. III+ files without memos can be written.
35##
36## @seealso{dbfread}
37## @end deftypefn
38
39## Authors: Philip Nienhuis <prnienhuis@users.sf.net>,whos
40##          Matthew Parkan <matthew.parkan@gmail.com>
41## Created: 2014-12-24
42
43function [status] = dbfwrite (fname, data)
44
45  status = 0;
46  ## Input validation
47  if (! ischar (fname))
48    error ("dbfwrite: file name expected for argument #1\n");
49  elseif (! iscell (data))
50    error ("dbfwrite: cell array expected for argument #2\n");
51  elseif (! iscellstr (data (1, :)))
52    error ("dbfwrite: column header titles (text) expected on first row of data\n");
53  endif
54  ## Column headers length cannot exceed 10 characters
55  toolong = [];
56  for ii=1:size (data, 2)
57    title = data{1, ii};
58    if (length (title) > 10)
59      toolong = [ toolong, ii ];
60      data(1, ii) = title(1:10);
61    endif
62  endfor
63  if (! isempty (toolong))
64    ## Truncate headers if required and check for uniqueness
65    warning ("dbfwrite: one or more column header(s) > 10 characters - truncated\n");
66    fmt = [repmat("%d ", 1, numel (toolong))];
67    printf ("Applies to column(s): %s\n", sprintf (fmt, toolong));
68    if (numel (unique (data(1, :))) < numel (data(1, :)))
69      error ("dbfwrite: column headers aren't unique - please fix data\n");
70    endif
71  endif
72
73  ## Assess nr of records
74  ## Data contains header row. Data toprow = 2
75  nrecs = size (data, 1) - 1;
76  tr = 2;
77
78  ## Check file name
79  [pth, fnm, ext] = fileparts (fname);
80  if (isempty (ext))
81    fname = [fname ".dbf"];
82  elseif (! strcmpi (ext, ".dbf"))
83    error ("dbfwrite: file name should have a '.dbf' suffix\n");
84  endif
85  ## Try to open file
86  fid = fopen (fname, "w", "ieee-le");
87  if (fid < 0)
88    error ("dbfwrite: could not open file %s\n", fname);
89  endif
90
91  unwind_protect
92    ## Start writing header
93    ## Provisionally assume dbase III+ w/o memos
94    fwrite (fid, 3, "uint8");
95
96    ## Date of last update (YYMMDD), with YY the number of years since 1900
97    t = now;
98    upd = datevec(t) - [1900, 0, 0, 0, 0, 0];
99    fwrite (fid, uint8(upd(1:3)), "uint8");
100
101    ## Number of records in the table
102    fwrite (fid, nrecs, "uint32");
103    ## The next two uint16 fields are to be written later, just fill temporarily
104    pos_lhdr = ftell(fid);
105
106    fwrite (fid, 0, "uint32");
107
108    ## Another place holder, write enough to allow next fseek to succeed
109    fwrite (fid, uint32 (zeros (1, 7)), "uint32");
110
111    ## Write record descriptors
112    nfields  = size (data, 2);
113    fldtyp   = "";
114    fldlngs  = {};
115    reclen   = 1;                                             ## "Erased" byte first
116    fseek (fid, 32, "bof");
117
118    RR = zeros (32, nfields, "uint8");
119    colskipped = 0;
120    for ii=1:nfields
121      decpl = 0;
122      recdesc = sprintf ("%d", uint32 (zeros (1, 8)));
123      recdesc(1:10) = strjust (sprintf ("%10s", data{1, ii}), "left"); ## Field name
124      ## Be strict on mixing char and numeric; this implies logicals
125      ## interspersed in numeric type column won't be accepted either
126      if (all (cellfun (@isnumeric, data(tr:end, ii), "uni", 1)))
127        ## We're lax on interspersed integers, they'll be cast to double
128        if (isinteger ([data{tr:end, ii}]) ||
129            all ([data{tr:end, ii}] - floor([data{tr:end, ii}]) < eps))
130          ftype = "N";
131          decpl = 0;
132        else
133          ftype = "F";
134          ## ML compatibility for .dbf/.shp file: 6 decimal places
135          decpl = 6;
136        endif
137        fldlng = 20;
138      elseif (all (cellfun (@ischar, data(tr:end, ii), "uni", 1)))
139        ftype = "C";
140        fldlng = max (cellfun (@(x) length(x), data(tr:end, ii)));
141      elseif (all (cellfun (@islogical, (data(tr:end, ii)), "uni", 1)))
142        ftype = "L";
143        fldlng = 1;
144      else
145        warning (["dbfwrite: heterogeneous data types in column %d ('%s'), ", ...
146                 "skipped.\n"], ii, data{1, ii});
147        RR(:, end) = [];
148        nfields--;
149        colskipped = 1;
150        continue ;
151        ## unwind_protect_cleanup takes care of closing & wiping file
152      endif
153      recdesc(12) = ftype;                                    ## Field type
154      fldtyp      = [ fldtyp ftype ];
155      recdesc(17) = uint8 (fldlng);                           ## Field length
156      recdesc(18) = uint8 (decpl);                            ## Decimal places
157      recdesc(32) = "\0";                                     ## Fill to byte# 32
158
159      RR(:, ii) = recdesc';
160      reclen += fldlng;
161      fldlngs = [ fldlngs; sprintf("%d", fldlng) ];
162    endfor
163
164    fwrite (fid, RR, "char");
165
166    ## Write header record terminator
167    fwrite (fid, 13, "uint8");
168    ## Remember position
169    fpos_data = ftell (fid);
170    ## Write missing data in header
171    fseek (fid, pos_lhdr, "bof");
172    fwrite (fid, fpos_data, "uint16");
173    fwrite (fid, reclen, "uint16");
174
175    ## Write data2
176    fseek (fid, fpos_data, "bof");
177
178    ## Determine data record format. "Erased byte" is first char of format
179    fmt = "\0";
180    for j = 1:nfields
181      switch fldtyp(j)
182          case "C"                                            ## character
183            txt = ["%", fldlngs{j}, "s"];
184          case "N"                                            ## numeric
185            txt = ["%" fldlngs{j} "d"];
186          case "L"                                            ## logical
187            txt = ["%", fldlngs{j}, "c"];
188          case "F"                                            ## float
189            txt = ["%" fldlngs{j} "f"];
190          case "D"                                            ## date; currently inactive
191            ## txt = sprintf (["%" fldlngs{jj} "s"], data{ii, jj});
192          otherwise
193        end
194        fmt = [fmt, txt];                                     ## append format
195    end
196
197    ## Convert boolean attributes to Y/N characters
198    str_logical = {"N", "Y"};
199    for jj = find (fldtyp == "L")
200      data(2:end, jj) = str_logical (double ([data{2:end, jj}] + 1))';
201    end
202
203    ## Write data in ~100 MB chunks to avoid overflow. First find an optimal
204    ## chunk size as max. nr. of records in a chunk= (= nr.of rows in data)
205    chunk_sz = floor (1e8 / reclen);
206    for ii=1 : chunk_sz : nrecs
207      ## Reshape chunk of data matrix
208      T = [data(ii+1:min (ii+chunk_sz, nrecs+1), :)'(:)];
209      blob = sprintf (fmt, T{:});
210      ## Write blob to file
211      fwrite (fid, blob, "char");
212    endfor
213    status = 1;
214
215  unwind_protect_cleanup
216    fclose (fid);
217    if (! status)
218      printf ("dbfwrite: removing incomplete file %s.\n", fname);
219      unlink (fname);
220    elseif (colskipped)
221      status = -1;
222    endif
223  end_unwind_protect
224
225endfunction
226