1## Copyright (C) 2015-2021 Philip Nienhuis 2## Copyright (C) 2018-2020 Matthew Parkan 3## 4## This program is free software; you can redistribute it and/or modify it 5## under the terms of the GNU General Public License as published by 6## the Free Software Foundation; either version 3 of the License, or 7## (at your option) any later version. 8## 9## This program is distributed in the hope that it will be useful, 10## but WITHOUT ANY WARRANTY; without even the implied warranty of 11## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12## GNU General Public License for more details. 13## 14## You should have received a copy of the GNU General Public License 15## along with this program. If not, see <http://www.gnu.org/licenses/>. 16 17## -*- texinfo -*- 18## @deftypefn {Function File} [@var{status}] = dbfwrite (@var{fname}, @var{data}) 19## Write data in a cell array to a dbf (xBase) file, provisionally dBase III+. 20## 21## @var{fname} must be a valid file name, optionally with '.dbf' suffix. 22## @var{data} should be a cell array of which the top row contains column 23## names (character strings, each max. 10 characters; longer column names will 24## be truncated). Each column must contain only one class of data, except of 25## course the top entry (the column header). Integers interspersed in double 26## type colums will be written as doubles. Data types that can be written are 27## character (text string), numeric (integer and float, the latter with 6 28## decimal places), and logical. 29## 30## Output argument @var{status} is 1 if the file was written successfully, -1 if 31## one or more data columns were skipped, 0 otherwise. If 0 the incomplete file 32## will be deleted as well. 33## 34## Provisionally only dBase v. III+ files without memos can be written. 35## 36## @seealso{dbfread} 37## @end deftypefn 38 39## Authors: Philip Nienhuis <prnienhuis@users.sf.net>,whos 40## Matthew Parkan <matthew.parkan@gmail.com> 41## Created: 2014-12-24 42 43function [status] = dbfwrite (fname, data) 44 45 status = 0; 46 ## Input validation 47 if (! ischar (fname)) 48 error ("dbfwrite: file name expected for argument #1\n"); 49 elseif (! iscell (data)) 50 error ("dbfwrite: cell array expected for argument #2\n"); 51 elseif (! iscellstr (data (1, :))) 52 error ("dbfwrite: column header titles (text) expected on first row of data\n"); 53 endif 54 ## Column headers length cannot exceed 10 characters 55 toolong = []; 56 for ii=1:size (data, 2) 57 title = data{1, ii}; 58 if (length (title) > 10) 59 toolong = [ toolong, ii ]; 60 data(1, ii) = title(1:10); 61 endif 62 endfor 63 if (! isempty (toolong)) 64 ## Truncate headers if required and check for uniqueness 65 warning ("dbfwrite: one or more column header(s) > 10 characters - truncated\n"); 66 fmt = [repmat("%d ", 1, numel (toolong))]; 67 printf ("Applies to column(s): %s\n", sprintf (fmt, toolong)); 68 if (numel (unique (data(1, :))) < numel (data(1, :))) 69 error ("dbfwrite: column headers aren't unique - please fix data\n"); 70 endif 71 endif 72 73 ## Assess nr of records 74 ## Data contains header row. Data toprow = 2 75 nrecs = size (data, 1) - 1; 76 tr = 2; 77 78 ## Check file name 79 [pth, fnm, ext] = fileparts (fname); 80 if (isempty (ext)) 81 fname = [fname ".dbf"]; 82 elseif (! strcmpi (ext, ".dbf")) 83 error ("dbfwrite: file name should have a '.dbf' suffix\n"); 84 endif 85 ## Try to open file 86 fid = fopen (fname, "w", "ieee-le"); 87 if (fid < 0) 88 error ("dbfwrite: could not open file %s\n", fname); 89 endif 90 91 unwind_protect 92 ## Start writing header 93 ## Provisionally assume dbase III+ w/o memos 94 fwrite (fid, 3, "uint8"); 95 96 ## Date of last update (YYMMDD), with YY the number of years since 1900 97 t = now; 98 upd = datevec(t) - [1900, 0, 0, 0, 0, 0]; 99 fwrite (fid, uint8(upd(1:3)), "uint8"); 100 101 ## Number of records in the table 102 fwrite (fid, nrecs, "uint32"); 103 ## The next two uint16 fields are to be written later, just fill temporarily 104 pos_lhdr = ftell(fid); 105 106 fwrite (fid, 0, "uint32"); 107 108 ## Another place holder, write enough to allow next fseek to succeed 109 fwrite (fid, uint32 (zeros (1, 7)), "uint32"); 110 111 ## Write record descriptors 112 nfields = size (data, 2); 113 fldtyp = ""; 114 fldlngs = {}; 115 reclen = 1; ## "Erased" byte first 116 fseek (fid, 32, "bof"); 117 118 RR = zeros (32, nfields, "uint8"); 119 colskipped = 0; 120 for ii=1:nfields 121 decpl = 0; 122 recdesc = sprintf ("%d", uint32 (zeros (1, 8))); 123 recdesc(1:10) = strjust (sprintf ("%10s", data{1, ii}), "left"); ## Field name 124 ## Be strict on mixing char and numeric; this implies logicals 125 ## interspersed in numeric type column won't be accepted either 126 if (all (cellfun (@isnumeric, data(tr:end, ii), "uni", 1))) 127 ## We're lax on interspersed integers, they'll be cast to double 128 if (isinteger ([data{tr:end, ii}]) || 129 all ([data{tr:end, ii}] - floor([data{tr:end, ii}]) < eps)) 130 ftype = "N"; 131 decpl = 0; 132 else 133 ftype = "F"; 134 ## ML compatibility for .dbf/.shp file: 6 decimal places 135 decpl = 6; 136 endif 137 fldlng = 20; 138 elseif (all (cellfun (@ischar, data(tr:end, ii), "uni", 1))) 139 ftype = "C"; 140 fldlng = max (cellfun (@(x) length(x), data(tr:end, ii))); 141 elseif (all (cellfun (@islogical, (data(tr:end, ii)), "uni", 1))) 142 ftype = "L"; 143 fldlng = 1; 144 else 145 warning (["dbfwrite: heterogeneous data types in column %d ('%s'), ", ... 146 "skipped.\n"], ii, data{1, ii}); 147 RR(:, end) = []; 148 nfields--; 149 colskipped = 1; 150 continue ; 151 ## unwind_protect_cleanup takes care of closing & wiping file 152 endif 153 recdesc(12) = ftype; ## Field type 154 fldtyp = [ fldtyp ftype ]; 155 recdesc(17) = uint8 (fldlng); ## Field length 156 recdesc(18) = uint8 (decpl); ## Decimal places 157 recdesc(32) = "\0"; ## Fill to byte# 32 158 159 RR(:, ii) = recdesc'; 160 reclen += fldlng; 161 fldlngs = [ fldlngs; sprintf("%d", fldlng) ]; 162 endfor 163 164 fwrite (fid, RR, "char"); 165 166 ## Write header record terminator 167 fwrite (fid, 13, "uint8"); 168 ## Remember position 169 fpos_data = ftell (fid); 170 ## Write missing data in header 171 fseek (fid, pos_lhdr, "bof"); 172 fwrite (fid, fpos_data, "uint16"); 173 fwrite (fid, reclen, "uint16"); 174 175 ## Write data2 176 fseek (fid, fpos_data, "bof"); 177 178 ## Determine data record format. "Erased byte" is first char of format 179 fmt = "\0"; 180 for j = 1:nfields 181 switch fldtyp(j) 182 case "C" ## character 183 txt = ["%", fldlngs{j}, "s"]; 184 case "N" ## numeric 185 txt = ["%" fldlngs{j} "d"]; 186 case "L" ## logical 187 txt = ["%", fldlngs{j}, "c"]; 188 case "F" ## float 189 txt = ["%" fldlngs{j} "f"]; 190 case "D" ## date; currently inactive 191 ## txt = sprintf (["%" fldlngs{jj} "s"], data{ii, jj}); 192 otherwise 193 end 194 fmt = [fmt, txt]; ## append format 195 end 196 197 ## Convert boolean attributes to Y/N characters 198 str_logical = {"N", "Y"}; 199 for jj = find (fldtyp == "L") 200 data(2:end, jj) = str_logical (double ([data{2:end, jj}] + 1))'; 201 end 202 203 ## Write data in ~100 MB chunks to avoid overflow. First find an optimal 204 ## chunk size as max. nr. of records in a chunk= (= nr.of rows in data) 205 chunk_sz = floor (1e8 / reclen); 206 for ii=1 : chunk_sz : nrecs 207 ## Reshape chunk of data matrix 208 T = [data(ii+1:min (ii+chunk_sz, nrecs+1), :)'(:)]; 209 blob = sprintf (fmt, T{:}); 210 ## Write blob to file 211 fwrite (fid, blob, "char"); 212 endfor 213 status = 1; 214 215 unwind_protect_cleanup 216 fclose (fid); 217 if (! status) 218 printf ("dbfwrite: removing incomplete file %s.\n", fname); 219 unlink (fname); 220 elseif (colskipped) 221 status = -1; 222 endif 223 end_unwind_protect 224 225endfunction 226