1 //-------------------------------------------------------------------
2 // C-MEX implementation of STR2ARRAY - this function is part of the NaN-toolbox.
3 // Actually, it also fixes a problem in STR2ARRAY.m described here:
4 // http://www-old.cae.wisc.edu/pipermail/help-octave/2007-December/007325.html
5 //
6 // This program is free software; you can redistribute it and/or modify
7 // it under the terms of the GNU General Public License as published by
8 // the Free Software Foundation; either version 3 of the License, or
9 // (at your option) any later version.
10 //
11 // This program is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 //
16 // You should have received a copy of the GNU General Public License
17 // along with this program; if not, see <http://www.gnu.org/licenses/>.
18 //
19 //
20 // usage:
21 // [...] = STR2ARRAY(s)
22 // [...] = STR2ARRAY(sa)
23 // [...] = STR2ARRAY(s,cdelim)
24 // [...] = STR2ARRAY(s,cdelim,rdelim)
25 // [...] = STR2ARRAY(s,cdelim,rdelim,ddelim)
26 // [num,status,strarray] = STR2ARRAY(...)
27 //
28 // Input:
29 // s char string
30 // sa cell array of strings
31 // cdelim column delimiter
32 // rdelim row delimiter
33 // ddelim decimal delimiter
34 //
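// Output:
// num numeric array (NaN where a field is empty or could not be converted)
// status logical array, true where the conversion failed
// strarray cell array containing the strings of failed conversions
//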
36 // $Id: STR2ARRAY.cpp 7142 2010-03-30 18:48:06Z schloegl $
37 // Copyright (C) 2010,2011 Alois Schloegl <alois.schloegl@gmail.com>
38 // This function is part of the NaN-toolbox
39 // http://pub.ist.ac.at/~schloegl/matlab/NaN/
40 //
41 //-------------------------------------------------------------------
42
43
44
45 #include <ctype.h>
46 #include <math.h>
47 #include <stdint.h>
48 #include <string.h>
49 #include "mex.h"
50
51 #ifdef tmwtypes_h
52 #if (MX_API_VER<=0x07020000)
53 typedef int mwSize;
54 #endif
55 #endif
56
57
/*
 * str2val converts a string into a numeric value. Real and complex
 * numbers are supported. Complex numbers are written as "3.4 + 5.6i"
 * or "3.4 + i * 5.6" (spaces are optional).
 *
 * input:
 *     s    NUL-terminated char string
 * output:
 *     *r   real value      (written only when 1 or 2 is returned)
 *     *i   imaginary value (written only when 2 is returned)
 * return values:
 *     0: conversion failed (this includes empty and blank strings)
 *     1: real number returned
 *     2: complex number returned
 *
 * If no number precedes the sign of the imaginary part (e.g. "+i*5"),
 * the real part is taken as 0, because strtod returns 0 when it
 * performs no conversion.
 */
int str2val(char *s, double *r, double *i)
{
    char *endptr = NULL;
    char *start;
    double val, ival, sgn;
    int have_real;

    val = strtod(s, &endptr);       // conversion of the real part
    have_real = (endptr != s);      // strtod leaves endptr==s when nothing was converted

    // <ctype.h> functions require a value representable as unsigned char
    while (isspace((unsigned char)*endptr)) endptr++;
#ifdef DEBUG
    mexPrintf("123<%s>\t,%f,\t[%s]\n",s,val,endptr);
#endif

    if (*endptr == '\0') {
        // nothing but white space: not a number
        if (!have_real) return(0);
        // conversion successful
        *r = val;
        return(1);
    }

    if ((*endptr != '+') && (*endptr != '-')) {
        // unexpected character after the real part
        return(0);
    }

    // imaginary part
    sgn = (*endptr == '+') ? 1.0 : -1.0;
    do {
        endptr++;
    } while (isspace((unsigned char)*endptr));

    if (*endptr == 'i') {
        // case " a + i * b "
        do {
            endptr++;
        } while (isspace((unsigned char)*endptr));

        if (*endptr != '*') return(0);

        start = endptr + 1;
        ival = strtod(start, &endptr);
        if (endptr == start) return(0);     // no number after '*'
    }
    else {
        // case " a + bi "
        start = endptr;
        ival = strtod(start, &endptr);
        if (endptr == start) return(0);     // no number after the sign
        if (*endptr != 'i') return(0);
        endptr++;
    }

    // only white space may follow the imaginary part
    while (isspace((unsigned char)*endptr)) endptr++;
    if (*endptr != '\0') return(0);

    *r = val;
    *i = sgn * ival;
    return(2);
}
131
132
133
/* Print the built-in help text; shown when STR2ARRAY is called without input arguments. */
static void s2a_print_usage(void)
{
    mexPrintf(" STR2ARRAY.MEX converts delimiter text files into arrays of numerics and cell-strings\n");
    mexPrintf(" STR2ARRAY.MEX converts delimiter text files into numeric arrays\n");
    mexPrintf(" It fixes a problem of the old STR2DOUBLE discussed here: http://www-old.cae.wisc.edu/pipermail/help-octave/2007-December/007325.html\n");
    mexPrintf(" at avoids using the insecure STR2NUM using EVAL\n");
    mexPrintf("\n Usage of STR2ARRAY:\n");
    mexPrintf("\t[...] = STR2ARRAY(s)\n");
    mexPrintf("\t[...] = STR2ARRAY(sa)\n");
    mexPrintf("\t[...] = STR2ARRAY(s,cdelim)\n");
    mexPrintf("\t[...] = STR2ARRAY(s,cdelim,rdelim)\n");
    mexPrintf("\t[...] = STR2ARRAY(s,cdelim,rdelim,ddelim)\n");
    mexPrintf("\t[num,status,strarray] = STR2ARRAY(...)\n");
    mexPrintf(" Input:\n\ts\tstring\n\tsa\tcell array of strings\n\tcdelim\tlist of column delimiters (default: \"<Tab>,\"\n\trdelim\tlist of row delimiter (default: \"<LF><CR>;\")");
    mexPrintf("\n\tddelim\tdecimal delimiter (default: \".\"). This is useful if decimal delimiter is a comma (e.g. after Excel export in Europe)\n");
    mexPrintf(" Output:\n\tnum\tnumeric array\n\tstatus\tflag failing conversion\n\tstrarray\tcell array of strings contains strings of failed conversions\n");
    mexPrintf("\nExamples:\n\tSTR2ARRAY('4.12')\n\tSTR2ARRAY('1.2 - 3.4e2i') complex numbers\n\tSTR2ARRAY('101.01 , 0-i4; 1.2 - i * 3.4, abc')\n\tSTR2ARRAY({'101.01', '0-i4'; '1.2 - i * 3.4', 'abc'})\n\tSTR2ARRAY('1,2;a,b,c;3,4')\n");
    mexPrintf("\tSTR2ARRAY('1;2,3;4',';',',') exchange row- and column delimiter\n\tSTR2ARRAY('1,200 4;3,400 5',' ',';',',') replace decimal delimter\n");
}

/*
 * Create the output arrays with nrow x ncol elements:
 *   plhs[0] real double matrix, every element preset to NaN
 *   plhs[1] logical matrix, every element preset to 1 (only if nlhs>1)
 *   plhs[2] cell matrix                                (only if nlhs>2)
 * Returns the data pointer of plhs[0]; *status receives the data pointer
 * of plhs[1], or NULL when plhs[1] is not requested.
 */
static double *s2a_create_outputs(int nlhs, mxArray *plhs[], size_t nrow, size_t ncol, uint8_t **status)
{
    size_t numel = nrow * ncol;
    size_t k;
    double *num;

    *status = NULL;
    if (nlhs > 2) plhs[2] = mxCreateCellMatrix(nrow, ncol);
    if (nlhs > 1) {
        plhs[1] = mxCreateLogicalMatrix(nrow, ncol);
        *status = (uint8_t*)mxGetData(plhs[1]);
        // the data pointer of an empty matrix may be NULL, do not pass it to memset
        if (numel > 0) memset(*status, 1, numel);
    }
    plhs[0] = mxCreateDoubleMatrix(nrow, ncol, mxREAL);
    num = (double*)mxGetData(plhs[0]);
    for (k = 0; k < numel; k++) {
        num[k] = 0.0/0.0;   // NaN
    }
    return num;
}

/*
 * Convert the string str with str2val and store the result in element idx
 * of the outputs. On success the status flag is cleared; on failure the
 * string is copied into the cell output. The imaginary part of plhs[0]
 * (numel zero-initialized elements) is allocated when the first complex
 * value is encountered; *imag tracks it and must be NULL initially.
 */
static void s2a_store_value(char *str, size_t idx, size_t numel, int nlhs, mxArray *plhs[],
                            double *num, double **imag, uint8_t *status)
{
    double ival = 0.0;
    int typ = str2val(str, num + idx, &ival);

    if ((nlhs > 2) && (typ == 0)) mxSetCell(plhs[2], (mwSize)idx, mxCreateString(str));
    if ((nlhs > 1) && (typ > 0)) status[idx] = 0;
    if (typ == 2) {
        if (*imag == NULL) {
            *imag = (double*) mxCalloc(numel, sizeof(double));
            mxSetPi(plhs[0], *imag);
        }
        (*imag)[idx] = ival;
    }
}

/*
 * Convert a cell array of strings element by element; the outputs have the
 * same number of rows and columns as the input. Cells that do not yield a
 * string (unpopulated or non-char cells) keep NaN and a status of 1; they
 * are not copied into the cell output.
 */
static void s2a_convert_cell_array(int nlhs, mxArray *plhs[], const mxArray *cells)
{
    size_t nrow = mxGetM(cells);
    size_t ncol = mxGetN(cells);
    size_t numel = nrow * ncol;
    size_t k;
    uint8_t *status = NULL;
    double *imag = NULL;
    double *num = s2a_create_outputs(nlhs, plhs, nrow, ncol, &status);

    for (k = 0; k < numel; k++) {
        const mxArray *elem = mxGetCell(cells, k);
        // mxGetCell yields NULL for an unpopulated cell; do not hand that to mxArrayToString
        char *str = (elem != NULL) ? mxArrayToString(elem) : NULL;

        if (str == NULL) continue;
        s2a_store_value(str, k, numel, nlhs, plhs, num, &imag, status);
        mxFree(str);    // mxCreateString made its own copy; release per element
    }
}

/*
 * Split the string s (slen characters, modified in place) at the column
 * and row delimiters and convert every field. Rows shorter than the
 * longest row are padded with NaN. Empty fields yield NaN and a status of 1.
 */
static void s2a_convert_char_array(int nlhs, mxArray *plhs[], char *s, size_t slen,
                                   const char *cdelim, const char *rdelim)
{
    uint8_t *u;             // per character: 0 ordinary, 1 column delimiter, 2 row delimiter
    uint8_t *status = NULL;
    double *num;
    double *imag = NULL;
    size_t maxcol = 0, maxrow = 0;
    size_t nr = 0, nc = 0;
    size_t last = 0;
    size_t k;

    if (slen == 0) {
        // empty input: empty outputs; nothing to index (u[slen-1] would be out of bounds)
        s2a_create_outputs(nlhs, plhs, 0, 0, &status);
        return;
    }

    /* identify separators */
    u = (uint8_t*) mxCalloc(slen + 1, 1);
    for (k = 0; k < slen; ) {
        if (strchr(cdelim, s[k]) != NULL) {
            u[k] = 1;               // column delimiter
            while (s[++k] == ' ');  // ignore extra space characters
        }
        else if (strchr(rdelim, s[k]) != NULL)
            u[k++] = 2;             // row delimiter
        else
            k++;                    // ordinary character
    }

    /* count dimensions and set delimiter elements to 0 */
    if (u[slen-1] < 2) {
        // when terminating char is not a row delimiter, the end of the
        // string closes the last row
        nr = 1;
        u[slen] = 2;
    }
    for (k = 0; k < slen; k++) {
        if (u[k] == 2) {
            s[k] = 0;
            nr++;
            if (nc > maxcol) maxcol = nc;
            nc = 0;
        }
        else if (u[k] == 1) {
            s[k] = 0;
            nc++;
        }
    }
    if (nc > maxcol) maxcol = nc;
    maxcol += 1;    // number of fields is number of column delimiters + 1
    maxrow = nr;

    /* allocate output memory */
    num = s2a_create_outputs(nlhs, plhs, maxrow, maxcol, &status);

    /* convert the fields; u[slen] may hold the closing row delimiter */
    nr = 0; nc = 0;
    for (k = 0; k <= slen; k++) {
        if (!u[k]) continue;

        // delimiter triggers action
        if (last != k) {
            // non-empty field (an empty field keeps its preset NaN)
            s2a_store_value(s + last, nr + nc*maxrow, maxrow*maxcol, nlhs, plhs, num, &imag, status);
        }

        nc++;   // next element
        if (u[k] == 2) {
            nr++;   // next row
            nc = 0;
        }
        last = k + 1;
    }
    mxFree(u);
}

/*
 * MEX gateway of STR2ARRAY.
 *
 *   [...] = STR2ARRAY(s)
 *   [...] = STR2ARRAY(sa)
 *   [...] = STR2ARRAY(s,cdelim)
 *   [...] = STR2ARRAY(s,cdelim,rdelim)
 *   [...] = STR2ARRAY(s,cdelim,rdelim,ddelim)
 *   [num,status,strarray] = STR2ARRAY(...)
 *
 * Without input arguments the help text is printed. A cell array is
 * accepted only as the single input argument; otherwise arg1..arg3 must
 * be char arrays and arg4 a single char, else an error is raised.
 * Every occurrence of ddelim in s is replaced by '.' before splitting.
 *
 * NOTE(review): the help text lists "<LF><CR>;" as default row delimiters,
 * but the default used here contains only <LF> and ';' - confirm which
 * one is intended.
 */
void mexFunction(
    int nlhs,               /* number of expected outputs */
    mxArray *plhs[],        /* array of pointers to output arguments */
    int nrhs,               /* number of inputs */
    const mxArray *prhs[]   /* array of pointers to input arguments */
)
{
    const char *cdelim = "\x09,";
    const char *rdelim = "\x0a;";
    char *s = NULL;
    char *cdelim_arg = NULL;
    char *rdelim_arg = NULL;
    size_t slen = 0;
    size_t k;

    if (nrhs < 1) {
        s2a_print_usage();
        return;
    }

    if ((nrhs == 1) && mxIsCell(prhs[0])) {
        // cell array of strings
        s2a_convert_cell_array(nlhs, plhs, prhs[0]);
        return;
    }

    /* sanity check of input arguments (before anything is allocated) */
    if (!mxIsChar(prhs[0]))
        mexErrMsgTxt("arg1 is not a char array");
    if ((nrhs > 1) && !mxIsChar(prhs[1]))
        mexErrMsgTxt("arg2 is not a char array");
    if ((nrhs > 2) && !mxIsChar(prhs[2]))
        mexErrMsgTxt("arg3 is not a char array");
    if ((nrhs > 3) && !(mxIsChar(prhs[3]) && (mxGetNumberOfElements(prhs[3]) == 1)))
        mexErrMsgTxt("arg4 is not a single char");

    s = mxArrayToString(prhs[0]);
    if (s == NULL)
        mexErrMsgTxt("arg1 could not be converted to a string");
    slen = mxGetNumberOfElements(prhs[0]);

    if (nrhs > 1) {
        cdelim_arg = mxArrayToString(prhs[1]);
        if (cdelim_arg == NULL)
            mexErrMsgTxt("arg2 could not be converted to a string");
        cdelim = cdelim_arg;
    }
    if (nrhs > 2) {
        rdelim_arg = mxArrayToString(prhs[2]);
        if (rdelim_arg == NULL)
            mexErrMsgTxt("arg3 could not be converted to a string");
        rdelim = rdelim_arg;
    }
    if (nrhs > 3) {
        char *ddelim = mxArrayToString(prhs[3]);
        if (ddelim == NULL)
            mexErrMsgTxt("arg4 could not be converted to a string");
        // replace the decimal delimiter so that strtod can parse the numbers
        for (k = 0; k < slen; k++) {
            if (s[k] == ddelim[0])
                s[k] = '.';
        }
        mxFree(ddelim);
    }

    s2a_convert_char_array(nlhs, plhs, s, slen, cdelim, rdelim);

    /* release the temporary copies of the input strings */
    if (rdelim_arg != NULL) mxFree(rdelim_arg);
    if (cdelim_arg != NULL) mxFree(cdelim_arg);
    mxFree(s);
}
338
339