1 //-------------------------------------------------------------------
2 //   C-MEX implementation of STR2ARRAY - this function is part of the NaN-toolbox.
//   It also fixes a problem in the old STR2DOUBLE.m described here:
4 //   http://www-old.cae.wisc.edu/pipermail/help-octave/2007-December/007325.html
5 //
6 //   This program is free software; you can redistribute it and/or modify
7 //   it under the terms of the GNU General Public License as published by
8 //   the Free Software Foundation; either version 3 of the License, or
9 //   (at your option) any later version.
10 //
11 //   This program is distributed in the hope that it will be useful,
12 //   but WITHOUT ANY WARRANTY; without even the implied warranty of
13 //   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 //   GNU General Public License for more details.
15 //
16 //   You should have received a copy of the GNU General Public License
17 //   along with this program; if not, see <http://www.gnu.org/licenses/>.
18 //
19 //
20 // usage:
21 //	[...] = STR2ARRAY(s)
22 //	[...] = STR2ARRAY(sa)
23 //	[...] = STR2ARRAY(s,cdelim)
24 //	[...] = STR2ARRAY(s,cdelim,rdelim)
25 //	[...] = STR2ARRAY(s,cdelim,rdelim,ddelim)
26 //	[num,status,strarray] = STR2ARRAY(...)
27 //
28 // Input:
29 //  s 	        char string
30 //  sa 	        cell array of strings
31 //  cdelim	column delimiter
32 //  rdelim	row delimiter
33 //  ddelim      decimal delimiter
34 //
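// Output:
//  num         numeric array
//  status      flag failing conversion
//  strarray    cell array of strings, contains strings of failed conversions
//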
36 //    $Id: STR2ARRAY.cpp 7142 2010-03-30 18:48:06Z schloegl $
37 //    Copyright (C) 2010,2011 Alois Schloegl <alois.schloegl@gmail.com>
38 //    This function is part of the NaN-toolbox
39 //    http://pub.ist.ac.at/~schloegl/matlab/NaN/
40 //
41 //-------------------------------------------------------------------
42 
43 
44 
45 #include <ctype.h>
46 #include <math.h>
47 #include <stdint.h>
48 #include <string.h>
49 #include "mex.h"
50 
// Fallback definition of mwSize: when tmwtypes_h is defined and MX_API_VER
// is at most 0x07020000, mwSize is declared here as a plain int.
// NOTE(review): presumably tmwtypes_h is the include guard of the MEX type
// header and these older API versions do not provide mwSize - confirm.
#ifdef tmwtypes_h
  #if (MX_API_VER<=0x07020000)
    typedef int mwSize;
  #endif
#endif
56 
57 
int str2val(char *s, double *r, double *i)
{
/*
	str2val converts a string into a numeric value. Real and complex numbers
	are supported. Complex numbers are written as "3.4 + 5.6i" or
	"3.4 + i * 5.6" (spaces are optional). A missing real part in front of
	the sign (e.g. "-i*4") is taken as 0.

	input:
		s	NUL-terminated char string (it is not modified)
	output:
		*r	real value      (written only if 1 or 2 is returned)
		*i	imaginary value (written only if 2 is returned)
	return values:
		0: conversion failed; this includes a NULL, empty or blank string,
		   a missing operand after "i *", and trailing non-space characters
		1: real number returned
		2: complex number returned
*/
	char *endptr = NULL;
	char *start;
	double val, ival, sgn;
	int have_real;

	if (s == NULL) return(0);

	val = strtod(s, &endptr);	// conversion of the real part
	have_real = (endptr != s);	// strtod leaves endptr at s if nothing was converted

	// <ctype.h> functions need an unsigned char value, plain char may be negative
	while (isspace((unsigned char)*endptr)) endptr++;
#ifdef DEBUG
	mexPrintf("123<%s>\t,%f,\t[%s]\n",s,val,endptr);
#endif
	if (!*endptr) {
		// end of string reached: success only if a number was actually read,
		// otherwise an empty or blank string would be reported as 0
		if (!have_real) return(0);
		*r = val;
		return(1);
	}

	if ((*endptr != '+') && (*endptr != '-')) {
		// neither end of string nor start of an imaginary part
		return(0);
	}

	// imaginary part
	sgn = (*endptr=='+') ? 1.0 : -1.0;
	do {
		endptr++;
	} while (isspace((unsigned char)*endptr));

	if (*endptr=='i') {
		// case " a + i * b "
		do {
			endptr++;
		} while (isspace((unsigned char)*endptr));

		if (*endptr != '*') return(0);

		start = endptr + 1;
		ival  = strtod(start, &endptr);	// conversion
		if (endptr == start) return(0);	// no operand after '*'
	}
	else {
		// case " a + bi "
		ival = strtod(endptr, &endptr);	// conversion
		if (*endptr != 'i') return(0);
		endptr++;
	}

	// both notations accept nothing but white space up to the end of the string
	while (isspace((unsigned char)*endptr)) endptr++;
	if (*endptr) return(0);

	*r = val;
	*i = sgn*ival;
	return(2);
}
131 
132 
133 
void mexFunction(
    int              nlhs,        /* number of expected outputs */
    mxArray       *plhs[],        /* array of pointers to output arguments */
    int              nrhs,        /* number of inputs */
    const mxArray *prhs[]         /* array of pointers to input arguments */
)

{
/*
	MEX gateway of STR2ARRAY.

	Without input arguments the usage text is printed.
	With a single cell array argument each cell holding a char array is
	converted with str2val; the outputs have the size of the cell array.
	Otherwise arg1 must be a char array; it is split at the column and row
	delimiters (arg2, arg3; defaults <Tab>"," and <LF>";") and each field is
	converted with str2val. An optional single-char decimal delimiter (arg4)
	is replaced by '.' before splitting.

	outputs:
		plhs[0]	numeric array, pre-filled with NaN; an imaginary part is
			attached as soon as one field is complex
		plhs[1]	(if requested) logical array, pre-filled with 1 and set
			to 0 for each successfully converted field
		plhs[2]	(if requested) cell array receiving the strings of the
			fields whose conversion failed
*/
	char *s = NULL;
	const char *cdelim = "\x09,";	// default column delimiters
	const char *rdelim = "\x0a;";	// default row delimiters
	char *cdelim_arg = NULL;	// user-supplied delimiters, owned by this function
	char *rdelim_arg = NULL;
	uint8_t *u;
	uint8_t *v = NULL;
	double *o;
	double *oi = NULL;
	size_t slen = 0,k;
	size_t maxcol=0, maxrow=0, nr, nc, numel, last;

	if (nrhs<1) {
		mexPrintf("   STR2ARRAY.MEX converts delimiter text files into arrays of numerics and cell-strings\n");
		mexPrintf("   STR2ARRAY.MEX converts delimiter text files into numeric arrays\n");
		mexPrintf("   It fixes a problem of the old STR2DOUBLE discussed here: http://www-old.cae.wisc.edu/pipermail/help-octave/2007-December/007325.html\n");
		mexPrintf("   at avoids using the insecure STR2NUM using EVAL\n");
		mexPrintf("\n   Usage of STR2ARRAY:\n");
		mexPrintf("\t[...] = STR2ARRAY(s)\n");
		mexPrintf("\t[...] = STR2ARRAY(sa)\n");
		mexPrintf("\t[...] = STR2ARRAY(s,cdelim)\n");
		mexPrintf("\t[...] = STR2ARRAY(s,cdelim,rdelim)\n");
		mexPrintf("\t[...] = STR2ARRAY(s,cdelim,rdelim,ddelim)\n");
		mexPrintf("\t[num,status,strarray] = STR2ARRAY(...)\n");
		mexPrintf("   Input:\n\ts\tstring\n\tsa\tcell array of strings\n\tcdelim\tlist of column delimiters (default: \"<Tab>,\"\n\trdelim\tlist of row delimiter (default: \"<LF><CR>;\")");
		mexPrintf("\n\tddelim\tdecimal delimiter (default: \".\"). This is useful if decimal delimiter is a comma (e.g. after Excel export in Europe)\n");
		mexPrintf("   Output:\n\tnum\tnumeric array\n\tstatus\tflag failing conversion\n\tstrarray\tcell array of strings contains strings of failed conversions\n");
		mexPrintf("\nExamples:\n\tSTR2ARRAY('4.12')\n\tSTR2ARRAY('1.2 - 3.4e2i')   complex numbers\n\tSTR2ARRAY('101.01 , 0-i4; 1.2 - i * 3.4, abc')\n\tSTR2ARRAY({'101.01', '0-i4'; '1.2 - i * 3.4', 'abc'})\n\tSTR2ARRAY('1,2;a,b,c;3,4')\n");
		mexPrintf("\tSTR2ARRAY('1;2,3;4',';',',')   exchange row- and column delimiter\n\tSTR2ARRAY('1,200 4;3,400 5',' ',';',',')  replace decimal delimter\n");
		return;
	}

	/* sanity check of input arguments */

	if ((nrhs==1) && mxIsCell(prhs[0])) {
		// cell array of strings
		maxrow = mxGetM(prhs[0]);
		maxcol = mxGetN(prhs[0]);
		numel  = maxrow*maxcol;

		/* allocate output memory */
		if (nlhs>2) plhs[2] = mxCreateCellMatrix(maxrow, maxcol);
		if (nlhs>1) {
			plhs[1] = mxCreateLogicalMatrix(maxrow, maxcol);
			v = (uint8_t*)mxGetData(plhs[1]);
			if (numel>0) memset(v, 1, numel);
		}
		plhs[0] = mxCreateDoubleMatrix(maxrow, maxcol, mxREAL);
		o = (double*)mxGetData(plhs[0]);
		for (k=0; k<numel; k++) {
			o[k] = 0.0/0.0;
		}
		for (k = 0; k < numel; k++) {
			double ival;
			int typ;
			char *str;
			const mxArray *cell = mxGetCell(prhs[0],k);

			// an unpopulated cell keeps NaN and status 1
			if (cell==NULL) continue;

			str = mxArrayToString(cell);
			// a cell that is not a char array keeps NaN and status 1; it
			// cannot be copied to plhs[2] because the input must not be modified
			if (str==NULL) continue;

			typ = str2val(str, o+k, &ival);
			if ((nlhs>2) && (typ==0)) mxSetCell(plhs[2], (mwSize)k, mxCreateString(str));
			if ((nlhs>1) && (typ> 0)) v[k] = 0;
			if (typ==2) {
				if (oi==NULL) {
					// first complex element: attach a zero-filled imaginary part
					oi = (double*) mxCalloc(numel, sizeof(double));
					mxSetPi(plhs[0], oi);
				}
				oi[k] = ival;
			}
			// release the per-cell copy, otherwise memory grows with the number of cells
			mxFree(str);
		}
		// cell-array input is finished
		return;
	}

	if (!mxIsChar(prhs[0]))
		mexErrMsgTxt("arg1 is not a char array");
	s = mxArrayToString(prhs[0]);
	if (s==NULL)
		mexErrMsgTxt("arg1 could not be converted into a string");
	slen = mxGetNumberOfElements(prhs[0]);

	if (nrhs>1) {
		if (!mxIsChar(prhs[1]))
			mexErrMsgTxt("arg2 is not a char array");
		cdelim_arg = mxArrayToString(prhs[1]);
		if (cdelim_arg==NULL)
			mexErrMsgTxt("arg2 could not be converted into a string");
		cdelim = cdelim_arg;
	}
	if (nrhs>2) {
		if (!mxIsChar(prhs[2]))
			mexErrMsgTxt("arg3 is not a char array");
		rdelim_arg = mxArrayToString(prhs[2]);
		if (rdelim_arg==NULL)
			mexErrMsgTxt("arg3 could not be converted into a string");
		rdelim = rdelim_arg;
	}
	if (nrhs>3) {
		char *ddelim;
		if (!(mxIsChar(prhs[3]) && (mxGetNumberOfElements(prhs[3])==1)))
			mexErrMsgTxt("arg4 is not a single char");
		ddelim = mxArrayToString(prhs[3]);
		if (ddelim==NULL)
			mexErrMsgTxt("arg4 could not be converted into a string");
		// replace the decimal delimiter before the separators are identified
		for (k=0; k<slen; k++) {
			if (s[k]==ddelim[0])
				s[k] = '.';
		}
		mxFree(ddelim);
	}

	/* identify separators */
	// u[k] is 0 for an ordinary character, 1 for a column and 2 for a row delimiter;
	// the extra element u[slen] marks an implicit row end behind the last character
	u = (uint8_t*) mxCalloc(slen+1, sizeof(uint8_t));
	for (k = 0; k < slen; ) {
		if (strchr(cdelim,s[k]) != NULL) {
			u[k] = 1;      // column delimiter
			while (s[++k]==' ');    // ignore extra space characters
		}
		else if (strchr(rdelim,s[k]) != NULL)
			u[k++] = 2;    // row delimiter
		else
			k++; 	       // ordinary character
	}

	/* count dimensions and set delimiter elements to 0 */
	nc=0; nr=0;
	if ((slen>0) && (u[slen-1]<2)) {
		// when terminating char is not a row delimiter;
		// an empty string must not be inspected here (u[slen-1] would be out of
		// bounds) and yields an empty result
		nr = 1;
		u[slen] = 2;
	}
	for (k = 0; k < slen; k++) {
		if (u[k]==2) {
			s[k] = 0;
			nr++;
			if (nc > maxcol) maxcol=nc;
			nc = 0;
		}
		else if (u[k]==1) {
			s[k] = 0;
			nc++;
		}
	}
	if (nc > maxcol) maxcol=nc;
	maxcol += (slen>0);	// n column delimiters separate n+1 columns
	maxrow = nr;
	numel  = maxrow*maxcol;

	/* allocate output memory */
	if (nlhs>2) plhs[2] = mxCreateCellMatrix(maxrow, maxcol);
	if (nlhs>1) {
		plhs[1] = mxCreateLogicalMatrix(maxrow, maxcol);
		v = (uint8_t*)mxGetData(plhs[1]);
		if (numel>0) memset(v,1,numel);
	}
	plhs[0] = mxCreateDoubleMatrix(maxrow, maxcol, mxREAL);
	o = (double*)mxGetData(plhs[0]);
	for (k=0; k<numel; k++) {
		o[k] = 0.0/0.0;
	}

	nr = 0; nc = 0;
	last = 0;	// start of the current field within s
	for (k = 0; k <= slen; k++) {
		if (u[k]) {
			// delimiter triggers action
			size_t idx = nr+nc*maxrow;	// column-major position of the field
			if (last==k) {
				// empty field
				o[idx] = 0.0/0.0;
			}
			else {
				double ival;
				int typ = str2val(s+last, o+idx, &ival);

				if ((nlhs>2) && (typ==0)) mxSetCell(plhs[2], idx, mxCreateString(s+last));
				if ((nlhs>1) && (typ> 0)) v[idx] = 0;
				if (typ==2) {
					if (oi==NULL) {
						// first complex element: attach a zero-filled imaginary part
						oi = (double*) mxCalloc(numel, sizeof(double));
						mxSetPi(plhs[0], oi);
					}
					oi[idx] = ival;
				}
			}

			nc++;	// next element
			if (u[k]==2) {
				nr++;	// next row
				nc = 0;
			}
			last = k+1;
		}
	}

	/* release working memory; mxFree accepts NULL pointers */
	mxFree(u);
	mxFree(s);
	mxFree(cdelim_arg);
	mxFree(rdelim_arg);
}
338 
339