1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 2003-2011 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                 Eclipse Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *          http://www.eclipse.org/org/documents/epl-v10.html           *
11 *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                   Phong Vo <kpv@research.att.com>                    *
18 *                                                                      *
19 ***********************************************************************/
20 #include	<vclib.h>
21 
22 /*	Transpose a table of some given row length.
23 **
24 **	Written by Binh Vo and Kiem-Phong Vo (04/24/2006)
25 */
26 
27 #define TR_PLAIN	1	/* strictly tranposed data only	*/
28 #define TR_SEPARATOR	2	/* setting record separator	*/
29 #define TR_COLUMNS	3	/* setting number of columns	*/
30 
31 typedef struct _transflip_s
32 {	ssize_t		open;	/* next open record		*/
33 	ssize_t		rpos;	/* record position		*/
34 } Transflip_t;
35 
36 typedef struct _transctxt_s
37 {	Vccontext_t	ctxt;
38 	ssize_t		ncols;	/* number of columns in table	*/
39 	int		rsep;	/* or record separator		*/
40 } Transctxt_t;
41 
42 typedef struct _transpose_s
43 {	int		type;
44 	Transctxt_t*	ctxt;	/* default context		*/
45 } Transpose_t;
46 
47 /* arguments passable to vcopen() */
48 static Vcmtarg_t _Transargs[] =
49 {
50 	{ "rsep", "Rows are separated  by 'rsep=character'.", (Void_t*)TR_SEPARATOR },
51 	{ "fsep", "This is equivalent to 'rsep'.", (Void_t*)TR_SEPARATOR },
52 	{ "columns", "Number of columns is defined by 'columns=length'.", (Void_t*)TR_COLUMNS },
53 	{ "0", "Only transposed data are coded, not meta-data.", (Void_t*)TR_PLAIN },
54 	{  0 , "Both transposed data and meta-data are coded.", (Void_t*)0 }
55 };
56 
57 #if __STD_C
transflip(Vcchar_t * data,ssize_t dtsz,int rsep,Vcchar_t * flip)58 static ssize_t transflip(Vcchar_t* data, ssize_t dtsz, int rsep, Vcchar_t* flip)
59 #else
60 static ssize_t transflip(data, dtsz, rsep, flip)
61 Vcchar_t*	data;
62 ssize_t		dtsz;
63 int		rsep;
64 Vcchar_t*	flip;
65 #endif
66 {
67 	ssize_t		z, p, r, nrows;
68 	int		byte;
69 	Transflip_t	*fl;
70 
71 	if(data[dtsz-1] != rsep)
72 		RETURN(-1);
73 
74 	/* count number of rows */
75 	for(nrows = 0, z = 0; z < dtsz; ++z)
76 		if(data[z] == rsep)
77 			nrows += 1;
78 
79 	/* allocate space for row data */
80 	if(!(fl = (Transflip_t*)calloc(nrows, sizeof(Transflip_t))) )
81 		RETURN(-1);
82 
83 	/* compute record starts and record sizes */
84 	for(p = r = z = 0; z < dtsz; ++z)
85 	{	if(data[z] == rsep)
86 		{	fl[r].open = r; /* all slot starting out open */
87 			fl[r].rpos = p; /* record position */
88 			p = z+1;
89 			r += 1;
90 		}
91 	}
92 
93 	/* now flip records */
94 	while(fl[r = 0].open < nrows)
95 	{	do
96 		{	if((p = fl[r].open) > r) /* a done record */
97 			{	fl[r].open = p < nrows ? fl[p].open : nrows;
98 				r = p;
99 			}
100 			else /* output one byte from this record */
101 			{	*flip++ = (byte = data[fl[r].rpos]);
102 				if(byte == rsep) /* record is done */
103 					fl[r].open = r+1;
104 				fl[r].rpos += 1;
105 				r += 1;
106 			}
107 		} while(r < nrows);
108 	}
109 
110 	free(fl);
111 	return nrows;
112 }
113 
114 #if __STD_C
unflip(Vcchar_t * data,ssize_t dtsz,int rsep,Vcchar_t * flip)115 static ssize_t unflip(Vcchar_t* data, ssize_t dtsz, int rsep, Vcchar_t* flip)
116 #else
117 static ssize_t unflip(data, dtsz, rsep, flip)
118 Vcchar_t*	data;
119 ssize_t		dtsz;
120 int		rsep;
121 Vcchar_t*	flip;
122 #endif
123 {
124 	ssize_t		z, p, r, nrows;
125 	Transflip_t	*fl;
126 
127 	if(data[dtsz-1] != rsep)
128 		RETURN(-1);
129 
130 	/* count number of rows */
131 	for(nrows = 0, z = 0; z < dtsz; ++z)
132 		if(data[z] == rsep)
133 			nrows += 1;
134 
135 	/* allocate space for row data */
136 	if(!(fl = (Transflip_t*)calloc(nrows, sizeof(Transflip_t))) )
137 		RETURN(-1);
138 	for(r = 0; r < nrows; ++r)
139 		fl[r].open = r;
140 
141 	/* compute the size of each row */
142 	for(r = z = 0; z < dtsz; )
143 	{	while((p = fl[r].open) > r) /* find row still open */
144 		{	fl[r].open = p < nrows ? fl[p].open : nrows;
145 			r = p;
146 		}
147 
148 		if(r < nrows)
149 		{	fl[r].rpos += 1; /* add to its length */
150 			if(data[z] == rsep) /* this record is done */
151 				fl[r].open = r+1;
152 			z += 1;
153 		}
154 
155 		if((r += 1) >= nrows) /* wrap around */
156 			r = 0;
157 	}
158 
159 	/* allocate space for each record */
160 	for(p = r = 0; r < nrows; ++r)
161 	{	fl[r].open = r;
162 		z = fl[r].rpos; /* save current length */
163 		fl[r].rpos = p; /* set starting position */
164 		p += z; /* starting position for next record */
165 	}
166 
167 	/* rebuild records */
168 	for(r = z = 0; z < dtsz; )
169 	{	while((p = fl[r].open) > r) /* find row still open */
170 		{	fl[r].open = p < nrows ? fl[p].open : nrows;
171 			r = p;
172 		}
173 
174 		if(r < nrows)
175 		{	flip[fl[r].rpos++] = data[z];
176 			if(data[z] == rsep) /* this record is done */
177 				fl[r].open = r+1;
178 			z += 1;
179 		}
180 
181 		if((r += 1) >= nrows)
182 			r = 0;
183 	}
184 
185 	free(fl);
186 	return nrows;
187 }
188 
189 #if __STD_C
transfixed(Vcchar_t * oldtbl,ssize_t nrows,ssize_t ncols,Vcchar_t * newtbl)190 static void transfixed(Vcchar_t* oldtbl, ssize_t nrows, ssize_t ncols, Vcchar_t* newtbl)
191 #else
192 static void transfixed(oldtbl, nrows, ncols, newtbl)
193 Vcchar_t*	oldtbl;	/* original table	*/
194 ssize_t		nrows;	/* #rows in otbl	*/
195 ssize_t		ncols;	/* #columns in otbl	*/
196 Vcchar_t*	newtbl;	/* new transposed table	*/
197 #endif
198 {
199 	ssize_t		r, nr, rr, c, nc, cc;
200 	Vcchar_t	*rdt, *cdt;
201 
202 #define BLOCK	32	/* transposing small blocks to be cache-friendly */
203 	for(r = 0; r < nrows; r += BLOCK)
204 	{	nr = (nrows-r) < BLOCK ? (nrows-r) : BLOCK;
205 		for(c = 0; c < ncols; c += BLOCK)
206 		{	nc = (ncols-c) < BLOCK ? (ncols-c) : BLOCK;
207 			rdt = oldtbl + r*ncols + c;
208 			for(rr = 0; rr < nr; ++rr, rdt += ncols-nc)
209 			{	cdt = newtbl + c*nrows + r+rr;
210 				for(cc = 0; cc < nc; ++cc, cdt += nrows)
211 					*cdt = *rdt++;
212 			}
213 		}
214 	}
215 }
216 
217 /* compute the # of columns from training data */
218 #if __STD_C
transtrain(const Void_t * train,size_t trsz)219 static ssize_t transtrain(const Void_t* train, size_t trsz)
220 #else
221 static ssize_t transtrain(train, trsz)
222 Void_t*	train;	/* training data */
223 size_t	trsz;
224 #endif
225 {
226 	ssize_t		ncols, sz;
227 
228 	if(!train || trsz <= 0)
229 		return 1;
230 
231 #define SAMPLE	(64*1024)
232 	for(sz = trsz < SAMPLE ? trsz : SAMPLE; sz <= trsz; sz *= 2)
233 		if((ncols = vcperiod(train, sz)) > 0)
234 			break;
235 
236 	return ncols <= 0 ? 1 : ncols;
237 }
238 
239 #if 0 /* combining transpose and run-length-encoding - an optimization  */
240 #if __STD_C
241 static ssize_t transrle(Vcodex_t* vc, const Void_t* data,
242 			size_t ncols, size_t nrows, Void_t** out)
243 #else
244 static ssize_t transrle(vc, data, ncols, nrows, out)
245 Vcodex_t*	vc;
246 Void_t*		data;
247 ssize_t		ncols, nrows;
248 Void_t**	out;
249 #endif
250 {
251 	reg Vcchar_t	*run, *chr, *dt, ch;
252 	Vcchar_t	*output, *enddt;
253 	reg ssize_t	c, r;
254 	ssize_t		hd, sz, size;
255 	Vcio_t		io;
256 	Transpose_t	*trans = vcgetmtdata(vc, Transpose_t*);
257 
258 	size = nrows*ncols;
259 	hd = vcsizeu(size) + (trans->type == TR_PLAIN ? 0 : vcsizeu(ncols));
260 	if(!(output = vcbuffer(vc, NIL(Vcchar_t*), 2*(size + vcsizeu(size)), hd)) )
261 		RETURN(-1);
262 
263 	chr = output + vcsizeu(size);
264 	run = chr + size + vcsizeu(size);
265 
266 	ch = -1; r = 0;
267 	for(enddt = (Vcchar_t*)data+size, c = 0; c < ncols; c += 1)
268 	for(dt = (Vcchar_t*)data + c; dt < enddt; dt += ncols)
269 	{	if(*dt == ch)
270 			r += 1;
271 		else
272 		{	if(r > 0)
273 			{	if(r >= 3 || ch == RL_ESC)
274 				{	if(r < (1<<7) )
275 						*run++ = r;
276 					else if(r < (1<<14) )
277 					{	*run++ = (r>>7)|128;
278 						*run++ = r&127;
279 					}
280 					else
281 					{	vcioinit(&io, run, 2*sizeof(ssize_t));
282 						vcioputu(&io, r);
283 						run = vcionext(&io);
284 					}
285 
286 					*chr++ = RL_ESC;
287 					if(ch != RL_ESC || r > 1)
288 						*chr++ = ch;
289 				}
290 				else
291 				{	*chr++ = ch;
292 					if(r == 2)
293 						*chr++ = ch;
294 				}
295 			}
296 
297 			ch = *dt; r = 1;
298 		}
299 	}
300 
301 	if(r > 0)
302 	{	if(r >= 3 || ch == RL_ESC)
303 		{	vcioinit(&io, run, 2*sizeof(ssize_t));
304 			vcioputu(&io, r);
305 			run = vcionext(&io);
306 
307 			*chr++ = RL_ESC;
308 			if(ch != RL_ESC || r > 1)
309 				*chr++ = ch;
310 		}
311 		else
312 		{	*chr++ = ch;
313 			if(r == 2)
314 				*chr++ = ch;
315 		}
316 	}
317 
318 	c = chr - (output + vcsizeu(size)); chr = output + vcsizeu(size);
319 	r = run - (chr + size + vcsizeu(size)); run = chr + size + vcsizeu(size);
320 
321 	if(vc->coder->coder) /* note that vc->coder is Vcrle */
322 	{	sz = 2*(size + vcsizeu(size));
323 		if((sz = _vcrle2coder(vc->coder,hd,chr,c,run,r,&output,sz)) < 0)
324 			RETURN(-1);
325 	}
326 	else
327 	{	vcioinit(&io, output, 2*(size+hd));
328 		vcioputu(&io, c);
329 		vcioputs(&io, chr, c);
330 		vcioputu(&io, r);
331 		vcioputs(&io, run, r);
332 		sz = vciosize(&io);
333 	}
334 
335 	output -= hd;
336 	vcioinit(&io, output, hd);
337 	if(trans->type != TR_PLAIN)
338 		vcioputu(&io, ncols);
339 	vcioputu(&io, size);
340 
341 	if(!(output = vcbuffer(vc, output, sz+hd, -1)) )
342 		RETURN(-1);
343 	if(out)
344 		*out = output;
345 	return sz+hd;
346 }
347 #endif
348 
349 #if __STD_C
transpose(Vcodex_t * vc,const Void_t * data,size_t size,Void_t ** out)350 static ssize_t transpose(Vcodex_t* vc, const Void_t* data, size_t size, Void_t** out)
351 #else
352 static ssize_t transpose(vc, data, size, out)
353 Vcodex_t*	vc;
354 Void_t*		data;
355 size_t		size;
356 Void_t**	out;
357 #endif
358 {
359 	Vcchar_t	*output, *dt;
360 	ssize_t		nrows, ncols, sz, z;
361 	int		rsep; /* if >= 0, record separator for var-length table */
362 	Vcio_t		io;
363 	Transctxt_t	*ctxt;
364 	Transpose_t	*trans;
365 
366 	vc->undone = 0;
367 	if((sz = (ssize_t)size) <= 0)
368 		return 0;
369 
370 	if(!(trans = vcgetmtdata(vc, Transpose_t*)) )
371 		RETURN(-1);
372 
373 	if(!(ctxt = vcgetcontext(vc, Transctxt_t*)) )
374 		RETURN(-1);
375 
376 	if((rsep = ctxt->rsep) < 0 && trans->ctxt->rsep >= 0 )
377 		rsep = trans->ctxt->rsep;
378 	if(rsep >= 0)
379 		ncols = 0;
380 	else if((ncols = ctxt->ncols) <= 0 && trans->ctxt->ncols > 0)
381 		ncols = trans->ctxt->ncols;
382 	nrows = 0;
383 
384 	if(rsep >= 0) /* var-length table */
385 	{	for(dt = (Vcchar_t*)data, z = sz-1; z >= 0; --z)
386 			if(dt[z] == rsep)
387 				break;
388 		vc->undone = sz - (z+1); /* exclude the dangling record */
389 		sz = z+1; /* data to be processed */
390 	}
391 	else
392 	{	if(ncols <= 0 && (ncols = transtrain(data,sz)) <= 0 )
393 			nrows = ncols = 0;
394 		else
395 		{	nrows = sz/ncols;
396 			ctxt->ncols = ncols;
397 		}
398 		vc->undone = sz - ncols*nrows;
399 		sz = nrows*ncols;
400 	}
401 	if(sz == 0)
402 		return 0;
403 
404 	z = 2*sizeof(ssize_t); /* for coding ncols or rsep */
405 	if(!(output = vcbuffer(vc, NIL(Vcchar_t*), sz, z)) )
406 		RETURN(-1);
407 
408 	if(rsep >= 0)
409 	{	if((nrows = transflip((Vcchar_t*)data, sz, rsep, output)) < 0)
410 			RETURN(-1);
411 	}
412 	else	transfixed((Vcchar_t*)data, nrows, ncols, output);
413 
414 	dt = output;
415 	if(vcrecode(vc, &output, &sz, z, 0) < 0 )
416 		RETURN(-1);
417 	if(dt != output)
418 		vcbuffer(vc, dt, -1, -1);
419 
420 	if(trans->type != TR_PLAIN)
421 	{	z = vcsizeu(ncols);
422 		if(ncols <= 0)
423 			z += 1;
424 		output -= z; sz += z;
425 		vcioinit(&io, output, z);
426 		vcioputu(&io, ncols);
427 		if(ncols <= 0)
428 			vcioputc(&io, rsep);
429 	}
430 
431 	if(out)
432 		*out = output;
433 	return sz;
434 }
435 
436 
437 #if __STD_C
untranspose(Vcodex_t * vc,const Void_t * data,size_t size,Void_t ** out)438 static ssize_t untranspose(Vcodex_t* vc, const Void_t* data, size_t size, Void_t** out)
439 #else
440 static ssize_t untranspose(vc, data, size, out)
441 Vcodex_t*	vc;
442 Void_t*		data;
443 size_t		size;
444 Void_t**	out;
445 #endif
446 {
447 	Vcchar_t	*output, *dt;
448 	ssize_t		nrows, ncols, z;
449 	int		rsep; /* if >= 0, record separator for var-length table */
450 	Vcio_t		io;
451 	Transctxt_t	*ctxt;
452 	Transpose_t	*trans;
453 
454 	vc->undone = 0;
455 	if(size == 0)
456 		return 0;
457 
458 	if(!(trans = vcgetmtdata(vc, Transpose_t*)) )
459 		RETURN(-1);
460 
461 	if(!(ctxt = vcgetcontext(vc, Transctxt_t*)) )
462 		RETURN(-1);
463 
464 	vcioinit(&io, data, size);
465 	rsep = -1; ncols = nrows = 0;
466 	if(trans->type != TR_PLAIN)
467 	{	if((ncols = vciogetu(&io)) < 0)
468 			RETURN(-1);
469 		if(ncols == 0)
470 			rsep = vciogetc(&io);
471 	}
472 	else
473 	{	if((rsep = ctxt->rsep) < 0)
474 			rsep = trans->ctxt->rsep;
475 		if(rsep < 0)
476 			if((ncols = ctxt->ncols) <= 0)
477 				ncols = trans->ctxt->ncols;
478 	}
479 
480 	if(rsep < 0 && ncols <= 0)
481 		RETURN(-1);
482 
483 	/* data to be untransposed */
484 	dt = vcionext(&io);
485 	z = vciomore(&io);
486 	if(vcrecode(vc, &dt, &z, 0, 0) < 0)
487 		RETURN(-1);
488 
489 	if(rsep < 0) /* fixed-length data */
490 	{	nrows = z/ncols;
491 		if(ncols*nrows != z)
492 			RETURN(-1);
493 	}
494 
495 	if(!(output = vcbuffer(vc, NIL(Vcchar_t*), z, 0)) )
496 		RETURN(-1);
497 
498 	if(rsep < 0)
499 		transfixed(dt, ncols, z/ncols, output);
500 	else if(unflip(dt, z, rsep, output) < 0)
501 		RETURN(-1);
502 
503 	if(out)
504 		*out = output;
505 	return z;
506 }
507 
508 #if __STD_C
transevent(Vcodex_t * vc,int type,Void_t * params)509 static int transevent(Vcodex_t* vc, int type, Void_t* params)
510 #else
511 static int transevent(vc, type, params)
512 Vcodex_t*	vc;
513 int		type;
514 Void_t*		params;
515 #endif
516 {
517 	Transpose_t	*trans;
518 	Transctxt_t	*ctxt;
519 	char		*data, val[1024];
520 	Vcmtarg_t	*arg;
521 
522 	if(type == VC_OPENING)
523 	{	if(!(trans = (Transpose_t*)calloc(1,sizeof(Transpose_t))) )
524 			RETURN(-1);
525 		if(!(trans->ctxt = (Transctxt_t*)vcinitcontext(vc, NIL(Vccontext_t*))) )
526 		{	free(trans);
527 			RETURN(-1);
528 		}
529 		vcsetmtdata(vc, trans);
530 		goto vc_setarg;
531 	}
532 	else if(type == VC_CLOSING)
533 	{	if((trans = vcgetmtdata(vc,Transpose_t*)) )
534 			free(trans);
535 
536 		vcsetmtdata(vc, NIL(Transpose_t*));
537 		return 0;
538 	}
539 	else if(type == VC_SETMTARG)
540 	{ vc_setarg:
541 		if(!(ctxt = vcgetcontext(vc, Transctxt_t*)) )
542 			RETURN(-1);
543 		for(data = (char*)params; data && *data; )
544 		{	data = vcgetmtarg(data, val, sizeof(val), _Transargs, &arg);
545 			switch(TYPECAST(int,arg->data) )
546 			{ case TR_SEPARATOR: /* setting the record separator */
547 				ctxt->rsep = val[0];
548 				ctxt->ncols = 0;
549 				break;
550 			  case TR_COLUMNS: /* setting number of columns */
551 				ctxt->ncols = (ssize_t) vcatoi(val);
552 				ctxt->rsep = -1;
553 				break;
554 			  case TR_PLAIN: /* setting transpose.0 */
555 			  default :
556 				if(type == VC_OPENING)
557 					trans->type = TYPECAST(int,arg->data);
558 				break;
559 			}
560 		}
561 
562 		return 0;
563 	}
564 	else if(type == VC_INITCONTEXT)
565 	{	if(!params)
566 			return 0;
567 		if(!(ctxt = (Transctxt_t*)calloc(1,sizeof(Transctxt_t))) )
568 			RETURN(-1);
569 		ctxt->ncols = 0;
570 		ctxt->rsep = -1;
571 		*((Transctxt_t**)params) = ctxt;
572 		return 1;
573 	}
574 	else if(type == VC_FREECONTEXT)
575 	{	if((ctxt = (Transctxt_t*)params) )
576 			free(ctxt);
577 		return 0;
578 	}
579 	else	return 0;
580 }
581 
582 Vcmethod_t _Vctranspose =
583 {	transpose,
584 	untranspose,
585 	transevent,
586 	"transpose", "Transposing a table.",
587 	"[-version?transpose (AT&T Research) 2003-01-01]" USAGE_LICENSE,
588 	_Transargs,
589 	256*1024,
590 	0
591 };
592 
593 VCLIB(Vctranspose)
594