1 /***********************************************************************
2 * *
3 * This software is part of the ast package *
4 * Copyright (c) 2003-2011 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Eclipse Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
8 * *
9 * A copy of the License is available at *
10 * http://www.eclipse.org/org/documents/epl-v10.html *
11 * (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12 * *
13 * Information and Software Systems Research *
14 * AT&T Research *
15 * Florham Park NJ *
16 * *
17 * Phong Vo <kpv@research.att.com> *
18 * *
19 ***********************************************************************/
20 #include <vclib.h>
21
22 /* Transpose a table of some given row length.
23 **
24 ** Written by Binh Vo and Kiem-Phong Vo (04/24/2006)
25 */
26
27 #define TR_PLAIN 1 /* strictly tranposed data only */
28 #define TR_SEPARATOR 2 /* setting record separator */
29 #define TR_COLUMNS 3 /* setting number of columns */
30
31 typedef struct _transflip_s
32 { ssize_t open; /* next open record */
33 ssize_t rpos; /* record position */
34 } Transflip_t;
35
36 typedef struct _transctxt_s
37 { Vccontext_t ctxt;
38 ssize_t ncols; /* number of columns in table */
39 int rsep; /* or record separator */
40 } Transctxt_t;
41
42 typedef struct _transpose_s
43 { int type;
44 Transctxt_t* ctxt; /* default context */
45 } Transpose_t;
46
47 /* arguments passable to vcopen() */
48 static Vcmtarg_t _Transargs[] =
49 {
50 { "rsep", "Rows are separated by 'rsep=character'.", (Void_t*)TR_SEPARATOR },
51 { "fsep", "This is equivalent to 'rsep'.", (Void_t*)TR_SEPARATOR },
52 { "columns", "Number of columns is defined by 'columns=length'.", (Void_t*)TR_COLUMNS },
53 { "0", "Only transposed data are coded, not meta-data.", (Void_t*)TR_PLAIN },
54 { 0 , "Both transposed data and meta-data are coded.", (Void_t*)0 }
55 };
56
57 #if __STD_C
transflip(Vcchar_t * data,ssize_t dtsz,int rsep,Vcchar_t * flip)58 static ssize_t transflip(Vcchar_t* data, ssize_t dtsz, int rsep, Vcchar_t* flip)
59 #else
60 static ssize_t transflip(data, dtsz, rsep, flip)
61 Vcchar_t* data;
62 ssize_t dtsz;
63 int rsep;
64 Vcchar_t* flip;
65 #endif
66 {
67 ssize_t z, p, r, nrows;
68 int byte;
69 Transflip_t *fl;
70
71 if(data[dtsz-1] != rsep)
72 RETURN(-1);
73
74 /* count number of rows */
75 for(nrows = 0, z = 0; z < dtsz; ++z)
76 if(data[z] == rsep)
77 nrows += 1;
78
79 /* allocate space for row data */
80 if(!(fl = (Transflip_t*)calloc(nrows, sizeof(Transflip_t))) )
81 RETURN(-1);
82
83 /* compute record starts and record sizes */
84 for(p = r = z = 0; z < dtsz; ++z)
85 { if(data[z] == rsep)
86 { fl[r].open = r; /* all slot starting out open */
87 fl[r].rpos = p; /* record position */
88 p = z+1;
89 r += 1;
90 }
91 }
92
93 /* now flip records */
94 while(fl[r = 0].open < nrows)
95 { do
96 { if((p = fl[r].open) > r) /* a done record */
97 { fl[r].open = p < nrows ? fl[p].open : nrows;
98 r = p;
99 }
100 else /* output one byte from this record */
101 { *flip++ = (byte = data[fl[r].rpos]);
102 if(byte == rsep) /* record is done */
103 fl[r].open = r+1;
104 fl[r].rpos += 1;
105 r += 1;
106 }
107 } while(r < nrows);
108 }
109
110 free(fl);
111 return nrows;
112 }
113
114 #if __STD_C
unflip(Vcchar_t * data,ssize_t dtsz,int rsep,Vcchar_t * flip)115 static ssize_t unflip(Vcchar_t* data, ssize_t dtsz, int rsep, Vcchar_t* flip)
116 #else
117 static ssize_t unflip(data, dtsz, rsep, flip)
118 Vcchar_t* data;
119 ssize_t dtsz;
120 int rsep;
121 Vcchar_t* flip;
122 #endif
123 {
124 ssize_t z, p, r, nrows;
125 Transflip_t *fl;
126
127 if(data[dtsz-1] != rsep)
128 RETURN(-1);
129
130 /* count number of rows */
131 for(nrows = 0, z = 0; z < dtsz; ++z)
132 if(data[z] == rsep)
133 nrows += 1;
134
135 /* allocate space for row data */
136 if(!(fl = (Transflip_t*)calloc(nrows, sizeof(Transflip_t))) )
137 RETURN(-1);
138 for(r = 0; r < nrows; ++r)
139 fl[r].open = r;
140
141 /* compute the size of each row */
142 for(r = z = 0; z < dtsz; )
143 { while((p = fl[r].open) > r) /* find row still open */
144 { fl[r].open = p < nrows ? fl[p].open : nrows;
145 r = p;
146 }
147
148 if(r < nrows)
149 { fl[r].rpos += 1; /* add to its length */
150 if(data[z] == rsep) /* this record is done */
151 fl[r].open = r+1;
152 z += 1;
153 }
154
155 if((r += 1) >= nrows) /* wrap around */
156 r = 0;
157 }
158
159 /* allocate space for each record */
160 for(p = r = 0; r < nrows; ++r)
161 { fl[r].open = r;
162 z = fl[r].rpos; /* save current length */
163 fl[r].rpos = p; /* set starting position */
164 p += z; /* starting position for next record */
165 }
166
167 /* rebuild records */
168 for(r = z = 0; z < dtsz; )
169 { while((p = fl[r].open) > r) /* find row still open */
170 { fl[r].open = p < nrows ? fl[p].open : nrows;
171 r = p;
172 }
173
174 if(r < nrows)
175 { flip[fl[r].rpos++] = data[z];
176 if(data[z] == rsep) /* this record is done */
177 fl[r].open = r+1;
178 z += 1;
179 }
180
181 if((r += 1) >= nrows)
182 r = 0;
183 }
184
185 free(fl);
186 return nrows;
187 }
188
189 #if __STD_C
transfixed(Vcchar_t * oldtbl,ssize_t nrows,ssize_t ncols,Vcchar_t * newtbl)190 static void transfixed(Vcchar_t* oldtbl, ssize_t nrows, ssize_t ncols, Vcchar_t* newtbl)
191 #else
192 static void transfixed(oldtbl, nrows, ncols, newtbl)
193 Vcchar_t* oldtbl; /* original table */
194 ssize_t nrows; /* #rows in otbl */
195 ssize_t ncols; /* #columns in otbl */
196 Vcchar_t* newtbl; /* new transposed table */
197 #endif
198 {
199 ssize_t r, nr, rr, c, nc, cc;
200 Vcchar_t *rdt, *cdt;
201
202 #define BLOCK 32 /* transposing small blocks to be cache-friendly */
203 for(r = 0; r < nrows; r += BLOCK)
204 { nr = (nrows-r) < BLOCK ? (nrows-r) : BLOCK;
205 for(c = 0; c < ncols; c += BLOCK)
206 { nc = (ncols-c) < BLOCK ? (ncols-c) : BLOCK;
207 rdt = oldtbl + r*ncols + c;
208 for(rr = 0; rr < nr; ++rr, rdt += ncols-nc)
209 { cdt = newtbl + c*nrows + r+rr;
210 for(cc = 0; cc < nc; ++cc, cdt += nrows)
211 *cdt = *rdt++;
212 }
213 }
214 }
215 }
216
217 /* compute the # of columns from training data */
218 #if __STD_C
transtrain(const Void_t * train,size_t trsz)219 static ssize_t transtrain(const Void_t* train, size_t trsz)
220 #else
221 static ssize_t transtrain(train, trsz)
222 Void_t* train; /* training data */
223 size_t trsz;
224 #endif
225 {
226 ssize_t ncols, sz;
227
228 if(!train || trsz <= 0)
229 return 1;
230
231 #define SAMPLE (64*1024)
232 for(sz = trsz < SAMPLE ? trsz : SAMPLE; sz <= trsz; sz *= 2)
233 if((ncols = vcperiod(train, sz)) > 0)
234 break;
235
236 return ncols <= 0 ? 1 : ncols;
237 }
238
239 #if 0 /* combining transpose and run-length-encoding - an optimization */
240 #if __STD_C
241 static ssize_t transrle(Vcodex_t* vc, const Void_t* data,
242 size_t ncols, size_t nrows, Void_t** out)
243 #else
244 static ssize_t transrle(vc, data, ncols, nrows, out)
245 Vcodex_t* vc;
246 Void_t* data;
247 ssize_t ncols, nrows;
248 Void_t** out;
249 #endif
250 {
251 reg Vcchar_t *run, *chr, *dt, ch;
252 Vcchar_t *output, *enddt;
253 reg ssize_t c, r;
254 ssize_t hd, sz, size;
255 Vcio_t io;
256 Transpose_t *trans = vcgetmtdata(vc, Transpose_t*);
257
258 size = nrows*ncols;
259 hd = vcsizeu(size) + (trans->type == TR_PLAIN ? 0 : vcsizeu(ncols));
260 if(!(output = vcbuffer(vc, NIL(Vcchar_t*), 2*(size + vcsizeu(size)), hd)) )
261 RETURN(-1);
262
263 chr = output + vcsizeu(size);
264 run = chr + size + vcsizeu(size);
265
266 ch = -1; r = 0;
267 for(enddt = (Vcchar_t*)data+size, c = 0; c < ncols; c += 1)
268 for(dt = (Vcchar_t*)data + c; dt < enddt; dt += ncols)
269 { if(*dt == ch)
270 r += 1;
271 else
272 { if(r > 0)
273 { if(r >= 3 || ch == RL_ESC)
274 { if(r < (1<<7) )
275 *run++ = r;
276 else if(r < (1<<14) )
277 { *run++ = (r>>7)|128;
278 *run++ = r&127;
279 }
280 else
281 { vcioinit(&io, run, 2*sizeof(ssize_t));
282 vcioputu(&io, r);
283 run = vcionext(&io);
284 }
285
286 *chr++ = RL_ESC;
287 if(ch != RL_ESC || r > 1)
288 *chr++ = ch;
289 }
290 else
291 { *chr++ = ch;
292 if(r == 2)
293 *chr++ = ch;
294 }
295 }
296
297 ch = *dt; r = 1;
298 }
299 }
300
301 if(r > 0)
302 { if(r >= 3 || ch == RL_ESC)
303 { vcioinit(&io, run, 2*sizeof(ssize_t));
304 vcioputu(&io, r);
305 run = vcionext(&io);
306
307 *chr++ = RL_ESC;
308 if(ch != RL_ESC || r > 1)
309 *chr++ = ch;
310 }
311 else
312 { *chr++ = ch;
313 if(r == 2)
314 *chr++ = ch;
315 }
316 }
317
318 c = chr - (output + vcsizeu(size)); chr = output + vcsizeu(size);
319 r = run - (chr + size + vcsizeu(size)); run = chr + size + vcsizeu(size);
320
321 if(vc->coder->coder) /* note that vc->coder is Vcrle */
322 { sz = 2*(size + vcsizeu(size));
323 if((sz = _vcrle2coder(vc->coder,hd,chr,c,run,r,&output,sz)) < 0)
324 RETURN(-1);
325 }
326 else
327 { vcioinit(&io, output, 2*(size+hd));
328 vcioputu(&io, c);
329 vcioputs(&io, chr, c);
330 vcioputu(&io, r);
331 vcioputs(&io, run, r);
332 sz = vciosize(&io);
333 }
334
335 output -= hd;
336 vcioinit(&io, output, hd);
337 if(trans->type != TR_PLAIN)
338 vcioputu(&io, ncols);
339 vcioputu(&io, size);
340
341 if(!(output = vcbuffer(vc, output, sz+hd, -1)) )
342 RETURN(-1);
343 if(out)
344 *out = output;
345 return sz+hd;
346 }
347 #endif
348
349 #if __STD_C
transpose(Vcodex_t * vc,const Void_t * data,size_t size,Void_t ** out)350 static ssize_t transpose(Vcodex_t* vc, const Void_t* data, size_t size, Void_t** out)
351 #else
352 static ssize_t transpose(vc, data, size, out)
353 Vcodex_t* vc;
354 Void_t* data;
355 size_t size;
356 Void_t** out;
357 #endif
358 {
359 Vcchar_t *output, *dt;
360 ssize_t nrows, ncols, sz, z;
361 int rsep; /* if >= 0, record separator for var-length table */
362 Vcio_t io;
363 Transctxt_t *ctxt;
364 Transpose_t *trans;
365
366 vc->undone = 0;
367 if((sz = (ssize_t)size) <= 0)
368 return 0;
369
370 if(!(trans = vcgetmtdata(vc, Transpose_t*)) )
371 RETURN(-1);
372
373 if(!(ctxt = vcgetcontext(vc, Transctxt_t*)) )
374 RETURN(-1);
375
376 if((rsep = ctxt->rsep) < 0 && trans->ctxt->rsep >= 0 )
377 rsep = trans->ctxt->rsep;
378 if(rsep >= 0)
379 ncols = 0;
380 else if((ncols = ctxt->ncols) <= 0 && trans->ctxt->ncols > 0)
381 ncols = trans->ctxt->ncols;
382 nrows = 0;
383
384 if(rsep >= 0) /* var-length table */
385 { for(dt = (Vcchar_t*)data, z = sz-1; z >= 0; --z)
386 if(dt[z] == rsep)
387 break;
388 vc->undone = sz - (z+1); /* exclude the dangling record */
389 sz = z+1; /* data to be processed */
390 }
391 else
392 { if(ncols <= 0 && (ncols = transtrain(data,sz)) <= 0 )
393 nrows = ncols = 0;
394 else
395 { nrows = sz/ncols;
396 ctxt->ncols = ncols;
397 }
398 vc->undone = sz - ncols*nrows;
399 sz = nrows*ncols;
400 }
401 if(sz == 0)
402 return 0;
403
404 z = 2*sizeof(ssize_t); /* for coding ncols or rsep */
405 if(!(output = vcbuffer(vc, NIL(Vcchar_t*), sz, z)) )
406 RETURN(-1);
407
408 if(rsep >= 0)
409 { if((nrows = transflip((Vcchar_t*)data, sz, rsep, output)) < 0)
410 RETURN(-1);
411 }
412 else transfixed((Vcchar_t*)data, nrows, ncols, output);
413
414 dt = output;
415 if(vcrecode(vc, &output, &sz, z, 0) < 0 )
416 RETURN(-1);
417 if(dt != output)
418 vcbuffer(vc, dt, -1, -1);
419
420 if(trans->type != TR_PLAIN)
421 { z = vcsizeu(ncols);
422 if(ncols <= 0)
423 z += 1;
424 output -= z; sz += z;
425 vcioinit(&io, output, z);
426 vcioputu(&io, ncols);
427 if(ncols <= 0)
428 vcioputc(&io, rsep);
429 }
430
431 if(out)
432 *out = output;
433 return sz;
434 }
435
436
437 #if __STD_C
untranspose(Vcodex_t * vc,const Void_t * data,size_t size,Void_t ** out)438 static ssize_t untranspose(Vcodex_t* vc, const Void_t* data, size_t size, Void_t** out)
439 #else
440 static ssize_t untranspose(vc, data, size, out)
441 Vcodex_t* vc;
442 Void_t* data;
443 size_t size;
444 Void_t** out;
445 #endif
446 {
447 Vcchar_t *output, *dt;
448 ssize_t nrows, ncols, z;
449 int rsep; /* if >= 0, record separator for var-length table */
450 Vcio_t io;
451 Transctxt_t *ctxt;
452 Transpose_t *trans;
453
454 vc->undone = 0;
455 if(size == 0)
456 return 0;
457
458 if(!(trans = vcgetmtdata(vc, Transpose_t*)) )
459 RETURN(-1);
460
461 if(!(ctxt = vcgetcontext(vc, Transctxt_t*)) )
462 RETURN(-1);
463
464 vcioinit(&io, data, size);
465 rsep = -1; ncols = nrows = 0;
466 if(trans->type != TR_PLAIN)
467 { if((ncols = vciogetu(&io)) < 0)
468 RETURN(-1);
469 if(ncols == 0)
470 rsep = vciogetc(&io);
471 }
472 else
473 { if((rsep = ctxt->rsep) < 0)
474 rsep = trans->ctxt->rsep;
475 if(rsep < 0)
476 if((ncols = ctxt->ncols) <= 0)
477 ncols = trans->ctxt->ncols;
478 }
479
480 if(rsep < 0 && ncols <= 0)
481 RETURN(-1);
482
483 /* data to be untransposed */
484 dt = vcionext(&io);
485 z = vciomore(&io);
486 if(vcrecode(vc, &dt, &z, 0, 0) < 0)
487 RETURN(-1);
488
489 if(rsep < 0) /* fixed-length data */
490 { nrows = z/ncols;
491 if(ncols*nrows != z)
492 RETURN(-1);
493 }
494
495 if(!(output = vcbuffer(vc, NIL(Vcchar_t*), z, 0)) )
496 RETURN(-1);
497
498 if(rsep < 0)
499 transfixed(dt, ncols, z/ncols, output);
500 else if(unflip(dt, z, rsep, output) < 0)
501 RETURN(-1);
502
503 if(out)
504 *out = output;
505 return z;
506 }
507
508 #if __STD_C
transevent(Vcodex_t * vc,int type,Void_t * params)509 static int transevent(Vcodex_t* vc, int type, Void_t* params)
510 #else
511 static int transevent(vc, type, params)
512 Vcodex_t* vc;
513 int type;
514 Void_t* params;
515 #endif
516 {
517 Transpose_t *trans;
518 Transctxt_t *ctxt;
519 char *data, val[1024];
520 Vcmtarg_t *arg;
521
522 if(type == VC_OPENING)
523 { if(!(trans = (Transpose_t*)calloc(1,sizeof(Transpose_t))) )
524 RETURN(-1);
525 if(!(trans->ctxt = (Transctxt_t*)vcinitcontext(vc, NIL(Vccontext_t*))) )
526 { free(trans);
527 RETURN(-1);
528 }
529 vcsetmtdata(vc, trans);
530 goto vc_setarg;
531 }
532 else if(type == VC_CLOSING)
533 { if((trans = vcgetmtdata(vc,Transpose_t*)) )
534 free(trans);
535
536 vcsetmtdata(vc, NIL(Transpose_t*));
537 return 0;
538 }
539 else if(type == VC_SETMTARG)
540 { vc_setarg:
541 if(!(ctxt = vcgetcontext(vc, Transctxt_t*)) )
542 RETURN(-1);
543 for(data = (char*)params; data && *data; )
544 { data = vcgetmtarg(data, val, sizeof(val), _Transargs, &arg);
545 switch(TYPECAST(int,arg->data) )
546 { case TR_SEPARATOR: /* setting the record separator */
547 ctxt->rsep = val[0];
548 ctxt->ncols = 0;
549 break;
550 case TR_COLUMNS: /* setting number of columns */
551 ctxt->ncols = (ssize_t) vcatoi(val);
552 ctxt->rsep = -1;
553 break;
554 case TR_PLAIN: /* setting transpose.0 */
555 default :
556 if(type == VC_OPENING)
557 trans->type = TYPECAST(int,arg->data);
558 break;
559 }
560 }
561
562 return 0;
563 }
564 else if(type == VC_INITCONTEXT)
565 { if(!params)
566 return 0;
567 if(!(ctxt = (Transctxt_t*)calloc(1,sizeof(Transctxt_t))) )
568 RETURN(-1);
569 ctxt->ncols = 0;
570 ctxt->rsep = -1;
571 *((Transctxt_t**)params) = ctxt;
572 return 1;
573 }
574 else if(type == VC_FREECONTEXT)
575 { if((ctxt = (Transctxt_t*)params) )
576 free(ctxt);
577 return 0;
578 }
579 else return 0;
580 }
581
582 Vcmethod_t _Vctranspose =
583 { transpose,
584 untranspose,
585 transevent,
586 "transpose", "Transposing a table.",
587 "[-version?transpose (AT&T Research) 2003-01-01]" USAGE_LICENSE,
588 _Transargs,
589 256*1024,
590 0
591 };
592
593 VCLIB(Vctranspose)
594