1 
2 /**
3  *  @file columns.c
4  *  @group columns
5  *  @{
6  */
7 /*
8  *  Columns Copyright (C) 1992-2018 by Bruce Korb - all rights reserved
9  *  Columns is free software.
10  *  This file is part of AutoGen.
11  *
12  *  AutoGen Copyright (C) 1992-2017 by Bruce Korb - all rights reserved
13  *
14  *  AutoGen is free software: you can redistribute it and/or modify it
15  *  under the terms of the GNU General Public License as published by the
16  *  Free Software Foundation, either version 3 of the License, or
17  *  (at your option) any later version.
18  *
19  *  AutoGen is distributed in the hope that it will be useful, but
20  *  WITHOUT ANY WARRANTY; without even the implied warranty of
21  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
22  *  See the GNU General Public License for more details.
23  *
24  *  You should have received a copy of the GNU General Public License along
25  *  with this program.  If not, see <http://www.gnu.org/licenses/>.
26  */
27 #ifndef NUL
28 # define NUL '\0'
29 #endif
30 
/**
 *  Per-column cursor used when emitting entries column-by-column.
 *  One of these is kept for every output column.
 */
typedef struct print_list {
    char ** papz;   /* next entry pointer to emit for this column */
    int     index;  /* position of that entry within the entry list */
} tPrintList, *tpPrintList;
37 
/* Widest stored entry (text + separator + NUL), maintained by readLines(). */
int maxEntryWidth = 0;
/* Not referenced by the code visible in this file. */
int fillColumnCt  = 0;

/* Capacity of the raw and the formatted input line buffers. */
#define LINE_LIMIT 4096
char zLine[LINE_LIMIT];      /* one raw input line, as read from stdin      */
char zFmtLine[LINE_LIMIT];   /* the same line after the --format rewrite    */

char **      papzLines  = NULL;  /* growable array of stored entries        */
char const * pzLinePfx  = "";    /* prefix emitted at the start of lines    */
char const * pzFirstPfx = NULL;  /* prefix for the first line, when set     */
size_t       allocCt    = 0;     /* slots allocated in papzLines            */
size_t       usedCt     = 0;     /* slots of papzLines actually in use      */
size_t       columnCt   = 0;     /* number of output columns                */
size_t       columnSz   = 0;     /* width of one output column              */
size_t       indentSize = 0;     /* column count consumed by the indent     */
53 
54 MOD_LOCAL inline void *
55 malloc_or_die(size_t sz);
56 MOD_LOCAL char const *
57 construct_first_pfx(char const * f_indent);
58 MOD_LOCAL uint32_t
59 pad_indentation(char const * pzIndentArg, char const ** pfx);
60 MOD_LOCAL void
61 readLines(void);
62 MOD_LOCAL void
63 writeColumns(void);
64 MOD_LOCAL void
65 trim_last_separation(void);
66 MOD_LOCAL void
67 writeRows(void);
68 MOD_LOCAL int
69 emitWord(char const * word, size_t len, int col);
70 MOD_LOCAL void
71 writeFill(void);
72 MOD_LOCAL int
73 compProc(const void * p1, const void * p2);
74 
75 int
main(int argc,char ** argv)76 main(int argc, char ** argv)
77 {
78     (void)optionProcess( &columnsOptions, argc, argv );
79 
80     if (HAVE_OPT( INDENT )) {
81         indentSize = pad_indentation( OPT_ARG(INDENT), &pzLinePfx);
82         OPT_VALUE_WIDTH -= (long)indentSize;
83 
84         pzFirstPfx = HAVE_OPT(FIRST_INDENT)
85             ? construct_first_pfx( OPT_ARG(FIRST_INDENT))
86             : pzLinePfx;
87     }
88 
89     if (HAVE_OPT( LINE_SEPARATION ))
90         OPT_VALUE_WIDTH -= (long int)strlen( OPT_ARG( LINE_SEPARATION));
91 
92     if (HAVE_OPT( COL_WIDTH ))
93         columnSz = (size_t)OPT_VALUE_COL_WIDTH;
94 
95     if (HAVE_OPT( COLUMNS ))
96         columnCt = (size_t)OPT_VALUE_COLUMNS;
97 
98     if (OPT_VALUE_WIDTH <= 16)
99         OPT_VALUE_WIDTH = 16;
100 
101     readLines();
102 
103     if (HAVE_OPT( SORT ))
104         qsort(VOIDP(papzLines), usedCt, sizeof(char *), &compProc);
105 
106     if (HAVE_OPT( BY_COLUMNS ))
107         writeColumns();
108     else if (HAVE_OPT(FILL))
109         writeFill();
110     else
111         writeRows();
112 
113     return EXIT_SUCCESS;
114 }
115 
116 MOD_LOCAL inline void *
malloc_or_die(size_t sz)117 malloc_or_die(size_t sz)
118 {
119     void * res = malloc(sz);
120     if (res == NULL)
121         die(COLUMNS_EXIT_FAILURE, "could not allocate %d bytes", sz);
122 
123     return res;
124 }
125 
126 MOD_LOCAL char const *
construct_first_pfx(char const * f_indent)127 construct_first_pfx(char const * f_indent)
128 {
129     static char const pad_fmt[] = "%%-%ds";
130     // 24 > log10(0xFFFFFFFFFFFFFFFFULL)
131     char pfx_buf[24 + sizeof(pad_fmt)];
132     size_t firstSize = pad_indentation(f_indent, &pzFirstPfx);
133     size_t len;
134     char * res;
135 
136     /*
137      *  If this first line exceeds the indentation size, then we
138      *  need to append a newline and any indentation.
139      */
140     if (firstSize > indentSize) {
141         size_t sep_len = HAVE_OPT(LINE_SEPARATION)
142             ? strlen( OPT_ARG(LINE_SEPARATION)) : 0;
143         len = firstSize + sep_len + indentSize + 3;
144         sprintf(pfx_buf, pad_fmt, (int)firstSize);
145     } else {
146         len = indentSize + 3;
147         sprintf(pfx_buf, pad_fmt, (int)indentSize);
148     }
149 
150     res = malloc_or_die(len);
151     snprintf(res, len, pfx_buf, pzFirstPfx);
152     pzFirstPfx = res;
153 
154     if (firstSize > indentSize) {
155         char * p = res + firstSize;
156 
157         if (HAVE_OPT( LINE_SEPARATION )) {
158             len = strlen(OPT_ARG(LINE_SEPARATION));
159             memcpy(p, OPT_ARG(LINE_SEPARATION), len);
160             p  += len;
161         }
162 
163         sprintf(p, "\n%s", pzLinePfx);
164     }
165 
166     return res;
167 }
168 
169 MOD_LOCAL uint32_t
pad_indentation(char const * pzIndentArg,char const ** pfx)170 pad_indentation(char const * pzIndentArg, char const ** pfx)
171 {
172     char * pz;
173     unsigned int cct;
174 
175     errno = 0;
176     cct = (unsigned int)strtoul(pzIndentArg, &pz, 0);
177 
178     /*
179      *  IF the indent argument is a number
180      */
181     if ((*pz == NUL) && (errno == 0) && (cct < OPT_VALUE_WIDTH)) {
182         char * p;
183 
184         /*
185          *  Allocate a string to hold the line prefix
186          */
187         *pfx = p = malloc_or_die( (size_t)cct + 1 );
188 
189         /*
190          *  Set it to a NUL terminated string of spaces
191          */
192         if (cct > 0)
193             memset(p, ' ', (size_t)cct);
194         p[cct] = NUL;
195 
196     } else {
197         /*
198          *  Otherwise, set the line prefix to whatever the string is.
199          *  It will not be the empty string because that is handled
200          *  as an indent count of zero and is ignored.
201          */
202         char const * p = pzIndentArg;
203         *pfx = pzIndentArg;
204         cct     =  0;
205 
206         for (;;) {
207             /*
208              *  Compute the length of the last line of the prefix.
209              */
210             switch (*p++) {
211             case NUL:
212                 goto colsCounted;
213 
214             case '\t':
215                 cct += (unsigned)OPT_VALUE_TAB_WIDTH;
216                 cct -= (unsigned)(cct % OPT_VALUE_TAB_WIDTH);
217                 break;
218 
219             case '\n':
220             case '\f':
221             case '\r':
222                 cct = 0;
223                 break;
224 
225             default:
226                 cct++;
227                 break;
228 
229             case '\a':
230                 break;
231             }
232         } colsCounted:;
233     }
234 
235     return cct;
236 }
237 
238 MOD_LOCAL void
readLines(void)239 readLines(void)
240 {
241     int sepLen = HAVE_OPT(SEPARATION)
242         ? (int)strlen(OPT_ARG(SEPARATION)) : 0;
243 
244     /*
245      *  Read the input text, stripping trailing white space
246      */
247     for (;;) {
248         char *  pzL;
249         char *  pzText = fgets(zLine, (int)sizeof(zLine), stdin);
250         size_t  len;
251 
252         if (pzText == NULL)
253             break;
254 
255         /*
256          *  Trim off trailing white space.
257          */
258         len = strlen( pzText );
259         pzText += len;
260         while (isspace(pzText[-1])) {
261             if (--pzText == zLine) {
262                 if (HAVE_OPT(FILL))
263                     break;
264                 goto next_line;
265             }
266             len--;
267         }
268 
269         *pzText = NUL;
270 
271         /*
272          *  IF the input lines are to be reformatted,
273          *  THEN the length is the result of the sprintf
274          *  Else, compute the length.
275          */
276         if (HAVE_OPT(FORMAT)) {
277             pzText = zFmtLine;
278             len = (size_t)snprintf(
279                 zFmtLine, sizeof(zFmtLine), OPT_ARG(FORMAT), zLine);
280         } else {
281             pzText = zLine;
282         }
283 
284         /*
285          *  Allocate a string and space in the pointer array.
286          */
287         len += (size_t)sepLen + 1;
288         pzL = (char *)malloc_or_die( len );
289         if (++usedCt > allocCt) {
290             allocCt += 128;
291             papzLines = (char **)realloc(VOIDP(papzLines),
292                                          sizeof(char *) * allocCt );
293         }
294         papzLines[ usedCt-1 ] = pzL;
295 
296         /*
297          *  Copy the text and append the separation character
298          */
299         strcpy( pzL, pzText );
300 
301         /*
302          *  Initially, all strings have the separator,
303          *  the entries may get reordered.
304          */
305         if (sepLen > 0)
306             strcat(pzL, OPT_ARG(SEPARATION));
307 
308         if ((int)len > maxEntryWidth)
309             maxEntryWidth = (int)len;
310     next_line:;
311     }
312 
313     if (maxEntryWidth == 0) {
314         fputs( "columns warning:  no input text was read\n", stderr );
315         exit( EXIT_SUCCESS );
316     }
317 
318     /*
319      *  Set the line width to the amount of space we have to play with.
320      */
321     if ((OPT_VALUE_WIDTH < maxEntryWidth) && (! HAVE_OPT(FILL)))
322         OPT_VALUE_WIDTH = maxEntryWidth;
323 
324     /*
325      *  If we do not have a column size set,
326      *  then figure out what it must be.
327      */
328     if (columnSz == 0) {
329         /*
330          *  IF the column count has not been set,
331          *  THEN compute it.
332          */
333         if (columnCt == 0)
334             columnCt = (size_t)(OPT_VALUE_WIDTH / maxEntryWidth);
335 
336         /*
337          *  IF there are to be multiple columns, ...
338          */
339         if (columnCt > 1) {
340             size_t spreadwidth = (size_t)(OPT_VALUE_WIDTH - maxEntryWidth);
341             int  sz = (int)(spreadwidth / (size_t)(columnCt-1));
342 
343             /*
344              *  Either there is room for added space,
345              *  or we must (possibly) reduce the number of columns
346              */
347             if (sz >= maxEntryWidth)
348                 columnSz = (size_t)sz;
349             else {
350                 columnCt = ((size_t)spreadwidth / (size_t)maxEntryWidth) + 1;
351                 if (columnCt > 1)
352                      columnSz = (size_t)spreadwidth / (size_t)(columnCt - 1);
353                 else columnSz = (size_t)OPT_VALUE_WIDTH;
354             }
355         }
356     }
357 
358     /*
359      *  Otherwise, the column size has been set.  Ensure it is sane.
360      */
361     else {
362         bool compute_val = (columnCt == 0);
363 
364         /*
365          *  Increase the column size to the width of the widest entry
366          */
367         if (maxEntryWidth > (int)columnSz)
368             columnSz = (size_t)maxEntryWidth;
369 
370         /*
371          *  IF     we have not been provided a column count
372          *    *OR* we are set to overfill the output line,
373          *  THEN compute the number of columns.
374          */
375         if (! compute_val) {
376             long width = (long)maxEntryWidth + (long)(columnSz * (columnCt-1));
377             compute_val = width > (unsigned)OPT_VALUE_WIDTH;
378         }
379         if (compute_val)
380             columnCt = (size_t)(
381                 (((long)OPT_VALUE_WIDTH - (long)maxEntryWidth) /
382                  (long)columnSz) + 1);
383     }
384 
385     /*
386      *  Ensure that any "spread" we added to the column size
387      *  does not exceed the parameterized limit.
388      */
389     if (   HAVE_OPT( SPREAD )
390            && ((maxEntryWidth + OPT_VALUE_SPREAD - 1) < (int)columnSz))
391         columnSz = (size_t)(maxEntryWidth + OPT_VALUE_SPREAD - 1);
392 }
393 
394 MOD_LOCAL void
writeColumns(void)395 writeColumns(void)
396 {
397     char zFmt[ 12 ];
398     int  colCt, rowCt, col, row;
399     tpPrintList pPL;
400 
401     colCt = (int)columnCt;
402     snprintf(zFmt, sizeof(zFmt), "%%-%ds", (int)columnSz);
403 
404     if (colCt == 1) {
405         writeRows();
406         return;
407     }
408 
409     pPL = (tpPrintList)malloc_or_die((unsigned long)colCt * sizeof(tPrintList));
410 
411     /*
412      *  This "loop" is normally executed half way through and exited.
413      *  IF, however, we would  produce an empty final column,
414      *  we will reduce our column count and line width and then
415      *  try the top-of-column pointer computation again.
416      *
417      *  The problem solved here is that sometimes, when the
418      *  number of entries in a row is greater than the number of rows,
419      *  it is possible that all the entries that would have been
420      *  in the last column are, instead, essentially put on the
421      *  last row.  That will leave the final column empty.
422      *  We could regroup at that point and spread the columns some more,
423      *  but, if done, is an exercise for later.
424      */
425     for (;;) {
426         int  rem;
427         int  fsz;
428 
429         rowCt = ((int)usedCt/colCt) + (((int)usedCt % colCt) ? 1 : 0);
430 
431         /*
432          *  FOR each column, compute the address of the pointer to
433          *  the string at the top of the column, and the index of
434          *  that entry.
435          */
436         for (col = 0; col < colCt ; col++) {
437             pPL[col].papz  = papzLines + (col * rowCt);
438             pPL[col].index = col * rowCt;
439         }
440 
441         /*
442          *  IF the final column is not empty,
443          *  THEN break out and start printing.
444          */
445         if (pPL[colCt-1].index < (int)usedCt)
446             break;
447 
448         /*
449          *  The last column is blank, so we reduce our column count,
450          *  even if the user specified a count!!
451          */
452         colCt--;
453         rem   = (int)OPT_VALUE_WIDTH - (int)(colCt * maxEntryWidth);
454 
455         if ((rem == 0) || (colCt < 2) || (columnSz > 0))
456             fsz = maxEntryWidth;
457         else
458             fsz = maxEntryWidth + (rem / (colCt-1));
459         snprintf( zFmt, sizeof(zFmt), "%%-%ds", fsz );
460     }
461 
462     /*
463      *  Now, actually print each row...
464      */
465     for ( row = 0 ;; ) {
466         char * pzL;
467         char * pzE;
468 
469         if (pzLinePfx != NULL)
470             fputs( pzLinePfx, stdout );
471 
472         /*
473          *  Increment the index of the current entry in the last column.
474          *  IF it goes beyond the end of the entries in use,
475          *  THEN reduce our column count.
476          */
477         if ((pPL[colCt-1].index)++ >= (int)usedCt)
478             colCt--;
479 
480         /*
481          *  Get the address of the string in the last column.
482          */
483         pzE = *(pPL[colCt-1].papz++);
484 
485         col = 0;
486 
487         /*
488          *  FOR every column except the last,
489          *     print the entry with the width format
490          *
491          *  No need to worry about referring to a non-existent entry.  Only
492          *  the last column might have that problem, and we addressed it above
493          *  where the column count got decremented.
494          */
495         while (++col < colCt) {
496             pzL = *(pPL[col-1].papz++);
497             fprintf( stdout, zFmt, pzL );
498             free( VOIDP(pzL) );
499         }
500 
501         /*
502          *  See if we are on the last row. If so, then this is the last entry.
503          *  Strip any separation characters, emit the entry and break out.
504          */
505         if (++row == rowCt) {
506             /*
507              *  IF we have a separator,
508              *  THEN remove it from the last entry.
509              */
510             if (HAVE_OPT( SEPARATION )) {
511                 char * pz = pzE + strlen( pzE )
512                           - strlen( OPT_ARG(SEPARATION));
513                 *pz = NUL;
514             }
515 
516             fputs(pzE, stdout);
517             if (HAVE_OPT(ENDING))
518                 fputs(OPT_ARG(ENDING), stdout);
519 
520             putc( '\n', stdout );
521             break;
522         }
523 
524         /*
525          *  Print the last entry on the line, without the width format.
526          *  If we have line separation (which does not apply to the last
527          *  line), then emit those characters, too.
528          */
529         fputs( pzE, stdout );
530         if (HAVE_OPT( LINE_SEPARATION ))
531             fputs( OPT_ARG( LINE_SEPARATION ), stdout );
532 
533         putc( '\n', stdout );
534         free( VOIDP(pzE) );
535     }
536 
537     free(pPL);
538 }
539 
540 MOD_LOCAL void
trim_last_separation(void)541 trim_last_separation(void)
542 {
543     char * pz = papzLines[ usedCt-1 ];
544     pz += strlen(pz) - strlen( OPT_ARG(SEPARATION));
545     *pz = NUL;
546 }
547 
548 MOD_LOCAL void
writeRows(void)549 writeRows(void)
550 {
551     char zFmt[32];
552     int  colCt;
553 
554     colCt = (int)columnCt;
555     snprintf(zFmt, sizeof(zFmt), "%%-%ds", (int)columnSz);
556 
557     if (HAVE_OPT( SEPARATION ))
558         trim_last_separation();
559 
560     if (pzFirstPfx != NULL) {
561         fputs( pzFirstPfx, stdout );
562         pzFirstPfx = pzLinePfx;
563     }
564 
565     {
566         char ** ppzLL = papzLines;
567         size_t  left  = usedCt;
568         int     lnNo  = 0;
569 
570         /*
571          *  FOR every entry we are to emit, ...
572          */
573         for (;;) {
574             char * pzL = *ppzLL++;
575 
576             /*
577              *  IF this is the last entry,
578              *  THEN emit it and a new line and break out
579              */
580             if (--left <= 0) {
581                 fputs(pzL, stdout);
582                 if (HAVE_OPT(ENDING))
583                     fputs(OPT_ARG(ENDING), stdout);
584 
585                 putc('\n', stdout);
586                 free(VOIDP(pzL));
587                 break;
588             }
589 
590             /*
591              *  IF the count of entries on this line is still less
592              *     than the number of columns,
593              *  THEN emit the padded entry
594              *  ELSE ...
595              */
596             if (++lnNo < colCt)
597                 fprintf( stdout, zFmt, pzL );
598 
599             else {
600                 lnNo = 0;
601                 /*
602                  *  Last entry on the line.  Emit the string without padding.
603                  *  IF we have a line separation string, emit that too.
604                  */
605                 fputs( pzL, stdout );
606                 if (HAVE_OPT( LINE_SEPARATION ))
607                     fputs( OPT_ARG( LINE_SEPARATION ), stdout );
608 
609                 putc( '\n', stdout );
610 
611                 /*
612                  *  Start the next line with any required indentation
613                  */
614                 if (pzFirstPfx != NULL) {
615                     fputs( pzFirstPfx, stdout );
616                     pzFirstPfx = pzLinePfx;
617                 }
618             }
619 
620             free( VOIDP(pzL) );
621         }
622     }
623 }
624 
625 MOD_LOCAL int
emitWord(char const * word,size_t len,int col)626 emitWord(char const * word, size_t len, int col)
627 {
628     static int ended_with_period = 0;
629 
630     if (col > 0) {
631         if ((int)len >= (OPT_VALUE_WIDTH - col)) {
632             putc('\n', stdout);
633             if (pzLinePfx != NULL)
634                 fputs(pzLinePfx, stdout);
635             col = 0;
636 
637         } else {
638             if (ended_with_period) {
639                 putc(' ', stdout);
640                 col++;
641             }
642             putc(' ', stdout);
643             col++;
644         }
645     }
646 
647     fwrite(word, len, 1, stdout);
648     col += (int)len;
649     ended_with_period = (word[len - 1] == '.');
650 
651     return col;
652 }
653 
654 /*
655  *  writeFill -- fill the output.  Pack together as much as will fit
656  *  on each line.
657  */
658 MOD_LOCAL void
writeFill(void)659 writeFill(void)
660 {
661     char ** ppzLL = papzLines;
662     size_t  left  = usedCt;
663     int     colNo = 0;
664 
665     if (HAVE_OPT( SEPARATION ))
666         trim_last_separation();
667 
668     if (pzFirstPfx != NULL)
669         fputs(pzFirstPfx, stdout);
670 
671     /*
672      *  FOR every entry we are to emit, ...
673      */
674     while (left-- > 0) {
675         char * pzL = *ppzLL;
676 
677         while (isspace(*pzL))  pzL++;
678 
679         /*
680          *  Blank lines are magical and trigger a blank line in output.
681          */
682         if (*pzL == NUL) {
683             if (! HAVE_OPT(SORT)) {
684                 if (colNo > 0) /* guard against multiple blank lines */
685                     putc('\n', stdout);
686                 putc('\n', stdout);
687                 colNo = -2;
688             }
689 
690             free(*(ppzLL++));
691             continue;
692         }
693 
694         /*
695          *  We are going to emit some output.  Make sure we're indented.
696          */
697         if (colNo < 0) {
698             if (pzLinePfx != NULL)
699                 fputs(pzLinePfx, stdout);
700             colNo = 0;
701         }
702 
703         do {
704             size_t tknlen;
705 
706             for (tknlen = 0; pzL[tknlen] != NUL; tknlen++)
707                 if (isspace(pzL[tknlen]))
708                     break;
709             colNo = emitWord(pzL, tknlen, colNo);
710             pzL += tknlen;
711             while (isspace(*pzL))  pzL++;
712         } while (*pzL != NUL);
713 
714         free(*(ppzLL++));
715     }
716 
717     if (HAVE_OPT(ENDING) && (left == 0))
718         fputs(OPT_ARG(ENDING), stdout);
719 
720     putc('\n', stdout);
721 }
722 
723 
724 /*
725  *  Line comparison procedure
726  */
727 MOD_LOCAL int
compProc(const void * p1,const void * p2)728 compProc(const void * p1, const void * p2)
729 {
730     char const * pz1 = *(char * const *)p1;
731     char const * pz2 = *(char * const *)p2;
732     return strcmp(pz1, pz2);
733 }
734 /** @}
735  *
736  * Local Variables:
737  * mode: C
738  * c-file-style: "stroustrup"
739  * indent-tabs-mode: nil
740  * End:
741  * end of columns/columns.c */
742