1 /*
2 * Copyright (C) 1991,1992,1993 NEC Corporation.
3 */
4 /*
5 * Table parsing routine
6 */
/* RCS version identification string; compiled out when `lint` is defined. */
#ifndef lint
static char rcsid[] =
    "$Id: table.c,v 2.13 1994/04/19 10:17:06 uchida Exp $ (NEC)";
#endif
11
12 #include <stdio.h>
13 #include "plain2.h"
14 #include "table.h"
15 #include "kanji.h"
16
/*
 * Attribute bits recorded for each byte of a line by lineAttr().
 */
#define ATR_VLINE   0x01    /* vertical line character */
#define ATR_HLINE   0x02    /* horizontal (single) line character */
#define ATR_HDLINE  0x04    /* horizontal double line character */
#define ATR_ANYLINE (ATR_VLINE | ATR_HLINE | ATR_HDLINE)
#define ATR_CHAR    0x08    /* character that is neither a line nor a blank */
#define ATR_JIS2    0x10    /* second byte of a two-byte JIS frame-line char */
#define ATR_PLUS    (ATR_HLINE | ATR_HDLINE)    /* '+' fits both line kinds */
24 /*
25 * Alphabetic frame lines
26 */
27 struct strVal tblwaku1[] = {
28 {"-", ATR_HLINE},
29 {"=", ATR_HDLINE},
30 {"|", ATR_VLINE},
31 {"+", ATR_PLUS},
32 {"", 0}};
#ifdef KANJI
/*
 * JIS ruled-line (keisen) characters and their attribute bits.
 * The first entry is a horizontal line, the second a vertical line and
 * the remaining ones carry both attributes.  The list ends with an entry
 * whose pattern is the empty string.
 *
 * NOTE(review): the two-byte literals below appear to have been damaged
 * by a character-set conversion -- confirm them against a pristine copy
 * of this file before building with KANJI defined.
 */
struct strVal tblwaku2[] = {
    { "��", ATR_HLINE },
    { "��", ATR_VLINE },
    { "��", ATR_HLINE | ATR_VLINE },
    { "��", ATR_HLINE | ATR_VLINE },
    { "��", ATR_HLINE | ATR_VLINE },
    { "��", ATR_HLINE | ATR_VLINE },
    { "��", ATR_HLINE | ATR_VLINE },
    { "��", ATR_HLINE | ATR_VLINE },
    { "��", ATR_HLINE | ATR_VLINE },
    { "��", ATR_HLINE | ATR_VLINE },
    { "��", ATR_HLINE | ATR_VLINE },
    { "",   0 }
};
#endif
51 #define COLV_JIS 2
52 #define COLV_ASC 1
53 static char colVline[MAX_LINE_LEN]; /* Vertical line exist on the column*/
54 static char colNchrs[MAX_LINE_LEN]; /* Number of chars on the column */
55 static char colPchrs[MAX_LINE_LEN]; /* chars to be printed */
56 static short maxLen; /* Max length of the region */
57 static short wakuAll, wakuExist;
58 static short lineHoriFull, lineHoriPart;
59
60 int expectedFields; /* Number of expected fields */
61
62 static unsigned char tblKanjiByte[256];
63 static unsigned char tblAsciAttr[256];
tblParseInit()64 tblParseInit()
65 {
66 struct strVal *twp;
67 bzero((char *)tblKanjiByte, sizeof(tblKanjiByte));
68 bzero((char *)tblAsciAttr, sizeof(tblAsciAttr));
69 for (twp = tblwaku1; *twp->pattern; twp++)
70 tblAsciAttr[(unsigned char)*twp->pattern] = twp->value;
71 #ifdef KANJI
72 for (twp = tblwaku2; *twp->pattern; twp++)
73 tblKanjiByte[(unsigned char)*twp->pattern]++;
74 #endif
75 }
/*
 * Return the attribute bits of the JIS frame-line character starting at
 * `s', or 0 when `s' does not start such a character (always 0 when
 * KANJI is not defined or JIS tables are disabled).
 */
tblKwakuAttr(s)
register char *s;
{
    register struct strVal *twp;
#ifdef KANJI
    /* Cheap pre-check: could this be the first byte of a keisen character? */
    if (!jisTableEnabled || !maybeThinKeisen(s))
        return 0;
    for (twp = tblwaku2; *twp->pattern; twp++) {
        if (str2match(s, twp->pattern))
            return twp->value;
    }
#endif
    return 0;
}
93 /*
94 * Attribute of the line (Horizontal line, Double line)
95 */
lineAttr(s,horiAtrp)96 lineAttr(s, horiAtrp)
97 register char *s;
98 char **horiAtrp;
99 {
100 static char attribute[MAX_LINE_LEN];
101 register char *atrp;
102 int charAttr;
103 int twinLine, singleLine;
104 int hexist;
105 int inLeadingSpace = 1;
106 if (*s == '\0')
107 return HORI_NULL;
108 twinLine = singleLine = 1;
109 hexist = 0;
110 for (atrp = attribute; *s; s++) {
111 if ((charAttr = tblAsciAttr[(unsigned char)*s])
112 || (tblKanjiByte[(unsigned char)*s]
113 && (charAttr = tblKwakuAttr(s)))) {
114 inLeadingSpace = 0;
115 *atrp++ = charAttr;
116 if ((charAttr & ATR_VLINE) == 0) {
117 twinLine &= ((charAttr & ATR_HDLINE) != 0);
118 singleLine &= ((charAttr & ATR_HLINE) != 0);
119 hexist |= ((charAttr & ATR_HLINE) != 0);
120 }
121 #ifdef KANJI
122 if (isZenkaku(s)) {
123 if (charAttr & ATR_VLINE)
124 *atrp = ATR_JIS2;
125 else
126 *atrp = 0;
127 *atrp++ |= (charAttr & ATR_HLINE);
128 s++;
129 }
130 #endif
131 }
132 else if (*s != ' ') {
133 inLeadingSpace = 0;
134 *atrp++ = ATR_CHAR;
135 twinLine = singleLine = 0;
136 #ifdef KANJI
137 if (isZenkaku(s)) {
138 *atrp++ = ATR_CHAR;
139 s++;
140 }
141 #endif
142 }
143 else {
144 if (!inLeadingSpace)
145 twinLine = singleLine = 0;
146 *atrp++ = 0;
147 }
148 }
149 *horiAtrp = attribute;
150 return twinLine ? HORI_DBL_ALL
151 : (singleLine ? HORI_SNGL_ALL
152 : (hexist ? HORI_EXIST
153 : HORI_NULL)) ;
154 }
findFieldLeft(col)155 findFieldLeft(col)
156 int col;
157 {
158 if (wakuExist) {
159 int seeVline = 0;
160 while (col < maxLen) {
161 if (colVline[col] == 0 && colNchrs[col])
162 return col;
163 if (colVline[col] == 0 && seeVline)
164 return col;
165 if (colVline[col] != 0)
166 seeVline++;
167 col++;
168 }
169 }
170 else {
171 for (; col < maxLen; col++) {
172 if (colNchrs[col] && (colVline[col] == 0))
173 return col;
174 }
175 }
176 return maxLen;
177 }
findFieldEnd(col)178 findFieldEnd(col)
179 int col;
180 {
181 for (; col < maxLen; col++) {
182 if (wakuExist && colVline[col])
183 return col;
184 if (!wakuExist && colNchrs[col] == 0)
185 return col;
186 }
187 return maxLen;
188 }
189 /*
190 * Build a default format character for each field
191 */
buildDefFormat(tblp)192 buildDefFormat(tblp)
193 register struct table *tblp;
194 {
195 int fld, col, delta;
196 int charsInVert;
197 int maxDeltaL, maxDeltaR;
198 int maxPosL, maxPosR;
199 for (fld = 1; fld < tbl_nfield; fld++) {
200 charsInVert = 0;
201 maxDeltaL = maxDeltaR = 0;
202 for (col = tbl_field[fld].left; col < tbl_field[fld].right; col++)
203 if (colNchrs[col] > charsInVert)
204 charsInVert = colNchrs[col];
205 if (charsInVert == 0) {
206 tbl_field[fld].defFormat = 'c';
207 continue;
208 }
209 for (col = tbl_field[fld].left; col < tbl_field[fld].right; col++) {
210 delta = colNchrs[col] - (fld ? colNchrs[col-1] : 0);
211 if (delta > maxDeltaL) {
212 maxDeltaL = delta;
213 maxPosL = col;
214 }
215 if (-delta > maxDeltaR) {
216 maxDeltaR = -delta;
217 maxPosR = col;
218 }
219 }
220 if (colNchrs[col - 1] > maxDeltaR) {
221 maxDeltaR = colNchrs[col - 1];
222 }
223 if ((maxDeltaL * 3 < charsInVert * 2)
224 && (maxDeltaR * 3 < charsInVert * 2))
225 tbl_field[fld].defFormat = 'c';
226 else if (maxDeltaL > maxDeltaR) {
227 tbl_field[fld].defFormat = 'l';
228 tbl_field[fld].align = maxPosL;
229 }
230 else if (maxDeltaL < maxDeltaR) {
231 tbl_field[fld].defFormat = 'r';
232 tbl_field[fld].align = maxPosR;
233 }
234 else if (maxDeltaL == maxDeltaR) {
235 tbl_field[fld].defFormat = 'c';
236 }
237 }
238 }
239 /*
240 * Build field
241 */
buildField(begin,end,tblp)242 buildField(begin, end, tblp)
243 int begin;
244 int end;
245 register struct table *tblp;
246 {
247 register int col;
248 int fld, l, nbody;
249 int lineExist;
250 char *s, *atr;
251
252 maxLen = maxLength(begin, end);
253 wakuAll = 1;
254 wakuExist = 0;
255 nbody = 0;
256 lineHoriFull = lineHoriPart = 0;
257 for (col = 0; col < maxLen; col++)
258 colVline[col] = colNchrs[col] = 0;
259 for (l = begin; l < end; l++) {
260 int blankStartCol = -1;
261 int lastIsChar = 0;
262 if (texts[l]->blank)
263 continue;
264 switch (lineAttr(texts[l]->body, &atr)) {
265 case HORI_NULL:
266 nbody++;
267 lineExist = 0;
268 break;
269 case HORI_DBL_ALL:
270 case HORI_SNGL_ALL:
271 lineHoriFull++;
272 lineExist = 1;
273 break;
274 case HORI_EXIST:
275 lineHoriPart++;
276 lineExist = 1;
277 break;
278 }
279 for (col = 0, s = texts[l]->body; *s; s++, atr++, col++) {
280 if (*atr & ATR_JIS2) {
281 colVline[col] |= COLV_JIS;
282 }
283 if (*atr & ATR_VLINE) {
284 lineExist = 1;
285 wakuExist++;
286 colVline[col] |= COLV_ASC;
287 }
288 if (*atr & ATR_CHAR) {
289 colNchrs[col]++;
290 if (wakuExist
291 && blankStartCol != -1 && lastIsChar) {
292 while (blankStartCol < col) {
293 colNchrs[blankStartCol++]++;
294 }
295 }
296 lastIsChar = 1;
297 }
298 else {
299 if (lastIsChar && *atr)
300 lastIsChar = 0;
301 }
302 if (*atr == 0) {
303 blankStartCol = col;
304 }
305 else {
306 blankStartCol = -1;
307 }
308 }
309 if (!lineExist)
310 wakuAll = 0;
311 }
312 tbl_field[0].defFormat = '-';
313 tbl_field[0].right = 0;
314 tbl_field[0].left = -1;
315 tbl_nfield = 1;
316 tbl_field[tbl_nfield].left = findFieldLeft(0);
317 while (tbl_field[tbl_nfield].left < maxLen) {
318 tbl_field[tbl_nfield].right
319 = findFieldEnd(tbl_field[tbl_nfield].left + 1);
320 tbl_field[tbl_nfield + 1].left
321 = findFieldLeft(tbl_field[tbl_nfield].right);
322 tbl_nfield++;
323 if (tbl_nfield >= TBL_MAX_FIELDS)
324 return -1; /* too many fields */
325 }
326 /* Find vertical lines */
327 for (fld = 0; fld < tbl_nfield; fld++) {
328 tbl_field[fld].vlines = 0;
329 for (col = tbl_field[fld].right;
330 col < tbl_field[fld + 1].left; col++)
331 if (colVline[col] == COLV_ASC)
332 tbl_field[fld].vlines++;
333 }
334 expectedFields = nbody * (tbl_nfield - 1);
335 return 0; /* OK */
336 }
fieldOnlyHline(lnum,fld,tblp)337 fieldOnlyHline(lnum, fld, tblp)
338 int lnum;
339 int fld;
340 register struct table *tblp;
341 {
342 int i;
343 char *s;
344 s = texts[lnum]->body + tbl_field[fld].left;
345 for (i = tbl_field[fld].left; i < tbl_field[fld].right; i++, s++)
346 if (*s != '-' && *s != '=' && *s != '+' && *s != '|')
347 return 0;
348 return 1;
349 }
shrinkField(begin,end,tblp)350 shrinkField(begin, end, tblp)
351 int begin;
352 int end;
353 register struct table *tblp;
354 {
355 int i, l, fld;
356 int hattr;
357 char *atr;
358 if (!wakuExist)
359 return;
360 for (i = 0; i < maxLen; i++)
361 colPchrs[i] = 0;
362 for (l = begin; l < end; l++) {
363 if ((hattr = lineAttr(texts[l]->body, &atr))
364 == HORI_DBL_ALL || hattr == HORI_SNGL_ALL)
365 continue;
366 for (fld = 1; fld < tbl_nfield; fld++) {
367 if (fieldOnlyHline(l, fld, tblp))
368 continue;
369 for (i = tbl_field[fld].left;
370 i < tbl_field[fld].right; i++)
371 if (atr[i] != 0)
372 colPchrs[i]++;
373 }
374 }
375 for (fld = 1; fld < tbl_nfield; fld++) {
376 while (tbl_field[fld].left < tbl_field[fld].right) {
377 if (colPchrs[tbl_field[fld].left])
378 break;
379 else
380 tbl_field[fld].left++;
381 }
382 while (tbl_field[fld].left < tbl_field[fld].right) {
383 if (colPchrs[tbl_field[fld].right - 1])
384 break;
385 else
386 tbl_field[fld].right--;
387 }
388 }
389 }
spaceInField(l,fld,tblp)390 spaceInField(l, fld, tblp)
391 int l;
392 int fld;
393 struct table *tblp;
394 {
395 int j;
396 int stat = 0;
397 for (j = tbl_field[fld].left;
398 j < tbl_field[fld].right && j < texts[l]->length; j++) {
399 if (stat == 0 && *(texts[l]->body + j) != ' ')
400 stat = 1;
401 if (stat == 1 && *(texts[l]->body + j) == ' ')
402 stat = 2;
403 if (stat == 2 && *(texts[l]->body + j) != ' ')
404 return 1;
405 }
406 if (stat == 0)
407 return 1;
408 return 0;
409 }
410 /*
411 * Check if the specified region looks like table
412 */
tblCheck(begin,end,tblp)413 tblCheck(begin, end, tblp)
414 int begin;
415 int end;
416 register struct table *tblp;
417 {
418 int l, col, fld;
419 int nilg = 0;
420 int spaces = 0;
421 int chars = 0;
422 int lines = 0;
423 int factor;
424 factor = tableFactor * tableFactor / 50;
425 if (!wakuAll) {
426 if (tbl_nfield <= TBL_MIN_FIELDS)
427 return 0;
428 if (tbl_nfield * 100 > 12 * factor)
429 return 0;
430 for (l = begin; l < end; l++) {
431 if (!texts[l]->blank) {
432 lines += texts[l]->picLines;
433 spaces += texts[l]->spaces;
434 chars += texts[l]->length - texts[l]->indent;
435 }
436 for (fld = 0; fld < tbl_nfield; fld++)
437 if (spaceInField(l, fld, tblp))
438 nilg++;
439 }
440 DBG3(4, "tblCheck (%d-%d) ilg:%d\n", begin, end, nilg);
441
442 if (((nilg * 25) > ((end - begin) * factor))
443 || (spaces * factor < chars * (japaneseText?15:10))
444 || ((lines * 25)> ((end - begin) * factor)))
445 return 0;
446 }
447 else {
448 char *s;
449 #ifdef undef
450 if (tbl_nfield * 100 > 16 * factor)
451 return 0;
452 #endif
453 for (fld = 0; fld < tbl_nfield; fld++)
454 if (tbl_field[fld].vlines > 2)
455 return 0;
456 if (tbl_field[0].vlines == 0 ||
457 tbl_field[tbl_nfield - 1].vlines == 0)
458 return 0;
459 if (lineHoriFull == 0)
460 return 0;
461 for (l = begin; l < end; l++) {
462 s = texts[l]->body;
463 for (col = 0; col < texts[l]->length; col++, s++) {
464 if (colVline[col] == COLV_ASC
465 && tblAsciAttr[(unsigned char)*s] == 0
466 && tblKanjiByte[(unsigned char)*s]
467 && tblKwakuAttr(s) == 0)
468 nilg++;
469 #ifdef KANJI
470 if (isZenkaku(s)) {
471 s++;
472 col++;
473 }
474 #endif
475 }
476 }
477 DBG3(4, "tblCheck (%d-%d) ilg:%d\n", begin, end, nilg);
478 }
479 if (nilg * 200 > expectedFields * factor)
480 return 0;
481 return 1;
482 }
markIfTable(begin,end)483 markIfTable(begin, end)
484 int begin;
485 int end;
486 {
487 struct textBlock *tbp;
488 struct table table;
489 int l;
490 DBG2(2, "markIfTable (%d-%d)\n", begin, end);
491 if (end - begin <= TBL_MIN_LINES)
492 return 0;
493 for (l = begin; l < end; l++)
494 if (texts[l]->block)
495 return 0;
496 if (buildField(begin, end, &table))
497 return 0;
498 if (tblCheck(begin, end, &table) == 0)
499 return 0;
500 tbp = newTextBlock(begin, end, TB_TABLE);
501 MSG2("%d-%d ", begin, end - 1);
502 for (l = begin; l < end; l++)
503 texts[l]->block = tbp;
504 return 1;
505 }
extraColumn(atr,len,fld,tblp)506 extraColumn(atr, len, fld, tblp)
507 char *atr;
508 int len;
509 int fld;
510 register struct table *tblp;
511 {
512 int fldLeft;
513 int i;
514 fldLeft = fld;
515 for (; fld < tbl_nfield; fld++) {
516 if (wakuAll) {
517 for (i = tbl_field[fld].right;
518 i < len && i < tbl_field[fld + 1].left; i++)
519 if ((atr[i] & ATR_VLINE) != 0
520 || (atr[i] & ATR_HLINE) != 0)
521 break;
522 if (i < tbl_field[fld + 1].left)
523 break;
524 }
525 else {
526 if (tbl_field[fld].right >= len
527 || (atr[tbl_field[fld].right] & ATR_CHAR) == 0)
528 break;
529 }
530 }
531 if (fld == tbl_nfield)
532 return tbl_nfield - fldLeft -1;
533 else
534 return fld - fldLeft;
535 }
formatOf(atr,len,fld,exCol,tblp)536 formatOf(atr, len, fld, exCol, tblp)
537 char *atr;
538 int len;
539 int fld;
540 int exCol;
541 register struct table *tblp;
542 {
543 if (tbl_field[fld].defFormat == 'l') {
544 if (tbl_field[fld].left >= len
545 || atr[tbl_field[fld].left])
546 return 'l';
547 if (tbl_field[fld].left <= len) {
548 if (tbl_field[fld + exCol].left
549 == tbl_field[fld + exCol].align) {
550 if (atr[tbl_field[fld].left])
551 return 'l';
552 }
553 else if ((atr[tbl_field[fld].align - 1] & ATR_CHAR)
554 == 0)
555 return 'l';
556
557 }
558 if (tbl_field[fld + exCol].right <= len
559 && atr[tbl_field[fld + exCol].right - 1])
560 return 'r';
561 }
562 else if (tbl_field[fld].defFormat == 'r') {
563 if (tbl_field[fld + exCol].right <= len) {
564 if (tbl_field[fld + exCol].right
565 == tbl_field[fld + exCol].align) {
566 if(atr[tbl_field[fld + exCol].right - 1])
567 return 'r';
568 }
569 else if ((atr[tbl_field[fld + exCol].right] &ATR_CHAR)
570 == 0){
571 return 'r';
572 }
573 }
574 if (tbl_field[fld].left >= len
575 || atr[tbl_field[fld].left])
576 return 'l';
577 }
578 return 'c';
579 }
fieldEmpty(atr,len,fld,exCol,tblp)580 fieldEmpty(atr, len, fld, exCol, tblp)
581 char *atr;
582 int len;
583 int fld;
584 int exCol;
585 register struct table *tblp;
586 {
587 int i;
588 for (i = tbl_field[fld].left; i < tbl_field[fld + exCol].right
589 && i < len; i++)
590 if (atr[i] & ATR_CHAR)
591 return 0;
592 return 1;
593 }
594 char *
tblSubstr(l,fld,exCol,tblp)595 tblSubstr(l, fld, exCol, tblp)
596 int l;
597 int fld;
598 int exCol;
599 register struct table *tblp;
600 {
601 static char buf[256];
602 char *to;
603 char *from;
604 int i;
605 from = texts[l]->body + tbl_field[fld].left;
606 to = buf;
607 for (i = tbl_field[fld].left; i < tbl_field[fld + exCol].right
608 && i < texts[l]->length; i++) {
609 *to++ = *from++;
610 }
611 *to = '\0';
612 return buf;
613 }
attrOfStr(atr,len,f,exCol,tblp)614 attrOfStr(atr, len, f, exCol, tblp)
615 char *atr;
616 int len;
617 int f;
618 int exCol;
619 register struct table *tblp;
620 {
621 int col;
622 if (tbl_field[f + exCol].right >= len)
623 return HORI_NULL;
624 for (col = tbl_field[f].left; col < tbl_field[f + exCol].right; col++)
625 if ((atr[col] & ATR_HLINE) == 0)
626 return HORI_NULL;
627 return HORI_SNGL_ALL;
628 }
tblOutput(begin,end)629 tblOutput(begin, end)
630 int begin;
631 int end;
632 {
633 int ttop, tbtm;
634 struct table table;
635
636 PRINTED2(begin, end);
637 /*
638 * Suppress Blank lines
639 */
640 for (ttop = begin; ttop < end; ttop++) {
641 if (!texts[ttop]->blank)
642 break;
643 }
644 for (tbtm = end; tbtm > ttop; tbtm--) {
645 if (!texts[tbtm - 1]->blank)
646 break;
647 }
648 (void)buildField(ttop, tbtm, &table);/* Should Success */
649 buildDefFormat(&table);
650 shrinkField(ttop, tbtm, &table);
651 (put->table)(ttop, tbtm, &table);
652 }
653