1 /*
2  * Copyright (C) 1991,1992,1993 NEC Corporation.
3  */
4 /*
5  * Table parsing routine
6  */
7 #ifndef lint
8 static char rcsid[] =
9 	"$Id: table.c,v 2.13 1994/04/19 10:17:06 uchida Exp $ (NEC)";
10 #endif
11 
12 #include <stdio.h>
13 #include "plain2.h"
14 #include "table.h"
15 #include "kanji.h"
16 
/*
 * Attribute bits.  lineAttr() stores one of these bit sets per byte of
 * a text line in its attribute array.
 */
#define	ATR_VLINE	0x01		/* Vertical line	*/
#define	ATR_HLINE	0x02		/* Horizontal line	*/
#define	ATR_HDLINE	0x04		/* Horizontal double	*/
/* Any ruled-line bit */
#define	ATR_ANYLINE	(ATR_VLINE | ATR_HLINE | ATR_HDLINE)
#define	ATR_CHAR	0x08	/* non line character		*/
#define	ATR_JIS2	0x10	/* second byte of JIS wakusen	*/
/* '+' is accepted in both single and double horizontal lines */
#define	ATR_PLUS	(ATR_HLINE | ATR_HDLINE)
/*
 * Alphabetic frame lines
 *
 * Single-byte frame characters and their attribute bits.  The list is
 * terminated by an entry with an empty pattern; tblParseInit() copies
 * it into tblAsciAttr[], indexed by the character code.
 */
struct	strVal	tblwaku1[] = {
	{"-",  ATR_HLINE},
	{"=",  ATR_HDLINE},
	{"|",  ATR_VLINE},
	{"+",  ATR_PLUS},
	{"", 0}};
#ifdef	KANJI
/*
 * JIS Keisen
 *
 * Two-byte frame characters and their attribute bits, terminated by an
 * entry with an empty pattern.  tblParseInit() marks the first byte of
 * each pattern in tblKanjiByte[]; tblKwakuAttr() matches both bytes.
 * The first entry is a horizontal line, the second a vertical line and
 * the remaining ones carry both bits.
 *
 * NOTE(review): the literals must stay in the encoding the rest of the
 * program expects for Kanji text; they are presumably the thin JIS
 * ruled-line characters -- confirm the file encoding before editing.
 */
struct	strVal	tblwaku2[] = {
	{"��", ATR_HLINE},
	{"��", ATR_VLINE},
	{"��", ATR_HLINE | ATR_VLINE},
	{"��", ATR_HLINE | ATR_VLINE},
	{"��", ATR_HLINE | ATR_VLINE},
	{"��", ATR_HLINE | ATR_VLINE},
	{"��", ATR_HLINE | ATR_VLINE},
	{"��", ATR_HLINE | ATR_VLINE},
	{"��", ATR_HLINE | ATR_VLINE},
	{"", 0}};
#endif
51 #define	COLV_JIS	2
52 #define	COLV_ASC	1
53 static	char	colVline[MAX_LINE_LEN];	/* Vertical line exist on the column*/
54 static	char	colNchrs[MAX_LINE_LEN];	/* Number of chars on the column */
55 static	char	colPchrs[MAX_LINE_LEN];	/* chars to be printed		*/
56 static	short	maxLen;			/* Max length of the region	*/
57 static	short	wakuAll,	wakuExist;
58 static	short	lineHoriFull,	lineHoriPart;
59 
60 int	expectedFields;		/* Number of expected fields	*/
61 
62 static	unsigned char tblKanjiByte[256];
63 static	unsigned char tblAsciAttr[256];
tblParseInit()64 tblParseInit()
65 {
66 	struct strVal	*twp;
67 	bzero((char *)tblKanjiByte, sizeof(tblKanjiByte));
68 	bzero((char *)tblAsciAttr, sizeof(tblAsciAttr));
69 	for (twp = tblwaku1; *twp->pattern; twp++)
70 		tblAsciAttr[(unsigned char)*twp->pattern] = twp->value;
71 #ifdef	KANJI
72 	for (twp = tblwaku2; *twp->pattern; twp++)
73 		tblKanjiByte[(unsigned char)*twp->pattern]++;
74 #endif
75 }
/*
 * Attribute bits of the two-byte frame character at s, or 0 when s
 * does not start one.  Without KANJI the result is always 0.
 */
tblKwakuAttr(s)
register char	*s;
{
	register struct	strVal	*entry;
#ifdef	KANJI
	/* Cheap rejection before walking the pattern list */
	if (!jisTableEnabled || !maybeThinKeisen(s))
		return 0;
	for (entry = tblwaku2; *entry->pattern; entry++) {
		if (str2match(s, entry->pattern))
			return entry->value;
	}
#endif
	return 0;
}
93 /*
94  * Attribute of the line (Horizontal line, Double line)
95  */
lineAttr(s,horiAtrp)96 lineAttr(s, horiAtrp)
97 register char	*s;
98 char	**horiAtrp;
99 {
100 	static	char	attribute[MAX_LINE_LEN];
101 	register char	*atrp;
102 	int	charAttr;
103 	int	twinLine, singleLine;
104 	int	hexist;
105 	int	inLeadingSpace = 1;
106 	if (*s == '\0')
107 		return	HORI_NULL;
108 	twinLine = singleLine = 1;
109 	hexist	 = 0;
110 	for (atrp = attribute; *s; s++) {
111 		if ((charAttr = tblAsciAttr[(unsigned char)*s])
112 		    || (tblKanjiByte[(unsigned char)*s]
113 			&& (charAttr = tblKwakuAttr(s)))) {
114 			inLeadingSpace = 0;
115 			*atrp++ = charAttr;
116 			if ((charAttr & ATR_VLINE) == 0) {
117 				twinLine   &= ((charAttr & ATR_HDLINE) != 0);
118 				singleLine &= ((charAttr & ATR_HLINE) != 0);
119 				hexist	   |= ((charAttr & ATR_HLINE) != 0);
120 			}
121 #ifdef	KANJI
122 			if (isZenkaku(s)) {
123 				if (charAttr & ATR_VLINE)
124 					*atrp = ATR_JIS2;
125 				else
126 					*atrp = 0;
127 				*atrp++ |= (charAttr & ATR_HLINE);
128 				s++;
129 			}
130 #endif
131 		}
132 		else if (*s != ' ') {
133 			inLeadingSpace = 0;
134 			*atrp++ = ATR_CHAR;
135 			twinLine = singleLine = 0;
136 #ifdef	KANJI
137 			if (isZenkaku(s)) {
138 				*atrp++ = ATR_CHAR;
139 				s++;
140 			}
141 #endif
142 		}
143 		else {
144 			if (!inLeadingSpace)
145 				twinLine = singleLine = 0;
146 			*atrp++ = 0;
147 		}
148 	}
149 	*horiAtrp = attribute;
150 	return twinLine ? HORI_DBL_ALL
151 		: (singleLine ? HORI_SNGL_ALL
152 		   : (hexist ? HORI_EXIST
153 		      : HORI_NULL)) ;
154 }
findFieldLeft(col)155 findFieldLeft(col)
156 int	col;
157 {
158 	if (wakuExist) {
159 		int	seeVline = 0;
160 		while (col < maxLen) {
161 			if (colVline[col] == 0 && colNchrs[col])
162 				return col;
163 			if (colVline[col] == 0 && seeVline)
164 				return col;
165 			if (colVline[col] != 0)
166 				seeVline++;
167 			col++;
168 		}
169 	}
170 	else {
171 		for (; col < maxLen; col++) {
172 			if (colNchrs[col] && (colVline[col] ==	0))
173 				return col;
174 		}
175 	}
176 	return maxLen;
177 }
findFieldEnd(col)178 findFieldEnd(col)
179 int	col;
180 {
181 	for (; col < maxLen; col++) {
182 		if (wakuExist && colVline[col])
183 			return col;
184 		if (!wakuExist && colNchrs[col] == 0)
185 			return col;
186 	}
187 	return maxLen;
188 }
189 /*
190  * Build a default format character for each field
191  */
buildDefFormat(tblp)192 buildDefFormat(tblp)
193 register struct	table	*tblp;
194 {
195 	int	fld, col, delta;
196 	int	charsInVert;
197 	int	maxDeltaL, maxDeltaR;
198 	int	maxPosL,   maxPosR;
199 	for (fld = 1; fld < tbl_nfield; fld++) {
200 		charsInVert = 0;
201 		maxDeltaL = maxDeltaR = 0;
202 		for (col = tbl_field[fld].left; col < tbl_field[fld].right; col++)
203 			if (colNchrs[col] > charsInVert)
204 				charsInVert = colNchrs[col];
205 		if (charsInVert == 0) {
206 			tbl_field[fld].defFormat = 'c';
207 			continue;
208 		}
209 		for (col = tbl_field[fld].left; col < tbl_field[fld].right; col++) {
210 			delta = colNchrs[col] - (fld ? colNchrs[col-1] : 0);
211 			if (delta > maxDeltaL) {
212 				maxDeltaL = delta;
213 				maxPosL   = col;
214 			}
215 			if (-delta > maxDeltaR) {
216 				maxDeltaR = -delta;
217 				maxPosR   = col;
218 			}
219 		}
220 		if (colNchrs[col - 1] > maxDeltaR) {
221 			maxDeltaR = colNchrs[col - 1];
222 		}
223 		if ((maxDeltaL * 3 < charsInVert * 2)
224 		    &&	(maxDeltaR * 3 < charsInVert * 2))
225 			tbl_field[fld].defFormat = 'c';
226 		else if (maxDeltaL > maxDeltaR) {
227 			tbl_field[fld].defFormat = 'l';
228 			tbl_field[fld].align = maxPosL;
229 		}
230 		else if (maxDeltaL < maxDeltaR) {
231 			tbl_field[fld].defFormat = 'r';
232 			tbl_field[fld].align = maxPosR;
233 		}
234 		else if (maxDeltaL == maxDeltaR) {
235 			tbl_field[fld].defFormat = 'c';
236 		}
237 	}
238 }
239 /*
240  * Build field
241  */
buildField(begin,end,tblp)242 buildField(begin, end, tblp)
243 int	begin;
244 int	end;
245 register struct	table	*tblp;
246 {
247 	register int	col;
248 	int	fld, l, nbody;
249 	int	lineExist;
250 	char	*s, *atr;
251 
252 	maxLen = maxLength(begin, end);
253 	wakuAll	  = 1;
254 	wakuExist = 0;
255 	nbody	  = 0;
256 	lineHoriFull = lineHoriPart = 0;
257 	for (col = 0; col < maxLen; col++)
258 		colVline[col] = colNchrs[col] = 0;
259 	for (l = begin; l < end; l++) {
260 		int	blankStartCol = -1;
261 		int	lastIsChar = 0;
262 		if (texts[l]->blank)
263 			continue;
264 		switch (lineAttr(texts[l]->body, &atr)) {
265 		    case HORI_NULL:
266 			nbody++;
267 			lineExist = 0;
268 			break;
269 		    case HORI_DBL_ALL:
270 		    case HORI_SNGL_ALL:
271 			lineHoriFull++;
272 			lineExist = 1;
273 			break;
274 		    case HORI_EXIST:
275 			lineHoriPart++;
276 			lineExist = 1;
277 			break;
278 		}
279 		for (col = 0, s = texts[l]->body; *s; s++, atr++, col++) {
280 			if (*atr & ATR_JIS2) {
281 				colVline[col] |= COLV_JIS;
282 			}
283 			if (*atr & ATR_VLINE) {
284 				lineExist = 1;
285 				wakuExist++;
286 				colVline[col] |= COLV_ASC;
287 			}
288 			if (*atr & ATR_CHAR) {
289 				colNchrs[col]++;
290 				if (wakuExist
291 				    && blankStartCol != -1 && lastIsChar) {
292 					while (blankStartCol < col) {
293 						colNchrs[blankStartCol++]++;
294 					}
295 				}
296 				lastIsChar = 1;
297 			}
298 			else {
299 				if (lastIsChar && *atr)
300 					lastIsChar = 0;
301 			}
302 			if (*atr == 0) {
303 				blankStartCol = col;
304 			}
305 			else {
306 				blankStartCol = -1;
307 			}
308 		}
309 		if (!lineExist)
310 			wakuAll = 0;
311 	}
312 	tbl_field[0].defFormat = '-';
313 	tbl_field[0].right     = 0;
314 	tbl_field[0].left      = -1;
315 	tbl_nfield = 1;
316 	tbl_field[tbl_nfield].left  = findFieldLeft(0);
317 	while (tbl_field[tbl_nfield].left < maxLen) {
318 		tbl_field[tbl_nfield].right
319 			= findFieldEnd(tbl_field[tbl_nfield].left + 1);
320 		tbl_field[tbl_nfield + 1].left
321 			= findFieldLeft(tbl_field[tbl_nfield].right);
322 		tbl_nfield++;
323 		if (tbl_nfield >= TBL_MAX_FIELDS)
324 			return -1;	/* too many fields	*/
325 	}
326 	/* Find vertical lines	*/
327 	for (fld = 0; fld < tbl_nfield; fld++) {
328 		tbl_field[fld].vlines = 0;
329 		for (col = tbl_field[fld].right;
330 		     col < tbl_field[fld + 1].left; col++)
331 			if (colVline[col] == COLV_ASC)
332 				tbl_field[fld].vlines++;
333 	}
334 	expectedFields	= nbody * (tbl_nfield - 1);
335 	return 0;	/* OK	*/
336 }
fieldOnlyHline(lnum,fld,tblp)337 fieldOnlyHline(lnum, fld, tblp)
338 int	lnum;
339 int	fld;
340 register struct	table	*tblp;
341 {
342 	int	i;
343 	char	*s;
344 	s = texts[lnum]->body + tbl_field[fld].left;
345 	for (i = tbl_field[fld].left; i < tbl_field[fld].right; i++, s++)
346 		if (*s != '-' && *s != '=' && *s != '+' && *s != '|')
347 			return 0;
348 	return 1;
349 }
shrinkField(begin,end,tblp)350 shrinkField(begin, end, tblp)
351 int	begin;
352 int	end;
353 register struct	table	*tblp;
354 {
355 	int	i, l, fld;
356 	int	hattr;
357 	char	*atr;
358 	if (!wakuExist)
359 		return;
360 	for (i = 0; i < maxLen; i++)
361 		colPchrs[i] = 0;
362 	for (l = begin; l < end; l++) {
363 		if ((hattr = lineAttr(texts[l]->body, &atr))
364 		    == HORI_DBL_ALL || hattr == HORI_SNGL_ALL)
365 			continue;
366 		for (fld = 1; fld < tbl_nfield; fld++) {
367 			if (fieldOnlyHline(l, fld, tblp))
368 				continue;
369 			for (i = tbl_field[fld].left;
370 			     i < tbl_field[fld].right; i++)
371 				if (atr[i] != 0)
372 					colPchrs[i]++;
373 		}
374 	}
375 	for (fld = 1; fld < tbl_nfield; fld++) {
376 		while (tbl_field[fld].left < tbl_field[fld].right) {
377 			if (colPchrs[tbl_field[fld].left])
378 				break;
379 			else
380 				tbl_field[fld].left++;
381 		}
382 		while (tbl_field[fld].left < tbl_field[fld].right) {
383 			if (colPchrs[tbl_field[fld].right - 1])
384 				break;
385 			else
386 				tbl_field[fld].right--;
387 		}
388 	}
389 }
spaceInField(l,fld,tblp)390 spaceInField(l, fld, tblp)
391 int	l;
392 int	fld;
393 struct	table	*tblp;
394 {
395 	int	j;
396 	int	stat = 0;
397 	for (j = tbl_field[fld].left;
398 	     j < tbl_field[fld].right && j < texts[l]->length; j++) {
399 		if (stat == 0 && *(texts[l]->body + j) != ' ')
400 			stat = 1;
401 		if (stat == 1 && *(texts[l]->body + j) == ' ')
402 			stat = 2;
403 		if (stat == 2 && *(texts[l]->body + j) != ' ')
404 			return 1;
405 	}
406 	if (stat == 0)
407 		return 1;
408 	return 0;
409 }
410 /*
411  * Check if the specified region looks like table
412  */
tblCheck(begin,end,tblp)413 tblCheck(begin, end, tblp)
414 int	begin;
415 int	end;
416 register struct	table	*tblp;
417 {
418 	int	l, col, fld;
419 	int	nilg   = 0;
420 	int	spaces = 0;
421 	int	chars  = 0;
422 	int	lines  = 0;
423 	int	factor;
424 	factor = tableFactor * tableFactor / 50;
425 	if (!wakuAll) {
426 		if (tbl_nfield <= TBL_MIN_FIELDS)
427 			return 0;
428 		if (tbl_nfield * 100 > 12 * factor)
429 			return 0;
430 		for (l = begin; l < end; l++) {
431 			if (!texts[l]->blank) {
432 				lines  += texts[l]->picLines;
433 				spaces += texts[l]->spaces;
434 				chars  += texts[l]->length - texts[l]->indent;
435 			}
436 			for (fld = 0; fld < tbl_nfield; fld++)
437 				if (spaceInField(l, fld, tblp))
438 					nilg++;
439 		}
440 		DBG3(4, "tblCheck (%d-%d) ilg:%d\n", begin, end, nilg);
441 
442 		if (((nilg * 25) > ((end - begin) * factor))
443 		    || (spaces * factor < chars * (japaneseText?15:10))
444 		    || ((lines * 25)> ((end - begin) * factor)))
445 			return 0;
446 	}
447 	else {
448 		char	*s;
449 #ifdef	undef
450 		if (tbl_nfield * 100 > 16 * factor)
451 			return 0;
452 #endif
453 		for (fld = 0; fld < tbl_nfield; fld++)
454 			if (tbl_field[fld].vlines > 2)
455 				return 0;
456 		if (tbl_field[0].vlines == 0 ||
457 		    tbl_field[tbl_nfield - 1].vlines == 0)
458 			return 0;
459 		if (lineHoriFull == 0)
460 			return 0;
461 		for (l = begin; l < end; l++) {
462 			s = texts[l]->body;
463 			for (col = 0; col < texts[l]->length; col++, s++) {
464 				if (colVline[col] == COLV_ASC
465 				    && tblAsciAttr[(unsigned char)*s] == 0
466 				    && tblKanjiByte[(unsigned char)*s]
467 				    && tblKwakuAttr(s) == 0)
468 					nilg++;
469 #ifdef	KANJI
470 				if (isZenkaku(s)) {
471 					s++;
472 					col++;
473 				}
474 #endif
475 			}
476 		}
477 		DBG3(4, "tblCheck (%d-%d) ilg:%d\n", begin, end, nilg);
478 	}
479 	if (nilg * 200 > expectedFields * factor)
480 		return 0;
481 	return 1;
482 }
markIfTable(begin,end)483 markIfTable(begin, end)
484 int	begin;
485 int	end;
486 {
487 	struct	textBlock *tbp;
488 	struct	table	table;
489 	int	l;
490 	DBG2(2, "markIfTable (%d-%d)\n", begin, end);
491 	if (end - begin <= TBL_MIN_LINES)
492 		return 0;
493 	for (l = begin; l < end; l++)
494 		if (texts[l]->block)
495 			return 0;
496 	if (buildField(begin, end, &table))
497 		return 0;
498 	if (tblCheck(begin, end, &table) == 0)
499 		return 0;
500 	tbp = newTextBlock(begin, end, TB_TABLE);
501 	MSG2("%d-%d ", begin, end - 1);
502 	for (l = begin; l < end; l++)
503 		texts[l]->block = tbp;
504 	return 1;
505 }
extraColumn(atr,len,fld,tblp)506 extraColumn(atr, len, fld, tblp)
507 char	*atr;
508 int	len;
509 int	fld;
510 register struct	table	*tblp;
511 {
512 	int	fldLeft;
513 	int	i;
514 	fldLeft = fld;
515 	for (; fld < tbl_nfield; fld++) {
516 		if (wakuAll) {
517 			for (i = tbl_field[fld].right;
518 			     i < len && i < tbl_field[fld + 1].left; i++)
519 				if ((atr[i] & ATR_VLINE) != 0
520 				    || (atr[i] & ATR_HLINE) != 0)
521 					break;
522 			if (i < tbl_field[fld + 1].left)
523 				break;
524 		}
525 		else {
526 			if (tbl_field[fld].right >= len
527 			    || (atr[tbl_field[fld].right] & ATR_CHAR) == 0)
528 				break;
529 		}
530 	}
531 	if (fld == tbl_nfield)
532 		return tbl_nfield - fldLeft -1;
533 	else
534 		return fld - fldLeft;
535 }
formatOf(atr,len,fld,exCol,tblp)536 formatOf(atr, len, fld, exCol, tblp)
537 char	*atr;
538 int	len;
539 int	fld;
540 int	exCol;
541 register struct	table	*tblp;
542 {
543 	if (tbl_field[fld].defFormat == 'l') {
544 		if (tbl_field[fld].left >= len
545 		    || atr[tbl_field[fld].left])
546 			return 'l';
547 		if (tbl_field[fld].left <= len) {
548 			if (tbl_field[fld + exCol].left
549 			    == tbl_field[fld + exCol].align) {
550 				if (atr[tbl_field[fld].left])
551 					return 'l';
552 			}
553 			else if ((atr[tbl_field[fld].align - 1] & ATR_CHAR)
554 				 == 0)
555 					return 'l';
556 
557 		}
558 		if (tbl_field[fld + exCol].right <= len
559 		    && atr[tbl_field[fld + exCol].right - 1])
560 			return 'r';
561 	}
562 	else if (tbl_field[fld].defFormat == 'r') {
563 		if (tbl_field[fld + exCol].right <= len) {
564 			if (tbl_field[fld + exCol].right
565 			    == tbl_field[fld + exCol].align) {
566 				if(atr[tbl_field[fld + exCol].right - 1])
567 					return 'r';
568 			}
569 			else if ((atr[tbl_field[fld + exCol].right] &ATR_CHAR)
570 				 == 0){
571 				return 'r';
572 			}
573 		}
574 		if (tbl_field[fld].left >= len
575 		    || atr[tbl_field[fld].left])
576 			return 'l';
577 	}
578 	return 'c';
579 }
fieldEmpty(atr,len,fld,exCol,tblp)580 fieldEmpty(atr, len, fld, exCol, tblp)
581 char	*atr;
582 int	len;
583 int	fld;
584 int	exCol;
585 register struct	table	*tblp;
586 {
587 	int	i;
588 	for (i = tbl_field[fld].left; i < tbl_field[fld + exCol].right
589 	     && i < len; i++)
590 		if (atr[i] & ATR_CHAR)
591 			return 0;
592 	return 1;
593 }
594 char	*
tblSubstr(l,fld,exCol,tblp)595 tblSubstr(l, fld, exCol, tblp)
596 int	l;
597 int	fld;
598 int	exCol;
599 register struct	table	*tblp;
600 {
601 	static	char	buf[256];
602 	char	*to;
603 	char	*from;
604 	int	i;
605 	from = texts[l]->body + tbl_field[fld].left;
606 	to   = buf;
607 	for (i = tbl_field[fld].left; i < tbl_field[fld + exCol].right
608 	     && i < texts[l]->length; i++) {
609 		*to++ = *from++;
610 	}
611 	*to = '\0';
612 	return buf;
613 }
attrOfStr(atr,len,f,exCol,tblp)614 attrOfStr(atr, len, f, exCol, tblp)
615 char	*atr;
616 int	len;
617 int	f;
618 int	exCol;
619 register struct	table	*tblp;
620 {
621 	int	col;
622 	if (tbl_field[f + exCol].right >= len)
623 		return HORI_NULL;
624 	for (col = tbl_field[f].left; col < tbl_field[f + exCol].right; col++)
625 		if ((atr[col] & ATR_HLINE) == 0)
626 			return HORI_NULL;
627 	return HORI_SNGL_ALL;
628 }
tblOutput(begin,end)629 tblOutput(begin, end)
630 int	begin;
631 int	end;
632 {
633 	int	ttop, tbtm;
634 	struct	table	table;
635 
636 	PRINTED2(begin, end);
637 	/*
638 	 * Suppress Blank lines
639 	 */
640 	for (ttop = begin; ttop < end; ttop++) {
641 		if (!texts[ttop]->blank)
642 			break;
643 	}
644 	for (tbtm = end; tbtm > ttop; tbtm--) {
645 		if (!texts[tbtm - 1]->blank)
646 			break;
647 	}
648 	(void)buildField(ttop, tbtm, &table);/* Should Success */
649 	buildDefFormat(&table);
650 	shrinkField(ttop, tbtm, &table);
651 	(put->table)(ttop, tbtm, &table);
652 }
653