1 /*	This file is part of the software similarity tester SIM.
2 	Written by Dick Grune, Vrije Universiteit, Amsterdam.
3 	$Id: text.c,v 1.2 2001/11/13 12:55:58 dick Exp $
4 */
5 
6 #include	<stdio.h>
7 #include	<malloc.h>
8 
9 #include	"debug.par"
10 #include	"sim.h"
11 #include	"token.h"
12 #include	"stream.h"
13 #include	"lex.h"
14 #include	"options.h"
15 #include	"error.h"
16 #include	"text.h"
17 
/*	One entry of the newline buffer. It records, for a single line end,
	how many tokens were counted since the previous line end; the value
	is kept in an unsigned char to keep the buffer small.
*/
struct newline {
	unsigned char nl_tk_diff;	/* tokens since the previous line end */
};
21 
#define	NL_INCR		1000		/* growth step of nl_buff[], in entries */

static struct newline *nl_buff;		/* malloc'ed buffer, or 0 when not in use */
static unsigned int nl_size;		/* number of entries allocated in nl_buff[] */
static unsigned int nl_free;		/* index of the first unused entry */

/* read position and end position in nl_buff[] while a text is replayed
   during pass 2
*/
static unsigned int nl_next;
static unsigned int nl_limit;

static void init_nl_buff(void);
static void store_newline(void);
32 
33 /*							TEXT INTERFACE */
34 
/* value of lex_tk_cnt at the most recent line end */
static unsigned int last_tk_cnt;
/* line counter handed out through lex_nl_cnt during pass 2 */
static unsigned int last_nl_cnt;
37 
38 void
InitText(int nfiles)39 InitText(int nfiles) {
40 	/* allocate the array of text descriptors */
41 	NumberOfTexts = nfiles;
42 	Text = (struct text *)
43 		malloc((unsigned int)(NumberOfTexts*sizeof (struct text)));
44 	if (!Text) fatal("out of memory");
45 
46 	init_nl_buff();
47 }
48 
49 int
OpenText(enum Pass pass,struct text * txt)50 OpenText(enum Pass pass, struct text *txt) {
51 	switch (pass) {
52 	case First:
53 		last_tk_cnt = 0;
54 		if (nl_buff) {
55 			txt->tx_nl_start = nl_free;
56 		}
57 		break;
58 
59 	case Second:
60 		last_tk_cnt = 0;
61 		if (nl_buff) {
62 			nl_next = txt->tx_nl_start;
63 			nl_limit = txt->tx_nl_limit;
64 			last_nl_cnt = 1;
65 			lex_nl_cnt = 1;
66 			lex_tk_cnt = 0;
67 			return 1;
68 		}
69 		break;
70 	}
71 
72 	return OpenStream(txt->tx_fname);
73 }
74 
75 int
NextTextTokenObtained(enum Pass pass)76 NextTextTokenObtained(enum Pass pass) {
77 	register int ok = 0;	/* gcc does not understand enum Pass */
78 
79 	switch (pass) {
80 	case First:
81 		ok = NextStreamTokenObtained();
82 		if (TOKEN_EQ(lex_token, EOL)) {
83 			store_newline();
84 			last_tk_cnt = lex_tk_cnt;
85 		}
86 		break;
87 
88 	case Second:
89 		/* get newline info from the buffer or from the file itself */
90 		if (nl_buff) {
91 			if (nl_next == nl_limit) {
92 				ok = 0;
93 			}
94 			else {
95 				struct newline *nl = &nl_buff[nl_next++];
96 
97 				lex_nl_cnt = ++last_nl_cnt;
98 				lex_tk_cnt = (last_tk_cnt += nl->nl_tk_diff);
99 				lex_token = EOL;
100 				ok = 1;
101 			}
102 		}
103 		else {
104 			while (	(ok = NextStreamTokenObtained())
105 			&&	!TOKEN_EQ(lex_token, EOL)
106 			) {
107 				/* skip */
108 			}
109 		}
110 		break;
111 	}
112 
113 	return ok;
114 }
115 
116 void
CloseText(enum Pass pass,struct text * txt)117 CloseText(enum Pass pass, struct text *txt) {
118 	switch (pass) {
119 	case First:
120 		if (nl_buff) {
121 			if (last_tk_cnt != lex_tk_cnt) {
122 				/* there were tokens after the last newline */
123 				store_newline();
124 			}
125 			txt->tx_nl_limit = nl_free;
126 		}
127 		break;
128 	case Second:
129 		break;
130 	}
131 	CloseStream();
132 }
133 
134 /*							NEWLINE CACHING */
135 
/*	To speed up pass 2, which is only interested in the token positions
	at line ends, the newline buffer keeps this information from pass 1.
	To reduce the size of the newline buffer, the information is kept as
	the differences between the values at consecutive line ends. This
	allows unsigned chars to be used rather than integers.

	The recording of token position differences at EOL is optional, and
	is switched off if
	-	there is not enough room for the newline buffer, or
	-	a difference does not fit in the field in the struct.
	Switching off is done by freeing the buffer and setting nl_buff to 0.
	Anybody using nl_buff should therefore test for nl_buff being zero.
*/
149 
/* forward declaration: stops the newline caching and releases nl_buff[] */
static void abandon_nl_buff(void);
151 
152 static void
init_nl_buff(void)153 init_nl_buff(void) {
154 	/* Allocate the newline buffer, if possible */
155 	nl_size = 0 + NL_INCR;
156 	nl_buff = (option_set('x') ? 0 :
157 		(struct newline *)malloc(sizeof (struct newline) * nl_size)
158 	);
159 }
160 
161 static void
store_newline(void)162 store_newline(void) {
163 	if (!nl_buff) return;
164 
165 	if (nl_free == nl_size) {
166 		/* allocated array is full; try to increase its size */
167 		unsigned int new_size = nl_size + NL_INCR;
168 		struct newline *new_buff = (struct newline *)realloc(
169 			(char *)nl_buff,
170 			sizeof (struct newline) * new_size
171 		);
172 
173 		if (!new_buff) {
174 			/* we failed */
175 			abandon_nl_buff();
176 			return;
177 		}
178 		nl_buff = new_buff, nl_size = new_size;
179 	}
180 
181 	/* now we are sure there is room enough */
182 	{
183 		register struct newline *nl = &nl_buff[nl_free++];
184 		register unsigned int tk_diff = lex_tk_cnt - last_tk_cnt;
185 
186 		nl->nl_tk_diff = tk_diff;
187 		if (nl->nl_tk_diff != tk_diff) {
188 			/* tk_diff does not fit in nl_tk_diff */
189 			abandon_nl_buff();
190 		}
191 	}
192 }
193 
194 static void
abandon_nl_buff(void)195 abandon_nl_buff(void) {
196 	if (nl_buff) {
197 		free((char *)nl_buff);
198 		nl_buff = 0;
199 	}
200 }
201 
202 #ifdef	DB_NL_BUFF
203 
204 void
db_print_nl_buff(unsigned int start,unsigned int limit)205 db_print_nl_buff(unsigned int start, unsigned int limit) {
206 	int i;
207 
208 	fprintf(DebugFile, "\n**** DB_NL_BUFF ****\n");
209 	if (!nl_buff) {
210 		fprintf(DebugFile, ">>>> NO NL_BUFF\n\n");
211 		return;
212 	}
213 
214 	if (start > nl_free) {
215 		fprintf(DebugFile, ">>>> start (%u) > nl_free (%u)\n\n",
216 			start, nl_free
217 		);
218 		return;
219 	}
220 	if (limit > nl_free) {
221 		fprintf(DebugFile, ">>>> limit (%u) > nl_free (%u)\n\n",
222 			limit, nl_free
223 		);
224 		return;
225 	}
226 
227 	fprintf(DebugFile, "nl_buff: %u entries:\n", nl_free);
228 	for (i = start; i < limit; i++) {
229 		struct newline *nl = &nl_buff[i];
230 
231 		fprintf(DebugFile, "nl_tk_diff = %d\n", nl->nl_tk_diff);
232 	}
233 	fprintf(DebugFile, "\n");
234 }
235 
236 #endif	/* DB_NL_BUFF */
237