1 /* This file is part of the software similarity tester SIM.
2 Written by Dick Grune, Vrije Universiteit, Amsterdam.
3 $Id: text.c,v 1.2 2001/11/13 12:55:58 dick Exp $
4 */
5
6 #include <stdio.h>
7 #include <malloc.h>
8
9 #include "debug.par"
10 #include "sim.h"
11 #include "token.h"
12 #include "stream.h"
13 #include "lex.h"
14 #include "options.h"
15 #include "error.h"
16 #include "text.h"
17
/* One entry of the newline buffer; each entry describes a single line end. */
struct newline {
	/* number of tokens counted since the previously recorded line end */
	unsigned char nl_tk_diff;
};
21
/* initial size of nl_buff[] and the step by which it grows, in entries */
#define	NL_INCR		1000

/* The newline buffer; nl_buff is 0 whenever newline caching is off. */
static struct newline *nl_buff;		/* obtained from malloc/realloc */
static unsigned int nl_size;		/* number of entries allocated */
static unsigned int nl_free;		/* index of the first unused entry */

/* The part of nl_buff[] that is being read back during pass 2. */
static unsigned int nl_next;		/* index of the next entry to deliver */
static unsigned int nl_limit;		/* index just past the last entry */

static void store_newline(void);
static void init_nl_buff(void);
32
33 /* TEXT INTERFACE */
34
/* token count as it was at the most recently handled line end */
static unsigned int last_tk_cnt;
/* line counter kept while newlines are replayed from the buffer in pass 2 */
static unsigned int last_nl_cnt;
37
38 void
InitText(int nfiles)39 InitText(int nfiles) {
40 /* allocate the array of text descriptors */
41 NumberOfTexts = nfiles;
42 Text = (struct text *)
43 malloc((unsigned int)(NumberOfTexts*sizeof (struct text)));
44 if (!Text) fatal("out of memory");
45
46 init_nl_buff();
47 }
48
49 int
OpenText(enum Pass pass,struct text * txt)50 OpenText(enum Pass pass, struct text *txt) {
51 switch (pass) {
52 case First:
53 last_tk_cnt = 0;
54 if (nl_buff) {
55 txt->tx_nl_start = nl_free;
56 }
57 break;
58
59 case Second:
60 last_tk_cnt = 0;
61 if (nl_buff) {
62 nl_next = txt->tx_nl_start;
63 nl_limit = txt->tx_nl_limit;
64 last_nl_cnt = 1;
65 lex_nl_cnt = 1;
66 lex_tk_cnt = 0;
67 return 1;
68 }
69 break;
70 }
71
72 return OpenStream(txt->tx_fname);
73 }
74
75 int
NextTextTokenObtained(enum Pass pass)76 NextTextTokenObtained(enum Pass pass) {
77 register int ok = 0; /* gcc does not understand enum Pass */
78
79 switch (pass) {
80 case First:
81 ok = NextStreamTokenObtained();
82 if (TOKEN_EQ(lex_token, EOL)) {
83 store_newline();
84 last_tk_cnt = lex_tk_cnt;
85 }
86 break;
87
88 case Second:
89 /* get newline info from the buffer or from the file itself */
90 if (nl_buff) {
91 if (nl_next == nl_limit) {
92 ok = 0;
93 }
94 else {
95 struct newline *nl = &nl_buff[nl_next++];
96
97 lex_nl_cnt = ++last_nl_cnt;
98 lex_tk_cnt = (last_tk_cnt += nl->nl_tk_diff);
99 lex_token = EOL;
100 ok = 1;
101 }
102 }
103 else {
104 while ( (ok = NextStreamTokenObtained())
105 && !TOKEN_EQ(lex_token, EOL)
106 ) {
107 /* skip */
108 }
109 }
110 break;
111 }
112
113 return ok;
114 }
115
116 void
CloseText(enum Pass pass,struct text * txt)117 CloseText(enum Pass pass, struct text *txt) {
118 switch (pass) {
119 case First:
120 if (nl_buff) {
121 if (last_tk_cnt != lex_tk_cnt) {
122 /* there were tokens after the last newline */
123 store_newline();
124 }
125 txt->tx_nl_limit = nl_free;
126 }
127 break;
128 case Second:
129 break;
130 }
131 CloseStream();
132 }
133
134 /* NEWLINE CACHING */
135
/*	To speed up pass 2, which is only interested in the token positions
	at line ends, the newline buffer keeps this information from pass 1.
	To reduce the size of the newline buffer, the information is kept as
	the differences between the values at consecutive line ends. This
	allows unsigned chars to be used rather than integers.

	The recording of token position differences at EOL is optional, and
	is switched off if
	-	the option 'x' is set (the buffer is then never allocated);
	-	there is not enough room for the newline buffer;
	-	a difference would not fit in the field in the struct.
	Switching off is done by freeing the buffer and setting nl_buff to 0.
	Anybody using nl_buff should therefore test for nl_buff being zero.
*/
149
/* forward declaration; called when newline caching has to be given up */
static void abandon_nl_buff(void);
151
152 static void
init_nl_buff(void)153 init_nl_buff(void) {
154 /* Allocate the newline buffer, if possible */
155 nl_size = 0 + NL_INCR;
156 nl_buff = (option_set('x') ? 0 :
157 (struct newline *)malloc(sizeof (struct newline) * nl_size)
158 );
159 }
160
161 static void
store_newline(void)162 store_newline(void) {
163 if (!nl_buff) return;
164
165 if (nl_free == nl_size) {
166 /* allocated array is full; try to increase its size */
167 unsigned int new_size = nl_size + NL_INCR;
168 struct newline *new_buff = (struct newline *)realloc(
169 (char *)nl_buff,
170 sizeof (struct newline) * new_size
171 );
172
173 if (!new_buff) {
174 /* we failed */
175 abandon_nl_buff();
176 return;
177 }
178 nl_buff = new_buff, nl_size = new_size;
179 }
180
181 /* now we are sure there is room enough */
182 {
183 register struct newline *nl = &nl_buff[nl_free++];
184 register unsigned int tk_diff = lex_tk_cnt - last_tk_cnt;
185
186 nl->nl_tk_diff = tk_diff;
187 if (nl->nl_tk_diff != tk_diff) {
188 /* tk_diff does not fit in nl_tk_diff */
189 abandon_nl_buff();
190 }
191 }
192 }
193
194 static void
abandon_nl_buff(void)195 abandon_nl_buff(void) {
196 if (nl_buff) {
197 free((char *)nl_buff);
198 nl_buff = 0;
199 }
200 }
201
202 #ifdef DB_NL_BUFF
203
204 void
db_print_nl_buff(unsigned int start,unsigned int limit)205 db_print_nl_buff(unsigned int start, unsigned int limit) {
206 int i;
207
208 fprintf(DebugFile, "\n**** DB_NL_BUFF ****\n");
209 if (!nl_buff) {
210 fprintf(DebugFile, ">>>> NO NL_BUFF\n\n");
211 return;
212 }
213
214 if (start > nl_free) {
215 fprintf(DebugFile, ">>>> start (%u) > nl_free (%u)\n\n",
216 start, nl_free
217 );
218 return;
219 }
220 if (limit > nl_free) {
221 fprintf(DebugFile, ">>>> limit (%u) > nl_free (%u)\n\n",
222 limit, nl_free
223 );
224 return;
225 }
226
227 fprintf(DebugFile, "nl_buff: %u entries:\n", nl_free);
228 for (i = start; i < limit; i++) {
229 struct newline *nl = &nl_buff[i];
230
231 fprintf(DebugFile, "nl_tk_diff = %d\n", nl->nl_tk_diff);
232 }
233 fprintf(DebugFile, "\n");
234 }
235
236 #endif /* DB_NL_BUFF */
237