1 /* Copyright (C) 1994, 1995, 1996, 1998, 1999, 2000 artofcode LLC.  All rights reserved.
2 
3   This program is free software; you can redistribute it and/or modify it
4   under the terms of the GNU General Public License as published by the
5   Free Software Foundation; either version 2 of the License, or (at your
6   option) any later version.
7 
8   This program is distributed in the hope that it will be useful, but
9   WITHOUT ANY WARRANTY; without even the implied warranty of
10   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11   General Public License for more details.
12 
13   You should have received a copy of the GNU General Public License along
14   with this program; if not, write to the Free Software Foundation, Inc.,
15   59 Temple Place, Suite 330, Boston, MA, 02111-1307.
16 
17 */
18 
19 /*$Id: igcstr.c,v 1.3.6.1.2.1 2003/01/17 00:49:04 giles Exp $ */
20 /* String GC routines for Ghostscript */
21 #include "memory_.h"
22 #include "ghost.h"
23 #include "gsmdebug.h"
24 #include "gsstruct.h"
25 #include "iastate.h"
26 #include "igcstr.h"
27 
28 /* Forward references */
29 private bool gc_mark_string(P4(const byte *, uint, bool, const chunk_t *));
30 
31 /* (Un)mark the strings in a chunk. */
32 void
gc_strings_set_marks(chunk_t * cp,bool mark)33 gc_strings_set_marks(chunk_t * cp, bool mark)
34 {
35     if (cp->smark != 0) {
36 	if_debug3('6', "[6]clearing string marks 0x%lx[%u] to %d\n",
37 		  (ulong) cp->smark, cp->smark_size, (int)mark);
38 	memset(cp->smark, 0, cp->smark_size);
39 	if (mark)
40 	    gc_mark_string(cp->sbase, cp->climit - cp->sbase, true, cp);
41     }
42 }
43 
44 /* We mark strings a word at a time. */
45 typedef string_mark_unit bword;
46 
47 #define bword_log2_bytes log2_sizeof_string_mark_unit
48 #define bword_bytes (1 << bword_log2_bytes)
49 #define bword_log2_bits (bword_log2_bytes + 3)
50 #define bword_bits (1 << bword_log2_bits)
51 #define bword_1s (~(bword)0)
52 /* Compensate for byte order reversal if necessary. */
53 #if arch_is_big_endian
54 #  if bword_bytes == 2
55 #    define bword_swap_bytes(m) m = (m << 8) | (m >> 8)
56 #  else				/* bword_bytes == 4 */
57 #    define bword_swap_bytes(m)\
58 	m = (m << 24) | ((m & 0xff00) << 8) | ((m >> 8) & 0xff00) | (m >> 24)
59 #  endif
60 #else
61 #  define bword_swap_bytes(m) DO_NOTHING
62 #endif
63 
64 /* (Un)mark a string in a known chunk.  Return true iff any new marks. */
65 private bool
gc_mark_string(const byte * ptr,uint size,bool set,const chunk_t * cp)66 gc_mark_string(const byte * ptr, uint size, bool set, const chunk_t * cp)
67 {
68     uint offset = ptr - cp->sbase;
69     bword *bp = (bword *) (cp->smark + ((offset & -bword_bits) >> 3));
70     uint bn = offset & (bword_bits - 1);
71     bword m = bword_1s << bn;
72     uint left = size;
73     bword marks = 0;
74 
75     bword_swap_bytes(m);
76     if (set) {
77 	if (left + bn >= bword_bits) {
78 	    marks |= ~*bp & m;
79 	    *bp |= m;
80 	    m = bword_1s, left -= bword_bits - bn, bp++;
81 	    while (left >= bword_bits) {
82 		marks |= ~*bp;
83 		*bp = bword_1s;
84 		left -= bword_bits, bp++;
85 	    }
86 	}
87 	if (left) {
88 	    bword_swap_bytes(m);
89 	    m -= m << left;
90 	    bword_swap_bytes(m);
91 	    marks |= ~*bp & m;
92 	    *bp |= m;
93 	}
94     } else {
95 	if (left + bn >= bword_bits) {
96 	    *bp &= ~m;
97 	    m = bword_1s, left -= bword_bits - bn, bp++;
98 	    if (left >= bword_bits * 5) {
99 		memset(bp, 0, (left & -bword_bits) >> 3);
100 		bp += left >> bword_log2_bits;
101 		left &= bword_bits - 1;
102 	    } else
103 		while (left >= bword_bits) {
104 		    *bp = 0;
105 		    left -= bword_bits, bp++;
106 		}
107 	}
108 	if (left) {
109 	    bword_swap_bytes(m);
110 	    m -= m << left;
111 	    bword_swap_bytes(m);
112 	    *bp &= ~m;
113 	}
114     }
115     return marks != 0;
116 }
117 
118 /* Mark a string.  Return true if any new marks. */
119 bool
gc_string_mark(const byte * ptr,uint size,bool set,gc_state_t * gcst)120 gc_string_mark(const byte * ptr, uint size, bool set, gc_state_t * gcst)
121 {
122     const chunk_t *cp;
123     bool marks;
124 
125     if (size == 0)
126 	return false;
127 #define dprintstr()\
128   dputc('('); fwrite(ptr, 1, min(size, 20), dstderr);\
129   dputs((size <= 20 ? ")" : "...)"))
130     if (!(cp = gc_locate(ptr, gcst))) {		/* not in a chunk */
131 #ifdef DEBUG
132 	if (gs_debug_c('5')) {
133 	    dlprintf2("[5]0x%lx[%u]", (ulong) ptr, size);
134 	    dprintstr();
135 	    dputs(" not in a chunk\n");
136 	}
137 #endif
138 	return false;
139     }
140     if (cp->smark == 0)		/* not marking strings */
141 	return false;
142 #ifdef DEBUG
143     if (ptr < cp->ctop) {
144 	lprintf4("String pointer 0x%lx[%u] outside [0x%lx..0x%lx)\n",
145 		 (ulong) ptr, size, (ulong) cp->ctop, (ulong) cp->climit);
146 	return false;
147     } else if (ptr + size > cp->climit) {	/*
148 						 * If this is the bottommost string in a chunk that has
149 						 * an inner chunk, the string's starting address is both
150 						 * cp->ctop of the outer chunk and cp->climit of the inner;
151 						 * gc_locate may incorrectly attribute the string to the
152 						 * inner chunk because of this.  This doesn't affect
153 						 * marking or relocation, since the machinery for these
154 						 * is all associated with the outermost chunk,
155 						 * but it can cause the validity check to fail.
156 						 * Check for this case now.
157 						 */
158 	const chunk_t *scp = cp;
159 
160 	while (ptr == scp->climit && scp->outer != 0)
161 	    scp = scp->outer;
162 	if (ptr + size > scp->climit) {
163 	    lprintf4("String pointer 0x%lx[%u] outside [0x%lx..0x%lx)\n",
164 		     (ulong) ptr, size,
165 		     (ulong) scp->ctop, (ulong) scp->climit);
166 	    return false;
167 	}
168     }
169 #endif
170     marks = gc_mark_string(ptr, size, set, cp);
171 #ifdef DEBUG
172     if (gs_debug_c('5')) {
173 	dlprintf4("[5]%s%smarked 0x%lx[%u]",
174 		  (marks ? "" : "already "), (set ? "" : "un"),
175 		  (ulong) ptr, size);
176 	dprintstr();
177 	dputc('\n');
178     }
179 #endif
180     return marks;
181 }
182 
183 /* Clear the relocation for strings. */
184 /* This requires setting the marks. */
185 void
gc_strings_clear_reloc(chunk_t * cp)186 gc_strings_clear_reloc(chunk_t * cp)
187 {
188     if (cp->sreloc != 0) {
189 	gc_strings_set_marks(cp, true);
190 	if_debug1('6', "[6]clearing string reloc 0x%lx\n",
191 		  (ulong) cp->sreloc);
192 	gc_strings_set_reloc(cp);
193     }
194 }
195 
196 /* Count the 0-bits in a byte. */
197 private const byte count_zero_bits_table[256] =
198 {
199 #define o4(n) n,n-1,n-1,n-2
200 #define o16(n) o4(n),o4(n-1),o4(n-1),o4(n-2)
201 #define o64(n) o16(n),o16(n-1),o16(n-1),o16(n-2)
202     o64(8), o64(7), o64(7), o64(6)
203 };
204 
205 #define byte_count_zero_bits(byt)\
206   (uint)(count_zero_bits_table[byt])
207 #define byte_count_one_bits(byt)\
208   (uint)(8 - count_zero_bits_table[byt])
209 
210 /* Set the relocation for the strings in a chunk. */
211 /* The sreloc table stores the relocated offset from climit for */
212 /* the beginning of each block of string_data_quantum characters. */
213 void
gc_strings_set_reloc(chunk_t * cp)214 gc_strings_set_reloc(chunk_t * cp)
215 {
216     if (cp->sreloc != 0 && cp->smark != 0) {
217 	byte *bot = cp->ctop;
218 	byte *top = cp->climit;
219 	uint count =
220 	    (top - bot + (string_data_quantum - 1)) >>
221 	    log2_string_data_quantum;
222 	string_reloc_offset *relp =
223 	    cp->sreloc +
224 	    (cp->smark_size >> (log2_string_data_quantum - 3));
225 	register const byte *bitp = cp->smark + cp->smark_size;
226 	register string_reloc_offset reloc = 0;
227 
228 	/* Skip initial unrelocated strings quickly. */
229 #if string_data_quantum == bword_bits || string_data_quantum == bword_bits * 2
230 	{
231 	    /* Work around the alignment aliasing bug. */
232 	    const bword *wp = (const bword *)bitp;
233 
234 #if string_data_quantum == bword_bits
235 #  define RELOC_TEST_1S(wp) (wp[-1])
236 #else /* string_data_quantum == bword_bits * 2 */
237 #  define RELOC_TEST_1S(wp) (wp[-1] & wp[-2])
238 #endif
239 	    while (count && RELOC_TEST_1S(wp) == bword_1s) {
240 		wp -= string_data_quantum / bword_bits;
241 		*--relp = reloc += string_data_quantum;
242 		--count;
243 	    }
244 #undef RELOC_TEST_1S
245 	    bitp = (const byte *)wp;
246 	}
247 #endif
248 	while (count--) {
249 	    bitp -= string_data_quantum / 8;
250 	    reloc += string_data_quantum -
251 		byte_count_zero_bits(bitp[0]);
252 	    reloc -= byte_count_zero_bits(bitp[1]);
253 	    reloc -= byte_count_zero_bits(bitp[2]);
254 	    reloc -= byte_count_zero_bits(bitp[3]);
255 #if log2_string_data_quantum > 5
256 	    reloc -= byte_count_zero_bits(bitp[4]);
257 	    reloc -= byte_count_zero_bits(bitp[5]);
258 	    reloc -= byte_count_zero_bits(bitp[6]);
259 	    reloc -= byte_count_zero_bits(bitp[7]);
260 #endif
261 	    *--relp = reloc;
262 	}
263     }
264     cp->sdest = cp->climit;
265 }
266 
267 /* Relocate a string pointer. */
268 void
igc_reloc_string(gs_string * sptr,gc_state_t * gcst)269 igc_reloc_string(gs_string * sptr, gc_state_t * gcst)
270 {
271     byte *ptr;
272     const chunk_t *cp;
273     uint offset;
274     uint reloc;
275     const byte *bitp;
276     byte byt;
277 
278     if (sptr->size == 0) {
279 	sptr->data = 0;
280 	return;
281     }
282     ptr = sptr->data;
283     if (!(cp = gc_locate(ptr, gcst)))	/* not in a chunk */
284 	return;
285     if (cp->sreloc == 0 || cp->smark == 0)	/* not marking strings */
286 	return;
287     offset = ptr - cp->sbase;
288     reloc = cp->sreloc[offset >> log2_string_data_quantum];
289     bitp = &cp->smark[offset >> 3];
290     switch (offset & (string_data_quantum - 8)) {
291 #if log2_string_data_quantum > 5
292 	case 56:
293 	    reloc -= byte_count_one_bits(bitp[-7]);
294 	case 48:
295 	    reloc -= byte_count_one_bits(bitp[-6]);
296 	case 40:
297 	    reloc -= byte_count_one_bits(bitp[-5]);
298 	case 32:
299 	    reloc -= byte_count_one_bits(bitp[-4]);
300 #endif
301 	case 24:
302 	    reloc -= byte_count_one_bits(bitp[-3]);
303 	case 16:
304 	    reloc -= byte_count_one_bits(bitp[-2]);
305 	case 8:
306 	    reloc -= byte_count_one_bits(bitp[-1]);
307     }
308     byt = *bitp & (0xff >> (8 - (offset & 7)));
309     reloc -= byte_count_one_bits(byt);
310     if_debug2('5', "[5]relocate string 0x%lx to 0x%lx\n",
311 	      (ulong) ptr, (ulong) (cp->sdest - reloc));
312     sptr->data = cp->sdest - reloc;
313 }
314 void
igc_reloc_const_string(gs_const_string * sptr,gc_state_t * gcst)315 igc_reloc_const_string(gs_const_string * sptr, gc_state_t * gcst)
316 {				/* We assume the representation of byte * and const byte * is */
317     /* the same.... */
318     igc_reloc_string((gs_string *) sptr, gcst);
319 }
320 
321 /* Compact the strings in a chunk. */
322 void
gc_strings_compact(chunk_t * cp)323 gc_strings_compact(chunk_t * cp)
324 {
325     if (cp->smark != 0) {
326 	byte *hi = cp->climit;
327 	byte *lo = cp->ctop;
328 	const byte *from = hi;
329 	byte *to = hi;
330 	const byte *bp = cp->smark + cp->smark_size;
331 
332 #ifdef DEBUG
333 	if (gs_debug_c('4') || gs_debug_c('5')) {
334 	    byte *base = cp->sbase;
335 	    uint i = (lo - base) & -string_data_quantum;
336 	    uint n = ROUND_UP(hi - base, string_data_quantum);
337 
338 #define R 16
339 	    for (; i < n; i += R) {
340 		uint j;
341 
342 		dlprintf1("[4]0x%lx: ", (ulong) (base + i));
343 		for (j = i; j < i + R; j++) {
344 		    byte ch = base[j];
345 
346 		    if (ch <= 31) {
347 			dputc('^');
348 			dputc(ch + 0100);
349 		    } else
350 			dputc(ch);
351 		}
352 		dputc(' ');
353 		for (j = i; j < i + R; j++)
354 		    dputc((cp->smark[j >> 3] & (1 << (j & 7)) ?
355 			   '+' : '.'));
356 #undef R
357 		if (!(i & (string_data_quantum - 1)))
358 		    dprintf1(" %u", cp->sreloc[i >> log2_string_data_quantum]);
359 		dputc('\n');
360 	    }
361 	}
362 #endif
363 	/*
364 	 * Skip unmodified strings quickly.  We know that cp->smark is
365 	 * aligned to a string_mark_unit.
366 	 */
367 	{
368 	    /* Work around the alignment aliasing bug. */
369 	    const bword *wp = (const bword *)bp;
370 
371 	    while (to > lo && wp[-1] == bword_1s)
372 		to -= bword_bits, --wp;
373 	    bp = (const byte *)wp;
374 	    while (to > lo && bp[-1] == 0xff)
375 		to -= 8, --bp;
376 	}
377 	from = to;
378 	while (from > lo) {
379 	    byte b = *--bp;
380 
381 	    from -= 8;
382 	    switch (b) {
383 		case 0xff:
384 		    to -= 8;
385 		    /*
386 		     * Since we've seen a byte other than 0xff, we know
387 		     * to != from at this point.
388 		     */
389 		    to[7] = from[7];
390 		    to[6] = from[6];
391 		    to[5] = from[5];
392 		    to[4] = from[4];
393 		    to[3] = from[3];
394 		    to[2] = from[2];
395 		    to[1] = from[1];
396 		    to[0] = from[0];
397 		    break;
398 		default:
399 		    if (b & 0x80)
400 			*--to = from[7];
401 		    if (b & 0x40)
402 			*--to = from[6];
403 		    if (b & 0x20)
404 			*--to = from[5];
405 		    if (b & 0x10)
406 			*--to = from[4];
407 		    if (b & 8)
408 			*--to = from[3];
409 		    if (b & 4)
410 			*--to = from[2];
411 		    if (b & 2)
412 			*--to = from[1];
413 		    if (b & 1)
414 			*--to = from[0];
415 		    /* falls through */
416 		case 0:
417 		    ;
418 	    }
419 	}
420 	gs_alloc_fill(cp->ctop, gs_alloc_fill_collected,
421 		      to - cp->ctop);
422 	cp->ctop = to;
423     }
424 }
425