1 /* Copyright (C) 2001-2006 Artifex Software, Inc.
2    All Rights Reserved.
3 
4    This software is provided AS-IS with no warranty, either express or
5    implied.
6 
7    This software is distributed under license and may not be copied, modified
8    or distributed except as expressly authorized under the terms of that
9    license.  Refer to licensing information at http://www.artifex.com/
10    or contact Artifex Software, Inc.,  7 Mt. Lassen Drive - Suite A-134,
11    San Rafael, CA  94903, U.S.A., +1(415)492-9861, for further information.
12 */
13 
14 /* $Id: igcstr.c 9062 2008-09-03 11:42:35Z leonardo $ */
15 /* String GC routines for Ghostscript */
16 #include "memory_.h"
17 #include "ghost.h"
18 #include "gsmdebug.h"
19 #include "gsstruct.h"
20 #include "iastate.h"
21 #include "igcstr.h"
22 
23 /* Forward references */
24 static bool gc_mark_string(const byte *, uint, bool, const chunk_t *);
25 
26 /* (Un)mark the strings in a chunk. */
27 void
gc_strings_set_marks(chunk_t * cp,bool mark)28 gc_strings_set_marks(chunk_t * cp, bool mark)
29 {
30     if (cp->smark != 0) {
31 	if_debug3('6', "[6]clearing string marks 0x%lx[%u] to %d\n",
32 		  (ulong) cp->smark, cp->smark_size, (int)mark);
33 	memset(cp->smark, 0, cp->smark_size);
34 	if (mark)
35 	    gc_mark_string(cp->sbase, cp->climit - cp->sbase, true, cp);
36     }
37 }
38 
39 /* We mark strings a word at a time. */
40 typedef string_mark_unit bword;
41 
42 #define bword_log2_bytes log2_sizeof_string_mark_unit
43 #define bword_bytes (1 << bword_log2_bytes)
44 #define bword_log2_bits (bword_log2_bytes + 3)
45 #define bword_bits (1 << bword_log2_bits)
46 #define bword_1s (~(bword)0)
47 /* Compensate for byte order reversal if necessary. */
48 #if arch_is_big_endian
49 #  if bword_bytes == 2
50 #    define bword_swap_bytes(m) m = (m << 8) | (m >> 8)
51 #  else				/* bword_bytes == 4 */
52 #    define bword_swap_bytes(m)\
53 	m = (m << 24) | ((m & 0xff00) << 8) | ((m >> 8) & 0xff00) | (m >> 24)
54 #  endif
55 #else
56 #  define bword_swap_bytes(m) DO_NOTHING
57 #endif
58 
59 /* (Un)mark a string in a known chunk.  Return true iff any new marks. */
60 static bool
gc_mark_string(const byte * ptr,uint size,bool set,const chunk_t * cp)61 gc_mark_string(const byte * ptr, uint size, bool set, const chunk_t * cp)
62 {
63     uint offset = ptr - cp->sbase;
64     bword *bp = (bword *) (cp->smark + ((offset & -bword_bits) >> 3));
65     uint bn = offset & (bword_bits - 1);
66     bword m = bword_1s << bn;
67     uint left = size;
68     bword marks = 0;
69 
70     bword_swap_bytes(m);
71     if (set) {
72 	if (left + bn >= bword_bits) {
73 	    marks |= ~*bp & m;
74 	    *bp |= m;
75 	    m = bword_1s, left -= bword_bits - bn, bp++;
76 	    while (left >= bword_bits) {
77 		marks |= ~*bp;
78 		*bp = bword_1s;
79 		left -= bword_bits, bp++;
80 	    }
81 	}
82 	if (left) {
83 	    bword_swap_bytes(m);
84 	    m -= m << left;
85 	    bword_swap_bytes(m);
86 	    marks |= ~*bp & m;
87 	    *bp |= m;
88 	}
89     } else {
90 	if (left + bn >= bword_bits) {
91 	    *bp &= ~m;
92 	    m = bword_1s, left -= bword_bits - bn, bp++;
93 	    if (left >= bword_bits * 5) {
94 		memset(bp, 0, (left & -bword_bits) >> 3);
95 		bp += left >> bword_log2_bits;
96 		left &= bword_bits - 1;
97 	    } else
98 		while (left >= bword_bits) {
99 		    *bp = 0;
100 		    left -= bword_bits, bp++;
101 		}
102 	}
103 	if (left) {
104 	    bword_swap_bytes(m);
105 	    m -= m << left;
106 	    bword_swap_bytes(m);
107 	    *bp &= ~m;
108 	}
109     }
110     return marks != 0;
111 }
112 
113 #ifdef DEBUG
114 /* Print a string for debugging.  We need this because there is no d---
115  * equivalent of fwrite.
116  */
117 static void
dfwrite(const byte * ptr,uint count)118 dfwrite(const byte *ptr, uint count)
119 {
120     uint i;
121     for (i = 0; i < count; ++i)
122 	dputc(ptr[i]);
123 }
124 #endif
125 
126 /* Mark a string.  Return true if any new marks. */
127 bool
gc_string_mark(const byte * ptr,uint size,bool set,gc_state_t * gcst)128 gc_string_mark(const byte * ptr, uint size, bool set, gc_state_t * gcst)
129 {
130     const chunk_t *cp;
131     bool marks;
132 
133     if (size == 0)
134 	return false;
135 #define dprintstr()\
136   dputc('('); dfwrite(ptr, min(size, 20));\
137   dputs((size <= 20 ? ")" : "...)"))
138     if (!(cp = gc_locate(ptr, gcst))) {		/* not in a chunk */
139 #ifdef DEBUG
140 	if (gs_debug_c('5')) {
141 	    dlprintf2("[5]0x%lx[%u]", (ulong) ptr, size);
142 	    dprintstr();
143 	    dputs(" not in a chunk\n");
144 	}
145 #endif
146 	return false;
147     }
148     if (cp->smark == 0)		/* not marking strings */
149 	return false;
150 #ifdef DEBUG
151     if (ptr < cp->ctop) {
152 	lprintf4("String pointer 0x%lx[%u] outside [0x%lx..0x%lx)\n",
153 		 (ulong) ptr, size, (ulong) cp->ctop, (ulong) cp->climit);
154 	return false;
155     } else if (ptr + size > cp->climit) {	/*
156 						 * If this is the bottommost string in a chunk that has
157 						 * an inner chunk, the string's starting address is both
158 						 * cp->ctop of the outer chunk and cp->climit of the inner;
159 						 * gc_locate may incorrectly attribute the string to the
160 						 * inner chunk because of this.  This doesn't affect
161 						 * marking or relocation, since the machinery for these
162 						 * is all associated with the outermost chunk,
163 						 * but it can cause the validity check to fail.
164 						 * Check for this case now.
165 						 */
166 	const chunk_t *scp = cp;
167 
168 	while (ptr == scp->climit && scp->outer != 0)
169 	    scp = scp->outer;
170 	if (ptr + size > scp->climit) {
171 	    lprintf4("String pointer 0x%lx[%u] outside [0x%lx..0x%lx)\n",
172 		     (ulong) ptr, size,
173 		     (ulong) scp->ctop, (ulong) scp->climit);
174 	    return false;
175 	}
176     }
177 #endif
178     marks = gc_mark_string(ptr, size, set, cp);
179 #ifdef DEBUG
180     if (gs_debug_c('5')) {
181 	dlprintf4("[5]%s%smarked 0x%lx[%u]",
182 		  (marks ? "" : "already "), (set ? "" : "un"),
183 		  (ulong) ptr, size);
184 	dprintstr();
185 	dputc('\n');
186     }
187 #endif
188     return marks;
189 }
190 
191 /* Clear the relocation for strings. */
192 /* This requires setting the marks. */
193 void
gc_strings_clear_reloc(chunk_t * cp)194 gc_strings_clear_reloc(chunk_t * cp)
195 {
196     if (cp->sreloc != 0) {
197 	gc_strings_set_marks(cp, true);
198 	if_debug1('6', "[6]clearing string reloc 0x%lx\n",
199 		  (ulong) cp->sreloc);
200 	gc_strings_set_reloc(cp);
201     }
202 }
203 
204 /* Count the 0-bits in a byte. */
205 static const byte count_zero_bits_table[256] =
206 {
207 #define o4(n) n,n-1,n-1,n-2
208 #define o16(n) o4(n),o4(n-1),o4(n-1),o4(n-2)
209 #define o64(n) o16(n),o16(n-1),o16(n-1),o16(n-2)
210     o64(8), o64(7), o64(7), o64(6)
211 };
212 
213 #define byte_count_zero_bits(byt)\
214   (uint)(count_zero_bits_table[byt])
215 #define byte_count_one_bits(byt)\
216   (uint)(8 - count_zero_bits_table[byt])
217 
218 /* Set the relocation for the strings in a chunk. */
219 /* The sreloc table stores the relocated offset from climit for */
220 /* the beginning of each block of string_data_quantum characters. */
221 void
gc_strings_set_reloc(chunk_t * cp)222 gc_strings_set_reloc(chunk_t * cp)
223 {
224     if (cp->sreloc != 0 && cp->smark != 0) {
225 	byte *bot = cp->ctop;
226 	byte *top = cp->climit;
227 	uint count =
228 	    (top - bot + (string_data_quantum - 1)) >>
229 	    log2_string_data_quantum;
230 	string_reloc_offset *relp =
231 	    cp->sreloc +
232 	    (cp->smark_size >> (log2_string_data_quantum - 3));
233 	register const byte *bitp = cp->smark + cp->smark_size;
234 	register string_reloc_offset reloc = 0;
235 
236 	/* Skip initial unrelocated strings quickly. */
237 #if string_data_quantum == bword_bits || string_data_quantum == bword_bits * 2
238 	{
239 	    /* Work around the alignment aliasing bug. */
240 	    const bword *wp = (const bword *)bitp;
241 
242 #if string_data_quantum == bword_bits
243 #  define RELOC_TEST_1S(wp) (wp[-1])
244 #else /* string_data_quantum == bword_bits * 2 */
245 #  define RELOC_TEST_1S(wp) (wp[-1] & wp[-2])
246 #endif
247 	    while (count && RELOC_TEST_1S(wp) == bword_1s) {
248 		wp -= string_data_quantum / bword_bits;
249 		*--relp = reloc += string_data_quantum;
250 		--count;
251 	    }
252 #undef RELOC_TEST_1S
253 	    bitp = (const byte *)wp;
254 	}
255 #endif
256 	while (count--) {
257 	    bitp -= string_data_quantum / 8;
258 	    reloc += string_data_quantum -
259 		byte_count_zero_bits(bitp[0]);
260 	    reloc -= byte_count_zero_bits(bitp[1]);
261 	    reloc -= byte_count_zero_bits(bitp[2]);
262 	    reloc -= byte_count_zero_bits(bitp[3]);
263 #if log2_string_data_quantum > 5
264 	    reloc -= byte_count_zero_bits(bitp[4]);
265 	    reloc -= byte_count_zero_bits(bitp[5]);
266 	    reloc -= byte_count_zero_bits(bitp[6]);
267 	    reloc -= byte_count_zero_bits(bitp[7]);
268 #endif
269 	    *--relp = reloc;
270 	}
271     }
272     cp->sdest = cp->climit;
273 }
274 
275 /* Relocate a string pointer. */
276 void
igc_reloc_string(gs_string * sptr,gc_state_t * gcst)277 igc_reloc_string(gs_string * sptr, gc_state_t * gcst)
278 {
279     byte *ptr;
280     const chunk_t *cp;
281     uint offset;
282     uint reloc;
283     const byte *bitp;
284     byte byt;
285 
286     if (sptr->size == 0) {
287 	sptr->data = 0;
288 	return;
289     }
290     ptr = sptr->data;
291     if (!(cp = gc_locate(ptr, gcst)))	/* not in a chunk */
292 	return;
293     if (cp->sreloc == 0 || cp->smark == 0)	/* not marking strings */
294 	return;
295     offset = ptr - cp->sbase;
296     reloc = cp->sreloc[offset >> log2_string_data_quantum];
297     bitp = &cp->smark[offset >> 3];
298     switch (offset & (string_data_quantum - 8)) {
299 #if log2_string_data_quantum > 5
300 	case 56:
301 	    reloc -= byte_count_one_bits(bitp[-7]);
302 	case 48:
303 	    reloc -= byte_count_one_bits(bitp[-6]);
304 	case 40:
305 	    reloc -= byte_count_one_bits(bitp[-5]);
306 	case 32:
307 	    reloc -= byte_count_one_bits(bitp[-4]);
308 #endif
309 	case 24:
310 	    reloc -= byte_count_one_bits(bitp[-3]);
311 	case 16:
312 	    reloc -= byte_count_one_bits(bitp[-2]);
313 	case 8:
314 	    reloc -= byte_count_one_bits(bitp[-1]);
315     }
316     byt = *bitp & (0xff >> (8 - (offset & 7)));
317     reloc -= byte_count_one_bits(byt);
318     if_debug2('5', "[5]relocate string 0x%lx to 0x%lx\n",
319 	      (ulong) ptr, (ulong) (cp->sdest - reloc));
320     sptr->data = cp->sdest - reloc;
321 }
322 void
igc_reloc_const_string(gs_const_string * sptr,gc_state_t * gcst)323 igc_reloc_const_string(gs_const_string * sptr, gc_state_t * gcst)
324 {   /* We assume the representation of byte * and const byte * is */
325     /* the same.... */
326     igc_reloc_string((gs_string *) sptr, gcst);
327 }
328 void
igc_reloc_param_string(gs_param_string * sptr,gc_state_t * gcst)329 igc_reloc_param_string(gs_param_string * sptr, gc_state_t * gcst)
330 {
331     if (!sptr->persistent) {
332 	/* We assume that gs_param_string is a subclass of gs_string. */
333 	igc_reloc_string((gs_string *)sptr, gcst);
334     }
335 }
336 
337 /* Compact the strings in a chunk. */
338 void
gc_strings_compact(chunk_t * cp)339 gc_strings_compact(chunk_t * cp)
340 {
341     if (cp->smark != 0) {
342 	byte *hi = cp->climit;
343 	byte *lo = cp->ctop;
344 	const byte *from = hi;
345 	byte *to = hi;
346 	const byte *bp = cp->smark + cp->smark_size;
347 
348 #ifdef DEBUG
349 	if (gs_debug_c('4') || gs_debug_c('5')) {
350 	    byte *base = cp->sbase;
351 	    uint i = (lo - base) & -string_data_quantum;
352 	    uint n = ROUND_UP(hi - base, string_data_quantum);
353 
354 #define R 16
355 	    for (; i < n; i += R) {
356 		uint j;
357 
358 		dlprintf1("[4]0x%lx: ", (ulong) (base + i));
359 		for (j = i; j < i + R; j++) {
360 		    byte ch = base[j];
361 
362 		    if (ch <= 31) {
363 			dputc('^');
364 			dputc(ch + 0100);
365 		    } else
366 			dputc(ch);
367 		}
368 		dputc(' ');
369 		for (j = i; j < i + R; j++)
370 		    dputc((cp->smark[j >> 3] & (1 << (j & 7)) ?
371 			   '+' : '.'));
372 #undef R
373 		if (!(i & (string_data_quantum - 1)))
374 		    dprintf1(" %u", cp->sreloc[i >> log2_string_data_quantum]);
375 		dputc('\n');
376 	    }
377 	}
378 #endif
379 	/*
380 	 * Skip unmodified strings quickly.  We know that cp->smark is
381 	 * aligned to a string_mark_unit.
382 	 */
383 	{
384 	    /* Work around the alignment aliasing bug. */
385 	    const bword *wp = (const bword *)bp;
386 
387 	    while (to > lo && wp[-1] == bword_1s)
388 		to -= bword_bits, --wp;
389 	    bp = (const byte *)wp;
390 	    while (to > lo && bp[-1] == 0xff)
391 		to -= 8, --bp;
392 	}
393 	from = to;
394 	while (from > lo) {
395 	    byte b = *--bp;
396 
397 	    from -= 8;
398 	    switch (b) {
399 		case 0xff:
400 		    to -= 8;
401 		    /*
402 		     * Since we've seen a byte other than 0xff, we know
403 		     * to != from at this point.
404 		     */
405 		    to[7] = from[7];
406 		    to[6] = from[6];
407 		    to[5] = from[5];
408 		    to[4] = from[4];
409 		    to[3] = from[3];
410 		    to[2] = from[2];
411 		    to[1] = from[1];
412 		    to[0] = from[0];
413 		    break;
414 		default:
415 		    if (b & 0x80)
416 			*--to = from[7];
417 		    if (b & 0x40)
418 			*--to = from[6];
419 		    if (b & 0x20)
420 			*--to = from[5];
421 		    if (b & 0x10)
422 			*--to = from[4];
423 		    if (b & 8)
424 			*--to = from[3];
425 		    if (b & 4)
426 			*--to = from[2];
427 		    if (b & 2)
428 			*--to = from[1];
429 		    if (b & 1)
430 			*--to = from[0];
431 		    /* falls through */
432 		case 0:
433 		    ;
434 	    }
435 	}
436 	gs_alloc_fill(cp->ctop, gs_alloc_fill_collected,
437 		      to - cp->ctop);
438 	cp->ctop = to;
439     }
440 }
441