1 /* Copyright (C) 2001-2006 Artifex Software, Inc.
2 All Rights Reserved.
3
4 This software is provided AS-IS with no warranty, either express or
5 implied.
6
7 This software is distributed under license and may not be copied, modified
8 or distributed except as expressly authorized under the terms of that
9 license. Refer to licensing information at http://www.artifex.com/
10 or contact Artifex Software, Inc., 7 Mt. Lassen Drive - Suite A-134,
11 San Rafael, CA 94903, U.S.A., +1(415)492-9861, for further information.
12 */
13
14 /* $Id: igcstr.c 9062 2008-09-03 11:42:35Z leonardo $ */
15 /* String GC routines for Ghostscript */
16 #include "memory_.h"
17 #include "ghost.h"
18 #include "gsmdebug.h"
19 #include "gsstruct.h"
20 #include "iastate.h"
21 #include "igcstr.h"
22
23 /* Forward references */
24 static bool gc_mark_string(const byte *, uint, bool, const chunk_t *);
25
26 /* (Un)mark the strings in a chunk. */
27 void
gc_strings_set_marks(chunk_t * cp,bool mark)28 gc_strings_set_marks(chunk_t * cp, bool mark)
29 {
30 if (cp->smark != 0) {
31 if_debug3('6', "[6]clearing string marks 0x%lx[%u] to %d\n",
32 (ulong) cp->smark, cp->smark_size, (int)mark);
33 memset(cp->smark, 0, cp->smark_size);
34 if (mark)
35 gc_mark_string(cp->sbase, cp->climit - cp->sbase, true, cp);
36 }
37 }
38
39 /* We mark strings a word at a time. */
40 typedef string_mark_unit bword;
41
42 #define bword_log2_bytes log2_sizeof_string_mark_unit
43 #define bword_bytes (1 << bword_log2_bytes)
44 #define bword_log2_bits (bword_log2_bytes + 3)
45 #define bword_bits (1 << bword_log2_bits)
46 #define bword_1s (~(bword)0)
47 /* Compensate for byte order reversal if necessary. */
48 #if arch_is_big_endian
49 # if bword_bytes == 2
50 # define bword_swap_bytes(m) m = (m << 8) | (m >> 8)
51 # else /* bword_bytes == 4 */
52 # define bword_swap_bytes(m)\
53 m = (m << 24) | ((m & 0xff00) << 8) | ((m >> 8) & 0xff00) | (m >> 24)
54 # endif
55 #else
56 # define bword_swap_bytes(m) DO_NOTHING
57 #endif
58
59 /* (Un)mark a string in a known chunk. Return true iff any new marks. */
60 static bool
gc_mark_string(const byte * ptr,uint size,bool set,const chunk_t * cp)61 gc_mark_string(const byte * ptr, uint size, bool set, const chunk_t * cp)
62 {
63 uint offset = ptr - cp->sbase;
64 bword *bp = (bword *) (cp->smark + ((offset & -bword_bits) >> 3));
65 uint bn = offset & (bword_bits - 1);
66 bword m = bword_1s << bn;
67 uint left = size;
68 bword marks = 0;
69
70 bword_swap_bytes(m);
71 if (set) {
72 if (left + bn >= bword_bits) {
73 marks |= ~*bp & m;
74 *bp |= m;
75 m = bword_1s, left -= bword_bits - bn, bp++;
76 while (left >= bword_bits) {
77 marks |= ~*bp;
78 *bp = bword_1s;
79 left -= bword_bits, bp++;
80 }
81 }
82 if (left) {
83 bword_swap_bytes(m);
84 m -= m << left;
85 bword_swap_bytes(m);
86 marks |= ~*bp & m;
87 *bp |= m;
88 }
89 } else {
90 if (left + bn >= bword_bits) {
91 *bp &= ~m;
92 m = bword_1s, left -= bword_bits - bn, bp++;
93 if (left >= bword_bits * 5) {
94 memset(bp, 0, (left & -bword_bits) >> 3);
95 bp += left >> bword_log2_bits;
96 left &= bword_bits - 1;
97 } else
98 while (left >= bword_bits) {
99 *bp = 0;
100 left -= bword_bits, bp++;
101 }
102 }
103 if (left) {
104 bword_swap_bytes(m);
105 m -= m << left;
106 bword_swap_bytes(m);
107 *bp &= ~m;
108 }
109 }
110 return marks != 0;
111 }
112
113 #ifdef DEBUG
114 /* Print a string for debugging. We need this because there is no d---
115 * equivalent of fwrite.
116 */
117 static void
dfwrite(const byte * ptr,uint count)118 dfwrite(const byte *ptr, uint count)
119 {
120 uint i;
121 for (i = 0; i < count; ++i)
122 dputc(ptr[i]);
123 }
124 #endif
125
126 /* Mark a string. Return true if any new marks. */
127 bool
gc_string_mark(const byte * ptr,uint size,bool set,gc_state_t * gcst)128 gc_string_mark(const byte * ptr, uint size, bool set, gc_state_t * gcst)
129 {
130 const chunk_t *cp;
131 bool marks;
132
133 if (size == 0)
134 return false;
135 #define dprintstr()\
136 dputc('('); dfwrite(ptr, min(size, 20));\
137 dputs((size <= 20 ? ")" : "...)"))
138 if (!(cp = gc_locate(ptr, gcst))) { /* not in a chunk */
139 #ifdef DEBUG
140 if (gs_debug_c('5')) {
141 dlprintf2("[5]0x%lx[%u]", (ulong) ptr, size);
142 dprintstr();
143 dputs(" not in a chunk\n");
144 }
145 #endif
146 return false;
147 }
148 if (cp->smark == 0) /* not marking strings */
149 return false;
150 #ifdef DEBUG
151 if (ptr < cp->ctop) {
152 lprintf4("String pointer 0x%lx[%u] outside [0x%lx..0x%lx)\n",
153 (ulong) ptr, size, (ulong) cp->ctop, (ulong) cp->climit);
154 return false;
155 } else if (ptr + size > cp->climit) { /*
156 * If this is the bottommost string in a chunk that has
157 * an inner chunk, the string's starting address is both
158 * cp->ctop of the outer chunk and cp->climit of the inner;
159 * gc_locate may incorrectly attribute the string to the
160 * inner chunk because of this. This doesn't affect
161 * marking or relocation, since the machinery for these
162 * is all associated with the outermost chunk,
163 * but it can cause the validity check to fail.
164 * Check for this case now.
165 */
166 const chunk_t *scp = cp;
167
168 while (ptr == scp->climit && scp->outer != 0)
169 scp = scp->outer;
170 if (ptr + size > scp->climit) {
171 lprintf4("String pointer 0x%lx[%u] outside [0x%lx..0x%lx)\n",
172 (ulong) ptr, size,
173 (ulong) scp->ctop, (ulong) scp->climit);
174 return false;
175 }
176 }
177 #endif
178 marks = gc_mark_string(ptr, size, set, cp);
179 #ifdef DEBUG
180 if (gs_debug_c('5')) {
181 dlprintf4("[5]%s%smarked 0x%lx[%u]",
182 (marks ? "" : "already "), (set ? "" : "un"),
183 (ulong) ptr, size);
184 dprintstr();
185 dputc('\n');
186 }
187 #endif
188 return marks;
189 }
190
191 /* Clear the relocation for strings. */
192 /* This requires setting the marks. */
193 void
gc_strings_clear_reloc(chunk_t * cp)194 gc_strings_clear_reloc(chunk_t * cp)
195 {
196 if (cp->sreloc != 0) {
197 gc_strings_set_marks(cp, true);
198 if_debug1('6', "[6]clearing string reloc 0x%lx\n",
199 (ulong) cp->sreloc);
200 gc_strings_set_reloc(cp);
201 }
202 }
203
204 /* Count the 0-bits in a byte. */
205 static const byte count_zero_bits_table[256] =
206 {
207 #define o4(n) n,n-1,n-1,n-2
208 #define o16(n) o4(n),o4(n-1),o4(n-1),o4(n-2)
209 #define o64(n) o16(n),o16(n-1),o16(n-1),o16(n-2)
210 o64(8), o64(7), o64(7), o64(6)
211 };
212
213 #define byte_count_zero_bits(byt)\
214 (uint)(count_zero_bits_table[byt])
215 #define byte_count_one_bits(byt)\
216 (uint)(8 - count_zero_bits_table[byt])
217
218 /* Set the relocation for the strings in a chunk. */
219 /* The sreloc table stores the relocated offset from climit for */
220 /* the beginning of each block of string_data_quantum characters. */
221 void
gc_strings_set_reloc(chunk_t * cp)222 gc_strings_set_reloc(chunk_t * cp)
223 {
224 if (cp->sreloc != 0 && cp->smark != 0) {
225 byte *bot = cp->ctop;
226 byte *top = cp->climit;
227 uint count =
228 (top - bot + (string_data_quantum - 1)) >>
229 log2_string_data_quantum;
230 string_reloc_offset *relp =
231 cp->sreloc +
232 (cp->smark_size >> (log2_string_data_quantum - 3));
233 register const byte *bitp = cp->smark + cp->smark_size;
234 register string_reloc_offset reloc = 0;
235
236 /* Skip initial unrelocated strings quickly. */
237 #if string_data_quantum == bword_bits || string_data_quantum == bword_bits * 2
238 {
239 /* Work around the alignment aliasing bug. */
240 const bword *wp = (const bword *)bitp;
241
242 #if string_data_quantum == bword_bits
243 # define RELOC_TEST_1S(wp) (wp[-1])
244 #else /* string_data_quantum == bword_bits * 2 */
245 # define RELOC_TEST_1S(wp) (wp[-1] & wp[-2])
246 #endif
247 while (count && RELOC_TEST_1S(wp) == bword_1s) {
248 wp -= string_data_quantum / bword_bits;
249 *--relp = reloc += string_data_quantum;
250 --count;
251 }
252 #undef RELOC_TEST_1S
253 bitp = (const byte *)wp;
254 }
255 #endif
256 while (count--) {
257 bitp -= string_data_quantum / 8;
258 reloc += string_data_quantum -
259 byte_count_zero_bits(bitp[0]);
260 reloc -= byte_count_zero_bits(bitp[1]);
261 reloc -= byte_count_zero_bits(bitp[2]);
262 reloc -= byte_count_zero_bits(bitp[3]);
263 #if log2_string_data_quantum > 5
264 reloc -= byte_count_zero_bits(bitp[4]);
265 reloc -= byte_count_zero_bits(bitp[5]);
266 reloc -= byte_count_zero_bits(bitp[6]);
267 reloc -= byte_count_zero_bits(bitp[7]);
268 #endif
269 *--relp = reloc;
270 }
271 }
272 cp->sdest = cp->climit;
273 }
274
275 /* Relocate a string pointer. */
276 void
igc_reloc_string(gs_string * sptr,gc_state_t * gcst)277 igc_reloc_string(gs_string * sptr, gc_state_t * gcst)
278 {
279 byte *ptr;
280 const chunk_t *cp;
281 uint offset;
282 uint reloc;
283 const byte *bitp;
284 byte byt;
285
286 if (sptr->size == 0) {
287 sptr->data = 0;
288 return;
289 }
290 ptr = sptr->data;
291 if (!(cp = gc_locate(ptr, gcst))) /* not in a chunk */
292 return;
293 if (cp->sreloc == 0 || cp->smark == 0) /* not marking strings */
294 return;
295 offset = ptr - cp->sbase;
296 reloc = cp->sreloc[offset >> log2_string_data_quantum];
297 bitp = &cp->smark[offset >> 3];
298 switch (offset & (string_data_quantum - 8)) {
299 #if log2_string_data_quantum > 5
300 case 56:
301 reloc -= byte_count_one_bits(bitp[-7]);
302 case 48:
303 reloc -= byte_count_one_bits(bitp[-6]);
304 case 40:
305 reloc -= byte_count_one_bits(bitp[-5]);
306 case 32:
307 reloc -= byte_count_one_bits(bitp[-4]);
308 #endif
309 case 24:
310 reloc -= byte_count_one_bits(bitp[-3]);
311 case 16:
312 reloc -= byte_count_one_bits(bitp[-2]);
313 case 8:
314 reloc -= byte_count_one_bits(bitp[-1]);
315 }
316 byt = *bitp & (0xff >> (8 - (offset & 7)));
317 reloc -= byte_count_one_bits(byt);
318 if_debug2('5', "[5]relocate string 0x%lx to 0x%lx\n",
319 (ulong) ptr, (ulong) (cp->sdest - reloc));
320 sptr->data = cp->sdest - reloc;
321 }
322 void
igc_reloc_const_string(gs_const_string * sptr,gc_state_t * gcst)323 igc_reloc_const_string(gs_const_string * sptr, gc_state_t * gcst)
324 { /* We assume the representation of byte * and const byte * is */
325 /* the same.... */
326 igc_reloc_string((gs_string *) sptr, gcst);
327 }
328 void
igc_reloc_param_string(gs_param_string * sptr,gc_state_t * gcst)329 igc_reloc_param_string(gs_param_string * sptr, gc_state_t * gcst)
330 {
331 if (!sptr->persistent) {
332 /* We assume that gs_param_string is a subclass of gs_string. */
333 igc_reloc_string((gs_string *)sptr, gcst);
334 }
335 }
336
337 /* Compact the strings in a chunk. */
338 void
gc_strings_compact(chunk_t * cp)339 gc_strings_compact(chunk_t * cp)
340 {
341 if (cp->smark != 0) {
342 byte *hi = cp->climit;
343 byte *lo = cp->ctop;
344 const byte *from = hi;
345 byte *to = hi;
346 const byte *bp = cp->smark + cp->smark_size;
347
348 #ifdef DEBUG
349 if (gs_debug_c('4') || gs_debug_c('5')) {
350 byte *base = cp->sbase;
351 uint i = (lo - base) & -string_data_quantum;
352 uint n = ROUND_UP(hi - base, string_data_quantum);
353
354 #define R 16
355 for (; i < n; i += R) {
356 uint j;
357
358 dlprintf1("[4]0x%lx: ", (ulong) (base + i));
359 for (j = i; j < i + R; j++) {
360 byte ch = base[j];
361
362 if (ch <= 31) {
363 dputc('^');
364 dputc(ch + 0100);
365 } else
366 dputc(ch);
367 }
368 dputc(' ');
369 for (j = i; j < i + R; j++)
370 dputc((cp->smark[j >> 3] & (1 << (j & 7)) ?
371 '+' : '.'));
372 #undef R
373 if (!(i & (string_data_quantum - 1)))
374 dprintf1(" %u", cp->sreloc[i >> log2_string_data_quantum]);
375 dputc('\n');
376 }
377 }
378 #endif
379 /*
380 * Skip unmodified strings quickly. We know that cp->smark is
381 * aligned to a string_mark_unit.
382 */
383 {
384 /* Work around the alignment aliasing bug. */
385 const bword *wp = (const bword *)bp;
386
387 while (to > lo && wp[-1] == bword_1s)
388 to -= bword_bits, --wp;
389 bp = (const byte *)wp;
390 while (to > lo && bp[-1] == 0xff)
391 to -= 8, --bp;
392 }
393 from = to;
394 while (from > lo) {
395 byte b = *--bp;
396
397 from -= 8;
398 switch (b) {
399 case 0xff:
400 to -= 8;
401 /*
402 * Since we've seen a byte other than 0xff, we know
403 * to != from at this point.
404 */
405 to[7] = from[7];
406 to[6] = from[6];
407 to[5] = from[5];
408 to[4] = from[4];
409 to[3] = from[3];
410 to[2] = from[2];
411 to[1] = from[1];
412 to[0] = from[0];
413 break;
414 default:
415 if (b & 0x80)
416 *--to = from[7];
417 if (b & 0x40)
418 *--to = from[6];
419 if (b & 0x20)
420 *--to = from[5];
421 if (b & 0x10)
422 *--to = from[4];
423 if (b & 8)
424 *--to = from[3];
425 if (b & 4)
426 *--to = from[2];
427 if (b & 2)
428 *--to = from[1];
429 if (b & 1)
430 *--to = from[0];
431 /* falls through */
432 case 0:
433 ;
434 }
435 }
436 gs_alloc_fill(cp->ctop, gs_alloc_fill_collected,
437 to - cp->ctop);
438 cp->ctop = to;
439 }
440 }
441