1 /* Copyright (C) 2001-2019 Artifex Software, Inc.
2    All Rights Reserved.
3 
4    This software is provided AS-IS with no warranty, either express or
5    implied.
6 
7    This software is distributed under license and may not be copied,
8    modified or distributed except as expressly authorized under the terms
9    of the license contained in the file LICENSE in this distribution.
10 
11    Refer to licensing information at http://www.artifex.com or contact
12    Artifex Software, Inc.,  1305 Grant Avenue - Suite 200, Novato,
13    CA 94945, U.S.A., +1(415)492-9861, for further information.
14 */
15 
16 
17 /* String GC routines for Ghostscript */
18 #include "memory_.h"
19 #include "ghost.h"
20 #include "gsmdebug.h"
21 #include "gsstruct.h"
22 #include "iastate.h"
23 #include "igcstr.h"
24 #include "igc.h"
25 
26 /* Forward references */
27 static bool gc_mark_string(const byte *, uint, bool, const clump_t *);
28 
29 /* (Un)mark the strings in a clump. */
30 void
gc_strings_set_marks(clump_t * cp,bool mark)31 gc_strings_set_marks(clump_t * cp, bool mark)
32 {
33     if (cp->smark != 0) {
34         if_debug3('6', "[6]clearing string marks 0x%lx[%u] to %d\n",
35                   (ulong) cp->smark, cp->smark_size, (int)mark);
36         memset(cp->smark, 0, cp->smark_size);
37         if (mark)
38             gc_mark_string(cp->sbase + HDR_ID_OFFSET, (cp->climit - cp->sbase) - HDR_ID_OFFSET, true, cp);
39     }
40 }
41 
42 /* We mark strings a word at a time. */
43 typedef string_mark_unit bword;
44 
45 #define bword_log2_bytes log2_sizeof_string_mark_unit
46 #define bword_bytes (1 << bword_log2_bytes)
47 #define bword_log2_bits (bword_log2_bytes + 3)
48 #define bword_bits (1 << bword_log2_bits)
49 #define bword_1s (~(bword)0)
50 /* Compensate for byte order reversal if necessary. */
51 #if ARCH_IS_BIG_ENDIAN
52 #  if bword_bytes == 2
53 #    define bword_swap_bytes(m) m = (m << 8) | (m >> 8)
54 #  else				/* bword_bytes == 4 */
55 #    define bword_swap_bytes(m)\
56         m = (m << 24) | ((m & 0xff00) << 8) | ((m >> 8) & 0xff00) | (m >> 24)
57 #  endif
58 #else
59 #  define bword_swap_bytes(m) DO_NOTHING
60 #endif
61 
62 /* (Un)mark a string in a known clump.  Return true iff any new marks. */
63 static bool
gc_mark_string(const byte * ptr,uint size,bool set,const clump_t * cp)64 gc_mark_string(const byte * ptr, uint size, bool set, const clump_t * cp)
65 {
66     uint offset = (ptr - HDR_ID_OFFSET) - cp->sbase;
67     bword *bp = (bword *) (cp->smark + ((offset & -bword_bits) >> 3));
68     uint bn = offset & (bword_bits - 1);
69     bword m = bword_1s << bn;
70     uint left = size + HDR_ID_OFFSET;
71     bword marks = 0;
72 
73     bword_swap_bytes(m);
74     if (set) {
75         if (left + bn >= bword_bits) {
76             marks |= ~*bp & m;
77             *bp |= m;
78             m = bword_1s, left -= bword_bits - bn, bp++;
79             while (left >= bword_bits) {
80                 marks |= ~*bp;
81                 *bp = bword_1s;
82                 left -= bword_bits, bp++;
83             }
84         }
85         if (left) {
86             bword_swap_bytes(m);
87             m -= m << left;
88             bword_swap_bytes(m);
89             marks |= ~*bp & m;
90             *bp |= m;
91         }
92     } else {
93         if (left + bn >= bword_bits) {
94             *bp &= ~m;
95             m = bword_1s, left -= bword_bits - bn, bp++;
96             if (left >= bword_bits * 5) {
97                 memset(bp, 0, (left & -bword_bits) >> 3);
98                 bp += left >> bword_log2_bits;
99                 left &= bword_bits - 1;
100             } else
101                 while (left >= bword_bits) {
102                     *bp = 0;
103                     left -= bword_bits, bp++;
104                 }
105         }
106         if (left) {
107             bword_swap_bytes(m);
108             m -= m << left;
109             bword_swap_bytes(m);
110             *bp &= ~m;
111         }
112     }
113     return marks != 0;
114 }
115 
116 #ifdef DEBUG
117 /* Print a string for debugging.  We need this because there is no d---
118  * equivalent of fwrite.
119  */
120 static void
dmfwrite(const gs_memory_t * mem,const byte * ptr,uint count)121 dmfwrite(const gs_memory_t *mem, const byte *ptr, uint count)
122 {
123     uint i;
124     for (i = 0; i < count; ++i)
125         dmputc(mem, ptr[i]);
126 }
127 #endif
128 
129 /* Mark a string.  Return true if any new marks. */
130 bool
gc_string_mark(const byte * ptr,uint size,bool set,gc_state_t * gcst)131 gc_string_mark(const byte * ptr, uint size, bool set, gc_state_t * gcst)
132 {
133     const clump_t *cp;
134     bool marks;
135 
136     if (size == 0)
137         return false;
138 #define dmprintstr(mem)\
139   dmputc(mem, '('); dmfwrite(mem, ptr - HDR_ID_OFFSET, min(size, 20));\
140   dmputs(mem, (size <= 20 ? ")" : "...)"))
141     if (!(cp = gc_locate(ptr - HDR_ID_OFFSET, gcst))) {		/* not in a clump */
142 #ifdef DEBUG
143         if (gs_debug_c('5')) {
144             dmlprintf2(gcst->heap, "[5]0x%lx[%u]", (ulong) ptr - HDR_ID_OFFSET, size);
145             dmprintstr(gcst->heap);
146             dmputs(gcst->heap, " not in a clump\n");
147         }
148 #endif
149         return false;
150     }
151     if (cp->smark == 0)		/* not marking strings */
152         return false;
153 #ifdef DEBUG
154     if (ptr - HDR_ID_OFFSET < cp->ctop) {
155         lprintf4("String pointer 0x%lx[%u] outside [0x%lx..0x%lx)\n",
156                  (ulong) ptr - HDR_ID_OFFSET, size, (ulong) cp->ctop, (ulong) cp->climit);
157         return false;
158     } else if (ptr + size > cp->climit) {	/*
159                                                  * If this is the bottommost string in a clump that has
160                                                  * an inner clump, the string's starting address is both
161                                                  * cp->ctop of the outer clump and cp->climit of the inner;
162                                                  * gc_locate may incorrectly attribute the string to the
163                                                  * inner clump because of this.  This doesn't affect
164                                                  * marking or relocation, since the machinery for these
165                                                  * is all associated with the outermost clump,
166                                                  * but it can cause the validity check to fail.
167                                                  * Check for this case now.
168                                                  */
169         const clump_t *scp = cp;
170 
171         while (ptr - HDR_ID_OFFSET == scp->climit && scp->outer != 0)
172             scp = scp->outer;
173         if (ptr - HDR_ID_OFFSET + size > scp->climit) {
174             lprintf4("String pointer 0x%lx[%u] outside [0x%lx..0x%lx)\n",
175                      (ulong) ptr - HDR_ID_OFFSET, size,
176                      (ulong) scp->ctop, (ulong) scp->climit);
177             return false;
178         }
179     }
180 #endif
181     marks = gc_mark_string(ptr, size, set, cp);
182 #ifdef DEBUG
183     if (gs_debug_c('5')) {
184         dmlprintf4(gcst->heap, "[5]%s%smarked 0x%lx[%u]",
185                   (marks ? "" : "already "), (set ? "" : "un"),
186                   (ulong) ptr - HDR_ID_OFFSET, size);
187         dmprintstr(gcst->heap);
188         dmputc(gcst->heap, '\n');
189     }
190 #endif
191     return marks;
192 }
193 
194 /* Clear the relocation for strings. */
195 /* This requires setting the marks. */
196 void
gc_strings_clear_reloc(clump_t * cp)197 gc_strings_clear_reloc(clump_t * cp)
198 {
199     if (cp->sreloc != 0) {
200         gc_strings_set_marks(cp, true);
201         if_debug1('6', "[6]clearing string reloc 0x%lx\n",
202                   (ulong) cp->sreloc);
203         gc_strings_set_reloc(cp);
204     }
205 }
206 
207 /* Count the 0-bits in a byte. */
208 static const byte count_zero_bits_table[256] =
209 {
210 #define o4(n) n,n-1,n-1,n-2
211 #define o16(n) o4(n),o4(n-1),o4(n-1),o4(n-2)
212 #define o64(n) o16(n),o16(n-1),o16(n-1),o16(n-2)
213     o64(8), o64(7), o64(7), o64(6)
214 };
215 
216 #define byte_count_zero_bits(byt)\
217   (uint)(count_zero_bits_table[byt])
218 #define byte_count_one_bits(byt)\
219   (uint)(8 - count_zero_bits_table[byt])
220 
221 /* Set the relocation for the strings in a clump. */
222 /* The sreloc table stores the relocated offset from climit for */
223 /* the beginning of each block of string_data_quantum characters. */
224 void
gc_strings_set_reloc(clump_t * cp)225 gc_strings_set_reloc(clump_t * cp)
226 {
227     if (cp->sreloc != 0 && cp->smark != 0) {
228         byte *bot = cp->ctop;
229         byte *top = cp->climit;
230         uint count =
231             (top - bot + (string_data_quantum - 1)) >>
232             log2_string_data_quantum;
233         string_reloc_offset *relp =
234             cp->sreloc +
235             (cp->smark_size >> (log2_string_data_quantum - 3));
236         register const byte *bitp = cp->smark + cp->smark_size;
237         register string_reloc_offset reloc = 0;
238 
239         /* Skip initial unrelocated strings quickly. */
240 #if string_data_quantum == bword_bits || string_data_quantum == bword_bits * 2
241         {
242             /* Work around the alignment aliasing bug. */
243             const bword *wp = (const bword *)bitp;
244 
245 #if string_data_quantum == bword_bits
246 #  define RELOC_TEST_1S(wp) (wp[-1])
247 #else /* string_data_quantum == bword_bits * 2 */
248 #  define RELOC_TEST_1S(wp) (wp[-1] & wp[-2])
249 #endif
250             while (count && RELOC_TEST_1S(wp) == bword_1s) {
251                 wp -= string_data_quantum / bword_bits;
252                 *--relp = reloc += string_data_quantum;
253                 --count;
254             }
255 #undef RELOC_TEST_1S
256             bitp = (const byte *)wp;
257         }
258 #endif
259         while (count--) {
260             bitp -= string_data_quantum / 8;
261             reloc += string_data_quantum -
262                 byte_count_zero_bits(bitp[0]);
263             reloc -= byte_count_zero_bits(bitp[1]);
264             reloc -= byte_count_zero_bits(bitp[2]);
265             reloc -= byte_count_zero_bits(bitp[3]);
266 #if log2_string_data_quantum > 5
267             reloc -= byte_count_zero_bits(bitp[4]);
268             reloc -= byte_count_zero_bits(bitp[5]);
269             reloc -= byte_count_zero_bits(bitp[6]);
270             reloc -= byte_count_zero_bits(bitp[7]);
271 #endif
272             *--relp = reloc;
273         }
274     }
275     cp->sdest = cp->climit;
276 }
277 
278 /* Relocate a string pointer. */
279 void
igc_reloc_string(gs_string * sptr,gc_state_t * gcst)280 igc_reloc_string(gs_string * sptr, gc_state_t * gcst)
281 {
282     byte *ptr;
283     const clump_t *cp;
284     uint offset;
285     uint reloc;
286     const byte *bitp;
287     byte byt;
288 
289     if (sptr->size == 0) {
290         sptr->data = 0;
291         return;
292     }
293     ptr = sptr->data;
294     ptr -= HDR_ID_OFFSET;
295 
296     if (!(cp = gc_locate(ptr, gcst)))	/* not in a clump */
297         return;
298     if (cp->sreloc == 0 || cp->smark == 0)	/* not marking strings */
299         return;
300     offset = ptr - cp->sbase;
301     reloc = cp->sreloc[offset >> log2_string_data_quantum];
302     bitp = &cp->smark[offset >> 3];
303     switch (offset & (string_data_quantum - 8)) {
304 #if log2_string_data_quantum > 5
305         case 56:
306             reloc -= byte_count_one_bits(bitp[-7]);
307         case 48:
308             reloc -= byte_count_one_bits(bitp[-6]);
309         case 40:
310             reloc -= byte_count_one_bits(bitp[-5]);
311         case 32:
312             reloc -= byte_count_one_bits(bitp[-4]);
313 #endif
314         case 24:
315             reloc -= byte_count_one_bits(bitp[-3]);
316         case 16:
317             reloc -= byte_count_one_bits(bitp[-2]);
318         case 8:
319             reloc -= byte_count_one_bits(bitp[-1]);
320     }
321     byt = *bitp & (0xff >> (8 - (offset & 7)));
322     reloc -= byte_count_one_bits(byt);
323     if_debug2('5', "[5]relocate string 0x%lx to 0x%lx\n",
324               (ulong) ptr, (ulong) (cp->sdest - reloc));
325     sptr->data = (cp->sdest - reloc) + HDR_ID_OFFSET;
326 }
327 void
igc_reloc_const_string(gs_const_string * sptr,gc_state_t * gcst)328 igc_reloc_const_string(gs_const_string * sptr, gc_state_t * gcst)
329 {   /* We assume the representation of byte * and const byte * is */
330     /* the same.... */
331     igc_reloc_string((gs_string *) sptr, gcst);
332 }
333 void
igc_reloc_param_string(gs_param_string * sptr,gc_state_t * gcst)334 igc_reloc_param_string(gs_param_string * sptr, gc_state_t * gcst)
335 {
336     if (!sptr->persistent) {
337         /* We assume that gs_param_string is a subclass of gs_string. */
338         igc_reloc_string((gs_string *)sptr, gcst);
339     }
340 }
341 
342 /* Compact the strings in a clump. */
343 void
gc_strings_compact(clump_t * cp,const gs_memory_t * mem)344 gc_strings_compact(clump_t * cp, const gs_memory_t *mem)
345 {
346     if (cp->smark != 0) {
347         byte *hi = cp->climit;
348         byte *lo = cp->ctop;
349         const byte *from = hi;
350         byte *to = hi;
351         const byte *bp = cp->smark + cp->smark_size;
352 
353 #ifdef DEBUG
354         if (gs_debug_c('4') || gs_debug_c('5')) {
355             byte *base = cp->sbase;
356             uint i = (lo - base) & -string_data_quantum;
357             uint n = ROUND_UP(hi - base, string_data_quantum);
358 
359 #define R 16
360             for (; i < n; i += R) {
361                 uint j;
362 
363                 dmlprintf1(mem, "[4]0x%lx: ", (ulong) (base + i));
364                 for (j = i; j < i + R; j++) {
365                     byte ch = base[j];
366 
367                     if (ch <= 31) {
368                         dmputc(mem, '^');
369                         dmputc(mem, ch + 0100);
370                     } else
371                         dmputc(mem, ch);
372                 }
373                 dmputc(mem, ' ');
374                 for (j = i; j < i + R; j++)
375                     dmputc(mem, (cp->smark[j >> 3] & (1 << (j & 7)) ?
376                            '+' : '.'));
377 #undef R
378                 if (!(i & (string_data_quantum - 1)))
379                     dmprintf1(mem, " %u", cp->sreloc[i >> log2_string_data_quantum]);
380                 dmputc(mem, '\n');
381             }
382         }
383 #endif
384         /*
385          * Skip unmodified strings quickly.  We know that cp->smark is
386          * aligned to a string_mark_unit.
387          */
388         {
389             /* Work around the alignment aliasing bug. */
390             const bword *wp = (const bword *)bp;
391 
392             while (to > lo && wp[-1] == bword_1s)
393                 to -= bword_bits, --wp;
394             bp = (const byte *)wp;
395             while (to > lo && bp[-1] == 0xff)
396                 to -= 8, --bp;
397         }
398         from = to;
399 
400         while (from > lo) {
401             byte b = *--bp;
402 
403             from -= 8;
404             switch (b) {
405                 case 0xff:
406                     to -= 8;
407                     /*
408                      * Since we've seen a byte other than 0xff, we know
409                      * to != from at this point.
410                      */
411                     to[7] = from[7];
412                     to[6] = from[6];
413                     to[5] = from[5];
414                     to[4] = from[4];
415                     to[3] = from[3];
416                     to[2] = from[2];
417                     to[1] = from[1];
418                     to[0] = from[0];
419                     break;
420                 default:
421                     if (b & 0x80)
422                         *--to = from[7];
423                     if (b & 0x40)
424                         *--to = from[6];
425                     if (b & 0x20)
426                         *--to = from[5];
427                     if (b & 0x10)
428                         *--to = from[4];
429                     if (b & 8)
430                         *--to = from[3];
431                     if (b & 4)
432                         *--to = from[2];
433                     if (b & 2)
434                         *--to = from[1];
435                     if (b & 1)
436                         *--to = from[0];
437                     /* falls through */
438                 case 0:
439                     ;
440             }
441         }
442         gs_alloc_fill(cp->ctop, gs_alloc_fill_collected,
443                       to - cp->ctop);
444         cp->ctop = to;
445     }
446 }
447