/* libpbm3.c - pbm utility library part 3
**
** Copyright (C) 1988 by Jef Poskanzer.
**
** Permission to use, copy, modify, and distribute this software and its
** documentation for any purpose and without fee is hereby granted, provided
** that the above copyright notice appear in all copies and that both that
** copyright notice and this permission notice appear in supporting
** documentation.  This software is provided "as is" without express or
** implied warranty.
*/
12 
#include <assert.h>
#include <string.h>

#include "netpbm/pm_c_util.h"

#include "pbm.h"
18 
/* PACKBITS_SSE selects the code that packs a row of bits into bytes for
   raw PBM output: 2 means the SSE2 implementation, anything else means the
   generic C implementation.  The builder can preset it; otherwise we choose
   based on the build environment.
*/
#ifndef PACKBITS_SSE
#if WANT_SSE && defined(__SSE2__) && HAVE_GCC_BSWAP
  #define PACKBITS_SSE 2
#else
  #define PACKBITS_SSE 0
#endif
#endif

/* WANT_SSE means we want to use SSE CPU facilities to make PBM raster
   processing faster.  This implies it's actually possible - i.e. the
   build environment has <emmintrin.h>.

   The GNU Compiler -msse2 option makes SSE/SSE2 available, and is
   evidenced by __SSE2__.
   For x86-32 with SSE, "-msse2" must be explicitly given.
   For x86-64 and AMD64, "-msse2" is the default (from Gcc v.4.)
*/

#if PACKBITS_SSE == 2
  #include <emmintrin.h>
#endif
40 
41 
42 void
pbm_writepbminit(FILE * const fileP,int const cols,int const rows,int const forceplain)43 pbm_writepbminit(FILE * const fileP,
44                  int    const cols,
45                  int    const rows,
46                  int    const forceplain) {
47 
48     if (!forceplain && !pm_plain_output) {
49         fprintf(fileP, "%c%c\n%d %d\n", PBM_MAGIC1, RPBM_MAGIC2, cols, rows);
50     } else
51         fprintf(fileP, "%c%c\n%d %d\n", PBM_MAGIC1, PBM_MAGIC2, cols, rows);
52 }
53 
54 
55 
static void
writePackedRawRow(FILE *                const fileP,
                  const unsigned char * const packedBits,
                  unsigned int          const cols) {
/*----------------------------------------------------------------------------
   Write the packed row at 'packedBits', which holds 'cols' columns, to
   *fileP.  The number of bytes written is pbm_packed_bytes(cols).

   Call pm_error() if fwrite() reports fewer bytes written than requested.
-----------------------------------------------------------------------------*/
    unsigned int const packedByteCt = pbm_packed_bytes(cols);

    size_t writtenByteCt;

    writtenByteCt = fwrite(packedBits, 1, packedByteCt, fileP);
    if (writtenByteCt < packedByteCt)
        pm_error("I/O error writing packed row to raw PBM file.  "
                 "(Attempted fwrite() of %u packed bytes; "
                 "only %u got written)",
                 packedByteCt, (unsigned)writtenByteCt);
}
72 
73 
74 
#if PACKBITS_SSE == 2
static void
packBitsWithSse2(FILE *          const fileP,
                 const bit *     const bitrow,
                 unsigned char * const packedBits,
                 unsigned int    const cols) {
/*----------------------------------------------------------------------------
    Pack the bits of bitrow[] into bytes at 'packedBits'.

    Use the SSE2 facilities to pack the bits quickly, but
    perform the same function as the simpler
    packBitsGeneric() + packPartialBytes()

    Unlike packBitsGeneric(), the whole row is converted.

    'fileP' is not used; it is present only to keep the parameter list
    parallel with packBitsGeneric().

    NOTE(review): the byte comparison is signed, so a bitrow[] element with
    value 0x80 or above packs as 0 here, whereas packBitsGeneric() packs any
    nonzero value as 1.  This matters only if a caller stores something
    other than 0 or 1 in bitrow[].
-----------------------------------------------------------------------------*/
    /*
      We use 2 SSE registers.

      The key machine instructions are:

      PCMPGTB128  Packed CoMPare Greater Than Byte

        Compares 16 bytes in parallel
        Result is xFF if greater than, x00 if not for each byte


      PMOVMSKB128 Packed MOVe MaSK Byte

        Result is 16 bits, the MSBs of 16 bytes
        x00 xFF x00 xFF xFF xFF x00 x00 xFF xFF xFF xFF x00 x00 x00 x00
        --> 0101110011110000B = 0x5CF0

        The intrinsic returns the result as an int; the bits above the
        low 16 are always 0.

      We use SSE instructions in "_mm_" form in favor of "__builtin_".
      In GCC the "__builtin_" form is documented but "_mm_" is not.
      Former versions of this source file used "__builtin_".  This was
      changed to make possible compilation with clang, which does not
      implement some "__builtin_" forms.

      __builtin_ia32_pcmpgtb128 :  _mm_cmpgt_epi8
      __builtin_ia32_pmovmskb128 : _mm_movemask_epi8

      The conversion requires <emmintrin.h> .

      We move data between bitrow[]/packedBits[] and wider integers with
      memcpy() rather than pointer casts, because those arrays have no
      alignment guarantee for 64 or 16 bit accesses and a cast access would
      violate the aliasing rules.  The compiler turns these small fixed-size
      copies into plain loads and stores.
    */

    union {
        __m128i       v128;
        uint64_t      i64[2];
        unsigned char byte[16];
    } bit128;

    __m128i const zero128 = _mm_setzero_si128();

    unsigned int col;

    for (col = 0; col + 15 < cols; col += 16) {
        uint64_t leftHalf, rightHalf;
        uint16_t blackMask;

        memcpy(&leftHalf,  &bitrow[col],     sizeof(leftHalf));
        memcpy(&rightHalf, &bitrow[col + 8], sizeof(rightHalf));

        /* Reverse the byte order within each group of 8 columns so that
           the leftmost column ends up in the most significant bit of its
           byte of the mask.
        */
        bit128.i64[0] = __builtin_bswap64(leftHalf);
        bit128.i64[1] = __builtin_bswap64(rightHalf);

        blackMask = (uint16_t)
            _mm_movemask_epi8(_mm_cmpgt_epi8(bit128.v128, zero128));

        memcpy(&packedBits[col/8], &blackMask, sizeof(blackMask));
    }

    if (cols % 16 > 0) {
        /* Fewer than 16 columns remain.  Place them one by one, in the
           same reversed order as above, into a zeroed register.
        */
        unsigned int i, j;
        uint16_t blackMask;

        bit128.v128 = zero128;

        for (i = 0, j = col ; j < cols; ++i, ++j)
            bit128.byte[ (i&8) + 7-(i&7) ] = bitrow[j];

        blackMask = (uint16_t)
            _mm_movemask_epi8(_mm_cmpgt_epi8(bit128.v128, zero128));

        if (cols % 16 > 8)  /* Two partial bytes */
            memcpy(&packedBits[col/8], &blackMask, sizeof(blackMask));
        else                /* One partial byte */
            packedBits[col/8] = (unsigned char) blackMask;
    }
}
#else
/* Avoid undefined function warning; never actually called */

#define packBitsWithSse2(a,b,c,d) packBitsGeneric((a),(b),(c),(d),NULL)
#endif
172 
173 
174 
static unsigned int
bitValue(unsigned char const byteValue) {
/*----------------------------------------------------------------------------
   Return 0 if 'byteValue' is zero and 1 if it is anything else.
-----------------------------------------------------------------------------*/
    return byteValue == 0 ? 0 : 1;
}
180 
181 
182 
183 static void
packBitsGeneric(FILE * const fileP,const bit * const bitrow,unsigned char * const packedBits,unsigned int const cols,unsigned int * const nextColP)184 packBitsGeneric(FILE *          const fileP,
185                 const bit *     const bitrow,
186                 unsigned char * const packedBits,
187                 unsigned int    const cols,
188                 unsigned int *  const nextColP) {
189 /*----------------------------------------------------------------------------
190    Pack the bits of bitrow[] into bytes at 'packedBits'.  Going left to right,
191    stop when there aren't enough bits left to fill a whole byte.  Return
192    as *nextColP the number of the next column after the rightmost one we
193    packed.
194 
195    Don't use any special CPU facilities to do the packing.
196 -----------------------------------------------------------------------------*/
197     unsigned int col;
198 
199     for (col = 0; col + 7 < cols; col += 8)
200         packedBits[col/8] = (
201             bitValue(bitrow[col+0]) << 7 |
202             bitValue(bitrow[col+1]) << 6 |
203             bitValue(bitrow[col+2]) << 5 |
204             bitValue(bitrow[col+3]) << 4 |
205             bitValue(bitrow[col+4]) << 3 |
206             bitValue(bitrow[col+5]) << 2 |
207             bitValue(bitrow[col+6]) << 1 |
208             bitValue(bitrow[col+7]) << 0
209             );
210     *nextColP = col;
211 }
212 
213 
214 
215 static void
packPartialBytes(const bit * const bitrow,unsigned int const cols,unsigned int const nextCol,unsigned char * const packedBits)216 packPartialBytes(const bit *     const bitrow,
217                  unsigned int    const cols,
218                  unsigned int    const nextCol,
219                  unsigned char * const packedBits) {
220 
221     /* routine for partial byte at the end of packedBits[]
222        Prior to addition of the above enhancement,
223        this method was used for the entire process
224     */
225 
226     unsigned int col;
227     int bitshift;
228     unsigned char item;
229 
230     bitshift = 7;  /* initial value */
231     item = 0;      /* initial value */
232     for (col = nextCol; col < cols; ++col, --bitshift)
233         if (bitrow[col] != 0)
234             item |= 1 << bitshift;
235 
236     packedBits[col/8] = item;
237 }
238 
239 
240 
241 static void
writePbmRowRaw(FILE * const fileP,const bit * const bitrow,int const cols)242 writePbmRowRaw(FILE *      const fileP,
243                const bit * const bitrow,
244                int         const cols) {
245 
246     jmp_buf jmpbuf;
247     jmp_buf * origJmpbufP;
248     unsigned char * packedBits;
249 
250     packedBits = pbm_allocrow_packed(cols);
251 
252     if (setjmp(jmpbuf) != 0) {
253         pbm_freerow_packed(packedBits);
254         pm_setjmpbuf(origJmpbufP);
255         pm_longjmp();
256     } else {
257 
258         pm_setjmpbufsave(&jmpbuf, &origJmpbufP);
259 
260         switch (PACKBITS_SSE) {
261         case 2:
262             packBitsWithSse2(fileP, bitrow, packedBits, cols);
263             break;
264         default: {
265             unsigned int nextCol;
266             packBitsGeneric(fileP, bitrow, packedBits, cols, &nextCol);
267             if (cols % 8 > 0)
268                 packPartialBytes(bitrow, cols, nextCol, packedBits);
269         }
270         }
271         writePackedRawRow(fileP, packedBits, cols);
272 
273         pm_setjmpbuf(origJmpbufP);
274     }
275     pbm_freerow_packed(packedBits);
276 }
277 
278 
279 
280 static void
writePbmRowPlain(FILE * const fileP,const bit * const bitrow,int const cols)281 writePbmRowPlain(FILE *      const fileP,
282                  const bit * const bitrow,
283                  int         const cols) {
284 
285     int col, charcount;
286 
287     charcount = 0;
288     for (col = 0; col < cols; ++col) {
289         if (charcount >= 70) {
290             putc('\n', fileP);
291             charcount = 0;
292         }
293         putc(bitrow[col] ? '1' : '0', fileP);
294         ++charcount;
295     }
296     putc('\n', fileP);
297 }
298 
299 
300 
301 void
pbm_writepbmrow(FILE * const fileP,const bit * const bitrow,int const cols,int const forceplain)302 pbm_writepbmrow(FILE *       const fileP,
303                 const bit *  const bitrow,
304                 int          const cols,
305                 int          const forceplain) {
306 
307     if (!forceplain && !pm_plain_output)
308         writePbmRowRaw(fileP, bitrow, cols);
309     else
310         writePbmRowPlain(fileP, bitrow, cols);
311 }
312 
313 
314 
315 void
pbm_writepbmrow_packed(FILE * const fileP,const unsigned char * const packedBits,int const cols,int const forceplain)316 pbm_writepbmrow_packed(FILE *                const fileP,
317                        const unsigned char * const packedBits,
318                        int                   const cols,
319                        int                   const forceplain) {
320 
321     if (!forceplain && !pm_plain_output)
322         writePackedRawRow(fileP, packedBits, cols);
323     else {
324         jmp_buf jmpbuf;
325         jmp_buf * origJmpbufP;
326         bit * bitrow;
327 
328         bitrow = pbm_allocrow(cols);
329 
330         if (setjmp(jmpbuf) != 0) {
331             pbm_freerow(bitrow);
332             pm_setjmpbuf(origJmpbufP);
333             pm_longjmp();
334         } else {
335             unsigned int col;
336 
337             pm_setjmpbufsave(&jmpbuf, &origJmpbufP);
338 
339             for (col = 0; col < cols; ++col)
340                 bitrow[col] =
341                     packedBits[col/8] & (0x80 >> (col%8)) ?
342                     PBM_BLACK : PBM_WHITE;
343 
344             writePbmRowPlain(fileP, bitrow, cols);
345 
346             pm_setjmpbuf(origJmpbufP);
347         }
348         pbm_freerow(bitrow);
349     }
350 }
351 
352 
353 
static unsigned char
leftBits(unsigned char const x,
         unsigned int  const n) {
/*----------------------------------------------------------------------------
   Clear rightmost (8-n) bits, retain leftmost (=high) n bits.

   'n' must be less than 8 (asserted).  With n == 0 the result is 0.
-----------------------------------------------------------------------------*/
    unsigned char buffer;

    assert(n < 8);

    buffer = x;

    /* Shift the unwanted low bits out, then shift zeroes back in */
    buffer >>= (8-n);
    buffer <<= (8-n);

    return buffer;
}
371 
372 
373 
void
pbm_writepbmrow_bitoffset(FILE *          const fileP,
                          unsigned char * const packedBits,
                          unsigned int    const cols,
                          int             const format,
                          unsigned int    const offset) {
/*----------------------------------------------------------------------------
   Write PBM row from a packed bit buffer 'packedBits', starting at the
   specified offset 'offset' (in bits) in the buffer.

   We destroy the buffer: we shift the image bits in place so they start on
   a byte boundary, and clear the unused bits of the last byte.

   'format' is not used; we always call pbm_writepbmrow_packed() with
   forceplain = 0.
-----------------------------------------------------------------------------*/
    unsigned int const rsh = offset % 8;
    unsigned int const lsh = (8 - rsh) % 8;
    unsigned int const csh = cols % 8;
    unsigned char * const window = &packedBits[offset/8];
        /* Area of packed row buffer from which we take the image data.
           Aligned to nearest byte boundary to the left, so the first
           few bits might be irrelevant.

           Also our work buffer, in which we shift bits and from which we
           ultimately write the bits to the file.
        */
    unsigned int const colByteCnt = pbm_packed_bytes(cols);
    unsigned int const last = colByteCnt - 1;
        /* Position within window of rightmost byte after shift */

    bool const carryover = (csh == 0 || rsh + csh > 8);
        /* TRUE:  Input comes from colByteCnt bytes and one extra byte.
           FALSE: Input comes from colByteCnt bytes.  For example:
           TRUE:  xxxxxxii iiiiiiii iiiiiiii iiixxxxx  cols=21, offset=6
           FALSE: xiiiiiii iiiiiiii iiiiiixx ________  cols=21, offset=1

           We treat these differently for in the FALSE case the byte after
           last (indicated by ________) may not exist.
        */

    if (rsh > 0) {
        unsigned int const shiftBytes =  carryover ? colByteCnt : colByteCnt-1;

        unsigned int i;
        for (i = 0; i < shiftBytes; ++i)
            window[i] = window[i] << rsh | window[i+1] >> lsh;

        /* The last byte has no right-hand neighbor to borrow bits from */
        if (!carryover)
            window[last] = window[last] << rsh;
    }

    if (csh > 0)
        window[last] = leftBits(window[last], csh);

    pbm_writepbmrow_packed(fileP, window, cols, 0);
}
427 
428 
429 
430 void
pbm_writepbm(FILE * const fileP,bit ** const bits,int const cols,int const rows,int const forceplain)431 pbm_writepbm(FILE * const fileP,
432              bit ** const bits,
433              int    const cols,
434              int    const rows,
435              int    const forceplain) {
436 
437     int row;
438 
439     pbm_writepbminit(fileP, cols, rows, forceplain);
440 
441     for (row = 0; row < rows; ++row)
442         pbm_writepbmrow(fileP, bits[row], cols, forceplain);
443 }
444