1 /* libpbm3.c - pbm utility library part 3
2 **
3 ** Copyright (C) 1988 by Jef Poskanzer.
4 **
5 ** Permission to use, copy, modify, and distribute this software and its
6 ** documentation for any purpose and without fee is hereby granted, provided
7 ** that the above copyright notice appear in all copies and that both that
8 ** copyright notice and this permission notice appear in supporting
9 ** documentation. This software is provided "as is" without express or
10 ** implied warranty.
11 */
12
13 #include <assert.h>
14
15 #include "netpbm/pm_c_util.h"
16
17 #include "pbm.h"
18
19 #ifndef PACKBITS_SSE
20 #if WANT_SSE && defined(__SSE2__) && HAVE_GCC_BSWAP
21 #define PACKBITS_SSE 2
22 #else
23 #define PACKBITS_SSE 0
24 #endif
25 #endif
26
/* WANT_SSE means we want to use SSE CPU facilities to make PBM raster
   processing faster.  This implies it's actually possible - i.e. the
   build environment has <emmintrin.h>.

   The GNU Compiler's -msse2 option makes SSE/SSE2 available; the compiler
   signals this by defining __SSE2__.
   For x86-32 with SSE, "-msse2" must be given explicitly.
   For x86-64 (AMD64), "-msse2" is the default (since GCC version 4).
*/
36
37 #if PACKBITS_SSE == 2
38 #include <emmintrin.h>
39 #endif
40
41
42 void
pbm_writepbminit(FILE * const fileP,int const cols,int const rows,int const forceplain)43 pbm_writepbminit(FILE * const fileP,
44 int const cols,
45 int const rows,
46 int const forceplain) {
47
48 if (!forceplain && !pm_plain_output) {
49 fprintf(fileP, "%c%c\n%d %d\n", PBM_MAGIC1, RPBM_MAGIC2, cols, rows);
50 } else
51 fprintf(fileP, "%c%c\n%d %d\n", PBM_MAGIC1, PBM_MAGIC2, cols, rows);
52 }
53
54
55
static void
writePackedRawRow(FILE *                const fileP,
                  const unsigned char * const packedBits,
                  unsigned int          const cols) {
/*----------------------------------------------------------------------------
   Write the packed row at 'packedBits', which represents 'cols' columns,
   to *fileP with a single fwrite().

   If fwrite() reports fewer bytes written than requested, call pm_error().
-----------------------------------------------------------------------------*/
    unsigned int const bytesToWrite = pbm_packed_bytes(cols);
    size_t       const bytesWritten =
        fwrite(packedBits, 1, bytesToWrite, fileP);

    if (bytesWritten < bytesToWrite) {
        pm_error("I/O error writing packed row to raw PBM file. "
                 "(Attempted fwrite() of %u packed bytes; "
                 "only %u got written)",
                 bytesToWrite, (unsigned)bytesWritten);
    }
}
72
73
74
#if PACKBITS_SSE == 2
static void
packBitsWithSse2(FILE *          const fileP,
                 const bit *     const bitrow,
                 unsigned char * const packedBits,
                 unsigned int    const cols) {
/*----------------------------------------------------------------------------
   Pack the bits of bitrow[] into bytes at 'packedBits'.

   Use the SSE2 facilities to pack the bits quickly, but
   perform the same function as the simpler
   packBitsGeneric() + packPartialBytes()

   Unlike packBitsGeneric(), the whole row is converted, including the
   partial byte(s) at the right end.

   'fileP' is not used.

   NOTE(review): the comparison below is a signed byte compare, so an
   element of bitrow[] with its high bit set would pack as 0 here, whereas
   packBitsGeneric() packs any nonzero value as 1.  Presumably bitrow[]
   only ever holds 0 and 1 - confirm.
-----------------------------------------------------------------------------*/
    /*
      We use 2 SSE registers.

      The key machine instructions are:

        PCMPGTB128  Packed CoMPare Greater Than Byte

          Compares 16 bytes in parallel
          Result is xFF if greater than, x00 if not for each byte


        PMOVMSKB128 Packed MOVe MaSK Byte

          Result is 16 bits, the MSBs of 16 bytes
          x00 xFF x00 xFF  xFF xFF x00 x00  xFF xFF xFF xFF  x00 x00 x00 x00
          --> 0101110011110000B = 0x5CF0

          The intrinsic returns a wider integer, but the bits above the
          low 16 are always 0.

      We use SSE instructions in "_mm_" form in favor of "__builtin_".
      In GCC the "__builtin_" form is documented but "_mm_" is not.
      Former versions of this source file used "__builtin_".  This was
      changed to make possible compilation with clang, which does not
      implement some "__builtin_" forms.

        __builtin_ia32_pcmpgtb128 :  _mm_cmpgt_epi8
        __builtin_ia32_pmovmskb128 : _mm_movemask_epi8

      The conversion requires <emmintrin.h> .
    */

    typedef char v16qi __attribute__ ((vector_size(16)));

    unsigned int col;
    union {
        v16qi         v16;
        uint64_t      i64[2];
        unsigned char byte[16];
    } bit128;

    /* Obtain zero from the intrinsic rather than by XORing an
       uninitialized variable with itself; the latter reads an
       indeterminate value.
    */
    v16qi const zero128 = (v16qi) _mm_setzero_si128();

    for (col = 0; col + 15 < cols; col += 16) {
        uint64_t leftHalf, rightHalf;

        /* Copy the bytes out instead of dereferencing a cast pointer, so
           we make no assumption about the alignment of bitrow[] and do
           not access it through an incompatible type.
        */
        __builtin_memcpy(&leftHalf,  &bitrow[col],     sizeof(leftHalf));
        __builtin_memcpy(&rightHalf, &bitrow[col + 8], sizeof(rightHalf));

        /* Reverse each group of 8 bytes so that the leftmost column of
           the group supplies the most significant bit of its packed byte.
        */
        bit128.i64[0] = __builtin_bswap64(leftHalf);
        bit128.i64[1] = __builtin_bswap64(rightHalf);

        {
            v16qi const compare = (v16qi)
                _mm_cmpgt_epi8((__m128i)bit128.v16, (__m128i) zero128);
            uint16_t const blackMask = _mm_movemask_epi8((__m128i)compare);

            __builtin_memcpy(&packedBits[col/8], &blackMask,
                             sizeof(blackMask));
        }
    }

    if (cols % 16 > 0) {
        /* Fewer than 16 columns remain; 'col' is the first of them. */
        unsigned int i, j;

        bit128.v16 = (v16qi) _mm_setzero_si128();

        /* Place the remaining columns in the same byte-reversed layout
           the main loop produces; the unused positions stay zero.
        */
        for (i = 0, j = col; j < cols; ++i, ++j)
            bit128.byte[(i&8) + 7-(i&7)] = bitrow[j];

        {
            v16qi const compare = (v16qi)
                _mm_cmpgt_epi8((__m128i)bit128.v16, (__m128i) zero128);
            uint16_t const blackMask = _mm_movemask_epi8((__m128i)compare);

            if (cols % 16 > 8)  /* Two partial bytes */
                __builtin_memcpy(&packedBits[col/8], &blackMask,
                                 sizeof(blackMask));
            else                /* One partial byte */
                packedBits[col/8] = (unsigned char) blackMask;
        }
    }
}
#else
/* Avoid undefined function warning; never actually called */

#define packBitsWithSse2(a,b,c,d) packBitsGeneric((a),(b),(c),(d),NULL)
#endif
172
173
174
static unsigned int
bitValue(unsigned char const byteValue) {
/*----------------------------------------------------------------------------
   Return 0 if 'byteValue' is zero; 1 for any other value.
-----------------------------------------------------------------------------*/
    if (byteValue != 0)
        return 1;
    else
        return 0;
}
180
181
182
183 static void
packBitsGeneric(FILE * const fileP,const bit * const bitrow,unsigned char * const packedBits,unsigned int const cols,unsigned int * const nextColP)184 packBitsGeneric(FILE * const fileP,
185 const bit * const bitrow,
186 unsigned char * const packedBits,
187 unsigned int const cols,
188 unsigned int * const nextColP) {
189 /*----------------------------------------------------------------------------
190 Pack the bits of bitrow[] into bytes at 'packedBits'. Going left to right,
191 stop when there aren't enough bits left to fill a whole byte. Return
192 as *nextColP the number of the next column after the rightmost one we
193 packed.
194
195 Don't use any special CPU facilities to do the packing.
196 -----------------------------------------------------------------------------*/
197 unsigned int col;
198
199 for (col = 0; col + 7 < cols; col += 8)
200 packedBits[col/8] = (
201 bitValue(bitrow[col+0]) << 7 |
202 bitValue(bitrow[col+1]) << 6 |
203 bitValue(bitrow[col+2]) << 5 |
204 bitValue(bitrow[col+3]) << 4 |
205 bitValue(bitrow[col+4]) << 3 |
206 bitValue(bitrow[col+5]) << 2 |
207 bitValue(bitrow[col+6]) << 1 |
208 bitValue(bitrow[col+7]) << 0
209 );
210 *nextColP = col;
211 }
212
213
214
215 static void
packPartialBytes(const bit * const bitrow,unsigned int const cols,unsigned int const nextCol,unsigned char * const packedBits)216 packPartialBytes(const bit * const bitrow,
217 unsigned int const cols,
218 unsigned int const nextCol,
219 unsigned char * const packedBits) {
220
221 /* routine for partial byte at the end of packedBits[]
222 Prior to addition of the above enhancement,
223 this method was used for the entire process
224 */
225
226 unsigned int col;
227 int bitshift;
228 unsigned char item;
229
230 bitshift = 7; /* initial value */
231 item = 0; /* initial value */
232 for (col = nextCol; col < cols; ++col, --bitshift)
233 if (bitrow[col] != 0)
234 item |= 1 << bitshift;
235
236 packedBits[col/8] = item;
237 }
238
239
240
241 static void
writePbmRowRaw(FILE * const fileP,const bit * const bitrow,int const cols)242 writePbmRowRaw(FILE * const fileP,
243 const bit * const bitrow,
244 int const cols) {
245
246 jmp_buf jmpbuf;
247 jmp_buf * origJmpbufP;
248 unsigned char * packedBits;
249
250 packedBits = pbm_allocrow_packed(cols);
251
252 if (setjmp(jmpbuf) != 0) {
253 pbm_freerow_packed(packedBits);
254 pm_setjmpbuf(origJmpbufP);
255 pm_longjmp();
256 } else {
257
258 pm_setjmpbufsave(&jmpbuf, &origJmpbufP);
259
260 switch (PACKBITS_SSE) {
261 case 2:
262 packBitsWithSse2(fileP, bitrow, packedBits, cols);
263 break;
264 default: {
265 unsigned int nextCol;
266 packBitsGeneric(fileP, bitrow, packedBits, cols, &nextCol);
267 if (cols % 8 > 0)
268 packPartialBytes(bitrow, cols, nextCol, packedBits);
269 }
270 }
271 writePackedRawRow(fileP, packedBits, cols);
272
273 pm_setjmpbuf(origJmpbufP);
274 }
275 pbm_freerow_packed(packedBits);
276 }
277
278
279
280 static void
writePbmRowPlain(FILE * const fileP,const bit * const bitrow,int const cols)281 writePbmRowPlain(FILE * const fileP,
282 const bit * const bitrow,
283 int const cols) {
284
285 int col, charcount;
286
287 charcount = 0;
288 for (col = 0; col < cols; ++col) {
289 if (charcount >= 70) {
290 putc('\n', fileP);
291 charcount = 0;
292 }
293 putc(bitrow[col] ? '1' : '0', fileP);
294 ++charcount;
295 }
296 putc('\n', fileP);
297 }
298
299
300
301 void
pbm_writepbmrow(FILE * const fileP,const bit * const bitrow,int const cols,int const forceplain)302 pbm_writepbmrow(FILE * const fileP,
303 const bit * const bitrow,
304 int const cols,
305 int const forceplain) {
306
307 if (!forceplain && !pm_plain_output)
308 writePbmRowRaw(fileP, bitrow, cols);
309 else
310 writePbmRowPlain(fileP, bitrow, cols);
311 }
312
313
314
315 void
pbm_writepbmrow_packed(FILE * const fileP,const unsigned char * const packedBits,int const cols,int const forceplain)316 pbm_writepbmrow_packed(FILE * const fileP,
317 const unsigned char * const packedBits,
318 int const cols,
319 int const forceplain) {
320
321 if (!forceplain && !pm_plain_output)
322 writePackedRawRow(fileP, packedBits, cols);
323 else {
324 jmp_buf jmpbuf;
325 jmp_buf * origJmpbufP;
326 bit * bitrow;
327
328 bitrow = pbm_allocrow(cols);
329
330 if (setjmp(jmpbuf) != 0) {
331 pbm_freerow(bitrow);
332 pm_setjmpbuf(origJmpbufP);
333 pm_longjmp();
334 } else {
335 unsigned int col;
336
337 pm_setjmpbufsave(&jmpbuf, &origJmpbufP);
338
339 for (col = 0; col < cols; ++col)
340 bitrow[col] =
341 packedBits[col/8] & (0x80 >> (col%8)) ?
342 PBM_BLACK : PBM_WHITE;
343
344 writePbmRowPlain(fileP, bitrow, cols);
345
346 pm_setjmpbuf(origJmpbufP);
347 }
348 pbm_freerow(bitrow);
349 }
350 }
351
352
353
static unsigned char
leftBits(unsigned char const x,
         unsigned int  const n) {
/*----------------------------------------------------------------------------
   Return 'x' with its leftmost (= high) 'n' bits retained and its
   rightmost (8-n) bits cleared.

   'n' must be less than 8.
-----------------------------------------------------------------------------*/
    unsigned char keepMask;

    assert(n < 8);

    /* Ones in the high n bit positions, zeros below; for n == 0 the ones
       are shifted out of the byte entirely.
    */
    keepMask = (unsigned char)(0xFFu << (8 - n));

    return x & keepMask;
}
371
372
373
void
pbm_writepbmrow_bitoffset(FILE *          const fileP,
                          unsigned char * const packedBits,
                          unsigned int    const cols,
                          int             const format,
                          unsigned int    const offset) {
/*----------------------------------------------------------------------------
   Write a PBM row of 'cols' columns taken from the packed bit buffer
   'packedBits', starting 'offset' bits into the buffer.

   We do it by shifting the image data left within the buffer so that it
   starts on a byte boundary, clearing the unused bits of the last byte,
   and passing the result to pbm_writepbmrow_packed() with forceplain = 0.

   We destroy the buffer.

   'format' is not used.
-----------------------------------------------------------------------------*/
    unsigned int    const bitShift  = offset % 8;
        /* Number of irrelevant bits at the left of the first byte */
    unsigned int    const tailBitCt = cols % 8;
        /* Number of image bits in the last output byte; 0 means the row
           ends on a byte boundary.
        */
    unsigned char * const window    = packedBits + offset/8;
        /* Area of packed row buffer from which we take the image data.
           Aligned to nearest byte boundary to the left, so the first
           few bits might be irrelevant.

           Also our work buffer, in which we shift bits and from which we
           ultimately write the bits to the file.
        */
    unsigned int    const rowByteCt = pbm_packed_bytes(cols);
    unsigned int    const lastIdx   = rowByteCt - 1;
        /* Position within window of rightmost byte after shift */

    bool const needExtraByte =
        (tailBitCt == 0 || bitShift + tailBitCt > 8);
        /* TRUE:  Input comes from rowByteCt bytes and one extra byte.
           FALSE: Input comes from rowByteCt bytes.  For example:
           TRUE:  xxxxxxii iiiiiiii iiiiiiii iiixxxxx  cols=21, offset=6
           FALSE: xiiiiiii iiiiiiii iiiiiixx ________  cols=21, offset=1

           We treat these differently because in the FALSE case the byte
           after the last one (indicated by ________) may not exist.
        */

    if (bitShift > 0) {
        unsigned int const fillShift = 8 - bitShift;
            /* Right shift that brings the high bits of the next byte
               into the vacated low bits of the current one.
            */
        unsigned int const borrowCt  =
            needExtraByte ? rowByteCt : rowByteCt - 1;
            /* Number of bytes that take bits from their right neighbor */

        unsigned int i;

        for (i = 0; i < borrowCt; ++i)
            window[i] = window[i] << bitShift | window[i+1] >> fillShift;

        /* The last byte has no neighbor to borrow from; just shift it. */
        if (!needExtraByte)
            window[lastIdx] = window[lastIdx] << bitShift;
    }

    if (tailBitCt > 0)
        window[lastIdx] = leftBits(window[lastIdx], tailBitCt);

    pbm_writepbmrow_packed(fileP, window, cols, 0);
}
427
428
429
430 void
pbm_writepbm(FILE * const fileP,bit ** const bits,int const cols,int const rows,int const forceplain)431 pbm_writepbm(FILE * const fileP,
432 bit ** const bits,
433 int const cols,
434 int const rows,
435 int const forceplain) {
436
437 int row;
438
439 pbm_writepbminit(fileP, cols, rows, forceplain);
440
441 for (row = 0; row < rows; ++row)
442 pbm_writepbmrow(fileP, bits[row], cols, forceplain);
443 }
444