1Index: dcraw/dcraw_lz.c 2=================================================================== 3--- dcraw.orig/dcraw_lz.c 4+++ dcraw/dcraw_lz.c 5@@ -1,4 +1,8 @@ 6 /* 7+ * OpenMP multithread version of dcraw 8+ * based on UFRaw dcraw_indi.c by Udi Fuchs 9+ */ 10+/* 11 dcraw.c -- Dave Coffin's raw photo decoder 12 Copyright 1997-2018 by Dave Coffin, dcoffin a cybercom o net 13 14@@ -96,6 +100,15 @@ typedef unsigned long long UINT64; 15 #define ushort unsigned short 16 #endif 17 18+#ifdef _OPENMP 19+#include <omp.h> 20+#define uf_omp_get_thread_num() omp_get_thread_num() 21+#define uf_omp_get_num_threads() omp_get_num_threads() 22+#else 23+#define uf_omp_get_thread_num() 0 24+#define uf_omp_get_num_threads() 1 25+#endif 26+ 27 /* 28 All global variables are defined here, and all functions that 29 access them are prefixed with "CLASS". For thread-safety, all 30@@ -4132,6 +4145,13 @@ void CLASS wavelet_denoise() 31 merror (fimg, "wavelet_denoise()"); 32 temp = fimg + size*3; 33 if ((nc = colors) == 3 && filters) nc++; 34+#if defined(__sun) && !defined(__GNUC__) /* Fix UFRaw bug #3205673 - NKBJ */ 35+ #pragma omp parallel for default(shared) \ 36+ private(c,i,hpass,lev,lpass,row,col,thold,fimg,temp) 37+#else 38+ #pragma omp parallel for default(shared) \ 39+ private(c,i,hpass,lev,lpass,row,col,thold,fimg,temp) 40+#endif 41 FORC(nc) { /* denoise R,G1,B,G3 individually */ 42 for (i=0; i < size; i++) 43 fimg[i] = 256 * sqrt(image[i][c] << scale); 44@@ -4355,8 +4375,10 @@ void CLASS pre_interpolate() 45 46 void CLASS border_interpolate (int border) 47 { 48- unsigned row, col, y, x, f, c, sum[8]; 49+ int row; 50+ unsigned col, y, x, f, c, sum[8]; 51 52+#pragma omp for 53 for (row=0; row < height; row++) 54 for (col=0; col < width; col++) { 55 if (col==border && row >= border && row < height-border) 56@@ -4383,7 +4405,11 @@ void CLASS lin_interpolate() 57 58 if (verbose) fprintf (stderr,_("Bilinear interpolation...\n")); 59 if (filters == 9) size = 6; 60+#pragma omp parallel default(shared) \ 61+ private(ip, sum, f, c, i, x, y, row, col, shift, color, pix) 62+{ 63 border_interpolate(1); 64+#pragma omp for 65 for (row=0; row < size; row++) 66 for (col=0; col < size; col++) { 67 ip = code[row][col]+1; 68@@ -4406,6 +4432,7 @@ void CLASS lin_interpolate() 69 *ip++ = 256 / sum[c]; 70 } 71 } 72+#pragma omp for 73 for (row=1; row < height-1; row++) 74 for (col=1; col < width-1; col++) { 75 pix = image[row*width+col]; 76@@ -4416,6 +4443,7 @@ void CLASS lin_interpolate() 77 for (i=colors; --i; ip+=2) 78 pix[ip[0]] = sum[ip[0]] * ip[1] >> 8; 79 } 80+} /* pragma omp parallel */ 81 } 82 83 /* 84@@ -4454,7 +4482,7 @@ void CLASS vng_interpolate() 85 +1,-1,+1,+1,0,0x88, +1,+0,+1,+2,0,0x08, +1,+0,+2,-1,0,0x40, 86 +1,+0,+2,+1,0,0x10 87 }, chood[] = { -1,-1, -1,0, -1,+1, 0,+1, +1,+1, +1,0, +1,-1, 0,-1 }; 88- ushort (*brow[5])[4], *pix; 89+ ushort (*brow[4])[4], *pix; 90 int prow=8, pcol=2, *ip, *code[16][16], gval[8], gmin, gmax, sum[4]; 91 int row, col, x, y, x1, x2, y1, y2, t, weight, grads, color, diag; 92 int g, diff, thold, num, c; 93@@ -4464,7 +4492,7 @@ void CLASS vng_interpolate() 94 95 if (filters == 1) prow = pcol = 16; 96 if (filters == 9) prow = pcol = 6; 97- ip = (int *) calloc (prow*pcol, 1280); 98+ int *ipalloc = ip = (int *) calloc (prow*pcol, 1280); 99 merror (ip, "vng_interpolate()"); 100 for (row=0; row < prow; row++) /* Precalculate for VNG */ 101 for (col=0; col < pcol; col++) { 102@@ -4496,11 +4524,18 @@ void CLASS vng_interpolate() 103 *ip++ = 0; 104 } 105 } 106- brow[4] = (ushort (*)[4]) calloc (width*3, sizeof **brow); 107- merror (brow[4], "vng_interpolate()"); 108- for (row=0; row < 3; row++) 109- brow[row] = brow[4] + row*width; 110- for (row=2; row < height-2; row++) { /* Do VNG interpolation */ 111+#pragma omp parallel default(shared) \ 112+ private(row,col,g,brow,pix,ip,gval,diff,gmin,gmax,thold,sum,color,num,c,t) 113+{ 114+ ushort (*rowtmp)[4]; 115+ rowtmp = (ushort(*)[4])malloc(4 * width * 4 * sizeof(ushort)); 116+ int slice = (height - 4) / uf_omp_get_num_threads(); 117+ int start_row = 2 + slice * uf_omp_get_thread_num(); 118+ int end_row = MIN(start_row + slice, height - 2); 119+ for (row = start_row; row < end_row; row++) { /* Do VNG interpolation */ 120+ 121+ for (g = 0; g < 4; g++) 122+ brow[g] = &rowtmp[(row + g - 2) % 4]; 123 for (col=2; col < width-2; col++) { 124 pix = image[row*width+col]; 125 ip = code[row % prow][col % pcol]; 126@@ -4544,15 +4579,15 @@ void CLASS vng_interpolate() 127 brow[2][col][c] = CLIP(t); 128 } 129 } 130- if (row > 3) /* Write buffer to image */ 131+ if (row > start_row + 1) /* Write buffer to image */ 132 memcpy (image[(row-2)*width+2], brow[0]+2, (width-4)*sizeof *image); 133- for (g=0; g < 4; g++) 134- brow[(g-1) & 3] = brow[g]; 135 } 136- memcpy (image[(row-2)*width+2], brow[0]+2, (width-4)*sizeof *image); 137- memcpy (image[(row-1)*width+2], brow[1]+2, (width-4)*sizeof *image); 138- free (brow[4]); 139- free (code[0][0]); 140+ if (row == height - 2) { 141+ memcpy (image[(row-2)*width+2], brow[0]+2, (width-4)*sizeof *image); 142+ memcpy (image[(row-1)*width+2], brow[1]+2, (width-4)*sizeof *image); 143+ } 144+} /* pragma omp parallel */ 145+ free(ipalloc); 146 } 147 148 /* 149@@ -4564,10 +4599,13 @@ void CLASS ppg_interpolate() 150 int row, col, diff[2], guess[2], c, d, i; 151 ushort (*pix)[4]; 152 153- border_interpolate(3); 154 if (verbose) fprintf (stderr,_("PPG interpolation...\n")); 155 156+#pragma omp parallel default(shared) private(row,col,i,d,c,pix,guess) 157+{ 158+ border_interpolate(3); 159 /* Fill in the green layer with gradients and pattern recognition: */ 160+#pragma omp for 161 for (row=3; row < height-3; row++) 162 for (col=3+(FC(row,3) & 1), c=FC(row,col); col < width-3; col+=2) { 163 pix = image + row*width+col; 164@@ -4584,6 +4622,7 @@ void CLASS ppg_interpolate() 165 pix[0][1] = ULIM(guess[i] >> 2, pix[d][1], pix[-d][1]); 166 } 167 /* Calculate red and blue for each green pixel: */ 168+#pragma omp for 169 for (row=1; row < height-1; row++) 170 for (col=1+(FC(row,2) & 1), c=FC(row,col+1); col < width-1; col+=2) { 171 pix = image + row*width+col; 172@@ -4592,6 +4631,7 @@ void CLASS ppg_interpolate() 173 - pix[-d][1] - pix[d][1]) >> 1); 174 } 175 /* Calculate blue for red pixels and vice versa: */ 176+#pragma omp for 177 for (row=1; row < height-1; row++) 178 for (col=1+(FC(row,1) & 1), c=2-FC(row,col); col < width-1; col+=2) { 179 pix = image + row*width+col; 180@@ -4607,6 +4647,7 @@ void CLASS ppg_interpolate() 181 else 182 pix[0][c] = CLIP((guess[0]+guess[1]) >> 2); 183 } 184+} /* pragma omp parallel */ 185 } 186 187 void CLASS cielab (ushort rgb[3], short lab[3]) 188@@ -4616,13 +4657,16 @@ void CLASS cielab (ushort rgb[3], short 189 static float cbrt[0x10000], xyz_cam[3][4]; 190 191 if (!rgb) { 192+#pragma omp for 193 for (i=0; i < 0x10000; i++) { 194 r = i / 65535.0; 195 cbrt[i] = r > 0.008856 ? pow(r,1/3.0) : 7.787*r + 16/116.0; 196 } 197+#pragma omp for 198 for (i=0; i < 3; i++) 199 for (j=0; j < colors; j++) 200 for (xyz_cam[i][j] = k=0; k < 3; k++) 201+#pragma omp atomic 202 xyz_cam[i][j] += xyz_rgb[i][k] * rgb_cam[k][j] / d65_white[i]; 203 return; 204 } 205@@ -4888,6 +4932,10 @@ void CLASS ahd_interpolate() 206 207 if (verbose) fprintf (stderr,_("AHD interpolation...\n")); 208 209+#pragma omp parallel default(shared) \ 210+ private(top,left,row,col,pix,rix,lix,c,val,d,tc,tr,i,j,ldiff,abdiff,leps, \ 211+ abeps,hm,buffer,rgb,lab,homo) 212+{ 213 cielab (0,0); 214 border_interpolate(5); 215 buffer = (char *) malloc (26*TS*TS); 216@@ -4896,6 +4944,7 @@ void CLASS ahd_interpolate() 217 lab = (short (*)[TS][TS][3])(buffer + 12*TS*TS); 218 homo = (char (*)[TS][TS]) (buffer + 24*TS*TS); 219 220+#pragma omp for 221 for (top=2; top < height-5; top += TS-6) 222 for (left=2; left < width-5; left += TS-6) { 223 224@@ -4978,6 +5027,7 @@ void CLASS ahd_interpolate() 225 } 226 } 227 free (buffer); 228+} /* pragma omp parallel */ 229 } 230 #undef TS 231 232@@ -9825,6 +9875,7 @@ void CLASS fuji_rotate() 233 img = (ushort (*)[4]) calloc (high, wide*sizeof *img); 234 merror (img, "fuji_rotate()"); 235 236+#pragma omp parallel for default(shared) private(row,col,ur,uc,r,c,fr,fc,pix,i) 237 for (row=0; row < high; row++) 238 for (col=0; col < wide; col++) { 239 ur = r = fuji_width + (row-col)*step; 240