1 /*
2  * libtxc_dxtn
3  * Version:  1.0
4  *
5  * Copyright (C) 2004  Roland Scheidegger   All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11  * and/or sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included
15  * in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include "txc_dxtn.h"
28 
29 /* weights used for error function, basically weights (unsquared 2/4/1) according to rgb->luminance conversion
30    not sure if this really reflects visual perception */
31 #define REDWEIGHT 4
32 #define GREENWEIGHT 16
33 #define BLUEWEIGHT 1
34 
35 #define ALPHACUT 127
36 
37 static void fancybasecolorsearch( GLubyte *blkaddr, GLubyte srccolors[4][4][4], GLubyte *bestcolor[2],
38                            GLint numxpixels, GLint numypixels, GLint type, GLboolean haveAlpha)
39 {
40    /* use same luminance-weighted distance metric to determine encoding as for finding the base colors */
41 
42    /* TODO could also try to find a better encoding for the 3-color-encoding type, this really should be done
43       if it's rgba_dxt1 and we have alpha in the block, currently even values which will be mapped to black
44       due to their alpha value will influence the result */
45    GLint i, j, colors, z;
46    GLuint pixerror, pixerrorred, pixerrorgreen, pixerrorblue, pixerrorbest;
47    GLint colordist, blockerrlin[2][3];
48    GLubyte nrcolor[2];
49    GLint pixerrorcolorbest[3];
50    GLubyte enc = 0;
51    GLubyte cv[4][4];
52    GLubyte testcolor[2][3];
53 
54 /*   fprintf(stderr, "color begin 0 r/g/b %d/%d/%d, 1 r/g/b %d/%d/%d\n",
55       bestcolor[0][0], bestcolor[0][1], bestcolor[0][2], bestcolor[1][0], bestcolor[1][1], bestcolor[1][2]);*/
56    if (((bestcolor[0][0] & 0xf8) << 8 | (bestcolor[0][1] & 0xfc) << 3 | bestcolor[0][2] >> 3) <
57       ((bestcolor[1][0] & 0xf8) << 8 | (bestcolor[1][1] & 0xfc) << 3 | bestcolor[1][2] >> 3)) {
58       testcolor[0][0] = bestcolor[0][0];
59       testcolor[0][1] = bestcolor[0][1];
60       testcolor[0][2] = bestcolor[0][2];
61       testcolor[1][0] = bestcolor[1][0];
62       testcolor[1][1] = bestcolor[1][1];
63       testcolor[1][2] = bestcolor[1][2];
64    }
65    else {
66       testcolor[1][0] = bestcolor[0][0];
67       testcolor[1][1] = bestcolor[0][1];
68       testcolor[1][2] = bestcolor[0][2];
69       testcolor[0][0] = bestcolor[1][0];
70       testcolor[0][1] = bestcolor[1][1];
71       testcolor[0][2] = bestcolor[1][2];
72    }
73 
74    for (i = 0; i < 3; i ++) {
75       cv[0][i] = testcolor[0][i];
76       cv[1][i] = testcolor[1][i];
77       cv[2][i] = (testcolor[0][i] * 2 + testcolor[1][i]) / 3;
78       cv[3][i] = (testcolor[0][i] + testcolor[1][i] * 2) / 3;
79    }
80 
81    blockerrlin[0][0] = 0;
82    blockerrlin[0][1] = 0;
83    blockerrlin[0][2] = 0;
84    blockerrlin[1][0] = 0;
85    blockerrlin[1][1] = 0;
86    blockerrlin[1][2] = 0;
87 
88    nrcolor[0] = 0;
89    nrcolor[1] = 0;
90 
91    for (j = 0; j < numypixels; j++) {
92       for (i = 0; i < numxpixels; i++) {
93          pixerrorbest = 0xffffffff;
94          for (colors = 0; colors < 4; colors++) {
95             colordist = srccolors[j][i][0] - (cv[colors][0]);
96             pixerror = colordist * colordist * REDWEIGHT;
97             pixerrorred = colordist;
98             colordist = srccolors[j][i][1] - (cv[colors][1]);
99             pixerror += colordist * colordist * GREENWEIGHT;
100             pixerrorgreen = colordist;
101             colordist = srccolors[j][i][2] - (cv[colors][2]);
102             pixerror += colordist * colordist * BLUEWEIGHT;
103             pixerrorblue = colordist;
104             if (pixerror < pixerrorbest) {
105                enc = colors;
106                pixerrorbest = pixerror;
107                pixerrorcolorbest[0] = pixerrorred;
108                pixerrorcolorbest[1] = pixerrorgreen;
109                pixerrorcolorbest[2] = pixerrorblue;
110             }
111          }
112          if (enc == 0) {
113             for (z = 0; z < 3; z++) {
114                blockerrlin[0][z] += 3 * pixerrorcolorbest[z];
115             }
116             nrcolor[0] += 3;
117          }
118          else if (enc == 2) {
119             for (z = 0; z < 3; z++) {
120                blockerrlin[0][z] += 2 * pixerrorcolorbest[z];
121             }
122             nrcolor[0] += 2;
123             for (z = 0; z < 3; z++) {
124                blockerrlin[1][z] += 1 * pixerrorcolorbest[z];
125             }
126             nrcolor[1] += 1;
127          }
128          else if (enc == 3) {
129             for (z = 0; z < 3; z++) {
130                blockerrlin[0][z] += 1 * pixerrorcolorbest[z];
131             }
132             nrcolor[0] += 1;
133             for (z = 0; z < 3; z++) {
134                blockerrlin[1][z] += 2 * pixerrorcolorbest[z];
135             }
136             nrcolor[1] += 2;
137          }
138          else if (enc == 1) {
139             for (z = 0; z < 3; z++) {
140                blockerrlin[1][z] += 3 * pixerrorcolorbest[z];
141             }
142             nrcolor[1] += 3;
143          }
144       }
145    }
146    if (nrcolor[0] == 0) nrcolor[0] = 1;
147    if (nrcolor[1] == 0) nrcolor[1] = 1;
148    for (j = 0; j < 2; j++) {
149       for (i = 0; i < 3; i++) {
150 	 GLint newvalue = testcolor[j][i] + blockerrlin[j][i] / nrcolor[j];
151 	 if (newvalue <= 0)
152 	    testcolor[j][i] = 0;
153 	 else if (newvalue >= 255)
154 	    testcolor[j][i] = 255;
155 	 else testcolor[j][i] = newvalue;
156       }
157    }
158 
159    if ((abs(testcolor[0][0] - testcolor[1][0]) < 8) &&
160        (abs(testcolor[0][1] - testcolor[1][1]) < 4) &&
161        (abs(testcolor[0][2] - testcolor[1][2]) < 8)) {
162        /* both colors are so close they might get encoded as the same 16bit values */
163       GLubyte coldiffred, coldiffgreen, coldiffblue, coldiffmax, factor, ind0, ind1;
164 
165       coldiffred = abs(testcolor[0][0] - testcolor[1][0]);
166       coldiffgreen = 2 * abs(testcolor[0][1] - testcolor[1][1]);
167       coldiffblue = abs(testcolor[0][2] - testcolor[1][2]);
168       coldiffmax = coldiffred;
169       if (coldiffmax < coldiffgreen) coldiffmax = coldiffgreen;
170       if (coldiffmax < coldiffblue) coldiffmax = coldiffblue;
171       if (coldiffmax > 0) {
172          if (coldiffmax > 4) factor = 2;
173          else if (coldiffmax > 2) factor = 3;
174          else factor = 4;
175          /* Won't do much if the color value is near 255... */
176          /* argh so many ifs */
177          if (testcolor[1][1] >= testcolor[0][1]) {
178             ind1 = 1; ind0 = 0;
179          }
180          else {
181             ind1 = 0; ind0 = 1;
182          }
183          if ((testcolor[ind1][1] + factor * coldiffgreen) <= 255)
184             testcolor[ind1][1] += factor * coldiffgreen;
185          else testcolor[ind1][1] = 255;
186          if ((testcolor[ind1][0] - testcolor[ind0][1]) > 0) {
187             if ((testcolor[ind1][0] + factor * coldiffred) <= 255)
188                testcolor[ind1][0] += factor * coldiffred;
189             else testcolor[ind1][0] = 255;
190          }
191          else {
192             if ((testcolor[ind0][0] + factor * coldiffred) <= 255)
193                testcolor[ind0][0] += factor * coldiffred;
194             else testcolor[ind0][0] = 255;
195          }
196          if ((testcolor[ind1][2] - testcolor[ind0][2]) > 0) {
197             if ((testcolor[ind1][2] + factor * coldiffblue) <= 255)
198                testcolor[ind1][2] += factor * coldiffblue;
199             else testcolor[ind1][2] = 255;
200          }
201          else {
202             if ((testcolor[ind0][2] + factor * coldiffblue) <= 255)
203                testcolor[ind0][2] += factor * coldiffblue;
204             else testcolor[ind0][2] = 255;
205          }
206       }
207    }
208 
209    if (((testcolor[0][0] & 0xf8) << 8 | (testcolor[0][1] & 0xfc) << 3 | testcolor[0][2] >> 3) <
210       ((testcolor[1][0] & 0xf8) << 8 | (testcolor[1][1] & 0xfc) << 3 | testcolor[1][2]) >> 3) {
211       for (i = 0; i < 3; i++) {
212          bestcolor[0][i] = testcolor[0][i];
213          bestcolor[1][i] = testcolor[1][i];
214       }
215    }
216    else {
217       for (i = 0; i < 3; i++) {
218          bestcolor[0][i] = testcolor[1][i];
219          bestcolor[1][i] = testcolor[0][i];
220       }
221    }
222 
223 /*     fprintf(stderr, "color end 0 r/g/b %d/%d/%d, 1 r/g/b %d/%d/%d\n",
224      bestcolor[0][0], bestcolor[0][1], bestcolor[0][2], bestcolor[1][0], bestcolor[1][1], bestcolor[1][2]);*/
225 }
226 
227 
228 
229 static void storedxtencodedblock( GLubyte *blkaddr, GLubyte srccolors[4][4][4], GLubyte *bestcolor[2],
230                            GLint numxpixels, GLint numypixels, GLuint type, GLboolean haveAlpha)
231 {
232    /* use same luminance-weighted distance metric to determine encoding as for finding the base colors */
233 
234    GLint i, j, colors;
235    GLuint testerror, testerror2, pixerror, pixerrorbest;
236    GLint colordist;
237    GLushort color0, color1, tempcolor;
238    GLuint bits = 0, bits2 = 0;
239    GLubyte *colorptr;
240    GLubyte enc = 0;
241    GLubyte cv[4][4];
242 
243    bestcolor[0][0] = bestcolor[0][0] & 0xf8;
244    bestcolor[0][1] = bestcolor[0][1] & 0xfc;
245    bestcolor[0][2] = bestcolor[0][2] & 0xf8;
246    bestcolor[1][0] = bestcolor[1][0] & 0xf8;
247    bestcolor[1][1] = bestcolor[1][1] & 0xfc;
248    bestcolor[1][2] = bestcolor[1][2] & 0xf8;
249 
250    color0 = bestcolor[0][0] << 8 | bestcolor[0][1] << 3 | bestcolor[0][2] >> 3;
251    color1 = bestcolor[1][0] << 8 | bestcolor[1][1] << 3 | bestcolor[1][2] >> 3;
252    if (color0 < color1) {
253       tempcolor = color0; color0 = color1; color1 = tempcolor;
254       colorptr = bestcolor[0]; bestcolor[0] = bestcolor[1]; bestcolor[1] = colorptr;
255    }
256 
257 
258    for (i = 0; i < 3; i++) {
259       cv[0][i] = bestcolor[0][i];
260       cv[1][i] = bestcolor[1][i];
261       cv[2][i] = (bestcolor[0][i] * 2 + bestcolor[1][i]) / 3;
262       cv[3][i] = (bestcolor[0][i] + bestcolor[1][i] * 2) / 3;
263    }
264 
265    testerror = 0;
266    for (j = 0; j < numypixels; j++) {
267       for (i = 0; i < numxpixels; i++) {
268          pixerrorbest = 0xffffffff;
269          for (colors = 0; colors < 4; colors++) {
270             colordist = srccolors[j][i][0] - cv[colors][0];
271             pixerror = colordist * colordist * REDWEIGHT;
272             colordist = srccolors[j][i][1] - cv[colors][1];
273             pixerror += colordist * colordist * GREENWEIGHT;
274             colordist = srccolors[j][i][2] - cv[colors][2];
275             pixerror += colordist * colordist * BLUEWEIGHT;
276             if (pixerror < pixerrorbest) {
277                pixerrorbest = pixerror;
278                enc = colors;
279             }
280          }
281          testerror += pixerrorbest;
282          bits |= enc << (2 * (j * 4 + i));
283       }
284    }
285    /* some hw might disagree but actually decoding should always use 4-color encoding
286       for non-dxt1 formats */
287    if (type == GL_COMPRESSED_RGB_S3TC_DXT1_EXT || type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
288       for (i = 0; i < 3; i++) {
289          cv[2][i] = (bestcolor[0][i] + bestcolor[1][i]) / 2;
290          /* this isn't used. Looks like the black color constant can only be used
291             with RGB_DXT1 if I read the spec correctly (note though that the radeon gpu disagrees,
292             it will decode 3 to black even with DXT3/5), and due to how the color searching works
293             it won't get used even then */
294          cv[3][i] = 0;
295       }
296       testerror2 = 0;
297       for (j = 0; j < numypixels; j++) {
298          for (i = 0; i < numxpixels; i++) {
299             pixerrorbest = 0xffffffff;
300             if ((type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) && (srccolors[j][i][3] <= ALPHACUT)) {
301                enc = 3;
302                pixerrorbest = 0; /* don't calculate error */
303             }
304             else {
305                /* we're calculating the same what we have done already for colors 0-1 above... */
306                for (colors = 0; colors < 3; colors++) {
307                   colordist = srccolors[j][i][0] - cv[colors][0];
308                   pixerror = colordist * colordist * REDWEIGHT;
309                   colordist = srccolors[j][i][1] - cv[colors][1];
310                   pixerror += colordist * colordist * GREENWEIGHT;
311                   colordist = srccolors[j][i][2] - cv[colors][2];
312                   pixerror += colordist * colordist * BLUEWEIGHT;
313                   if (pixerror < pixerrorbest) {
314                      pixerrorbest = pixerror;
315                      /* need to exchange colors later */
316                      if (colors > 1) enc = colors;
317                      else enc = colors ^ 1;
318                   }
319                }
320             }
321             testerror2 += pixerrorbest;
322             bits2 |= enc << (2 * (j * 4 + i));
323          }
324       }
325    } else {
326       testerror2 = 0xffffffff;
327    }
328 
329    /* finally we're finished, write back colors and bits */
330    if ((testerror > testerror2) || (haveAlpha)) {
331       *blkaddr++ = color1 & 0xff;
332       *blkaddr++ = color1 >> 8;
333       *blkaddr++ = color0 & 0xff;
334       *blkaddr++ = color0 >> 8;
335       *blkaddr++ = bits2 & 0xff;
336       *blkaddr++ = ( bits2 >> 8) & 0xff;
337       *blkaddr++ = ( bits2 >> 16) & 0xff;
338       *blkaddr = bits2 >> 24;
339    }
340    else {
341       *blkaddr++ = color0 & 0xff;
342       *blkaddr++ = color0 >> 8;
343       *blkaddr++ = color1 & 0xff;
344       *blkaddr++ = color1 >> 8;
345       *blkaddr++ = bits & 0xff;
346       *blkaddr++ = ( bits >> 8) & 0xff;
347       *blkaddr++ = ( bits >> 16) & 0xff;
348       *blkaddr = bits >> 24;
349    }
350 }
351 
352 static void encodedxtcolorblockfaster( GLubyte *blkaddr, GLubyte srccolors[4][4][4],
353                          GLint numxpixels, GLint numypixels, GLuint type )
354 {
355 /* simplistic approach. We need two base colors, simply use the "highest" and the "lowest" color
356    present in the picture as base colors */
357 
358    /* define lowest and highest color as shortest and longest vector to 0/0/0, though the
359       vectors are weighted similar to their importance in rgb-luminance conversion
360       doesn't work too well though...
361       This seems to be a rather difficult problem */
362 
363    GLubyte *bestcolor[2];
364    GLubyte basecolors[2][3];
365    GLubyte i, j;
366    GLuint lowcv, highcv, testcv;
367    GLboolean haveAlpha = GL_FALSE;
368 
369    lowcv = highcv = srccolors[0][0][0] * srccolors[0][0][0] * REDWEIGHT +
370                           srccolors[0][0][1] * srccolors[0][0][1] * GREENWEIGHT +
371                           srccolors[0][0][2] * srccolors[0][0][2] * BLUEWEIGHT;
372    bestcolor[0] = bestcolor[1] = srccolors[0][0];
373    for (j = 0; j < numypixels; j++) {
374       for (i = 0; i < numxpixels; i++) {
375          /* don't use this as a base color if the pixel will get black/transparent anyway */
376          if ((type != GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) || (srccolors[j][i][3] > ALPHACUT)) {
377             testcv = srccolors[j][i][0] * srccolors[j][i][0] * REDWEIGHT +
378                      srccolors[j][i][1] * srccolors[j][i][1] * GREENWEIGHT +
379                      srccolors[j][i][2] * srccolors[j][i][2] * BLUEWEIGHT;
380             if (testcv > highcv) {
381                highcv = testcv;
382                bestcolor[1] = srccolors[j][i];
383             }
384             else if (testcv < lowcv) {
385                lowcv = testcv;
386                bestcolor[0] = srccolors[j][i];
387             }
388          }
389          else haveAlpha = GL_TRUE;
390       }
391    }
392    /* make sure the original color values won't get touched... */
393    for (j = 0; j < 2; j++) {
394       for (i = 0; i < 3; i++) {
395          basecolors[j][i] = bestcolor[j][i];
396       }
397    }
398    bestcolor[0] = basecolors[0];
399    bestcolor[1] = basecolors[1];
400 
401    /* try to find better base colors */
402    fancybasecolorsearch(blkaddr, srccolors, bestcolor, numxpixels, numypixels, type, haveAlpha);
403    /* find the best encoding for these colors, and store the result */
404    storedxtencodedblock(blkaddr, srccolors, bestcolor, numxpixels, numypixels, type, haveAlpha);
405 }
406 
407 static void writedxt5encodedalphablock( GLubyte *blkaddr, GLubyte alphabase1, GLubyte alphabase2,
408                          GLubyte alphaenc[16])
409 {
410    *blkaddr++ = alphabase1;
411    *blkaddr++ = alphabase2;
412    *blkaddr++ = alphaenc[0] | (alphaenc[1] << 3) | ((alphaenc[2] & 3) << 6);
413    *blkaddr++ = (alphaenc[2] >> 2) | (alphaenc[3] << 1) | (alphaenc[4] << 4) | ((alphaenc[5] & 1) << 7);
414    *blkaddr++ = (alphaenc[5] >> 1) | (alphaenc[6] << 2) | (alphaenc[7] << 5);
415    *blkaddr++ = alphaenc[8] | (alphaenc[9] << 3) | ((alphaenc[10] & 3) << 6);
416    *blkaddr++ = (alphaenc[10] >> 2) | (alphaenc[11] << 1) | (alphaenc[12] << 4) | ((alphaenc[13] & 1) << 7);
417    *blkaddr++ = (alphaenc[13] >> 1) | (alphaenc[14] << 2) | (alphaenc[15] << 5);
418 }
419 
420 static void encodedxt5alpha(GLubyte *blkaddr, GLubyte srccolors[4][4][4],
421                             GLint numxpixels, GLint numypixels)
422 {
423    GLubyte alphabase[2], alphause[2];
424    GLshort alphatest[2];
425    GLuint alphablockerror1, alphablockerror2, alphablockerror3;
426    GLubyte i, j, aindex, acutValues[7];
427    GLubyte alphaenc1[16], alphaenc2[16], alphaenc3[16];
428    GLboolean alphaabsmin = GL_FALSE;
429    GLboolean alphaabsmax = GL_FALSE;
430    GLshort alphadist;
431 
432    /* find lowest and highest alpha value in block, alphabase[0] lowest, alphabase[1] highest */
433    alphabase[0] = 0xff; alphabase[1] = 0x0;
434    for (j = 0; j < numypixels; j++) {
435       for (i = 0; i < numxpixels; i++) {
436          if (srccolors[j][i][3] == 0)
437             alphaabsmin = GL_TRUE;
438          else if (srccolors[j][i][3] == 255)
439             alphaabsmax = GL_TRUE;
440          else {
441             if (srccolors[j][i][3] > alphabase[1])
442                alphabase[1] = srccolors[j][i][3];
443             if (srccolors[j][i][3] < alphabase[0])
444                alphabase[0] = srccolors[j][i][3];
445          }
446       }
447    }
448 
449 
450    if ((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax)) { /* one color, either max or min */
451       /* shortcut here since it is a very common case (and also avoids later problems) */
452       /* || (alphabase[0] == alphabase[1] && !alphaabsmin && !alphaabsmax) */
453       /* could also thest for alpha0 == alpha1 (and not min/max), but probably not common, so don't bother */
454 
455       *blkaddr++ = srccolors[0][0][3];
456       blkaddr++;
457       *blkaddr++ = 0;
458       *blkaddr++ = 0;
459       *blkaddr++ = 0;
460       *blkaddr++ = 0;
461       *blkaddr++ = 0;
462       *blkaddr++ = 0;
463 /*      fprintf(stderr, "enc0 used\n");*/
464       return;
465    }
466 
467    /* find best encoding for alpha0 > alpha1 */
468    /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */
469    alphablockerror1 = 0x0;
470    alphablockerror2 = 0xffffffff;
471    alphablockerror3 = 0xffffffff;
472    if (alphaabsmin) alphause[0] = 0;
473    else alphause[0] = alphabase[0];
474    if (alphaabsmax) alphause[1] = 255;
475    else alphause[1] = alphabase[1];
476    /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */
477    for (aindex = 0; aindex < 7; aindex++) {
478       /* don't forget here is always rounded down */
479       acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14;
480    }
481 
482    for (j = 0; j < numypixels; j++) {
483       for (i = 0; i < numxpixels; i++) {
484          /* maybe it's overkill to have the most complicated calculation just for the error
485             calculation which we only need to figure out if encoding1 or encoding2 is better... */
486          if (srccolors[j][i][3] > acutValues[0]) {
487             alphaenc1[4*j + i] = 0;
488             alphadist = srccolors[j][i][3] - alphause[1];
489          }
490          else if (srccolors[j][i][3] > acutValues[1]) {
491             alphaenc1[4*j + i] = 2;
492             alphadist = srccolors[j][i][3] - (alphause[1] * 6 + alphause[0] * 1) / 7;
493          }
494          else if (srccolors[j][i][3] > acutValues[2]) {
495             alphaenc1[4*j + i] = 3;
496             alphadist = srccolors[j][i][3] - (alphause[1] * 5 + alphause[0] * 2) / 7;
497          }
498          else if (srccolors[j][i][3] > acutValues[3]) {
499             alphaenc1[4*j + i] = 4;
500             alphadist = srccolors[j][i][3] - (alphause[1] * 4 + alphause[0] * 3) / 7;
501          }
502          else if (srccolors[j][i][3] > acutValues[4]) {
503             alphaenc1[4*j + i] = 5;
504             alphadist = srccolors[j][i][3] - (alphause[1] * 3 + alphause[0] * 4) / 7;
505          }
506          else if (srccolors[j][i][3] > acutValues[5]) {
507             alphaenc1[4*j + i] = 6;
508             alphadist = srccolors[j][i][3] - (alphause[1] * 2 + alphause[0] * 5) / 7;
509          }
510          else if (srccolors[j][i][3] > acutValues[6]) {
511             alphaenc1[4*j + i] = 7;
512             alphadist = srccolors[j][i][3] - (alphause[1] * 1 + alphause[0] * 6) / 7;
513          }
514          else {
515             alphaenc1[4*j + i] = 1;
516             alphadist = srccolors[j][i][3] - alphause[0];
517          }
518          alphablockerror1 += alphadist * alphadist;
519       }
520    }
521 /*      for (i = 0; i < 16; i++) {
522          fprintf(stderr, "%d ", alphaenc1[i]);
523       }
524       fprintf(stderr, "cutVals ");
525       for (i = 0; i < 8; i++) {
526          fprintf(stderr, "%d ", acutValues[i]);
527       }
528       fprintf(stderr, "srcVals ");
529       for (j = 0; j < numypixels; j++)
530          for (i = 0; i < numxpixels; i++) {
531             fprintf(stderr, "%d ", srccolors[j][i][3]);
532          }
533 
534       fprintf(stderr, "\n");
535    }*/
536    /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax
537       are false but try it anyway */
538    if (alphablockerror1 >= 32) {
539 
540       /* don't bother if encoding is already very good, this condition should also imply
541       we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */
542       alphablockerror2 = 0;
543       for (aindex = 0; aindex < 5; aindex++) {
544          /* don't forget here is always rounded down */
545          acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10;
546       }
547       for (j = 0; j < numypixels; j++) {
548          for (i = 0; i < numxpixels; i++) {
549              /* maybe it's overkill to have the most complicated calculation just for the error
550                calculation which we only need to figure out if encoding1 or encoding2 is better... */
551             if (srccolors[j][i][3] == 0) {
552                alphaenc2[4*j + i] = 6;
553                alphadist = 0;
554             }
555             else if (srccolors[j][i][3] == 255) {
556                alphaenc2[4*j + i] = 7;
557                alphadist = 0;
558             }
559             else if (srccolors[j][i][3] <= acutValues[0]) {
560                alphaenc2[4*j + i] = 0;
561                alphadist = srccolors[j][i][3] - alphabase[0];
562             }
563             else if (srccolors[j][i][3] <= acutValues[1]) {
564                alphaenc2[4*j + i] = 2;
565                alphadist = srccolors[j][i][3] - (alphabase[0] * 4 + alphabase[1] * 1) / 5;
566             }
567             else if (srccolors[j][i][3] <= acutValues[2]) {
568                alphaenc2[4*j + i] = 3;
569                alphadist = srccolors[j][i][3] - (alphabase[0] * 3 + alphabase[1] * 2) / 5;
570             }
571             else if (srccolors[j][i][3] <= acutValues[3]) {
572                alphaenc2[4*j + i] = 4;
573                alphadist = srccolors[j][i][3] - (alphabase[0] * 2 + alphabase[1] * 3) / 5;
574             }
575             else if (srccolors[j][i][3] <= acutValues[4]) {
576                alphaenc2[4*j + i] = 5;
577                alphadist = srccolors[j][i][3] - (alphabase[0] * 1 + alphabase[1] * 4) / 5;
578             }
579             else {
580                alphaenc2[4*j + i] = 1;
581                alphadist = srccolors[j][i][3] - alphabase[1];
582             }
583             alphablockerror2 += alphadist * alphadist;
584          }
585       }
586 
587 
588       /* skip this if the error is already very small
589          this encoding is MUCH better on average than #2 though, but expensive! */
590       if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) {
591          GLshort blockerrlin1 = 0;
592          GLshort blockerrlin2 = 0;
593          GLubyte nralphainrangelow = 0;
594          GLubyte nralphainrangehigh = 0;
595          alphatest[0] = 0xff;
596          alphatest[1] = 0x0;
597          /* if we have large range it's likely there are values close to 0/255, try to map them to 0/255 */
598          for (j = 0; j < numypixels; j++) {
599             for (i = 0; i < numxpixels; i++) {
600                if ((srccolors[j][i][3] > alphatest[1]) && (srccolors[j][i][3] < (255 -(alphabase[1] - alphabase[0]) / 28)))
601                   alphatest[1] = srccolors[j][i][3];
602                if ((srccolors[j][i][3] < alphatest[0]) && (srccolors[j][i][3] > (alphabase[1] - alphabase[0]) / 28))
603                   alphatest[0] = srccolors[j][i][3];
604             }
605          }
606           /* shouldn't happen too often, don't really care about those degenerated cases */
607           if (alphatest[1] <= alphatest[0]) {
608              alphatest[0] = 1;
609              alphatest[1] = 254;
610 /*             fprintf(stderr, "only 1 or 0 colors for encoding!\n");*/
611          }
612          for (aindex = 0; aindex < 5; aindex++) {
613          /* don't forget here is always rounded down */
614             acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
615          }
616 
617          /* find the "average" difference between the alpha values and the next encoded value.
618             This is then used to calculate new base values.
619             Should there be some weighting, i.e. those values closer to alphatest[x] have more weight,
620             since they will see more improvement, and also because the values in the middle are somewhat
621             likely to get no improvement at all (because the base values might move in different directions)?
622             OTOH it would mean the values in the middle are even less likely to get an improvement
623          */
624          for (j = 0; j < numypixels; j++) {
625             for (i = 0; i < numxpixels; i++) {
626                if (srccolors[j][i][3] <= alphatest[0] / 2) {
627                }
628                else if (srccolors[j][i][3] > ((255 + alphatest[1]) / 2)) {
629                }
630                else if (srccolors[j][i][3] <= acutValues[0]) {
631                   blockerrlin1 += (srccolors[j][i][3] - alphatest[0]);
632                   nralphainrangelow += 1;
633                }
634                else if (srccolors[j][i][3] <= acutValues[1]) {
635                   blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
636                   blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
637                   nralphainrangelow += 1;
638                   nralphainrangehigh += 1;
639                }
640                else if (srccolors[j][i][3] <= acutValues[2]) {
641                   blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
642                   blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
643                   nralphainrangelow += 1;
644                   nralphainrangehigh += 1;
645                }
646                else if (srccolors[j][i][3] <= acutValues[3]) {
647                   blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
648                   blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
649                   nralphainrangelow += 1;
650                   nralphainrangehigh += 1;
651                }
652                else if (srccolors[j][i][3] <= acutValues[4]) {
653                   blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
654                   blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
655                   nralphainrangelow += 1;
656                   nralphainrangehigh += 1;
657                   }
658                else {
659                   blockerrlin2 += (srccolors[j][i][3] - alphatest[1]);
660                   nralphainrangehigh += 1;
661                }
662             }
663          }
664          /* shouldn't happen often, needed to avoid div by zero */
665          if (nralphainrangelow == 0) nralphainrangelow = 1;
666          if (nralphainrangehigh == 0) nralphainrangehigh = 1;
667          alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow);
668 /*         fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow);
669          fprintf(stderr, "block err lin high %d, nr %d\n", blockerrlin2, nralphainrangehigh);*/
670          /* again shouldn't really happen often... */
671          if (alphatest[0] < 0) {
672             alphatest[0] = 0;
673 /*            fprintf(stderr, "adj alpha base val to 0\n");*/
674          }
675          alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh);
676          if (alphatest[1] > 255) {
677             alphatest[1] = 255;
678 /*            fprintf(stderr, "adj alpha base val to 255\n");*/
679          }
680 
681          alphablockerror3 = 0;
682          for (aindex = 0; aindex < 5; aindex++) {
683          /* don't forget here is always rounded down */
684             acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
685          }
686          for (j = 0; j < numypixels; j++) {
687             for (i = 0; i < numxpixels; i++) {
688                 /* maybe it's overkill to have the most complicated calculation just for the error
689                   calculation which we only need to figure out if encoding1 or encoding2 is better... */
690                if (srccolors[j][i][3] <= alphatest[0] / 2) {
691                   alphaenc3[4*j + i] = 6;
692                   alphadist = srccolors[j][i][3];
693                }
694                else if (srccolors[j][i][3] > ((255 + alphatest[1]) / 2)) {
695                   alphaenc3[4*j + i] = 7;
696                   alphadist = 255 - srccolors[j][i][3];
697                }
698                else if (srccolors[j][i][3] <= acutValues[0]) {
699                   alphaenc3[4*j + i] = 0;
700                   alphadist = srccolors[j][i][3] - alphatest[0];
701                }
702                else if (srccolors[j][i][3] <= acutValues[1]) {
703                  alphaenc3[4*j + i] = 2;
704                  alphadist = srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5;
705                }
706                else if (srccolors[j][i][3] <= acutValues[2]) {
707                   alphaenc3[4*j + i] = 3;
708                   alphadist = srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5;
709                }
710                else if (srccolors[j][i][3] <= acutValues[3]) {
711                   alphaenc3[4*j + i] = 4;
712                   alphadist = srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5;
713                }
714                else if (srccolors[j][i][3] <= acutValues[4]) {
715                   alphaenc3[4*j + i] = 5;
716                   alphadist = srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5;
717                }
718                else {
719                   alphaenc3[4*j + i] = 1;
720                   alphadist = srccolors[j][i][3] - alphatest[1];
721                }
722                alphablockerror3 += alphadist * alphadist;
723             }
724          }
725       }
726    }
727   /* write the alpha values and encoding back. */
728    if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) {
729 /*      if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %d\n", alphablockerror1);*/
730       writedxt5encodedalphablock( blkaddr, alphause[1], alphause[0], alphaenc1 );
731    }
732    else if (alphablockerror2 <= alphablockerror3) {
733 /*      if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %d\n", alphablockerror2);*/
734       writedxt5encodedalphablock( blkaddr, alphabase[0], alphabase[1], alphaenc2 );
735    }
736    else {
737 /*      fprintf(stderr, "enc3 used, error %d\n", alphablockerror3);*/
738       writedxt5encodedalphablock( blkaddr, (GLubyte)alphatest[0], (GLubyte)alphatest[1], alphaenc3 );
739    }
740 }
741 
742 static void extractsrccolors( GLubyte srcpixels[4][4][4], const GLchan *srcaddr,
743                          GLint srcRowStride, GLint numxpixels, GLint numypixels, GLint comps)
744 {
745    GLubyte i, j, c;
746    const GLchan *curaddr;
747    for (j = 0; j < numypixels; j++) {
748       curaddr = srcaddr + j * srcRowStride * comps;
749       for (i = 0; i < numxpixels; i++) {
750          for (c = 0; c < comps; c++) {
751             srcpixels[j][i][c] = *curaddr++ / (CHAN_MAX / 255);
752          }
753       }
754    }
755 }
756 
757 
758 void tx_compress_dxtn(GLint srccomps, GLint width, GLint height, const GLubyte *srcPixData,
759                      GLenum destFormat, GLubyte *dest, GLint dstRowStride)
760 {
761       GLubyte *blkaddr = dest;
762       GLubyte srcpixels[4][4][4];
763       const GLchan *srcaddr = srcPixData;
764       GLint numxpixels, numypixels;
765       GLint i, j;
766       GLint dstRowDiff;
767 
768    switch (destFormat) {
769    case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
770    case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
771       /* hmm we used to get called without dstRowStride... */
772       dstRowDiff = dstRowStride >= (width * 2) ? dstRowStride - (((width + 3) & ~3) * 2) : 0;
773 /*      fprintf(stderr, "dxt1 tex width %d tex height %d dstRowStride %d\n",
774               width, height, dstRowStride); */
775       for (j = 0; j < height; j += 4) {
776          if (height > j + 3) numypixels = 4;
777          else numypixels = height - j;
778          srcaddr = srcPixData + j * width * srccomps;
779          for (i = 0; i < width; i += 4) {
780             if (width > i + 3) numxpixels = 4;
781             else numxpixels = width - i;
782             extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
783             encodedxtcolorblockfaster(blkaddr, srcpixels, numxpixels, numypixels, destFormat);
784             srcaddr += srccomps * numxpixels;
785             blkaddr += 8;
786          }
787          blkaddr += dstRowDiff;
788       }
789       break;
790    case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
791       dstRowDiff = dstRowStride >= (width * 4) ? dstRowStride - (((width + 3) & ~3) * 4) : 0;
792 /*      fprintf(stderr, "dxt3 tex width %d tex height %d dstRowStride %d\n",
793               width, height, dstRowStride); */
794       for (j = 0; j < height; j += 4) {
795          if (height > j + 3) numypixels = 4;
796          else numypixels = height - j;
797          srcaddr = srcPixData + j * width * srccomps;
798          for (i = 0; i < width; i += 4) {
799             if (width > i + 3) numxpixels = 4;
800             else numxpixels = width - i;
801             extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
802             *blkaddr++ = (srcpixels[0][0][3] >> 4) | (srcpixels[0][1][3] & 0xf0);
803             *blkaddr++ = (srcpixels[0][2][3] >> 4) | (srcpixels[0][3][3] & 0xf0);
804             *blkaddr++ = (srcpixels[1][0][3] >> 4) | (srcpixels[1][1][3] & 0xf0);
805             *blkaddr++ = (srcpixels[1][2][3] >> 4) | (srcpixels[1][3][3] & 0xf0);
806             *blkaddr++ = (srcpixels[2][0][3] >> 4) | (srcpixels[2][1][3] & 0xf0);
807             *blkaddr++ = (srcpixels[2][2][3] >> 4) | (srcpixels[2][3][3] & 0xf0);
808             *blkaddr++ = (srcpixels[3][0][3] >> 4) | (srcpixels[3][1][3] & 0xf0);
809             *blkaddr++ = (srcpixels[3][2][3] >> 4) | (srcpixels[3][3][3] & 0xf0);
810             encodedxtcolorblockfaster(blkaddr, srcpixels, numxpixels, numypixels, destFormat);
811             srcaddr += srccomps * numxpixels;
812             blkaddr += 8;
813          }
814          blkaddr += dstRowDiff;
815       }
816       break;
817    case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
818       dstRowDiff = dstRowStride >= (width * 4) ? dstRowStride - (((width + 3) & ~3) * 4) : 0;
819 /*      fprintf(stderr, "dxt5 tex width %d tex height %d dstRowStride %d\n",
820               width, height, dstRowStride); */
821       for (j = 0; j < height; j += 4) {
822          if (height > j + 3) numypixels = 4;
823          else numypixels = height - j;
824          srcaddr = srcPixData + j * width * srccomps;
825          for (i = 0; i < width; i += 4) {
826             if (width > i + 3) numxpixels = 4;
827             else numxpixels = width - i;
828             extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
829             encodedxt5alpha(blkaddr, srcpixels, numxpixels, numypixels);
830             encodedxtcolorblockfaster(blkaddr + 8, srcpixels, numxpixels, numypixels, destFormat);
831             srcaddr += srccomps * numxpixels;
832             blkaddr += 16;
833          }
834          blkaddr += dstRowDiff;
835       }
836       break;
837    default:
838       fprintf(stderr, "libdxtn: Bad dstFormat %d in tx_compress_dxtn\n", destFormat);
839       return;
840    }
841 }
842 
843 
844