1 #ifdef __REACTOS__
2 #include "precomp.h"
3 #else
4 /*
5  * libtxc_dxtn
6  * Version:  1.0
7  *
8  * Copyright (C) 2004  Roland Scheidegger   All Rights Reserved.
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a
11  * copy of this software and associated documentation files (the "Software"),
12  * to deal in the Software without restriction, including without limitation
13  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14  * and/or sell copies of the Software, and to permit persons to whom the
15  * Software is furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included
18  * in all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
23  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
24  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
25  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  */
27 
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include "txc_dxtn.h"
31 #endif /* __REACTOS__ */
32 
/* Weights used by the error function. They are the squares of the per-channel
   weights 2/4/1 (red/green/blue), which roughly follow the rgb->luminance conversion.
   Not sure whether this really reflects visual perception. */
35 #define REDWEIGHT 4
36 #define GREENWEIGHT 16
37 #define BLUEWEIGHT 1
38 
39 #define ALPHACUT 127
40 
41 static void fancybasecolorsearch( GLubyte *blkaddr, GLubyte srccolors[4][4][4], GLubyte *bestcolor[2],
42                            GLint numxpixels, GLint numypixels, GLint type, GLboolean haveAlpha)
43 {
44    /* use same luminance-weighted distance metric to determine encoding as for finding the base colors */
45 
46    /* TODO could also try to find a better encoding for the 3-color-encoding type, this really should be done
47       if it's rgba_dxt1 and we have alpha in the block, currently even values which will be mapped to black
48       due to their alpha value will influence the result */
49    GLint i, j, colors, z;
50    GLuint pixerror, pixerrorred, pixerrorgreen, pixerrorblue, pixerrorbest;
51    GLint colordist, blockerrlin[2][3];
52    GLubyte nrcolor[2];
53    GLint pixerrorcolorbest[3];
54    GLubyte enc = 0;
55    GLubyte cv[4][4];
56    GLubyte testcolor[2][3];
57 
58 /*   fprintf(stderr, "color begin 0 r/g/b %d/%d/%d, 1 r/g/b %d/%d/%d\n",
59       bestcolor[0][0], bestcolor[0][1], bestcolor[0][2], bestcolor[1][0], bestcolor[1][1], bestcolor[1][2]);*/
60    if (((bestcolor[0][0] & 0xf8) << 8 | (bestcolor[0][1] & 0xfc) << 3 | bestcolor[0][2] >> 3) <
61       ((bestcolor[1][0] & 0xf8) << 8 | (bestcolor[1][1] & 0xfc) << 3 | bestcolor[1][2] >> 3)) {
62       testcolor[0][0] = bestcolor[0][0];
63       testcolor[0][1] = bestcolor[0][1];
64       testcolor[0][2] = bestcolor[0][2];
65       testcolor[1][0] = bestcolor[1][0];
66       testcolor[1][1] = bestcolor[1][1];
67       testcolor[1][2] = bestcolor[1][2];
68    }
69    else {
70       testcolor[1][0] = bestcolor[0][0];
71       testcolor[1][1] = bestcolor[0][1];
72       testcolor[1][2] = bestcolor[0][2];
73       testcolor[0][0] = bestcolor[1][0];
74       testcolor[0][1] = bestcolor[1][1];
75       testcolor[0][2] = bestcolor[1][2];
76    }
77 
78    for (i = 0; i < 3; i ++) {
79       cv[0][i] = testcolor[0][i];
80       cv[1][i] = testcolor[1][i];
81       cv[2][i] = (testcolor[0][i] * 2 + testcolor[1][i]) / 3;
82       cv[3][i] = (testcolor[0][i] + testcolor[1][i] * 2) / 3;
83    }
84 
85    blockerrlin[0][0] = 0;
86    blockerrlin[0][1] = 0;
87    blockerrlin[0][2] = 0;
88    blockerrlin[1][0] = 0;
89    blockerrlin[1][1] = 0;
90    blockerrlin[1][2] = 0;
91 
92    nrcolor[0] = 0;
93    nrcolor[1] = 0;
94 
95    for (j = 0; j < numypixels; j++) {
96       for (i = 0; i < numxpixels; i++) {
97          pixerrorbest = 0xffffffff;
98          for (colors = 0; colors < 4; colors++) {
99             colordist = srccolors[j][i][0] - (cv[colors][0]);
100             pixerror = colordist * colordist * REDWEIGHT;
101             pixerrorred = colordist;
102             colordist = srccolors[j][i][1] - (cv[colors][1]);
103             pixerror += colordist * colordist * GREENWEIGHT;
104             pixerrorgreen = colordist;
105             colordist = srccolors[j][i][2] - (cv[colors][2]);
106             pixerror += colordist * colordist * BLUEWEIGHT;
107             pixerrorblue = colordist;
108             if (pixerror < pixerrorbest) {
109                enc = colors;
110                pixerrorbest = pixerror;
111                pixerrorcolorbest[0] = pixerrorred;
112                pixerrorcolorbest[1] = pixerrorgreen;
113                pixerrorcolorbest[2] = pixerrorblue;
114             }
115          }
116          if (enc == 0) {
117             for (z = 0; z < 3; z++) {
118                blockerrlin[0][z] += 3 * pixerrorcolorbest[z];
119             }
120             nrcolor[0] += 3;
121          }
122          else if (enc == 2) {
123             for (z = 0; z < 3; z++) {
124                blockerrlin[0][z] += 2 * pixerrorcolorbest[z];
125             }
126             nrcolor[0] += 2;
127             for (z = 0; z < 3; z++) {
128                blockerrlin[1][z] += 1 * pixerrorcolorbest[z];
129             }
130             nrcolor[1] += 1;
131          }
132          else if (enc == 3) {
133             for (z = 0; z < 3; z++) {
134                blockerrlin[0][z] += 1 * pixerrorcolorbest[z];
135             }
136             nrcolor[0] += 1;
137             for (z = 0; z < 3; z++) {
138                blockerrlin[1][z] += 2 * pixerrorcolorbest[z];
139             }
140             nrcolor[1] += 2;
141          }
142          else if (enc == 1) {
143             for (z = 0; z < 3; z++) {
144                blockerrlin[1][z] += 3 * pixerrorcolorbest[z];
145             }
146             nrcolor[1] += 3;
147          }
148       }
149    }
150    if (nrcolor[0] == 0) nrcolor[0] = 1;
151    if (nrcolor[1] == 0) nrcolor[1] = 1;
152    for (j = 0; j < 2; j++) {
153       for (i = 0; i < 3; i++) {
154 	 GLint newvalue = testcolor[j][i] + blockerrlin[j][i] / nrcolor[j];
155 	 if (newvalue <= 0)
156 	    testcolor[j][i] = 0;
157 	 else if (newvalue >= 255)
158 	    testcolor[j][i] = 255;
159 	 else testcolor[j][i] = newvalue;
160       }
161    }
162 
163    if ((abs(testcolor[0][0] - testcolor[1][0]) < 8) &&
164        (abs(testcolor[0][1] - testcolor[1][1]) < 4) &&
165        (abs(testcolor[0][2] - testcolor[1][2]) < 8)) {
166        /* both colors are so close they might get encoded as the same 16bit values */
167       GLubyte coldiffred, coldiffgreen, coldiffblue, coldiffmax, factor, ind0, ind1;
168 
169       coldiffred = abs(testcolor[0][0] - testcolor[1][0]);
170       coldiffgreen = 2 * abs(testcolor[0][1] - testcolor[1][1]);
171       coldiffblue = abs(testcolor[0][2] - testcolor[1][2]);
172       coldiffmax = coldiffred;
173       if (coldiffmax < coldiffgreen) coldiffmax = coldiffgreen;
174       if (coldiffmax < coldiffblue) coldiffmax = coldiffblue;
175       if (coldiffmax > 0) {
176          if (coldiffmax > 4) factor = 2;
177          else if (coldiffmax > 2) factor = 3;
178          else factor = 4;
179          /* Won't do much if the color value is near 255... */
180          /* argh so many ifs */
181          if (testcolor[1][1] >= testcolor[0][1]) {
182             ind1 = 1; ind0 = 0;
183          }
184          else {
185             ind1 = 0; ind0 = 1;
186          }
187          if ((testcolor[ind1][1] + factor * coldiffgreen) <= 255)
188             testcolor[ind1][1] += factor * coldiffgreen;
189          else testcolor[ind1][1] = 255;
190          if ((testcolor[ind1][0] - testcolor[ind0][1]) > 0) {
191             if ((testcolor[ind1][0] + factor * coldiffred) <= 255)
192                testcolor[ind1][0] += factor * coldiffred;
193             else testcolor[ind1][0] = 255;
194          }
195          else {
196             if ((testcolor[ind0][0] + factor * coldiffred) <= 255)
197                testcolor[ind0][0] += factor * coldiffred;
198             else testcolor[ind0][0] = 255;
199          }
200          if ((testcolor[ind1][2] - testcolor[ind0][2]) > 0) {
201             if ((testcolor[ind1][2] + factor * coldiffblue) <= 255)
202                testcolor[ind1][2] += factor * coldiffblue;
203             else testcolor[ind1][2] = 255;
204          }
205          else {
206             if ((testcolor[ind0][2] + factor * coldiffblue) <= 255)
207                testcolor[ind0][2] += factor * coldiffblue;
208             else testcolor[ind0][2] = 255;
209          }
210       }
211    }
212 
213    if (((testcolor[0][0] & 0xf8) << 8 | (testcolor[0][1] & 0xfc) << 3 | testcolor[0][2] >> 3) <
214       ((testcolor[1][0] & 0xf8) << 8 | (testcolor[1][1] & 0xfc) << 3 | testcolor[1][2]) >> 3) {
215       for (i = 0; i < 3; i++) {
216          bestcolor[0][i] = testcolor[0][i];
217          bestcolor[1][i] = testcolor[1][i];
218       }
219    }
220    else {
221       for (i = 0; i < 3; i++) {
222          bestcolor[0][i] = testcolor[1][i];
223          bestcolor[1][i] = testcolor[0][i];
224       }
225    }
226 
227 /*     fprintf(stderr, "color end 0 r/g/b %d/%d/%d, 1 r/g/b %d/%d/%d\n",
228      bestcolor[0][0], bestcolor[0][1], bestcolor[0][2], bestcolor[1][0], bestcolor[1][1], bestcolor[1][2]);*/
229 }
230 
231 
232 
233 static void storedxtencodedblock( GLubyte *blkaddr, GLubyte srccolors[4][4][4], GLubyte *bestcolor[2],
234                            GLint numxpixels, GLint numypixels, GLuint type, GLboolean haveAlpha)
235 {
236    /* use same luminance-weighted distance metric to determine encoding as for finding the base colors */
237 
238    GLint i, j, colors;
239    GLuint testerror, testerror2, pixerror, pixerrorbest;
240    GLint colordist;
241    GLushort color0, color1, tempcolor;
242    GLuint bits = 0, bits2 = 0;
243    GLubyte *colorptr;
244    GLubyte enc = 0;
245    GLubyte cv[4][4];
246 
247    bestcolor[0][0] = bestcolor[0][0] & 0xf8;
248    bestcolor[0][1] = bestcolor[0][1] & 0xfc;
249    bestcolor[0][2] = bestcolor[0][2] & 0xf8;
250    bestcolor[1][0] = bestcolor[1][0] & 0xf8;
251    bestcolor[1][1] = bestcolor[1][1] & 0xfc;
252    bestcolor[1][2] = bestcolor[1][2] & 0xf8;
253 
254    color0 = bestcolor[0][0] << 8 | bestcolor[0][1] << 3 | bestcolor[0][2] >> 3;
255    color1 = bestcolor[1][0] << 8 | bestcolor[1][1] << 3 | bestcolor[1][2] >> 3;
256    if (color0 < color1) {
257       tempcolor = color0; color0 = color1; color1 = tempcolor;
258       colorptr = bestcolor[0]; bestcolor[0] = bestcolor[1]; bestcolor[1] = colorptr;
259    }
260 
261 
262    for (i = 0; i < 3; i++) {
263       cv[0][i] = bestcolor[0][i];
264       cv[1][i] = bestcolor[1][i];
265       cv[2][i] = (bestcolor[0][i] * 2 + bestcolor[1][i]) / 3;
266       cv[3][i] = (bestcolor[0][i] + bestcolor[1][i] * 2) / 3;
267    }
268 
269    testerror = 0;
270    for (j = 0; j < numypixels; j++) {
271       for (i = 0; i < numxpixels; i++) {
272          pixerrorbest = 0xffffffff;
273          for (colors = 0; colors < 4; colors++) {
274             colordist = srccolors[j][i][0] - cv[colors][0];
275             pixerror = colordist * colordist * REDWEIGHT;
276             colordist = srccolors[j][i][1] - cv[colors][1];
277             pixerror += colordist * colordist * GREENWEIGHT;
278             colordist = srccolors[j][i][2] - cv[colors][2];
279             pixerror += colordist * colordist * BLUEWEIGHT;
280             if (pixerror < pixerrorbest) {
281                pixerrorbest = pixerror;
282                enc = colors;
283             }
284          }
285          testerror += pixerrorbest;
286          bits |= enc << (2 * (j * 4 + i));
287       }
288    }
289    /* some hw might disagree but actually decoding should always use 4-color encoding
290       for non-dxt1 formats */
291    if (type == GL_COMPRESSED_RGB_S3TC_DXT1_EXT || type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
292       for (i = 0; i < 3; i++) {
293          cv[2][i] = (bestcolor[0][i] + bestcolor[1][i]) / 2;
294          /* this isn't used. Looks like the black color constant can only be used
295             with RGB_DXT1 if I read the spec correctly (note though that the radeon gpu disagrees,
296             it will decode 3 to black even with DXT3/5), and due to how the color searching works
297             it won't get used even then */
298          cv[3][i] = 0;
299       }
300       testerror2 = 0;
301       for (j = 0; j < numypixels; j++) {
302          for (i = 0; i < numxpixels; i++) {
303             pixerrorbest = 0xffffffff;
304             if ((type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) && (srccolors[j][i][3] <= ALPHACUT)) {
305                enc = 3;
306                pixerrorbest = 0; /* don't calculate error */
307             }
308             else {
309                /* we're calculating the same what we have done already for colors 0-1 above... */
310                for (colors = 0; colors < 3; colors++) {
311                   colordist = srccolors[j][i][0] - cv[colors][0];
312                   pixerror = colordist * colordist * REDWEIGHT;
313                   colordist = srccolors[j][i][1] - cv[colors][1];
314                   pixerror += colordist * colordist * GREENWEIGHT;
315                   colordist = srccolors[j][i][2] - cv[colors][2];
316                   pixerror += colordist * colordist * BLUEWEIGHT;
317                   if (pixerror < pixerrorbest) {
318                      pixerrorbest = pixerror;
319                      /* need to exchange colors later */
320                      if (colors > 1) enc = colors;
321                      else enc = colors ^ 1;
322                   }
323                }
324             }
325             testerror2 += pixerrorbest;
326             bits2 |= enc << (2 * (j * 4 + i));
327          }
328       }
329    } else {
330       testerror2 = 0xffffffff;
331    }
332 
333    /* finally we're finished, write back colors and bits */
334    if ((testerror > testerror2) || (haveAlpha)) {
335       *blkaddr++ = color1 & 0xff;
336       *blkaddr++ = color1 >> 8;
337       *blkaddr++ = color0 & 0xff;
338       *blkaddr++ = color0 >> 8;
339       *blkaddr++ = bits2 & 0xff;
340       *blkaddr++ = ( bits2 >> 8) & 0xff;
341       *blkaddr++ = ( bits2 >> 16) & 0xff;
342       *blkaddr = bits2 >> 24;
343    }
344    else {
345       *blkaddr++ = color0 & 0xff;
346       *blkaddr++ = color0 >> 8;
347       *blkaddr++ = color1 & 0xff;
348       *blkaddr++ = color1 >> 8;
349       *blkaddr++ = bits & 0xff;
350       *blkaddr++ = ( bits >> 8) & 0xff;
351       *blkaddr++ = ( bits >> 16) & 0xff;
352       *blkaddr = bits >> 24;
353    }
354 }
355 
356 static void encodedxtcolorblockfaster( GLubyte *blkaddr, GLubyte srccolors[4][4][4],
357                          GLint numxpixels, GLint numypixels, GLuint type )
358 {
359 /* simplistic approach. We need two base colors, simply use the "highest" and the "lowest" color
360    present in the picture as base colors */
361 
362    /* define lowest and highest color as shortest and longest vector to 0/0/0, though the
363       vectors are weighted similar to their importance in rgb-luminance conversion
364       doesn't work too well though...
365       This seems to be a rather difficult problem */
366 
367    GLubyte *bestcolor[2];
368    GLubyte basecolors[2][3];
369    GLubyte i, j;
370    GLuint lowcv, highcv, testcv;
371    GLboolean haveAlpha = GL_FALSE;
372 
373    lowcv = highcv = srccolors[0][0][0] * srccolors[0][0][0] * REDWEIGHT +
374                           srccolors[0][0][1] * srccolors[0][0][1] * GREENWEIGHT +
375                           srccolors[0][0][2] * srccolors[0][0][2] * BLUEWEIGHT;
376    bestcolor[0] = bestcolor[1] = srccolors[0][0];
377    for (j = 0; j < numypixels; j++) {
378       for (i = 0; i < numxpixels; i++) {
379          /* don't use this as a base color if the pixel will get black/transparent anyway */
380          if ((type != GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) || (srccolors[j][i][3] > ALPHACUT)) {
381             testcv = srccolors[j][i][0] * srccolors[j][i][0] * REDWEIGHT +
382                      srccolors[j][i][1] * srccolors[j][i][1] * GREENWEIGHT +
383                      srccolors[j][i][2] * srccolors[j][i][2] * BLUEWEIGHT;
384             if (testcv > highcv) {
385                highcv = testcv;
386                bestcolor[1] = srccolors[j][i];
387             }
388             else if (testcv < lowcv) {
389                lowcv = testcv;
390                bestcolor[0] = srccolors[j][i];
391             }
392          }
393          else haveAlpha = GL_TRUE;
394       }
395    }
396    /* make sure the original color values won't get touched... */
397    for (j = 0; j < 2; j++) {
398       for (i = 0; i < 3; i++) {
399          basecolors[j][i] = bestcolor[j][i];
400       }
401    }
402    bestcolor[0] = basecolors[0];
403    bestcolor[1] = basecolors[1];
404 
405    /* try to find better base colors */
406    fancybasecolorsearch(blkaddr, srccolors, bestcolor, numxpixels, numypixels, type, haveAlpha);
407    /* find the best encoding for these colors, and store the result */
408    storedxtencodedblock(blkaddr, srccolors, bestcolor, numxpixels, numypixels, type, haveAlpha);
409 }
410 
411 static void writedxt5encodedalphablock( GLubyte *blkaddr, GLubyte alphabase1, GLubyte alphabase2,
412                          GLubyte alphaenc[16])
413 {
414    *blkaddr++ = alphabase1;
415    *blkaddr++ = alphabase2;
416    *blkaddr++ = alphaenc[0] | (alphaenc[1] << 3) | ((alphaenc[2] & 3) << 6);
417    *blkaddr++ = (alphaenc[2] >> 2) | (alphaenc[3] << 1) | (alphaenc[4] << 4) | ((alphaenc[5] & 1) << 7);
418    *blkaddr++ = (alphaenc[5] >> 1) | (alphaenc[6] << 2) | (alphaenc[7] << 5);
419    *blkaddr++ = alphaenc[8] | (alphaenc[9] << 3) | ((alphaenc[10] & 3) << 6);
420    *blkaddr++ = (alphaenc[10] >> 2) | (alphaenc[11] << 1) | (alphaenc[12] << 4) | ((alphaenc[13] & 1) << 7);
421    *blkaddr++ = (alphaenc[13] >> 1) | (alphaenc[14] << 2) | (alphaenc[15] << 5);
422 }
423 
424 static void encodedxt5alpha(GLubyte *blkaddr, GLubyte srccolors[4][4][4],
425                             GLint numxpixels, GLint numypixels)
426 {
427    GLubyte alphabase[2], alphause[2];
428    GLshort alphatest[2];
429    GLuint alphablockerror1, alphablockerror2, alphablockerror3;
430    GLubyte i, j, aindex, acutValues[7];
431    GLubyte alphaenc1[16], alphaenc2[16], alphaenc3[16];
432    GLboolean alphaabsmin = GL_FALSE;
433    GLboolean alphaabsmax = GL_FALSE;
434    GLshort alphadist;
435 
436    /* find lowest and highest alpha value in block, alphabase[0] lowest, alphabase[1] highest */
437    alphabase[0] = 0xff; alphabase[1] = 0x0;
438    for (j = 0; j < numypixels; j++) {
439       for (i = 0; i < numxpixels; i++) {
440          if (srccolors[j][i][3] == 0)
441             alphaabsmin = GL_TRUE;
442          else if (srccolors[j][i][3] == 255)
443             alphaabsmax = GL_TRUE;
444          else {
445             if (srccolors[j][i][3] > alphabase[1])
446                alphabase[1] = srccolors[j][i][3];
447             if (srccolors[j][i][3] < alphabase[0])
448                alphabase[0] = srccolors[j][i][3];
449          }
450       }
451    }
452 
453 
454    if ((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax)) { /* one color, either max or min */
455       /* shortcut here since it is a very common case (and also avoids later problems) */
456       /* || (alphabase[0] == alphabase[1] && !alphaabsmin && !alphaabsmax) */
457       /* could also test for alpha0 == alpha1 (and not min/max), but probably not common, so don't bother */
458 
459       *blkaddr++ = srccolors[0][0][3];
460       blkaddr++;
461       *blkaddr++ = 0;
462       *blkaddr++ = 0;
463       *blkaddr++ = 0;
464       *blkaddr++ = 0;
465       *blkaddr++ = 0;
466       *blkaddr++ = 0;
467 /*      fprintf(stderr, "enc0 used\n");*/
468       return;
469    }
470 
471    /* find best encoding for alpha0 > alpha1 */
472    /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */
473    alphablockerror1 = 0x0;
474    alphablockerror2 = 0xffffffff;
475    alphablockerror3 = 0xffffffff;
476    if (alphaabsmin) alphause[0] = 0;
477    else alphause[0] = alphabase[0];
478    if (alphaabsmax) alphause[1] = 255;
479    else alphause[1] = alphabase[1];
480    /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */
481    for (aindex = 0; aindex < 7; aindex++) {
482       /* don't forget here is always rounded down */
483       acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14;
484    }
485 
486    for (j = 0; j < numypixels; j++) {
487       for (i = 0; i < numxpixels; i++) {
488          /* maybe it's overkill to have the most complicated calculation just for the error
489             calculation which we only need to figure out if encoding1 or encoding2 is better... */
490          if (srccolors[j][i][3] > acutValues[0]) {
491             alphaenc1[4*j + i] = 0;
492             alphadist = srccolors[j][i][3] - alphause[1];
493          }
494          else if (srccolors[j][i][3] > acutValues[1]) {
495             alphaenc1[4*j + i] = 2;
496             alphadist = srccolors[j][i][3] - (alphause[1] * 6 + alphause[0] * 1) / 7;
497          }
498          else if (srccolors[j][i][3] > acutValues[2]) {
499             alphaenc1[4*j + i] = 3;
500             alphadist = srccolors[j][i][3] - (alphause[1] * 5 + alphause[0] * 2) / 7;
501          }
502          else if (srccolors[j][i][3] > acutValues[3]) {
503             alphaenc1[4*j + i] = 4;
504             alphadist = srccolors[j][i][3] - (alphause[1] * 4 + alphause[0] * 3) / 7;
505          }
506          else if (srccolors[j][i][3] > acutValues[4]) {
507             alphaenc1[4*j + i] = 5;
508             alphadist = srccolors[j][i][3] - (alphause[1] * 3 + alphause[0] * 4) / 7;
509          }
510          else if (srccolors[j][i][3] > acutValues[5]) {
511             alphaenc1[4*j + i] = 6;
512             alphadist = srccolors[j][i][3] - (alphause[1] * 2 + alphause[0] * 5) / 7;
513          }
514          else if (srccolors[j][i][3] > acutValues[6]) {
515             alphaenc1[4*j + i] = 7;
516             alphadist = srccolors[j][i][3] - (alphause[1] * 1 + alphause[0] * 6) / 7;
517          }
518          else {
519             alphaenc1[4*j + i] = 1;
520             alphadist = srccolors[j][i][3] - alphause[0];
521          }
522          alphablockerror1 += alphadist * alphadist;
523       }
524    }
525 /*      for (i = 0; i < 16; i++) {
526          fprintf(stderr, "%d ", alphaenc1[i]);
527       }
528       fprintf(stderr, "cutVals ");
529       for (i = 0; i < 8; i++) {
530          fprintf(stderr, "%d ", acutValues[i]);
531       }
532       fprintf(stderr, "srcVals ");
533       for (j = 0; j < numypixels; j++)
534          for (i = 0; i < numxpixels; i++) {
535             fprintf(stderr, "%d ", srccolors[j][i][3]);
536          }
537 
538       fprintf(stderr, "\n");
539    }*/
540    /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax
541       are false but try it anyway */
542    if (alphablockerror1 >= 32) {
543 
544       /* don't bother if encoding is already very good, this condition should also imply
545       we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */
546       alphablockerror2 = 0;
547       for (aindex = 0; aindex < 5; aindex++) {
548          /* don't forget here is always rounded down */
549          acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10;
550       }
551       for (j = 0; j < numypixels; j++) {
552          for (i = 0; i < numxpixels; i++) {
553              /* maybe it's overkill to have the most complicated calculation just for the error
554                calculation which we only need to figure out if encoding1 or encoding2 is better... */
555             if (srccolors[j][i][3] == 0) {
556                alphaenc2[4*j + i] = 6;
557                alphadist = 0;
558             }
559             else if (srccolors[j][i][3] == 255) {
560                alphaenc2[4*j + i] = 7;
561                alphadist = 0;
562             }
563             else if (srccolors[j][i][3] <= acutValues[0]) {
564                alphaenc2[4*j + i] = 0;
565                alphadist = srccolors[j][i][3] - alphabase[0];
566             }
567             else if (srccolors[j][i][3] <= acutValues[1]) {
568                alphaenc2[4*j + i] = 2;
569                alphadist = srccolors[j][i][3] - (alphabase[0] * 4 + alphabase[1] * 1) / 5;
570             }
571             else if (srccolors[j][i][3] <= acutValues[2]) {
572                alphaenc2[4*j + i] = 3;
573                alphadist = srccolors[j][i][3] - (alphabase[0] * 3 + alphabase[1] * 2) / 5;
574             }
575             else if (srccolors[j][i][3] <= acutValues[3]) {
576                alphaenc2[4*j + i] = 4;
577                alphadist = srccolors[j][i][3] - (alphabase[0] * 2 + alphabase[1] * 3) / 5;
578             }
579             else if (srccolors[j][i][3] <= acutValues[4]) {
580                alphaenc2[4*j + i] = 5;
581                alphadist = srccolors[j][i][3] - (alphabase[0] * 1 + alphabase[1] * 4) / 5;
582             }
583             else {
584                alphaenc2[4*j + i] = 1;
585                alphadist = srccolors[j][i][3] - alphabase[1];
586             }
587             alphablockerror2 += alphadist * alphadist;
588          }
589       }
590 
591 
592       /* skip this if the error is already very small
593          this encoding is MUCH better on average than #2 though, but expensive! */
594       if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) {
595          GLshort blockerrlin1 = 0;
596          GLshort blockerrlin2 = 0;
597          GLubyte nralphainrangelow = 0;
598          GLubyte nralphainrangehigh = 0;
599          alphatest[0] = 0xff;
600          alphatest[1] = 0x0;
601          /* if we have large range it's likely there are values close to 0/255, try to map them to 0/255 */
602          for (j = 0; j < numypixels; j++) {
603             for (i = 0; i < numxpixels; i++) {
604                if ((srccolors[j][i][3] > alphatest[1]) && (srccolors[j][i][3] < (255 -(alphabase[1] - alphabase[0]) / 28)))
605                   alphatest[1] = srccolors[j][i][3];
606                if ((srccolors[j][i][3] < alphatest[0]) && (srccolors[j][i][3] > (alphabase[1] - alphabase[0]) / 28))
607                   alphatest[0] = srccolors[j][i][3];
608             }
609          }
610           /* shouldn't happen too often, don't really care about those degenerated cases */
611           if (alphatest[1] <= alphatest[0]) {
612              alphatest[0] = 1;
613              alphatest[1] = 254;
614 /*             fprintf(stderr, "only 1 or 0 colors for encoding!\n");*/
615          }
616          for (aindex = 0; aindex < 5; aindex++) {
617          /* don't forget here is always rounded down */
618             acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
619          }
620 
621          /* find the "average" difference between the alpha values and the next encoded value.
622             This is then used to calculate new base values.
623             Should there be some weighting, i.e. those values closer to alphatest[x] have more weight,
624             since they will see more improvement, and also because the values in the middle are somewhat
625             likely to get no improvement at all (because the base values might move in different directions)?
626             OTOH it would mean the values in the middle are even less likely to get an improvement
627          */
628          for (j = 0; j < numypixels; j++) {
629             for (i = 0; i < numxpixels; i++) {
630                if (srccolors[j][i][3] <= alphatest[0] / 2) {
631                }
632                else if (srccolors[j][i][3] > ((255 + alphatest[1]) / 2)) {
633                }
634                else if (srccolors[j][i][3] <= acutValues[0]) {
635                   blockerrlin1 += (srccolors[j][i][3] - alphatest[0]);
636                   nralphainrangelow += 1;
637                }
638                else if (srccolors[j][i][3] <= acutValues[1]) {
639                   blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
640                   blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
641                   nralphainrangelow += 1;
642                   nralphainrangehigh += 1;
643                }
644                else if (srccolors[j][i][3] <= acutValues[2]) {
645                   blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
646                   blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
647                   nralphainrangelow += 1;
648                   nralphainrangehigh += 1;
649                }
650                else if (srccolors[j][i][3] <= acutValues[3]) {
651                   blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
652                   blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
653                   nralphainrangelow += 1;
654                   nralphainrangehigh += 1;
655                }
656                else if (srccolors[j][i][3] <= acutValues[4]) {
657                   blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
658                   blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
659                   nralphainrangelow += 1;
660                   nralphainrangehigh += 1;
661                   }
662                else {
663                   blockerrlin2 += (srccolors[j][i][3] - alphatest[1]);
664                   nralphainrangehigh += 1;
665                }
666             }
667          }
668          /* shouldn't happen often, needed to avoid div by zero */
669          if (nralphainrangelow == 0) nralphainrangelow = 1;
670          if (nralphainrangehigh == 0) nralphainrangehigh = 1;
671          alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow);
672 /*         fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow);
673          fprintf(stderr, "block err lin high %d, nr %d\n", blockerrlin2, nralphainrangehigh);*/
674          /* again shouldn't really happen often... */
675          if (alphatest[0] < 0) {
676             alphatest[0] = 0;
677 /*            fprintf(stderr, "adj alpha base val to 0\n");*/
678          }
679          alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh);
680          if (alphatest[1] > 255) {
681             alphatest[1] = 255;
682 /*            fprintf(stderr, "adj alpha base val to 255\n");*/
683          }
684 
685          alphablockerror3 = 0;
686          for (aindex = 0; aindex < 5; aindex++) {
687          /* don't forget here is always rounded down */
688             acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
689          }
690          for (j = 0; j < numypixels; j++) {
691             for (i = 0; i < numxpixels; i++) {
692                 /* maybe it's overkill to have the most complicated calculation just for the error
693                   calculation which we only need to figure out if encoding1 or encoding2 is better... */
694                if (srccolors[j][i][3] <= alphatest[0] / 2) {
695                   alphaenc3[4*j + i] = 6;
696                   alphadist = srccolors[j][i][3];
697                }
698                else if (srccolors[j][i][3] > ((255 + alphatest[1]) / 2)) {
699                   alphaenc3[4*j + i] = 7;
700                   alphadist = 255 - srccolors[j][i][3];
701                }
702                else if (srccolors[j][i][3] <= acutValues[0]) {
703                   alphaenc3[4*j + i] = 0;
704                   alphadist = srccolors[j][i][3] - alphatest[0];
705                }
706                else if (srccolors[j][i][3] <= acutValues[1]) {
707                  alphaenc3[4*j + i] = 2;
708                  alphadist = srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5;
709                }
710                else if (srccolors[j][i][3] <= acutValues[2]) {
711                   alphaenc3[4*j + i] = 3;
712                   alphadist = srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5;
713                }
714                else if (srccolors[j][i][3] <= acutValues[3]) {
715                   alphaenc3[4*j + i] = 4;
716                   alphadist = srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5;
717                }
718                else if (srccolors[j][i][3] <= acutValues[4]) {
719                   alphaenc3[4*j + i] = 5;
720                   alphadist = srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5;
721                }
722                else {
723                   alphaenc3[4*j + i] = 1;
724                   alphadist = srccolors[j][i][3] - alphatest[1];
725                }
726                alphablockerror3 += alphadist * alphadist;
727             }
728          }
729       }
730    }
731   /* write the alpha values and encoding back. */
732    if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) {
733 /*      if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %d\n", alphablockerror1);*/
734       writedxt5encodedalphablock( blkaddr, alphause[1], alphause[0], alphaenc1 );
735    }
736    else if (alphablockerror2 <= alphablockerror3) {
737 /*      if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %d\n", alphablockerror2);*/
738       writedxt5encodedalphablock( blkaddr, alphabase[0], alphabase[1], alphaenc2 );
739    }
740    else {
741 /*      fprintf(stderr, "enc3 used, error %d\n", alphablockerror3);*/
742       writedxt5encodedalphablock( blkaddr, (GLubyte)alphatest[0], (GLubyte)alphatest[1], alphaenc3 );
743    }
744 }
745 
746 static void extractsrccolors( GLubyte srcpixels[4][4][4], const GLchan *srcaddr,
747                          GLint srcRowStride, GLint numxpixels, GLint numypixels, GLint comps)
748 {
749    GLubyte i, j, c;
750    const GLchan *curaddr;
751    for (j = 0; j < numypixels; j++) {
752       curaddr = srcaddr + j * srcRowStride * comps;
753       for (i = 0; i < numxpixels; i++) {
754          for (c = 0; c < comps; c++) {
755             srcpixels[j][i][c] = *curaddr++ / (CHAN_MAX / 255);
756          }
757       }
758    }
759 }
760 
761 
762 void tx_compress_dxtn(GLint srccomps, GLint width, GLint height, const GLubyte *srcPixData,
763                      GLenum destFormat, GLubyte *dest, GLint dstRowStride)
764 {
765       GLubyte *blkaddr = dest;
766       GLubyte srcpixels[4][4][4];
767       const GLchan *srcaddr = srcPixData;
768       GLint numxpixels, numypixels;
769       GLint i, j;
770       GLint dstRowDiff;
771 
772    switch (destFormat) {
773    case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
774    case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
775       /* hmm we used to get called without dstRowStride... */
776       dstRowDiff = dstRowStride >= (width * 2) ? dstRowStride - (((width + 3) & ~3) * 2) : 0;
777 /*      fprintf(stderr, "dxt1 tex width %d tex height %d dstRowStride %d\n",
778               width, height, dstRowStride); */
779       for (j = 0; j < height; j += 4) {
780          if (height > j + 3) numypixels = 4;
781          else numypixels = height - j;
782          srcaddr = srcPixData + j * width * srccomps;
783          for (i = 0; i < width; i += 4) {
784             if (width > i + 3) numxpixels = 4;
785             else numxpixels = width - i;
786             extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
787             encodedxtcolorblockfaster(blkaddr, srcpixels, numxpixels, numypixels, destFormat);
788             srcaddr += srccomps * numxpixels;
789             blkaddr += 8;
790          }
791          blkaddr += dstRowDiff;
792       }
793       break;
794    case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
795       dstRowDiff = dstRowStride >= (width * 4) ? dstRowStride - (((width + 3) & ~3) * 4) : 0;
796 /*      fprintf(stderr, "dxt3 tex width %d tex height %d dstRowStride %d\n",
797               width, height, dstRowStride); */
798       for (j = 0; j < height; j += 4) {
799          if (height > j + 3) numypixels = 4;
800          else numypixels = height - j;
801          srcaddr = srcPixData + j * width * srccomps;
802          for (i = 0; i < width; i += 4) {
803             if (width > i + 3) numxpixels = 4;
804             else numxpixels = width - i;
805             extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
806             *blkaddr++ = (srcpixels[0][0][3] >> 4) | (srcpixels[0][1][3] & 0xf0);
807             *blkaddr++ = (srcpixels[0][2][3] >> 4) | (srcpixels[0][3][3] & 0xf0);
808             *blkaddr++ = (srcpixels[1][0][3] >> 4) | (srcpixels[1][1][3] & 0xf0);
809             *blkaddr++ = (srcpixels[1][2][3] >> 4) | (srcpixels[1][3][3] & 0xf0);
810             *blkaddr++ = (srcpixels[2][0][3] >> 4) | (srcpixels[2][1][3] & 0xf0);
811             *blkaddr++ = (srcpixels[2][2][3] >> 4) | (srcpixels[2][3][3] & 0xf0);
812             *blkaddr++ = (srcpixels[3][0][3] >> 4) | (srcpixels[3][1][3] & 0xf0);
813             *blkaddr++ = (srcpixels[3][2][3] >> 4) | (srcpixels[3][3][3] & 0xf0);
814             encodedxtcolorblockfaster(blkaddr, srcpixels, numxpixels, numypixels, destFormat);
815             srcaddr += srccomps * numxpixels;
816             blkaddr += 8;
817          }
818          blkaddr += dstRowDiff;
819       }
820       break;
821    case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
822       dstRowDiff = dstRowStride >= (width * 4) ? dstRowStride - (((width + 3) & ~3) * 4) : 0;
823 /*      fprintf(stderr, "dxt5 tex width %d tex height %d dstRowStride %d\n",
824               width, height, dstRowStride); */
825       for (j = 0; j < height; j += 4) {
826          if (height > j + 3) numypixels = 4;
827          else numypixels = height - j;
828          srcaddr = srcPixData + j * width * srccomps;
829          for (i = 0; i < width; i += 4) {
830             if (width > i + 3) numxpixels = 4;
831             else numxpixels = width - i;
832             extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
833             encodedxt5alpha(blkaddr, srcpixels, numxpixels, numypixels);
834             encodedxtcolorblockfaster(blkaddr + 8, srcpixels, numxpixels, numypixels, destFormat);
835             srcaddr += srccomps * numxpixels;
836             blkaddr += 16;
837          }
838          blkaddr += dstRowDiff;
839       }
840       break;
841    default:
842       /* fprintf(stderr, "libdxtn: Bad dstFormat %d in tx_compress_dxtn\n", destFormat); */
843       return;
844    }
845 }
846