1 /*  ultra.c - Ultracode
2 
3     libzint - the open source barcode library
4     Copyright (C) 2020 - 2021 Robin Stuart <rstuart114@gmail.com>
5 
6     Redistribution and use in source and binary forms, with or without
7     modification, are permitted provided that the following conditions
8     are met:
9 
10     1. Redistributions of source code must retain the above copyright
11        notice, this list of conditions and the following disclaimer.
12     2. Redistributions in binary form must reproduce the above copyright
13        notice, this list of conditions and the following disclaimer in the
14        documentation and/or other materials provided with the distribution.
15     3. Neither the name of the project nor the names of its contributors
16        may be used to endorse or promote products derived from this software
17        without specific prior written permission.
18 
19     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20     ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22     ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
23     FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24     DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25     OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28     OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29     SUCH DAMAGE.
30  */
31 /* vim: set ts=4 sw=4 et : */
32 
33  /* This version was developed using AIMD/TSC15032-43 v0.99c Edit 60, dated 4th Nov 2015 */
34 
35 #ifdef _MSC_VER
36 #include <malloc.h>
37 #endif
38 #include <stdio.h>
39 #include "common.h"
40 
/* Encodation modes tracked while trial-encoding and generating codewords */
#define EIGHTBIT_MODE       10
#define ASCII_MODE          20
#define C43_MODE            30

/* Number of input characters past the current position that each look-ahead may consider when scoring modes */
#define PREDICT_WINDOW      12

/* Multiply two GF(283) elements using log/antilog tables. Arrays named `gfPwr` and `gfLog` must be in scope at
 * the point of use; `gfPwr` holds the antilog table twice over so the sum of two logarithms can index it without
 * a modulo. Both arguments are evaluated more than once, so they must be free of side effects */
#define GFMUL(i, j) ((((i) == 0)||((j) == 0)) ? 0 : gfPwr[(gfLog[i] + gfLog[j])])
48 
/* URL and file-name fragments that have dedicated encodings. The index of an entry is significant: callers
 * compare the value returned by ultra_find_fragment() against fixed indices (e.g. 17 "mailto:", 26 "file:").
 * Longer fragments are listed after the shorter ones they begin with, and ultra_find_fragment() reports the
 * last entry that matches */
static const char fragment[27][14] = {"http://", "https://", "http://www.", "https://www.",
        "ftp://", "www.", ".com", ".edu", ".gov", ".int", ".mil", ".net", ".org",
        ".mobi", ".coop", ".biz", ".info", "mailto:", "tel:", ".cgi", ".asp",
        ".aspx", ".php", ".htm", ".html", ".shtml", "file:"};

/* C43 compaction character sets. A character's position in the set 1 or set 2 string is its C43 value;
 * set 3 positions are offset by 19 when encoded */
static const char ultra_c43_set1[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 .,%";
static const char ultra_c43_set2[] = "abcdefghijklmnopqrstuvwxyz:/?#[]@=_~!.,-";
static const char ultra_c43_set3[] = "{}`()\"+'<>|$;&\\^*";
/* Characters eligible for ASCII-mode pair compression: digits at positions 0-9, ',' at 10 and '/' at 11 */
static const char ultra_digit[] = "0123456789,/";
/* NOTE(review): not referenced in this part of the file - presumably colour letters selected by the octal
 * digits of the tile tables; confirm at the point of use */
static const char ultra_colour[] = "0CBMRYGKW";
59 
// Max size and min cols adjusted to BWIPP values as updated 2021-07-14
// https://github.com/bwipp/postscriptbarcode/commit/4255810845fa8d45c6192dd30aee1fdad1aaf0cc
// NOTE(review): neither table is referenced in this part of the file; four entries each, presumably one per
// supported symbol row count - confirm at the point of use
static const int ultra_maxsize[] = {37, 84, 161, 282};

static const int ultra_mincols[] = {5, 13, 22, 29};

// NOTE(review): six entries, presumably indexed by error correction level - confirm at the point of use
static const int kec[] = {0, 1, 2, 4, 6, 8}; // Value K(EC) from Table 12
67 
/* 32-entry table of five-digit patterns written as octal literals (note the leading 0).
 * NOTE(review): not referenced in this part of the file; presumably each octal digit selects a colour from
 * `ultra_colour` and this is the upper data column count indicator - confirm at the point of use */
static const int dccu[] = {
    051363, 051563, 051653, 053153, 053163, 053513, 053563, 053613, //  0-7
    053653, 056153, 056163, 056313, 056353, 056363, 056513, 056563, //  8-15
    051316, 051356, 051536, 051616, 053156, 053516, 053536, 053616, // 16-23
    053636, 053656, 056136, 056156, 056316, 056356, 056516, 056536  // 24-31
};

/* 32-entry companion to `dccu`, in the same octal five-digit form.
 * NOTE(review): presumably the lower data column count indicator - confirm at the point of use */
static const int dccl[] = {
    061351, 061361, 061531, 061561, 061631, 061651, 063131, 063151, //  0-7
    063161, 063531, 063561, 063631, 065131, 065161, 065351, 065631, //  8-15
    031351, 031361, 031531, 031561, 031631, 031651, 035131, 035151, // 16-23
    035161, 035361, 035631, 035651, 036131, 036151, 036351, 036531  // 24-31
};
81 
/* Tile patterns indexed 0-284 (index ranges are noted per line), each a five-digit octal literal.
 * NOTE(review): not referenced in this part of the file; presumably indexed by codeword value with each octal
 * digit selecting a colour from `ultra_colour` - confirm at the point of use */
static const int tiles[] = {
    013135, 013136, 013153, 013156, 013163, 013165, 013513, 013515, 013516, 013531, //   0-9
    013535, 013536, 013561, 013563, 013565, 013613, 013615, 013616, 013631, 013635, //  10-19
    013636, 013651, 013653, 013656, 015135, 015136, 015153, 015163, 015165, 015313, //  20-29
    015315, 015316, 015351, 015353, 015356, 015361, 015363, 015365, 015613, 015615, //  30-39
    015616, 015631, 015635, 015636, 015651, 015653, 015656, 016135, 016136, 016153, //  40-49
    016156, 016165, 016313, 016315, 016316, 016351, 016353, 016356, 016361, 016363, //  50-59
    016365, 016513, 016515, 016516, 016531, 016535, 016536, 016561, 016563, 016565, //  60-69
    031315, 031316, 031351, 031356, 031361, 031365, 031513, 031515, 031516, 031531, //  70-79
    031535, 031536, 031561, 031563, 031565, 031613, 031615, 031631, 031635, 031636, //  80-89
    031651, 031653, 031656, 035131, 035135, 035136, 035151, 035153, 035156, 035161, //  90-99
    035163, 035165, 035315, 035316, 035351, 035356, 035361, 035365, 035613, 035615, // 100-109
    035616, 035631, 035635, 035636, 035651, 035653, 035656, 036131, 036135, 036136, // 110-119
    036151, 036153, 036156, 036163, 036165, 036315, 036316, 036351, 036356, 036361, // 120-129
    036365, 036513, 036515, 036516, 036531, 036535, 036536, 036561, 036563, 036565, // 130-139
    051313, 051315, 051316, 051351, 051353, 051356, 051361, 051363, 051365, 051513, // 140-149
    051516, 051531, 051536, 051561, 051563, 051613, 051615, 051616, 051631, 051635, // 150-159
    051636, 051651, 051653, 051656, 053131, 053135, 053136, 053151, 053153, 053156, // 160-169
    053161, 053163, 053165, 053513, 053516, 053531, 053536, 053561, 053563, 053613, // 170-179
    053615, 053616, 053631, 053635, 053636, 053651, 053653, 053656, 056131, 056135, // 180-189
    056136, 056151, 056153, 056156, 056161, 056163, 056165, 056313, 056315, 056316, // 190-199
    056351, 056353, 056356, 056361, 056363, 056365, 056513, 056516, 056531, 056536, // 200-209
    056561, 056563, 061313, 061315, 061316, 061351, 061353, 061356, 061361, 061363, // 210-219
    061365, 061513, 061515, 061516, 061531, 061535, 061536, 061561, 061563, 061565, // 220-229
    061615, 061631, 061635, 061651, 061653, 063131, 063135, 063136, 063151, 063153, // 230-239
    063156, 063161, 063163, 063165, 063513, 063515, 063516, 063531, 063535, 063536, // 240-249
    063561, 063563, 063565, 063613, 063615, 063631, 063635, 063651, 063653, 065131, // 250-259
    065135, 065136, 065151, 065153, 065156, 065161, 065163, 065165, 065313, 065315, // 260-269
    065316, 065351, 065353, 065356, 065361, 065363, 065365, 065613, 065615, 065631, // 270-279
    065635, 065651, 065653, 056565, 051515                                     // 280-284
};
113 
114 /* The following adapted from ECC283.C "RSEC codeword generator"
115  * from Annex B of Ultracode draft
116  * originally written by Ted Williams of Symbol Vision Corp.
117  * Dated 2001-03-09
118  * Corrected thanks to input from Terry Burton */
119 
120 /*
121  * NOTE: Included here is an attempt to allow code compression within Ultracode. Unfortunately
122  * the copy of the standard this was written from was an early draft which includes self
123  * contradictions, so this is a "best guess" implementation. Because it is not guaranteed
124  * to be correct this compression is not applied by default. To enable compression set
125  *
126  * symbol->option_3 = ULTRA_COMPRESSION;
127  *
128  * Code compression should be enabled by default when it has been implemented according to
129  * a more reliable version of the specification.
130  */
131 
/* Build the divisor polynomial gQ(x) over GF(283) for `EccSize` check codewords (3 to 101).
 * Coefficients are written to gPoly[0..EccSize]; `gfPwr` and `gfLog` are the antilog/log tables that
 * GFMUL relies on */
static void ultra_genPoly(short EccSize, unsigned short gPoly[], unsigned short gfPwr[], unsigned short gfLog[]) {
    int degree, k;

    /* Start from the constant polynomial 1 */
    gPoly[0] = 1;
    for (k = 1; k <= EccSize; k++) {
        gPoly[k] = 0;
    }

    /* Each pass multiplies the running polynomial by a degree-1 factor built from gfPwr[degree + 1] */
    for (degree = 0; degree < EccSize; degree++) {
        const unsigned short root = gfPwr[degree + 1];

        for (k = degree + 1; k > 0; k--) {
            gPoly[k] = (gPoly[k - 1] + GFMUL(gPoly[k], root)) % 283;
        }
        gPoly[0] = GFMUL(gPoly[0], root);
    }

    /* Negate every second coefficient, working down from EccSize - 1.
     * gPoly[k] is > 0 here so no modulo is needed after the subtraction */
    for (k = EccSize - 1; k >= 0; k -= 2) {
        gPoly[k] = 283 - gPoly[k];
    }
}
148 
/* Fill the GF(283) antilog (`gfPwr`) and log (`gfLog`) tables from successive powers of 3.
 * `gfPwr` must have room for 2 * 282 entries and `gfLog` for 283 entries */
static void ultra_initLogTables(unsigned short gfPwr[], unsigned short gfLog[]) {
    int exponent;
    int value = 1; /* 3 to the power 0 */

    for (exponent = 0; exponent < 283; exponent++) {
        gfLog[exponent] = 0;
    }

    for (exponent = 0; exponent < 282; exponent++) {
        /* The copy at `exponent + 282` saves doing the modulo operation in GFMUL */
        gfPwr[exponent] = (short) value;
        gfPwr[exponent + 282] = gfPwr[exponent];
        gfLog[value] = (short) exponent;
        value = (value * 3) % 283;
    }
}
162 
/* Right-justify the message in Message[] and append Reed-Solomon GF(283) check codewords */
static void ultra_gf283(short DataSize, short EccSize, int Message[]) {
    /* On entry Message[282] holds `DataSize` message codewords starting at index 0;
     * `EccSize` is the number of Reed-Solomon GF(283) check codewords to generate.
     *
     * On exit Message[282] holds the complete 282 codeword Symbol Message: leading zeroes standing in for
     * each truncated codeword, then the message codewords, then the check codewords */

    unsigned short gPoly[283], gfPwr[(282 * 2)], gfLog[283];
    int src, dst, pos, k;
    int pad_count, ecc_start;
    unsigned short feedback;

    /* Log & antilog tables first, then the division polynomial of length EccSize */
    ultra_initLogTables(gfPwr, gfLog);
    ultra_genPoly(EccSize, gPoly, gfPwr, gfLog);

    /* Clear the EccSize positions at the tail of the array */
    dst = 281;
    while (dst > (281 - EccSize)) {
        Message[dst] = 0;
        dst--;
    }

    /* Slide the message codewords right so that they end just before the check codeword area */
    for (src = DataSize - 1; src >= 0; src--) {
        Message[dst] = Message[src];
        dst--;
    }

    /* Everything to the left of the message is zero padding for truncated codewords */
    pad_count = dst + 1;
    for (k = 0; k < pad_count; k++) {
        Message[k] = 0;
    }

    /* Generate the EccSize checkwords, feeding in one message codeword per pass */
    ecc_start = pad_count + DataSize;
    for (pos = pad_count; pos < ecc_start; pos++) {
        feedback = (Message[ecc_start] + Message[pos]) % 283;
        for (k = 0; k < (EccSize - 1); k++) {
            Message[ecc_start + k] = (Message[ecc_start + k + 1] + 283
                                        - GFMUL(feedback, gPoly[EccSize - 1 - k])) % 283;
        }
        Message[ecc_start + EccSize - 1] = (283 - GFMUL(feedback, gPoly[0])) % 283;
    }

    /* Final negation of each checkword modulo 283 */
    for (k = ecc_start; k < (ecc_start + EccSize); k++) {
        Message[k] = (283 - Message[k]) % 283;
    }
}
203 
204 /* End of Ted Williams code */
205 
ultra_find_fragment(const unsigned char source[],int source_length,int position)206 static int ultra_find_fragment(const unsigned char source[], int source_length, int position) {
207     int retval = -1;
208     int j, k, latch, fraglen;
209 
210     for (j = 0; j < 27; j++) {
211         latch = 0;
212         fraglen = (int) strlen(fragment[j]);
213         if ((position + fraglen) <= source_length) {
214             latch = 1;
215             for (k = 0; k < fraglen; k++) {
216                 if (source[position + k] != fragment[j][k]) {
217                     latch = 0;
218                     break;
219                 }
220             }
221         }
222 
223         if (latch) {
224             retval = j;
225         }
226     }
227 
228     return retval;
229 }
230 
231 /* Encode characters in 8-bit mode */
look_ahead_eightbit(unsigned char source[],int in_length,int in_locn,char current_mode,int end_char,int cw[],int * cw_len,int gs1)232 static float look_ahead_eightbit(unsigned char source[], int in_length, int in_locn, char current_mode, int end_char,
233             int cw[], int *cw_len, int gs1) {
234     int codeword_count = 0;
235     int i;
236     int letters_encoded = 0;
237 
238     if (current_mode != EIGHTBIT_MODE) {
239         cw[codeword_count] = 282; // Unlatch
240         codeword_count += 1;
241     }
242 
243     i = in_locn;
244     while ((i < in_length) && (i < end_char)) {
245         if ((source[i] == '[') && gs1) {
246             cw[codeword_count] = 268; // FNC1
247         } else {
248             cw[codeword_count] = source[i];
249         }
250         i++;
251         codeword_count++;
252     }
253 
254     letters_encoded = i - in_locn;
255 
256     *cw_len = codeword_count;
257 
258     if (codeword_count == 0) {
259         return 0.0;
260     }
261     return (float) letters_encoded / (float) codeword_count;
262 }
263 
264 /* Encode character in the ASCII mode/submode (including numeric compression) */
look_ahead_ascii(unsigned char source[],int in_length,int in_locn,char current_mode,int symbol_mode,int end_char,int cw[],int * cw_len,int * encoded,int gs1)265 static float look_ahead_ascii(unsigned char source[], int in_length, int in_locn, char current_mode, int symbol_mode,
266             int end_char, int cw[], int *cw_len, int *encoded, int gs1) {
267     int codeword_count = 0;
268     int i;
269     int first_digit, second_digit, done;
270     int letters_encoded = 0;
271 
272     if (current_mode == EIGHTBIT_MODE) {
273         cw[codeword_count] = 267; // Latch ASCII Submode
274         codeword_count++;
275     }
276 
277     if (current_mode == C43_MODE) {
278         cw[codeword_count] = 282; // Unlatch
279         codeword_count++;
280         if (symbol_mode == EIGHTBIT_MODE) {
281             cw[codeword_count] = 267; // Latch ASCII Submode
282             codeword_count++;
283         }
284     }
285 
286     i = in_locn;
287     do {
288         /* Check for double digits */
289         done = 0;
290         if (i + 1 < in_length) {
291             first_digit = posn(ultra_digit, source[i]);
292             second_digit = posn(ultra_digit, source[i + 1]);
293             if ((first_digit != -1) && (second_digit != -1)) {
294                 /* Double digit can be encoded */
295                 if ((first_digit >= 0) && (first_digit <= 9) && (second_digit >= 0) && (second_digit <= 9)) {
296                     /* Double digit numerics */
297                     cw[codeword_count] = (10 * first_digit) + second_digit + 128;
298                     codeword_count++;
299                     i += 2;
300                     done = 1;
301                 } else if ((first_digit >= 0) && (first_digit <= 9) && (second_digit == 10)) {
302                     /* Single digit followed by selected decimal point character */
303                     cw[codeword_count] = first_digit + 228;
304                     codeword_count++;
305                     i += 2;
306                     done = 1;
307                 } else if ((first_digit == 10) && (second_digit >= 0) && (second_digit <= 9)) {
308                     /* Selected decimal point character followed by single digit */
309                     cw[codeword_count] = second_digit + 238;
310                     codeword_count++;
311                     i += 2;
312                     done = 1;
313                 } else if ((first_digit >= 0) && (first_digit <= 9) && (second_digit == 11)) {
314                     /* Single digit or decimal point followed by field deliminator */
315                     cw[codeword_count] = first_digit + 248;
316                     codeword_count++;
317                     i += 2;
318                     done = 1;
319                 } else if ((first_digit == 11) && (second_digit >= 0) && (second_digit <= 9)) {
320                     /* Field deliminator followed by single digit or decimal point */
321                     cw[codeword_count] = second_digit + 259;
322                     codeword_count++;
323                     i += 2;
324                     done = 1;
325                 }
326             }
327         }
328 
329         if (!done && source[i] < 0x80) {
330             if ((source[i] == '[') && gs1) {
331                 cw[codeword_count] = 272; // FNC1
332             } else {
333                 cw[codeword_count] = source[i];
334             }
335             codeword_count++;
336             i++;
337         }
338     } while ((i < in_length) && (i < end_char) && (source[i] < 0x80));
339 
340     letters_encoded = i - in_locn;
341     if (encoded != NULL) {
342         *encoded = letters_encoded;
343     }
344 
345     *cw_len = codeword_count;
346 
347     if (codeword_count == 0) {
348         return 0.0;
349     }
350     return (float) letters_encoded / (float) codeword_count;
351 }
352 
353 /* Returns true if should latch to subset other than given `subset` */
c43_should_latch_other(const unsigned char data[],const int length,const int locn,const int subset,const int gs1)354 static int c43_should_latch_other(const unsigned char data[], const int length, const int locn, const int subset,
355             const int gs1) {
356     int i, fraglen, predict_window;
357     int cnt, alt_cnt, fragno;
358     const char *set = subset == 1 ? ultra_c43_set1 : ultra_c43_set2;
359     const char *alt_set = subset == 2 ? ultra_c43_set1 : ultra_c43_set2;
360 
361     if (locn + 3 > length) {
362         return 0;
363     }
364     predict_window = locn + 3;
365 
366     for (i = locn, cnt = 0, alt_cnt = 0; i < predict_window; i++) {
367         if (data[i] <= 0x1F || data[i] >= 0x7F || (gs1 && data[i] == '[')) {
368             break;
369         }
370 
371         fragno = ultra_find_fragment(data, length, i);
372         if (fragno != -1 && fragno != 26) {
373             fraglen = (int) strlen(fragment[fragno]);
374             predict_window += fraglen;
375             if (predict_window > length) {
376                 predict_window = length;
377             }
378             i += fraglen - 1;
379         } else {
380             if (strchr(set, data[i]) != NULL) {
381                 cnt++;
382             }
383             if (strchr(alt_set, data[i]) != NULL) {
384                 alt_cnt++;
385             }
386         }
387     }
388 
389     return alt_cnt > cnt;
390 }
391 
get_subset(unsigned char source[],int in_length,int in_locn,int current_subset)392 static int get_subset(unsigned char source[], int in_length, int in_locn, int current_subset) {
393     int fragno;
394     int subset = 0;
395 
396     fragno = ultra_find_fragment(source, in_length, in_locn);
397     if ((fragno != -1) && (fragno != 26)) {
398         subset = 3;
399     } else if (current_subset == 2) {
400         if (posn(ultra_c43_set2, source[in_locn]) != -1) {
401             subset = 2;
402         } else if (posn(ultra_c43_set1, source[in_locn]) != -1) {
403             subset = 1;
404         }
405     } else {
406         if (posn(ultra_c43_set1, source[in_locn]) != -1) {
407             subset = 1;
408         } else if (posn(ultra_c43_set2, source[in_locn]) != -1) {
409             subset = 2;
410         }
411     }
412 
413     if (subset == 0) {
414         if (posn(ultra_c43_set3, source[in_locn]) != -1) {
415             subset = 3;
416         }
417     }
418 
419     return subset;
420 }
421 
422 /* Encode characters in the C43 compaction submode */
look_ahead_c43(unsigned char source[],int in_length,int in_locn,char current_mode,int end_char,int subset,int cw[],int * cw_len,int * encoded,int gs1,int debug)423 static float look_ahead_c43(unsigned char source[], int in_length, int in_locn, char current_mode, int end_char,
424             int subset, int cw[], int *cw_len, int *encoded, int gs1, int debug) {
425     int codeword_count = 0;
426     int subcodeword_count = 0;
427     int i;
428     int fragno;
429     int sublocn = in_locn;
430     int new_subset;
431     int unshift_set;
432     int base43_value;
433     int letters_encoded = 0;
434     int pad;
435 
436 #ifndef _MSC_VER
437     int subcw[(in_length + 3) * 2];
438 #else
439     int *subcw = (int *) _alloca((in_length + 3) * 2 * sizeof(int));
440 #endif /* _MSC_VER */
441 
442     if (current_mode == EIGHTBIT_MODE) {
443         /* Check for permissable URL C43 macro sequences, otherwise encode directly */
444         fragno = ultra_find_fragment(source, in_length, sublocn);
445 
446         if ((fragno == 2) || (fragno == 3)) {
447             // http://www. > http://
448             // https://www. > https://
449             fragno -= 2;
450         }
451 
452         switch (fragno) {
453             case 17: // mailto:
454                 cw[codeword_count] = 276;
455                 sublocn += (int) strlen(fragment[fragno]);
456                 codeword_count++;
457                 break;
458             case 18: // tel:
459                 cw[codeword_count] = 277;
460                 sublocn += (int) strlen(fragment[fragno]);
461                 codeword_count++;
462                 break;
463             case 26: // file:
464                 cw[codeword_count] = 278;
465                 sublocn += (int) strlen(fragment[fragno]);
466                 codeword_count++;
467                 break;
468             case 0: // http://
469                 cw[codeword_count] = 279;
470                 sublocn += (int) strlen(fragment[fragno]);
471                 codeword_count++;
472                 break;
473             case 1: // https://
474                 cw[codeword_count] = 280;
475                 sublocn += (int) strlen(fragment[fragno]);
476                 codeword_count++;
477                 break;
478             case 4: // ftp://
479                 cw[codeword_count] = 281;
480                 sublocn += (int) strlen(fragment[fragno]);
481                 codeword_count++;
482                 break;
483             default:
484                 if (subset == 1) {
485                     cw[codeword_count] = 260; // C43 Compaction Submode C1
486                     codeword_count++;
487                 } else if ((subset == 2) || (subset == 3)) {
488                     cw[codeword_count] = 266; // C43 Compaction Submode C2
489                     codeword_count++;
490                 }
491                 break;
492         }
493 
494     } else if (current_mode == ASCII_MODE) {
495         if (subset == 1) {
496             cw[codeword_count] = 278; // C43 Compaction Submode C1
497             codeword_count++;
498         } else if ((subset == 2) || (subset == 3)) {
499             cw[codeword_count] = 280; // C43 Compaction Submode C2
500             codeword_count++;
501         }
502     }
503     unshift_set = subset;
504 
505     while ((sublocn < in_length) && (sublocn < end_char)) {
506         /* Check for FNC1 */
507         if (gs1 && source[sublocn] == '[') {
508             break;
509         }
510 
511         new_subset = get_subset(source, in_length, sublocn, subset);
512 
513         if (new_subset == 0) {
514             break;
515         }
516 
517         if ((new_subset != subset) && ((new_subset == 1) || (new_subset == 2))) {
518             if (c43_should_latch_other(source, in_length, sublocn, subset, gs1)) {
519                 subcw[subcodeword_count] = 42; // Latch to other C43 set
520                 subcodeword_count++;
521                 unshift_set = new_subset;
522             } else {
523                 subcw[subcodeword_count] = 40; // Shift to other C43 set for 1 char
524                 subcodeword_count++;
525                 subcw[subcodeword_count] = posn(new_subset == 1 ? ultra_c43_set1 : ultra_c43_set2, source[sublocn]);
526                 subcodeword_count++;
527                 sublocn++;
528                 continue;
529             }
530         }
531 
532         subset = new_subset;
533 
534         if (subset == 1) {
535             subcw[subcodeword_count] = posn(ultra_c43_set1, source[sublocn]);
536             subcodeword_count++;
537             sublocn++;
538         } else if (subset == 2) {
539             subcw[subcodeword_count] = posn(ultra_c43_set2, source[sublocn]);
540             subcodeword_count++;
541             sublocn++;
542         } else if (subset == 3) {
543             subcw[subcodeword_count] = 41; // Shift to set 3
544             subcodeword_count++;
545 
546             fragno = ultra_find_fragment(source, in_length, sublocn);
547             if (fragno != -1 && fragno != 26) {
548                 if (fragno <= 18) {
549                     subcw[subcodeword_count] = fragno; // C43 Set 3 codewords 0 to 18
550                     subcodeword_count++;
551                     sublocn += (int) strlen(fragment[fragno]);
552                 } else {
553                     subcw[subcodeword_count] = fragno + 17; // C43 Set 3 codewords 36 to 42
554                     subcodeword_count++;
555                     sublocn += (int) strlen(fragment[fragno]);
556                 }
557             } else {
558                 subcw[subcodeword_count] = posn(ultra_c43_set3, source[sublocn]) + 19; // C43 Set 3 codewords 19 to 35
559                 subcodeword_count++;
560                 sublocn++;
561             }
562             subset = unshift_set;
563         }
564     }
565 
566     pad = 3 - (subcodeword_count % 3);
567     if (pad == 3) {
568         pad = 0;
569     }
570 
571     for (i = 0; i < pad; i++) {
572         subcw[subcodeword_count] = 42; // Latch to other C43 set used as pad
573         subcodeword_count++;
574     }
575 
576     if (debug & ZINT_DEBUG_PRINT) {
577         printf("C43 codewords %.*s: (%d)", in_length, source + in_locn, subcodeword_count);
578         for (i = 0; i < subcodeword_count; i++) printf( " %d", subcw[i]);
579         printf("\n");
580     }
581 
582     letters_encoded = sublocn - in_locn;
583     if (encoded != NULL) {
584         *encoded = letters_encoded;
585     }
586 
587     for (i = 0; i < subcodeword_count; i += 3) {
588         base43_value = (43 * 43 * subcw[i]) + (43 * subcw[i + 1]) + subcw[i + 2];
589         cw[codeword_count] = base43_value / 282;
590         codeword_count++;
591         cw[codeword_count] = base43_value % 282;
592         codeword_count++;
593     }
594 
595     *cw_len = codeword_count;
596 
597     if (codeword_count == 0) {
598         return 0.0;
599     }
600     return (float) letters_encoded / (float) codeword_count;
601 }
602 
603 /* Produces a set of codewords which are "somewhat" optimised - this could be improved on */
ultra_generate_codewords(struct zint_symbol * symbol,const unsigned char source[],const int in_length,int codewords[])604 static int ultra_generate_codewords(struct zint_symbol *symbol, const unsigned char source[], const int in_length,
605             int codewords[]) {
606     int i;
607     int crop_length;
608     int codeword_count = 0;
609     int input_locn = 0;
610     char symbol_mode;
611     char current_mode;
612     int subset;
613     float eightbit_score;
614     float ascii_score;
615     float c43_score;
616     int end_char;
617     int block_length;
618     int fragment_length;
619     int fragno;
620     int gs1 = 0;
621     int ascii_encoded, c43_encoded;
622 
623 #ifndef _MSC_VER
624     unsigned char crop_source[in_length + 1];
625     char mode[in_length + 1];
626     int cw_fragment[in_length * 2 + 1];
627 #else
628     unsigned char *crop_source = (unsigned char *) _alloca(in_length + 1);
629     char *mode = (char *) _alloca(in_length + 1);
630     int *cw_fragment = (int *) _alloca((in_length * 2 + 1) * sizeof(int));
631 #endif /* _MSC_VER */
632 
633     if ((symbol->input_mode & 0x07) == GS1_MODE) {
634         gs1 = 1;
635     }
636 
637     // Decide start character codeword (from Table 5)
638     symbol_mode = ASCII_MODE;
639     for (i = 0; i < in_length; i++) {
640         if (source[i] >= 0x80) {
641             symbol_mode = EIGHTBIT_MODE;
642             break;
643         }
644     }
645 
646     if (symbol->option_3 != ULTRA_COMPRESSION && !gs1) {
647         // Force eight-bit mode by default as other modes are poorly documented
648         symbol_mode = EIGHTBIT_MODE;
649     }
650 
651     if (symbol->output_options & READER_INIT) {
652         /* Reader Initialisation mode */
653         codeword_count = 2;
654         if (symbol_mode == ASCII_MODE) {
655             codewords[0] = 272; // 7-bit ASCII mode
656             codewords[1] = 271; // FNC3
657         } else {
658             codewords[0] = 257; // 8859-1
659             codewords[1] = 269; // FNC3
660         }
661     } else {
662         /* Calculate start character codeword */
663         codeword_count = 1;
664         if (symbol_mode == ASCII_MODE) {
665             if (gs1) {
666                 codewords[0] = 273;
667             } else {
668                 codewords[0] = 272;
669             }
670         } else {
671             if ((symbol->eci >= 3) && (symbol->eci <= 18) && (symbol->eci != 14)) {
672                 // ECI indicates use of character set within ISO/IEC 8859
673                 codewords[0] = 257 + (symbol->eci - 3);
674                 if (codewords[0] > 267) {
675                     // Avoids ECI 14 for non-existant ISO/IEC 8859-12
676                     codewords[0]--;
677                 }
678             } else if ((symbol->eci > 18) && (symbol->eci <= 898)) {
679                 // ECI indicates use of character set outside ISO/IEC 8859
680                 codewords[0] = 275 + (symbol->eci / 256);
681                 codewords[1] = symbol->eci % 256;
682                 codeword_count = 2;
683             } else if (symbol->eci == 899) {
684                 // Non-language byte data
685                 codewords[0] = 280;
686             } else if ((symbol->eci > 899) && (symbol->eci <= 9999)) {
687                 // ECI beyond 899 needs to use fixed length encodable ECI invocation (section 7.6.2)
688                 // Encode as 3 codewords
689                 codewords[0] = 257; // ISO/IEC 8859-1 used to enter 8-bit mode
690                 codewords[1] = 274; // Encode ECI as 3 codewords
691                 codewords[2] = (symbol->eci / 100) + 128;
692                 codewords[3] = (symbol->eci % 100) + 128;
693                 codeword_count = 4;
694             } else if (symbol->eci >= 10000) {
695                 // Encode as 4 codewords
696                 codewords[0] = 257; // ISO/IEC 8859-1 used to enter 8-bit mode
697                 codewords[1] = 275; // Encode ECI as 4 codewords
698                 codewords[2] = (symbol->eci / 10000) + 128;
699                 codewords[3] = ((symbol->eci % 10000) / 100) + 128;
700                 codewords[4] = (symbol->eci % 100) + 128;
701                 codeword_count = 5;
702             } else {
703                 codewords[0] = 257; // Default is assumed to be ISO/IEC 8859-1 (ECI 3)
704             }
705         }
706 
707         if ((codewords[0] == 257) || (codewords[0] == 272)) {
708             fragno = ultra_find_fragment(source, in_length, 0);
709 
710             // Check for http:// at start of input
711             if ((fragno == 0) || (fragno == 2)) {
712                 codewords[0] = 281;
713                 input_locn = 7;
714                 symbol_mode = EIGHTBIT_MODE;
715 
716             // Check for https:// at start of input
717             } else if ((fragno == 1) || (fragno == 3)) {
718                 codewords[0] = 282;
719                 input_locn = 8;
720                 symbol_mode = EIGHTBIT_MODE;
721             }
722         }
723     }
724 
725     /* Check for 06 Macro Sequence and crop accordingly */
726     if (in_length >= 9
727             && source[0] == '[' && source[1] == ')' && source[2] == '>' && source[3] == '\x1e'
728             && source[4] == '0' && source[5] == '6' && source[6] == '\x1d'
729             && source[in_length - 2] == '\x1e' && source[in_length - 1] == '\x04') {
730 
731         if (symbol_mode == EIGHTBIT_MODE) {
732             codewords[codeword_count] = 271; // 06 Macro
733         } else {
734             codewords[codeword_count] = 273; // 06 Macro
735         }
736         codeword_count++;
737 
738         for (i = 7; i < (in_length - 2); i++) {
739             crop_source[i - 7] = source[i];
740         }
741         crop_length = in_length - 9;
742         crop_source[crop_length] = '\0';
743    } else {
744         /* Make a cropped version of input data - removes http:// and https:// if needed */
745         for (i = input_locn; i < in_length; i++) {
746             crop_source[i - input_locn] = source[i];
747         }
748         crop_length = in_length - input_locn;
749         crop_source[crop_length] = '\0';
750     }
751 
752     /* Attempt encoding in all three modes to see which offers best compaction and store results */
753     if (symbol->option_3 == ULTRA_COMPRESSION || gs1) {
754         current_mode = symbol_mode;
755         input_locn = 0;
756         do {
757             end_char = input_locn + PREDICT_WINDOW;
758             eightbit_score = look_ahead_eightbit(crop_source, crop_length, input_locn, current_mode, end_char,
759                                 cw_fragment, &fragment_length, gs1);
760             ascii_score = look_ahead_ascii(crop_source, crop_length, input_locn, current_mode, symbol_mode,
761                                 end_char, cw_fragment, &fragment_length, &ascii_encoded, gs1);
762             subset = c43_should_latch_other(crop_source, crop_length, input_locn, 1 /*subset*/, gs1) ? 2 : 1;
763             c43_score = look_ahead_c43(crop_source, crop_length, input_locn, current_mode, end_char,
764                                 subset, cw_fragment, &fragment_length, &c43_encoded, gs1, 0 /*debug*/);
765 
766             mode[input_locn] = 'a';
767             current_mode = ASCII_MODE;
768 
769             if ((c43_score > ascii_score) && (c43_score > eightbit_score)) {
770                 mode[input_locn] = 'c';
771                 current_mode = C43_MODE;
772             } else if ((eightbit_score > ascii_score) && (eightbit_score > c43_score)) {
773                 mode[input_locn] = '8';
774                 current_mode = EIGHTBIT_MODE;
775             }
776             if (mode[input_locn] == 'a') {
777                 for (i = 0; i < ascii_encoded; i++) {
778                     mode[input_locn + i] = 'a';
779                 }
780                 input_locn += ascii_encoded;
781             } else if (mode[input_locn] == 'c') {
782                 for (i = 0; i < c43_encoded; i++) {
783                     mode[input_locn + i] = 'c';
784                 }
785                 input_locn += c43_encoded;
786             } else {
787                 input_locn++;
788             }
789         } while (input_locn < crop_length);
790     } else {
791         // Force eight-bit mode
792         for (input_locn = 0; input_locn < crop_length; input_locn++) {
793             mode[input_locn] = '8';
794         }
795     }
796     mode[crop_length] = '\0';
797 
798     if (symbol->debug & ZINT_DEBUG_PRINT) {
799         printf("Mode: %s (%d)\n", mode, (int) strlen(mode));
800     }
801 
802     /* Use results from test to perform actual mode switching */
803     current_mode = symbol_mode;
804     input_locn = 0;
805     do {
806         fragment_length = 0;
807         block_length = 0;
808         while (input_locn + block_length < crop_length && mode[input_locn + block_length] == mode[input_locn]) {
809             block_length++;
810         }
811 
812         switch (mode[input_locn]) {
813             case 'a':
814                 look_ahead_ascii(crop_source, crop_length, input_locn, current_mode, symbol_mode,
815                             input_locn + block_length, cw_fragment, &fragment_length, NULL, gs1);
816                 current_mode = ASCII_MODE;
817                 break;
818             case 'c':
819                 subset = c43_should_latch_other(crop_source, crop_length, input_locn, 1 /*subset*/, gs1) ? 2 : 1;
820                 look_ahead_c43(crop_source, crop_length, input_locn, current_mode, input_locn + block_length, subset,
821                             cw_fragment, &fragment_length, NULL, gs1, symbol->debug);
822 
823                 /* Substitute temporary latch if possible */
824                 if ((current_mode == EIGHTBIT_MODE) && (cw_fragment[0] == 260)
825                         && (fragment_length >= 5) && (fragment_length <= 11)) {
826                     /* Temporary latch to submode 1 from Table 11 */
827                     cw_fragment[0] = 256 + ((fragment_length - 5) / 2);
828                 } else if ((current_mode == EIGHTBIT_MODE) && (cw_fragment[0] == 266)
829                         && (fragment_length >= 5) && (fragment_length <= 11)) {
830                     /* Temporary latch to submode 2 from Table 11 */
831                     cw_fragment[0] = 262 + ((fragment_length - 5) / 2);
832                 } else if ((current_mode == ASCII_MODE) && (cw_fragment[0] == 278)
833                         && (fragment_length >= 5) && (fragment_length <= 11)) {
834                     /* Temporary latch to submode 1 from Table 9 */
835                     cw_fragment[0] = 274 + ((fragment_length - 5) / 2);
836                 } else {
837                     current_mode = C43_MODE;
838                 }
839                 break;
840             case '8':
841                 look_ahead_eightbit(crop_source, crop_length, input_locn, current_mode, input_locn + block_length,
842                             cw_fragment, &fragment_length, gs1);
843                 current_mode = EIGHTBIT_MODE;
844                 break;
845         }
846 
847         for (i = 0; i < fragment_length; i++) {
848             codewords[codeword_count + i] = cw_fragment[i];
849         }
850         codeword_count += fragment_length;
851 
852         input_locn += block_length;
853     } while (input_locn < crop_length);
854 
855     return codeword_count;
856 }
857 
ultracode(struct zint_symbol * symbol,unsigned char source[],int length)858 INTERNAL int ultracode(struct zint_symbol *symbol, unsigned char source[], int length) {
859     int data_cw_count = 0;
860     int acc, qcc;
861     int ecc_level;
862     int rows, columns;
863     int total_cws;
864     int pads;
865     int cw_memalloc;
866     // Allow for 3 pads in final 57th (60th incl. clock tracks) column of 5-row symbol (57 * 5 == 285)
867     int codeword[282 + 3];
868     int i, j, locn;
869     int total_height, total_width;
870     char tilepat[6];
871     int tilex, tiley;
872     int dcc;
873 #ifdef _MSC_VER
874     int *data_codewords;
875     char *pattern;
876 #endif /* _MSC_VER */
877 
878     cw_memalloc = length * 2;
879     if (cw_memalloc < 283) {
880         cw_memalloc = 283;
881     }
882 
883     if (symbol->eci > 811799) {
884         strcpy(symbol->errtxt, "590: ECI value not supported by Ultracode");
885         return ZINT_ERROR_INVALID_OPTION;
886     }
887 
888 #ifndef _MSC_VER
889     int data_codewords[cw_memalloc];
890 #else
891     data_codewords = (int *) _alloca(cw_memalloc * sizeof(int));
892 #endif /* _MSC_VER */
893 
894     data_cw_count = ultra_generate_codewords(symbol, source, length, data_codewords);
895 
896     if (symbol->debug & ZINT_DEBUG_PRINT) {
897         printf("Codewords (%d):", data_cw_count);
898         for (i = 0; i < data_cw_count; i++) {
899             printf(" %d", data_codewords[i]);
900         }
901         printf("\n");
902     }
903 #ifdef ZINT_TEST
904     if (symbol->debug & ZINT_DEBUG_TEST) {
905         debug_test_codeword_dump_int(symbol, data_codewords, data_cw_count);
906     }
907 #endif
908 
909     data_cw_count += 2; // 2 == MCC + ACC (data codeword count includes start char)
910 
911     /* Default ECC level is EC2 */
912     if ((symbol->option_1 <= 0) || (symbol->option_1 > 6)) {
913         ecc_level = 2;
914     } else {
915         ecc_level = symbol->option_1 - 1;
916     }
917 
918     /* ECC calculation from section 7.7.2 */
919     if (ecc_level == 0) {
920         qcc = 3;
921     } else {
922         if ((data_cw_count % 25) == 0) {
923             qcc = (kec[ecc_level] * (data_cw_count / 25)) + 3 + 2;
924         } else {
925             qcc = (kec[ecc_level] * ((data_cw_count / 25) + 1)) + 3 + 2;
926         }
927 
928     }
929     acc = qcc - 3;
930 
931     if (symbol->debug & ZINT_DEBUG_PRINT) {
932         printf("EC%d codewords: %d\n", ecc_level + 1, qcc);
933     }
934 
935     /* Maximum capacity is 282 codewords */
936     total_cws = data_cw_count + qcc + 3; // 3 == TCC pattern + RSEC pattern + QCC pattern
937     if (total_cws - 3 > 282) {
938         strcpy(symbol->errtxt, "591: Data too long for selected error correction capacity");
939         return ZINT_ERROR_TOO_LONG;
940     }
941 
942     rows = 5;
943     for (i = 2; i >= 0; i--) {
944         // Total codewords less 6 overhead (Start + MCC + ACC + 3 TCC/RSEC/QCC patterns)
945         if (total_cws - 6 <= ultra_maxsize[i]) {
946             rows--;
947         }
948     }
949 
950     if ((total_cws % rows) == 0) {
951         pads = 0;
952         columns = total_cws / rows;
953     } else {
954         pads = rows - (total_cws % rows);
955         columns = (total_cws / rows) + 1;
956     }
957     columns += columns / 15; // Secondary vertical clock tracks
958 
959     if (symbol->debug & ZINT_DEBUG_PRINT) {
960         printf("Calculated size is %d rows by %d columns\n", rows, columns);
961     }
962 
963     /* Insert MCC and ACC into data codewords */
964     for (i = 282; i > 2; i--) {
965         data_codewords[i] = data_codewords[i - 2];
966     }
967     data_codewords[1] = data_cw_count; // MCC
968     data_codewords[2] = acc; // ACC
969 
970     locn = 0;
971     /* Calculate error correction codewords (RSEC) */
972 
973     ultra_gf283((short) data_cw_count, (short) qcc, data_codewords);
974 
975     if (symbol->debug & ZINT_DEBUG_PRINT) {
976         printf("ECCs (%d):", qcc);
977         for (i = 0; i < qcc; i++) {
978             printf(" %d", data_codewords[(282 - qcc) + i]);
979         }
980         printf("\n");
981     }
982 
983     /* Rearrange to make final codeword sequence */
984     codeword[locn++] = data_codewords[282 - (data_cw_count + qcc)]; // Start Character
985     codeword[locn++] = data_cw_count; // MCC
986     for (i = 0; i < qcc; i++) {
987         codeword[locn++] = data_codewords[(282 - qcc) + i]; // RSEC Region
988     }
989     codeword[locn++] = data_cw_count + qcc; // TCC = C + Q - section 6.11.4
990     codeword[locn++] = 283; // Separator
991     codeword[locn++] = acc; // ACC
992     for (i = 0; i < (data_cw_count - 3); i++) {
993         codeword[locn++] = data_codewords[(282 - ((data_cw_count - 3) + qcc)) + i]; // Data Region
994     }
995     for (i = 0; i < pads; i++) {
996         codeword[locn++] = 284; // Pad pattern
997     }
998     codeword[locn++] = qcc; // QCC
999 
1000     if (symbol->debug & ZINT_DEBUG_PRINT) {
1001         printf("Rearranged codewords with ECC:\n");
1002         for (i = 0; i < locn; i++) {
1003             printf("%d ", codeword[i]);
1004         }
1005         printf("\n");
1006     }
1007 
1008     total_height = (rows * 6) + 1;
1009     total_width = columns + 6;
1010 
1011     /* Build symbol */
1012 #ifndef _MSC_VER
1013     char pattern[total_height * total_width];
1014 #else
1015     pattern = (char *) _alloca(total_height * total_width);
1016 #endif /* _MSC_VER */
1017 
1018     for (i = 0; i < (total_height * total_width); i++) {
1019         pattern[i] = 'W';
1020     }
1021 
1022     /* Border */
1023     for (i = 0; i < total_width; i++) {
1024         pattern[i] = 'K'; // Top
1025         pattern[(total_height * total_width) - i - 1] = 'K'; // Bottom
1026     }
1027     for (i = 0; i < total_height; i++) {
1028         pattern[total_width * i] = 'K'; // Left
1029         pattern[(total_width * i) + 3] = 'K';
1030         pattern[(total_width * i) + (total_width - 1)] = 'K'; // Right
1031     }
1032 
1033     /* Clock tracks */
1034     for (i = 0; i < total_height; i += 2) {
1035         pattern[(total_width * i) + 1] = 'K'; // Primary vertical clock track
1036         if (total_width > 20) {
1037             pattern[(total_width * i) + 19] = 'K'; // Secondary vertical clock track
1038         }
1039         if (total_width > 36) {
1040             pattern[(total_width * i) + 35] = 'K'; // Secondary vertical clock track
1041         }
1042         if (total_width > 52) {
1043             pattern[(total_width * i) + 51] = 'K'; // Secondary vertical clock track
1044         }
1045     }
1046     for (i = 6; i < total_height; i += 6) {
1047         for (j = 5; j < total_width; j += 2) {
1048             pattern[(total_width * i) + j] = 'K'; // Horizontal clock track
1049         }
1050     }
1051 
1052     /* Place tiles */
1053     tilepat[5] = '\0';
1054     tilex = 0;
1055     tiley = 0;
1056     for (i = 0; i < locn; i++) {
1057         for (j = 0; j < 5; j++) {
1058             tilepat[4 - j] = ultra_colour[(tiles[codeword[i]] >> (3 * j)) & 0x07];
1059         }
1060         if ((tiley + 1) >= total_height) {
1061             tiley = 0;
1062             tilex++;
1063 
1064             if (tilex == 14) {
1065                 tilex++;
1066             } else if (tilex == 30) {
1067                 tilex++;
1068             } else if (tilex == 46) {
1069                 tilex++;
1070             }
1071         }
1072 
1073         for (j = 0; j < 5; j++) {
1074             pattern[((tiley + j + 1) * total_width) + (tilex + 5)] = tilepat[j];
1075         }
1076         tiley += 6;
1077     }
1078 
1079     /* Add data column count */
1080     dcc = columns - ultra_mincols[rows - 2];
1081     tilex = 2;
1082     tiley = (total_height - 11) / 2;
1083     /* DCCU */
1084     for (j = 0; j < 5; j++) {
1085         tilepat[4 - j] = ultra_colour[(dccu[dcc] >> (3 * j)) & 0x07];
1086     }
1087     for (j = 0; j < 5; j++) {
1088         pattern[((tiley + j) * total_width) + tilex] = tilepat[j];
1089     }
1090     /* DCCL */
1091     tiley += 6;
1092     for (j = 0; j < 5; j++) {
1093         tilepat[4 - j] = ultra_colour[(dccl[dcc] >> (3 * j)) & 0x07];
1094     }
1095     for (j = 0; j < 5; j++) {
1096         pattern[((tiley + j) * total_width) + tilex] = tilepat[j];
1097     }
1098 
1099     if (symbol->debug & ZINT_DEBUG_PRINT) {
1100         printf("DCC: %d\n", dcc);
1101 
1102         for (i = 0; i < (total_height * total_width); i++) {
1103             printf("%c", pattern[i]);
1104             if ((i + 1) % total_width == 0) {
1105                 printf("\n");
1106             }
1107         }
1108     }
1109 
1110     /* Put pattern into symbol */
1111     symbol->rows = total_height;
1112     symbol->width = total_width;
1113 
1114     for (i = 0; i < total_height; i++) {
1115         symbol->row_height[i] = 1;
1116         for (j = 0; j < total_width; j++) {
1117             set_module_colour(symbol, i, j, posn(ultra_colour, pattern[(i * total_width) + j]));
1118         }
1119     }
1120     symbol->height = total_height;
1121 
1122     return 0;
1123 }
1124