1<?php
2declare(strict_types = 1);
3
4namespace BaconQrCode\Encoder;
5
6use BaconQrCode\Common\BitArray;
7use BaconQrCode\Common\CharacterSetEci;
8use BaconQrCode\Common\ErrorCorrectionLevel;
9use BaconQrCode\Common\Mode;
10use BaconQrCode\Common\ReedSolomonCodec;
11use BaconQrCode\Common\Version;
12use BaconQrCode\Exception\WriterException;
13use SplFixedArray;
14
/**
 * QR code encoder.
 *
 * Turns a content string into a {@see QrCode}: a single encoding mode is
 * picked for the whole content, the first version (1..40) with enough data
 * capacity is selected, the bit stream is terminated and padded, error
 * correction bytes are generated and interleaved per block, and finally the
 * mask pattern with the lowest penalty score is chosen.
 *
 * Content handed to this class is treated as UTF-8; byte and kanji mode
 * convert it to the target character set with iconv.
 */
final class Encoder
{
    /**
     * Default byte encoding.
     *
     * The misspelled name ("ECODING") is part of the public API and is kept
     * as-is for backward compatibility.
     */
    public const DEFAULT_BYTE_MODE_ECODING = 'ISO-8859-1';

    /**
     * Maps a byte value (array index) to its alphanumeric mode code, or -1 if
     * the byte cannot be represented in alphanumeric mode.
     *
     * The original table is defined in the table 5 of JISX0510:2004 (p.19).
     */
    private const ALPHANUMERIC_TABLE = [
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x00-0x0f
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x10-0x1f
        36, -1, -1, -1, 37, 38, -1, -1, -1, -1, 39, 40, -1, 41, 42, 43,  // 0x20-0x2f
        0,   1,  2,  3,  4,  5,  6,  7,  8,  9, 44, -1, -1, -1, -1, -1,  // 0x30-0x3f
        -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,  // 0x40-0x4f
        25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1,  // 0x50-0x5f
    ];

    /**
     * Codec cache, keyed by "<numDataBytes>-<numEcBytes>".
     *
     * @var array<string, ReedSolomonCodec>
     */
    private static $codecs = [];

    /**
     * Encodes "content" with the error correction level "ecLevel".
     *
     * @param string $content  content to encode, expected to be UTF-8
     * @param string $encoding target character set used in byte mode; the
     *                         value "SHIFT-JIS" additionally enables kanji mode
     * @throws WriterException if the content cannot be converted to the target
     *                         encoding, or does not fit into any version
     */
    public static function encode(
        string $content,
        ErrorCorrectionLevel $ecLevel,
        string $encoding = self::DEFAULT_BYTE_MODE_ECODING
    ) : QrCode {
        // Pick an encoding mode appropriate for the content. Note that this
        // will not attempt to use multiple modes / segments even if that were
        // more efficient. Would be nice.
        $mode = self::chooseMode($content, $encoding);

        // This will store the header information, like mode and length, as well
        // as "header" segments like an ECI segment.
        $headerBits = new BitArray();

        // Append ECI segment if applicable
        if (Mode::BYTE() === $mode && self::DEFAULT_BYTE_MODE_ECODING !== $encoding) {
            $eci = CharacterSetEci::getCharacterSetEciByName($encoding);

            if (null !== $eci) {
                self::appendEci($eci, $headerBits);
            }
        }

        // (With ECI in place,) Write the mode marker
        self::appendModeInfo($mode, $headerBits);

        // Collect data within the main segment, separately, to count its size
        // if needed. Don't add it to main payload yet.
        $dataBits = new BitArray();
        self::appendBytes($content, $mode, $dataBits, $encoding);

        // Hard part: the width of the character count indicator depends on the
        // version, and the version depends on the total number of bits. Start
        // with the width used by version 1 and re-select the version until the
        // width required by the selected version equals the width that was
        // assumed when selecting it. The first two rounds are exactly the
        // former "provisional version" / "final version" passes; further
        // rounds only happen when the second pass crosses another width
        // boundary, which previously left the count field under-sized.
        $fixedBits = $headerBits->getSize() + $dataBits->getSize();
        $countBits = $mode->getCharacterCountBits(Version::getVersionForNumber(1));

        do {
            $assumedCountBits = $countBits;
            $version = self::chooseVersion($fixedBits + $assumedCountBits, $ecLevel);
            $countBits = $mode->getCharacterCountBits($version);
        } while ($countBits !== $assumedCountBits);

        $headerAndDataBits = new BitArray();
        $headerAndDataBits->appendBitArray($headerBits);

        // Find "length" of main segment and write it.
        $numLetters = self::countLetters($content, $mode, $dataBits);
        self::appendLengthInfo($numLetters, $version, $mode, $headerAndDataBits);

        // Put data together into the overall payload.
        $headerAndDataBits->appendBitArray($dataBits);
        $ecBlocks = $version->getEcBlocksForLevel($ecLevel);
        $numDataBytes = $version->getTotalCodewords() - $ecBlocks->getTotalEcCodewords();

        // Terminate the bits properly.
        self::terminateBits($numDataBytes, $headerAndDataBits);

        // Interleave data bits with error correction code.
        $finalBits = self::interleaveWithEcBytes(
            $headerAndDataBits,
            $version->getTotalCodewords(),
            $numDataBytes,
            $ecBlocks->getNumBlocks()
        );

        // Choose the mask pattern. The matrix is used as scratch space while
        // the candidates are evaluated and is rebuilt below with the winner.
        $dimension = $version->getDimensionForVersion();
        $matrix = new ByteMatrix($dimension, $dimension);
        $maskPattern = self::chooseMaskPattern($finalBits, $ecLevel, $version, $matrix);

        // Build the matrix.
        MatrixUtil::buildMatrix($finalBits, $ecLevel, $version, $maskPattern, $matrix);

        return new QrCode($mode, $ecLevel, $version, $maskPattern, $matrix);
    }

    /**
     * Determines the value of the character count indicator.
     *
     * Byte mode counts the encoded bytes, kanji mode counts the 13-bit
     * characters that were written, and the remaining modes (numeric and
     * alphanumeric, which only contain single-byte characters) count the
     * bytes of the content itself.
     */
    private static function countLetters(string $content, Mode $mode, BitArray $dataBits) : int
    {
        if (Mode::BYTE() === $mode) {
            return $dataBits->getSizeInBytes();
        }

        if (Mode::KANJI() === $mode) {
            // appendKanjiBytes() emits exactly 13 bits per character, whereas
            // strlen() on the UTF-8 content would count several bytes for each.
            return intdiv($dataBits->getSize(), 13);
        }

        return strlen($content);
    }

    /**
     * Gets the alphanumeric code for a byte.
     *
     * @return int the code from the alphanumeric table, or -1 if the byte is
     *             outside the table or not encodable in alphanumeric mode
     */
    private static function getAlphanumericCode(int $code) : int
    {
        if (isset(self::ALPHANUMERIC_TABLE[$code])) {
            return self::ALPHANUMERIC_TABLE[$code];
        }

        return -1;
    }

    /**
     * Chooses the best mode for a given content.
     *
     * With the SHIFT-JIS encoding the result is kanji mode if the content
     * consists solely of double-byte kanji, byte mode otherwise. For any other
     * encoding the content is scanned byte by byte: digits only yields numeric
     * mode, alphanumeric table characters with at least one non-digit yields
     * alphanumeric mode, and anything else (including empty content) yields
     * byte mode.
     */
    private static function chooseMode(string $content, ?string $encoding = null) : Mode
    {
        if (null !== $encoding && 0 === strcasecmp($encoding, 'SHIFT-JIS')) {
            return self::isOnlyDoubleByteKanji($content) ? Mode::KANJI() : Mode::BYTE();
        }

        $hasNumeric = false;
        $hasAlphanumeric = false;
        $contentLength = strlen($content);

        for ($i = 0; $i < $contentLength; ++$i) {
            $char = $content[$i];

            if (ctype_digit($char)) {
                $hasNumeric = true;
            } elseif (-1 !== self::getAlphanumericCode(ord($char))) {
                $hasAlphanumeric = true;
            } else {
                // A single unsupported byte forces byte mode for everything.
                return Mode::BYTE();
            }
        }

        if ($hasAlphanumeric) {
            return Mode::ALPHANUMERIC();
        } elseif ($hasNumeric) {
            return Mode::NUMERIC();
        }

        return Mode::BYTE();
    }

    /**
     * Calculates the mask penalty for a matrix.
     *
     * @return int sum of the four penalty rules provided by MaskUtil
     */
    private static function calculateMaskPenalty(ByteMatrix $matrix) : int
    {
        return (
            MaskUtil::applyMaskPenaltyRule1($matrix)
            + MaskUtil::applyMaskPenaltyRule2($matrix)
            + MaskUtil::applyMaskPenaltyRule3($matrix)
            + MaskUtil::applyMaskPenaltyRule4($matrix)
        );
    }

    /**
     * Checks if content only consists of double-byte kanji characters.
     *
     * The content is converted from UTF-8 to SHIFT-JIS; it qualifies when the
     * conversion succeeds, the result has an even number of bytes and the
     * first byte of every pair lies in 0x81-0x9f or 0xe0-0xeb. Only the first
     * byte of each pair is inspected here; appendKanjiBytes() validates the
     * complete two-byte code.
     */
    private static function isOnlyDoubleByteKanji(string $content) : bool
    {
        $bytes = @iconv('utf-8', 'SHIFT-JIS', $content);

        if (false === $bytes) {
            return false;
        }

        $length = strlen($bytes);

        if (0 !== $length % 2) {
            return false;
        }

        for ($i = 0; $i < $length; $i += 2) {
            // ord() is required: applying "&" to a one-character string and an
            // integer does not yield the byte value (and throws a TypeError
            // for non-numeric strings on PHP 8).
            $byte = ord($bytes[$i]);

            if (($byte < 0x81 || $byte > 0x9f) && ($byte < 0xe0 || $byte > 0xeb)) {
                return false;
            }
        }

        return true;
    }

    /**
     * Chooses the best mask pattern for a matrix.
     *
     * Builds the matrix once per candidate pattern into $matrix and keeps the
     * pattern with the lowest penalty; on a tie the lower pattern number wins.
     * $matrix is left in the state of the last pattern tried.
     */
    private static function chooseMaskPattern(
        BitArray $bits,
        ErrorCorrectionLevel $ecLevel,
        Version $version,
        ByteMatrix $matrix
    ) : int {
        $minPenalty = PHP_INT_MAX;
        $bestMaskPattern = -1;

        for ($maskPattern = 0; $maskPattern < QrCode::NUM_MASK_PATTERNS; ++$maskPattern) {
            MatrixUtil::buildMatrix($bits, $ecLevel, $version, $maskPattern, $matrix);
            $penalty = self::calculateMaskPenalty($matrix);

            if ($penalty < $minPenalty) {
                $minPenalty = $penalty;
                $bestMaskPattern = $maskPattern;
            }
        }

        return $bestMaskPattern;
    }

    /**
     * Chooses the best version for the input.
     *
     * Returns the first version, counting up from 1, whose data capacity
     * (total codewords minus error correction codewords) holds the input.
     *
     * @throws WriterException if data is too big
     */
    private static function chooseVersion(int $numInputBits, ErrorCorrectionLevel $ecLevel) : Version
    {
        // Number of whole bytes needed for the input, rounded up. (Adding 8
        // instead of 7 would demand a spare byte whenever the bit count is an
        // exact multiple of 8 and could select a needlessly large version.)
        $totalInputBytes = intdiv($numInputBits + 7, 8);

        for ($versionNum = 1; $versionNum <= 40; ++$versionNum) {
            $version = Version::getVersionForNumber($versionNum);
            $numBytes = $version->getTotalCodewords();

            $ecBlocks = $version->getEcBlocksForLevel($ecLevel);
            $numEcBytes = $ecBlocks->getTotalEcCodewords();

            $numDataBytes = $numBytes - $numEcBytes;

            if ($numDataBytes >= $totalInputBytes) {
                return $version;
            }
        }

        throw new WriterException('Data too big');
    }

    /**
     * Terminates the bits in a bit array.
     *
     * Appends up to four zero bits as terminator (fewer if the capacity is
     * reached first), pads with zero bits to the next byte boundary and fills
     * the remaining capacity with the alternating pad bytes 0xec and 0x11.
     *
     * @param int $numDataBytes data capacity in bytes
     * @throws WriterException if data bits cannot fit in the QR code
     * @throws WriterException if bits size does not equal the capacity
     */
    private static function terminateBits(int $numDataBytes, BitArray $bits) : void
    {
        $capacity = $numDataBytes << 3;

        if ($bits->getSize() > $capacity) {
            throw new WriterException('Data bits cannot fit in the QR code');
        }

        for ($i = 0; $i < 4 && $bits->getSize() < $capacity; ++$i) {
            $bits->appendBit(false);
        }

        // Number of bits already occupying the last, incomplete byte.
        $numBitsInLastByte = $bits->getSize() & 0x7;

        if ($numBitsInLastByte > 0) {
            for ($i = $numBitsInLastByte; $i < 8; ++$i) {
                $bits->appendBit(false);
            }
        }

        $numPaddingBytes = $numDataBytes - $bits->getSizeInBytes();

        for ($i = 0; $i < $numPaddingBytes; ++$i) {
            $bits->appendBits(0 === ($i & 0x1) ? 0xec : 0x11, 8);
        }

        if ($bits->getSize() !== $capacity) {
            throw new WriterException('Bits size does not equal capacity');
        }
    }

    /**
     * Gets number of data- and EC bytes for a block ID.
     *
     * The blocks are split into two groups: group 2 consists of the last
     * ($numTotalBytes % $numRsBlocks) blocks and holds one data byte more per
     * block than group 1. Both groups use the same number of EC bytes.
     *
     * @return int[] two elements: number of data bytes, number of EC bytes
     * @throws WriterException if block ID is too large
     * @throws WriterException if EC bytes mismatch
     * @throws WriterException if RS blocks mismatch
     * @throws WriterException if total bytes mismatch
     */
    private static function getNumDataBytesAndNumEcBytesForBlockId(
        int $numTotalBytes,
        int $numDataBytes,
        int $numRsBlocks,
        int $blockId
    ) : array {
        if ($blockId >= $numRsBlocks) {
            throw new WriterException('Block ID too large');
        }

        $numRsBlocksInGroup2 = $numTotalBytes % $numRsBlocks;
        $numRsBlocksInGroup1 = $numRsBlocks - $numRsBlocksInGroup2;
        $numTotalBytesInGroup1 = intdiv($numTotalBytes, $numRsBlocks);
        $numTotalBytesInGroup2 = $numTotalBytesInGroup1 + 1;
        $numDataBytesInGroup1 = intdiv($numDataBytes, $numRsBlocks);
        $numDataBytesInGroup2 = $numDataBytesInGroup1 + 1;
        $numEcBytesInGroup1 = $numTotalBytesInGroup1 - $numDataBytesInGroup1;
        $numEcBytesInGroup2 = $numTotalBytesInGroup2 - $numDataBytesInGroup2;

        // Sanity checks on the derived layout.
        if ($numEcBytesInGroup1 !== $numEcBytesInGroup2) {
            throw new WriterException('EC bytes mismatch');
        }

        if ($numRsBlocks !== $numRsBlocksInGroup1 + $numRsBlocksInGroup2) {
            throw new WriterException('RS blocks mismatch');
        }

        if ($numTotalBytes !==
            (($numDataBytesInGroup1 + $numEcBytesInGroup1) * $numRsBlocksInGroup1)
            + (($numDataBytesInGroup2 + $numEcBytesInGroup2) * $numRsBlocksInGroup2)
        ) {
            throw new WriterException('Total bytes mismatch');
        }

        if ($blockId < $numRsBlocksInGroup1) {
            return [$numDataBytesInGroup1, $numEcBytesInGroup1];
        }

        return [$numDataBytesInGroup2, $numEcBytesInGroup2];
    }

    /**
     * Interleaves data with EC bytes.
     *
     * Splits the data bytes into blocks, generates the EC bytes of every
     * block, then emits the n-th data byte of each block in block order,
     * followed by the n-th EC byte of each block in block order. Blocks that
     * are shorter than the longest one are skipped once exhausted.
     *
     * @throws WriterException if number of bits and data bytes does not match
     * @throws WriterException if data bytes does not match offset
     * @throws WriterException if an interleaving error occurs
     */
    private static function interleaveWithEcBytes(
        BitArray $bits,
        int $numTotalBytes,
        int $numDataBytes,
        int $numRsBlocks
    ) : BitArray {
        if ($bits->getSizeInBytes() !== $numDataBytes) {
            throw new WriterException('Number of bits and data bytes does not match');
        }

        $dataBytesOffset = 0;
        $maxNumDataBytes = 0;
        $maxNumEcBytes   = 0;

        $blocks = new SplFixedArray($numRsBlocks);

        for ($i = 0; $i < $numRsBlocks; ++$i) {
            [$numDataBytesInBlock, $numEcBytesInBlock] = self::getNumDataBytesAndNumEcBytesForBlockId(
                $numTotalBytes,
                $numDataBytes,
                $numRsBlocks,
                $i
            );

            $dataBytes = $bits->toBytes(8 * $dataBytesOffset, $numDataBytesInBlock);
            $ecBytes = self::generateEcBytes($dataBytes, $numEcBytesInBlock);
            $blocks[$i] = new BlockPair($dataBytes, $ecBytes);

            $maxNumDataBytes = max($maxNumDataBytes, $numDataBytesInBlock);
            $maxNumEcBytes = max($maxNumEcBytes, count($ecBytes));
            $dataBytesOffset += $numDataBytesInBlock;
        }

        if ($numDataBytes !== $dataBytesOffset) {
            throw new WriterException('Data bytes does not match offset');
        }

        $result = new BitArray();

        // First, place data blocks.
        for ($i = 0; $i < $maxNumDataBytes; ++$i) {
            foreach ($blocks as $block) {
                $dataBytes = $block->getDataBytes();

                if ($i < count($dataBytes)) {
                    $result->appendBits($dataBytes[$i], 8);
                }
            }
        }

        // Then, place error correction blocks.
        for ($i = 0; $i < $maxNumEcBytes; ++$i) {
            foreach ($blocks as $block) {
                $ecBytes = $block->getErrorCorrectionBytes();

                if ($i < count($ecBytes)) {
                    $result->appendBits($ecBytes[$i], 8);
                }
            }
        }

        if ($numTotalBytes !== $result->getSizeInBytes()) {
            throw new WriterException(
                'Interleaving error: ' . $numTotalBytes . ' and ' . $result->getSizeInBytes() . ' differ'
            );
        }

        return $result;
    }

    /**
     * Generates EC bytes for given data.
     *
     * @param  SplFixedArray<int> $dataBytes
     * @return SplFixedArray<int> array of $numEcBytesInBlock elements, filled
     *                            by the Reed-Solomon codec
     */
    private static function generateEcBytes(SplFixedArray $dataBytes, int $numEcBytesInBlock) : SplFixedArray
    {
        $numDataBytes = count($dataBytes);

        // The buffer is sized for data plus EC bytes, but only the data part
        // is populated here.
        $toEncode = new SplFixedArray($numDataBytes + $numEcBytesInBlock);

        for ($i = 0; $i < $numDataBytes; $i++) {
            $toEncode[$i] = $dataBytes[$i] & 0xff;
        }

        $ecBytes = new SplFixedArray($numEcBytesInBlock);
        $codec = self::getCodec($numDataBytes, $numEcBytesInBlock);
        $codec->encode($toEncode, $ecBytes);

        return $ecBytes;
    }

    /**
     * Gets an RS codec and caches it.
     *
     * One codec is kept per combination of data byte count and EC byte count.
     */
    private static function getCodec(int $numDataBytes, int $numEcBytesInBlock) : ReedSolomonCodec
    {
        $cacheId = $numDataBytes . '-' . $numEcBytesInBlock;

        if (isset(self::$codecs[$cacheId])) {
            return self::$codecs[$cacheId];
        }

        // NOTE(review): the argument meanings are assumed from the values, not
        // visible in this file - presumably symbol size in bits, field
        // generator polynomial, first root, primitive element, number of
        // roots (EC bytes) and padding up to the 255 symbol code length.
        // Confirm against ReedSolomonCodec::__construct().
        return self::$codecs[$cacheId] = new ReedSolomonCodec(
            8,
            0x11d,
            0,
            1,
            $numEcBytesInBlock,
            255 - $numDataBytes - $numEcBytesInBlock
        );
    }

    /**
     * Appends mode information to a bit array.
     *
     * The mode indicator occupies four bits.
     */
    private static function appendModeInfo(Mode $mode, BitArray $bits) : void
    {
        $bits->appendBits($mode->getBits(), 4);
    }

    /**
     * Appends length information to a bit array.
     *
     * The width of the field depends on both the mode and the version.
     *
     * @throws WriterException if num letters is bigger than expected
     */
    private static function appendLengthInfo(int $numLetters, Version $version, Mode $mode, BitArray $bits) : void
    {
        $numBits = $mode->getCharacterCountBits($version);

        if ($numLetters >= (1 << $numBits)) {
            throw new WriterException($numLetters . ' is bigger than ' . ((1 << $numBits) - 1));
        }

        $bits->appendBits($numLetters, $numBits);
    }

    /**
     * Appends bytes to a bit array in a specific mode.
     *
     * Modes are compared by identity, the same way encode() compares them.
     *
     * @throws WriterException if an invalid mode was supplied
     */
    private static function appendBytes(string $content, Mode $mode, BitArray $bits, string $encoding) : void
    {
        if (Mode::NUMERIC() === $mode) {
            self::appendNumericBytes($content, $bits);
        } elseif (Mode::ALPHANUMERIC() === $mode) {
            self::appendAlphanumericBytes($content, $bits);
        } elseif (Mode::BYTE() === $mode) {
            self::append8BitBytes($content, $bits, $encoding);
        } elseif (Mode::KANJI() === $mode) {
            self::appendKanjiBytes($content, $bits);
        } else {
            throw new WriterException('Invalid mode: ' . $mode);
        }
    }

    /**
     * Appends numeric bytes to a bit array.
     *
     * Digits are packed in groups of three (10 bits); a trailing group of two
     * digits takes 7 bits and a single trailing digit 4 bits.
     */
    private static function appendNumericBytes(string $content, BitArray $bits) : void
    {
        $length = strlen($content);
        $i = 0;

        while ($i < $length) {
            $num1 = (int) $content[$i];

            if ($i + 2 < $length) {
                // Encode three numeric letters in ten bits.
                $num2 = (int) $content[$i + 1];
                $num3 = (int) $content[$i + 2];
                $bits->appendBits($num1 * 100 + $num2 * 10 + $num3, 10);
                $i += 3;
            } elseif ($i + 1 < $length) {
                // Encode two numeric letters in seven bits.
                $num2 = (int) $content[$i + 1];
                $bits->appendBits($num1 * 10 + $num2, 7);
                $i += 2;
            } else {
                // Encode one numeric letter in four bits.
                $bits->appendBits($num1, 4);
                ++$i;
            }
        }
    }

    /**
     * Appends alpha-numeric bytes to a bit array.
     *
     * Characters are packed in pairs (11 bits); a single trailing character
     * takes 6 bits.
     *
     * @throws WriterException if an invalid alphanumeric code was found
     */
    private static function appendAlphanumericBytes(string $content, BitArray $bits) : void
    {
        $length = strlen($content);
        $i = 0;

        while ($i < $length) {
            $code1 = self::getAlphanumericCode(ord($content[$i]));

            if (-1 === $code1) {
                throw new WriterException('Invalid alphanumeric code');
            }

            if ($i + 1 < $length) {
                $code2 = self::getAlphanumericCode(ord($content[$i + 1]));

                if (-1 === $code2) {
                    throw new WriterException('Invalid alphanumeric code');
                }

                // Encode two alphanumeric letters in 11 bits.
                $bits->appendBits($code1 * 45 + $code2, 11);
                $i += 2;
            } else {
                // Encode one alphanumeric letter in six bits.
                $bits->appendBits($code1, 6);
                ++$i;
            }
        }
    }

    /**
     * Appends regular 8-bit bytes to a bit array.
     *
     * The content is converted from UTF-8 to the target encoding first.
     *
     * @throws WriterException if content cannot be encoded to target encoding
     */
    private static function append8BitBytes(string $content, BitArray $bits, string $encoding) : void
    {
        // iconv reports unconvertible input with a notice plus a false return
        // value; the notice is silenced because the failure is turned into an
        // exception right below.
        $bytes = @iconv('utf-8', $encoding, $content);

        if (false === $bytes) {
            throw new WriterException('Could not encode content to ' . $encoding);
        }

        $length = strlen($bytes);

        for ($i = 0; $i < $length; $i++) {
            $bits->appendBits(ord($bytes[$i]), 8);
        }
    }

    /**
     * Appends KANJI bytes to a bit array.
     *
     * The content is converted from UTF-8 to SHIFT-JIS, exactly like
     * isOnlyDoubleByteKanji() does when it selects this mode, and every
     * resulting two-byte code is compacted into 13 bits.
     *
     * @throws WriterException if content cannot be converted to SHIFT-JIS
     * @throws WriterException if content does not seem to be encoded in SHIFT-JIS
     * @throws WriterException if an invalid byte sequence occurs
     */
    private static function appendKanjiBytes(string $content, BitArray $bits) : void
    {
        $bytes = @iconv('utf-8', 'SHIFT-JIS', $content);

        if (false === $bytes) {
            throw new WriterException('Content could not be converted to SHIFT-JIS');
        }

        $length = strlen($bytes);

        if ($length % 2 > 0) {
            // We just do a simple length check here. The for loop will check
            // individual characters.
            throw new WriterException('Content does not seem to be encoded in SHIFT-JIS');
        }

        for ($i = 0; $i < $length; $i += 2) {
            $code = (ord($bytes[$i]) << 8) | ord($bytes[$i + 1]);

            if ($code >= 0x8140 && $code <= 0x9ffc) {
                $subtracted = $code - 0x8140;
            } elseif ($code >= 0xe040 && $code <= 0xebbf) {
                $subtracted = $code - 0xc140;
            } else {
                throw new WriterException('Invalid byte sequence');
            }

            $encoded = (($subtracted >> 8) * 0xc0) + ($subtracted & 0xff);

            $bits->appendBits($encoded, 13);
        }
    }

    /**
     * Appends ECI information to a bit array.
     *
     * Writes the four-bit ECI mode indicator followed by the ECI value in
     * eight bits.
     *
     * NOTE(review): only the single-byte designator form is written, which is
     * assumed to be sufficient for every value CharacterSetEci can return -
     * confirm.
     */
    private static function appendEci(CharacterSetEci $eci, BitArray $bits) : void
    {
        $mode = Mode::ECI();
        $bits->appendBits($mode->getBits(), 4);
        $bits->appendBits($eci->getValue(), 8);
    }
}
653