<?php
declare(strict_types = 1);

namespace BaconQrCode\Encoder;

use BaconQrCode\Common\BitArray;
use BaconQrCode\Common\CharacterSetEci;
use BaconQrCode\Common\ErrorCorrectionLevel;
use BaconQrCode\Common\Mode;
use BaconQrCode\Common\ReedSolomonCodec;
use BaconQrCode\Common\Version;
use BaconQrCode\Exception\WriterException;
use SplFixedArray;

/**
 * Encoder.
 *
 * Turns a content string into a QrCode: it picks a single encoding mode,
 * serialises header and data bits, chooses a version that can hold them,
 * appends error correction bytes and finally selects the mask pattern with
 * the lowest penalty.
 *
 * Content is treated as UTF-8 wherever a character set conversion is needed
 * (byte mode and kanji mode both convert via iconv from 'utf-8').
 */
final class Encoder
{
    /**
     * Default byte encoding.
     *
     * No ECI segment is written when byte mode is used with this encoding.
     */
    public const DEFAULT_BYTE_MODE_ECODING = 'ISO-8859-1';

    /**
     * The original table is defined in the table 5 of JISX0510:2004 (p.19).
     *
     * Indexed by byte value (0x00-0x5f); -1 marks bytes which have no
     * alphanumeric code.
     */
    private const ALPHANUMERIC_TABLE = [
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x00-0x0f
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x10-0x1f
        36, -1, -1, -1, 37, 38, -1, -1, -1, -1, 39, 40, -1, 41, 42, 43,  // 0x20-0x2f
        0,   1,  2,  3,  4,  5,  6,  7,  8,  9, 44, -1, -1, -1, -1, -1,  // 0x30-0x3f
        -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,  // 0x40-0x4f
        25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1,  // 0x50-0x5f
    ];

    /**
     * Codec cache, keyed by "<numDataBytes>-<numEcBytesInBlock>".
     *
     * @var array<string, ReedSolomonCodec>
     */
    private static $codecs = [];

    /**
     * Encodes "content" with the error correction level "ecLevel".
     *
     * @param string $content  content to encode; converted from UTF-8 where a conversion is required
     * @param string $encoding target character set for byte mode; 'SHIFT-JIS' additionally enables kanji mode
     * @throws WriterException if the content cannot be encoded or does not fit into any version
     */
    public static function encode(
        string $content,
        ErrorCorrectionLevel $ecLevel,
        string $encoding = self::DEFAULT_BYTE_MODE_ECODING
    ) : QrCode {
        // Pick an encoding mode appropriate for the content. Note that this
        // will not attempt to use multiple modes / segments even if that were
        // more efficient. Would be nice.
        $mode = self::chooseMode($content, $encoding);

        // This will store the header information, like mode and length, as well
        // as "header" segments like an ECI segment.
        $headerBits = new BitArray();

        // Append ECI segment if applicable
        if (Mode::BYTE() === $mode && self::DEFAULT_BYTE_MODE_ECODING !== $encoding) {
            $eci = CharacterSetEci::getCharacterSetEciByName($encoding);

            if (null !== $eci) {
                self::appendEci($eci, $headerBits);
            }
        }

        // (With ECI in place,) Write the mode marker
        self::appendModeInfo($mode, $headerBits);

        // Collect data within the main segment, separately, to count its size
        // if needed. Don't add it to main payload yet.
        $dataBits = new BitArray();
        self::appendBytes($content, $mode, $dataBits, $encoding);

        // Hard part: need to know version to know how many bits length takes.
        // But need to know how many bits it takes to know version. First we
        // take a guess at version by assuming version will be the minimum, 1:
        $provisionalBitsNeeded = $headerBits->getSize()
            + $mode->getCharacterCountBits(Version::getVersionForNumber(1))
            + $dataBits->getSize();
        $provisionalVersion = self::chooseVersion($provisionalBitsNeeded, $ecLevel);

        // Use that guess to calculate the right version. I am still not sure
        // this works in 100% of cases.
        $bitsNeeded = $headerBits->getSize()
            + $mode->getCharacterCountBits($provisionalVersion)
            + $dataBits->getSize();
        $version = self::chooseVersion($bitsNeeded, $ecLevel);

        $headerAndDataBits = new BitArray();
        $headerAndDataBits->appendBitArray($headerBits);

        // Find "length" of main segment and write it. The count is expressed
        // in characters of the chosen mode, not in bytes of the input string.
        if (Mode::BYTE() === $mode) {
            $numLetters = $dataBits->getSizeInBytes();
        } elseif (Mode::KANJI() === $mode) {
            // Every kanji character is written as exactly 13 bits, so the
            // number of characters follows from the amount of data written.
            $numLetters = intdiv($dataBits->getSize(), 13);
        } else {
            // Numeric and alphanumeric content is single-byte only.
            $numLetters = strlen($content);
        }

        self::appendLengthInfo($numLetters, $version, $mode, $headerAndDataBits);

        // Put data together into the overall payload.
        $headerAndDataBits->appendBitArray($dataBits);
        $ecBlocks = $version->getEcBlocksForLevel($ecLevel);
        $numDataBytes = $version->getTotalCodewords() - $ecBlocks->getTotalEcCodewords();

        // Terminate the bits properly.
        self::terminateBits($numDataBytes, $headerAndDataBits);

        // Interleave data bits with error correction code.
        $finalBits = self::interleaveWithEcBytes(
            $headerAndDataBits,
            $version->getTotalCodewords(),
            $numDataBytes,
            $ecBlocks->getNumBlocks()
        );

        // Choose the mask pattern.
        $dimension = $version->getDimensionForVersion();
        $matrix = new ByteMatrix($dimension, $dimension);
        $maskPattern = self::chooseMaskPattern($finalBits, $ecLevel, $version, $matrix);

        // Build the matrix.
        MatrixUtil::buildMatrix($finalBits, $ecLevel, $version, $maskPattern, $matrix);

        return new QrCode($mode, $ecLevel, $version, $maskPattern, $matrix);
    }

    /**
     * Gets the alphanumeric code for a byte.
     *
     * @return int the code from ALPHANUMERIC_TABLE, or -1 if the byte is outside the table or has no code
     */
    private static function getAlphanumericCode(int $code) : int
    {
        if (isset(self::ALPHANUMERIC_TABLE[$code])) {
            return self::ALPHANUMERIC_TABLE[$code];
        }

        return -1;
    }

    /**
     * Chooses the best mode for a given content.
     *
     * With a SHIFT-JIS encoding the choice is between KANJI and BYTE only.
     * Otherwise the content is scanned byte by byte: digits only yield
     * NUMERIC, digits plus other alphanumeric-table characters yield
     * ALPHANUMERIC, anything else (including empty content) yields BYTE.
     */
    private static function chooseMode(string $content, ?string $encoding = null) : Mode
    {
        if (null !== $encoding && 0 === strcasecmp($encoding, 'SHIFT-JIS')) {
            return self::isOnlyDoubleByteKanji($content) ? Mode::KANJI() : Mode::BYTE();
        }

        $hasNumeric = false;
        $hasAlphanumeric = false;
        $contentLength = strlen($content);

        for ($i = 0; $i < $contentLength; ++$i) {
            $char = $content[$i];

            if (ctype_digit($char)) {
                $hasNumeric = true;
            } elseif (-1 !== self::getAlphanumericCode(ord($char))) {
                $hasAlphanumeric = true;
            } else {
                return Mode::BYTE();
            }
        }

        if ($hasAlphanumeric) {
            return Mode::ALPHANUMERIC();
        } elseif ($hasNumeric) {
            return Mode::NUMERIC();
        }

        return Mode::BYTE();
    }

    /**
     * Calculates the mask penalty for a matrix.
     *
     * @return int sum of the four MaskUtil penalty rules
     */
    private static function calculateMaskPenalty(ByteMatrix $matrix) : int
    {
        return (
            MaskUtil::applyMaskPenaltyRule1($matrix)
            + MaskUtil::applyMaskPenaltyRule2($matrix)
            + MaskUtil::applyMaskPenaltyRule3($matrix)
            + MaskUtil::applyMaskPenaltyRule4($matrix)
        );
    }

    /**
     * Checks if content only consists of double-byte kanji characters.
     *
     * The content is converted from UTF-8 to SHIFT-JIS and the lead byte of
     * every byte pair must lie within 0x81-0x9f or 0xe0-0xeb.
     *
     * @return bool false if the conversion fails, the converted length is odd, or a lead byte is out of range
     */
    private static function isOnlyDoubleByteKanji(string $content) : bool
    {
        $bytes = @iconv('utf-8', 'SHIFT-JIS', $content);

        if (false === $bytes) {
            return false;
        }

        $length = strlen($bytes);

        if (0 !== $length % 2) {
            return false;
        }

        for ($i = 0; $i < $length; $i += 2) {
            // $bytes[$i] is a one-character string; it has to go through
            // ord() before it can be compared numerically.
            $leadByte = ord($bytes[$i]);

            $inLowerRange = $leadByte >= 0x81 && $leadByte <= 0x9f;
            $inUpperRange = $leadByte >= 0xe0 && $leadByte <= 0xeb;

            if (! $inLowerRange && ! $inUpperRange) {
                return false;
            }
        }

        return true;
    }

    /**
     * Chooses the best mask pattern for a matrix.
     *
     * Builds the matrix once per available mask pattern (overwriting the
     * passed matrix each time) and keeps the pattern with the lowest penalty;
     * on a tie the lowest pattern number wins.
     */
    private static function chooseMaskPattern(
        BitArray $bits,
        ErrorCorrectionLevel $ecLevel,
        Version $version,
        ByteMatrix $matrix
    ) : int {
        $minPenalty = PHP_INT_MAX;
        $bestMaskPattern = -1;

        for ($maskPattern = 0; $maskPattern < QrCode::NUM_MASK_PATTERNS; ++$maskPattern) {
            MatrixUtil::buildMatrix($bits, $ecLevel, $version, $maskPattern, $matrix);
            $penalty = self::calculateMaskPenalty($matrix);

            if ($penalty < $minPenalty) {
                $minPenalty = $penalty;
                $bestMaskPattern = $maskPattern;
            }
        }

        return $bestMaskPattern;
    }

    /**
     * Chooses the best version for the input.
     *
     * Returns the lowest of the versions 1 to 40 whose data capacity at the
     * given error correction level can hold the requested number of bits.
     *
     * @throws WriterException if data is too big
     */
    private static function chooseVersion(int $numInputBits, ErrorCorrectionLevel $ecLevel) : Version
    {
        // NOTE(review): "+ 8" (rather than "+ 7") asks for one spare byte when
        // the bit count is already a multiple of eight. Kept as is so that
        // version selection does not change.
        $totalInputBytes = intdiv($numInputBits + 8, 8);

        for ($versionNum = 1; $versionNum <= 40; ++$versionNum) {
            $version = Version::getVersionForNumber($versionNum);
            $numBytes = $version->getTotalCodewords();

            $ecBlocks = $version->getEcBlocksForLevel($ecLevel);
            $numEcBytes = $ecBlocks->getTotalEcCodewords();

            $numDataBytes = $numBytes - $numEcBytes;

            if ($numDataBytes >= $totalInputBytes) {
                return $version;
            }
        }

        throw new WriterException('Data too big');
    }

    /**
     * Terminates the bits in a bit array.
     *
     * Appends up to four zero bits as terminator, pads with zero bits to the
     * next byte boundary and then fills the remaining capacity with the
     * alternating pad bytes 0xec and 0x11.
     *
     * @throws WriterException if data bits cannot fit in the QR code
     * @throws WriterException if bits size does not equal the capacity
     */
    private static function terminateBits(int $numDataBytes, BitArray $bits) : void
    {
        $capacity = $numDataBytes << 3;

        if ($bits->getSize() > $capacity) {
            throw new WriterException('Data bits cannot fit in the QR code');
        }

        // Terminator: at most four bits, fewer if the capacity is reached.
        for ($i = 0; $i < 4 && $bits->getSize() < $capacity; ++$i) {
            $bits->appendBit(false);
        }

        $numBitsInLastByte = $bits->getSize() & 0x7;

        if ($numBitsInLastByte > 0) {
            for ($i = $numBitsInLastByte; $i < 8; ++$i) {
                $bits->appendBit(false);
            }
        }

        $numPaddingBytes = $numDataBytes - $bits->getSizeInBytes();

        for ($i = 0; $i < $numPaddingBytes; ++$i) {
            $bits->appendBits(0 === ($i & 0x1) ? 0xec : 0x11, 8);
        }

        if ($bits->getSize() !== $capacity) {
            throw new WriterException('Bits size does not equal capacity');
        }
    }

    /**
     * Gets number of data- and EC bytes for a block ID.
     *
     * Blocks are split into two groups: the blocks of group 2 carry one more
     * data byte than those of group 1, while the number of EC bytes is the
     * same for both groups.
     *
     * @return int[] tuple of [number of data bytes, number of EC bytes]
     * @throws WriterException if block ID is too large
     * @throws WriterException if EC bytes mismatch
     * @throws WriterException if RS blocks mismatch
     * @throws WriterException if total bytes mismatch
     */
    private static function getNumDataBytesAndNumEcBytesForBlockId(
        int $numTotalBytes,
        int $numDataBytes,
        int $numRsBlocks,
        int $blockId
    ) : array {
        if ($blockId >= $numRsBlocks) {
            throw new WriterException('Block ID too large');
        }

        $numRsBlocksInGroup2 = $numTotalBytes % $numRsBlocks;
        $numRsBlocksInGroup1 = $numRsBlocks - $numRsBlocksInGroup2;
        $numTotalBytesInGroup1 = intdiv($numTotalBytes, $numRsBlocks);
        $numTotalBytesInGroup2 = $numTotalBytesInGroup1 + 1;
        $numDataBytesInGroup1 = intdiv($numDataBytes, $numRsBlocks);
        $numDataBytesInGroup2 = $numDataBytesInGroup1 + 1;
        $numEcBytesInGroup1 = $numTotalBytesInGroup1 - $numDataBytesInGroup1;
        $numEcBytesInGroup2 = $numTotalBytesInGroup2 - $numDataBytesInGroup2;

        // Sanity checks on the computed layout.
        if ($numEcBytesInGroup1 !== $numEcBytesInGroup2) {
            throw new WriterException('EC bytes mismatch');
        }

        if ($numRsBlocks !== $numRsBlocksInGroup1 + $numRsBlocksInGroup2) {
            throw new WriterException('RS blocks mismatch');
        }

        if ($numTotalBytes !==
            (($numDataBytesInGroup1 + $numEcBytesInGroup1) * $numRsBlocksInGroup1)
            + (($numDataBytesInGroup2 + $numEcBytesInGroup2) * $numRsBlocksInGroup2)
        ) {
            throw new WriterException('Total bytes mismatch');
        }

        if ($blockId < $numRsBlocksInGroup1) {
            return [$numDataBytesInGroup1, $numEcBytesInGroup1];
        }

        return [$numDataBytesInGroup2, $numEcBytesInGroup2];
    }

    /**
     * Interleaves data with EC bytes.
     *
     * Splits the data into RS blocks, generates the EC bytes of every block
     * and writes the result column-wise: first byte of every block, second
     * byte of every block and so on; data bytes first, then EC bytes.
     *
     * @throws WriterException if number of bits and data bytes does not match
     * @throws WriterException if data bytes does not match offset
     * @throws WriterException if an interleaving error occurs
     */
    private static function interleaveWithEcBytes(
        BitArray $bits,
        int $numTotalBytes,
        int $numDataBytes,
        int $numRsBlocks
    ) : BitArray {
        if ($bits->getSizeInBytes() !== $numDataBytes) {
            throw new WriterException('Number of bits and data bytes does not match');
        }

        $dataBytesOffset = 0;
        $maxNumDataBytes = 0;
        $maxNumEcBytes = 0;

        $blocks = new SplFixedArray($numRsBlocks);

        for ($i = 0; $i < $numRsBlocks; ++$i) {
            [$numDataBytesInBlock, $numEcBytesInBlock] = self::getNumDataBytesAndNumEcBytesForBlockId(
                $numTotalBytes,
                $numDataBytes,
                $numRsBlocks,
                $i
            );

            $size = $numDataBytesInBlock;
            $dataBytes = $bits->toBytes(8 * $dataBytesOffset, $size);
            $ecBytes = self::generateEcBytes($dataBytes, $numEcBytesInBlock);
            $blocks[$i] = new BlockPair($dataBytes, $ecBytes);

            $maxNumDataBytes = max($maxNumDataBytes, $size);
            $maxNumEcBytes = max($maxNumEcBytes, count($ecBytes));
            $dataBytesOffset += $numDataBytesInBlock;
        }

        if ($numDataBytes !== $dataBytesOffset) {
            throw new WriterException('Data bytes does not match offset');
        }

        $result = new BitArray();

        // Data bytes; shorter blocks are skipped once they are exhausted.
        for ($i = 0; $i < $maxNumDataBytes; ++$i) {
            foreach ($blocks as $block) {
                $dataBytes = $block->getDataBytes();

                if ($i < count($dataBytes)) {
                    $result->appendBits($dataBytes[$i], 8);
                }
            }
        }

        // EC bytes, interleaved the same way.
        for ($i = 0; $i < $maxNumEcBytes; ++$i) {
            foreach ($blocks as $block) {
                $ecBytes = $block->getErrorCorrectionBytes();

                if ($i < count($ecBytes)) {
                    $result->appendBits($ecBytes[$i], 8);
                }
            }
        }

        if ($numTotalBytes !== $result->getSizeInBytes()) {
            throw new WriterException(
                'Interleaving error: ' . $numTotalBytes . ' and ' . $result->getSizeInBytes() . ' differ'
            );
        }

        return $result;
    }

    /**
     * Generates EC bytes for given data.
     *
     * @param  SplFixedArray<int> $dataBytes
     * @return SplFixedArray<int> array of $numEcBytesInBlock entries filled by the codec
     */
    private static function generateEcBytes(SplFixedArray $dataBytes, int $numEcBytesInBlock) : SplFixedArray
    {
        $numDataBytes = count($dataBytes);

        // The buffer handed to the codec is sized for data plus EC bytes;
        // only the data part is filled in here.
        $toEncode = new SplFixedArray($numDataBytes + $numEcBytesInBlock);

        for ($i = 0; $i < $numDataBytes; $i++) {
            $toEncode[$i] = $dataBytes[$i] & 0xff;
        }

        $ecBytes = new SplFixedArray($numEcBytesInBlock);
        $codec = self::getCodec($numDataBytes, $numEcBytesInBlock);
        $codec->encode($toEncode, $ecBytes);

        return $ecBytes;
    }

    /**
     * Gets an RS codec and caches it.
     *
     * One codec is created per combination of data byte count and EC byte
     * count and reused for subsequent calls.
     */
    private static function getCodec(int $numDataBytes, int $numEcBytesInBlock) : ReedSolomonCodec
    {
        $cacheId = $numDataBytes . '-' . $numEcBytesInBlock;

        if (isset(self::$codecs[$cacheId])) {
            return self::$codecs[$cacheId];
        }

        return self::$codecs[$cacheId] = new ReedSolomonCodec(
            8,
            0x11d,
            0,
            1,
            $numEcBytesInBlock,
            255 - $numDataBytes - $numEcBytesInBlock
        );
    }

    /**
     * Appends mode information to a bit array.
     *
     * The mode indicator is written as four bits.
     */
    private static function appendModeInfo(Mode $mode, BitArray $bits) : void
    {
        $bits->appendBits($mode->getBits(), 4);
    }

    /**
     * Appends length information to a bit array.
     *
     * The width of the length field depends on both mode and version.
     *
     * @throws WriterException if num letters is bigger than expected
     */
    private static function appendLengthInfo(int $numLetters, Version $version, Mode $mode, BitArray $bits) : void
    {
        $numBits = $mode->getCharacterCountBits($version);

        if ($numLetters >= (1 << $numBits)) {
            throw new WriterException($numLetters . ' is bigger than ' . ((1 << $numBits) - 1));
        }

        $bits->appendBits($numLetters, $numBits);
    }

    /**
     * Appends bytes to a bit array in a specific mode.
     *
     * @throws WriterException if an invalid mode was supplied
     */
    private static function appendBytes(string $content, Mode $mode, BitArray $bits, string $encoding) : void
    {
        switch ($mode) {
            case Mode::NUMERIC():
                self::appendNumericBytes($content, $bits);
                break;

            case Mode::ALPHANUMERIC():
                self::appendAlphanumericBytes($content, $bits);
                break;

            case Mode::BYTE():
                self::append8BitBytes($content, $bits, $encoding);
                break;

            case Mode::KANJI():
                self::appendKanjiBytes($content, $bits);
                break;

            default:
                throw new WriterException('Invalid mode: ' . $mode);
        }
    }

    /**
     * Appends numeric bytes to a bit array.
     *
     * Digits are packed in groups of three (10 bits); a trailing group of
     * two digits takes 7 bits and a single trailing digit 4 bits.
     */
    private static function appendNumericBytes(string $content, BitArray $bits) : void
    {
        $length = strlen($content);
        $i = 0;

        while ($i < $length) {
            $num1 = (int) $content[$i];

            if ($i + 2 < $length) {
                // Encode three numeric letters in ten bits.
                $num2 = (int) $content[$i + 1];
                $num3 = (int) $content[$i + 2];
                $bits->appendBits($num1 * 100 + $num2 * 10 + $num3, 10);
                $i += 3;
            } elseif ($i + 1 < $length) {
                // Encode two numeric letters in seven bits.
                $num2 = (int) $content[$i + 1];
                $bits->appendBits($num1 * 10 + $num2, 7);
                $i += 2;
            } else {
                // Encode one numeric letter in four bits.
                $bits->appendBits($num1, 4);
                ++$i;
            }
        }
    }

    /**
     * Appends alpha-numeric bytes to a bit array.
     *
     * Characters are packed in pairs (11 bits); a single trailing character
     * takes 6 bits.
     *
     * @throws WriterException if an invalid alphanumeric code was found
     */
    private static function appendAlphanumericBytes(string $content, BitArray $bits) : void
    {
        $length = strlen($content);
        $i = 0;

        while ($i < $length) {
            $code1 = self::getAlphanumericCode(ord($content[$i]));

            if (-1 === $code1) {
                throw new WriterException('Invalid alphanumeric code');
            }

            if ($i + 1 < $length) {
                $code2 = self::getAlphanumericCode(ord($content[$i + 1]));

                if (-1 === $code2) {
                    throw new WriterException('Invalid alphanumeric code');
                }

                // Encode two alphanumeric letters in 11 bits.
                $bits->appendBits($code1 * 45 + $code2, 11);
                $i += 2;
            } else {
                // Encode one alphanumeric letter in six bits.
                $bits->appendBits($code1, 6);
                ++$i;
            }
        }
    }

    /**
     * Appends regular 8-bit bytes to a bit array.
     *
     * The content is converted from UTF-8 to the target encoding first.
     *
     * @throws WriterException if content cannot be encoded to target encoding
     */
    private static function append8BitBytes(string $content, BitArray $bits, string $encoding) : void
    {
        $bytes = @iconv('utf-8', $encoding, $content);

        if (false === $bytes) {
            throw new WriterException('Could not encode content to ' . $encoding);
        }

        $length = strlen($bytes);

        for ($i = 0; $i < $length; $i++) {
            $bits->appendBits(ord($bytes[$i]), 8);
        }
    }

    /**
     * Appends KANJI bytes to a bit array.
     *
     * The content is converted from UTF-8 to SHIFT-JIS, matching the check
     * performed by isOnlyDoubleByteKanji(), and every resulting byte pair is
     * packed into 13 bits.
     *
     * @throws WriterException if content cannot be converted to SHIFT-JIS
     * @throws WriterException if content does not seem to be encoded in SHIFT-JIS
     * @throws WriterException if an invalid byte sequence occurs
     */
    private static function appendKanjiBytes(string $content, BitArray $bits) : void
    {
        $bytes = @iconv('utf-8', 'SHIFT-JIS', $content);

        if (false === $bytes) {
            throw new WriterException('Content could not be converted to SHIFT-JIS');
        }

        $length = strlen($bytes);

        if ($length % 2 > 0) {
            // We just do a simple length check here. The for loop will check
            // individual characters.
            throw new WriterException('Content does not seem to be encoded in SHIFT-JIS');
        }

        for ($i = 0; $i < $length; $i += 2) {
            $byte1 = ord($bytes[$i]);
            $byte2 = ord($bytes[$i + 1]);
            $code = ($byte1 << 8) | $byte2;

            if ($code >= 0x8140 && $code <= 0x9ffc) {
                $subtracted = $code - 0x8140;
            } elseif ($code >= 0xe040 && $code <= 0xebbf) {
                $subtracted = $code - 0xc140;
            } else {
                throw new WriterException('Invalid byte sequence');
            }

            $encoded = (($subtracted >> 8) * 0xc0) + ($subtracted & 0xff);

            $bits->appendBits($encoded, 13);
        }
    }

    /**
     * Appends ECI information to a bit array.
     *
     * Writes the four-bit ECI mode indicator followed by the ECI value as
     * eight bits.
     */
    private static function appendEci(CharacterSetEci $eci, BitArray $bits) : void
    {
        $mode = Mode::ECI();
        $bits->appendBits($mode->getBits(), 4);
        $bits->appendBits($eci->getValue(), 8);
    }
}