<?php

namespace Rubix\ML\Persisters\Serializers;

use Rubix\ML\Encoding;
use Rubix\ML\Persistable;
use Rubix\ML\Other\Helpers\JSON;
use Rubix\ML\Exceptions\ClassRevisionMismatch;
use Rubix\ML\Exceptions\RuntimeException;

use function strlen;
use function strpos;
use function substr;
use function hash;
use function hash_algos;
use function get_class;
use function array_pad;
use function explode;
use function in_array;
use function is_string;
use function strtolower;

use const Rubix\ML\VERSION as LIBRARY_VERSION;

/**
 * RBX
 *
 * Rubix Object File format (RBX) is a format designed to reliably store and share serialized PHP objects. Based on PHP's native
 * serialization format, RBX adds additional layers of compression, data integrity checks, and class compatibility detection all
 * in one robust format.
 *
 * A message produced by this class is laid out as follows:
 *
 *   <identifier string><format version>EOL
 *   <checksum type>:<checksum of the header>EOL
 *   <JSON header>EOL
 *   <payload produced by the base serializer>
 *
 * NOTE(review): the checksums are integrity checks only (CRC32 by default), not authentication, and the payload is handed
 * to the base serializer which, per the description above, builds on PHP's native serialization. Presumably this means
 * messages from untrusted sources should not be unserialized - confirm against the Native serializer.
 *
 * @category    Machine Learning
 * @package     Rubix/ML
 * @author      Andrew DalPino
 */
class RBX implements Serializer
{
    /**
     * The identifier or "magic number" of the format.
     *
     * @var string
     */
    protected const IDENTIFIER_STRING = "\241RBX\r\n\032\n";

    /**
     * The current version of the format.
     *
     * @var int
     */
    protected const VERSION = 1;

    /**
     * The hashing function used to generate checksums.
     *
     * @var string
     */
    protected const CHECKSUM_HASH_TYPE = 'crc32b';

    /**
     * The end of line character.
     *
     * @var string
     */
    protected const EOL = "\n";

    /**
     * The base serializer that produces and consumes the payload section of the message.
     *
     * @var \Rubix\ML\Persisters\Serializers\Gzip
     */
    protected $base;

    /**
     * Set up the base serializer: a Gzip serializer constructed with the arguments 9 and a Native serializer.
     */
    public function __construct()
    {
        $this->base = new Gzip(9, new Native());
    }

    /**
     * Serialize a persistable object and return the data.
     *
     * The payload is produced by the base serializer. A JSON header describing the library version, the class
     * name and revision of the object, and the checksum and length of the payload is placed in front of it,
     * itself protected by a checksum.
     *
     * @internal
     *
     * @param \Rubix\ML\Persistable $persistable
     * @return \Rubix\ML\Encoding
     */
    public function serialize(Persistable $persistable) : Encoding
    {
        $encoding = $this->base->serialize($persistable);

        // Convert the encoding to a string once instead of relying on implicit conversion at every use.
        $payload = (string) $encoding;

        $header = JSON::encode([
            'library' => [
                'version' => LIBRARY_VERSION,
            ],
            'class' => [
                'name' => get_class($persistable),
                'revision' => $persistable->revision(),
            ],
            'data' => [
                'checksum' => [
                    'type' => self::CHECKSUM_HASH_TYPE,
                    'hash' => hash(self::CHECKSUM_HASH_TYPE, $payload),
                ],
                'length' => $encoding->bytes(),
            ],
        ]);

        // The header gets its own checksum so that it can be verified before it is decoded and trusted.
        $checksum = self::CHECKSUM_HASH_TYPE . ':' . hash(self::CHECKSUM_HASH_TYPE, $header);

        $data = self::IDENTIFIER_STRING;
        $data .= self::VERSION . self::EOL;
        $data .= $checksum . self::EOL;
        $data .= $header . self::EOL;
        $data .= $payload;

        return new Encoding($data);
    }

    /**
     * Unserialize a persistable object and return it.
     *
     * The message is checked in this order: identifier string, presence of all four sections, header checksum,
     * payload length, payload checksum, class name, and finally class revision. The format version is only
     * checked for presence, it is not compared against the version of this implementation.
     *
     * @internal
     *
     * @param \Rubix\ML\Encoding $encoding
     * @throws \Rubix\ML\Exceptions\RuntimeException
     * @throws \Rubix\ML\Exceptions\ClassRevisionMismatch
     * @return \Rubix\ML\Persistable
     */
    public function unserialize(Encoding $encoding) : Persistable
    {
        $message = (string) $encoding;

        if (strpos($message, self::IDENTIFIER_STRING) !== 0) {
            throw new RuntimeException('Unrecognized message format.');
        }

        $data = substr($message, strlen(self::IDENTIFIER_STRING));

        // Limit the split to 4 parts so that EOL bytes inside the binary payload are left untouched.
        [$version, $checksum, $header, $payload] = array_pad(explode(self::EOL, $data, 4), 4, null);

        if (!$version or !$checksum or !$header or !$payload) {
            throw new RuntimeException('Invalid message format.');
        }

        [$type, $hash] = array_pad(explode(':', $checksum, 2), 2, null);

        // The algorithm name comes from the message itself, so make sure it is one that hash() knows
        // before calling it, otherwise PHP 8 throws a ValueError and PHP 7 emits a warning.
        if (!self::isKnownHashAlgorithm($type) or $hash !== hash($type, $header)) {
            throw new RuntimeException('Header checksum verification failed.');
        }

        $header = JSON::decode($header);

        // Header fields are read with the null coalescing operator so that a missing field fails the
        // corresponding check below instead of raising an undefined index notice or a TypeError.
        if (strlen($payload) !== ($header['data']['length'] ?? null)) {
            throw new RuntimeException('Data is corrupted.');
        }

        $type = $header['data']['checksum']['type'] ?? null;

        if (
            !self::isKnownHashAlgorithm($type)
            or ($header['data']['checksum']['hash'] ?? null) !== hash($type, $payload)
        ) {
            throw new RuntimeException('Data checksum verification failed.');
        }

        $persistable = $this->base->unserialize(new Encoding($payload));

        if (get_class($persistable) !== ($header['class']['name'] ?? null)) {
            throw new RuntimeException('Class name mismatch.');
        }

        if ($persistable->revision() !== ($header['class']['revision'] ?? null)) {
            throw new ClassRevisionMismatch($header['library']['version']);
        }

        return $persistable;
    }

    /**
     * Return the string representation of the object.
     *
     * @return string
     */
    public function __toString() : string
    {
        return 'RBX';
    }

    /**
     * Is the given value the name of a hashing algorithm that can safely be passed to hash()?
     *
     * The comparison is done on the lowercased name because hash() accepts algorithm names in any case
     * while hash_algos() reports them in lowercase.
     *
     * @param mixed $type
     * @return bool
     */
    private static function isKnownHashAlgorithm($type) : bool
    {
        return is_string($type) and in_array(strtolower($type), hash_algos(), true);
    }
}