1<?php
2
3namespace Rubix\ML\Persisters\Serializers;
4
5use Rubix\ML\Encoding;
6use Rubix\ML\Persistable;
7use Rubix\ML\Other\Helpers\JSON;
8use Rubix\ML\Exceptions\ClassRevisionMismatch;
9use Rubix\ML\Exceptions\RuntimeException;
10
use function strlen;
use function strpos;
use function substr;
use function hash;
use function get_class;
use function array_pad;
use function explode;
use function hash_algos;
use function in_array;
use function is_array;
use function is_string;
use function array_key_exists;
18
19use const Rubix\ML\VERSION as LIBRARY_VERSION;
20
21/**
22 * RBX
23 *
24 * Rubix Object File format (RBX) is a format designed to reliably store and share serialized PHP objects. Based on PHP's native
25 * serialization format, RBX adds additional layers of compression, data integrity checks, and class compatibility detection all
26 * in one robust format.
27 *
28 * @category    Machine Learning
29 * @package     Rubix/ML
30 * @author      Andrew DalPino
31 */
class RBX implements Serializer
{
    /**
     * The identifier or "magic number" of the format.
     *
     * @var string
     */
    protected const IDENTIFIER_STRING = "\241RBX\r\n\032\n";

    /**
     * The current version of the format.
     *
     * @var int
     */
    protected const VERSION = 1;

    /**
     * The hashing function used to generate checksums.
     *
     * @var string
     */
    protected const CHECKSUM_HASH_TYPE = 'crc32b';

    /**
     * The end of line character.
     *
     * @var string
     */
    protected const EOL = "\n";

    /**
     * The base serializer.
     *
     * @var \Rubix\ML\Persisters\Serializers\Gzip
     */
    protected $base;

    /**
     * Build the base serializer: a Gzip serializer wrapping a Native serializer. The first
     * Gzip argument (9) is presumably the compression level - confirm against Gzip.
     */
    public function __construct()
    {
        $this->base = new Gzip(9, new Native());
    }

    /**
     * Serialize a persistable object and return the data.
     *
     * The message layout is: magic number, format version, header checksum ("type:hash"),
     * JSON header, and finally the payload produced by the base serializer. The first three
     * fields after the magic number are each terminated by an end of line character.
     *
     * @internal
     *
     * @param \Rubix\ML\Persistable $persistable
     * @return \Rubix\ML\Encoding
     */
    public function serialize(Persistable $persistable) : Encoding
    {
        $encoding = $this->base->serialize($persistable);

        // The header records what the reader needs to verify the payload and the class
        // it was created from. NOTE(review): the reader splits on EOL, so this assumes
        // JSON::encode() emits the header on a single line - confirm.
        $header = JSON::encode([
            'library' => [
                'version' => LIBRARY_VERSION,
            ],
            'class' => [
                'name' => get_class($persistable),
                'revision' => $persistable->revision(),
            ],
            'data' => [
                'checksum' => [
                    'type' => self::CHECKSUM_HASH_TYPE,
                    'hash' => hash(self::CHECKSUM_HASH_TYPE, $encoding),
                ],
                'length' => $encoding->bytes(),
            ],
        ]);

        // The header itself is protected by a second checksum stored on its own line.
        $checksum = self::CHECKSUM_HASH_TYPE . ':' . hash(self::CHECKSUM_HASH_TYPE, $header);

        $data = self::IDENTIFIER_STRING;
        $data .= self::VERSION . self::EOL;
        $data .= $checksum . self::EOL;
        $data .= $header . self::EOL;
        $data .= $encoding;

        return new Encoding($data);
    }

    /**
     * Unserialize a persistable object and return it.
     *
     * The message is verified in stages: magic number, format version, header checksum,
     * header structure, payload length, and payload checksum. Only then is the payload
     * handed to the base serializer, after which the class name and revision of the
     * resulting object are compared to the values recorded in the header.
     *
     * SECURITY: the checksums only detect accidental corruption (the default crc32b is not
     * a cryptographic hash) and the class name/revision checks run after the object has
     * already been reconstructed by the base serializer. The format is described as being
     * based on PHP's native serialization, so only unserialize data from a trusted source.
     *
     * @internal
     *
     * @param \Rubix\ML\Encoding $encoding
     * @throws \Rubix\ML\Exceptions\RuntimeException
     * @throws \Rubix\ML\Exceptions\ClassRevisionMismatch
     * @return \Rubix\ML\Persistable
     */
    public function unserialize(Encoding $encoding) : Persistable
    {
        // Cast once so the (potentially large) message is not converted repeatedly.
        $data = (string) $encoding;

        $offset = strlen(self::IDENTIFIER_STRING);

        // Compare only the leading bytes instead of searching the entire message.
        if (substr($data, 0, $offset) !== self::IDENTIFIER_STRING) {
            throw new RuntimeException('Unrecognized message format.');
        }

        // Limit the split to 4 parts so EOL bytes inside the binary payload are left intact.
        $parts = explode(self::EOL, substr($data, $offset), 4);

        [$version, $checksum, $header, $payload] = array_pad($parts, 4, null);

        if (!$version || !$checksum || !$header || !$payload) {
            throw new RuntimeException('Invalid message format.');
        }

        // Everything below assumes the layout written by this version of the format.
        if ($version !== (string) self::VERSION) {
            throw new RuntimeException("Incompatible with RBX version $version.");
        }

        [$type, $hash] = array_pad(explode(':', $checksum, 2), 2, null);

        // The algorithm name comes from the message, so make sure hash() will accept it
        // rather than letting it raise a warning or ValueError.
        if (!self::isSupportedHashType($type) || $hash !== hash($type, $header)) {
            throw new RuntimeException('Header checksum verification failed.');
        }

        $header = JSON::decode($header);

        if (!self::isValidHeader($header)) {
            throw new RuntimeException('Invalid header format.');
        }

        if (strlen($payload) !== $header['data']['length']) {
            throw new RuntimeException('Data is corrupted.');
        }

        $type = $header['data']['checksum']['type'];

        if (
            !self::isSupportedHashType($type)
            || $header['data']['checksum']['hash'] !== hash($type, $payload)
        ) {
            throw new RuntimeException('Data checksum verification failed.');
        }

        $persistable = $this->base->unserialize(new Encoding($payload));

        if (get_class($persistable) !== $header['class']['name']) {
            throw new RuntimeException('Class name mismatch.');
        }

        if ($persistable->revision() !== $header['class']['revision']) {
            throw new ClassRevisionMismatch($header['library']['version']);
        }

        return $persistable;
    }

    /**
     * Return the string representation of the object.
     *
     * @return string
     */
    public function __toString() : string
    {
        return 'RBX';
    }

    /**
     * Is the value the name of a hashing algorithm available to this PHP installation?
     *
     * @param mixed $type
     * @return bool
     */
    private static function isSupportedHashType($type) : bool
    {
        return is_string($type) && in_array($type, hash_algos(), true);
    }

    /**
     * Does the decoded header contain every field that unserialize() reads?
     *
     * @param mixed $header
     * @return bool
     */
    private static function isValidHeader($header) : bool
    {
        if (!is_array($header)) {
            return false;
        }

        $complete = isset(
            $header['library']['version'],
            $header['class']['name'],
            $header['data']['length'],
            $header['data']['checksum']['type'],
            $header['data']['checksum']['hash']
        );

        // The revision is only required to be present since its type is defined by the
        // persistable; the isset() above guarantees $header['class'] is an array here.
        return $complete && array_key_exists('revision', $header['class']);
    }
}
182