1<?php
2//============================================================+
3// File name   : tcpdf_filters.php
4// Version     : 1.0.001
5// Begin       : 2011-05-23
6// Last Update : 2014-04-25
7// Author      : Nicola Asuni - Tecnick.com LTD - www.tecnick.com - info@tecnick.com
8// License     : GNU-LGPL v3 (http://www.gnu.org/copyleft/lesser.html)
9// -------------------------------------------------------------------
10// Copyright (C) 2011-2013 Nicola Asuni - Tecnick.com LTD
11//
12// This file is part of TCPDF software library.
13//
14// TCPDF is free software: you can redistribute it and/or modify it
15// under the terms of the GNU Lesser General Public License as
16// published by the Free Software Foundation, either version 3 of the
17// License, or (at your option) any later version.
18//
19// TCPDF is distributed in the hope that it will be useful, but
20// WITHOUT ANY WARRANTY; without even the implied warranty of
21// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
22// See the GNU Lesser General Public License for more details.
23//
24// You should have received a copy of the License
25// along with TCPDF. If not, see
26// <http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT>.
27//
28// See LICENSE.TXT file for more information.
29// -------------------------------------------------------------------
30//
31// Description : This is a PHP class for decoding common PDF filters (PDF 32000-2008 - 7.4 Filters).
32//
33//============================================================+
34
35/**
36 * @file
37 * This is a PHP class for decoding common PDF filters (PDF 32000-2008 - 7.4 Filters).<br>
38 * @package com.tecnick.tcpdf
39 * @author Nicola Asuni
40 * @version 1.0.001
41 */
42
/**
 * @class TCPDF_FILTERS
 * Static helper class for decoding common PDF stream filters (PDF 32000-2008 - 7.4 Filters).<br>
 * Every decoder takes the encoded stream as a binary string and returns the decoded binary string.
 * Invalid input and unimplemented filters are reported by throwing an Exception through Error().
 * @package com.tecnick.tcpdf
 * @brief This is a PHP class for decoding common PDF filters.
 * @version 1.0.001
 * @author Nicola Asuni - info@tecnick.com
 */
class TCPDF_FILTERS {

	/**
	 * List of the filter names for which a working decoder is implemented.
	 * @private static
	 */
	private static $available_filters = array('ASCIIHexDecode', 'ASCII85Decode', 'LZWDecode', 'FlateDecode', 'RunLengthDecode');

// -----------------------------------------------------------------------------

	/**
	 * Get a list of available decoding filters.
	 * @return array Array of available filter decoders.
	 * @since 1.0.000 (2011-05-23)
	 * @public static
	 */
	public static function getAvailableFilters() {
		return self::$available_filters;
	}

	/**
	 * Decode data using the specified filter type.
	 * Unknown filter names fall back to decodeFilterStandard(), which returns the data unchanged.
	 * @param string $filter Filter name.
	 * @param string $data Data to decode.
	 * @return string Decoded data string.
	 * @throws Exception if the data is invalid for the filter or the filter is not implemented.
	 * @since 1.0.000 (2011-05-23)
	 * @public static
	 */
	public static function decodeFilter($filter, $data) {
		switch ($filter) {
			case 'ASCIIHexDecode': {
				return self::decodeFilterASCIIHexDecode($data);
			}
			case 'ASCII85Decode': {
				return self::decodeFilterASCII85Decode($data);
			}
			case 'LZWDecode': {
				return self::decodeFilterLZWDecode($data);
			}
			case 'FlateDecode': {
				return self::decodeFilterFlateDecode($data);
			}
			case 'RunLengthDecode': {
				return self::decodeFilterRunLengthDecode($data);
			}
			case 'CCITTFaxDecode': {
				return self::decodeFilterCCITTFaxDecode($data);
			}
			case 'JBIG2Decode': {
				return self::decodeFilterJBIG2Decode($data);
			}
			case 'DCTDecode': {
				return self::decodeFilterDCTDecode($data);
			}
			case 'JPXDecode': {
				return self::decodeFilterJPXDecode($data);
			}
			case 'Crypt': {
				return self::decodeFilterCrypt($data);
			}
			default: {
				return self::decodeFilterStandard($data);
			}
		}
	}

	// --- FILTERS (PDF 32000-2008 - 7.4 Filters) ------------------------------

	/**
	 * Standard
	 * Default decoding filter (leaves data unchanged).
	 * @param string $data Data to decode.
	 * @return string Decoded data string.
	 * @since 1.0.000 (2011-05-23)
	 * @public static
	 */
	public static function decodeFilterStandard($data) {
		return $data;
	}

	/**
	 * ASCIIHexDecode
	 * Decodes data encoded in an ASCII hexadecimal representation, reproducing the original binary data.
	 * White-space is ignored and everything from the EOD marker '>' onwards is discarded.
	 * @param string $data Data to decode.
	 * @return string Decoded data string.
	 * @throws Exception if the data contains non-hexadecimal characters, or an odd number of digits without an EOD marker.
	 * @since 1.0.000 (2011-05-23)
	 * @public static
	 */
	public static function decodeFilterASCIIHexDecode($data) {
		// all white-space characters shall be ignored
		$data = preg_replace('/[\s]/', '', $data);
		// check for EOD character: GREATER-THAN SIGN (3Eh)
		$eod = strpos($data, '>');
		if ($eod !== false) {
			// remove EOD and extra data (if any)
			$data = substr($data, 0, $eod);
			$eod = true;
		}
		if ((strlen($data) % 2) != 0) {
			// odd number of hexadecimal digits
			if ($eod) {
				// EOD shall behave as if a 0 (zero) followed the last digit
				$data .= '0';
			} else {
				self::Error('decodeFilterASCIIHexDecode: invalid code');
			}
		}
		// check for invalid characters
		if (preg_match('/[^a-fA-F\d]/', $data) > 0) {
			self::Error('decodeFilterASCIIHexDecode: invalid code');
		}
		// get one byte of binary data for each pair of ASCII hexadecimal digits
		return pack('H*', $data);
	}

	/**
	 * ASCII85Decode
	 * Decodes data encoded in an ASCII base-85 representation, reproducing the original binary data.
	 * White-space is ignored, an optional leading '<~' is stripped and everything from the EOD marker '~>' onwards is discarded.
	 * @param string $data Data to decode.
	 * @return string Decoded data string.
	 * @throws Exception if the data contains invalid characters, a 'z' inside a group, or a final group of a single character.
	 * @since 1.0.000 (2011-05-23)
	 * @public static
	 */
	public static function decodeFilterASCII85Decode($data) {
		// initialize string to return
		$decoded = '';
		// all white-space characters shall be ignored
		$data = preg_replace('/[\s]/', '', $data);
		// remove the optional start sequence <~ (3Ch)(7Eh), only when it really is a prefix
		if (strncmp($data, '<~', 2) === 0) {
			$data = substr($data, 2);
		}
		// check for EOD: 2-character sequence ~> (7Eh)(3Eh)
		$eod = strpos($data, '~>');
		if ($eod !== false) {
			// remove EOD and extra data (if any)
			$data = substr($data, 0, $eod);
		}
		// data length
		$data_length = strlen($data);
		// check for invalid characters: only '!' (21h) to 'u' (75h) and 'z' (7Ah) are allowed
		if (preg_match('/[^\x21-\x75\x7A]/', $data) > 0) {
			self::Error('decodeFilterASCII85Decode: invalid code');
		}
		// z sequence: a single 'z' stands for four zero bytes
		$zseq = chr(0).chr(0).chr(0).chr(0);
		// position inside a group of 5 characters (0-4)
		$group_pos = 0;
		$tuple = 0;
		$pow85 = array((85*85*85*85), (85*85*85), (85*85), 85, 1);
		// for each byte
		for ($i = 0; $i < $data_length; ++$i) {
			// get char value
			$char = ord($data[$i]);
			if ($char == 122) { // 'z'
				if ($group_pos == 0) {
					$decoded .= $zseq;
				} else {
					// 'z' is not allowed in the middle of a group
					self::Error('decodeFilterASCII85Decode: invalid code');
				}
			} else {
				// the value represented by a group of 5 characters should never be greater than 2^32 - 1
				$tuple += (($char - 33) * $pow85[$group_pos]);
				if ($group_pos == 4) {
					$decoded .= chr(($tuple >> 24) & 0xFF).chr(($tuple >> 16) & 0xFF).chr(($tuple >> 8) & 0xFF).chr($tuple & 0xFF);
					$tuple = 0;
					$group_pos = 0;
				} else {
					++$group_pos;
				}
			}
		}
		if ($group_pos > 1) {
			// compensate for the truncated low-order digits of the final partial group
			$tuple += $pow85[($group_pos - 1)];
		}
		// last tuple (if any): n characters yield n - 1 bytes
		switch ($group_pos) {
			case 4: {
				$decoded .= chr(($tuple >> 24) & 0xFF).chr(($tuple >> 16) & 0xFF).chr(($tuple >> 8) & 0xFF);
				break;
			}
			case 3: {
				$decoded .= chr(($tuple >> 24) & 0xFF).chr(($tuple >> 16) & 0xFF);
				break;
			}
			case 2: {
				$decoded .= chr(($tuple >> 24) & 0xFF);
				break;
			}
			case 1: {
				// a final group of a single character cannot encode any byte
				self::Error('decodeFilterASCII85Decode: invalid code');
				break;
			}
		}
		return $decoded;
	}

	/**
	 * Build the initial LZW dictionary containing the 256 single-byte entries.
	 * @return array Dictionary mapping codes 0-255 to the corresponding one-byte strings.
	 * @private static
	 */
	private static function getLZWInitialDictionary() {
		$dictionary = array();
		for ($i = 0; $i < 256; ++$i) {
			$dictionary[$i] = chr($i);
		}
		return $dictionary;
	}

	/**
	 * LZWDecode
	 * Decompresses data encoded using the LZW (Lempel-Ziv-Welch) adaptive compression method, reproducing the original text or binary data.
	 * Codes start at 9 bits and grow up to 12 bits; code 256 clears the table and code 257 marks the end of data.
	 * Decoding also stops when fewer bits than one full code remain.
	 * @param string $data Data to decode.
	 * @return string Decoded data string.
	 * @throws Exception if the first code after a clear-table marker is not a single-byte code.
	 * @since 1.0.000 (2011-05-23)
	 * @public static
	 */
	public static function decodeFilterLZWDecode($data) {
		// initialize string to return
		$decoded = '';
		// convert string to binary string
		$bitstring = '';
		$num_bytes = strlen($data);
		for ($i = 0; $i < $num_bytes; ++$i) {
			$bitstring .= sprintf('%08b', ord($data[$i]));
		}
		// total number of bits and current read offset
		$num_bits = strlen($bitstring);
		$offset = 0;
		// initialize code length in bits
		$bitlen = 9;
		// initialize dictionary index (next free code)
		$dix = 258;
		// initialize the dictionary (with the first 256 entries).
		$dictionary = self::getLZWInitialDictionary();
		// previous code; 256 means "no previous code yet" so that the first
		// data code is emitted as-is even if the stream lacks a leading clear-table marker
		$prev_index = 256;
		// read whole codes only: trailing padding bits shorter than a code are ignored
		while (($offset + $bitlen) <= $num_bits) {
			$index = bindec(substr($bitstring, $offset, $bitlen));
			if ($index == 257) {
				// EOD marker
				break;
			}
			// advance past the bits just read
			$offset += $bitlen;
			if ($index == 256) { // clear-table marker
				// reset code length in bits
				$bitlen = 9;
				// reset dictionary index
				$dix = 258;
				$prev_index = 256;
				// reset the dictionary (with the first 256 entries).
				$dictionary = self::getLZWInitialDictionary();
				continue;
			}
			if ($prev_index == 256) {
				// first entry after a clear: must be a single-byte code
				if ($index > 255) {
					self::Error('decodeFilterLZWDecode: invalid code');
				}
				$decoded .= $dictionary[$index];
				$prev_index = $index;
				continue;
			}
			if ($index < $dix) {
				// code exists in the dictionary
				$entry = $dictionary[$index];
				$next_prev = $index;
			} else {
				// code not yet in the dictionary: it is the entry about to be created
				// (previous string followed by its own first byte)
				$entry = $dictionary[$prev_index].$dictionary[$prev_index][0];
				$next_prev = $dix;
			}
			$decoded .= $entry;
			// update dictionary: previous string plus first byte of the current string
			$dictionary[$dix] = $dictionary[$prev_index].$entry[0];
			// the current string becomes the previous one for the next code
			$prev_index = $next_prev;
			++$dix;
			// change bit length by case (one code early, as PDF encoders do by default)
			if ($dix == 2047) {
				$bitlen = 12;
			} elseif ($dix == 1023) {
				$bitlen = 11;
			} elseif ($dix == 511) {
				$bitlen = 10;
			}
		}
		return $decoded;
	}

	/**
	 * FlateDecode
	 * Decompresses data encoded using the zlib/deflate compression method, reproducing the original text or binary data.
	 * @param string $data Data to decode.
	 * @return string Decoded data string.
	 * @throws Exception if the data cannot be decompressed.
	 * @since 1.0.000 (2011-05-23)
	 * @public static
	 */
	public static function decodeFilterFlateDecode($data) {
		// the warning is suppressed because the failure is reported as an exception below
		$decoded = @gzuncompress($data);
		if ($decoded === false) {
			self::Error('decodeFilterFlateDecode: invalid code');
		}
		return $decoded;
	}

	/**
	 * RunLengthDecode
	 * Decompresses data encoded using a byte-oriented run-length encoding algorithm.
	 * Decoding stops at the EOD length byte (128), at the end of the data, or at a truncated run.
	 * @param string $data Data to decode.
	 * @return string Decoded data string.
	 * @since 1.0.000 (2011-05-23)
	 * @public static
	 */
	public static function decodeFilterRunLengthDecode($data) {
		// initialize string to return
		$decoded = '';
		// data length
		$data_length = strlen($data);
		$i = 0;
		while ($i < $data_length) {
			// get current length byte value
			$byte = ord($data[$i]);
			if ($byte == 128) {
				// a length value of 128 denote EOD
				break;
			}
			if ($byte < 128) {
				// if the length byte is in the range 0 to 127
				// the following length + 1 (1 to 128) bytes shall be copied literally during decompression
				$decoded .= substr($data, ($i + 1), ($byte + 1));
				// move to next block
				$i += ($byte + 2);
			} else {
				if (($i + 1) >= $data_length) {
					// truncated data: the byte to repeat is missing
					break;
				}
				// if length is in the range 129 to 255,
				// the following single byte shall be copied 257 - length (2 to 128) times during decompression
				$decoded .= str_repeat($data[($i + 1)], (257 - $byte));
				// move to next block
				$i += 2;
			}
		}
		return $decoded;
	}

	/**
	 * CCITTFaxDecode (NOT IMPLEMENTED - THROWS AN EXCEPTION)
	 * Decompresses data encoded using the CCITT facsimile standard, reproducing the original data (typically monochrome image data at 1 bit per pixel).
	 * @param string $data Data to decode.
	 * @return string Decoded data string (never returned: this method always throws).
	 * @throws Exception always, because this filter is not implemented.
	 * @since 1.0.000 (2011-05-23)
	 * @public static
	 */
	public static function decodeFilterCCITTFaxDecode($data) {
		self::Error('~decodeFilterCCITTFaxDecode: this method has not been yet implemented');
	}

	/**
	 * JBIG2Decode (NOT IMPLEMENTED - THROWS AN EXCEPTION)
	 * Decompresses data encoded using the JBIG2 standard, reproducing the original monochrome (1 bit per pixel) image data (or an approximation of that data).
	 * @param string $data Data to decode.
	 * @return string Decoded data string (never returned: this method always throws).
	 * @throws Exception always, because this filter is not implemented.
	 * @since 1.0.000 (2011-05-23)
	 * @public static
	 */
	public static function decodeFilterJBIG2Decode($data) {
		self::Error('~decodeFilterJBIG2Decode: this method has not been yet implemented');
	}

	/**
	 * DCTDecode (NOT IMPLEMENTED - THROWS AN EXCEPTION)
	 * Decompresses data encoded using a DCT (discrete cosine transform) technique based on the JPEG standard, reproducing image sample data that approximates the original data.
	 * @param string $data Data to decode.
	 * @return string Decoded data string (never returned: this method always throws).
	 * @throws Exception always, because this filter is not implemented.
	 * @since 1.0.000 (2011-05-23)
	 * @public static
	 */
	public static function decodeFilterDCTDecode($data) {
		self::Error('~decodeFilterDCTDecode: this method has not been yet implemented');
	}

	/**
	 * JPXDecode (NOT IMPLEMENTED - THROWS AN EXCEPTION)
	 * Decompresses data encoded using the wavelet-based JPEG2000 standard, reproducing the original image data.
	 * @param string $data Data to decode.
	 * @return string Decoded data string (never returned: this method always throws).
	 * @throws Exception always, because this filter is not implemented.
	 * @since 1.0.000 (2011-05-23)
	 * @public static
	 */
	public static function decodeFilterJPXDecode($data) {
		self::Error('~decodeFilterJPXDecode: this method has not been yet implemented');
	}

	/**
	 * Crypt (NOT IMPLEMENTED - THROWS AN EXCEPTION)
	 * Decrypts data encrypted by a security handler, reproducing the data as it was before encryption.
	 * @param string $data Data to decode.
	 * @return string Decoded data string (never returned: this method always throws).
	 * @throws Exception always, because this filter is not implemented.
	 * @since 1.0.000 (2011-05-23)
	 * @public static
	 */
	public static function decodeFilterCrypt($data) {
		self::Error('~decodeFilterCrypt: this method has not been yet implemented');
	}

	// --- END FILTERS SECTION -------------------------------------------------

	/**
	 * Throw an exception.
	 * The message is prefixed with 'TCPDF_PARSER ERROR: '.
	 * @param string $msg The error message
	 * @throws Exception always.
	 * @since 1.0.000 (2011-05-23)
	 * @public static
	 */
	public static function Error($msg) {
		throw new Exception('TCPDF_PARSER ERROR: '.$msg);
	}

} // END OF TCPDF_FILTERS CLASS
478
479//============================================================+
480// END OF FILE
481//============================================================+
482