<?php

/**
 * League.Csv (https://csv.thephpleague.com)
 *
 * (c) Ignace Nyamagana Butera <nyamsprod@gmail.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

declare(strict_types=1);

namespace League\Csv;

use OutOfRangeException;
use php_user_filter;
use Traversable;
use TypeError;
use function array_combine;
use function array_map;
use function array_walk;
use function gettype;
use function in_array;
use function is_array;
use function is_iterable;
use function is_numeric;
use function mb_convert_encoding;
use function mb_list_encodings;
use function preg_match;
use function sprintf;
use function stream_bucket_append;
use function stream_bucket_make_writeable;
use function stream_filter_register;
use function stream_get_filters;
use function strlen;
use function strpos;
use function strtolower;
use function substr;

/**
 * Converts resource stream or tabular data content charset.
 *
 * The class can be used in two ways:
 *  - as a stream filter, registered under the "convert.league.csv.*" wildcard
 *    and instantiated by PHP with a name of the form
 *    "convert.league.csv.<input>/<output>";
 *  - as a callable/record converter through {@see convert()} and
 *    {@see __invoke()}, configured with {@see inputEncoding()} and
 *    {@see outputEncoding()}.
 *
 * The $filtername and $params properties are inherited from php_user_filter
 * and are deliberately not redeclared here: recent PHP versions declare them
 * with types on the parent class and reject an untyped redeclaration.
 */
class CharsetConverter extends php_user_filter
{
    const FILTERNAME = 'convert.league.csv';

    /**
     * The records input encoding charset.
     *
     * @var string
     */
    protected $input_encoding = 'UTF-8';

    /**
     * The records output encoding charset.
     *
     * @var string
     */
    protected $output_encoding = 'UTF-8';

    /**
     * Static method to add the stream filter to a {@link AbstractCsv} object.
     *
     * Registers the filter if needed, then appends it to the document using
     * the filter name built from both charsets.
     *
     * @throws OutOfRangeException if one of the charsets is not supported
     */
    public static function addTo(AbstractCsv $csv, string $input_encoding, string $output_encoding): AbstractCsv
    {
        self::register();

        return $csv->addStreamFilter(self::getFiltername($input_encoding, $output_encoding));
    }

    /**
     * Static method to register the class as a stream filter.
     *
     * The registration is skipped when the wildcard filter name is already
     * listed by stream_get_filters().
     */
    public static function register()
    {
        $filtername = self::FILTERNAME.'.*';
        if (!in_array($filtername, stream_get_filters(), true)) {
            stream_filter_register($filtername, self::class);
        }
    }

    /**
     * Static method to return the stream filter filtername.
     *
     * The returned name has the form "convert.league.csv.<input>/<output>"
     * where both charsets are normalized by {@see filterEncoding()}.
     *
     * @throws OutOfRangeException if one of the charsets is not supported
     */
    public static function getFiltername(string $input_encoding, string $output_encoding): string
    {
        return sprintf(
            '%s.%s/%s',
            self::FILTERNAME,
            self::filterEncoding($input_encoding),
            self::filterEncoding($output_encoding)
        );
    }

    /**
     * Filter encoding charset.
     *
     * The lookup is case-insensitive; the returned value is the charset name
     * exactly as reported by mb_list_encodings().
     *
     * @throws OutOfRangeException if the charset is malformed or unsupported
     */
    protected static function filterEncoding(string $encoding): string
    {
        // Lower-cased name => canonical name map, built once per process.
        static $encoding_list;
        if (null === $encoding_list) {
            $list = mb_list_encodings();
            $encoding_list = array_combine(array_map('strtolower', $list), $list);
        }

        $key = strtolower($encoding);
        if (isset($encoding_list[$key])) {
            return $encoding_list[$key];
        }

        throw new OutOfRangeException(sprintf('The submitted charset %s is not supported by the mbstring extension', $encoding));
    }

    /**
     * {@inheritdoc}
     *
     * Parses the input and output charsets out of the filter name.
     *
     * @return bool false when the filter name does not start with the
     *              expected prefix, does not match "<input>/<output>", or
     *              names a charset unsupported by mbstring; true otherwise
     */
    #[\ReturnTypeWillChange]
    public function onCreate()
    {
        $prefix = self::FILTERNAME.'.';
        if (0 !== strpos($this->filtername, $prefix)) {
            return false;
        }

        $encodings = substr($this->filtername, strlen($prefix));
        if (1 !== preg_match(',^(?<input>[-\w]+)\/(?<output>[-\w]+)$,', $encodings, $matches)) {
            return false;
        }

        try {
            $this->input_encoding = $this->filterEncoding($matches['input']);
            $this->output_encoding = $this->filterEncoding($matches['output']);
        } catch (OutOfRangeException $e) {
            // An unsupported charset makes the filter creation fail.
            return false;
        }

        return true;
    }

    /**
     * {@inheritdoc}
     *
     * Converts every bucket of the incoming brigade from the input charset
     * to the output charset and forwards it to the outgoing brigade.
     *
     * Each bucket is converted on its own: a multi-byte sequence split over
     * two buckets is not reassembled before conversion.
     *
     * @param resource $in       incoming bucket brigade
     * @param resource $out      outgoing bucket brigade
     * @param int      $consumed incremented by the length of each bucket read
     * @param bool     $closing  unused
     *
     * @return int always PSFS_PASS_ON
     */
    #[\ReturnTypeWillChange]
    public function filter($in, $out, &$consumed, $closing)
    {
        while ($res = stream_bucket_make_writeable($in)) {
            // Warnings raised by mbstring during the conversion are silenced.
            $res->data = @mb_convert_encoding($res->data, $this->output_encoding, $this->input_encoding);
            $consumed += $res->datalen;
            stream_bucket_append($out, $res);
        }

        return PSFS_PASS_ON;
    }

    /**
     * Convert Csv records collection from the input encoding charset into
     * the output encoding charset.
     *
     * The collection is returned untouched when both charsets are identical.
     * Arrays are converted eagerly; any other iterable is wrapped in a
     * {@link MapIterator} using this instance as the mapping callable.
     *
     * @param array|Traversable $records
     *
     * @throws TypeError if the submitted records are not iterable
     *
     * @return array|Traversable
     */
    public function convert($records)
    {
        if (!is_iterable($records)) {
            throw new TypeError(sprintf('%s() expects argument passed to be iterable, %s given', __METHOD__, gettype($records)));
        }

        if ($this->output_encoding === $this->input_encoding) {
            return $records;
        }

        if (is_array($records)) {
            return array_map($this, $records);
        }

        return new MapIterator($records, $this);
    }

    /**
     * Enable using the class as a formatter for the {@link Writer}.
     *
     * Returns a new record whose keys and values have been converted by
     * {@see encodeField()}. The record is rebuilt entry by entry because
     * array_walk() cannot alter array keys, which would silently discard
     * the converted offsets.
     */
    public function __invoke(array $record): array
    {
        $converted = [];
        foreach ($record as $offset => $value) {
            $this->encodeField($value, $offset);
            $converted[$offset] = $value;
        }

        return $converted;
    }

    /**
     * Walker method to convert the offset and the value of a CSV record field.
     *
     * Null and numeric values are left untouched, as are numeric offsets;
     * everything else is cast to string and converted. Both arguments are
     * modified in place.
     *
     * @param mixed $value
     * @param mixed $offset
     */
    protected function encodeField(&$value, &$offset)
    {
        if (null !== $value && !is_numeric($value)) {
            $value = mb_convert_encoding((string) $value, $this->output_encoding, $this->input_encoding);
        }

        if (!is_numeric($offset)) {
            $offset = mb_convert_encoding((string) $offset, $this->output_encoding, $this->input_encoding);
        }
    }

    /**
     * Sets the records input encoding charset.
     *
     * Returns the current instance when the normalized charset is unchanged,
     * otherwise a clone carrying the new input charset.
     *
     * @throws OutOfRangeException if the charset is not supported
     */
    public function inputEncoding(string $encoding): self
    {
        $encoding = $this->filterEncoding($encoding);
        if ($encoding === $this->input_encoding) {
            return $this;
        }

        $clone = clone $this;
        $clone->input_encoding = $encoding;

        return $clone;
    }

    /**
     * Sets the records output encoding charset.
     *
     * Returns the current instance when the normalized charset is unchanged,
     * otherwise a clone carrying the new output charset.
     *
     * @throws OutOfRangeException if the charset is not supported
     */
    public function outputEncoding(string $encoding): self
    {
        $encoding = $this->filterEncoding($encoding);
        if ($encoding === $this->output_encoding) {
            return $this;
        }

        $clone = clone $this;
        $clone->output_encoding = $encoding;

        return $clone;
    }
}