1<?php
2
3/**
4 * League.Csv (https://csv.thephpleague.com)
5 *
6 * (c) Ignace Nyamagana Butera <nyamsprod@gmail.com>
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12declare(strict_types=1);
13
14namespace League\Csv;
15
16use OutOfRangeException;
17use php_user_filter;
18use Traversable;
19use TypeError;
20use function array_combine;
21use function array_map;
22use function array_walk;
23use function gettype;
24use function in_array;
25use function is_iterable;
26use function is_numeric;
27use function mb_convert_encoding;
28use function mb_list_encodings;
29use function preg_match;
30use function sprintf;
31use function stream_bucket_append;
32use function stream_bucket_make_writeable;
33use function stream_filter_register;
34use function stream_get_filters;
35use function strpos;
36use function strtolower;
37use function substr;
38
39/**
40 * Converts resource stream or tabular data content charset.
41 */
class CharsetConverter extends php_user_filter
{
    const FILTERNAME = 'convert.league.csv';

    // The public $filtername and $params properties are inherited from
    // php_user_filter and are intentionally NOT redeclared here: recent PHP
    // versions declare them with types on the parent class, and an untyped
    // redeclaration in a child class is rejected when the class is loaded.

    /**
     * The records input encoding charset.
     *
     * @var string
     */
    protected $input_encoding = 'UTF-8';

    /**
     * The records output encoding charset.
     *
     * @var string
     */
    protected $output_encoding = 'UTF-8';

    /**
     * Registers the stream filter (if needed) and attaches it to a {@link AbstractCsv} object.
     *
     * @throws OutOfRangeException if one of the charsets is not supported
     *
     * @return AbstractCsv the value returned by AbstractCsv::addStreamFilter
     */
    public static function addTo(AbstractCsv $csv, string $input_encoding, string $output_encoding): AbstractCsv
    {
        self::register();

        return $csv->addStreamFilter(self::getFiltername($input_encoding, $output_encoding));
    }

    /**
     * Registers the class as a stream filter under the wildcard name
     * "convert.league.csv.*", unless that name is already registered.
     */
    public static function register()
    {
        $filtername = self::FILTERNAME.'.*';
        if (!in_array($filtername, stream_get_filters(), true)) {
            stream_filter_register($filtername, self::class);
        }
    }

    /**
     * Builds the stream filter name for a given pair of charsets.
     *
     * The result has the form "convert.league.csv.<input>/<output>" where both
     * charsets are normalized by {@see CharsetConverter::filterEncoding()}.
     *
     * @throws OutOfRangeException if one of the charsets is not supported
     */
    public static function getFiltername(string $input_encoding, string $output_encoding): string
    {
        return sprintf(
            '%s.%s/%s',
            self::FILTERNAME,
            self::filterEncoding($input_encoding),
            self::filterEncoding($output_encoding)
        );
    }

    /**
     * Validates a charset name and returns it as spelled by mb_list_encodings().
     *
     * The lookup is case-insensitive. The list of encodings is computed once
     * and cached in a static variable.
     *
     * @throws OutOfRangeException if the charset is malformed or unsupported
     */
    protected static function filterEncoding(string $encoding): string
    {
        static $encoding_list;
        if (null === $encoding_list) {
            $list = mb_list_encodings();
            // Map "lower-cased name" => "canonical name" for case-insensitive lookups.
            $encoding_list = array_combine(array_map('strtolower', $list), $list);
        }

        $key = strtolower($encoding);
        if (isset($encoding_list[$key])) {
            return $encoding_list[$key];
        }

        throw new OutOfRangeException(sprintf('The submitted charset %s is not supported by the mbstring extension', $encoding));
    }

    /**
     * Called when the filter is instantiated by the stream layer.
     *
     * Extracts the input and output charsets from the filter name
     * ("convert.league.csv.<input>/<output>").
     *
     * @return bool false if the filter name is malformed or a charset is unsupported
     */
    #[\ReturnTypeWillChange]
    public function onCreate()
    {
        $prefix = self::FILTERNAME.'.';
        if (0 !== strpos($this->filtername, $prefix)) {
            return false;
        }

        $encodings = substr($this->filtername, strlen($prefix));
        if (1 !== preg_match(',^(?<input>[-\w]+)\/(?<output>[-\w]+)$,', $encodings, $matches)) {
            return false;
        }

        try {
            $this->input_encoding = static::filterEncoding($matches['input']);
            $this->output_encoding = static::filterEncoding($matches['output']);
        } catch (OutOfRangeException $e) {
            // An unsupported charset makes the filter creation fail instead of throwing.
            return false;
        }

        return true;
    }

    /**
     * Converts every incoming bucket from the input to the output charset.
     *
     * NOTE(review): each bucket is converted on its own; a multi-byte sequence
     * split across two buckets would presumably not be converted correctly -
     * confirm against the stream chunk size in use.
     *
     * @param resource $in       the incoming bucket brigade
     * @param resource $out      the outgoing bucket brigade
     * @param int      $consumed incremented by the length of each processed bucket
     * @param bool     $closing  unused
     *
     * @return int always PSFS_PASS_ON
     */
    #[\ReturnTypeWillChange]
    public function filter($in, $out, &$consumed, $closing)
    {
        while ($res = stream_bucket_make_writeable($in)) {
            $res->data = @mb_convert_encoding($res->data, $this->output_encoding, $this->input_encoding);
            $consumed += $res->datalen;
            stream_bucket_append($out, $res);
        }

        return PSFS_PASS_ON;
    }

    /**
     * Converts a collection of CSV records from the input to the output charset.
     *
     * The collection is returned untouched when both charsets are identical.
     * An array is converted eagerly; any other iterable is wrapped in a
     * {@link MapIterator} that applies this object to each record.
     *
     * @param array|Traversable $records
     *
     * @throws TypeError if $records is not iterable
     *
     * @return array|Traversable
     */
    public function convert($records)
    {
        if (!is_iterable($records)) {
            throw new TypeError(sprintf('%s() expects argument passed to be iterable, %s given', __METHOD__, gettype($records)));
        }

        if ($this->output_encoding === $this->input_encoding) {
            return $records;
        }

        if (is_array($records)) {
            return array_map($this, $records);
        }

        return new MapIterator($records, $this);
    }

    /**
     * Enable using the class as a formatter for the {@link Writer}.
     *
     * Converts both the field values and the field offsets (keys) of a record.
     * The record is rebuilt entry by entry because array_walk() only lets its
     * callback change values: a converted key would otherwise be discarded.
     */
    public function __invoke(array $record): array
    {
        $converted = [];
        foreach ($record as $offset => $value) {
            $this->encodeField($value, $offset);
            $converted[$offset] = $value;
        }

        return $converted;
    }

    /**
     * Converts, in place, the offset and the value of a CSV record field.
     *
     * null and numeric values are left untouched, as are numeric offsets;
     * anything else is cast to string and converted to the output charset.
     *
     * @param mixed $value
     * @param mixed $offset
     */
    protected function encodeField(&$value, &$offset)
    {
        if (null !== $value && !is_numeric($value)) {
            $value = mb_convert_encoding((string) $value, $this->output_encoding, $this->input_encoding);
        }

        if (!is_numeric($offset)) {
            $offset = mb_convert_encoding((string) $offset, $this->output_encoding, $this->input_encoding);
        }
    }

    /**
     * Returns an instance using the given records input encoding charset.
     *
     * The current instance is returned when the charset does not change;
     * otherwise a modified clone is returned and this instance is left as is.
     *
     * @throws OutOfRangeException if the charset is not supported
     */
    public function inputEncoding(string $encoding): self
    {
        $encoding = static::filterEncoding($encoding);
        if ($encoding === $this->input_encoding) {
            return $this;
        }

        $clone = clone $this;
        $clone->input_encoding = $encoding;

        return $clone;
    }

    /**
     * Returns an instance using the given records output encoding charset.
     *
     * The current instance is returned when the charset does not change;
     * otherwise a modified clone is returned and this instance is left as is.
     *
     * @throws OutOfRangeException if the charset is not supported
     */
    public function outputEncoding(string $encoding): self
    {
        $encoding = static::filterEncoding($encoding);
        if ($encoding === $this->output_encoding) {
            return $this;
        }

        $clone = clone $this;
        $clone->output_encoding = $encoding;

        return $clone;
    }
}
252