1#!perl -w
2
# This file tests test_utf8_to_bytes() against two fixed lists of strings.
# Strings from the first list are upgraded to UTF-8 and are expected to be
# converted back to their original bytes, with the original length returned.
# Strings from the second list are passed as raw bytes and are expected to be
# rejected: the returned string is undefined, the returned length is -1, and
# the input is reported back unchanged.
9
10use strict;
11use Test::More;
12
13BEGIN {
14    require './t/utf8_setup.pl';
15    use_ok('XS::APItest');
16};
17
18$|=1;
19
20use Data::Dumper;
21
# Inputs that are expected to convert successfully.  No 'use utf8' is in
# effect in this file, so every character in these literals has a code point
# below 0x100.  The loop that consumes this list upgrades each string to UTF-8
# before handing it to test_utf8_to_bytes().
my @well_formed = (

    # A single character above the ASCII range
    "\xE1",

    # ASCII only
    "The quick brown fox jumped over the lazy dog",

    # Accented characters typed literally into the source
    "Ces systèmes de codage sont souvent incompatibles entre eux.  Ainsi, deux systèmes peuvent utiliser le même nombre pour deux caractères différents ou utiliser différents nombres pour le même caractère.",

    # Upper-range characters spelled as \x escapes, each below 0x100
    "Kelimelerin m\xC3\xAAme caract\xC3\xA8re ve yaz\xC3\xB1abc",
);
28
# Inputs that are expected to be rejected.  The loop that consumes this list
# passes each string as-is (no upgrade), so the \x escapes below are the raw
# bytes that test_utf8_to_bytes() is given.
my @malformed = (

    # \xC4\xB1 is the UTF-8 encoding of U+0131, which is above 0xFF
    "Kelimelerin m\xC3\xAAme caract\xC3\xA8re ve yaz\xC4\xB1abc",

    # The same \xC4\xB1 sequence, followed by \xC3\xA8
    "Kelimelerin m\xC3\xAAme caract\xC3\xA8re ve yaz\xC4\xB1\xC3\xA8abc",

    # A \xC3 start byte followed directly by the ASCII 'r'
    "Kelimelerin m\xC3\xAAme caract\xC3re ve yazi\xC3\xA8abc",

    # A \xA8 continuation byte with no start byte before it
    "Kelimelerin m\xC3\xAAme caract\xA8 ve yazi\xC3\xA8abc",

    # A complete \xC3\xA8 pair, then a \xC3 start byte followed by 'r'
    "Kelimelerin m\xC3\xAAme caract\xC3\xA8\xC3re ve yazi\xC3\xA8abc",
);
36
# Each well-formed string is upgraded to UTF-8 and passed, along with the
# byte length of the upgraded form, to test_utf8_to_bytes().  The first two
# elements of the returned array are then compared with the original string
# and its original length.
foreach my $expected (@well_formed) {
    my $expected_length = length $expected;

    # Work on a copy so that $expected keeps the un-upgraded form
    my $upgraded = $expected;
    utf8::upgrade($upgraded);

    # Under 'use bytes', length() counts bytes rather than characters
    my $upgraded_byte_count = do { use bytes; length $upgraded };

    my $result = test_utf8_to_bytes($upgraded, $upgraded_byte_count);

    is($result->[0], $expected,
       "Successfully downgraded " . display_bytes($upgraded));
    is($result->[1], $expected_length,
       "... And returned correct length($expected_length)");
}
55
# Each malformed string is passed to test_utf8_to_bytes() as-is, with its
# length.  The call is expected to yield an undefined first element, -1 as
# the second element, and a third element equal to the input.
foreach my $input (@malformed) {
    my $candidate        = $input;
    my $candidate_length = length $input;

    my $result = test_utf8_to_bytes($candidate, $candidate_length);

    ok(! defined $result->[0],
       "Returned undef for malformed " . display_bytes($candidate));
    is($result->[1], -1, "... And returned length -1");

    # NOTE(review): the comparison is against $candidate, the same variable
    # that was handed to the function, not the untouched $input -- confirm
    # that this is what was intended.
    is($result->[2], $candidate, "... And left the input unchanged");
}
67
68done_testing();
69