1#     Copyright 2021, Kay Hayen, mailto:kay.hayen@gmail.com
2#
3#     Part of "Nuitka", an optimizing Python compiler that is compatible and
4#     integrates with CPython, but also works on its own.
5#
6#     Licensed under the Apache License, Version 2.0 (the "License");
7#     you may not use this file except in compliance with the License.
8#     You may obtain a copy of the License at
9#
10#        http://www.apache.org/licenses/LICENSE-2.0
11#
12#     Unless required by applicable law or agreed to in writing, software
13#     distributed under the License is distributed on an "AS IS" BASIS,
14#     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15#     See the License for the specific language governing permissions and
16#     limitations under the License.
17#
18""" C string encoding
19
This contains the code to create string literals for C to represent the given
values, and to encode Python identifiers into names usable in C.
22"""
23
24import codecs
25import re
26
27
def _identifierEncode(c):
    """Nuitka handler to encode unicode to ASCII identifiers for C compiler.

    This is a codec error handler, registered below under the name
    "c_identifier". It is given the encode error and returns the replacement
    text together with the position at which encoding is to continue.

    Args:
        c: The encode error; "c.object" is the string being encoded, and
           "c.start" to "c.end" delimit the characters that could not be
           encoded.

    Returns:
        Tuple of the replacement text, where each offending character is
        given as "$<hex code point>$", and "c.end" as position to resume at.
    """
    # The codec may report a whole run of consecutive characters in a single
    # call, so everything from "start" to "end" has to be replaced. Looking
    # only at the last character would drop the others from the identifier.
    replacement = "".join(
        "$%02x$" % ord(char) for char in c.object[c.start : c.end]
    )

    return replacement, c.end


codecs.register_error("c_identifier", _identifierEncode)
34
35
def _encodePythonStringToC(value):
    """Turn a bytes value into one double quoted C string literal.

    Backslash, double quote, question mark and everything outside of the
    range 32 to 127 are given as octal escapes, the remaining bytes are
    emitted as they are.

    No attempt is made to limit the length of the literal created.
    """
    assert type(value) is bytes, type(value)

    # Iterating bytes gives integers, except where "str" is "bytes", there
    # it gives characters that need converting first.
    if str is bytes:
        byte_codes = [ord(char) for char in value]
    else:
        byte_codes = list(value)

    # Codes of backslash, double quote and question mark, these are escaped
    # despite being inside the range that is otherwise emitted as is.
    always_escaped = (92, 34, 63)

    pieces = []
    after_escape = False

    for code in byte_codes:
        escape = code < 32 or code > 127 or code in always_escaped

        if escape:
            pieces.append(r"\%o" % code)
        else:
            # A digit right after an octal escape would be taken as part of
            # it, so the literal is closed and a new one is opened.
            if after_escape and 48 <= code <= 57:
                pieces.append('" "')

            pieces.append(chr(code))

        after_escape = escape

    # A literal split that is directly followed by an escape is removed again.
    encoded = "".join(pieces).replace('" "\\', "\\")

    return '"%s"' % encoded
71
72
def encodePythonStringToC(value):
    """Give the C string literal code for a value, split into chunks."""

    # Some compilers reject very long string literals, so the value is given
    # as several adjacent literals, which does not change the meaning, but is
    # easier on the parser. MSVC is the only one known to need this, but
    # there is no harm in doing it for all compilers.
    chunk_size = 16000

    # The first chunk is encoded unconditionally, so an empty value still
    # results in a literal.
    literals = [_encodePythonStringToC(value[:chunk_size])]

    for offset in range(chunk_size, len(value), chunk_size):
        literals.append(_encodePythonStringToC(value[offset : offset + chunk_size]))

    return " ".join(literals)
90
91
def encodePythonIdentifierToC(value):
    """Encode an identifier from a given Python string.

    Characters from "a-z", "A-Z", "0-9" and "_" are kept, a "." becomes "$",
    and every other character becomes "$$<decimal code point>$".

    Args:
        value: The Python string to encode.

    Returns:
        The encoded string.
    """

    # Python identifiers allow almost all characters except a very
    # few, much more than C identifiers support. This attempts to
    # be bi-directional, so we can reverse it.

    def replace(match):
        char = match.group()

        if char == ".":
            return "$"

        return "$$%d$" % ord(char)

    # The pattern matches single characters only, so one pass over the whole
    # value gives the same result as substituting character by character.
    return re.sub("[^a-zA-Z0-9_]", replace, value)
108