1# -*- coding: utf-8 -*-
2
3## Amazon S3 manager
4## Author: Michal Ludvig <michal@logix.cz>
5##         http://www.logix.cz/michal
6## License: GPL Version 2
7## Copyright: TGRMN Software and contributors
8
9from __future__ import absolute_import, division
10
11import re
12import sys
13
14from calendar import timegm
15from logging import debug, warning, error
16
17import xml.dom.minidom
18import xml.etree.ElementTree as ET
19
20from .ExitCodes import EX_OSFILE
21
22try:
23    import dateutil.parser
24except ImportError:
25    sys.stderr.write(u"""
26!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
27ImportError trying to import dateutil.parser.
28Please install the python dateutil module:
29$ sudo apt-get install python-dateutil
30  or
31$ sudo yum install python-dateutil
32  or
33$ pip install python-dateutil
34!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
35""")
36    sys.stderr.flush()
37    sys.exit(EX_OSFILE)
38
39try:
40    from urllib import quote
41except ImportError:
42    # python 3 support
43    from urllib.parse import quote
44
45try:
46    unicode
47except NameError:
48    # python 3 support
49    # In python 3, unicode -> str, and str -> bytes
50    unicode = str
51
52
53__all__ = []
54
55
# Fractional-seconds part of an S3 timestamp: a dot followed by any digits.
# The trailing lazy group can always match the empty string, so it never
# consumes the timezone designator that follows.
# Raw literals are used so that the regex escapes are not (invalid) Python
# string escapes.
RE_S3_DATESTRING = re.compile(r'\.[0-9]*(?:[Z\-\+]*?)')
# Default namespace declaration on the first element of a raw (bytes) XML
# document. Group 1 is the optional leading declaration / whitespace,
# group 2 the opening "<tag" and group 3 the http(s) namespace URI.
RE_XML_NAMESPACE = re.compile(br'''^(<?[^>]+?>\s*|\s*)(<\w+) xmlns=['"](https?://[^'"]+)['"]''', re.MULTILINE)
58
59
60# Date and time helpers
61
62
def dateS3toPython(date):
    """
    Parse an S3 timestamp string into a python datetime.

    Whatever RE_S3_DATESTRING matches (the fractional-seconds part) is
    rewritten to ".000" before the string is handed to dateutil's
    fuzzy parser.
    """
    normalised = RE_S3_DATESTRING.sub(".000", date)
    return dateutil.parser.parse(normalised, fuzzy=True)
67__all__.append("dateS3toPython")
68
69
def dateS3toUnix(date):
    """
    Convert an S3 timestamp string into a Unix timestamp.
    """
    ## NOTE: The UTC time tuple is used, so the conversion is
    ## timezone-aware and the timestamp is expressed relative to GMT.
    utc_tuple = dateS3toPython(date).utctimetuple()
    return timegm(utc_tuple)
73__all__.append("dateS3toUnix")
74
75
def dateRFC822toPython(date):
    """
    Convert a date string (for instance '2020-06-27T15:56:34Z') into a
    python datetime, using dateutil's fuzzy parser.
    """
    parsed = dateutil.parser.parse(date, fuzzy=True)
    return parsed
81__all__.append("dateRFC822toPython")
82
83
def dateRFC822toUnix(date):
    """
    Convert a date string accepted by dateRFC822toPython() into a Unix
    timestamp computed from its UTC time tuple.
    """
    utc_tuple = dateRFC822toPython(date).utctimetuple()
    return timegm(utc_tuple)
86__all__.append("dateRFC822toUnix")
87
88
def formatDateTime(s3timestamp):
    """
    Render a timestamp string as 'YYYY-MM-DD HH:MM'.

    The string is parsed with dateutil's fuzzy parser and formatted
    as-is; no timezone conversion is applied here.
    """
    return dateutil.parser.parse(s3timestamp, fuzzy=True).strftime("%Y-%m-%d %H:%M")
92__all__.append("formatDateTime")
93
94
95# Encoding / Decoding
96
97
def base_unicodise(string, encoding='UTF-8', errors='replace', silent=False):
    """
    Convert 'string' to Unicode or raise an exception.

    string   -- value to convert; unicode objects are returned untouched
    encoding -- codec used to decode 'string'
    errors   -- codec error handler ('replace' by default)
    silent   -- when true, skip the debug log line

    Raises UnicodeDecodeError when decoding fails, which requires a
    non-lenient error handler such as 'strict'.
    """
    if isinstance(string, unicode):
        return string

    if not silent:
        debug("Unicodising %r using %s" % (string, encoding))
    try:
        return unicode(string, encoding, errors)
    except UnicodeDecodeError as exc:
        # UnicodeDecodeError takes five constructor arguments; building it
        # from a lone message raises TypeError and hides the real failure.
        raise UnicodeDecodeError(exc.encoding, exc.object, exc.start, exc.end,
                                 "Conversion to unicode failed: %r" % string)
111__all__.append("base_unicodise")
112
113
def base_deunicodise(string, encoding='UTF-8', errors='replace', silent=False):
    """
    Convert unicode 'string' to <type str>, by default replacing
    all invalid characters with '?' or raise an exception.

    string   -- value to convert; non-unicode values are returned untouched
    encoding -- codec used to encode 'string'
    errors   -- codec error handler ('replace' by default)
    silent   -- when true, skip the debug log line

    Raises UnicodeEncodeError when encoding fails, which requires a
    non-lenient error handler such as 'strict'.
    """
    if not isinstance(string, unicode):
        return string

    if not silent:
        debug("DeUnicodising %r using %s" % (string, encoding))
    try:
        return string.encode(encoding, errors)
    except UnicodeEncodeError as exc:
        # UnicodeEncodeError takes five constructor arguments; building it
        # from a lone message raises TypeError and hides the real failure.
        raise UnicodeEncodeError(exc.encoding, exc.object, exc.start, exc.end,
                                 "Conversion from unicode failed: %r" % string)
128__all__.append("base_deunicodise")
129
130
def decode_from_s3(string, errors="replace"):
    """
    Decode a UTF-8 'string' coming from S3 into Unicode or raise an
    exception.

    Delegates to base_unicodise() with the encoding pinned to UTF-8 and
    its debug logging silenced.
    """
    return base_unicodise(string, encoding="UTF-8", errors=errors, silent=True)
136__all__.append("decode_from_s3")
137
138
def encode_to_s3(string, errors="replace"):
    """
    Encode a Unicode 'string' to the UTF-8 form used for S3, by default
    replacing all invalid characters with '?', or raise an exception.

    Delegates to base_deunicodise() with the encoding pinned to UTF-8 and
    its debug logging silenced.
    """
    return base_deunicodise(string, encoding="UTF-8", errors=errors, silent=True)
145__all__.append("encode_to_s3")
146
147
def s3_quote(param, quote_backslashes=True, unicode_output=False):
    """
    Percent-encode 'param' following the UriEncode() rules:
    - Every byte is URI encoded except the unreserved characters:
      'A'-'Z', 'a'-'z', '0'-'9', '-', '.', '_', and '~'.
    - The space character is reserved and is encoded as "%20" (not "+").
    - Each encoded byte is a '%' followed by its two-digit hexadecimal
      value, with uppercase letters, for example "%1A".
    - The forward slash, '/', is encoded everywhere except in the object
      key name. For example the slashes of the key photos/Jan/sample.jpg
      are not encoded.

    Despite its name, 'quote_backslashes' controls the forward slash:
    when true '/' is encoded, when false it is left as is.
    The result is Unicode if 'unicode_output' is true, otherwise it goes
    through encode_to_s3().
    """
    safe_chars = "~" if quote_backslashes else "~/"
    quoted = quote(encode_to_s3(param), safe=safe_chars)
    convert = decode_from_s3 if unicode_output else encode_to_s3
    return convert(quoted)
169__all__.append("s3_quote")
170
171
def base_urlencode_string(string, urlencoding_mode=None, unicode_output=False):
    """
    Percent-encode 'string', leaving '~' and '/' untouched.

    With urlencoding_mode == "verbatim" the string is only passed through
    encode_to_s3() and returned without any quoting, whatever the value of
    'unicode_output'. Otherwise the quoted result is returned as Unicode
    when 'unicode_output' is true, else through encode_to_s3().
    """
    raw = encode_to_s3(string)

    ## "verbatim" means: no pre-processing at all
    if urlencoding_mode == "verbatim":
        return raw

    quoted = quote(raw, safe="~/")
    debug("String '%s' encoded to '%s'" % (raw, quoted))
    convert = decode_from_s3 if unicode_output else encode_to_s3
    return convert(quoted)
185__all__.append("base_urlencode_string")
186
187
def base_replace_nonprintables(string, with_message=False):
    """
    base_replace_nonprintables(string)

    Replaces every control character 'ch' in 'string' with its caret
    notation: ord(ch) <= 31 becomes ^@, ^A, ... ^Z, ^[ ... ^_ and
    ord(ch) == 127 becomes ^?. Other characters are kept unchanged.

    When 'with_message' is true and at least one character was replaced,
    a warning with the replacement count is logged.
    """
    pieces = []
    replaced = 0
    for char in string:
        code = ord(char)
        if code == 127:
            pieces.append("^?")
        elif code <= 31:
            pieces.append("^" + chr(ord('@') + code))
        else:
            # Printable: keep as is and skip the replacement counter.
            pieces.append(char)
            continue
        replaced += 1
    result = "".join(pieces)
    if with_message and replaced:
        warning("%d non-printable characters replaced in: %s" % (replaced, result))
    return result
210__all__.append("base_replace_nonprintables")
211
212
213# XML helpers
214
215
def parseNodes(nodes):
    """
    Turn a sequence of XML elements into a list of dicts.

    Each element that has children yields one dict keyed by the decoded
    child tag. A child with sub-elements maps to the parseNodes() result
    for that child; any other child maps to the text found by looking up
    its tag among the element's descendants (or None if nothing is found).
    """
    ## WARNING: Text nodes from mixed xml/text are ignored.
    ## For instance in <tag1>some text<tag2>other text</tag2></tag1>
    ## the "some text" node is dropped.
    ## WARNING 2: Any first-level node without children is ignored too.
    parsed = []
    for element in nodes:
        entry = {}
        for sub in element:
            key = decode_from_s3(sub.tag)
            if len(sub) > 0:
                entry[key] = parseNodes([sub])
                continue
            text = element.findtext(".//%s" % sub.tag)
            entry[key] = None if text is None else decode_from_s3(text)
        if entry:
            parsed.append(entry)
    return parsed
237__all__.append("parseNodes")
238
239
def getPrettyFromXml(xmlstr):
    """
    Parse 'xmlstr' with minidom and return its pretty-printed form.
    """
    return xml.dom.minidom.parseString(xmlstr).toprettyxml()
243
244__all__.append("getPrettyFromXml")
245
246
def stripNameSpace(xml):
    """
    stripNameSpace(xml) -- remove top-level AWS namespace
    Operate on raw byte(utf-8) xml string. (Not unicode)

    Returns an (xml, xmlns) tuple. When RE_XML_NAMESPACE matches at the
    start of 'xml', the namespace declaration is dropped from the first
    match and 'xmlns' is the namespace URI; otherwise 'xml' is returned
    unchanged together with None.
    """
    xmlns_match = RE_XML_NAMESPACE.match(xml)
    if xmlns_match is None:
        return xml, None
    # RE_XML_NAMESPACE is a bytes pattern, so the replacement template is
    # bytes too instead of relying on a text template being tolerated.
    stripped = RE_XML_NAMESPACE.sub(b"\\1\\2", xml, 1)
    return stripped, xmlns_match.group(3)
259__all__.append("stripNameSpace")
260
261
def getTreeFromXml(xml):
    """
    Parse 'xml' into an ElementTree element.

    The input goes through encode_to_s3() and stripNameSpace() first; if a
    namespace was stripped it is stored back on the root element as its
    'xmlns' attribute. Parsing failures are logged (message, then the
    document itself) and re-raised.
    """
    payload, namespace = stripNameSpace(encode_to_s3(xml))
    try:
        root = ET.fromstring(payload)
        if namespace:
            root.attrib['xmlns'] = namespace
    except Exception as exc:
        error("Error parsing xml: %s", exc)
        error(payload)
        raise
    return root
273__all__.append("getTreeFromXml")
274
275
def getListFromXml(xml, node):
    """
    Parse 'xml' and return the parseNodes() result for every descendant
    element of the root that is named 'node'.
    """
    matches = getTreeFromXml(xml).findall('.//%s' % node)
    return parseNodes(matches)
280__all__.append("getListFromXml")
281
282
def getDictFromTree(tree):
    """
    Convert the children of 'tree' into a dict keyed by decoded tag name.

    A child with sub-elements is converted recursively; any other child
    contributes its decoded text. Missing or empty content is stored
    as "". When a tag occurs more than once, its values are gathered in a
    list, in document order.
    """
    result = {}
    for element in tree:
        if len(element) > 0:
            ## Complex-type child: recurse into it
            value = getDictFromTree(element)
        elif element.text is None:
            value = None
        else:
            value = decode_from_s3(element.text)
        value = value or ""
        key = decode_from_s3(element.tag)
        if key not in result:
            result[key] = value
            continue
        # Repeated tag: promote the stored value to a list on first repeat.
        existing = result[key]
        if type(existing) is not list:
            existing = result[key] = [existing]
        existing.append(value)
    return result
299__all__.append("getDictFromTree")
300
301
def getTextFromXml(xml, xpath):
    """
    Return the decoded text selected by 'xpath' in 'xml', or None.

    If the root tag itself ends with 'xpath' the root's text is used,
    otherwise the first element matching 'xpath' below the root.
    """
    root = getTreeFromXml(xml)
    if root.tag.endswith(xpath):
        text = root.text
    else:
        text = root.findtext(xpath)
    if text is None:
        return None
    return decode_from_s3(text)
309__all__.append("getTextFromXml")
310
311
def getRootTagName(xml):
    """
    Return the decoded tag name of the root element of 'xml', or None if
    the root has no tag.
    """
    root_tag = getTreeFromXml(xml).tag
    if root_tag is None:
        return None
    return decode_from_s3(root_tag)
315__all__.append("getRootTagName")
316
317
def xmlTextNode(tag_name, text):
    """
    Create a detached <tag_name> element whose content is 'text'
    (decoded with decode_from_s3) and return it.
    """
    node = ET.Element(tag_name)
    node.text = decode_from_s3(text)
    return node
322__all__.append("xmlTextNode")
323
324
def appendXmlTextNode(tag_name, text, parent):
    """
    Creates a new <tag_name> Node with 'text' as
    its content, appends it to the 'parent'
    element and returns the newly created Node.
    """
    child = xmlTextNode(tag_name, text)
    parent.append(child)
    return child
335__all__.append("appendXmlTextNode")
336
337
338# vim:et:ts=4:sts=4:ai
339