1# -*- coding: utf-8 -*- 2 3## Amazon S3 manager 4## Author: Michal Ludvig <michal@logix.cz> 5## http://www.logix.cz/michal 6## License: GPL Version 2 7## Copyright: TGRMN Software and contributors 8 9from __future__ import absolute_import, division 10 11import re 12import sys 13 14from calendar import timegm 15from logging import debug, warning, error 16 17import xml.dom.minidom 18import xml.etree.ElementTree as ET 19 20from .ExitCodes import EX_OSFILE 21 22try: 23 import dateutil.parser 24except ImportError: 25 sys.stderr.write(u""" 26!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 27ImportError trying to import dateutil.parser. 28Please install the python dateutil module: 29$ sudo apt-get install python-dateutil 30 or 31$ sudo yum install python-dateutil 32 or 33$ pip install python-dateutil 34!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 35""") 36 sys.stderr.flush() 37 sys.exit(EX_OSFILE) 38 39try: 40 from urllib import quote 41except ImportError: 42 # python 3 support 43 from urllib.parse import quote 44 45try: 46 unicode 47except NameError: 48 # python 3 support 49 # In python 3, unicode -> str, and str -> bytes 50 unicode = str 51 52 53__all__ = [] 54 55 56RE_S3_DATESTRING = re.compile('\.[0-9]*(?:[Z\\-\\+]*?)') 57RE_XML_NAMESPACE = re.compile(b'^(<?[^>]+?>\s*|\s*)(<\w+) xmlns=[\'"](https?://[^\'"]+)[\'"]', re.MULTILINE) 58 59 60# Date and time helpers 61 62 63def dateS3toPython(date): 64 # Reset milliseconds to 000 65 date = RE_S3_DATESTRING.sub(".000", date) 66 return dateutil.parser.parse(date, fuzzy=True) 67__all__.append("dateS3toPython") 68 69 70def dateS3toUnix(date): 71 ## NOTE: This is timezone-aware and return the timestamp regarding GMT 72 return timegm(dateS3toPython(date).utctimetuple()) 73__all__.append("dateS3toUnix") 74 75 76def dateRFC822toPython(date): 77 """ 78 Convert a string formated like '2020-06-27T15:56:34Z' into a python datetime 79 """ 80 return dateutil.parser.parse(date, fuzzy=True) 81__all__.append("dateRFC822toPython") 82 83 
def dateRFC822toUnix(date):
    """
    Parse a date string with dateRFC822toPython() and return it as a
    Unix timestamp computed from its UTC time tuple.
    """
    return timegm(dateRFC822toPython(date).utctimetuple())
__all__.append("dateRFC822toUnix")


def formatDateTime(s3timestamp):
    """
    Parse 's3timestamp' and render it as 'YYYY-MM-DD HH:MM'.
    """
    date_obj = dateutil.parser.parse(s3timestamp, fuzzy=True)
    return date_obj.strftime("%Y-%m-%d %H:%M")
__all__.append("formatDateTime")


# Encoding / Decoding


def base_unicodise(string, encoding='UTF-8', errors='replace', silent=False):
    """
    Convert 'string' to Unicode or raise an exception.

    Values that are already unicode are returned untouched. Unless
    'silent' is set, the conversion is reported through debug().
    Raises UnicodeDecodeError when decoding fails under the given
    'errors' policy.
    """
    if isinstance(string, unicode):
        return string

    if not silent:
        debug("Unicodising %r using %s" % (string, encoding))
    try:
        return unicode(string, encoding, errors)
    except UnicodeDecodeError as exc:
        # UnicodeDecodeError needs (encoding, object, start, end, reason);
        # building it from a lone message raises TypeError instead and
        # hides the real failure from callers.
        raise UnicodeDecodeError(exc.encoding, exc.object, exc.start, exc.end,
                                 "Conversion to unicode failed: %r" % string)
__all__.append("base_unicodise")


def base_deunicodise(string, encoding='UTF-8', errors='replace', silent=False):
    """
    Convert unicode 'string' to <type str>, by default replacing
    all invalid characters with '?' or raise an exception.

    Non-unicode values are returned untouched. Unless 'silent' is set,
    the conversion is reported through debug(). Raises
    UnicodeEncodeError when encoding fails under the given 'errors'
    policy.
    """
    if not isinstance(string, unicode):
        return string

    if not silent:
        debug("DeUnicodising %r using %s" % (string, encoding))
    try:
        return string.encode(encoding, errors)
    except UnicodeEncodeError as exc:
        # Same five-argument constructor as UnicodeDecodeError; see
        # base_unicodise().
        raise UnicodeEncodeError(exc.encoding, exc.object, exc.start, exc.end,
                                 "Conversion from unicode failed: %r" % string)
__all__.append("base_deunicodise")


def decode_from_s3(string, errors = "replace"):
    """
    Convert S3 UTF-8 'string' to Unicode or raise an exception.
    """
    return base_unicodise(string, "UTF-8", errors, True)
__all__.append("decode_from_s3")


def encode_to_s3(string, errors='replace'):
    """
    Convert Unicode to S3 UTF-8 'string', by default replacing
    all invalid characters with '?' or raise an exception.
    """
    return base_deunicodise(string, "UTF-8", errors, True)
__all__.append("encode_to_s3")


def s3_quote(param, quote_backslashes=True, unicode_output=False):
    """
    URI encode every byte. UriEncode() must enforce the following rules:
    - URI encode every byte except the unreserved characters: 'A'-'Z', 'a'-'z', '0'-'9', '-', '.', '_', and '~'.
    - The space character is a reserved character and must be encoded as "%20" (and not as "+").
    - Each URI encoded byte is formed by a '%' and the two-digit hexadecimal value of the byte.
    - Letters in the hexadecimal value must be uppercase, for example "%1A".
    - Encode the forward slash character, '/', everywhere except in the object key name.
    For example, if the object key name is photos/Jan/sample.jpg, the forward slash in the key name is not encoded.

    Despite its name, 'quote_backslashes' controls the forward slash:
    when true '/' is percent-encoded, when false it is left as is.
    The result is unicode when 'unicode_output' is true, otherwise it
    is passed through encode_to_s3().
    """
    safe_chars = "~" if quote_backslashes else "~/"
    param = quote(encode_to_s3(param), safe=safe_chars)
    if unicode_output:
        return decode_from_s3(param)
    return encode_to_s3(param)
__all__.append("s3_quote")


def base_urlencode_string(string, urlencoding_mode = None, unicode_output=False):
    """
    Percent-encode 'string', leaving '~' and '/' untouched.

    With urlencoding_mode == "verbatim" the S3-encoded string is
    returned without any quoting (and regardless of 'unicode_output').
    """
    string = encode_to_s3(string)

    if urlencoding_mode == "verbatim":
        ## Don't do any pre-processing
        return string

    encoded = quote(string, safe="~/")
    debug("String '%s' encoded to '%s'" % (string, encoded))
    if unicode_output:
        return decode_from_s3(encoded)
    return encode_to_s3(encoded)
__all__.append("base_urlencode_string")


def base_replace_nonprintables(string, with_message=False):
    """
    replace_nonprintables(string)

    Replaces all non-printable characters 'ch' in 'string'
    where ord(ch) <= 31 with ^@, ^A, ... ^Z, ^[, ... ^_
    and the DEL character (ord(ch) == 127) with ^?

    When 'with_message' is true and anything was replaced, a warning
    with the number of replacements is logged.
    """
    pieces = []
    modified = 0
    for c in string:
        o = ord(c)
        if o <= 31:
            # Caret notation: control code N maps to chr(ord('@') + N)
            pieces.append("^" + chr(ord('@') + o))
            modified += 1
        elif o == 127:
            pieces.append("^?")
            modified += 1
        else:
            pieces.append(c)
    # Join once instead of growing the string character by character
    new_string = "".join(pieces)
    if modified and with_message:
        warning("%d non-printable characters replaced in: %s" % (modified, new_string))
    return new_string
__all__.append("base_replace_nonprintables")


# XML helpers


def parseNodes(nodes):
    """
    Convert each element in 'nodes' into a dict keyed by child tag name.

    Children that have children of their own are converted recursively
    (their value is a list of dicts); leaf children map to their text.
    Elements that yield an empty dict are left out of the result.
    """
    ## WARNING: Ignores text nodes from mixed xml/text.
    ## For instance <tag1>some text<tag2>other text</tag2></tag1>
    ## the "some text" node will be ignored
    ## WARNING 2: Any node at first level without children will also be ignored
    retval = []
    for node in nodes:
        retval_item = {}
        for child in node:
            name = decode_from_s3(child.tag)
            if len(child):
                retval_item[name] = parseNodes([child])
            else:
                # NOTE: the lookup runs over all descendants of 'node', so
                # the first element carrying this tag supplies the text.
                found_text = node.findtext(".//%s" % child.tag)
                if found_text is not None:
                    retval_item[name] = decode_from_s3(found_text)
                else:
                    retval_item[name] = None
        if retval_item:
            retval.append(retval_item)
    return retval
__all__.append("parseNodes")


def getPrettyFromXml(xmlstr):
    """
    Return 'xmlstr' re-serialized by minidom's toprettyxml().
    """
    xmlparser = xml.dom.minidom.parseString(xmlstr)
    return xmlparser.toprettyxml()

__all__.append("getPrettyFromXml")


def stripNameSpace(xml):
    """
    stripNameSpace(xml) -- remove top-level AWS namespace
    Operate on raw byte(utf-8) xml string. (Not unicode)

    Returns a tuple (xml, xmlns): the document with the first matched
    xmlns attribute removed, and the namespace URL that was removed
    (None when nothing matched).
    """
    xmlns_match = RE_XML_NAMESPACE.match(xml)
    if xmlns_match:
        xmlns = xmlns_match.group(3)
        # The pattern is a bytes pattern, so the replacement template has
        # to be bytes as well.
        xml = RE_XML_NAMESPACE.sub(b"\\1\\2", xml, 1)
    else:
        xmlns = None
    return xml, xmlns
__all__.append("stripNameSpace")


def getTreeFromXml(xml):
    """
    Parse 'xml' into an ElementTree element.

    The top-level namespace is stripped before parsing so that tags can
    be looked up by their bare names; when one was found it is stored
    back on the root element as the 'xmlns' attribute. Parse errors are
    logged and re-raised.
    """
    xml, xmlns = stripNameSpace(encode_to_s3(xml))
    try:
        tree = ET.fromstring(xml)
        if xmlns:
            tree.attrib['xmlns'] = xmlns
        return tree
    except Exception as e:
        error("Error parsing xml: %s", e)
        error(xml)
        raise
__all__.append("getTreeFromXml")


def getListFromXml(xml, node):
    """
    Parse 'xml' and return parseNodes() of every descendant element
    whose tag is 'node'.
    """
    tree = getTreeFromXml(xml)
    nodes = tree.findall('.//%s' % (node))
    return parseNodes(nodes)
__all__.append("getListFromXml")


def getDictFromTree(tree):
    """
    Convert the children of 'tree' into a dict keyed by tag name.

    Children with sub-elements are converted recursively; leaf children
    map to their text. Empty or missing content is stored as "". When a
    tag occurs more than once its values are collected in a list.
    """
    ret_dict = {}
    for child in tree:
        if len(child):
            ## Complex-type child. Recurse
            content = getDictFromTree(child)
        else:
            content = decode_from_s3(child.text) if child.text is not None else None
        child_tag = decode_from_s3(child.tag)
        if child_tag in ret_dict:
            # Repeated tag: promote the existing value to a list first
            if not isinstance(ret_dict[child_tag], list):
                ret_dict[child_tag] = [ret_dict[child_tag]]
            ret_dict[child_tag].append(content or "")
        else:
            ret_dict[child_tag] = content or ""
    return ret_dict
__all__.append("getDictFromTree")


def getTextFromXml(xml, xpath):
    """
    Return the text found at 'xpath' in 'xml', or None.

    When the root tag itself ends with 'xpath' the root's own text is
    returned; otherwise the first element matching 'xpath' is used.
    """
    tree = getTreeFromXml(xml)
    if tree.tag.endswith(xpath):
        result = tree.text
    else:
        result = tree.findtext(xpath)
    return decode_from_s3(result) if result is not None else None
__all__.append("getTextFromXml")


def getRootTagName(xml):
    """
    Return the tag name of the root element of 'xml'.
    """
    tree = getTreeFromXml(xml)
    return decode_from_s3(tree.tag) if tree.tag is not None else None
__all__.append("getRootTagName")


def xmlTextNode(tag_name, text):
    """
    Create a new <tag_name> element whose content is 'text'
    (converted to unicode) and return it.
    """
    el = ET.Element(tag_name)
    el.text = decode_from_s3(text)
    return el
__all__.append("xmlTextNode")


def appendXmlTextNode(tag_name, text, parent):
    """
    Creates a new <tag_name> Node and sets
    its content to 'text'. Then appends the
    created Node to the 'parent' element.
    Returns the newly created Node.
    """
    el = xmlTextNode(tag_name, text)
    parent.append(el)
    return el
__all__.append("appendXmlTextNode")


# vim:et:ts=4:sts=4:ai