#
# Copyright (c), 2018-2020, SISSA (International School for Advanced Studies).
# All rights reserved.
# This file is distributed under the terms of the MIT License.
# See the file 'LICENSE' in the root directory of the present
# distribution, or http://opensource.org/licenses/MIT.
#
# @author Davide Brunato <brunato@sissa.it>
#
import re
import math
from calendar import isleap, leapdays
from decimal import Decimal
from typing import Optional, Union

###
# Data validation helpers

# Both whitespace patterns match any whitespace character *except* U+00A0
# (non-breaking space): the negated class [^\S\xa0] removes it from \s.
NORMALIZE_PATTERN = re.compile(r'[^\S\xa0]')
WHITESPACES_PATTERN = re.compile(r'[^\S\xa0]+')  # runs of whitespace, U+00A0 excluded
NCNAME_PATTERN = re.compile(r'^[^\d\W][\w.\-\u00B7\u0300-\u036F\u203F\u2040]*$')
QNAME_PATTERN = re.compile(
    r'^(?:(?P<prefix>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*):)?'
    r'(?P<local>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*)$',
)
EQNAME_PATTERN = re.compile(
    r'^(?:Q{(?P<namespace>[^}]+)}|'
    r'(?P<prefix>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*):)?'
    r'(?P<local>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*)$',
)
# A '%' that is not followed by two hexadecimal digits.
WRONG_ESCAPE_PATTERN = re.compile(r'%(?![a-fA-F\d]{2})')
XML_NEWLINES_PATTERN = re.compile('\r\n|\r|\n')


def collapse_white_spaces(s: str) -> str:
    """
    Replaces each run of whitespace (non-breaking space excluded) with a
    single space and strips the leading and trailing spaces.
    """
    return WHITESPACES_PATTERN.sub(' ', s).strip(' ')


def is_idrefs(value: Optional[str]) -> bool:
    """
    Returns `True` if the value is a string whose whitespace-separated
    tokens all match `NCNAME_PATTERN`, `False` otherwise (including when
    the value is not a string). A string without tokens is accepted.
    """
    return isinstance(value, str) and \
        all(NCNAME_PATTERN.match(x) is not None for x in value.split())


###
# Sequence type checking
SEQUENCE_TYPE_PATTERN = re.compile(r'\s?([()?*+,])\s?')


def normalize_sequence_type(sequence_type: str) -> str:
    """
    Normalizes the spacing of a sequence type string: whitespace runs are
    collapsed, the single spaces around ``( ) ? * + ,`` are removed, then
    a space is put after each comma and between ``)`` and ``as``.
    """
    sequence_type = WHITESPACES_PATTERN.sub(' ', sequence_type).strip()
    sequence_type = SEQUENCE_TYPE_PATTERN.sub(r'\1', sequence_type)
    return sequence_type.replace(',', ', ').replace(')as', ') as')


###
# Date/Time helpers
# Days per month, indexed by month number (index 0 is a placeholder).
MONTH_DAYS = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
MONTH_DAYS_LEAP = [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]


def adjust_day(year: int, month: int, day: int) -> int:
    """
    Clamps the day to the last day of the given month: 30 for April, June,
    September and November, 28 or 29 for February depending on the year.
    For 31-day months the day is returned unchanged.
    """
    if month in {1, 3, 5, 7, 8, 10, 12}:
        return day
    elif month in {4, 6, 9, 11}:
        return min(day, 30)
    else:
        return min(day, 29) if isleap(year) else min(day, 28)


def days_from_common_era(year: int) -> int:
    """
    Returns the number of days from 0001-01-01 to the provided year. For a
    common era year the days are counted until the last day of December, for a
    BCE year the days are counted down from the end to the 1st of January.
    """
    if year > 0:
        return year * 365 + year // 4 - year // 100 + year // 400
    elif year >= -1:
        # Covers year 0 (0 days) and year -1 (one 366-day year backwards).
        return year * 366
    else:
        year = -year - 1
        return -(366 + year * 365 + year // 4 - year // 100 + year // 400)


DAYS_IN_4Y = days_from_common_era(4)
DAYS_IN_100Y = days_from_common_era(100)
DAYS_IN_400Y = days_from_common_era(400)


def months2days(year: int, month: int, months_delta: int) -> int:
    """
    Converts a delta of months to a delta of days, counting from the 1st day of the month,
    relative to the year and the month passed as arguments.

    :param year: the reference start year, a negative or zero value means a BCE year
    (0 is 1 BCE, -1 is 2 BCE, -2 is 3 BCE, etc).
    :param month: the starting month (1-12).
    :param months_delta: the number of months, if negative count backwards.
    :return: the signed number of days between the 1st of the starting month \
    and the 1st of the target month.
    """
    if not months_delta:
        return 0

    total_months = month - 1 + months_delta
    target_year = year + total_months // 12
    target_month = total_months % 12 + 1

    # Days contributed by whole years. Starting in January or February the
    # 29th of February of the start year is still ahead, so leap days are
    # counted over [year, target_year); otherwise the window is shifted by one.
    if month <= 2:
        y_days = 365 * (target_year - year) + leapdays(year, target_year)
    else:
        y_days = 365 * (target_year - year) + leapdays(year + 1, target_year + 1)

    # Days contributed by the month offset inside the target year.
    months_days = MONTH_DAYS_LEAP if isleap(target_year) else MONTH_DAYS
    if target_month >= month:
        return y_days + sum(months_days[m] for m in range(month, target_month))
    return y_days - sum(months_days[m] for m in range(target_month, month))


def round_number(value: Union[float, int, Decimal]) -> Union[float, int, Decimal]:
    """
    Rounds a number to the nearest integer value, returned with the same type
    of the argument. Ties are rounded toward positive infinity (2.5 -> 3,
    -2.5 -> -2). NaN and infinite values are returned unchanged.
    """
    if math.isnan(value) or math.isinf(value):
        return value

    number = Decimal(value)
    if number > 0:
        return type(value)(number.quantize(Decimal('1'), rounding='ROUND_HALF_UP'))
    else:
        return type(value)(number.quantize(Decimal('1'), rounding='ROUND_HALF_DOWN'))


def normalized_seconds(seconds: Decimal) -> str:
    """
    Formats the seconds with at most six decimal digits, dropping the trailing
    zeros and a trailing decimal point.
    """
    # Decimal.normalize() does not remove exp every time: eg. Decimal('1E+1')
    return '{:.6f}'.format(seconds).rstrip('0').rstrip('.')


def is_xml_codepoint(cp: int) -> bool:
    """
    Returns `True` if the code point is TAB, LF, CR or falls in one of the
    ranges 0x20-0xD7FF, 0xE000-0xFFFD, 0x10000-0x10FFFF.
    """
    return cp in {0x9, 0xA, 0xD} or \
        0x20 <= cp <= 0xD7FF or \
        0xE000 <= cp <= 0xFFFD or \
        0x10000 <= cp <= 0x10FFFF


def ordinal(n: int) -> str:
    """
    Returns the number followed by its English ordinal suffix ('1st', '2nd',
    '3rd', '4th', ...). Numbers ending in 11, 12 or 13 take 'th' (e.g. '11th',
    '112th').
    """
    # The teens exception applies to the last two digits, not only to the
    # values 11, 12 and 13 themselves (111 -> '111th', not '111st').
    if n % 100 in {11, 12, 13}:
        return '%dth' % n

    suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return '%d%s' % (n, suffix)