1#
2# Copyright (c), 2018-2020, SISSA (International School for Advanced Studies).
3# All rights reserved.
4# This file is distributed under the terms of the MIT License.
5# See the file 'LICENSE' in the root directory of the present
6# distribution, or http://opensource.org/licenses/MIT.
7#
8# @author Davide Brunato <brunato@sissa.it>
9#
10import re
11import math
12from calendar import isleap, leapdays
13from decimal import Decimal
14from typing import Optional, Union
15
16###
17# Data validation helpers
18
# A single character matched by \s, with the exception of U+00A0: the class
# [^\S\xa0] rejects both non-whitespace characters and the no-break space.
NORMALIZE_PATTERN = re.compile(r'[^\S\xa0]')

# A run of one or more characters of the same class (no-break space excluded).
WHITESPACES_PATTERN = re.compile(r'[^\S\xa0]+')

# A whole-string non-colonized name: a word character that is not a digit,
# followed by word characters, dots, hyphens and a few extra code points.
NCNAME_PATTERN = re.compile(r'^[^\d\W][\w.\-\u00B7\u0300-\u036F\u203F\u2040]*$')

# Name fragment shared by the prefix and local parts of the qualified name patterns.
_NAME_PART = r'[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*'

# Optional "prefix:" followed by a local name (named groups: prefix, local).
QNAME_PATTERN = re.compile(
    r'^(?:(?P<prefix>' + _NAME_PART + r'):)?' +
    r'(?P<local>' + _NAME_PART + r')$'
)

# Like QNAME_PATTERN but also accepting a braced "Q{namespace}" in place of
# the prefix (named groups: namespace, prefix, local).
EQNAME_PATTERN = re.compile(
    r'^(?:Q{(?P<namespace>[^}]+)}|' +
    r'(?P<prefix>' + _NAME_PART + r'):)?' +
    r'(?P<local>' + _NAME_PART + r')$'
)

# A percent sign that is not followed by two hexadecimal digits.
WRONG_ESCAPE_PATTERN = re.compile(r'%(?![a-fA-F\d]{2})')

# Any of the CR+LF, CR or LF line terminators (CR+LF is tried first).
XML_NEWLINES_PATTERN = re.compile('\r\n|\r|\n')
33
34
def collapse_white_spaces(s: str) -> str:
    """
    Returns a copy of the string where each run matched by WHITESPACES_PATTERN
    is replaced by a single space and leading/trailing spaces are removed.
    """
    single_spaced = WHITESPACES_PATTERN.sub(' ', s)
    return single_spaced.strip(' ')
37
38
def is_idrefs(value: Optional[str]) -> bool:
    """
    Returns `True` if the argument is a string whose whitespace-separated
    tokens all match NCNAME_PATTERN, `False` otherwise. A string without
    tokens (empty or made only of whitespace) is accepted.
    """
    if not isinstance(value, str):
        return False
    return all(NCNAME_PATTERN.match(token) is not None for token in value.split())
42
43
44###
45# Sequence type checking
# One of the characters ( ) ? * + , optionally wrapped by a single
# whitespace character on each side.
SEQUENCE_TYPE_PATTERN = re.compile(r'\s?([()?*+,])\s?')


def normalize_sequence_type(sequence_type: str) -> str:
    """
    Returns a canonical spelling of a sequence type: whitespace runs are
    collapsed, the whitespace around parentheses, occurrence indicators and
    commas is dropped, then a space is put back after each comma and between
    a closing parenthesis and a following 'as'.
    """
    compact = WHITESPACES_PATTERN.sub(' ', sequence_type).strip()
    compact = SEQUENCE_TYPE_PATTERN.sub(r'\1', compact)
    spaced = compact.replace(',', ', ')
    return spaced.replace(')as', ') as')
53
54
55###
56# Date/Time helpers
# Days of each month, indexed by month number (1-12); index 0 is a filler.
MONTH_DAYS = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
# Leap year variant: a separate list where only February (index 2) differs.
MONTH_DAYS_LEAP = MONTH_DAYS[:2] + [29] + MONTH_DAYS[3:]
59
60
def adjust_day(year: int, month: int, day: int) -> int:
    """
    Clamps a day number to the length of the given month.

    :param year: the year, used only to establish the length of February.
    :param month: the month number; any value that is not one of the 30-day \
    or 31-day months is handled as February.
    :param day: the day number to adjust.
    :return: the day unchanged for 31-day months, otherwise the day limited \
    to the last day of the month.
    """
    if month in (4, 6, 9, 11):
        last_day = 30
    elif month in (1, 3, 5, 7, 8, 10, 12):
        # No upper limit is applied to the months with 31 days.
        return day
    else:
        last_day = 29 if isleap(year) else 28
    return min(day, last_day)
68
69
def days_from_common_era(year: int) -> int:
    """
    Returns the number of days from 0001-01-01 to the provided year. For a
    common era year the days are counted until the last day of December, for
    a BCE year the days are counted down from the end to the 1st of January.
    The result is zero for year 0 and negative for negative years.
    """
    if -1 <= year <= 0:
        # Year 0 gives zero days, year -1 a single span of 366 days.
        return year * 366

    before_common_era = year < 0
    if before_common_era:
        # Number of years that come on top of the first 366-day one.
        year = -year - 1

    days = year * 365 + year // 4 - year // 100 + year // 400
    return -(366 + days) if before_common_era else days


# Lengths in days of the first 4, 100 and 400 years of the common era.
DAYS_IN_4Y, DAYS_IN_100Y, DAYS_IN_400Y = (
    days_from_common_era(years) for years in (4, 100, 400)
)
88
89
def months2days(year: int, month: int, months_delta: int) -> int:
    """
    Converts a delta of months to a delta of days, counting from the 1st day of the month,
    relative to the year and the month passed as arguments.

    :param year: the reference start year, a negative or zero value means a BCE year
      (0 is 1 BCE, -1 is 2 BCE, -2 is 3 BCE, etc).
    :param month: the starting month (1-12).
    :param months_delta: the number of months, if negative count backwards.
    :return: the signed number of days between the 1st day of the starting month
      and the 1st day of the month reached by applying the delta.
    """
    if not months_delta:
        return 0

    # Zero-based month count from January of the start year: floor division
    # and modulo also give the right year/month for negative deltas.
    total_months = month - 1 + months_delta
    target_year = year + total_months // 12
    target_month = total_months % 12 + 1

    # Days between the starting month and the same month of the target year.
    # leapdays(y1, y2) counts the leap years in [y1, y2): if the start is in
    # January or February the leap day of the starting year falls inside the
    # span, otherwise the window of the years is shifted by one.
    if month <= 2:
        y_days = 365 * (target_year - year) + leapdays(year, target_year)
    else:
        y_days = 365 * (target_year - year) + leapdays(year + 1, target_year + 1)

    # Offset between the starting month and the target month, both taken
    # within the target year.
    months_days = MONTH_DAYS_LEAP if isleap(target_year) else MONTH_DAYS
    if target_month >= month:
        return y_days + sum(months_days[m] for m in range(month, target_month))
    return y_days - sum(months_days[m] for m in range(target_month, month))
119
120
def round_number(value: Union[float, int, Decimal]) -> Union[float, int, Decimal]:
    """
    Rounds a number to the nearest integral value, with halves rounded
    towards positive infinity (2.5 becomes 3, -2.5 becomes -2).

    :param value: the number to round.
    :return: the rounded number, built with the type of the argument. Integers, \
    NaN and infinite values are returned unchanged.
    """
    from decimal import InvalidOperation

    if isinstance(value, int):
        # Already integral: skip the float-based checks below, that overflow
        # for integers too large to be converted to a float.
        return value

    if math.isnan(value) or math.isinf(value):
        return value

    number = Decimal(value)
    rounding = 'ROUND_HALF_UP' if number > 0 else 'ROUND_HALF_DOWN'
    try:
        rounded = number.quantize(Decimal('1'), rounding=rounding)
    except InvalidOperation:
        # quantize() refuses results with more digits than the precision of
        # the current context (e.g. 1e30 with the default of 28 digits),
        # to_integral_value() is not subject to that limit.
        rounded = number.to_integral_value(rounding=rounding)
    return type(value)(rounded)
130
131
def normalized_seconds(seconds: Decimal) -> str:
    """
    Returns the seconds formatted in fixed-point notation with at most six
    fractional digits, without trailing zeros and without a trailing dot.
    """
    # Fixed-point formatting is used in place of Decimal.normalize(), that
    # does not always remove the exponent (e.g. Decimal('1E+1')).
    fixed_point = format(seconds, '.6f')
    without_zeros = fixed_point.rstrip('0')
    return without_zeros.rstrip('.')
135
136
def is_xml_codepoint(cp: int) -> bool:
    """
    Returns `True` if the code point is a tab, a line feed, a carriage return
    or falls in one of the ranges 0x20-0xD7FF, 0xE000-0xFFFD, 0x10000-0x10FFFF.
    """
    if cp in (0x9, 0xA, 0xD):
        return True

    allowed_ranges = ((0x20, 0xD7FF), (0xE000, 0xFFFD), (0x10000, 0x10FFFF))
    return any(first <= cp <= last for first, last in allowed_ranges)
142
143
def ordinal(n: int) -> str:
    """
    Returns the English ordinal form of an integer ('1st', '2nd', '3rd', '4th', ...).

    :param n: the number to convert.
    :return: the number followed by its ordinal suffix. The suffix is chosen \
    on the absolute value, so that negative numbers take the same suffix of \
    their positive counterparts.
    """
    magnitude = abs(n)
    if magnitude % 100 in {11, 12, 13}:
        # The exception applies to the last two digits (11th, 111th, 1012th),
        # not only to the numbers 11, 12 and 13 themselves.
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(magnitude % 10, 'th')
    return '%d%s' % (n, suffix)
157