1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3"""
4parse numeral from various formats
5"""
6from rebulk.remodule import re
7
# Decimal numeral written with one to four digits.
digital_numeral = r'\d{1,4}'

# Roman numeral written with the letters M, C, D, L, X, V and I. Every part after the lookahead is
# optional, so the leading lookahead is what requires at least one Roman letter to be present.
roman_numeral = r'(?=[MCDLXVI]+)M{0,4}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3})'

# English words for 0 to 20. The position of a word in the list is its numeric value
# (__parse_word relies on list.index for the conversion).
english_word_numeral_list = [
    'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten',
    'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen', 'twenty'
]

# French words for 0 to 20, indexed by value like the English list.
french_word_numeral_list = [
    'zéro', 'un', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dix-sept', 'dix-huit', 'dix-neuf', 'vingt'
]

# Alternative French spellings for 0 to 20: unaccented 'zero', feminine 'une' and compounds written
# without the hyphen. Entries identical to the main French list are repeated to keep index == value.
french_alt_word_numeral_list = [
    'zero', 'une', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dixsept', 'dixhuit', 'dixneuf', 'vingt'
]
26
27
def __build_word_numeral(*args):
    """
    Build word numeral regexp from lists of words.

    All words of all lists are joined, in the order given, into a single non-capturing
    alternation that starts with a "one or more word characters" lookahead. Words are inserted
    verbatim (they are not regexp-escaped) and duplicates are kept.

    :param args: word lists whose items become the alternatives of the regexp
    :type args: list of string
    :return: regexp source; a pattern that never matches when no word is given
    :rtype: string
    """
    words = [word for word_list in args for word in word_list]
    if not words:
        # No alternative at all: an empty negative lookahead can never match, which is the
        # natural meaning of "one of no words" and still composes inside a larger pattern.
        return r'(?!)'
    # NOTE: the lookahead sits in front of the first alternative only, because '|' has the lowest
    # precedence inside the group. This layout is kept unchanged so the produced pattern stays
    # identical for existing callers.
    return r'(?:(?=\w+)' + '|'.join(words) + ')'
49
50
# Alternation of every known number word (English, French and alternative French spellings).
word_numeral = __build_word_numeral(
    english_word_numeral_list,
    french_word_numeral_list,
    french_alt_word_numeral_list
)

# Any supported numeral: digits first, then Roman, then number words.
numeral = '(?:' + '|'.join((digital_numeral, roman_numeral, word_numeral)) + ')'
54
# Roman symbols, including the two-letter subtractive pairs, with their integer values. Entries are
# ordered from the largest value down, which is the order __parse_roman consumes them in.
__romanNumeralMap = (
    ('M', 1000),
    ('CM', 900),
    ('D', 500),
    ('CD', 400),
    ('C', 100),
    ('XC', 90),
    ('L', 50),
    ('XL', 40),
    ('X', 10),
    ('IX', 9),
    ('V', 5),
    ('IV', 4),
    ('I', 1)
)

# roman_numeral anchored at both ends, used to validate a complete string before converting it.
__romanNumeralPattern = re.compile('^' + roman_numeral + '$')
72
73
def __parse_roman(value):
    """
    Convert a Roman numeral to its integer value.

    :param value: Roman numeral to convert; it is validated against the anchored Roman pattern first
    :type value: string
    :return: integer value of the numeral
    :rtype: int
    :raise ValueError: if value is not a well-formed Roman numeral
    """
    if __romanNumeralPattern.search(value) is None:
        raise ValueError('Invalid Roman numeral: %s' % value)

    total = 0
    position = 0
    # The map goes from the largest symbol down, so each symbol is consumed as many times as it
    # repeats at the current position before moving on to the next smaller one.
    for symbol, amount in __romanNumeralMap:
        width = len(symbol)
        while value.startswith(symbol, position):
            total += amount
            position += width
    return total
93
94
def __parse_word(value):
    """
    Convert a number word to its integer value.

    The lower-cased word is looked up in the English, French and alternative French lists, in
    that order; its position in the first list containing it is the result.

    :param value: Word to convert
    :type value: string
    :return: integer value of the word
    :rtype: int
    :raise ValueError: if the word is in none of the lists
    """
    lowered = value.lower()
    for candidates in (english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list):
        if lowered in candidates:
            return candidates.index(lowered)
    raise ValueError  # pragma: no cover
110
111
# Captures the first run of digits of a string, skipping any non-digit characters in front of it.
_clean_re = re.compile(r'[^\d]*(\d+)[^\d]*')
113
114
def _parse_or_none(parser, candidate):
    """
    Apply a parser to a candidate, turning a rejection into None.

    :param parser: callable returning an int, or raising ValueError when it rejects the candidate
    :type parser: callable
    :param candidate: text handed to the parser
    :type candidate: string
    :return: parsed value, or None if the parser raised ValueError
    :rtype: int or None
    """
    try:
        return parser(candidate)
    except ValueError:
        return None


def parse_numeral(value, int_enabled=True, roman_enabled=True, word_enabled=True, clean=True):
    """
    Parse a numeric value into integer.

    The enabled parsers are tried in a fixed order: digits, then Roman numeral, then number word.
    The first one that succeeds provides the result.

    :param value: Value to parse. Can be digits, a roman numeral or a number word.
    :type value: string
    :param int_enabled: try to read the value as decimal digits
    :type int_enabled: bool
    :param roman_enabled: try to read the value as a Roman numeral
    :type roman_enabled: bool
    :param word_enabled: try to read the value as an English or French number word
    :type word_enabled: bool
    :param clean: tolerate surrounding text: the first run of digits is used for the digit
        parser, and each whitespace-separated token is tried before the whole value for the
        Roman and word parsers
    :type clean: bool
    :return: Numeric value
    :rtype: int
    :raise ValueError: if no enabled parser accepts the value
    """
    if int_enabled:
        try:
            if clean:
                match = _clean_re.match(value)
                if match:
                    return int(match.group(1))
            return int(value)
        except ValueError:
            pass

    # With clean, every whitespace-separated token is a candidate ahead of the raw value.
    tokens = value.split() if clean else []

    if roman_enabled:
        # Tokens are upper-cased because the Roman pattern only knows upper-case letters; the raw
        # value is tried exactly as given.
        for candidate in [token.upper() for token in tokens] + [value]:
            # A known number word must not be read as a Roman numeral: 'dix' (French for 10) is
            # also the well-formed numeral DIX (509) and would never reach the word parser.
            if word_enabled and _parse_or_none(__parse_word, candidate) is not None:
                continue
            number = _parse_or_none(__parse_roman, candidate)
            if number is not None:
                return number

    if word_enabled:
        for candidate in tokens + [value]:
            number = _parse_or_none(__parse_word, candidate)
            if number is not None:
                return number

    raise ValueError('Invalid numeral: %s' % (value,))   # pragma: no cover
166