1<?php
2/**
3 * Copyright 2010-2017 Horde LLC (http://www.horde.org/)
4 *
5 * @author   Chuck Hagenbuch <chuck@horde.org>
6 * @license  http://www.horde.org/licenses/bsd BSD
7 * @category Horde
8 * @package  Support
9 */
10
/**
 * Base numerizer locale: rewrites English number words found in free text
 * as digits, e.g. "one hundred and fifty" becomes "150".
 *
 * The conversion is a fixed pipeline of regular-expression passes (see
 * numerize()). Each pass works on the output of the previous one, so the
 * order of the passes is significant.
 *
 * @author   Chuck Hagenbuch <chuck@horde.org>
 * @license  http://www.horde.org/licenses/bsd BSD
 * @category Horde
 * @package  Support
 */
class Horde_Support_Numerizer_Locale_Base
{
    /**
     * Regex fragment => replacement pairs applied, in array order and
     * case-insensitively, by _directReplacements().
     *
     * Keys are interpolated verbatim into a "/.../i" pattern, so they are
     * regular expressions rather than plain words; values are preg_replace()
     * replacement strings ("$1" re-emits the captured trailing character).
     *
     * @var array
     */
    public $DIRECT_NUMS = array(
        'eleven' => '11',
        'twelve' => '12',
        'thirteen' => '13',
        'fourteen' => '14',
        'fifteen' => '15',
        'sixteen' => '16',
        'seventeen' => '17',
        'eighteen' => '18',
        'nineteen' => '19',
        'ninteen' => '19',      // Common mis-spelling
        'zero' => '0',
        'one' => '1',
        'two' => '2',
        'three' => '3',
        // The (\W|$) guard requires a non-word character or the end of the
        // string after the word, so "four" matches but "fourty" does not.
        // The guard character is captured and put back via $1.
        'four(\W|$)' => '4$1',
        'five' => '5',
        'six(\W|$)' => '6$1',
        'seven(\W|$)' => '7$1',
        'eight(\W|$)' => '8$1',
        'nine(\W|$)' => '9$1',
        'ten' => '10',
        // Original intent: doesn't make sense for an 'a' at the end to be a 1.
        // NOTE(review): inside a character class "\b" is a backspace and
        // "^" / "$" are literal characters, so as written this only matches
        // an "a" followed by one of those three characters and does not fire
        // on ordinary text. Left unchanged to preserve existing output.
        '\ba[\b^$]' => '1',
    );

    /**
     * Word => value map for the multiples of ten, consumed by
     * _replaceTenPrefixes(). Includes two common mis-spellings.
     *
     * @var array
     */
    public $TEN_PREFIXES = array(
        'twenty' => 20,
        'thirty' => 30,
        'forty' => 40,
        'fourty' => 40, // Common mis-spelling
        'fifty' => 50,
        'sixty' => 60,
        'seventy' => 70,
        'eighty' => 80,
        'ninety' => 90,
        'ninty' => 90, // Common mis-spelling
    );

    /**
     * Word => multiplier map for the large magnitudes, consumed in array
     * order (smallest first) by _replaceBigPrefixes().
     *
     * @var array
     */
    public $BIG_PREFIXES = array(
        'hundred' => 100,
        'thousand' => 1000,
        'million' => 1000000,
        'billion' => 1000000000,
        'trillion' => 1000000000000,
    );

    /**
     * Converts the number words in a string to digits.
     *
     * @param string $string  Text that may contain spelled-out numbers.
     *
     * @return string  The text with recognised number words replaced.
     */
    public function numerize($string)
    {
        // Preprocess: normalise separators and protect "a half" from the
        // direct replacements.
        $string = $this->_splitHyphenatedWords($string);
        $string = $this->_hideAHalf($string);

        // Convert words to digits, smallest units first, then combine.
        $string = $this->_directReplacements($string);
        $string = $this->_replaceTenPrefixes($string);
        $string = $this->_replaceBigPrefixes($string);
        $string = $this->_fractionalAddition($string);

        return $string;
    }

    /**
     * Collapses each run of spaces to a single space and replaces a hyphen
     * that sits between two non-digit characters with a space.
     *
     * Will mutilate hyphenated-words but shouldn't matter for date
     * extraction. A hyphen next to a digit is left alone, so numeric ranges,
     * dates and negative signs are not altered.
     *
     * @param string $string  Input text.
     *
     * @return string  Text with word-joining hyphens turned into spaces.
     */
    protected function _splitHyphenatedWords($string)
    {
        // Both neighbours of the hyphen must be non-digits ([^\d]). When the
        // " +" alternative matches, both groups are empty and the
        // replacement is a single space.
        return preg_replace('/ +|([^\d])-([^\d])/', '$1 $2', $string);
    }

    /**
     * Takes the 'a' out of "a half" so it cannot turn into a 1; the
     * placeholder "haAlf" is resolved at the end by _fractionalAddition().
     *
     * The match is case-sensitive.
     *
     * @param string $string  Input text.
     *
     * @return string  Text with every "a half" replaced by "haAlf".
     */
    protected function _hideAHalf($string)
    {
        return str_replace('a half', 'haAlf', $string);
    }

    /**
     * Applies the easy/direct replacements from $DIRECT_NUMS.
     *
     * @param string $string  Input text.
     *
     * @return string  Text with the direct number words replaced by digits.
     */
    protected function _directReplacements($string)
    {
        foreach ($this->DIRECT_NUMS as $dn => $dn_replacement) {
            $string = preg_replace("/$dn/i", $dn_replacement, $string);
        }
        return $string;
    }

    /**
     * Replaces the tens words (twenty, thirty, etc.) with their value,
     * absorbing a following single digit: "twenty 5" becomes "25".
     *
     * @param string $string  Input text, after _directReplacements().
     *
     * @return string  Text with the tens words replaced by digits.
     */
    protected function _replaceTenPrefixes($string)
    {
        foreach ($this->TEN_PREFIXES as $tp => $tp_replacement) {
            // The optional group is a digit, preceded by any number of
            // spaces, that is not itself followed by another digit.
            $string = preg_replace_callback(
                '/(?:' . $tp . ')( *\d(?=[^\d]|$))*/i',
                function ($m) use ($tp_replacement) {
                    // $m[1] is absent when no digit followed the word; the
                    // int cast ignores the leading spaces of the capture.
                    return $tp_replacement + (isset($m[1]) ? (int)$m[1] : 0);
                },
                $string
            );
        }
        return $string;
    }

    /**
     * Replaces hundreds, thousands, millions, etc. by multiplying the
     * digits in front of the word with the magnitude, then merges the
     * resulting adjacent numbers via _andition() after every magnitude.
     *
     * NOTE(review): a magnitude word with no digits in front of it is
     * multiplied by (int)'' === 0, so a bare "hundred" becomes "0".
     *
     * @param string $string  Input text, after _replaceTenPrefixes().
     *
     * @return string  Text with the magnitude words folded into numbers.
     */
    protected function _replaceBigPrefixes($string)
    {
        foreach ($this->BIG_PREFIXES as $bp => $bp_replacement) {
            $string = preg_replace_callback(
                '/(\d*) *' . $bp . '/i',
                function ($m) use ($bp_replacement) {
                    return $bp_replacement * (int)$m[1];
                },
                $string
            );
            $string = $this->_andition($string);
        }
        return $string;
    }

    /**
     * Adds up neighbouring numbers that belong together.
     *
     * A pair "N and M" is always summed; a pair "N M" is summed only when
     * N has more digits than M (so "100 50" becomes "150" while "3 2010"
     * is left alone). Scanning restarts from the beginning after every
     * merge and resumes after a pair that was not merged, so one
     * non-mergeable pair does not stop later pairs from being processed.
     *
     * @param string $string  Input text containing digit sequences.
     *
     * @return string  Text with mergeable number pairs replaced by their sum.
     */
    protected function _andition($string)
    {
        $pattern = '/(\d+)( | and )(\d+)(?=[^\w]|$)/i';
        $offset = 0;

        while (preg_match($pattern, $string, $sc, PREG_OFFSET_CAPTURE, $offset)) {
            list($left, $leftPos) = $sc[1];
            list($right, $rightPos) = $sc[3];
            $rightEnd = $rightPos + strlen($right);

            if (preg_match('/and/', $sc[2][0]) || strlen($left) > strlen($right)) {
                // Splice the sum in place of "N<sep>M" and rescan, because
                // the new number may combine with its neighbours.
                $string = substr($string, 0, $leftPos)
                    . ((int)$left + (int)$right)
                    . substr($string, $rightEnd);
                $offset = 0;
            } else {
                // Not a mergeable pair: keep looking further along.
                $offset = $rightEnd;
            }
        }

        return $string;
    }

    /**
     * Resolves the "haAlf" placeholder left by _hideAHalf(): a number
     * followed by the placeholder (optionally separated by spaces, " and "
     * or hyphens) becomes that number plus 0.5, e.g. "2 and haAlf" => "2.5".
     *
     * @param string $string  Input text, after all integer replacements.
     *
     * @return string  Text with "<number> ... haAlf" replaced by the sum.
     */
    protected function _fractionalAddition($string)
    {
        return preg_replace_callback(
            '/(\d+)(?: | and |-)*haAlf/i',
            function ($m) {
                return (string)((float)$m[1] + 0.5);
            },
            $string
        );
    }

}
167