1# Copyright (c) 2006-2021  Andrey Golovizin
2#
3# Permission is hereby granted, free of charge, to any person obtaining
4# a copy of this software and associated documentation files (the
5# "Software"), to deal in the Software without restriction, including
6# without limitation the rights to use, copy, modify, merge, publish,
7# distribute, sublicense, and/or sell copies of the Software, and to
8# permit persons to whom the Software is furnished to do so, subject to
9# the following conditions:
10#
11# The above copyright notice and this permission notice shall be
12# included in all copies or substantial portions of the Software.
13#
14# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
22from __future__ import unicode_literals
23
24import re
25
26from pybtex.py3compat import fix_unicode_literals_in_doctest
27from pybtex.utils import deprecated
28
# Strings that count as sentence-ending punctuation.
terminators = ('.', '?', '!')
# A single whitespace character or hyphen.  The capturing group makes
# re.split() keep the matched delimiter in its result.
delimiter_re = re.compile(r'([\s\-])')
# A run of one or more whitespace characters.
whitespace_re = re.compile(r'\s+')
32
33
@deprecated('0.19', 'use str.capitalize() instead')
def capfirst(s):
    """Return s with its first character upper-cased.

    The remainder of the string is left untouched.  A falsy value
    (such as the empty string) is returned unchanged.
    """
    if not s:
        return s
    head, tail = s[0], s[1:]
    return head.upper() + tail
37
38
def is_terminated(text):
    """
    Tell whether text ends with one of the module-level ``terminators``.

    Trailing whitespace is not skipped, so text that ends with a space
    after the punctuation is not considered terminated.

    >>> is_terminated('')
    False
    >>> is_terminated('.')
    True
    >>> is_terminated('Done')
    False
    >>> is_terminated('Done. ')
    False
    >>> is_terminated('Done.')
    True
    >>> is_terminated('Done...')
    True
    >>> is_terminated('Done!')
    True
    >>> is_terminated('Done?')
    True
    >>> is_terminated('Done?!')
    True
    """

    # str.endswith() accepts a tuple and checks all candidates in one call.
    ends_with_terminator = text.endswith(terminators)
    return ends_with_terminator
64
65
def add_period(text):
    """Return text with a trailing period appended when it lacks one.

    Empty text and text that is already terminated (see is_terminated)
    are returned unchanged.

    >>> print(add_period(''))
    <BLANKLINE>
    >>> print(add_period('.'))
    .
    >>> print(add_period('Done'))
    Done.
    >>> print(add_period('Done. '))
    Done. .
    >>> print(add_period('Done.'))
    Done.
    >>> print(add_period('Done...'))
    Done...
    >>> print(add_period('Done!'))
    Done!
    >>> print(add_period('Done?'))
    Done?
    >>> print(add_period('Done?!'))
    Done?!
    """

    # Nothing to add for empty or already terminated text.
    if not text or is_terminated(text):
        return text
    return text + '.'
92
93
@fix_unicode_literals_in_doctest
def abbreviate(text, split=delimiter_re.split):
    """Abbreviate the given text.

    The text is cut into parts with ``split``.  Every part that consists
    only of alphabetic characters is replaced by its first character
    followed by a period; every other part is kept as is.

    ``split`` is a callable that takes the text and returns an iterable
    of parts.  The default splits on single whitespace characters and
    hyphens and keeps those delimiters in the result.

    >>> abbreviate('Name')
    u'N.'
    >>> abbreviate('Some words')
    u'S. w.'
    >>> abbreviate('First-Second')
    u'F.-S.'
    """

    def abbreviate_part(part):
        # Only purely alphabetic parts are shortened; delimiters, empty
        # strings and anything else pass through untouched.
        if part.isalpha():
            return part[0] + '.'
        return part

    return ''.join(abbreviate_part(part) for part in split(text))
113
114
def normalize_whitespace(string):
    r"""
    Collapse each run of whitespace into one space.

    Leading and trailing whitespace is stripped first.

    >>> print(normalize_whitespace('abc'))
    abc
    >>> print(normalize_whitespace('Abc def.'))
    Abc def.
    >>> print(normalize_whitespace(' Abc def.'))
    Abc def.
    >>> print(normalize_whitespace('Abc\ndef.'))
    Abc def.
    >>> print(normalize_whitespace('Abc\r\ndef.'))
    Abc def.
    >>> print(normalize_whitespace('Abc    \r\n\tdef.'))
    Abc def.
    >>> print(normalize_whitespace('   \nAbc\r\ndef.'))
    Abc def.
    """

    trimmed = string.strip()
    return whitespace_re.sub(' ', trimmed)
136
137
def width(string):
    r"""
    Return the width of the typeset string, in relative units.

    The result is the sum of the per-character widths found in
    ``pybtex.charwidths.charwidths``; characters missing from that table
    contribute 0.  Similar to BibTeX's width$, but does not care about
    any "special characters".

    >>> width('')
    0
    >>> width('abc')
    1500
    >>> width('ab{c}')
    2500
    >>> width(r"ab{\'c}")
    3278
    >>> width(r"ab{\'c{}}")
    4278
    >>> width(r"ab{\'c{}")
    3778
    >>> width(r"ab{\'c{d}}")
    4834
    """

    from pybtex.charwidths import charwidths

    per_char_widths = (charwidths.get(char, 0) for char in string)
    return sum(per_char_widths)
161
162
def tie_or_space(word, tie='~', space=' ', enough_chars=3, other_word=None):
    """Choose between a tie and a space depending on word length.

    Returns ``tie`` when the word has fewer than ``enough_chars``
    characters, otherwise ``space``.  If ``other_word`` is given (not
    None), the length of the shorter of the two words is used.
    """
    lengths = [len(word)]
    if other_word is not None:
        lengths.append(len(other_word))

    shortest = min(lengths)
    return tie if shortest < enough_chars else space
172