1# vim: fileencoding=utf-8 2# Copyright (c) 2006-2021 Andrey Golovizin 3# 4# Permission is hereby granted, free of charge, to any person obtaining 5# a copy of this software and associated documentation files (the 6# "Software"), to deal in the Software without restriction, including 7# without limitation the rights to use, copy, modify, merge, publish, 8# distribute, sublicense, and/or sell copies of the Software, and to 9# permit persons to whom the Software is furnished to do so, subject to 10# the following conditions: 11# 12# The above copyright notice and this permission notice shall be 13# included in all copies or substantial portions of the Software. 14# 15# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 18# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 19# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 20# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 21# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 23from __future__ import unicode_literals 24from __future__ import print_function 25import re 26 27try: 28 from collections.abc import Mapping 29except ImportError: 30 from collections import Mapping 31 32import six 33import textwrap 34 35from pybtex.exceptions import PybtexError 36from pybtex.utils import ( 37 deprecated, 38 OrderedCaseInsensitiveDict, CaseInsensitiveDefaultDict, CaseInsensitiveSet 39) 40from pybtex.richtext import Text 41from pybtex.bibtex.utils import split_tex_string, scan_bibtex_string 42from pybtex.errors import report_error 43from pybtex.py3compat import fix_unicode_literals_in_doctest, python_2_unicode_compatible 44from pybtex.plugin import find_plugin 45 46 47# for python2 compatibility 48def indent(text, prefix): 49 if hasattr(textwrap, "indent"): 50 return textwrap.indent(text, prefix) 51 else: 52 return ''.join(prefix + line for line in text.splitlines(True)) 53 54 55class BibliographyDataError(PybtexError): 56 pass 57 58 59class InvalidNameString(PybtexError): 60 def __init__(self, name_string): 61 message = 'Too many commas in {}'.format(repr(name_string)) 62 super(InvalidNameString, self).__init__(message) 63 64 65class BibliographyData(object): 66 def __init__(self, entries=None, preamble=None, wanted_entries=None, min_crossrefs=2): 67 """ 68 A :py:class:`.BibliographyData` object contains a dictionary of bibliography 69 entries referenced by their keys. 70 Each entry represented by an :py:class:`.Entry` object. 71 72 Additionally, :py:class:`.BibliographyData` may contain a LaTeX 73 preamble defined by ``@PREAMBLE`` commands in the BibTeX file. 74 """ 75 76 self.entries = OrderedCaseInsensitiveDict() 77 '''A dictionary of bibliography entries referenced by their keys. 78 79 The dictionary is case insensitive: 80 81 >>> bib_data = parse_string(""" 82 ... @ARTICLE{gnats, 83 ... author = {L[eslie] A. Aamport}, 84 ... title = {The Gnats and Gnus Document Preparation System}, 85 ... } 86 ... """, 'bibtex') 87 >>> bib_data.entries['gnats'] == bib_data.entries['GNATS'] 88 True 89 90 ''' 91 92 self.crossref_count = CaseInsensitiveDefaultDict(int) 93 self.min_crossrefs = min_crossrefs 94 self._preamble = [] 95 if wanted_entries is not None: 96 self.wanted_entries = CaseInsensitiveSet(wanted_entries) 97 self.citations = CaseInsensitiveSet(wanted_entries) 98 else: 99 self.wanted_entries = None 100 self.citations = CaseInsensitiveSet() 101 if entries: 102 if isinstance(entries, Mapping): 103 entries = entries.items() 104 for (key, entry) in entries: 105 self.add_entry(key, entry) 106 if preamble: 107 self._preamble.extend(preamble) 108 109 def __eq__(self, other): 110 if not isinstance(other, BibliographyData): 111 return super(BibliographyData, self) == other 112 return ( 113 self.entries == other.entries 114 and self._preamble == other._preamble 115 ) 116 117 def __repr__(self): 118 119 repr_entry = repr(self.entries) 120 keys = self.entries.keys() 121 122 for key in keys: 123 ind = repr_entry.index(key) - 2 # find first instance 124 repr_entry = repr_entry[:ind] + "\n" + repr_entry[ind:] 125 126 repr_entry = indent(repr_entry, prefix=" ") 127 repr_entry = repr_entry[4:] # drop 1st indent 128 129 return ( 130 "BibliographyData(\n" 131 " entries={0},\n\n" 132 " preamble={1})".format(repr_entry, repr(self._preamble)) 133 ) 134 135 def add_to_preamble(self, *values): 136 self._preamble.extend(values) 137 138 @property 139 def preamble(self): 140 r''' 141 LaTeX preamble. 142 143 >>> bib_data = parse_string(r""" 144 ... @PREAMBLE{"\newcommand{\noopsort}[1]{}"} 145 ... """, 'bibtex') 146 >>> print(bib_data.preamble) 147 \newcommand{\noopsort}[1]{} 148 149 .. versionadded:: 0.19 150 Earlier versions used :py:meth:`.get_preamble()`, which is now deprecated. 151 ''' 152 return ''.join(self._preamble) 153 154 @deprecated('0.19', 'use BibliographyData.preamble instead') 155 def get_preamble(self): 156 """ 157 .. deprecated:: 0.19 158 Use :py:attr:`.preamble` instead. 159 """ 160 return self.preamble 161 162 def want_entry(self, key): 163 return ( 164 self.wanted_entries is None 165 or key in self.wanted_entries 166 or '*' in self.wanted_entries 167 ) 168 169 def get_canonical_key(self, key): 170 if key in self.citations: 171 return self.citations.get_canonical_key(key) 172 else: 173 return key 174 175 def add_entry(self, key, entry): 176 if not self.want_entry(key): 177 return 178 if key in self.entries: 179 report_error(BibliographyDataError('repeated bibliograhpy entry: %s' % key)) 180 return 181 entry.key = self.get_canonical_key(key) 182 self.entries[entry.key] = entry 183 try: 184 crossref = entry.fields['crossref'] 185 except KeyError: 186 pass 187 else: 188 if self.wanted_entries is not None: 189 self.wanted_entries.add(crossref) 190 191 def add_entries(self, entries): 192 for key, entry in entries: 193 self.add_entry(key, entry) 194 195 @fix_unicode_literals_in_doctest 196 def _get_crossreferenced_citations(self, citations, min_crossrefs): 197 r""" 198 Get cititations not cited explicitly but referenced by other citations. 199 200 >>> from pybtex.database import Entry 201 >>> data = BibliographyData({ 202 ... 'main_article': Entry('article', {'crossref': 'xrefd_arcicle'}), 203 ... 'xrefd_arcicle': Entry('article'), 204 ... }) 205 >>> list(data._get_crossreferenced_citations([], min_crossrefs=1)) 206 [] 207 >>> list(data._get_crossreferenced_citations(['main_article'], min_crossrefs=1)) 208 [u'xrefd_arcicle'] 209 >>> list(data._get_crossreferenced_citations(['Main_article'], min_crossrefs=1)) 210 [u'xrefd_arcicle'] 211 >>> list(data._get_crossreferenced_citations(['main_article'], min_crossrefs=2)) 212 [] 213 >>> list(data._get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1)) 214 [] 215 216 >>> data2 = BibliographyData(data.entries, wanted_entries=data.entries.keys()) 217 >>> list(data2._get_crossreferenced_citations([], min_crossrefs=1)) 218 [] 219 >>> list(data2._get_crossreferenced_citations(['main_article'], min_crossrefs=1)) 220 [u'xrefd_arcicle'] 221 >>> list(data2._get_crossreferenced_citations(['Main_article'], min_crossrefs=1)) 222 [u'xrefd_arcicle'] 223 >>> list(data2._get_crossreferenced_citations(['main_article'], min_crossrefs=2)) 224 [] 225 >>> list(data2._get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1)) 226 [] 227 >>> list(data2._get_crossreferenced_citations(['xrefd_arcicle'], min_crossrefs=1)) 228 [] 229 230 """ 231 232 crossref_count = CaseInsensitiveDefaultDict(int) 233 citation_set = CaseInsensitiveSet(citations) 234 for citation in citations: 235 try: 236 entry = self.entries[citation] 237 crossref = entry.fields['crossref'] 238 except KeyError: 239 continue 240 try: 241 crossref_entry = self.entries[crossref] 242 except KeyError: 243 report_error(BibliographyDataError( 244 'bad cross-reference: entry "{key}" refers to ' 245 'entry "{crossref}" which does not exist.'.format( 246 key=citation, crossref=crossref, 247 ) 248 )) 249 continue 250 251 canonical_crossref = crossref_entry.key 252 crossref_count[canonical_crossref] += 1 253 if crossref_count[canonical_crossref] >= min_crossrefs and canonical_crossref not in citation_set: 254 citation_set.add(canonical_crossref) 255 yield canonical_crossref 256 257 @fix_unicode_literals_in_doctest 258 def _expand_wildcard_citations(self, citations): 259 r""" 260 Expand wildcard citations (\citation{*} in .aux file). 261 262 >>> from pybtex.database import Entry 263 >>> data = BibliographyData(( 264 ... ('uno', Entry('article')), 265 ... ('dos', Entry('article')), 266 ... ('tres', Entry('article')), 267 ... ('cuatro', Entry('article')), 268 ... )) 269 >>> list(data._expand_wildcard_citations([])) 270 [] 271 >>> list(data._expand_wildcard_citations(['*'])) 272 [u'uno', u'dos', u'tres', u'cuatro'] 273 >>> list(data._expand_wildcard_citations(['uno', '*'])) 274 [u'uno', u'dos', u'tres', u'cuatro'] 275 >>> list(data._expand_wildcard_citations(['dos', '*'])) 276 [u'dos', u'uno', u'tres', u'cuatro'] 277 >>> list(data._expand_wildcard_citations(['*', 'uno'])) 278 [u'uno', u'dos', u'tres', u'cuatro'] 279 >>> list(data._expand_wildcard_citations(['*', 'DOS'])) 280 [u'uno', u'dos', u'tres', u'cuatro'] 281 282 """ 283 284 citation_set = CaseInsensitiveSet() 285 for citation in citations: 286 if citation == '*': 287 for key in self.entries: 288 if key not in citation_set: 289 citation_set.add(key) 290 yield key 291 else: 292 if citation not in citation_set: 293 citation_set.add(citation) 294 yield citation 295 296 def add_extra_citations(self, citations, min_crossrefs): 297 expanded_citations = list(self._expand_wildcard_citations(citations)) 298 crossrefs = list(self._get_crossreferenced_citations(expanded_citations, min_crossrefs)) 299 return expanded_citations + crossrefs 300 301 def to_string(self, bib_format, **kwargs): 302 """ 303 Return the data as a unicode string in the given format. 304 305 :param bib_format: Data format ("bibtex", "yaml", etc.). 306 307 .. versionadded:: 0.19 308 """ 309 writer = find_plugin('pybtex.database.output', bib_format)(**kwargs) 310 return writer.to_string(self) 311 312 @classmethod 313 def from_string(cls, value, bib_format, **kwargs): 314 """ 315 Return the data from a unicode string in the given format. 316 317 :param bib_format: Data format ("bibtex", "yaml", etc.). 318 319 .. versionadded:: 0.22.2 320 """ 321 return parse_string(value, bib_format, **kwargs) 322 323 def to_bytes(self, bib_format, **kwargs): 324 """ 325 Return the data as a byte string in the given format. 326 327 :param bib_format: Data format ("bibtex", "yaml", etc.). 328 329 .. versionadded:: 0.19 330 """ 331 writer = find_plugin('pybtex.database.output', bib_format)(**kwargs) 332 return writer.to_bytes(self) 333 334 def to_file(self, file, bib_format=None, **kwargs): 335 """ 336 Save the data to a file. 337 338 :param file: A file name or a file-like object. 339 :param bib_format: Data format ("bibtex", "yaml", etc.). 340 If not specified, Pybtex will try to guess by the file name. 341 342 .. versionadded:: 0.19 343 """ 344 if isinstance(file, six.string_types): 345 filename = file 346 else: 347 filename = getattr(file, 'name', None) 348 writer = find_plugin('pybtex.database.output', bib_format, filename=filename)(**kwargs) 349 return writer.write_file(self, file) 350 351 @fix_unicode_literals_in_doctest 352 def lower(self): 353 u''' 354 Return another :py:class:`.BibliographyData` with all identifiers converted to lowercase. 355 356 >>> data = parse_string(""" 357 ... @BOOK{Obrazy, 358 ... title = "Obrazy z Rus", 359 ... author = "Karel Havlíček Borovský", 360 ... } 361 ... @BOOK{Elegie, 362 ... title = "Tirolské elegie", 363 ... author = "Karel Havlíček Borovský", 364 ... } 365 ... """, 'bibtex') 366 >>> data_lower = data.lower() 367 >>> list(data_lower.entries.keys()) 368 [u'obrazy', u'elegie'] 369 >>> for entry in data_lower.entries.values(): 370 ... entry.key 371 ... list(entry.persons.keys()) 372 ... list(entry.fields.keys()) 373 u'obrazy' 374 [u'author'] 375 [u'title'] 376 u'elegie' 377 [u'author'] 378 [u'title'] 379 380 ''' 381 382 entries_lower = ((key.lower(), entry.lower()) for key, entry in self.entries.items()) 383 return type(self)( 384 entries=entries_lower, 385 preamble=self._preamble, 386 wanted_entries=self.wanted_entries, 387 min_crossrefs=self.min_crossrefs, 388 ) 389 390 391class RichFieldProxyDict(Mapping): 392 def __init__(self, fields): 393 self._fields = fields 394 395 def __contains__(self): 396 return self._fields.__contains__() 397 398 def __iter__(self): 399 return self._fields.__iter__() 400 401 def __len__(self): 402 return self._fields.__len__() 403 404 def __getitem__(self, key): 405 return Text.from_latex(self._fields[key]) 406 407 408class Entry(object): 409 """A bibliography entry.""" 410 411 type = None 412 """Entry type (``'book'``, ``'article'``, etc.).""" 413 414 key = None 415 """Entry key (for example, ``'fukushima1980neocognitron'``).""" 416 417 fields = None 418 """A dictionary of entry fields. 419 The dictionary is ordered and case-insensitive.""" 420 421 persons = None 422 """ 423 A dictionary of entry persons, by their roles. 424 425 The most often used roles are ``'author'`` and ``'editor'``. 426 """ 427 428 """A reference to the containing :py:class:`.BibliographyData` object. Used to resolve crossrefs.""" 429 430 def __init__(self, type_, fields=None, persons=None): 431 if fields is None: 432 fields = {} 433 if persons is None: 434 persons = {} 435 self.type = type_.lower() 436 self.original_type = type_ 437 self.fields = OrderedCaseInsensitiveDict(fields) 438 self.persons = OrderedCaseInsensitiveDict(persons) 439 440 def __eq__(self, other): 441 if not isinstance(other, Entry): 442 return super(Entry, self) == other 443 return ( 444 self.type == other.type 445 and self.fields == other.fields 446 and self.persons == other.persons 447 ) 448 449 def __repr__(self): 450 # represent the fields as a list of tuples for simplicity 451 repr_fields = repr(list(self.fields.items())) 452 keys = self.fields.keys() 453 454 for key in keys: 455 ind = repr_fields.index(key) - 2 # find first instance 456 repr_fields = repr_fields[:ind] + "\n" + repr_fields[ind:] 457 458 repr_fields = indent(repr_fields, prefix=" ") 459 repr_fields = repr_fields[4:] # drop 1st indent 460 461 return ( 462 "Entry({0},\n" 463 " fields={1},\n" 464 " persons={2})".format(repr(self.type), repr_fields, repr(self.persons)) 465 ) 466 467 def add_person(self, person, role): 468 self.persons.setdefault(role, []).append(person) 469 470 def lower(self): 471 return type(self)( 472 self.type, 473 fields=self.fields.lower(), 474 persons=self.persons.lower(), 475 ) 476 477 def _find_person_field(self, role): 478 persons = self.persons[role] 479 return ' and '.join(six.text_type(person) for person in persons) 480 481 def _find_crossref_field(self, name, bib_data): 482 if bib_data is None or 'crossref' not in self.fields: 483 raise KeyError(name) 484 referenced_entry = bib_data.entries[self.fields['crossref']] 485 return referenced_entry._find_field(name, bib_data) 486 487 def _find_field(self, name, bib_data=None): 488 """ 489 Find the field with the given ``name`` according to this rules: 490 491 - If the given field ``name`` in in ``self.fields``, just return 492 self.fields[name]. 493 494 - Otherwise, if ``name`` is ``"authors"`` or ``"editors"`` (or any other 495 person role), return the list of names as a string, separated by 496 ``" and "``. 497 498 - Otherwise, if this entry has a ``crossreff`` field, look up for the 499 cross-referenced entry and try to find its field with the given 500 ``name``. 501 """ 502 try: 503 return self.fields[name] 504 except KeyError: 505 try: 506 return self._find_person_field(name) 507 except KeyError: 508 return self._find_crossref_field(name, bib_data) 509 510 def to_string(self, bib_format, **kwargs): 511 """ 512 Return the data as a unicode string in the given format. 513 514 :param bib_format: Data format ("bibtex", "yaml", etc.). 515 516 """ 517 writer = find_plugin('pybtex.database.output', bib_format)(**kwargs) 518 return writer.to_string(BibliographyData(entries={self.key: self})) 519 520 @classmethod 521 def from_string(cls, value, bib_format, entry_number=0, **kwargs): 522 """ 523 Return the data from a unicode string in the given format. 524 525 :param bib_format: Data format ("bibtex", "yaml", etc.). 526 :param entry_number: entry number if the string has more than one. 527 528 .. versionadded:: 0.22.2 529 """ 530 # get bibliography 531 bibdata = BibliographyData.from_string(value, bib_format, **kwargs) 532 # grab specific instance 533 key = tuple(bibdata.entries.keys())[entry_number] 534 return bibdata.entries[key] 535 536 537@python_2_unicode_compatible 538@fix_unicode_literals_in_doctest 539class Person(object): 540 """A person or some other person-like entity. 541 542 >>> knuth = Person('Donald E. Knuth') 543 >>> knuth.first_names 544 [u'Donald'] 545 >>> knuth.middle_names 546 [u'E.'] 547 >>> knuth.last_names 548 [u'Knuth'] 549 550 """ 551 552 first_names = None 553 """ 554 A list of first names. 555 556 .. versionadded:: 0.19 557 Earlier versions used :py:meth:`.first`, which is now deprecated. 558 """ 559 560 middle_names = None 561 """ 562 A list of middle names. 563 564 .. versionadded:: 0.19 565 Earlier versions used :py:meth:`.middle`, which is now deprecated. 566 """ 567 568 prelast_names = None 569 """ 570 A list of pre-last (aka von) name parts. 571 572 .. versionadded:: 0.19 573 Earlier versions used :py:meth:`.middle`, which is now deprecated. 574 """ 575 576 last_names = None 577 """ 578 A list of last names. 579 580 .. versionadded:: 0.19 581 Earlier versions used :py:meth:`.last`, which is now deprecated. 582 """ 583 584 lineage_names = None 585 """ 586 A list of linage (aka Jr) name parts. 587 588 .. versionadded:: 0.19 589 Earlier versions used :py:meth:`.lineage`, which is now deprecated. 590 """ 591 592 valid_roles = ['author', 'editor'] 593 style1_re = re.compile(r'^(.+),\s*(.+)$') 594 style2_re = re.compile(r'^(.+),\s*(.+),\s*(.+)$') 595 596 def __init__(self, string="", first="", middle="", prelast="", last="", lineage=""): 597 """ 598 :param string: The full name string. 599 It will be parsed and split into separate first, last, middle, 600 pre-last and lineage name parst. 601 602 Supported name formats are: 603 604 - von Last, First 605 - von Last, Jr, First 606 - First von Last 607 608 (see BibTeX manual for explanation) 609 610 """ 611 612 self.first_names = [] 613 self.middle_names = [] 614 self.prelast_names = [] 615 self.last_names = [] 616 self.lineage_names = [] 617 618 string = string.strip() 619 if string: 620 self._parse_string(string) 621 self.first_names.extend(split_tex_string(first)) 622 self.middle_names.extend(split_tex_string(middle)) 623 self.prelast_names.extend(split_tex_string(prelast)) 624 self.last_names.extend(split_tex_string(last)) 625 self.lineage_names.extend(split_tex_string(lineage)) 626 627 @property 628 @fix_unicode_literals_in_doctest 629 def bibtex_first_names(self): 630 """A list of first and middle names together. 631 (BibTeX treats all middle names as first.) 632 633 .. versionadded:: 0.19 634 Earlier versions used :py:meth:`Person.bibtex_first`, which is now deprecated. 635 636 637 >>> knuth = Person('Donald E. Knuth') 638 >>> knuth.bibtex_first_names 639 [u'Donald', u'E.'] 640 """ 641 return self.first_names + self.middle_names 642 643 @fix_unicode_literals_in_doctest 644 def _parse_string(self, name): 645 """Extract various parts of the name from a string. 646 647 >>> p = Person('Avinash K. Dixit') 648 >>> print(p.first_names) 649 [u'Avinash'] 650 >>> print(p.middle_names) 651 [u'K.'] 652 >>> print(p.prelast_names) 653 [] 654 >>> print(p.last_names) 655 [u'Dixit'] 656 >>> print(p.lineage_names) 657 [] 658 >>> print(six.text_type(p)) 659 Dixit, Avinash K. 660 >>> p == Person(six.text_type(p)) 661 True 662 >>> p = Person('Dixit, Jr, Avinash K. ') 663 >>> print(p.first_names) 664 [u'Avinash'] 665 >>> print(p.middle_names) 666 [u'K.'] 667 >>> print(p.prelast_names) 668 [] 669 >>> print(p.last_names) 670 [u'Dixit'] 671 >>> print(p.lineage_names) 672 [u'Jr'] 673 >>> print(six.text_type(p)) 674 Dixit, Jr, Avinash K. 675 >>> p == Person(six.text_type(p)) 676 True 677 678 >>> p = Person('abc') 679 >>> print(p.first_names, p.middle_names, p.prelast_names, p.last_names, p.lineage_names) 680 [] [] [] [u'abc'] [] 681 >>> p = Person('Viktorov, Michail~Markovitch') 682 >>> print(p.first_names, p.middle_names, p.prelast_names, p.last_names, p.lineage_names) 683 [u'Michail'] [u'Markovitch'] [] [u'Viktorov'] [] 684 """ 685 def process_first_middle(parts): 686 try: 687 self.first_names.append(parts[0]) 688 self.middle_names.extend(parts[1:]) 689 except IndexError: 690 pass 691 692 def process_von_last(parts): 693 # von cannot be the last name in the list 694 von_last = parts[:-1] 695 definitely_not_von = parts[-1:] 696 697 if von_last: 698 von, last = rsplit_at(von_last, is_von_name) 699 self.prelast_names.extend(von) 700 self.last_names.extend(last) 701 self.last_names.extend(definitely_not_von) 702 703 def find_pos(lst, pred): 704 for i, item in enumerate(lst): 705 if pred(item): 706 return i 707 return i + 1 708 709 def split_at(lst, pred): 710 """Split the given list into two parts. 711 712 The second part starts with the first item for which the given 713 predicate is True. 714 """ 715 pos = find_pos(lst, pred) 716 return lst[:pos], lst[pos:] 717 718 def rsplit_at(lst, pred): 719 rpos = find_pos(reversed(lst), pred) 720 pos = len(lst) - rpos 721 return lst[:pos], lst[pos:] 722 723 def is_von_name(string): 724 if string[0].isupper(): 725 return False 726 if string[0].islower(): 727 return True 728 else: 729 for char, brace_level in scan_bibtex_string(string): 730 if brace_level == 0 and char.isalpha(): 731 return char.islower() 732 elif brace_level == 1 and char.startswith('\\'): 733 return special_char_islower(char) 734 return False 735 736 def special_char_islower(special_char): 737 control_sequence = True 738 for char in special_char[1:]: # skip the backslash 739 if control_sequence: 740 if not char.isalpha(): 741 control_sequence = False 742 else: 743 if char.isalpha(): 744 return char.islower() 745 return False 746 747 parts = split_tex_string(name, ',') 748 if len(parts) > 3: 749 report_error(InvalidNameString(name)) 750 last_parts = parts[2:] 751 parts = parts[:2] + [' '.join(last_parts)] 752 753 if len(parts) == 3: # von Last, Jr, First 754 process_von_last(split_tex_string(parts[0])) 755 self.lineage_names.extend(split_tex_string(parts[1])) 756 process_first_middle(split_tex_string(parts[2])) 757 elif len(parts) == 2: # von Last, First 758 process_von_last(split_tex_string(parts[0])) 759 process_first_middle(split_tex_string(parts[1])) 760 elif len(parts) == 1: # First von Last 761 parts = split_tex_string(name) 762 first_middle, von_last = split_at(parts, is_von_name) 763 if not von_last and first_middle: 764 last = first_middle.pop() 765 von_last.append(last) 766 process_first_middle(first_middle) 767 process_von_last(von_last) 768 else: 769 # should hot really happen 770 raise ValueError(name) 771 772 def __eq__(self, other): 773 if not isinstance(other, Person): 774 return super(Person, self) == other 775 return ( 776 self.first_names == other.first_names 777 and self.middle_names == other.middle_names 778 and self.prelast_names == other.prelast_names 779 and self.last_names == other.last_names 780 and self.lineage_names == other.lineage_names 781 ) 782 783 def __str__(self): 784 # von Last, Jr, First 785 von_last = ' '.join(self.prelast_names + self.last_names) 786 jr = ' '.join(self.lineage_names) 787 first = ' '.join(self.first_names + self.middle_names) 788 return ', '.join(part for part in (von_last, jr, first) if part) 789 790 def __repr__(self): 791 return 'Person({0})'.format(repr(six.text_type(self))) 792 793 def get_part_as_text(self, type): 794 names = getattr(self, type + '_names') 795 return ' '.join(names) 796 797 @fix_unicode_literals_in_doctest 798 def get_part(self, type, abbr=False): 799 """Get a list of name parts by `type`. 800 801 >>> knuth = Person('Donald E. Knuth') 802 >>> knuth.get_part('first') 803 [u'Donald'] 804 >>> knuth.get_part('last') 805 [u'Knuth'] 806 """ 807 808 names = getattr(self, type + '_names') 809 if abbr: 810 import warnings 811 warnings.warn('Person.get_part(abbr=True) is deprecated since 0.19: use pybtex.textutils.abbreviate()', stacklevel=2) 812 from pybtex.textutils import abbreviate 813 names = [abbreviate(name) for name in names] 814 return names 815 816 @property 817 def rich_first_names(self): 818 """ 819 A list of first names converted to :ref:`rich text <rich-text>`. 820 821 .. versionadded:: 0.20 822 """ 823 824 return [Text.from_latex(name) for name in self.first_names] 825 826 @property 827 def rich_middle_names(self): 828 """ 829 A list of middle names converted to :ref:`rich text <rich-text>`. 830 831 .. versionadded:: 0.20 832 """ 833 return [Text.from_latex(name) for name in self.middle_names] 834 835 @property 836 def rich_prelast_names(self): 837 """ 838 A list of pre-last (aka von) name parts converted to :ref:`rich text <rich-text>`. 839 840 .. versionadded:: 0.20 841 """ 842 return [Text.from_latex(name) for name in self.prelast_names] 843 844 @property 845 def rich_last_names(self): 846 """ 847 A list of last names converted to :ref:`rich text <rich-text>`. 848 849 .. versionadded:: 0.20 850 """ 851 return [Text.from_latex(name) for name in self.last_names] 852 853 @property 854 def rich_lineage_names(self): 855 """ 856 A list of lineage (aka Jr) name parts converted to :ref:`rich text <rich-text>`. 857 858 .. versionadded:: 0.20 859 """ 860 return [Text.from_latex(name) for name in self.lineage_names] 861 862 @deprecated('0.19', 'use Person.first_names instead') 863 def first(self, abbr=False): 864 """ 865 .. deprecated:: 0.19 866 Use :py:attr:`.first_names` instead. 867 """ 868 return self.get_part('first', abbr) 869 870 @deprecated('0.19', 'use Person.middle_names instead') 871 def middle(self, abbr=False): 872 """ 873 .. deprecated:: 0.19 874 Use :py:attr:`.middle_names` instead. 875 """ 876 return self.get_part('middle', abbr) 877 878 @deprecated('0.19', 'use Person.prelast_names instead') 879 def prelast(self, abbr=False): 880 """ 881 .. deprecated:: 0.19 882 Use :py:attr:`.prelast_names` instead. 883 """ 884 return self.get_part('prelast', abbr) 885 886 @deprecated('0.19', 'use Person.last_names instead') 887 def last(self, abbr=False): 888 """ 889 .. deprecated:: 0.19 890 Use :py:attr:`.last_names` instead. 891 """ 892 return self.get_part('last', abbr) 893 894 @deprecated('0.19', 'use Person.lineage_names instead') 895 def lineage(self, abbr=False): 896 """ 897 .. deprecated:: 0.19 898 Use :py:attr:`.lineage_names` instead. 899 """ 900 return self.get_part('lineage', abbr) 901 902 @deprecated('0.19', 'use Person.bibtex_first_names instead') 903 def bibtex_first(self): 904 """ 905 .. deprecated:: 0.19 906 Use :py:attr:`.bibtex_first_names` instead. 907 """ 908 return self.bibtex_first_names 909 910 911def parse_file(file, bib_format=None, **kwargs): 912 """ 913 Read bibliography data from file and return a :py:class:`.BibliographyData` object. 914 915 :param file: A file name or a file-like object. 916 :param bib_format: Data format ("bibtex", "yaml", etc.). 917 If not specified, Pybtex will try to guess by the file name. 918 919 .. versionadded:: 0.19 920 """ 921 922 if isinstance(file, six.string_types): 923 filename = file 924 else: 925 filename = getattr(file, 'name', None) 926 927 parser = find_plugin('pybtex.database.input', bib_format, filename=filename)(**kwargs) 928 return parser.parse_file(file) 929 930 931def parse_string(value, bib_format, **kwargs): 932 """ 933 Parse a Unicode string containing bibliography data and return a :py:class:`.BibliographyData` object. 934 935 :param value: Unicode string. 936 :param bib_format: Data format ("bibtex", "yaml", etc.). 937 938 .. versionadded:: 0.19 939 """ 940 941 parser = find_plugin('pybtex.database.input', bib_format)(**kwargs) 942 return parser.parse_string(value) 943 944 945def parse_bytes(value, bib_format, **kwargs): 946 """ 947 Parse a byte string containing bibliography data and return a :py:class:`.BibliographyData` object. 948 949 :param value: Byte string. 950 :param bib_format: Data format (for example, "bibtexml"). 951 952 .. versionadded:: 0.19 953 """ 954 955 parser = find_plugin('pybtex.database.input', bib_format)(**kwargs) 956 return parser.parse_bytes(value) 957