1 /**
2 * \file BiblioInfo.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
5 *
6 * \author Angus Leeming
7 * \author Herbert Voß
8 * \author Richard Heck
9 * \author Julien Rioux
10 * \author Jürgen Spitzmüller
11 *
12 * Full author contact details are available in file CREDITS.
13 */
14
15 #include <config.h>
16
17 #include "BiblioInfo.h"
18 #include "Buffer.h"
19 #include "BufferParams.h"
20 #include "buffer_funcs.h"
21 #include "Citation.h"
22 #include "Encoding.h"
23 #include "InsetIterator.h"
24 #include "Language.h"
25 #include "output_xhtml.h"
26 #include "Paragraph.h"
27 #include "TextClass.h"
28 #include "TocBackend.h"
29
30 #include "support/convert.h"
31 #include "support/debug.h"
32 #include "support/docstream.h"
33 #include "support/gettext.h"
34 #include "support/lassert.h"
35 #include "support/lstrings.h"
36 #include "support/regex.h"
37 #include "support/textutils.h"
38
39 #include <map>
40 #include <set>
41
42 using namespace std;
43 using namespace lyx::support;
44
45
46 namespace lyx {
47
48 namespace {
49
50 // Remove placeholders from names
renormalize(docstring const & input)51 docstring renormalize(docstring const & input)
52 {
53 docstring res = subst(input, from_ascii("$$space!"), from_ascii(" "));
54 return subst(res, from_ascii("$$comma!"), from_ascii(","));
55 }
56
57
58 // Split the surname into prefix ("von-part") and family name
parseSurname(docstring const & sname)59 pair<docstring, docstring> parseSurname(docstring const & sname)
60 {
61 // Split the surname into its tokens
62 vector<docstring> pieces = getVectorFromString(sname, from_ascii(" "));
63 if (pieces.size() < 2)
64 return make_pair(docstring(), sname);
65
66 // Now we look for pieces that begin with a lower case letter.
67 // All except for the very last token constitute the "von-part".
68 docstring prefix;
69 vector<docstring>::const_iterator it = pieces.begin();
70 vector<docstring>::const_iterator const en = pieces.end();
71 bool first = true;
72 for (; it != en; ++it) {
73 if ((*it).empty())
74 continue;
75 // If this is the last piece, then what we now have is
76 // the family name, notwithstanding the casing.
77 if (it + 1 == en)
78 break;
79 char_type const c = (*it)[0];
80 // If the piece starts with a upper case char, we assume
81 // this is part of the surname.
82 if (!isLower(c))
83 break;
84 // Nothing of the former, so add this piece to the prename
85 if (!first)
86 prefix += " ";
87 else
88 first = false;
89 prefix += *it;
90 }
91
92 // Reconstruct the family name.
93 // Note that if we left the loop with because it + 1 == en,
94 // then this will still do the right thing, i.e., make surname
95 // just be the last piece.
96 docstring surname;
97 first = true;
98 for (; it != en; ++it) {
99 if (!first)
100 surname += " ";
101 else
102 first = false;
103 surname += *it;
104 }
105 return make_pair(prefix, surname);
106 }
107
108
109 struct name_parts {
110 docstring surname;
111 docstring prename;
112 docstring suffix;
113 docstring prefix;
114 };
115
116
117 // gets the name parts (prename, surname, prefix, suffix) from an author-type string
nameParts(docstring const & iname)118 name_parts nameParts(docstring const & iname)
119 {
120 name_parts res;
121 if (iname.empty())
122 return res;
123
124 // First we check for goupings (via {...}) and replace blanks and
125 // commas inside groups with temporary placeholders
126 docstring name;
127 int gl = 0;
128 docstring::const_iterator p = iname.begin();
129 while (p != iname.end()) {
130 // count grouping level
131 if (*p == '{')
132 ++gl;
133 else if (*p == '}')
134 --gl;
135 // generate string with probable placeholders
136 if (*p == ' ' && gl > 0)
137 name += from_ascii("$$space!");
138 else if (*p == ',' && gl > 0)
139 name += from_ascii("$$comma!");
140 else
141 name += *p;
142 ++p;
143 }
144
145 // Now we look for a comma, and take the last name to be everything
146 // preceding the right-most one, so that we also get the name suffix
147 // (aka "jr" part).
148 vector<docstring> pieces = getVectorFromString(name);
149 if (pieces.size() > 1) {
150 // Whether we have a name suffix or not, the prename is
151 // always last item
152 res.prename = renormalize(pieces.back());
153 // The family name, conversely, is always the first item.
154 // However, it might contain a prefix (aka "von" part)
155 docstring const sname = pieces.front();
156 res.prefix = renormalize(parseSurname(sname).first);
157 res.surname = renormalize(parseSurname(sname).second);
158 // If we have three pieces (the maximum allowed by BibTeX),
159 // the second one is the name suffix.
160 if (pieces.size() > 2)
161 res.suffix = renormalize(pieces.at(1));
162 return res;
163 }
164
165 // OK, so now we want to look for the last name.
166 // Split on spaces, to get various tokens.
167 pieces = getVectorFromString(name, from_ascii(" "));
168 // No space: Only a family name given
169 if (pieces.size() < 2) {
170 res.surname = renormalize(pieces.back());
171 return res;
172 }
173 // If we get two pieces, assume "prename surname"
174 if (pieces.size() == 2) {
175 res.prename = renormalize(pieces.front());
176 res.surname = renormalize(pieces.back());
177 return res;
178 }
179
180 // More than 3 pieces: A name prefix (aka "von" part) might be included.
181 // We look for the first piece that begins with a lower case letter
182 // (which is the name prefix, if it is not the last token) or the last token.
183 docstring prename;
184 vector<docstring>::const_iterator it = pieces.begin();
185 vector<docstring>::const_iterator const en = pieces.end();
186 bool first = true;
187 for (; it != en; ++it) {
188 if ((*it).empty())
189 continue;
190 char_type const c = (*it)[0];
191 // If the piece starts with a lower case char, we assume
192 // this is the name prefix and thus prename is complete.
193 if (isLower(c))
194 break;
195 // Same if this is the last piece, which is always the surname.
196 if (it + 1 == en)
197 break;
198 // Nothing of the former, so add this piece to the prename
199 if (!first)
200 prename += " ";
201 else
202 first = false;
203 prename += *it;
204 }
205
206 // Now reconstruct the family name and strip the prefix.
207 // Note that if we left the loop because it + 1 == en,
208 // then this will still do the right thing, i.e., make surname
209 // just be the last piece.
210 docstring surname;
211 first = true;
212 for (; it != en; ++it) {
213 if (!first)
214 surname += " ";
215 else
216 first = false;
217 surname += *it;
218 }
219 res.prename = renormalize(prename);
220 res.prefix = renormalize(parseSurname(surname).first);
221 res.surname = renormalize(parseSurname(surname).second);
222 return res;
223 }
224
225
constructName(docstring const & name,string const scheme)226 docstring constructName(docstring const & name, string const scheme)
227 {
228 // re-constructs a name from name parts according
229 // to a given scheme
230 docstring const prename = nameParts(name).prename;
231 docstring const surname = nameParts(name).surname;
232 docstring const prefix = nameParts(name).prefix;
233 docstring const suffix = nameParts(name).suffix;
234 string res = scheme;
235 static regex const reg1("(.*)(\\{%prename%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
236 static regex const reg2("(.*)(\\{%suffix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
237 static regex const reg3("(.*)(\\{%prefix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
238 smatch sub;
239 // Changing the first parameter of regex_match() may corrupt the
240 // second one. In this case we use the temporary string tmp.
241 if (regex_match(scheme, sub, reg1)) {
242 res = sub.str(1);
243 if (!prename.empty())
244 res += sub.str(3);
245 res += sub.str(5);
246 }
247 if (regex_match(res, sub, reg2)) {
248 string tmp = sub.str(1);
249 if (!suffix.empty())
250 tmp += sub.str(3);
251 res = tmp + sub.str(5);
252 }
253 if (regex_match(res, sub, reg3)) {
254 string tmp = sub.str(1);
255 if (!prefix.empty())
256 tmp += sub.str(3);
257 res = tmp + sub.str(5);
258 }
259 docstring result = from_ascii(res);
260 result = subst(result, from_ascii("%prename%"), prename);
261 result = subst(result, from_ascii("%surname%"), surname);
262 result = subst(result, from_ascii("%prefix%"), prefix);
263 result = subst(result, from_ascii("%suffix%"), suffix);
264 return result;
265 }
266
267
getAuthors(docstring const & author)268 vector<docstring> const getAuthors(docstring const & author)
269 {
270 // We check for goupings (via {...}) and only consider " and "
271 // outside groups as author separator. This is to account
272 // for cases such as {{Barnes and Noble, Inc.}}, which
273 // need to be treated as one single family name.
274 // We use temporary placeholders in order to differentiate the
275 // diverse " and " cases.
276
277 // First, we temporarily replace all ampersands. It is rather unusual
278 // in author names, but can happen (consider cases such as "C \& A Corp.").
279 docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!"));
280 // Then, we temporarily make all " and " strings to ampersands in order
281 // to handle them later on a per-char level.
282 iname = subst(iname, from_ascii(" and "), from_ascii(" & "));
283 // Now we traverse through the string and replace the "&" by the proper
284 // output in- and outside groups
285 docstring name;
286 int gl = 0;
287 docstring::const_iterator p = iname.begin();
288 while (p != iname.end()) {
289 // count grouping level
290 if (*p == '{')
291 ++gl;
292 else if (*p == '}')
293 --gl;
294 // generate string with probable placeholders
295 if (*p == '&') {
296 if (gl > 0)
297 // Inside groups, we output "and"
298 name += from_ascii("and");
299 else
300 // Outside groups, we output a separator
301 name += from_ascii("$$namesep!");
302 }
303 else
304 name += *p;
305 ++p;
306 }
307
308 // re-insert the literal ampersands
309 name = subst(name, from_ascii("$$amp!"), from_ascii("&"));
310
311 // Now construct the actual vector
312 return getVectorFromString(name, from_ascii(" $$namesep! "));
313 }
314
315
multipleAuthors(docstring const author)316 bool multipleAuthors(docstring const author)
317 {
318 return getAuthors(author).size() > 1;
319 }
320
321
322 // converts a string containing LaTeX commands into unicode
323 // for display.
convertLaTeXCommands(docstring const & str)324 docstring convertLaTeXCommands(docstring const & str)
325 {
326 docstring val = str;
327 docstring ret;
328
329 bool scanning_cmd = false;
330 bool scanning_math = false;
331 bool escaped = false; // used to catch \$, etc.
332 while (!val.empty()) {
333 char_type const ch = val[0];
334
335 // if we're scanning math, we output everything until we
336 // find an unescaped $, at which point we break out.
337 if (scanning_math) {
338 if (escaped)
339 escaped = false;
340 else if (ch == '\\')
341 escaped = true;
342 else if (ch == '$')
343 scanning_math = false;
344 ret += ch;
345 val = val.substr(1);
346 continue;
347 }
348
349 // if we're scanning a command name, then we just
350 // discard characters until we hit something that
351 // isn't alpha.
352 if (scanning_cmd) {
353 if (isAlphaASCII(ch)) {
354 val = val.substr(1);
355 escaped = false;
356 continue;
357 }
358 // so we're done with this command.
359 // now we fall through and check this character.
360 scanning_cmd = false;
361 }
362
363 // was the last character a \? If so, then this is something like:
364 // \\ or \$, so we'll just output it. That's probably not always right...
365 if (escaped) {
366 // exception: output \, as THIN SPACE
367 if (ch == ',')
368 ret.push_back(0x2009);
369 else
370 ret += ch;
371 val = val.substr(1);
372 escaped = false;
373 continue;
374 }
375
376 if (ch == '$') {
377 ret += ch;
378 val = val.substr(1);
379 scanning_math = true;
380 continue;
381 }
382
383 // Change text mode accents in the form
384 // {\v a} to \v{a} (see #9340).
385 // FIXME: This is a sort of mini-tex2lyx.
386 // Use the real tex2lyx instead!
387 static lyx::regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}");
388 if (lyx::regex_search(to_utf8(val), tma_reg)) {
389 val = val.substr(1);
390 val.replace(2, 1, from_ascii("{"));
391 continue;
392 }
393
394 // Apart from the above, we just ignore braces
395 if (ch == '{' || ch == '}') {
396 val = val.substr(1);
397 continue;
398 }
399
400 // we're going to check things that look like commands, so if
401 // this doesn't, just output it.
402 if (ch != '\\') {
403 ret += ch;
404 val = val.substr(1);
405 continue;
406 }
407
408 // ok, could be a command of some sort
409 // let's see if it corresponds to some unicode
410 // unicodesymbols has things in the form: \"{u},
411 // whereas we may see things like: \"u. So we'll
412 // look for that and change it, if necessary.
413 // FIXME: This is a sort of mini-tex2lyx.
414 // Use the real tex2lyx instead!
415 static lyx::regex const reg("^\\\\\\W\\w");
416 if (lyx::regex_search(to_utf8(val), reg)) {
417 val.insert(3, from_ascii("}"));
418 val.insert(2, from_ascii("{"));
419 }
420 bool termination;
421 docstring rem;
422 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
423 Encodings::TEXT_CMD, termination, rem);
424 if (!cnvtd.empty()) {
425 // it did, so we'll take that bit and proceed with what's left
426 ret += cnvtd;
427 val = rem;
428 continue;
429 }
430 // it's a command of some sort
431 scanning_cmd = true;
432 escaped = true;
433 val = val.substr(1);
434 }
435 return ret;
436 }
437
438
439 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
processRichtext(docstring const & str,bool richtext)440 docstring processRichtext(docstring const & str, bool richtext)
441 {
442 docstring val = str;
443 docstring ret;
444
445 bool scanning_rich = false;
446 while (!val.empty()) {
447 char_type const ch = val[0];
448 if (ch == '{' && val.size() > 1 && val[1] == '!') {
449 // beginning of rich text
450 scanning_rich = true;
451 val = val.substr(2);
452 continue;
453 }
454 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
455 // end of rich text
456 scanning_rich = false;
457 val = val.substr(2);
458 continue;
459 }
460 if (richtext) {
461 if (scanning_rich)
462 ret += ch;
463 else {
464 // we need to escape '<' and '>'
465 if (ch == '<')
466 ret += "<";
467 else if (ch == '>')
468 ret += ">";
469 else
470 ret += ch;
471 }
472 } else if (!scanning_rich /* && !richtext */)
473 ret += ch;
474 // else the character is discarded, which will happen only if
475 // richtext == false and we are scanning rich text
476 val = val.substr(1);
477 }
478 return ret;
479 }
480
481 } // namespace
482
483
484 //////////////////////////////////////////////////////////////////////
485 //
486 // BibTeXInfo
487 //
488 //////////////////////////////////////////////////////////////////////
489
BibTeXInfo(docstring const & key,docstring const & type)490 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
491 : is_bibtex_(true), bib_key_(key), entry_type_(type), info_(),
492 modifier_(0)
493 {}
494
495
496
getAuthorOrEditorList(Buffer const * buf,bool full,bool forceshort) const497 docstring const BibTeXInfo::getAuthorOrEditorList(Buffer const * buf,
498 bool full, bool forceshort) const
499 {
500 docstring author = operator[]("author");
501 if (author.empty())
502 author = operator[]("editor");
503
504 return getAuthorList(buf, author, full, forceshort);
505 }
506
507
getAuthorList(Buffer const * buf,docstring const & author,bool const full,bool const forceshort,bool const allnames,bool const beginning) const508 docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
509 docstring const & author, bool const full, bool const forceshort,
510 bool const allnames, bool const beginning) const
511 {
512 // Maxnames treshold depend on engine
513 size_t maxnames = buf ?
514 buf->params().documentClass().max_citenames() : 2;
515
516 if (!is_bibtex_) {
517 docstring const opt = label();
518 if (opt.empty())
519 return docstring();
520
521 docstring authors;
522 docstring const remainder = trim(split(opt, authors, '('));
523 if (remainder.empty())
524 // in this case, we didn't find a "(",
525 // so we don't have author (year)
526 return docstring();
527 return authors;
528 }
529
530 if (author.empty())
531 return author;
532
533 // OK, we've got some names. Let's format them.
534 // Try to split the author list
535 vector<docstring> const authors = getAuthors(author);
536
537 docstring retval;
538
539 CiteEngineType const engine_type = buf ? buf->params().citeEngineType()
540 : ENGINE_TYPE_DEFAULT;
541
542 // These are defined in the styles
543 string const etal =
544 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_etal")
545 : " et al.";
546 string const namesep =
547 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_namesep")
548 : ", ";
549 string const lastnamesep =
550 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_lastnamesep")
551 : ", and ";
552 string const pairnamesep =
553 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_pairnamesep")
554 : " and ";
555 string firstnameform =
556 buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstnameform")
557 : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
558 if (!beginning)
559 firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstbynameform")
560 : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
561 string othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!othernameform")
562 : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
563 if (!beginning)
564 othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!otherbynameform")
565 : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
566 string citenameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!citenameform")
567 : "{%prefix%[[%prefix% ]]}%surname%";
568
569 // Shorten the list (with et al.) if forceshort is set
570 // and the list can actually be shortened, else if maxcitenames
571 // is passed and full is not set.
572 bool shorten = forceshort && authors.size() > 1;
573 vector<docstring>::const_iterator it = authors.begin();
574 vector<docstring>::const_iterator en = authors.end();
575 for (size_t i = 0; it != en; ++it, ++i) {
576 if (i >= maxnames && !full) {
577 shorten = true;
578 break;
579 }
580 if (*it == "others") {
581 retval += buf ? buf->B_(etal) : from_ascii(etal);
582 break;
583 }
584 if (i > 0 && i == authors.size() - 1) {
585 if (authors.size() == 2)
586 retval += buf ? buf->B_(pairnamesep) : from_ascii(pairnamesep);
587 else
588 retval += buf ? buf->B_(lastnamesep) : from_ascii(lastnamesep);
589 } else if (i > 0)
590 retval += buf ? buf->B_(namesep) : from_ascii(namesep);
591 if (allnames)
592 retval += (i == 0) ? constructName(*it, firstnameform)
593 : constructName(*it, othernameform);
594 else
595 retval += constructName(*it, citenameform);
596 }
597 if (shorten) {
598 if (allnames)
599 retval = constructName(authors[0], firstnameform) + (buf ? buf->B_(etal) : from_ascii(etal));
600 else
601 retval = constructName(authors[0], citenameform) + (buf ? buf->B_(etal) : from_ascii(etal));
602 }
603
604 return convertLaTeXCommands(retval);
605 }
606
607
getYear() const608 docstring const BibTeXInfo::getYear() const
609 {
610 if (is_bibtex_) {
611 // first try legacy year field
612 docstring year = operator[]("year");
613 if (!year.empty())
614 return year;
615 // now try biblatex's date field
616 year = operator[]("date");
617 // Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
618 // We only want the years.
619 static regex const yreg("[-]?([\\d]{4}).*");
620 static regex const ereg(".*/[-]?([\\d]{4}).*");
621 smatch sm;
622 string const date = to_utf8(year);
623 if (!regex_match(date, sm, yreg))
624 // cannot parse year.
625 return docstring();
626 year = from_ascii(sm[1]);
627 // check for an endyear
628 if (regex_match(date, sm, ereg))
629 year += char_type(0x2013) + from_ascii(sm[1]);
630 return year;
631 }
632
633 docstring const opt = label();
634 if (opt.empty())
635 return docstring();
636
637 docstring authors;
638 docstring tmp = split(opt, authors, '(');
639 if (tmp.empty())
640 // we don't have author (year)
641 return docstring();
642 docstring year;
643 tmp = split(tmp, year, ')');
644 return year;
645 }
646
647
648 namespace {
649
650 docstring parseOptions(docstring const & format, string & optkey,
651 docstring & ifpart, docstring & elsepart);
652
653 // Calls parseOptions to deal with an embedded option, such as:
654 // {%number%[[, no.~%number%]]}
655 // which must appear at the start of format. ifelsepart gets the
656 // whole of the option, and we return what's left after the option.
657 // we return format if there is an error.
parseEmbeddedOption(docstring const & format,docstring & ifelsepart)658 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
659 {
660 LASSERT(format[0] == '{' && format[1] == '%', return format);
661 string optkey;
662 docstring ifpart;
663 docstring elsepart;
664 docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
665 if (format == rest) { // parse error
666 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
667 return format;
668 }
669 LASSERT(rest.size() <= format.size(),
670 { ifelsepart = docstring(); return format; });
671 ifelsepart = format.substr(0, format.size() - rest.size());
672 return rest;
673 }
674
675
676 // Gets a "clause" from a format string, where the clause is
677 // delimited by '[[' and ']]'. Returns what is left after the
678 // clause is removed, and returns format if there is an error.
getClause(docstring const & format,docstring & clause)679 docstring getClause(docstring const & format, docstring & clause)
680 {
681 docstring fmt = format;
682 // remove '[['
683 fmt = fmt.substr(2);
684 // we'll remove characters from the front of fmt as we
685 // deal with them
686 while (!fmt.empty()) {
687 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
688 // that's the end
689 fmt = fmt.substr(2);
690 break;
691 }
692 // check for an embedded option
693 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
694 docstring part;
695 docstring const rest = parseEmbeddedOption(fmt, part);
696 if (fmt == rest) {
697 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
698 return format;
699 }
700 clause += part;
701 fmt = rest;
702 } else { // it's just a normal character
703 clause += fmt[0];
704 fmt = fmt.substr(1);
705 }
706 }
707 return fmt;
708 }
709
710
711 // parse an options string, which must appear at the start of the
712 // format parameter. puts the parsed bits in optkey, ifpart, and
713 // elsepart and returns what's left after the option is removed.
714 // if there's an error, it returns format itself.
parseOptions(docstring const & format,string & optkey,docstring & ifpart,docstring & elsepart)715 docstring parseOptions(docstring const & format, string & optkey,
716 docstring & ifpart, docstring & elsepart)
717 {
718 LASSERT(format[0] == '{' && format[1] == '%', return format);
719 // strip '{%'
720 docstring fmt = format.substr(2);
721 size_t pos = fmt.find('%'); // end of key
722 if (pos == string::npos) {
723 LYXERR0("Error parsing `" << format <<"'. Can't find end of key.");
724 return format;
725 }
726 optkey = to_utf8(fmt.substr(0, pos));
727 fmt = fmt.substr(pos + 1);
728 // [[format]] should be next
729 if (fmt[0] != '[' || fmt[1] != '[') {
730 LYXERR0("Error parsing `" << format <<"'. Can't find '[[' after key.");
731 return format;
732 }
733
734 docstring curfmt = fmt;
735 fmt = getClause(curfmt, ifpart);
736 if (fmt == curfmt) {
737 LYXERR0("Error parsing `" << format <<"'. Couldn't get if clause.");
738 return format;
739 }
740
741 if (fmt[0] == '}') // we're done, no else clause
742 return fmt.substr(1);
743
744 // else part should follow
745 if (fmt[0] != '[' || fmt[1] != '[') {
746 LYXERR0("Error parsing `" << format <<"'. Can't find else clause.");
747 return format;
748 }
749
750 curfmt = fmt;
751 fmt = getClause(curfmt, elsepart);
752 // we should be done
753 if (fmt == curfmt || fmt[0] != '}') {
754 LYXERR0("Error parsing `" << format <<"'. Can't find end of option.");
755 return format;
756 }
757 return fmt.substr(1);
758 }
759
760
761 } // namespace
762
763 /* FIXME
764 Bug #9131 revealed an oddity in how we are generating citation information
765 when more than one key is given. We end up building a longer and longer format
766 string as we go, which we then have to re-parse, over and over and over again,
767 rather than generating the information for the individual keys and then putting
768 all of that together. We do that to deal with the way separators work, from what
769 I can tell, but it still feels like a hack. Fixing this would require quite a
770 bit of work, however.
771 */
expandFormat(docstring const & format,BibTeXInfoList const xrefs,int & counter,Buffer const & buf,CiteItem const & ci,bool next,bool second) const772 docstring BibTeXInfo::expandFormat(docstring const & format,
773 BibTeXInfoList const xrefs, int & counter, Buffer const & buf,
774 CiteItem const & ci, bool next, bool second) const
775 {
776 // incorrect use of macros could put us in an infinite loop
777 static int const max_passes = 5000;
778 // the use of overly large keys can lead to performance problems, due
779 // to eventual attempts to convert LaTeX macros to unicode. See bug
780 // #8944. By default, the size is limited to 128 (in CiteItem), but
781 // for specific purposes (such as XHTML export), it needs to be enlarged
782 // This is perhaps not the best solution, but it will have to do for now.
783 size_t const max_keysize = ci.max_key_size;
784 odocstringstream ret; // return value
785 string key;
786 bool scanning_key = false;
787 bool scanning_rich = false;
788
789 CiteEngineType const engine_type = buf.params().citeEngineType();
790 docstring fmt = format;
791 // we'll remove characters from the front of fmt as we
792 // deal with them
793 while (!fmt.empty()) {
794 if (counter > max_passes) {
795 LYXERR0("Recursion limit reached while parsing `"
796 << format << "'.");
797 return _("ERROR!");
798 }
799
800 char_type thischar = fmt[0];
801 if (thischar == '%') {
802 // beginning or end of key
803 if (scanning_key) {
804 // end of key
805 scanning_key = false;
806 // so we replace the key with its value, which may be empty
807 if (key[0] == '!') {
808 // macro
809 string const val =
810 buf.params().documentClass().getCiteMacro(engine_type, key);
811 fmt = from_utf8(val) + fmt.substr(1);
812 counter += 1;
813 continue;
814 } else if (prefixIs(key, "B_")) {
815 // a translatable bit (to the Buffer language)
816 string const val =
817 buf.params().documentClass().getCiteMacro(engine_type, key);
818 docstring const trans =
819 translateIfPossible(from_utf8(val), buf.params().language->code());
820 ret << trans;
821 } else if (key[0] == '_') {
822 // a translatable bit (to the GUI language)
823 string const val =
824 buf.params().documentClass().getCiteMacro(engine_type, key);
825 docstring const trans =
826 translateIfPossible(from_utf8(val));
827 ret << trans;
828 } else {
829 docstring const val =
830 getValueForKey(key, buf, ci, xrefs, max_keysize);
831 if (!scanning_rich)
832 ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
833 ret << val;
834 if (!scanning_rich)
835 ret << from_ascii("{!</span>!}");
836 }
837 } else {
838 // beginning of key
839 key.clear();
840 scanning_key = true;
841 }
842 }
843 else if (thischar == '{') {
844 // beginning of option?
845 if (scanning_key) {
846 LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
847 return _("ERROR!");
848 }
849 if (fmt.size() > 1) {
850 if (fmt[1] == '%') {
851 // it is the beginning of an optional format
852 string optkey;
853 docstring ifpart;
854 docstring elsepart;
855 docstring const newfmt =
856 parseOptions(fmt, optkey, ifpart, elsepart);
857 if (newfmt == fmt) // parse error
858 return _("ERROR!");
859 fmt = newfmt;
860 docstring const val =
861 getValueForKey(optkey, buf, ci, xrefs);
862 if (optkey == "next" && next)
863 ret << ifpart; // without expansion
864 else if (optkey == "second" && second) {
865 int newcounter = 0;
866 ret << expandFormat(ifpart, xrefs, newcounter, buf,
867 ci, next);
868 } else if (!val.empty()) {
869 int newcounter = 0;
870 ret << expandFormat(ifpart, xrefs, newcounter, buf,
871 ci, next);
872 } else if (!elsepart.empty()) {
873 int newcounter = 0;
874 ret << expandFormat(elsepart, xrefs, newcounter, buf,
875 ci, next);
876 }
877 // fmt will have been shortened for us already
878 continue;
879 }
880 if (fmt[1] == '!') {
881 // beginning of rich text
882 scanning_rich = true;
883 fmt = fmt.substr(2);
884 ret << from_ascii("{!");
885 continue;
886 }
887 }
888 // we are here if '{' was not followed by % or !.
889 // So it's just a character.
890 ret << thischar;
891 }
892 else if (scanning_rich && thischar == '!'
893 && fmt.size() > 1 && fmt[1] == '}') {
894 // end of rich text
895 scanning_rich = false;
896 fmt = fmt.substr(2);
897 ret << from_ascii("!}");
898 continue;
899 }
900 else if (scanning_key)
901 key += char(thischar);
902 else {
903 try {
904 ret.put(thischar);
905 } catch (EncodingException & /* e */) {
906 LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
907 }
908 }
909 fmt = fmt.substr(1);
910 } // for loop
911 if (scanning_key) {
912 LYXERR0("Never found end of key in `" << format << "'!");
913 return _("ERROR!");
914 }
915 if (scanning_rich) {
916 LYXERR0("Never found end of rich text in `" << format << "'!");
917 return _("ERROR!");
918 }
919 return ret.str();
920 }
921
922
getInfo(BibTeXInfoList const xrefs,Buffer const & buf,CiteItem const & ci) const923 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const xrefs,
924 Buffer const & buf, CiteItem const & ci) const
925 {
926 bool const richtext = ci.richtext;
927
928 if (!richtext && !info_.empty())
929 return info_;
930 if (richtext && !info_richtext_.empty())
931 return info_richtext_;
932
933 if (!is_bibtex_) {
934 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
935 info_ = it->second;
936 return info_;
937 }
938
939 CiteEngineType const engine_type = buf.params().citeEngineType();
940 DocumentClass const & dc = buf.params().documentClass();
941 docstring const & format =
942 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
943 int counter = 0;
944 info_ = expandFormat(format, xrefs, counter, buf,
945 ci, false, false);
946
947 if (info_.empty()) {
948 // this probably shouldn't happen
949 return info_;
950 }
951
952 if (richtext) {
953 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
954 return info_richtext_;
955 }
956
957 info_ = convertLaTeXCommands(processRichtext(info_, false));
958 return info_;
959 }
960
961
getLabel(BibTeXInfoList const xrefs,Buffer const & buf,docstring const & format,CiteItem const & ci,bool next,bool second) const962 docstring const BibTeXInfo::getLabel(BibTeXInfoList const xrefs,
963 Buffer const & buf, docstring const & format,
964 CiteItem const & ci, bool next, bool second) const
965 {
966 docstring loclabel;
967
968 int counter = 0;
969 loclabel = expandFormat(format, xrefs, counter, buf, ci, next, second);
970
971 if (!loclabel.empty() && !next) {
972 loclabel = processRichtext(loclabel, ci.richtext);
973 loclabel = convertLaTeXCommands(loclabel);
974 }
975
976 return loclabel;
977 }
978
979
operator [](docstring const & field) const980 docstring const & BibTeXInfo::operator[](docstring const & field) const
981 {
982 BibTeXInfo::const_iterator it = find(field);
983 if (it != end())
984 return it->second;
985 static docstring const empty_value = docstring();
986 return empty_value;
987 }
988
989
operator [](string const & field) const990 docstring const & BibTeXInfo::operator[](string const & field) const
991 {
992 return operator[](from_ascii(field));
993 }
994
995
getValueForKey(string const & oldkey,Buffer const & buf,CiteItem const & ci,BibTeXInfoList const xrefs,size_t maxsize) const996 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
997 CiteItem const & ci, BibTeXInfoList const xrefs, size_t maxsize) const
998 {
999 // anything less is pointless
1000 LASSERT(maxsize >= 16, maxsize = 16);
1001 string key = oldkey;
1002 bool cleanit = false;
1003 if (prefixIs(oldkey, "clean:")) {
1004 key = oldkey.substr(6);
1005 cleanit = true;
1006 }
1007
1008 docstring ret = operator[](key);
1009 if (ret.empty() && !xrefs.empty()) {
1010 vector<BibTeXInfo const *>::const_iterator it = xrefs.begin();
1011 vector<BibTeXInfo const *>::const_iterator en = xrefs.end();
1012 for (; it != en; ++it) {
1013 if (*it && !(**it)[key].empty()) {
1014 ret = (**it)[key];
1015 break;
1016 }
1017 }
1018 }
1019 if (ret.empty()) {
1020 // some special keys
1021 // FIXME: dialog, textbefore and textafter have nothing to do with this
1022 if (key == "dialog" && ci.context == CiteItem::Dialog)
1023 ret = from_ascii("x"); // any non-empty string will do
1024 else if (key == "export" && ci.context == CiteItem::Export)
1025 ret = from_ascii("x"); // any non-empty string will do
1026 else if (key == "ifstar" && ci.Starred)
1027 ret = from_ascii("x"); // any non-empty string will do
1028 else if (key == "ifqualified" && ci.isQualified)
1029 ret = from_ascii("x"); // any non-empty string will do
1030 else if (key == "entrytype")
1031 ret = entry_type_;
1032 else if (prefixIs(key, "ifentrytype:")
1033 && from_ascii(key.substr(12)) == entry_type_)
1034 ret = from_ascii("x"); // any non-empty string will do
1035 else if (key == "key")
1036 ret = bib_key_;
1037 else if (key == "label")
1038 ret = label_;
1039 else if (key == "modifier" && modifier_ != 0)
1040 ret = modifier_;
1041 else if (key == "numericallabel")
1042 ret = cite_number_;
1043 else if (prefixIs(key, "ifmultiple:")) {
1044 // Return whether we have multiple authors
1045 docstring const kind = operator[](from_ascii(key.substr(11)));
1046 if (multipleAuthors(kind))
1047 ret = from_ascii("x"); // any non-empty string will do
1048 }
1049 else if (prefixIs(key, "abbrvnames:")) {
1050 // Special key to provide abbreviated name list,
1051 // with respect to maxcitenames. Suitable for Bibliography
1052 // beginnings.
1053 docstring const kind = operator[](from_ascii(key.substr(11)));
1054 ret = getAuthorList(&buf, kind, false, false, true);
1055 if (ci.forceUpperCase && isLowerCase(ret[0]))
1056 ret[0] = uppercase(ret[0]);
1057 } else if (prefixIs(key, "fullnames:")) {
1058 // Return a full name list. Suitable for Bibliography
1059 // beginnings.
1060 docstring const kind = operator[](from_ascii(key.substr(10)));
1061 ret = getAuthorList(&buf, kind, true, false, true);
1062 if (ci.forceUpperCase && isLowerCase(ret[0]))
1063 ret[0] = uppercase(ret[0]);
1064 } else if (prefixIs(key, "forceabbrvnames:")) {
1065 // Special key to provide abbreviated name lists,
1066 // irrespective of maxcitenames. Suitable for Bibliography
1067 // beginnings.
1068 docstring const kind = operator[](from_ascii(key.substr(15)));
1069 ret = getAuthorList(&buf, kind, false, true, true);
1070 if (ci.forceUpperCase && isLowerCase(ret[0]))
1071 ret[0] = uppercase(ret[0]);
1072 } else if (prefixIs(key, "abbrvbynames:")) {
1073 // Special key to provide abbreviated name list,
1074 // with respect to maxcitenames. Suitable for further names inside a
1075 // bibliography item // (such as "ed. by ...")
1076 docstring const kind = operator[](from_ascii(key.substr(11)));
1077 ret = getAuthorList(&buf, kind, false, false, true, false);
1078 if (ci.forceUpperCase && isLowerCase(ret[0]))
1079 ret[0] = uppercase(ret[0]);
1080 } else if (prefixIs(key, "fullbynames:")) {
1081 // Return a full name list. Suitable for further names inside a
1082 // bibliography item // (such as "ed. by ...")
1083 docstring const kind = operator[](from_ascii(key.substr(10)));
1084 ret = getAuthorList(&buf, kind, true, false, true, false);
1085 if (ci.forceUpperCase && isLowerCase(ret[0]))
1086 ret[0] = uppercase(ret[0]);
1087 } else if (prefixIs(key, "forceabbrvbynames:")) {
1088 // Special key to provide abbreviated name lists,
1089 // irrespective of maxcitenames. Suitable for further names inside a
1090 // bibliography item // (such as "ed. by ...")
1091 docstring const kind = operator[](from_ascii(key.substr(15)));
1092 ret = getAuthorList(&buf, kind, false, true, true, false);
1093 if (ci.forceUpperCase && isLowerCase(ret[0]))
1094 ret[0] = uppercase(ret[0]);
1095 } else if (key == "abbrvciteauthor") {
1096 // Special key to provide abbreviated author or
1097 // editor names (suitable for citation labels),
1098 // with respect to maxcitenames.
1099 ret = getAuthorOrEditorList(&buf, false, false);
1100 if (ci.forceUpperCase && isLowerCase(ret[0]))
1101 ret[0] = uppercase(ret[0]);
1102 } else if (key == "fullciteauthor") {
1103 // Return a full author or editor list (for citation labels)
1104 ret = getAuthorOrEditorList(&buf, true, false);
1105 if (ci.forceUpperCase && isLowerCase(ret[0]))
1106 ret[0] = uppercase(ret[0]);
1107 } else if (key == "forceabbrvciteauthor") {
1108 // Special key to provide abbreviated author or
1109 // editor names (suitable for citation labels),
1110 // irrespective of maxcitenames.
1111 ret = getAuthorOrEditorList(&buf, false, true);
1112 if (ci.forceUpperCase && isLowerCase(ret[0]))
1113 ret[0] = uppercase(ret[0]);
1114 } else if (key == "bibentry") {
1115 // Special key to provide the full bibliography entry: see getInfo()
1116 CiteEngineType const engine_type = buf.params().citeEngineType();
1117 DocumentClass const & dc = buf.params().documentClass();
1118 docstring const & format =
1119 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_), false));
1120 int counter = 0;
1121 ret = expandFormat(format, xrefs, counter, buf, ci, false, false);
1122 } else if (key == "textbefore")
1123 ret = ci.textBefore;
1124 else if (key == "textafter")
1125 ret = ci.textAfter;
1126 else if (key == "curpretext")
1127 ret = ci.getPretexts()[bib_key_];
1128 else if (key == "curposttext")
1129 ret = ci.getPosttexts()[bib_key_];
1130 else if (key == "year")
1131 ret = getYear();
1132 }
1133
1134 if (cleanit)
1135 ret = html::cleanAttr(ret);
1136
1137 // make sure it is not too big
1138 support::truncateWithEllipsis(ret, maxsize);
1139 return ret;
1140 }
1141
1142
1143 //////////////////////////////////////////////////////////////////////
1144 //
1145 // BiblioInfo
1146 //
1147 //////////////////////////////////////////////////////////////////////
1148
1149 namespace {
1150
1151 // A functor for use with sort, leading to case insensitive sorting
1152 class compareNoCase: public binary_function<docstring, docstring, bool>
1153 {
1154 public:
operator ()(docstring const & s1,docstring const & s2) const1155 bool operator()(docstring const & s1, docstring const & s2) const {
1156 return compare_no_case(s1, s2) < 0;
1157 }
1158 };
1159
1160 } // namespace
1161
1162
getXRefs(BibTeXInfo const & data,bool const nested) const1163 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
1164 {
1165 vector<docstring> result;
1166 if (!data.isBibTeX())
1167 return result;
1168 // Legacy crossref field. This is not nestable.
1169 if (!nested && !data["crossref"].empty()) {
1170 docstring const xrefkey = data["crossref"];
1171 result.push_back(xrefkey);
1172 // However, check for nested xdatas
1173 BiblioInfo::const_iterator it = find(xrefkey);
1174 if (it != end()) {
1175 BibTeXInfo const & xref = it->second;
1176 vector<docstring> const nxdata = getXRefs(xref, true);
1177 if (!nxdata.empty())
1178 result.insert(result.end(), nxdata.begin(), nxdata.end());
1179 }
1180 }
1181 // Biblatex's xdata field. Infinitely nestable.
1182 // XData field can consist of a comma-separated list of keys
1183 vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
1184 if (!xdatakeys.empty()) {
1185 vector<docstring>::const_iterator xit = xdatakeys.begin();
1186 vector<docstring>::const_iterator xen = xdatakeys.end();
1187 for (; xit != xen; ++xit) {
1188 docstring const xdatakey = *xit;
1189 result.push_back(xdatakey);
1190 BiblioInfo::const_iterator it = find(xdatakey);
1191 if (it != end()) {
1192 BibTeXInfo const & xdata = it->second;
1193 vector<docstring> const nxdata = getXRefs(xdata, true);
1194 if (!nxdata.empty())
1195 result.insert(result.end(), nxdata.begin(), nxdata.end());
1196 }
1197 }
1198 }
1199 return result;
1200 }
1201
1202
getKeys() const1203 vector<docstring> const BiblioInfo::getKeys() const
1204 {
1205 vector<docstring> bibkeys;
1206 BiblioInfo::const_iterator it = begin();
1207 for (; it != end(); ++it)
1208 bibkeys.push_back(it->first);
1209 sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
1210 return bibkeys;
1211 }
1212
1213
getFields() const1214 vector<docstring> const BiblioInfo::getFields() const
1215 {
1216 vector<docstring> bibfields;
1217 set<docstring>::const_iterator it = field_names_.begin();
1218 set<docstring>::const_iterator end = field_names_.end();
1219 for (; it != end; ++it)
1220 bibfields.push_back(*it);
1221 sort(bibfields.begin(), bibfields.end());
1222 return bibfields;
1223 }
1224
1225
getEntries() const1226 vector<docstring> const BiblioInfo::getEntries() const
1227 {
1228 vector<docstring> bibentries;
1229 set<docstring>::const_iterator it = entry_types_.begin();
1230 set<docstring>::const_iterator end = entry_types_.end();
1231 for (; it != end; ++it)
1232 bibentries.push_back(*it);
1233 sort(bibentries.begin(), bibentries.end());
1234 return bibentries;
1235 }
1236
1237
getAuthorOrEditorList(docstring const & key,Buffer const & buf) const1238 docstring const BiblioInfo::getAuthorOrEditorList(docstring const & key, Buffer const & buf) const
1239 {
1240 BiblioInfo::const_iterator it = find(key);
1241 if (it == end())
1242 return docstring();
1243 BibTeXInfo const & data = it->second;
1244 return data.getAuthorOrEditorList(&buf, false);
1245 }
1246
1247
getCiteNumber(docstring const & key) const1248 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
1249 {
1250 BiblioInfo::const_iterator it = find(key);
1251 if (it == end())
1252 return docstring();
1253 BibTeXInfo const & data = it->second;
1254 return data.citeNumber();
1255 }
1256
1257
getYear(docstring const & key,bool use_modifier) const1258 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
1259 {
1260 BiblioInfo::const_iterator it = find(key);
1261 if (it == end())
1262 return docstring();
1263 BibTeXInfo const & data = it->second;
1264 docstring year = data.getYear();
1265 if (year.empty()) {
1266 // let's try the crossrefs
1267 vector<docstring> const xrefs = getXRefs(data);
1268 if (xrefs.empty())
1269 // no luck
1270 return docstring();
1271 vector<docstring>::const_iterator it = xrefs.begin();
1272 vector<docstring>::const_iterator en = xrefs.end();
1273 for (; it != en; ++it) {
1274 BiblioInfo::const_iterator const xrefit = find(*it);
1275 if (xrefit == end())
1276 continue;
1277 BibTeXInfo const & xref_data = xrefit->second;
1278 year = xref_data.getYear();
1279 if (!year.empty())
1280 // success!
1281 break;
1282 }
1283 }
1284 if (use_modifier && data.modifier() != 0)
1285 year += data.modifier();
1286 return year;
1287 }
1288
1289
getYear(docstring const & key,Buffer const & buf,bool use_modifier) const1290 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
1291 {
1292 docstring const year = getYear(key, use_modifier);
1293 if (year.empty())
1294 return buf.B_("No year");
1295 return year;
1296 }
1297
1298
getInfo(docstring const & key,Buffer const & buf,CiteItem const & ci) const1299 docstring const BiblioInfo::getInfo(docstring const & key,
1300 Buffer const & buf, CiteItem const & ci) const
1301 {
1302 BiblioInfo::const_iterator it = find(key);
1303 if (it == end())
1304 return docstring(_("Bibliography entry not found!"));
1305 BibTeXInfo const & data = it->second;
1306 BibTeXInfoList xrefptrs;
1307 vector<docstring> const xrefs = getXRefs(data);
1308 if (!xrefs.empty()) {
1309 vector<docstring>::const_iterator it = xrefs.begin();
1310 vector<docstring>::const_iterator en = xrefs.end();
1311 for (; it != en; ++it) {
1312 BiblioInfo::const_iterator const xrefit = find(*it);
1313 if (xrefit != end())
1314 xrefptrs.push_back(&(xrefit->second));
1315 }
1316 }
1317 return data.getInfo(xrefptrs, buf, ci);
1318 }
1319
1320
getLabel(vector<docstring> keys,Buffer const & buf,string const & style,CiteItem const & ci) const1321 docstring const BiblioInfo::getLabel(vector<docstring> keys,
1322 Buffer const & buf, string const & style, CiteItem const & ci) const
1323 {
1324 size_t max_size = ci.max_size;
1325 // shorter makes no sense
1326 LASSERT(max_size >= 16, max_size = 16);
1327
1328 // we can't display more than 10 of these, anyway
1329 bool const too_many_keys = keys.size() > 10;
1330 if (too_many_keys)
1331 keys.resize(10);
1332
1333 CiteEngineType const engine_type = buf.params().citeEngineType();
1334 DocumentClass const & dc = buf.params().documentClass();
1335 docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, false, "cite"));
1336 docstring ret = format;
1337 vector<docstring>::const_iterator key = keys.begin();
1338 vector<docstring>::const_iterator ken = keys.end();
1339 for (int i = 0; key != ken; ++key, ++i) {
1340 BiblioInfo::const_iterator it = find(*key);
1341 BibTeXInfo empty_data;
1342 empty_data.key(*key);
1343 BibTeXInfo & data = empty_data;
1344 vector<BibTeXInfo const *> xrefptrs;
1345 if (it != end()) {
1346 data = it->second;
1347 vector<docstring> const xrefs = getXRefs(data);
1348 if (!xrefs.empty()) {
1349 vector<docstring>::const_iterator it = xrefs.begin();
1350 vector<docstring>::const_iterator en = xrefs.end();
1351 for (; it != en; ++it) {
1352 BiblioInfo::const_iterator const xrefit = find(*it);
1353 if (xrefit != end())
1354 xrefptrs.push_back(&(xrefit->second));
1355 }
1356 }
1357 }
1358 ret = data.getLabel(xrefptrs, buf, ret, ci, key + 1 != ken, i == 1);
1359 }
1360
1361 if (too_many_keys)
1362 ret.push_back(0x2026);//HORIZONTAL ELLIPSIS
1363 support::truncateWithEllipsis(ret, max_size);
1364 return ret;
1365 }
1366
1367
isBibtex(docstring const & key) const1368 bool BiblioInfo::isBibtex(docstring const & key) const
1369 {
1370 docstring key1;
1371 split(key, key1, ',');
1372 BiblioInfo::const_iterator it = find(key1);
1373 if (it == end())
1374 return false;
1375 return it->second.isBibTeX();
1376 }
1377
1378
getCiteStrings(vector<docstring> const & keys,vector<CitationStyle> const & styles,Buffer const & buf,CiteItem const & ci) const1379 BiblioInfo::CiteStringMap const BiblioInfo::getCiteStrings(
1380 vector<docstring> const & keys, vector<CitationStyle> const & styles,
1381 Buffer const & buf, CiteItem const & ci) const
1382 {
1383 if (empty())
1384 return vector<pair<docstring,docstring>>();
1385
1386 string style;
1387 CiteStringMap csm(styles.size());
1388 for (size_t i = 0; i != csm.size(); ++i) {
1389 style = styles[i].name;
1390 csm[i] = make_pair(from_ascii(style), getLabel(keys, buf, style, ci));
1391 }
1392
1393 return csm;
1394 }
1395
1396
mergeBiblioInfo(BiblioInfo const & info)1397 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
1398 {
1399 bimap_.insert(info.begin(), info.end());
1400 field_names_.insert(info.field_names_.begin(), info.field_names_.end());
1401 entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
1402 }
1403
1404
1405 namespace {
1406
1407 // used in xhtml to sort a list of BibTeXInfo objects
lSorter(BibTeXInfo const * lhs,BibTeXInfo const * rhs)1408 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
1409 {
1410 docstring const lauth = lhs->getAuthorOrEditorList();
1411 docstring const rauth = rhs->getAuthorOrEditorList();
1412 docstring const lyear = lhs->getYear();
1413 docstring const ryear = rhs->getYear();
1414 docstring const ltitl = lhs->operator[]("title");
1415 docstring const rtitl = rhs->operator[]("title");
1416 return (lauth < rauth)
1417 || (lauth == rauth && lyear < ryear)
1418 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
1419 }
1420
1421 } // namespace
1422
1423
collectCitedEntries(Buffer const & buf)1424 void BiblioInfo::collectCitedEntries(Buffer const & buf)
1425 {
1426 cited_entries_.clear();
1427 // We are going to collect all the citation keys used in the document,
1428 // getting them from the TOC.
1429 // FIXME We may want to collect these differently, in the first case,
1430 // so that we might have them in order of appearance.
1431 set<docstring> citekeys;
1432 shared_ptr<Toc const> toc = buf.tocBackend().toc("citation");
1433 Toc::const_iterator it = toc->begin();
1434 Toc::const_iterator const en = toc->end();
1435 for (; it != en; ++it) {
1436 if (it->str().empty())
1437 continue;
1438 vector<docstring> const keys = getVectorFromString(it->str());
1439 citekeys.insert(keys.begin(), keys.end());
1440 }
1441 if (citekeys.empty())
1442 return;
1443
1444 // We have a set of the keys used in this document.
1445 // We will now convert it to a list of the BibTeXInfo objects used in
1446 // this document...
1447 vector<BibTeXInfo const *> bi;
1448 set<docstring>::const_iterator cit = citekeys.begin();
1449 set<docstring>::const_iterator const cen = citekeys.end();
1450 for (; cit != cen; ++cit) {
1451 BiblioInfo::const_iterator const bt = find(*cit);
1452 if (bt == end() || !bt->second.isBibTeX())
1453 continue;
1454 bi.push_back(&(bt->second));
1455 }
1456 // ...and sort it.
1457 sort(bi.begin(), bi.end(), lSorter);
1458
1459 // Now we can write the sorted keys
1460 vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
1461 vector<BibTeXInfo const *>::const_iterator ben = bi.end();
1462 for (; bit != ben; ++bit)
1463 cited_entries_.push_back((*bit)->key());
1464 }
1465
1466
makeCitationLabels(Buffer const & buf)1467 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1468 {
1469 collectCitedEntries(buf);
1470 CiteEngineType const engine_type = buf.params().citeEngineType();
1471 bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1472
1473 int keynumber = 0;
1474 char modifier = 0;
1475 // used to remember the last one we saw
1476 // we'll be comparing entries to see if we need to add
1477 // modifiers, like "1984a"
1478 map<docstring, BibTeXInfo>::iterator last = bimap_.end();
1479
1480 vector<docstring>::const_iterator it = cited_entries_.begin();
1481 vector<docstring>::const_iterator const en = cited_entries_.end();
1482 for (; it != en; ++it) {
1483 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1484 // this shouldn't happen, but...
1485 if (biit == bimap_.end())
1486 // ...fail gracefully, anyway.
1487 continue;
1488 BibTeXInfo & entry = biit->second;
1489 if (numbers) {
1490 docstring const num = convert<docstring>(++keynumber);
1491 entry.setCiteNumber(num);
1492 } else {
1493 // The first test here is checking whether this is the first
1494 // time through the loop. If so, then we do not have anything
1495 // with which to compare.
1496 if (last != bimap_.end()
1497 && entry.getAuthorOrEditorList() == last->second.getAuthorOrEditorList()
1498 // we access the year via getYear() so as to get it from the xref,
1499 // if we need to do so
1500 && getYear(entry.key()) == getYear(last->second.key())) {
1501 if (modifier == 0) {
1502 // so the last one should have been 'a'
1503 last->second.setModifier('a');
1504 modifier = 'b';
1505 } else if (modifier == 'z')
1506 modifier = 'A';
1507 else
1508 modifier++;
1509 } else {
1510 modifier = 0;
1511 }
1512 entry.setModifier(modifier);
1513 // remember the last one
1514 last = biit;
1515 }
1516 }
1517 // Set the labels
1518 it = cited_entries_.begin();
1519 for (; it != en; ++it) {
1520 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1521 // this shouldn't happen, but...
1522 if (biit == bimap_.end())
1523 // ...fail gracefully, anyway.
1524 continue;
1525 BibTeXInfo & entry = biit->second;
1526 if (numbers) {
1527 entry.label(entry.citeNumber());
1528 } else {
1529 docstring const auth = entry.getAuthorOrEditorList(&buf, false);
1530 // we do it this way so as to access the xref, if necessary
1531 // note that this also gives us the modifier
1532 docstring const year = getYear(*it, buf, true);
1533 if (!auth.empty() && !year.empty())
1534 entry.label(auth + ' ' + year);
1535 else
1536 entry.label(entry.key());
1537 }
1538 }
1539 }
1540
1541
1542 //////////////////////////////////////////////////////////////////////
1543 //
1544 // CitationStyle
1545 //
1546 //////////////////////////////////////////////////////////////////////
1547
1548
citationStyleFromString(string const & command,BufferParams const & params)1549 CitationStyle citationStyleFromString(string const & command,
1550 BufferParams const & params)
1551 {
1552 CitationStyle cs;
1553 if (command.empty())
1554 return cs;
1555
1556 string const alias = params.getCiteAlias(command);
1557 string cmd = alias.empty() ? command : alias;
1558 if (isUpperCase(command[0])) {
1559 cs.forceUpperCase = true;
1560 cmd[0] = lowercase(cmd[0]);
1561 }
1562
1563 size_t const n = command.size() - 1;
1564 if (command[n] == '*') {
1565 cs.hasStarredVersion = true;
1566 if (suffixIs(cmd, '*'))
1567 cmd = cmd.substr(0, cmd.size() - 1);
1568 }
1569
1570 cs.name = cmd;
1571 return cs;
1572 }
1573
1574
citationStyleToString(const CitationStyle & cs,bool const latex)1575 string citationStyleToString(const CitationStyle & cs, bool const latex)
1576 {
1577 string cmd = latex ? cs.cmd : cs.name;
1578 if (cs.forceUpperCase)
1579 cmd[0] = uppercase(cmd[0]);
1580 if (cs.hasStarredVersion)
1581 cmd += '*';
1582 return cmd;
1583 }
1584
1585 } // namespace lyx
1586