1 /**
2  * \file BiblioInfo.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Angus Leeming
7  * \author Herbert Voß
8  * \author Richard Heck
9  * \author Julien Rioux
10  * \author Jürgen Spitzmüller
11  *
12  * Full author contact details are available in file CREDITS.
13  */
14 
15 #include <config.h>
16 
17 #include "BiblioInfo.h"
18 #include "Buffer.h"
19 #include "BufferParams.h"
20 #include "buffer_funcs.h"
21 #include "Citation.h"
22 #include "Encoding.h"
23 #include "InsetIterator.h"
24 #include "Language.h"
25 #include "output_xhtml.h"
26 #include "Paragraph.h"
27 #include "TextClass.h"
28 #include "TocBackend.h"
29 
30 #include "support/convert.h"
31 #include "support/debug.h"
32 #include "support/docstream.h"
33 #include "support/gettext.h"
34 #include "support/lassert.h"
35 #include "support/lstrings.h"
36 #include "support/regex.h"
37 #include "support/textutils.h"
38 
39 #include <map>
40 #include <set>
41 
42 using namespace std;
43 using namespace lyx::support;
44 
45 
46 namespace lyx {
47 
48 namespace {
49 
50 // Remove placeholders from names
renormalize(docstring const & input)51 docstring renormalize(docstring const & input)
52 {
53 	docstring res = subst(input, from_ascii("$$space!"), from_ascii(" "));
54 	return subst(res, from_ascii("$$comma!"), from_ascii(","));
55 }
56 
57 
58 // Split the surname into prefix ("von-part") and family name
parseSurname(docstring const & sname)59 pair<docstring, docstring> parseSurname(docstring const & sname)
60 {
61 	// Split the surname into its tokens
62 	vector<docstring> pieces = getVectorFromString(sname, from_ascii(" "));
63 	if (pieces.size() < 2)
64 		return make_pair(docstring(), sname);
65 
66 	// Now we look for pieces that begin with a lower case letter.
67 	// All except for the very last token constitute the "von-part".
68 	docstring prefix;
69 	vector<docstring>::const_iterator it = pieces.begin();
70 	vector<docstring>::const_iterator const en = pieces.end();
71 	bool first = true;
72 	for (; it != en; ++it) {
73 		if ((*it).empty())
74 			continue;
75 		// If this is the last piece, then what we now have is
76 		// the family name, notwithstanding the casing.
77 		if (it + 1 == en)
78 			break;
79 		char_type const c = (*it)[0];
80 		// If the piece starts with a upper case char, we assume
81 		// this is part of the surname.
82 		if (!isLower(c))
83 			break;
84 		// Nothing of the former, so add this piece to the prename
85 		if (!first)
86 			prefix += " ";
87 		else
88 			first = false;
89 		prefix += *it;
90 	}
91 
92 	// Reconstruct the family name.
93 	// Note that if we left the loop with because it + 1 == en,
94 	// then this will still do the right thing, i.e., make surname
95 	// just be the last piece.
96 	docstring surname;
97 	first = true;
98 	for (; it != en; ++it) {
99 		if (!first)
100 			surname += " ";
101 		else
102 			first = false;
103 		surname += *it;
104 	}
105 	return make_pair(prefix, surname);
106 }
107 
108 
109 struct name_parts {
110 	docstring surname;
111 	docstring prename;
112 	docstring suffix;
113 	docstring prefix;
114 };
115 
116 
117 // gets the name parts (prename, surname, prefix, suffix) from an author-type string
nameParts(docstring const & iname)118 name_parts nameParts(docstring const & iname)
119 {
120 	name_parts res;
121 	if (iname.empty())
122 		return res;
123 
124 	// First we check for goupings (via {...}) and replace blanks and
125 	// commas inside groups with temporary placeholders
126 	docstring name;
127 	int gl = 0;
128 	docstring::const_iterator p = iname.begin();
129 	while (p != iname.end()) {
130 		// count grouping level
131 		if (*p == '{')
132 			++gl;
133 		else if (*p == '}')
134 			--gl;
135 		// generate string with probable placeholders
136 		if (*p == ' ' && gl > 0)
137 			name += from_ascii("$$space!");
138 		else if (*p == ',' && gl > 0)
139 			name += from_ascii("$$comma!");
140 		else
141 			name += *p;
142 		++p;
143 	}
144 
145 	// Now we look for a comma, and take the last name to be everything
146 	// preceding the right-most one, so that we also get the name suffix
147 	// (aka "jr" part).
148 	vector<docstring> pieces = getVectorFromString(name);
149 	if (pieces.size() > 1) {
150 		// Whether we have a name suffix or not, the prename is
151 		// always last item
152 		res.prename = renormalize(pieces.back());
153 		// The family name, conversely, is always the first item.
154 		// However, it might contain a prefix (aka "von" part)
155 		docstring const sname = pieces.front();
156 		res.prefix = renormalize(parseSurname(sname).first);
157 		res.surname = renormalize(parseSurname(sname).second);
158 		// If we have three pieces (the maximum allowed by BibTeX),
159 		// the second one is the name suffix.
160 		if (pieces.size() > 2)
161 			res.suffix = renormalize(pieces.at(1));
162 		return res;
163 	}
164 
165 	// OK, so now we want to look for the last name.
166 	// Split on spaces, to get various tokens.
167 	pieces = getVectorFromString(name, from_ascii(" "));
168 	// No space: Only a family name given
169 	if (pieces.size() < 2) {
170 		res.surname = renormalize(pieces.back());
171 		return res;
172 	}
173 	// If we get two pieces, assume "prename surname"
174 	if (pieces.size() == 2) {
175 		res.prename = renormalize(pieces.front());
176 		res.surname = renormalize(pieces.back());
177 		return res;
178 	}
179 
180 	// More than 3 pieces: A name prefix (aka "von" part) might be included.
181 	// We look for the first piece that begins with a lower case letter
182 	// (which is the name prefix, if it is not the last token) or the last token.
183 	docstring prename;
184 	vector<docstring>::const_iterator it = pieces.begin();
185 	vector<docstring>::const_iterator const en = pieces.end();
186 	bool first = true;
187 	for (; it != en; ++it) {
188 		if ((*it).empty())
189 			continue;
190 		char_type const c = (*it)[0];
191 		// If the piece starts with a lower case char, we assume
192 		// this is the name prefix and thus prename is complete.
193 		if (isLower(c))
194 			break;
195 		// Same if this is the last piece, which is always the surname.
196 		if (it + 1 == en)
197 			break;
198 		// Nothing of the former, so add this piece to the prename
199 		if (!first)
200 			prename += " ";
201 		else
202 			first = false;
203 		prename += *it;
204 	}
205 
206 	// Now reconstruct the family name and strip the prefix.
207 	// Note that if we left the loop because it + 1 == en,
208 	// then this will still do the right thing, i.e., make surname
209 	// just be the last piece.
210 	docstring surname;
211 	first = true;
212 	for (; it != en; ++it) {
213 		if (!first)
214 			surname += " ";
215 		else
216 			first = false;
217 		surname += *it;
218 	}
219 	res.prename = renormalize(prename);
220 	res.prefix = renormalize(parseSurname(surname).first);
221 	res.surname = renormalize(parseSurname(surname).second);
222 	return res;
223 }
224 
225 
constructName(docstring const & name,string const scheme)226 docstring constructName(docstring const & name, string const scheme)
227 {
228 	// re-constructs a name from name parts according
229 	// to a given scheme
230 	docstring const prename = nameParts(name).prename;
231 	docstring const surname = nameParts(name).surname;
232 	docstring const prefix = nameParts(name).prefix;
233 	docstring const suffix = nameParts(name).suffix;
234 	string res = scheme;
235 	static regex const reg1("(.*)(\\{%prename%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
236 	static regex const reg2("(.*)(\\{%suffix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
237 	static regex const reg3("(.*)(\\{%prefix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
238 	smatch sub;
239 	// Changing the first parameter of regex_match() may corrupt the
240 	// second one. In this case we use the temporary string tmp.
241 	if (regex_match(scheme, sub, reg1)) {
242 		res = sub.str(1);
243 		if (!prename.empty())
244 			res += sub.str(3);
245 		res += sub.str(5);
246 	}
247 	if (regex_match(res, sub, reg2)) {
248 		string tmp = sub.str(1);
249 		if (!suffix.empty())
250 			tmp += sub.str(3);
251 		res = tmp + sub.str(5);
252 	}
253 	if (regex_match(res, sub, reg3)) {
254 		string tmp = sub.str(1);
255 		if (!prefix.empty())
256 			tmp += sub.str(3);
257 		res = tmp + sub.str(5);
258 	}
259 	docstring result = from_ascii(res);
260 	result = subst(result, from_ascii("%prename%"), prename);
261 	result = subst(result, from_ascii("%surname%"), surname);
262 	result = subst(result, from_ascii("%prefix%"), prefix);
263 	result = subst(result, from_ascii("%suffix%"), suffix);
264 	return result;
265 }
266 
267 
getAuthors(docstring const & author)268 vector<docstring> const getAuthors(docstring const & author)
269 {
270 	// We check for goupings (via {...}) and only consider " and "
271 	// outside groups as author separator. This is to account
272 	// for cases such as {{Barnes and Noble, Inc.}}, which
273 	// need to be treated as one single family name.
274 	// We use temporary placeholders in order to differentiate the
275 	// diverse " and " cases.
276 
277 	// First, we temporarily replace all ampersands. It is rather unusual
278 	// in author names, but can happen (consider cases such as "C \& A Corp.").
279 	docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!"));
280 	// Then, we temporarily make all " and " strings to ampersands in order
281 	// to handle them later on a per-char level.
282 	iname = subst(iname, from_ascii(" and "), from_ascii(" & "));
283 	// Now we traverse through the string and replace the "&" by the proper
284 	// output in- and outside groups
285 	docstring name;
286 	int gl = 0;
287 	docstring::const_iterator p = iname.begin();
288 	while (p != iname.end()) {
289 		// count grouping level
290 		if (*p == '{')
291 			++gl;
292 		else if (*p == '}')
293 			--gl;
294 		// generate string with probable placeholders
295 		if (*p == '&') {
296 			if (gl > 0)
297 				// Inside groups, we output "and"
298 				name += from_ascii("and");
299 			else
300 				// Outside groups, we output a separator
301 				name += from_ascii("$$namesep!");
302 		}
303 		else
304 			name += *p;
305 		++p;
306 	}
307 
308 	// re-insert the literal ampersands
309 	name = subst(name, from_ascii("$$amp!"), from_ascii("&"));
310 
311 	// Now construct the actual vector
312 	return getVectorFromString(name, from_ascii(" $$namesep! "));
313 }
314 
315 
multipleAuthors(docstring const author)316 bool multipleAuthors(docstring const author)
317 {
318 	return getAuthors(author).size() > 1;
319 }
320 
321 
322 // converts a string containing LaTeX commands into unicode
323 // for display.
convertLaTeXCommands(docstring const & str)324 docstring convertLaTeXCommands(docstring const & str)
325 {
326 	docstring val = str;
327 	docstring ret;
328 
329 	bool scanning_cmd = false;
330 	bool scanning_math = false;
331 	bool escaped = false; // used to catch \$, etc.
332 	while (!val.empty()) {
333 		char_type const ch = val[0];
334 
335 		// if we're scanning math, we output everything until we
336 		// find an unescaped $, at which point we break out.
337 		if (scanning_math) {
338 			if (escaped)
339 				escaped = false;
340 			else if (ch == '\\')
341 				escaped = true;
342 			else if (ch == '$')
343 				scanning_math = false;
344 			ret += ch;
345 			val = val.substr(1);
346 			continue;
347 		}
348 
349 		// if we're scanning a command name, then we just
350 		// discard characters until we hit something that
351 		// isn't alpha.
352 		if (scanning_cmd) {
353 			if (isAlphaASCII(ch)) {
354 				val = val.substr(1);
355 				escaped = false;
356 				continue;
357 			}
358 			// so we're done with this command.
359 			// now we fall through and check this character.
360 			scanning_cmd = false;
361 		}
362 
363 		// was the last character a \? If so, then this is something like:
364 		// \\ or \$, so we'll just output it. That's probably not always right...
365 		if (escaped) {
366 			// exception: output \, as THIN SPACE
367 			if (ch == ',')
368 				ret.push_back(0x2009);
369 			else
370 				ret += ch;
371 			val = val.substr(1);
372 			escaped = false;
373 			continue;
374 		}
375 
376 		if (ch == '$') {
377 			ret += ch;
378 			val = val.substr(1);
379 			scanning_math = true;
380 			continue;
381 		}
382 
383 		// Change text mode accents in the form
384 		// {\v a} to \v{a} (see #9340).
385 		// FIXME: This is a sort of mini-tex2lyx.
386 		//        Use the real tex2lyx instead!
387 		static lyx::regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}");
388 		if (lyx::regex_search(to_utf8(val), tma_reg)) {
389 			val = val.substr(1);
390 			val.replace(2, 1, from_ascii("{"));
391 			continue;
392 		}
393 
394 		// Apart from the above, we just ignore braces
395 		if (ch == '{' || ch == '}') {
396 			val = val.substr(1);
397 			continue;
398 		}
399 
400 		// we're going to check things that look like commands, so if
401 		// this doesn't, just output it.
402 		if (ch != '\\') {
403 			ret += ch;
404 			val = val.substr(1);
405 			continue;
406 		}
407 
408 		// ok, could be a command of some sort
409 		// let's see if it corresponds to some unicode
410 		// unicodesymbols has things in the form: \"{u},
411 		// whereas we may see things like: \"u. So we'll
412 		// look for that and change it, if necessary.
413 		// FIXME: This is a sort of mini-tex2lyx.
414 		//        Use the real tex2lyx instead!
415 		static lyx::regex const reg("^\\\\\\W\\w");
416 		if (lyx::regex_search(to_utf8(val), reg)) {
417 			val.insert(3, from_ascii("}"));
418 			val.insert(2, from_ascii("{"));
419 		}
420 		bool termination;
421 		docstring rem;
422 		docstring const cnvtd = Encodings::fromLaTeXCommand(val,
423 				Encodings::TEXT_CMD, termination, rem);
424 		if (!cnvtd.empty()) {
425 			// it did, so we'll take that bit and proceed with what's left
426 			ret += cnvtd;
427 			val = rem;
428 			continue;
429 		}
430 		// it's a command of some sort
431 		scanning_cmd = true;
432 		escaped = true;
433 		val = val.substr(1);
434 	}
435 	return ret;
436 }
437 
438 
439 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
processRichtext(docstring const & str,bool richtext)440 docstring processRichtext(docstring const & str, bool richtext)
441 {
442 	docstring val = str;
443 	docstring ret;
444 
445 	bool scanning_rich = false;
446 	while (!val.empty()) {
447 		char_type const ch = val[0];
448 		if (ch == '{' && val.size() > 1 && val[1] == '!') {
449 			// beginning of rich text
450 			scanning_rich = true;
451 			val = val.substr(2);
452 			continue;
453 		}
454 		if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
455 			// end of rich text
456 			scanning_rich = false;
457 			val = val.substr(2);
458 			continue;
459 		}
460 		if (richtext) {
461 			if (scanning_rich)
462 				ret += ch;
463 			else {
464 				// we need to escape '<' and '>'
465 				if (ch == '<')
466 					ret += "&lt;";
467 				else if (ch == '>')
468 					ret += "&gt;";
469 				else
470 					ret += ch;
471 			}
472 		} else if (!scanning_rich /* && !richtext */)
473 			ret += ch;
474 		// else the character is discarded, which will happen only if
475 		// richtext == false and we are scanning rich text
476 		val = val.substr(1);
477 	}
478 	return ret;
479 }
480 
481 } // namespace
482 
483 
484 //////////////////////////////////////////////////////////////////////
485 //
486 // BibTeXInfo
487 //
488 //////////////////////////////////////////////////////////////////////
489 
BibTeXInfo(docstring const & key,docstring const & type)490 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
491 	: is_bibtex_(true), bib_key_(key), entry_type_(type), info_(),
492 	  modifier_(0)
493 {}
494 
495 
496 
getAuthorOrEditorList(Buffer const * buf,bool full,bool forceshort) const497 docstring const BibTeXInfo::getAuthorOrEditorList(Buffer const * buf,
498 					  bool full, bool forceshort) const
499 {
500 	docstring author = operator[]("author");
501 	if (author.empty())
502 		author = operator[]("editor");
503 
504 	return getAuthorList(buf, author, full, forceshort);
505 }
506 
507 
getAuthorList(Buffer const * buf,docstring const & author,bool const full,bool const forceshort,bool const allnames,bool const beginning) const508 docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
509 		docstring const & author, bool const full, bool const forceshort,
510 		bool const allnames, bool const beginning) const
511 {
512 	// Maxnames treshold depend on engine
513 	size_t maxnames = buf ?
514 		buf->params().documentClass().max_citenames() : 2;
515 
516 	if (!is_bibtex_) {
517 		docstring const opt = label();
518 		if (opt.empty())
519 			return docstring();
520 
521 		docstring authors;
522 		docstring const remainder = trim(split(opt, authors, '('));
523 		if (remainder.empty())
524 			// in this case, we didn't find a "(",
525 			// so we don't have author (year)
526 			return docstring();
527 		return authors;
528 	}
529 
530 	if (author.empty())
531 		return author;
532 
533 	// OK, we've got some names. Let's format them.
534 	// Try to split the author list
535 	vector<docstring> const authors = getAuthors(author);
536 
537 	docstring retval;
538 
539 	CiteEngineType const engine_type = buf ? buf->params().citeEngineType()
540 					       : ENGINE_TYPE_DEFAULT;
541 
542 	// These are defined in the styles
543 	string const etal =
544 		buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_etal")
545 		    : " et al.";
546 	string const namesep =
547 		buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_namesep")
548 		   : ", ";
549 	string const lastnamesep =
550 		buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_lastnamesep")
551 		    : ", and ";
552 	string const pairnamesep =
553 		buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_pairnamesep")
554 		     : " and ";
555 	string firstnameform =
556 			buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstnameform")
557 			     : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
558 	if (!beginning)
559 		firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstbynameform")
560 					     : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
561 	string othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!othernameform")
562 			     : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
563 	if (!beginning)
564 		othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!otherbynameform")
565 					     : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
566 	string citenameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!citenameform")
567 			     : "{%prefix%[[%prefix% ]]}%surname%";
568 
569 	// Shorten the list (with et al.) if forceshort is set
570 	// and the list can actually be shortened, else if maxcitenames
571 	// is passed and full is not set.
572 	bool shorten = forceshort && authors.size() > 1;
573 	vector<docstring>::const_iterator it = authors.begin();
574 	vector<docstring>::const_iterator en = authors.end();
575 	for (size_t i = 0; it != en; ++it, ++i) {
576 		if (i >= maxnames && !full) {
577 			shorten = true;
578 			break;
579 		}
580 		if (*it == "others") {
581 			retval += buf ? buf->B_(etal) : from_ascii(etal);
582 			break;
583 		}
584 		if (i > 0 && i == authors.size() - 1) {
585 			if (authors.size() == 2)
586 				retval += buf ? buf->B_(pairnamesep) : from_ascii(pairnamesep);
587 			else
588 				retval += buf ? buf->B_(lastnamesep) : from_ascii(lastnamesep);
589 		} else if (i > 0)
590 			retval += buf ? buf->B_(namesep) : from_ascii(namesep);
591 		if (allnames)
592 			retval += (i == 0) ? constructName(*it, firstnameform)
593 				: constructName(*it, othernameform);
594 		else
595 			retval += constructName(*it, citenameform);
596 	}
597 	if (shorten) {
598 		if (allnames)
599 			retval = constructName(authors[0], firstnameform) + (buf ? buf->B_(etal) : from_ascii(etal));
600 		else
601 			retval = constructName(authors[0], citenameform) + (buf ? buf->B_(etal) : from_ascii(etal));
602 	}
603 
604 	return convertLaTeXCommands(retval);
605 }
606 
607 
getYear() const608 docstring const BibTeXInfo::getYear() const
609 {
610 	if (is_bibtex_) {
611 		// first try legacy year field
612 		docstring year = operator[]("year");
613 		if (!year.empty())
614 			return year;
615 		// now try biblatex's date field
616 		year = operator[]("date");
617 		// Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
618 		// We only want the years.
619 		static regex const yreg("[-]?([\\d]{4}).*");
620 		static regex const ereg(".*/[-]?([\\d]{4}).*");
621 		smatch sm;
622 		string const date = to_utf8(year);
623 		if (!regex_match(date, sm, yreg))
624 			// cannot parse year.
625 			return docstring();
626 		year = from_ascii(sm[1]);
627 		// check for an endyear
628 		if (regex_match(date, sm, ereg))
629 			year += char_type(0x2013) + from_ascii(sm[1]);
630 		return year;
631 	}
632 
633 	docstring const opt = label();
634 	if (opt.empty())
635 		return docstring();
636 
637 	docstring authors;
638 	docstring tmp = split(opt, authors, '(');
639 	if (tmp.empty())
640 		// we don't have author (year)
641 		return docstring();
642 	docstring year;
643 	tmp = split(tmp, year, ')');
644 	return year;
645 }
646 
647 
648 namespace {
649 
650 docstring parseOptions(docstring const & format, string & optkey,
651 		    docstring & ifpart, docstring & elsepart);
652 
653 // Calls parseOptions to deal with an embedded option, such as:
654 //   {%number%[[, no.~%number%]]}
655 // which must appear at the start of format. ifelsepart gets the
656 // whole of the option, and we return what's left after the option.
657 // we return format if there is an error.
parseEmbeddedOption(docstring const & format,docstring & ifelsepart)658 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
659 {
660 	LASSERT(format[0] == '{' && format[1] == '%', return format);
661 	string optkey;
662 	docstring ifpart;
663 	docstring elsepart;
664 	docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
665 	if (format == rest) { // parse error
666 		LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
667 		return format;
668 	}
669 	LASSERT(rest.size() <= format.size(),
670 		{ ifelsepart = docstring(); return format; });
671 	ifelsepart = format.substr(0, format.size() - rest.size());
672 	return rest;
673 }
674 
675 
676 // Gets a "clause" from a format string, where the clause is
677 // delimited by '[[' and ']]'. Returns what is left after the
678 // clause is removed, and returns format if there is an error.
getClause(docstring const & format,docstring & clause)679 docstring getClause(docstring const & format, docstring & clause)
680 {
681 	docstring fmt = format;
682 	// remove '[['
683 	fmt = fmt.substr(2);
684 	// we'll remove characters from the front of fmt as we
685 	// deal with them
686 	while (!fmt.empty()) {
687 		if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
688 			// that's the end
689 			fmt = fmt.substr(2);
690 			break;
691 		}
692 		// check for an embedded option
693 		if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
694 			docstring part;
695 			docstring const rest = parseEmbeddedOption(fmt, part);
696 			if (fmt == rest) {
697 				LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
698 				return format;
699 			}
700 			clause += part;
701 			fmt = rest;
702 		} else { // it's just a normal character
703 				clause += fmt[0];
704 				fmt = fmt.substr(1);
705 		}
706 	}
707 	return fmt;
708 }
709 
710 
711 // parse an options string, which must appear at the start of the
712 // format parameter. puts the parsed bits in optkey, ifpart, and
713 // elsepart and returns what's left after the option is removed.
714 // if there's an error, it returns format itself.
parseOptions(docstring const & format,string & optkey,docstring & ifpart,docstring & elsepart)715 docstring parseOptions(docstring const & format, string & optkey,
716 		    docstring & ifpart, docstring & elsepart)
717 {
718 	LASSERT(format[0] == '{' && format[1] == '%', return format);
719 	// strip '{%'
720 	docstring fmt = format.substr(2);
721 	size_t pos = fmt.find('%'); // end of key
722 	if (pos == string::npos) {
723 		LYXERR0("Error parsing  `" << format <<"'. Can't find end of key.");
724 		return format;
725 	}
726 	optkey = to_utf8(fmt.substr(0, pos));
727 	fmt = fmt.substr(pos + 1);
728 	// [[format]] should be next
729 	if (fmt[0] != '[' || fmt[1] != '[') {
730 		LYXERR0("Error parsing  `" << format <<"'. Can't find '[[' after key.");
731 		return format;
732 	}
733 
734 	docstring curfmt = fmt;
735 	fmt = getClause(curfmt, ifpart);
736 	if (fmt == curfmt) {
737 		LYXERR0("Error parsing  `" << format <<"'. Couldn't get if clause.");
738 		return format;
739 	}
740 
741 	if (fmt[0] == '}') // we're done, no else clause
742 		return fmt.substr(1);
743 
744 	// else part should follow
745 	if (fmt[0] != '[' || fmt[1] != '[') {
746 		LYXERR0("Error parsing  `" << format <<"'. Can't find else clause.");
747 		return format;
748 	}
749 
750 	curfmt = fmt;
751 	fmt = getClause(curfmt, elsepart);
752 	// we should be done
753 	if (fmt == curfmt || fmt[0] != '}') {
754 		LYXERR0("Error parsing  `" << format <<"'. Can't find end of option.");
755 		return format;
756 	}
757 	return fmt.substr(1);
758 }
759 
760 
761 } // namespace
762 
763 /* FIXME
764 Bug #9131 revealed an oddity in how we are generating citation information
765 when more than one key is given. We end up building a longer and longer format
766 string as we go, which we then have to re-parse, over and over and over again,
767 rather than generating the information for the individual keys and then putting
768 all of that together. We do that to deal with the way separators work, from what
769 I can tell, but it still feels like a hack. Fixing this would require quite a
770 bit of work, however.
771 */
expandFormat(docstring const & format,BibTeXInfoList const xrefs,int & counter,Buffer const & buf,CiteItem const & ci,bool next,bool second) const772 docstring BibTeXInfo::expandFormat(docstring const & format,
773 		BibTeXInfoList const xrefs, int & counter, Buffer const & buf,
774 		CiteItem const & ci, bool next, bool second) const
775 {
776 	// incorrect use of macros could put us in an infinite loop
777 	static int const max_passes = 5000;
778 	// the use of overly large keys can lead to performance problems, due
779 	// to eventual attempts to convert LaTeX macros to unicode. See bug
780 	// #8944. By default, the size is limited to 128 (in CiteItem), but
781 	// for specific purposes (such as XHTML export), it needs to be enlarged
782 	// This is perhaps not the best solution, but it will have to do for now.
783 	size_t const max_keysize = ci.max_key_size;
784 	odocstringstream ret; // return value
785 	string key;
786 	bool scanning_key = false;
787 	bool scanning_rich = false;
788 
789 	CiteEngineType const engine_type = buf.params().citeEngineType();
790 	docstring fmt = format;
791 	// we'll remove characters from the front of fmt as we
792 	// deal with them
793 	while (!fmt.empty()) {
794 		if (counter > max_passes) {
795 			LYXERR0("Recursion limit reached while parsing `"
796 			        << format << "'.");
797 			return _("ERROR!");
798 		}
799 
800 		char_type thischar = fmt[0];
801 		if (thischar == '%') {
802 			// beginning or end of key
803 			if (scanning_key) {
804 				// end of key
805 				scanning_key = false;
806 				// so we replace the key with its value, which may be empty
807 				if (key[0] == '!') {
808 					// macro
809 					string const val =
810 						buf.params().documentClass().getCiteMacro(engine_type, key);
811 					fmt = from_utf8(val) + fmt.substr(1);
812 					counter += 1;
813 					continue;
814 				} else if (prefixIs(key, "B_")) {
815 					// a translatable bit (to the Buffer language)
816 					string const val =
817 						buf.params().documentClass().getCiteMacro(engine_type, key);
818 					docstring const trans =
819 						translateIfPossible(from_utf8(val), buf.params().language->code());
820 					ret << trans;
821 				} else if (key[0] == '_') {
822 					// a translatable bit (to the GUI language)
823 					string const val =
824 						buf.params().documentClass().getCiteMacro(engine_type, key);
825 					docstring const trans =
826 						translateIfPossible(from_utf8(val));
827 					ret << trans;
828 				} else {
829 					docstring const val =
830 						getValueForKey(key, buf, ci, xrefs, max_keysize);
831 					if (!scanning_rich)
832 						ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
833 					ret << val;
834 					if (!scanning_rich)
835 						ret << from_ascii("{!</span>!}");
836 				}
837 			} else {
838 				// beginning of key
839 				key.clear();
840 				scanning_key = true;
841 			}
842 		}
843 		else if (thischar == '{') {
844 			// beginning of option?
845 			if (scanning_key) {
846 				LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
847 				return _("ERROR!");
848 			}
849 			if (fmt.size() > 1) {
850 				if (fmt[1] == '%') {
851 					// it is the beginning of an optional format
852 					string optkey;
853 					docstring ifpart;
854 					docstring elsepart;
855 					docstring const newfmt =
856 						parseOptions(fmt, optkey, ifpart, elsepart);
857 					if (newfmt == fmt) // parse error
858 						return _("ERROR!");
859 					fmt = newfmt;
860 					docstring const val =
861 						getValueForKey(optkey, buf, ci, xrefs);
862 					if (optkey == "next" && next)
863 						ret << ifpart; // without expansion
864 					else if (optkey == "second" && second) {
865 						int newcounter = 0;
866 						ret << expandFormat(ifpart, xrefs, newcounter, buf,
867 							ci, next);
868 					} else if (!val.empty()) {
869 						int newcounter = 0;
870 						ret << expandFormat(ifpart, xrefs, newcounter, buf,
871 							ci, next);
872 					} else if (!elsepart.empty()) {
873 						int newcounter = 0;
874 						ret << expandFormat(elsepart, xrefs, newcounter, buf,
875 							ci, next);
876 					}
877 					// fmt will have been shortened for us already
878 					continue;
879 				}
880 				if (fmt[1] == '!') {
881 					// beginning of rich text
882 					scanning_rich = true;
883 					fmt = fmt.substr(2);
884 					ret << from_ascii("{!");
885 					continue;
886 				}
887 			}
888 			// we are here if '{' was not followed by % or !.
889 			// So it's just a character.
890 			ret << thischar;
891 		}
892 		else if (scanning_rich && thischar == '!'
893 		         && fmt.size() > 1 && fmt[1] == '}') {
894 			// end of rich text
895 			scanning_rich = false;
896 			fmt = fmt.substr(2);
897 			ret << from_ascii("!}");
898 			continue;
899 		}
900 		else if (scanning_key)
901 			key += char(thischar);
902 		else {
903 			try {
904 				ret.put(thischar);
905 			} catch (EncodingException & /* e */) {
906 				LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
907 			}
908 		}
909 		fmt = fmt.substr(1);
910 	} // for loop
911 	if (scanning_key) {
912 		LYXERR0("Never found end of key in `" << format << "'!");
913 		return _("ERROR!");
914 	}
915 	if (scanning_rich) {
916 		LYXERR0("Never found end of rich text in `" << format << "'!");
917 		return _("ERROR!");
918 	}
919 	return ret.str();
920 }
921 
922 
getInfo(BibTeXInfoList const xrefs,Buffer const & buf,CiteItem const & ci) const923 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const xrefs,
924 	Buffer const & buf, CiteItem const & ci) const
925 {
926 	bool const richtext = ci.richtext;
927 
928 	if (!richtext && !info_.empty())
929 		return info_;
930 	if (richtext && !info_richtext_.empty())
931 		return info_richtext_;
932 
933 	if (!is_bibtex_) {
934 		BibTeXInfo::const_iterator it = find(from_ascii("ref"));
935 		info_ = it->second;
936 		return info_;
937 	}
938 
939 	CiteEngineType const engine_type = buf.params().citeEngineType();
940 	DocumentClass const & dc = buf.params().documentClass();
941 	docstring const & format =
942 		from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
943 	int counter = 0;
944 	info_ = expandFormat(format, xrefs, counter, buf,
945 		ci, false, false);
946 
947 	if (info_.empty()) {
948 		// this probably shouldn't happen
949 		return info_;
950 	}
951 
952 	if (richtext) {
953 		info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
954 		return info_richtext_;
955 	}
956 
957 	info_ = convertLaTeXCommands(processRichtext(info_, false));
958 	return info_;
959 }
960 
961 
getLabel(BibTeXInfoList const xrefs,Buffer const & buf,docstring const & format,CiteItem const & ci,bool next,bool second) const962 docstring const BibTeXInfo::getLabel(BibTeXInfoList const xrefs,
963 	Buffer const & buf, docstring const & format,
964 	CiteItem const & ci, bool next, bool second) const
965 {
966 	docstring loclabel;
967 
968 	int counter = 0;
969 	loclabel = expandFormat(format, xrefs, counter, buf, ci, next, second);
970 
971 	if (!loclabel.empty() && !next) {
972 		loclabel = processRichtext(loclabel, ci.richtext);
973 		loclabel = convertLaTeXCommands(loclabel);
974 	}
975 
976 	return loclabel;
977 }
978 
979 
operator [](docstring const & field) const980 docstring const & BibTeXInfo::operator[](docstring const & field) const
981 {
982 	BibTeXInfo::const_iterator it = find(field);
983 	if (it != end())
984 		return it->second;
985 	static docstring const empty_value = docstring();
986 	return empty_value;
987 }
988 
989 
operator [](string const & field) const990 docstring const & BibTeXInfo::operator[](string const & field) const
991 {
992 	return operator[](from_ascii(field));
993 }
994 
995 
getValueForKey(string const & oldkey,Buffer const & buf,CiteItem const & ci,BibTeXInfoList const xrefs,size_t maxsize) const996 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
997 	CiteItem const & ci, BibTeXInfoList const xrefs, size_t maxsize) const
998 {
999 	// anything less is pointless
1000 	LASSERT(maxsize >= 16, maxsize = 16);
1001 	string key = oldkey;
1002 	bool cleanit = false;
1003 	if (prefixIs(oldkey, "clean:")) {
1004 		key = oldkey.substr(6);
1005 		cleanit = true;
1006 	}
1007 
1008 	docstring ret = operator[](key);
1009 	if (ret.empty() && !xrefs.empty()) {
1010 		vector<BibTeXInfo const *>::const_iterator it = xrefs.begin();
1011 		vector<BibTeXInfo const *>::const_iterator en = xrefs.end();
1012 		for (; it != en; ++it) {
1013 			if (*it && !(**it)[key].empty()) {
1014 				ret = (**it)[key];
1015 				break;
1016 			}
1017 		}
1018 	}
1019 	if (ret.empty()) {
1020 		// some special keys
1021 		// FIXME: dialog, textbefore and textafter have nothing to do with this
1022 		if (key == "dialog" && ci.context == CiteItem::Dialog)
1023 			ret = from_ascii("x"); // any non-empty string will do
1024 		else if (key == "export" && ci.context == CiteItem::Export)
1025 			ret = from_ascii("x"); // any non-empty string will do
1026 		else if (key == "ifstar" && ci.Starred)
1027 			ret = from_ascii("x"); // any non-empty string will do
1028 		else if (key == "ifqualified" && ci.isQualified)
1029 			ret = from_ascii("x"); // any non-empty string will do
1030 		else if (key == "entrytype")
1031 			ret = entry_type_;
1032 		else if (prefixIs(key, "ifentrytype:")
1033 			 && from_ascii(key.substr(12)) == entry_type_)
1034 			ret = from_ascii("x"); // any non-empty string will do
1035 		else if (key == "key")
1036 			ret = bib_key_;
1037 		else if (key == "label")
1038 			ret = label_;
1039 		else if (key == "modifier" && modifier_ != 0)
1040 			ret = modifier_;
1041 		else if (key == "numericallabel")
1042 			ret = cite_number_;
1043 		else if (prefixIs(key, "ifmultiple:")) {
1044 			// Return whether we have multiple authors
1045 			docstring const kind = operator[](from_ascii(key.substr(11)));
1046 			if (multipleAuthors(kind))
1047 				ret = from_ascii("x"); // any non-empty string will do
1048 		}
1049 		else if (prefixIs(key, "abbrvnames:")) {
1050 			// Special key to provide abbreviated name list,
1051 			// with respect to maxcitenames. Suitable for Bibliography
1052 			// beginnings.
1053 			docstring const kind = operator[](from_ascii(key.substr(11)));
1054 			ret = getAuthorList(&buf, kind, false, false, true);
1055 			if (ci.forceUpperCase && isLowerCase(ret[0]))
1056 				ret[0] = uppercase(ret[0]);
1057 		} else if (prefixIs(key, "fullnames:")) {
1058 			// Return a full name list. Suitable for Bibliography
1059 			// beginnings.
1060 			docstring const kind = operator[](from_ascii(key.substr(10)));
1061 			ret = getAuthorList(&buf, kind, true, false, true);
1062 			if (ci.forceUpperCase && isLowerCase(ret[0]))
1063 				ret[0] = uppercase(ret[0]);
1064 		} else if (prefixIs(key, "forceabbrvnames:")) {
1065 			// Special key to provide abbreviated name lists,
1066 			// irrespective of maxcitenames. Suitable for Bibliography
1067 			// beginnings.
1068 			docstring const kind = operator[](from_ascii(key.substr(15)));
1069 			ret = getAuthorList(&buf, kind, false, true, true);
1070 			if (ci.forceUpperCase && isLowerCase(ret[0]))
1071 				ret[0] = uppercase(ret[0]);
1072 		} else if (prefixIs(key, "abbrvbynames:")) {
1073 			// Special key to provide abbreviated name list,
1074 			// with respect to maxcitenames. Suitable for further names inside a
1075 			// bibliography item // (such as "ed. by ...")
1076 			docstring const kind = operator[](from_ascii(key.substr(11)));
1077 			ret = getAuthorList(&buf, kind, false, false, true, false);
1078 			if (ci.forceUpperCase && isLowerCase(ret[0]))
1079 				ret[0] = uppercase(ret[0]);
1080 		} else if (prefixIs(key, "fullbynames:")) {
1081 			// Return a full name list. Suitable for further names inside a
1082 			// bibliography item // (such as "ed. by ...")
1083 			docstring const kind = operator[](from_ascii(key.substr(10)));
1084 			ret = getAuthorList(&buf, kind, true, false, true, false);
1085 			if (ci.forceUpperCase && isLowerCase(ret[0]))
1086 				ret[0] = uppercase(ret[0]);
1087 		} else if (prefixIs(key, "forceabbrvbynames:")) {
1088 			// Special key to provide abbreviated name lists,
1089 			// irrespective of maxcitenames. Suitable for further names inside a
1090 			// bibliography item // (such as "ed. by ...")
1091 			docstring const kind = operator[](from_ascii(key.substr(15)));
1092 			ret = getAuthorList(&buf, kind, false, true, true, false);
1093 			if (ci.forceUpperCase && isLowerCase(ret[0]))
1094 				ret[0] = uppercase(ret[0]);
1095 		} else if (key == "abbrvciteauthor") {
1096 			// Special key to provide abbreviated author or
1097 			// editor names (suitable for citation labels),
1098 			// with respect to maxcitenames.
1099 			ret = getAuthorOrEditorList(&buf, false, false);
1100 			if (ci.forceUpperCase && isLowerCase(ret[0]))
1101 				ret[0] = uppercase(ret[0]);
1102 		} else if (key == "fullciteauthor") {
1103 			// Return a full author or editor list (for citation labels)
1104 			ret = getAuthorOrEditorList(&buf, true, false);
1105 			if (ci.forceUpperCase && isLowerCase(ret[0]))
1106 				ret[0] = uppercase(ret[0]);
1107 		} else if (key == "forceabbrvciteauthor") {
1108 			// Special key to provide abbreviated author or
1109 			// editor names (suitable for citation labels),
1110 			// irrespective of maxcitenames.
1111 			ret = getAuthorOrEditorList(&buf, false, true);
1112 			if (ci.forceUpperCase && isLowerCase(ret[0]))
1113 				ret[0] = uppercase(ret[0]);
1114 		} else if (key == "bibentry") {
1115 			// Special key to provide the full bibliography entry: see getInfo()
1116 			CiteEngineType const engine_type = buf.params().citeEngineType();
1117 			DocumentClass const & dc = buf.params().documentClass();
1118 			docstring const & format =
1119 				from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_), false));
1120 			int counter = 0;
1121 			ret = expandFormat(format, xrefs, counter, buf, ci, false, false);
1122 		} else if (key == "textbefore")
1123 			ret = ci.textBefore;
1124 		else if (key == "textafter")
1125 			ret = ci.textAfter;
1126 		else if (key == "curpretext")
1127 			ret = ci.getPretexts()[bib_key_];
1128 		else if (key == "curposttext")
1129 			ret = ci.getPosttexts()[bib_key_];
1130 		else if (key == "year")
1131 			ret = getYear();
1132 	}
1133 
1134 	if (cleanit)
1135 		ret = html::cleanAttr(ret);
1136 
1137 	// make sure it is not too big
1138 	support::truncateWithEllipsis(ret, maxsize);
1139 	return ret;
1140 }
1141 
1142 
1143 //////////////////////////////////////////////////////////////////////
1144 //
1145 // BiblioInfo
1146 //
1147 //////////////////////////////////////////////////////////////////////
1148 
1149 namespace {
1150 
1151 // A functor for use with sort, leading to case insensitive sorting
1152 class compareNoCase: public binary_function<docstring, docstring, bool>
1153 {
1154 public:
operator ()(docstring const & s1,docstring const & s2) const1155 	bool operator()(docstring const & s1, docstring const & s2) const {
1156 		return compare_no_case(s1, s2) < 0;
1157 	}
1158 };
1159 
1160 } // namespace
1161 
1162 
getXRefs(BibTeXInfo const & data,bool const nested) const1163 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
1164 {
1165 	vector<docstring> result;
1166 	if (!data.isBibTeX())
1167 		return result;
1168 	// Legacy crossref field. This is not nestable.
1169 	if (!nested && !data["crossref"].empty()) {
1170 		docstring const xrefkey = data["crossref"];
1171 		result.push_back(xrefkey);
1172 		// However, check for nested xdatas
1173 		BiblioInfo::const_iterator it = find(xrefkey);
1174 		if (it != end()) {
1175 			BibTeXInfo const & xref = it->second;
1176 			vector<docstring> const nxdata = getXRefs(xref, true);
1177 			if (!nxdata.empty())
1178 				result.insert(result.end(), nxdata.begin(), nxdata.end());
1179 		}
1180 	}
1181 	// Biblatex's xdata field. Infinitely nestable.
1182 	// XData field can consist of a comma-separated list of keys
1183 	vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
1184 	if (!xdatakeys.empty()) {
1185 		vector<docstring>::const_iterator xit = xdatakeys.begin();
1186 		vector<docstring>::const_iterator xen = xdatakeys.end();
1187 		for (; xit != xen; ++xit) {
1188 			docstring const xdatakey = *xit;
1189 			result.push_back(xdatakey);
1190 			BiblioInfo::const_iterator it = find(xdatakey);
1191 			if (it != end()) {
1192 				BibTeXInfo const & xdata = it->second;
1193 				vector<docstring> const nxdata = getXRefs(xdata, true);
1194 				if (!nxdata.empty())
1195 					result.insert(result.end(), nxdata.begin(), nxdata.end());
1196 			}
1197 		}
1198 	}
1199 	return result;
1200 }
1201 
1202 
getKeys() const1203 vector<docstring> const BiblioInfo::getKeys() const
1204 {
1205 	vector<docstring> bibkeys;
1206 	BiblioInfo::const_iterator it  = begin();
1207 	for (; it != end(); ++it)
1208 		bibkeys.push_back(it->first);
1209 	sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
1210 	return bibkeys;
1211 }
1212 
1213 
getFields() const1214 vector<docstring> const BiblioInfo::getFields() const
1215 {
1216 	vector<docstring> bibfields;
1217 	set<docstring>::const_iterator it = field_names_.begin();
1218 	set<docstring>::const_iterator end = field_names_.end();
1219 	for (; it != end; ++it)
1220 		bibfields.push_back(*it);
1221 	sort(bibfields.begin(), bibfields.end());
1222 	return bibfields;
1223 }
1224 
1225 
getEntries() const1226 vector<docstring> const BiblioInfo::getEntries() const
1227 {
1228 	vector<docstring> bibentries;
1229 	set<docstring>::const_iterator it = entry_types_.begin();
1230 	set<docstring>::const_iterator end = entry_types_.end();
1231 	for (; it != end; ++it)
1232 		bibentries.push_back(*it);
1233 	sort(bibentries.begin(), bibentries.end());
1234 	return bibentries;
1235 }
1236 
1237 
getAuthorOrEditorList(docstring const & key,Buffer const & buf) const1238 docstring const BiblioInfo::getAuthorOrEditorList(docstring const & key, Buffer const & buf) const
1239 {
1240 	BiblioInfo::const_iterator it = find(key);
1241 	if (it == end())
1242 		return docstring();
1243 	BibTeXInfo const & data = it->second;
1244 	return data.getAuthorOrEditorList(&buf, false);
1245 }
1246 
1247 
getCiteNumber(docstring const & key) const1248 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
1249 {
1250 	BiblioInfo::const_iterator it = find(key);
1251 	if (it == end())
1252 		return docstring();
1253 	BibTeXInfo const & data = it->second;
1254 	return data.citeNumber();
1255 }
1256 
1257 
getYear(docstring const & key,bool use_modifier) const1258 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
1259 {
1260 	BiblioInfo::const_iterator it = find(key);
1261 	if (it == end())
1262 		return docstring();
1263 	BibTeXInfo const & data = it->second;
1264 	docstring year = data.getYear();
1265 	if (year.empty()) {
1266 		// let's try the crossrefs
1267 		vector<docstring> const xrefs = getXRefs(data);
1268 		if (xrefs.empty())
1269 			// no luck
1270 			return docstring();
1271 		vector<docstring>::const_iterator it = xrefs.begin();
1272 		vector<docstring>::const_iterator en = xrefs.end();
1273 		for (; it != en; ++it) {
1274 			BiblioInfo::const_iterator const xrefit = find(*it);
1275 			if (xrefit == end())
1276 				continue;
1277 			BibTeXInfo const & xref_data = xrefit->second;
1278 			year = xref_data.getYear();
1279 			if (!year.empty())
1280 				// success!
1281 				break;
1282 		}
1283 	}
1284 	if (use_modifier && data.modifier() != 0)
1285 		year += data.modifier();
1286 	return year;
1287 }
1288 
1289 
getYear(docstring const & key,Buffer const & buf,bool use_modifier) const1290 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
1291 {
1292 	docstring const year = getYear(key, use_modifier);
1293 	if (year.empty())
1294 		return buf.B_("No year");
1295 	return year;
1296 }
1297 
1298 
getInfo(docstring const & key,Buffer const & buf,CiteItem const & ci) const1299 docstring const BiblioInfo::getInfo(docstring const & key,
1300 	Buffer const & buf, CiteItem const & ci) const
1301 {
1302 	BiblioInfo::const_iterator it = find(key);
1303 	if (it == end())
1304 		return docstring(_("Bibliography entry not found!"));
1305 	BibTeXInfo const & data = it->second;
1306 	BibTeXInfoList xrefptrs;
1307 	vector<docstring> const xrefs = getXRefs(data);
1308 	if (!xrefs.empty()) {
1309 		vector<docstring>::const_iterator it = xrefs.begin();
1310 		vector<docstring>::const_iterator en = xrefs.end();
1311 		for (; it != en; ++it) {
1312 			BiblioInfo::const_iterator const xrefit = find(*it);
1313 			if (xrefit != end())
1314 				xrefptrs.push_back(&(xrefit->second));
1315 		}
1316 	}
1317 	return data.getInfo(xrefptrs, buf, ci);
1318 }
1319 
1320 
getLabel(vector<docstring> keys,Buffer const & buf,string const & style,CiteItem const & ci) const1321 docstring const BiblioInfo::getLabel(vector<docstring> keys,
1322 	Buffer const & buf, string const & style, CiteItem const & ci) const
1323 {
1324 	size_t max_size = ci.max_size;
1325 	// shorter makes no sense
1326 	LASSERT(max_size >= 16, max_size = 16);
1327 
1328 	// we can't display more than 10 of these, anyway
1329 	bool const too_many_keys = keys.size() > 10;
1330 	if (too_many_keys)
1331 		keys.resize(10);
1332 
1333 	CiteEngineType const engine_type = buf.params().citeEngineType();
1334 	DocumentClass const & dc = buf.params().documentClass();
1335 	docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, false, "cite"));
1336 	docstring ret = format;
1337 	vector<docstring>::const_iterator key = keys.begin();
1338 	vector<docstring>::const_iterator ken = keys.end();
1339 	for (int i = 0; key != ken; ++key, ++i) {
1340 		BiblioInfo::const_iterator it = find(*key);
1341 		BibTeXInfo empty_data;
1342 		empty_data.key(*key);
1343 		BibTeXInfo & data = empty_data;
1344 		vector<BibTeXInfo const *> xrefptrs;
1345 		if (it != end()) {
1346 			data = it->second;
1347 			vector<docstring> const xrefs = getXRefs(data);
1348 			if (!xrefs.empty()) {
1349 				vector<docstring>::const_iterator it = xrefs.begin();
1350 				vector<docstring>::const_iterator en = xrefs.end();
1351 				for (; it != en; ++it) {
1352 					BiblioInfo::const_iterator const xrefit = find(*it);
1353 					if (xrefit != end())
1354 						xrefptrs.push_back(&(xrefit->second));
1355 				}
1356 			}
1357 		}
1358 		ret = data.getLabel(xrefptrs, buf, ret, ci, key + 1 != ken, i == 1);
1359 	}
1360 
1361 	if (too_many_keys)
1362 		ret.push_back(0x2026);//HORIZONTAL ELLIPSIS
1363 	support::truncateWithEllipsis(ret, max_size);
1364 	return ret;
1365 }
1366 
1367 
isBibtex(docstring const & key) const1368 bool BiblioInfo::isBibtex(docstring const & key) const
1369 {
1370 	docstring key1;
1371 	split(key, key1, ',');
1372 	BiblioInfo::const_iterator it = find(key1);
1373 	if (it == end())
1374 		return false;
1375 	return it->second.isBibTeX();
1376 }
1377 
1378 
getCiteStrings(vector<docstring> const & keys,vector<CitationStyle> const & styles,Buffer const & buf,CiteItem const & ci) const1379 BiblioInfo::CiteStringMap const BiblioInfo::getCiteStrings(
1380 	vector<docstring> const & keys, vector<CitationStyle> const & styles,
1381 	Buffer const & buf, CiteItem const & ci) const
1382 {
1383 	if (empty())
1384 		return vector<pair<docstring,docstring>>();
1385 
1386 	string style;
1387 	CiteStringMap csm(styles.size());
1388 	for (size_t i = 0; i != csm.size(); ++i) {
1389 		style = styles[i].name;
1390 		csm[i] = make_pair(from_ascii(style), getLabel(keys, buf, style, ci));
1391 	}
1392 
1393 	return csm;
1394 }
1395 
1396 
mergeBiblioInfo(BiblioInfo const & info)1397 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
1398 {
1399 	bimap_.insert(info.begin(), info.end());
1400 	field_names_.insert(info.field_names_.begin(), info.field_names_.end());
1401 	entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
1402 }
1403 
1404 
1405 namespace {
1406 
1407 // used in xhtml to sort a list of BibTeXInfo objects
lSorter(BibTeXInfo const * lhs,BibTeXInfo const * rhs)1408 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
1409 {
1410 	docstring const lauth = lhs->getAuthorOrEditorList();
1411 	docstring const rauth = rhs->getAuthorOrEditorList();
1412 	docstring const lyear = lhs->getYear();
1413 	docstring const ryear = rhs->getYear();
1414 	docstring const ltitl = lhs->operator[]("title");
1415 	docstring const rtitl = rhs->operator[]("title");
1416 	return  (lauth < rauth)
1417 		|| (lauth == rauth && lyear < ryear)
1418 		|| (lauth == rauth && lyear == ryear && ltitl < rtitl);
1419 }
1420 
1421 } // namespace
1422 
1423 
collectCitedEntries(Buffer const & buf)1424 void BiblioInfo::collectCitedEntries(Buffer const & buf)
1425 {
1426 	cited_entries_.clear();
1427 	// We are going to collect all the citation keys used in the document,
1428 	// getting them from the TOC.
1429 	// FIXME We may want to collect these differently, in the first case,
1430 	// so that we might have them in order of appearance.
1431 	set<docstring> citekeys;
1432 	shared_ptr<Toc const> toc = buf.tocBackend().toc("citation");
1433 	Toc::const_iterator it = toc->begin();
1434 	Toc::const_iterator const en = toc->end();
1435 	for (; it != en; ++it) {
1436 		if (it->str().empty())
1437 			continue;
1438 		vector<docstring> const keys = getVectorFromString(it->str());
1439 		citekeys.insert(keys.begin(), keys.end());
1440 	}
1441 	if (citekeys.empty())
1442 		return;
1443 
1444 	// We have a set of the keys used in this document.
1445 	// We will now convert it to a list of the BibTeXInfo objects used in
1446 	// this document...
1447 	vector<BibTeXInfo const *> bi;
1448 	set<docstring>::const_iterator cit = citekeys.begin();
1449 	set<docstring>::const_iterator const cen = citekeys.end();
1450 	for (; cit != cen; ++cit) {
1451 		BiblioInfo::const_iterator const bt = find(*cit);
1452 		if (bt == end() || !bt->second.isBibTeX())
1453 			continue;
1454 		bi.push_back(&(bt->second));
1455 	}
1456 	// ...and sort it.
1457 	sort(bi.begin(), bi.end(), lSorter);
1458 
1459 	// Now we can write the sorted keys
1460 	vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
1461 	vector<BibTeXInfo const *>::const_iterator ben = bi.end();
1462 	for (; bit != ben; ++bit)
1463 		cited_entries_.push_back((*bit)->key());
1464 }
1465 
1466 
makeCitationLabels(Buffer const & buf)1467 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1468 {
1469 	collectCitedEntries(buf);
1470 	CiteEngineType const engine_type = buf.params().citeEngineType();
1471 	bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1472 
1473 	int keynumber = 0;
1474 	char modifier = 0;
1475 	// used to remember the last one we saw
1476 	// we'll be comparing entries to see if we need to add
1477 	// modifiers, like "1984a"
1478 	map<docstring, BibTeXInfo>::iterator last = bimap_.end();
1479 
1480 	vector<docstring>::const_iterator it = cited_entries_.begin();
1481 	vector<docstring>::const_iterator const en = cited_entries_.end();
1482 	for (; it != en; ++it) {
1483 		map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1484 		// this shouldn't happen, but...
1485 		if (biit == bimap_.end())
1486 			// ...fail gracefully, anyway.
1487 			continue;
1488 		BibTeXInfo & entry = biit->second;
1489 		if (numbers) {
1490 			docstring const num = convert<docstring>(++keynumber);
1491 			entry.setCiteNumber(num);
1492 		} else {
1493 			// The first test here is checking whether this is the first
1494 			// time through the loop. If so, then we do not have anything
1495 			// with which to compare.
1496 			if (last != bimap_.end()
1497 			    && entry.getAuthorOrEditorList() == last->second.getAuthorOrEditorList()
1498 			    // we access the year via getYear() so as to get it from the xref,
1499 			    // if we need to do so
1500 			    && getYear(entry.key()) == getYear(last->second.key())) {
1501 				if (modifier == 0) {
1502 					// so the last one should have been 'a'
1503 					last->second.setModifier('a');
1504 					modifier = 'b';
1505 				} else if (modifier == 'z')
1506 					modifier = 'A';
1507 				else
1508 					modifier++;
1509 			} else {
1510 				modifier = 0;
1511 			}
1512 			entry.setModifier(modifier);
1513 			// remember the last one
1514 			last = biit;
1515 		}
1516 	}
1517 	// Set the labels
1518 	it = cited_entries_.begin();
1519 	for (; it != en; ++it) {
1520 		map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1521 		// this shouldn't happen, but...
1522 		if (biit == bimap_.end())
1523 			// ...fail gracefully, anyway.
1524 			continue;
1525 		BibTeXInfo & entry = biit->second;
1526 		if (numbers) {
1527 			entry.label(entry.citeNumber());
1528 		} else {
1529 			docstring const auth = entry.getAuthorOrEditorList(&buf, false);
1530 			// we do it this way so as to access the xref, if necessary
1531 			// note that this also gives us the modifier
1532 			docstring const year = getYear(*it, buf, true);
1533 			if (!auth.empty() && !year.empty())
1534 				entry.label(auth + ' ' + year);
1535 			else
1536 				entry.label(entry.key());
1537 		}
1538 	}
1539 }
1540 
1541 
1542 //////////////////////////////////////////////////////////////////////
1543 //
1544 // CitationStyle
1545 //
1546 //////////////////////////////////////////////////////////////////////
1547 
1548 
citationStyleFromString(string const & command,BufferParams const & params)1549 CitationStyle citationStyleFromString(string const & command,
1550 				      BufferParams const & params)
1551 {
1552 	CitationStyle cs;
1553 	if (command.empty())
1554 		return cs;
1555 
1556 	string const alias = params.getCiteAlias(command);
1557 	string cmd = alias.empty() ? command : alias;
1558 	if (isUpperCase(command[0])) {
1559 		cs.forceUpperCase = true;
1560 		cmd[0] = lowercase(cmd[0]);
1561 	}
1562 
1563 	size_t const n = command.size() - 1;
1564 	if (command[n] == '*') {
1565 		cs.hasStarredVersion = true;
1566 		if (suffixIs(cmd, '*'))
1567 			cmd = cmd.substr(0, cmd.size() - 1);
1568 	}
1569 
1570 	cs.name = cmd;
1571 	return cs;
1572 }
1573 
1574 
citationStyleToString(const CitationStyle & cs,bool const latex)1575 string citationStyleToString(const CitationStyle & cs, bool const latex)
1576 {
1577 	string cmd = latex ? cs.cmd : cs.name;
1578 	if (cs.forceUpperCase)
1579 		cmd[0] = uppercase(cmd[0]);
1580 	if (cs.hasStarredVersion)
1581 		cmd += '*';
1582 	return cmd;
1583 }
1584 
1585 } // namespace lyx
1586