1 /**
2  * \file tex2lyx.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author André Pönitz
7  *
8  * Full author contact details are available in file CREDITS.
9  */
10 
11 // {[(
12 
13 #include <config.h>
14 #include <version.h>
15 
16 #include "tex2lyx.h"
17 
18 #include "Context.h"
19 #include "Encoding.h"
20 #include "Layout.h"
21 #include "LayoutFile.h"
22 #include "LayoutModuleList.h"
23 #include "ModuleList.h"
24 #include "Preamble.h"
25 #include "TextClass.h"
26 
27 #include "support/ConsoleApplication.h"
28 #include "support/convert.h"
29 #include "support/ExceptionMessage.h"
30 #include "support/filetools.h"
31 #include "support/lassert.h"
32 #include "support/lstrings.h"
33 #include "support/os.h"
34 #include "support/Package.h"
35 #include "support/Systemcall.h"
36 
37 #include <cstdlib>
38 #include <algorithm>
39 #include <exception>
40 #include <iostream>
41 #include <string>
42 #include <sstream>
43 #include <vector>
44 #include <map>
45 
46 using namespace std;
47 using namespace lyx::support;
48 using namespace lyx::support::os;
49 
50 namespace lyx {
51 
trimSpaceAndEol(string const & a)52 string const trimSpaceAndEol(string const & a)
53 {
54 	return trim(a, " \t\n\r");
55 }
56 
57 
/// Cut \p s at every \p delim and append the pieces to \p result.
/// The pieces are extracted with getline(), so an empty \p s appends
/// nothing and a trailing delimiter does not yield a final empty piece.
/// Entries already present in \p result are kept.
void split(string const & s, vector<string> & result, char delim)
{
	istringstream input(s);
	for (string piece; getline(input, piece, delim); )
		result.push_back(piece);
}
67 
68 
/// Concatenate the elements of \p input, writing \p delim between each
/// pair of neighbours. An empty \p input gives an empty string.
string join(vector<string> const & input, char const * delim)
{
	ostringstream os;
	vector<string>::const_iterator it = input.begin();
	vector<string>::const_iterator const last = input.end();
	if (it != last) {
		// The first element is written without a leading delimiter.
		os << *it;
		for (++it; it != last; ++it) {
			os << delim;
			os << *it;
		}
	}
	return os.str();
}
79 
80 
/// Look up \p str in the null-terminated array \p what.
/// \return a pointer to the matching array slot, or a null pointer if
/// no entry compares equal to \p str.
char const * const * is_known(string const & str, char const * const * what)
{
	while (*what) {
		if (str == *what)
			return what;
		++what;
	}
	return nullptr;
}
88 
89 
90 
// Current stack of nested environments. The innermost environment is the
// last element; active_environment() reports it.
vector<string> active_environments;
93 
94 
active_environment()95 string active_environment()
96 {
97 	return active_environments.empty() ? string() : active_environments.back();
98 }
99 
100 
/// Text class of the converted document. Module layout files are read
/// into it by addModule() and checkModule().
TeX2LyXDocClass textclass;
/// Argument lists of known commands; filled by add_known_command() and
/// from the syntax file by read_syntaxfile().
CommandMap known_commands;
/// Argument lists of known environments; filled by
/// add_known_environment() and from the syntax file.
CommandMap known_environments;
/// Argument lists read from the "mathenvironments" section of the
/// syntax file.
CommandMap known_math_environments;
/// Commands registered with a non-empty definition; checkModule()
/// compares these definitions with the preambles of module styles.
FullCommandMap possible_textclass_commands;
/// Environments registered with a non-empty begin or end definition.
FullEnvironmentMap possible_textclass_environments;
/// Theorems registered with a non-empty definition.
FullCommandMap possible_textclass_theorems;
int const LYX_FORMAT = LYX_FORMAT_TEX2LYX;

/// used modules
LayoutModuleList used_modules;
/// Modules named with the -m switch; tex2lyx() loads them after the
/// preamble has been parsed and then clears the list.
vector<string> preloaded_modules;
113 
114 
convertArgs(string const & o1,bool o2,vector<ArgumentType> & arguments)115 void convertArgs(string const & o1, bool o2, vector<ArgumentType> & arguments)
116 {
117 	// We have to handle the following cases:
118 	// definition                      o1    o2    invocation result
119 	// \newcommand{\foo}{bar}          ""    false \foo       bar
120 	// \newcommand{\foo}[1]{bar #1}    "[1]" false \foo{x}    bar x
121 	// \newcommand{\foo}[1][]{bar #1}  "[1]" true  \foo       bar
122 	// \newcommand{\foo}[1][]{bar #1}  "[1]" true  \foo[x]    bar x
123 	// \newcommand{\foo}[1][x]{bar #1} "[1]" true  \foo[x]    bar x
124 	unsigned int nargs = 0;
125 	string const opt1 = rtrim(ltrim(o1, "["), "]");
126 	if (isStrUnsignedInt(opt1)) {
127 		// The command has arguments
128 		nargs = convert<unsigned int>(opt1);
129 		if (nargs > 0 && o2) {
130 			// The first argument is optional
131 			arguments.push_back(optional);
132 			--nargs;
133 		}
134 	}
135 	for (unsigned int i = 0; i < nargs; ++i)
136 		arguments.push_back(required);
137 }
138 
139 
add_known_command(string const & command,string const & o1,bool o2,docstring const & definition)140 void add_known_command(string const & command, string const & o1,
141                        bool o2, docstring const & definition)
142 {
143 	vector<ArgumentType> arguments;
144 	convertArgs(o1, o2, arguments);
145 	known_commands[command] = arguments;
146 	if (!definition.empty())
147 		possible_textclass_commands[command] =
148 			FullCommand(arguments, definition);
149 }
150 
151 
add_known_environment(string const & environment,string const & o1,bool o2,docstring const & beg,docstring const & end)152 void add_known_environment(string const & environment, string const & o1,
153                            bool o2, docstring const & beg, docstring const &end)
154 {
155 	vector<ArgumentType> arguments;
156 	convertArgs(o1, o2, arguments);
157 	known_environments[environment] = arguments;
158 	if (!beg.empty() || ! end.empty())
159 		possible_textclass_environments[environment] =
160 			FullEnvironment(arguments, beg, end);
161 }
162 
163 
add_known_theorem(string const & theorem,string const & o1,bool o2,docstring const & definition)164 void add_known_theorem(string const & theorem, string const & o1,
165                        bool o2, docstring const & definition)
166 {
167 	vector<ArgumentType> arguments;
168 	convertArgs(o1, o2, arguments);
169 	if (!definition.empty())
170 		possible_textclass_theorems[theorem] =
171 			FullCommand(arguments, definition);
172 }
173 
174 
findLayoutWithoutModule(TextClass const & tc,string const & name,bool command,string const & latexparam)175 Layout const * findLayoutWithoutModule(TextClass const & tc,
176                                        string const & name, bool command,
177                                        string const & latexparam)
178 {
179 	for (auto const & lay : tc) {
180 		if (lay.latexname() == name &&
181 		    (latexparam.empty() ||
182 		     (!lay.latexparam().empty() && suffixIs(latexparam, lay.latexparam()))) &&
183 		    ((command && lay.isCommand()) || (!command && lay.isEnvironment())))
184 			return &lay;
185 	}
186 	return 0;
187 }
188 
189 
findInsetLayoutWithoutModule(TextClass const & tc,string const & name,bool command,string const & latexparam)190 InsetLayout const * findInsetLayoutWithoutModule(TextClass const & tc,
191                                                  string const & name, bool command,
192                                                  string const & latexparam)
193 {
194 	for (auto const & ilay : tc.insetLayouts()) {
195 		if (ilay.second.latexname() == name &&
196 		    (latexparam.empty() ||
197 		     (!ilay.second.latexparam().empty() && suffixIs(latexparam, ilay.second.latexparam()))) &&
198 		    ((command && ilay.second.latextype() == InsetLayout::COMMAND) ||
199 		     (!command && ilay.second.latextype() == InsetLayout::ENVIRONMENT)))
200 			return &(ilay.second);
201 	}
202 	return 0;
203 }
204 
205 
206 namespace {
207 
/// Maps a module name to the document class that initModules() builds
/// from the base class plus that module.
typedef map<string, DocumentClassPtr> ModuleMap;
/// Candidate modules searched by checkModule(). An entry is erased as
/// soon as its module has been read into the text class.
ModuleMap modules;
210 
211 
/*!
 * Append \p module to \p m after making sure its requirements are met.
 * A module with requirements is accepted as soon as one of them is already
 * in \p m or is provided by \p baseClass. Otherwise the requirements that
 * \p baseClass does not exclude are tried recursively, and the first one
 * that can be added satisfies the requirement (it is appended to \p m
 * before \p module).
 * \param visited modules whose requirements are currently being resolved;
 *        used to detect circular dependencies
 * \return false if the module is unknown, part of a dependency cycle, has
 *         no satisfiable requirement, or is rejected by moduleCanBeAdded()
 */
bool addModule(string const & module, LayoutFile const & baseClass, LayoutModuleList & m, vector<string> & visited)
{
	// avoid endless loop for circular dependency
	vector<string>::const_iterator const vb = visited.begin();
	vector<string>::const_iterator const ve = visited.end();
	if (find(vb, ve, module) != ve) {
		cerr << "Circular dependency detected for module " << module << '\n';
		return false;
	}
	LyXModule const * const lm = theModuleList[module];
	if (!lm) {
		cerr << "Could not find module " << module << " in module list.\n";
		return false;
	}
	// Becomes true once at least one requirement of the module is met
	bool foundone = false;
	LayoutModuleList::const_iterator const exclmodstart = baseClass.excludedModules().begin();
	LayoutModuleList::const_iterator const exclmodend = baseClass.excludedModules().end();
	LayoutModuleList::const_iterator const provmodstart = baseClass.providedModules().begin();
	LayoutModuleList::const_iterator const provmodend = baseClass.providedModules().end();
	vector<string> const reqs = lm->getRequiredModules();
	if (reqs.empty())
		foundone = true;
	else {
		// First pass: is a requirement already in m or provided by
		// the base class?
		LayoutModuleList::const_iterator mit = m.begin();
		LayoutModuleList::const_iterator men = m.end();
		vector<string>::const_iterator rit = reqs.begin();
		vector<string>::const_iterator ren = reqs.end();
		for (; rit != ren; ++rit) {
			if (find(mit, men, *rit) != men) {
				foundone = true;
				break;
			}
			if (find(provmodstart, provmodend, *rit) != provmodend) {
				foundone = true;
				break;
			}
		}
		if (!foundone) {
			// Second pass: try to add one of the requirements that
			// the base class does not exclude. The module stays in
			// visited only for the duration of the recursion.
			visited.push_back(module);
			for (rit = reqs.begin(); rit != ren; ++rit) {
				if (find(exclmodstart, exclmodend, *rit) == exclmodend) {
					if (addModule(*rit, baseClass, m, visited)) {
						foundone = true;
						break;
					}
				}
			}
			visited.pop_back();
		}
	}
	if (!foundone) {
		cerr << "Could not add required modules for " << module << ".\n";
		return false;
	}
	if (!m.moduleCanBeAdded(module, &baseClass))
		return false;
	m.push_back(module);
	return true;
}
271 
272 
initModules()273 void initModules()
274 {
275 	// Create list of dummy document classes if not already done.
276 	// This is needed since a module cannot be read on its own, only as
277 	// part of a document class.
278 	LayoutFile const & baseClass = LayoutFileList::get()[textclass.name()];
279 	static bool init = true;
280 	if (init) {
281 		baseClass.load();
282 		LyXModuleList::const_iterator const end = theModuleList.end();
283 		LyXModuleList::const_iterator it = theModuleList.begin();
284 		for (; it != end; ++it) {
285 			string const module = it->getID();
286 			LayoutModuleList m;
287 			LayoutModuleList c;
288 			vector<string> v;
289 			if (!addModule(module, baseClass, m, v))
290 				continue;
291 			modules[module] = getDocumentClass(baseClass, m, c);
292 		}
293 		init = false;
294 	}
295 }
296 
297 
addModule(string const & module)298 bool addModule(string const & module)
299 {
300 	initModules();
301 	LayoutFile const & baseClass = LayoutFileList::get()[textclass.name()];
302 	if (!used_modules.moduleCanBeAdded(module, &baseClass))
303 		return false;
304 	FileName layout_file = libFileSearch("layouts", module, "module");
305 	if (textclass.read(layout_file, TextClass::MODULE)) {
306 		used_modules.push_back(module);
307 		// speed up further searches:
308 		// the module does not need to be checked anymore.
309 		ModuleMap::iterator const it = modules.find(module);
310 		if (it != modules.end())
311 			modules.erase(it);
312 		return true;
313 	}
314 	return false;
315 }
316 
317 } // namespace
318 
319 
/*!
 * Try to find a not yet loaded module that defines the LaTeX command
 * (\p command true) or environment (\p command false) called \p name, and
 * read it into \c textclass.
 * \return true if a module layout file was read; false otherwise. Names
 *         for which the search ended without a module being read are
 *         cached, so that the search is not repeated for them.
 */
bool checkModule(string const & name, bool command)
{
	// Cache to avoid slowdown by repeated searches
	// (one set for environments, index 0, and one for commands, index 1)
	static set<string> failed[2];

	// Record whether the command was actually defined in the LyX preamble
	bool theorem = false;
	bool preamble_def = true;
	if (command) {
		if (possible_textclass_commands.find('\\' + name) == possible_textclass_commands.end())
			preamble_def = false;
	} else {
		if (possible_textclass_environments.find(name) == possible_textclass_environments.end()) {
			if (possible_textclass_theorems.find(name) != possible_textclass_theorems.end())
				theorem = true;
			else
				preamble_def = false;
		}
	}
	if (failed[command].find(name) != failed[command].end())
		return false;

	initModules();
	LayoutFile const & baseClass = LayoutFileList::get()[textclass.name()];

	// Try to find a module that defines the command.
	// For commands with preamble definitions we prefer modules where the definition
	// can be found in the preamble of the style that corresponds to the command.
	// For others we check whether the command or module requires a package that is loaded
	// in the tex file and use a style with the respective command.
	// This is a heuristic and different from the way how we parse the builtin
	// commands of the text class (in that case we only compare the name),
	// but it is needed since it is not unlikely that two different modules define a
	// command with the same name.
	string found_module;
	// Second-choice modules: they require a used package but do not
	// provide a style for the name we are looking for.
	vector<string> potential_modules;
	ModuleMap::iterator const end = modules.end();
	for (ModuleMap::iterator it = modules.begin(); it != end; ++it) {
		string const module = it->first;
		if (used_modules.moduleConflicts(module, &baseClass))
			continue;
		// Skip the search if the text class already has a matching style
		if (findLayoutWithoutModule(textclass, name, command))
			continue;
		if (findInsetLayoutWithoutModule(textclass, name, command))
			continue;
		DocumentClassConstPtr c = it->second;
		Layout const * layout = findLayoutWithoutModule(*c, name, command);
		InsetLayout const * insetlayout = layout ? nullptr :
			findInsetLayoutWithoutModule(*c, name, command);
		// Preamble and package requirements of the style found in
		// the module, if any
		docstring dpre;
		std::set<std::string> cmd_reqs;
		bool found_style = false;
		if (layout) {
			found_style = true;
			dpre = layout->preamble();
			std::set<std::string> lreqs = layout->requires();
			if (!lreqs.empty())
				cmd_reqs.insert(lreqs.begin(), lreqs.end());
		} else if (insetlayout) {
			found_style = true;
			dpre = insetlayout->preamble();
			std::set<std::string> lreqs = insetlayout->requires();
			if (!lreqs.empty())
				cmd_reqs.insert(lreqs.begin(), lreqs.end());
		}
		// A preamble definition can only be matched against a
		// non-empty style preamble.
		if (dpre.empty() && preamble_def)
			continue;
		bool const package_cmd = dpre.empty();
		bool match_req = false;
		if (package_cmd) {
			std::set<std::string> mreqs = it->second->requires();
			if (!mreqs.empty())
				cmd_reqs.insert(mreqs.begin(), mreqs.end());
			for (auto const & pack : cmd_reqs) {
				// If a requirement of the module matches a used package
				// we load the module except if we have an auto-loaded package
				// which is only required generally by the module, and the module
				// does not provide the [inset]layout we are looking for.
				// This heuristics should
				// * load modules if they provide a style we don't have in the class
				// * load modules that provide a package support generally (such as fixltx2e)
				// * not unnecessarily load modules just because they require a package which we
				//   load anyway.
				if (preamble.isPackageUsed(pack)
				    && (found_style || !preamble.isPackageAutoLoaded(pack))) {
				    if (found_style)
					    match_req = true;
				    else
					    potential_modules.push_back(module);
				    break;
				}
			}
		}
		bool add = match_req;
		if (preamble_def) {
			// The definition from the tex file must occur in the
			// preamble of the module style.
			if (command) {
				FullCommand const & cmd =
					possible_textclass_commands['\\' + name];
				if (dpre.find(cmd.def) != docstring::npos)
					add = true;
			} else if (theorem) {
				FullCommand const & thm =
					possible_textclass_theorems[name];
				if (dpre.find(thm.def) != docstring::npos)
					add = true;
			} else {
				FullEnvironment const & env =
					possible_textclass_environments[name];
				if (dpre.find(env.beg) != docstring::npos &&
				    dpre.find(env.end) != docstring::npos)
					add = true;
			}
		}
		if (add) {
			found_module = module;
			break;
		}
	}
	if (found_module.empty()) {
		// take one of the second row
		if (!potential_modules.empty())
			found_module = potential_modules.front();
	}

	if (!found_module.empty()) {
		vector<string> v;
		LayoutModuleList mods;
		// addModule is necessary in order to catch required modules
		// as well (see #11156)
		if (!addModule(found_module, baseClass, mods, v))
			return false;
		// NOTE(review): the loop returns after the first module that is
		// read successfully. addModule() appends required modules before
		// found_module, so when a requirement was pulled in, the later
		// entries of mods are not read in this call - confirm that this
		// is intended.
		for (auto const & mod : mods) {
			if (!used_modules.moduleCanBeAdded(mod, &baseClass))
				return false;
			FileName layout_file = libFileSearch("layouts", mod, "module");
			if (textclass.read(layout_file, TextClass::MODULE)) {
				used_modules.push_back(mod);
				// speed up further searches:
				// the module does not need to be checked anymore.
				ModuleMap::iterator const it = modules.find(mod);
				if (it != modules.end())
					modules.erase(it);
				return true;
			}
		}
	}
	failed[command].insert(name);
	return false;
}
469 
470 
isProvided(string const & name)471 bool isProvided(string const & name)
472 {
473 	// This works only for features that are named like the LaTeX packages
474 	return textclass.provides(name) || preamble.isPackageUsed(name);
475 }
476 
477 
/// Set by the -n switch (literate programming input)
bool noweb_mode = false;
bool pdflatex = false;
bool xetex = false;
bool is_nonCJKJapanese = false;
/// Set by the -roundtrip switch; reported by roundtripMode()
bool roundtrip = false;
483 
484 
485 namespace {
486 
487 
488 /*!
489  * Read one command definition from the syntax file
490  */
read_command(Parser & p,string command,CommandMap & commands)491 void read_command(Parser & p, string command, CommandMap & commands)
492 {
493 	if (p.next_token().asInput() == "*") {
494 		p.get_token();
495 		command += '*';
496 	}
497 	vector<ArgumentType> arguments;
498 	while (p.next_token().cat() == catBegin ||
499 	       p.next_token().asInput() == "[") {
500 		if (p.next_token().cat() == catBegin) {
501 			string const arg = p.getArg('{', '}');
502 			if (arg == "translate")
503 				arguments.push_back(required);
504 			else if (arg == "group")
505 				arguments.push_back(req_group);
506 			else if (arg == "item")
507 				arguments.push_back(item);
508 			else if (arg == "displaymath")
509 				arguments.push_back(displaymath);
510 			else
511 				arguments.push_back(verbatim);
512 		} else {
513 			string const arg = p.getArg('[', ']');
514 			if (arg == "group")
515 				arguments.push_back(opt_group);
516 			else
517 				arguments.push_back(optional);
518 		}
519 	}
520 	commands[command] = arguments;
521 }
522 
523 
524 /*!
525  * Read a class of environments from the syntax file
526  */
read_environment(Parser & p,string const & begin,CommandMap & environments)527 void read_environment(Parser & p, string const & begin,
528 		      CommandMap & environments)
529 {
530 	string environment;
531 	while (p.good()) {
532 		Token const & t = p.get_token();
533 		if (t.cat() == catLetter)
534 			environment += t.asInput();
535 		else if (!environment.empty()) {
536 			p.putback();
537 			read_command(p, environment, environments);
538 			environment.erase();
539 		}
540 		if (t.cat() == catEscape && t.asInput() == "\\end") {
541 			string const end = p.getArg('{', '}');
542 			if (end == begin)
543 				return;
544 		}
545 	}
546 }
547 
548 
549 /*!
550  * Read a list of TeX commands from a reLyX compatible syntax file.
551  * Since this list is used after all commands that have a LyX counterpart
552  * are handled, it does not matter that the "syntax.default" file
553  * has almost all of them listed. For the same reason the reLyX-specific
554  * reLyXre environment is ignored.
555  */
read_syntaxfile(FileName const & file_name)556 bool read_syntaxfile(FileName const & file_name)
557 {
558 	ifdocstream is(file_name.toFilesystemEncoding().c_str());
559 	if (!is.good()) {
560 		cerr << "Could not open syntax file \"" << file_name
561 		     << "\" for reading." << endl;
562 		return false;
563 	}
564 	// We can use our TeX parser, since the syntax of the layout file is
565 	// modeled after TeX.
566 	// Unknown tokens are just silently ignored, this helps us to skip some
567 	// reLyX specific things.
568 	Parser p(is, string());
569 	while (p.good()) {
570 		Token const & t = p.get_token();
571 		if (t.cat() == catEscape) {
572 			string const command = t.asInput();
573 			if (command == "\\begin") {
574 				string const name = p.getArg('{', '}');
575 				if (name == "environments" || name == "reLyXre")
576 					// We understand "reLyXre", but it is
577 					// not as powerful as "environments".
578 					read_environment(p, name,
579 						known_environments);
580 				else if (name == "mathenvironments")
581 					read_environment(p, name,
582 						known_math_environments);
583 			} else {
584 				read_command(p, command, known_commands);
585 			}
586 		}
587 	}
588 	return true;
589 }
590 
591 
/// Argument of the -c switch
string documentclass;
/// Argument of the -e or -fixedenc switch
string default_encoding;
/// True if the encoding was given with -fixedenc
bool fixed_encoding = false;
/// Argument of the -s switch, converted with internal_path()
string syntaxfile;
/// Set by -copyfiles; reported by copyFiles()
bool copy_files = false;
/// Set by -f; reported by overwriteFiles()
bool overwrite_files = false;
/// Set by -skipchildren; reported by skipChildren()
bool skip_children = false;
/// Exit status carried by the StopException that parse_help() and
/// parse_version() throw; error_message() sets it to EXIT_FAILURE.
int error_code = 0;

/// Handler for one command line switch. It receives the two arguments
/// that follow the switch.
/// return the number of arguments consumed
typedef int (*cmd_helper)(string const &, string const &);
603 
604 
/// Thrown to stop command line processing early. It carries the exit
/// status that TeX2LyXApp::doExec() hands to exit().
class StopException : public exception
{
	public:
		/// \param status the exit status to report.
		/// The constructor is explicit so that a plain int is never
		/// converted into an exception object by accident.
		explicit StopException(int status) : status_(status) {}
		/// \return the exit status given at construction
		int status() const { return status_; }
	private:
		int status_;
};
613 
614 
615 /// The main application class
616 class TeX2LyXApp : public ConsoleApplication
617 {
618 public:
TeX2LyXApp(int & argc,char * argv[])619 	TeX2LyXApp(int & argc, char * argv[])
620 		: ConsoleApplication("tex2lyx" PROGRAM_SUFFIX, argc, argv),
621 		  argc_(argc), argv_(argv)
622 	{
623 	}
doExec()624 	void doExec()
625 	{
626 		try {
627 			int const exit_status = run();
628 			exit(exit_status);
629 		}
630 		catch (StopException & e) {
631 			exit(e.status());
632 		}
633 	}
634 private:
635 	void easyParse();
636 	/// Do the real work
637 	int run();
638 	int & argc_;
639 	char ** argv_;
640 };
641 
642 
/// Handler for -h, -help and --help: print the usage text to cout.
/// This function never returns normally; it always throws a
/// StopException carrying the current \c error_code.
int parse_help(string const &, string const &)
{
	cout << "Usage: tex2lyx [options] infile.tex [outfile.lyx]\n"
		"Options:\n"
		"\t-c textclass       Declare the textclass.\n"
		"\t-m mod1[,mod2...]  Load the given modules.\n"
		"\t-copyfiles         Copy all included files to the directory of outfile.lyx.\n"
		"\t-e encoding        Set the default encoding (latex name).\n"
		"\t-fixedenc encoding Like -e, but ignore encoding changing commands while parsing.\n"
		"\t-f                 Force overwrite of .lyx files.\n"
		"\t-help              Print this message and quit.\n"
		"\t-n                 translate literate programming (noweb, sweave,... ) file.\n"
		"\t-skipchildren      Do not translate included child documents.\n"
		"\t-roundtrip         re-export created .lyx file infile.lyx.lyx to infile.lyx.tex.\n"
		"\t-s syntaxfile      read additional syntax file.\n"
		"\t-sysdir SYSDIR     Set system directory to SYSDIR.\n"
		"\t                   Default: " << package().system_support() << "\n"
		"\t-userdir USERDIR   Set user directory to USERDIR.\n"
		"\t                   Default: " << package().user_support() << "\n"
		"\t-version           Summarize version and build info.\n"
		"Paths:\n"
		"\tThe program searches for the files \"encodings\", \"lyxmodules.lst\",\n"
		"\t\"textclass.lst\", \"syntax.default\", and \"unicodesymbols\", first in\n"
		"\t\"USERDIR\", then in \"SYSDIR\". The subdirectories \"USERDIR/layouts\"\n"
		"\tand \"SYSDIR/layouts\" are searched for layout and module files.\n"
		"Check the tex2lyx man page for more details."
	     << endl;
	// Stop processing; doExec() turns the status into the exit code.
	throw StopException(error_code);
}
672 
673 
parse_version(string const &,string const &)674 int parse_version(string const &, string const &)
675 {
676 	cout << "tex2lyx " << lyx_version
677 	     << " (" << lyx_release_date << ")" << endl;
678 
679 	cout << lyx_version_info << endl;
680 	throw StopException(error_code);
681 }
682 
683 
error_message(string const & message)684 void error_message(string const & message)
685 {
686 	cerr << "tex2lyx: " << message << "\n\n";
687 	error_code = EXIT_FAILURE;
688 	parse_help(string(), string());
689 }
690 
691 
/// Handler for -c: store \p arg as the document class.
/// \return 1, the number of arguments consumed
int parse_class(string const & arg, string const &)
{
	// error_message() ends in a throw, so the code below is only
	// reached with a non-empty argument.
	if (arg.empty())
		error_message("Missing textclass string after -c switch");
	documentclass = arg;
	return 1;
}
699 
700 
/// Handler for -m: split the comma separated list \p arg and append the
/// module names to \c preloaded_modules.
/// \return 1, the number of arguments consumed
int parse_module(string const & arg, string const &)
{
	// error_message() ends in a throw, so the code below is only
	// reached with a non-empty argument.
	if (arg.empty())
		error_message("Missing modules string after -m switch");
	split(arg, preloaded_modules, ',');
	return 1;
}
708 
709 
/// Handler for -e: store \p arg as the default encoding.
/// \return 1, the number of arguments consumed
int parse_encoding(string const & arg, string const &)
{
	// error_message() ends in a throw, so the code below is only
	// reached with a non-empty argument.
	if (arg.empty())
		error_message("Missing encoding string after -e switch");
	default_encoding = arg;
	return 1;
}
717 
718 
/// Handler for -fixedenc: store \p arg as the default encoding and mark
/// the encoding as fixed.
/// \return 1, the number of arguments consumed
int parse_fixed_encoding(string const & arg, string const &)
{
	// error_message() ends in a throw, so the code below is only
	// reached with a non-empty argument.
	if (arg.empty())
		error_message("Missing encoding string after -fixedenc switch");
	default_encoding = arg;
	fixed_encoding = true;
	return 1;
}
727 
728 
/// Handler for -s: store \p arg, converted with internal_path(), as the
/// additional syntax file.
/// \return 1, the number of arguments consumed
int parse_syntaxfile(string const & arg, string const &)
{
	// error_message() ends in a throw, so the code below is only
	// reached with a non-empty argument.
	if (arg.empty())
		error_message("Missing syntaxfile string after -s switch");
	syntaxfile = internal_path(arg);
	return 1;
}
736 
737 
// Filled with the command line arguments "foo" of "-sysdir foo" or
// "-userdir foo", converted with internal_path() by parse_sysdir() and
// parse_userdir(). Both stay empty if the switch is not given.
string cl_system_support;
string cl_user_support;
742 
743 
/// Handler for -sysdir: store \p arg, converted with internal_path(), in
/// \c cl_system_support.
/// \return 1, the number of arguments consumed
int parse_sysdir(string const & arg, string const &)
{
	// error_message() ends in a throw, so the code below is only
	// reached with a non-empty argument.
	if (arg.empty())
		error_message("Missing directory for -sysdir switch");
	cl_system_support = internal_path(arg);
	return 1;
}
751 
752 
/// Handler for -userdir: store \p arg, converted with internal_path(),
/// in \c cl_user_support.
/// \return 1, the number of arguments consumed
int parse_userdir(string const & arg, string const &)
{
	// error_message() ends in a throw, so the code below is only
	// reached with a non-empty argument.
	if (arg.empty())
		error_message("Missing directory for -userdir switch");
	cl_user_support = internal_path(arg);
	return 1;
}
760 
761 
/// Handler for -f: set \c overwrite_files.
/// \return 0, since the switch consumes no argument
int parse_force(string const &, string const &)
{
	overwrite_files = true;
	return 0;
}
767 
768 
/// Handler for -n: set \c noweb_mode.
/// \return 0, since the switch consumes no argument
int parse_noweb(string const &, string const &)
{
	noweb_mode = true;
	return 0;
}
774 
775 
/// Handler for -skipchildren: set \c skip_children.
/// \return 0, since the switch consumes no argument
int parse_skipchildren(string const &, string const &)
{
	skip_children = true;
	return 0;
}
781 
782 
/// Handler for -roundtrip: set \c roundtrip.
/// \return 0, since the switch consumes no argument
int parse_roundtrip(string const &, string const &)
{
	roundtrip = true;
	return 0;
}
788 
789 
/// Handler for -copyfiles: set \c copy_files.
/// \return 0, since the switch consumes no argument
int parse_copyfiles(string const &, string const &)
{
	copy_files = true;
	return 0;
}
795 
796 
easyParse()797 void TeX2LyXApp::easyParse()
798 {
799 	map<string, cmd_helper> cmdmap;
800 
801 	cmdmap["-h"] = parse_help;
802 	cmdmap["-help"] = parse_help;
803 	cmdmap["--help"] = parse_help;
804 	cmdmap["-v"] = parse_version;
805 	cmdmap["-version"] = parse_version;
806 	cmdmap["--version"] = parse_version;
807 	cmdmap["-c"] = parse_class;
808 	cmdmap["-m"] = parse_module;
809 	cmdmap["-e"] = parse_encoding;
810 	cmdmap["-fixedenc"] = parse_fixed_encoding;
811 	cmdmap["-f"] = parse_force;
812 	cmdmap["-s"] = parse_syntaxfile;
813 	cmdmap["-n"] = parse_noweb;
814 	cmdmap["-skipchildren"] = parse_skipchildren;
815 	cmdmap["-sysdir"] = parse_sysdir;
816 	cmdmap["-userdir"] = parse_userdir;
817 	cmdmap["-roundtrip"] = parse_roundtrip;
818 	cmdmap["-copyfiles"] = parse_copyfiles;
819 
820 	for (int i = 1; i < argc_; ++i) {
821 		map<string, cmd_helper>::const_iterator it
822 			= cmdmap.find(argv_[i]);
823 
824 		// don't complain if not found - may be parsed later
825 		if (it == cmdmap.end()) {
826 			if (argv_[i][0] == '-')
827 				error_message(string("Unknown option `") + argv_[i] + "'.");
828 			else
829 				continue;
830 		}
831 
832 		string arg = (i + 1 < argc_) ? os::utf8_argv(i + 1) : string();
833 		string arg2 = (i + 2 < argc_) ? os::utf8_argv(i + 2) : string();
834 
835 		int const remove = 1 + it->second(arg, arg2);
836 
837 		// Now, remove used arguments by shifting
838 		// the following ones remove places down.
839 		os::remove_internal_args(i, remove);
840 		argc_ -= remove;
841 		for (int j = i; j < argc_; ++j)
842 			argv_[j] = argv_[j + remove];
843 		--i;
844 	}
845 }
846 
847 
// path of the first parsed file
// (the LyX and the TeX variant are returned by getMasterFilePath())
string masterFilePathLyX;
string masterFilePathTeX;
// path of the currently parsed file (see getParentFilePath())
string parentFilePathTeX;
853 
854 } // anonymous namespace
855 
856 
getMasterFilePath(bool input)857 string getMasterFilePath(bool input)
858 {
859 	return input ? masterFilePathTeX : masterFilePathLyX;
860 }
861 
getParentFilePath(bool input)862 string getParentFilePath(bool input)
863 {
864 	if (input)
865 		return parentFilePathTeX;
866 	string const rel = to_utf8(makeRelPath(from_utf8(masterFilePathTeX),
867 	                                       from_utf8(parentFilePathTeX)));
868 	if (rel.substr(0, 3) == "../") {
869 		// The parent is not below the master - keep the path
870 		return parentFilePathTeX;
871 	}
872 	return makeAbsPath(rel, masterFilePathLyX).absFileName();
873 }
874 
875 
/// \return true if the -copyfiles switch was given
bool copyFiles()
{
	return copy_files;
}
880 
881 
/// \return true if the -f switch was given
bool overwriteFiles()
{
	return overwrite_files;
}
886 
887 
/// \return true if the -skipchildren switch was given
bool skipChildren()
{
	return skip_children;
}
892 
893 
/// \return true if the -roundtrip switch was given
bool roundtripMode()
{
	return roundtrip;
}
898 
899 
900 namespace {
901 
902 /*!
903  *  Reads tex input from \a is and writes lyx output to \a os.
904  *  Uses some common settings for the preamble, so this should only
905  *  be used more than once for included documents.
906  *  Caution: Overwrites the existing preamble settings if the new document
907  *  contains a preamble.
908  *  You must ensure that \p parentFilePathTeX is properly set before calling
909  *  this function!
910  */
bool tex2lyx(idocstream & is, ostream & os, string const & encoding,
             string const & outfiledir)
{
	// With -fixedenc the parser is created with the fixed encoding,
	// otherwise with an empty one.
	Parser p(is, fixed_encoding ? default_encoding : string());
	p.setEncoding(encoding);
	//p.dump();

	preamble.parse(p, documentclass, textclass);
	list<string> removed_modules;
	LayoutFile const & baseClass = LayoutFileList::get()[textclass.name()];
	if (!used_modules.adaptToBaseClass(&baseClass, removed_modules)) {
		cerr << "Could not load default modules for text class." << endl;
		return false;
	}

	// Load preloaded modules.
	// This needs to be done after the preamble is parsed, since the text
	// class may not be known before. It needs to be done before parsing
	// body, since otherwise the commands/environments provided by the
	// modules would be parsed as ERT.
	for (size_t i = 0; i < preloaded_modules.size(); ++i) {
		if (!addModule(preloaded_modules[i])) {
			cerr << "Error: Could not load module \""
			     << preloaded_modules[i] << "\"." << endl;
			return false;
		}
	}
	// Ensure that the modules are not loaded again for included files
	preloaded_modules.clear();

	active_environments.push_back("document");
	Context context(true, textclass);
	// The body is buffered in ss, because the header written to os
	// depends on the modules found while parsing the body.
	stringstream ss;
	// store the document language in the context to be able to handle the
	// commands like \foreignlanguage and \textenglish etc.
	context.font.language = preamble.defaultLanguage();
	// parse the main text
	parse_text(p, ss, FLAG_END, true, context);
	// check if we need a commented bibtex inset (biblatex)
	check_comment_bib(ss, context);
	if (Context::empty)
		// Empty document body. LyX needs at least one paragraph.
		context.check_layout(ss);
	context.check_end_layout(ss);
	ss << "\n\\end_body\n\\end_document\n";
	active_environments.pop_back();

	// We know the used modules only after parsing the full text
	if (!used_modules.empty()) {
		LayoutModuleList::const_iterator const end = used_modules.end();
		LayoutModuleList::const_iterator it = used_modules.begin();
		for (; it != end; ++it)
			preamble.addModule(*it);
	}
	// NOTE(review): the second argument is true if environments are still
	// open after the pop above - presumably this marks a document that is
	// converted while another one is being parsed; confirm against
	// Preamble::writeLyXHeader().
	if (!preamble.writeLyXHeader(os, !active_environments.empty(), outfiledir)) {
		cerr << "Could not write LyX file header." << endl;
		return false;
	}

	// Header first, then the buffered body
	ss.seekg(0);
	os << ss.str();
#ifdef TEST_PARSER
	p.reset();
	ofdocstream parsertest("parsertest.tex");
	while (p.good())
		parsertest << p.get_token().asInput();
	// <origfile> and parsertest.tex should now have identical content
#endif
	return true;
}
981 
982 
983 /// convert TeX from \p infilename to LyX and write it to \p os
tex2lyx(FileName const & infilename,ostream & os,string encoding,string const & outfiledir)984 bool tex2lyx(FileName const & infilename, ostream & os, string encoding,
985              string const & outfiledir)
986 {
987 	// Set a sensible default encoding.
988 	// This is used until an encoding command is found.
989 	// For child documents use the encoding of the master, else try to
990 	// detect it from the preamble, since setting an encoding of an open
991 	// fstream does currently not work on OS X.
992 	// Always start with ISO-8859-1, (formerly known by its latex name
993 	// latin1), since ISO-8859-1 does not cause an iconv error if the
994 	// actual encoding is different (bug 7509).
995 	if (encoding.empty()) {
996 		Encoding const * enc = 0;
997 		if (preamble.inputencoding() == "auto") {
998 			ifdocstream is(setEncoding("ISO-8859-1"));
999 			// forbid buffering on this stream
1000 			is.rdbuf()->pubsetbuf(0, 0);
1001 			is.open(infilename.toFilesystemEncoding().c_str());
1002 			if (is.good()) {
1003 				Parser ep(is, string());
1004 				ep.setEncoding("ISO-8859-1");
1005 				Preamble encodingpreamble;
1006 				string const e = encodingpreamble
1007 					.parseEncoding(ep, documentclass);
1008 				if (!e.empty())
1009 					enc = encodings.fromLyXName(e, true);
1010 			}
1011 		} else
1012 			enc = encodings.fromLyXName(
1013 					preamble.inputencoding(), true);
1014 		if (enc)
1015 			encoding = enc->iconvName();
1016 		else
1017 			encoding = "ISO-8859-1";
1018 	}
1019 
1020 	ifdocstream is(setEncoding(encoding));
1021 	// forbid buffering on this stream
1022 	is.rdbuf()->pubsetbuf(0, 0);
1023 	is.open(infilename.toFilesystemEncoding().c_str());
1024 	if (!is.good()) {
1025 		cerr << "Could not open input file \"" << infilename
1026 		     << "\" for reading." << endl;
1027 		return false;
1028 	}
1029 	string const oldParentFilePath = parentFilePathTeX;
1030 	parentFilePathTeX = onlyPath(infilename.absFileName());
1031 	bool retval = tex2lyx(is, os, encoding, outfiledir);
1032 	parentFilePathTeX = oldParentFilePath;
1033 	return retval;
1034 }
1035 
1036 } // anonymous namespace
1037 
1038 
tex2lyx(string const & infilename,FileName const & outfilename,string const & encoding)1039 bool tex2lyx(string const & infilename, FileName const & outfilename,
1040 	     string const & encoding)
1041 {
1042 	if (outfilename.isReadableFile()) {
1043 		if (overwrite_files) {
1044 			cerr << "Overwriting existing file "
1045 			     << outfilename << endl;
1046 		} else {
1047 			cerr << "Not overwriting existing file "
1048 			     << outfilename << endl;
1049 			return false;
1050 		}
1051 	} else {
1052 		cerr << "Creating file " << outfilename << endl;
1053 	}
1054 	ofstream os(outfilename.toFilesystemEncoding().c_str());
1055 	if (!os.good()) {
1056 		cerr << "Could not open output file \"" << outfilename
1057 		     << "\" for writing." << endl;
1058 		return false;
1059 	}
1060 #ifdef FILEDEBUG
1061 	cerr << "Input file: " << infilename << "\n";
1062 	cerr << "Output file: " << outfilename << "\n";
1063 #endif
1064 	return tex2lyx(FileName(infilename), os, encoding,
1065 	               outfilename.onlyPath().absFileName() + '/');
1066 }
1067 
1068 
tex2tex(string const & infilename,FileName const & outfilename,string const & encoding)1069 bool tex2tex(string const & infilename, FileName const & outfilename,
1070              string const & encoding)
1071 {
1072 	if (!tex2lyx(infilename, outfilename, encoding))
1073 		return false;
1074 	string command = quoteName(package().lyx_binary().toFilesystemEncoding());
1075 	if (overwrite_files)
1076 		command += " -f main";
1077 	else
1078 		command += " -f none";
1079 	if (pdflatex)
1080 		command += " -e pdflatex ";
1081 	else if (xetex)
1082 		command += " -e xetex ";
1083 	else
1084 		command += " -e latex ";
1085 	command += quoteName(outfilename.toFilesystemEncoding());
1086 	Systemcall one;
1087 	if (one.startscript(Systemcall::Wait, command) == 0)
1088 		return true;
1089 	cerr << "Error: Running '" << command << "' failed." << endl;
1090 	return false;
1091 }
1092 
1093 
1094 namespace {
1095 
run()1096 int TeX2LyXApp::run()
1097 {
1098 	// qt changes this, and our numeric conversions require the C locale
1099 	setlocale(LC_NUMERIC, "C");
1100 
1101 	try {
1102 		init_package(internal_path(os::utf8_argv(0)), string(), string());
1103 	} catch (ExceptionMessage const & message) {
1104 		cerr << to_utf8(message.title_) << ":\n"
1105 		     << to_utf8(message.details_) << endl;
1106 		if (message.type_ == ErrorException)
1107 			return EXIT_FAILURE;
1108 	}
1109 
1110 	easyParse();
1111 
1112 	if (argc_ <= 1)
1113 		error_message("Not enough arguments.");
1114 
1115 	try {
1116 		init_package(internal_path(os::utf8_argv(0)),
1117 			     cl_system_support, cl_user_support);
1118 	} catch (ExceptionMessage const & message) {
1119 		cerr << to_utf8(message.title_) << ":\n"
1120 		     << to_utf8(message.details_) << endl;
1121 		if (message.type_ == ErrorException)
1122 			return EXIT_FAILURE;
1123 	}
1124 
1125 	// Check that user LyX directory is ok.
1126 	FileName const sup = package().user_support();
1127 	if (sup.exists() && sup.isDirectory()) {
1128 		string const lock_file = package().getConfigureLockName();
1129 		int fd = fileLock(lock_file.c_str());
1130 		if (configFileNeedsUpdate("lyxrc.defaults") ||
1131 		    configFileNeedsUpdate("lyxmodules.lst") ||
1132 		    configFileNeedsUpdate("textclass.lst") ||
1133 		    configFileNeedsUpdate("packages.lst") ||
1134 		    configFileNeedsUpdate("lyxciteengines.lst") ||
1135 		    configFileNeedsUpdate("xtemplates.lst"))
1136 			package().reconfigureUserLyXDir("");
1137 		fileUnlock(fd, lock_file.c_str());
1138 	} else
1139 		error_message("User directory does not exist.");
1140 
1141 	// Now every known option is parsed. Look for input and output
1142 	// file name (the latter is optional).
1143 	string infilename = internal_path(os::utf8_argv(1));
1144 	infilename = makeAbsPath(infilename).absFileName();
1145 
1146 	string outfilename;
1147 	if (argc_ > 2) {
1148 		outfilename = internal_path(os::utf8_argv(2));
1149 		if (outfilename != "-")
1150 			outfilename = makeAbsPath(outfilename).absFileName();
1151 		if (roundtrip) {
1152 			if (outfilename == "-") {
1153 				cerr << "Error: Writing to standard output is "
1154 				        "not supported in roundtrip mode."
1155 				     << endl;
1156 				return EXIT_FAILURE;
1157 			}
1158 			string texfilename = changeExtension(outfilename, ".tex");
1159 			if (equivalent(FileName(infilename), FileName(texfilename))) {
1160 				cerr << "Error: The input file `" << infilename
1161 				     << "´ would be overwritten by the TeX file exported from `"
1162 				     << outfilename << "´ in roundtrip mode." << endl;
1163 				return EXIT_FAILURE;
1164 			}
1165 		}
1166 	} else if (roundtrip) {
1167 		// avoid overwriting the input file
1168 		outfilename = changeExtension(infilename, ".lyx.lyx");
1169 	} else
1170 		outfilename = changeExtension(infilename, ".lyx");
1171 
1172 	// Read the syntax tables
1173 	FileName const system_syntaxfile = libFileSearch("", "syntax.default");
1174 	if (system_syntaxfile.empty()) {
1175 		cerr << "Error: Could not find syntax file \"syntax.default\"." << endl;
1176 		return EXIT_FAILURE;
1177 	}
1178 	if (!read_syntaxfile(system_syntaxfile))
1179 		return 2;
1180 	if (!syntaxfile.empty())
1181 		if (!read_syntaxfile(makeAbsPath(syntaxfile)))
1182 			return 2;
1183 
1184 	// Read the encodings table.
1185 	FileName const symbols_path = libFileSearch(string(), "unicodesymbols");
1186 	if (symbols_path.empty()) {
1187 		cerr << "Error: Could not find file \"unicodesymbols\"."
1188 		     << endl;
1189 		return EXIT_FAILURE;
1190 	}
1191 	FileName const enc_path = libFileSearch(string(), "encodings");
1192 	if (enc_path.empty()) {
1193 		cerr << "Error: Could not find file \"encodings\"."
1194 		     << endl;
1195 		return EXIT_FAILURE;
1196 	}
1197 	encodings.read(enc_path, symbols_path);
1198 	if (!default_encoding.empty()) {
1199 		Encoding const * const enc = encodings.fromLaTeXName(
1200 			default_encoding, Encoding::any, true);
1201 		if (!enc)
1202 			error_message("Unknown LaTeX encoding `" + default_encoding + "'");
1203 		default_encoding = enc->iconvName();
1204 		if (fixed_encoding)
1205 			preamble.setInputencoding(enc->name());
1206 	}
1207 
1208 	// Load the layouts
1209 	LayoutFileList::get().read();
1210 	//...and the modules
1211 	theModuleList.read();
1212 
1213 	// The real work now.
1214 	masterFilePathTeX = onlyPath(infilename);
1215 	parentFilePathTeX = masterFilePathTeX;
1216 	if (outfilename == "-") {
1217 		// assume same directory as input file
1218 		masterFilePathLyX = masterFilePathTeX;
1219 		if (tex2lyx(FileName(infilename), cout, default_encoding, masterFilePathLyX))
1220 			return EXIT_SUCCESS;
1221 	} else {
1222 		masterFilePathLyX = onlyPath(outfilename);
1223 		if (copy_files) {
1224 			FileName const path(masterFilePathLyX);
1225 			if (!path.isDirectory()) {
1226 				if (!path.createPath()) {
1227 					cerr << "Warning: Could not create directory for file `"
1228 					     << masterFilePathLyX << "´." << endl;
1229 					return EXIT_FAILURE;
1230 				}
1231 			}
1232 		}
1233 		if (roundtrip) {
1234 			if (tex2tex(infilename, FileName(outfilename), default_encoding))
1235 				return EXIT_SUCCESS;
1236 		} else {
1237 			if (lyx::tex2lyx(infilename, FileName(outfilename), default_encoding))
1238 				return EXIT_SUCCESS;
1239 		}
1240 	}
1241 	return EXIT_FAILURE;
1242 }
1243 
1244 } // anonymous namespace
1245 } // namespace lyx
1246 
1247 
main(int argc,char * argv[])1248 int main(int argc, char * argv[])
1249 {
1250 	//setlocale(LC_CTYPE, "");
1251 
1252 	lyx::lyxerr.setStream(cerr);
1253 
1254 	os::init(argc, &argv);
1255 
1256 	lyx::TeX2LyXApp app(argc, argv);
1257 	return app.exec();
1258 }
1259 
1260 // }])
1261