1 // Copyright (c) 1997 James Clark
2 // See the file copying.txt for copying permission.
3
4 #include "config.h"
5 #include "TransformFOTBuilder.h"
6 #include "FOTBuilder.h"
7 #include "OutputCharStream.h"
8 #include "MessageArg.h"
9 #include "ErrnoMessageArg.h"
10
11 #include <errno.h>
12
13 #ifdef DSSSL_NAMESPACE
14 namespace DSSSL_NAMESPACE {
15 #endif
16
// Carriage return: the character this file writes for a record end (RE)
// and the one characters() looks for at the edges of a data chunk.
static const char RE = '\r';
18
19 class TransformFOTBuilder : public SerialFOTBuilder {
20 public:
21 // SGML Transformations
22 struct DocumentTypeNIC {
23 ~DocumentTypeNIC();
24 StringC name;
25 StringC publicId;
26 StringC systemId;
27 };
28 struct ElementNIC {
29 ~ElementNIC();
30 StringC gi;
31 Vector<StringC> attributes;
32 };
33 class TransformExtensionFlowObj : public FOTBuilder::ExtensionFlowObj {
34 public:
35 virtual void atomic(TransformFOTBuilder &, const NodePtr &) const = 0;
36 };
37 class TransformCompoundExtensionFlowObj : public FOTBuilder::CompoundExtensionFlowObj {
38 public:
39 virtual void start(TransformFOTBuilder &, const NodePtr &) const = 0;
40 virtual void end(TransformFOTBuilder &) const = 0;
41 };
42 class EntityRefFlowObj : public TransformExtensionFlowObj {
43 public:
EntityRefFlowObj()44 EntityRefFlowObj() {}
atomic(TransformFOTBuilder & fotb,const NodePtr &) const45 void atomic(TransformFOTBuilder &fotb, const NodePtr &) const {
46 fotb.entityRef(name_);
47 }
hasNIC(const StringC & name) const48 bool hasNIC(const StringC &name) const {
49 return name == "name";
50 }
setNIC(const StringC & name,const Value & value)51 void setNIC(const StringC &name, const Value &value) {
52 value.convertString(name_);
53 }
copy() const54 ExtensionFlowObj *copy() const { return new EntityRefFlowObj(*this); }
55 private:
56 StringC name_;
57 };
58 class ProcessingInstructionFlowObj : public TransformExtensionFlowObj {
59 public:
ProcessingInstructionFlowObj()60 ProcessingInstructionFlowObj() {}
atomic(TransformFOTBuilder & fotb,const NodePtr &) const61 void atomic(TransformFOTBuilder &fotb, const NodePtr &) const {
62 fotb.processingInstruction(data_);
63 }
hasNIC(const StringC & name) const64 bool hasNIC(const StringC &name) const {
65 return name.size() == 4 && name[0] == 'd' && name[1] == 'a' && name[2] == 't' && name[3] == 'a';
66 }
setNIC(const StringC & name,const Value & value)67 void setNIC(const StringC &name, const Value &value) {
68 value.convertString(data_);
69 }
copy() const70 ExtensionFlowObj *copy() const { return new ProcessingInstructionFlowObj(*this); }
71 private:
72 StringC data_;
73 };
74 class EmptyElementFlowObj : public TransformExtensionFlowObj {
atomic(TransformFOTBuilder & fotb,const NodePtr & nd) const75 void atomic(TransformFOTBuilder &fotb, const NodePtr &nd) const {
76 if (nic_.gi.size() > 0)
77 fotb.emptyElement(nic_);
78 else {
79 GroveString str;
80 if (nd && nd->getGi(str) == accessOK) {
81 ElementNIC tem(nic_);
82 tem.gi.assign(str.data(), str.size());
83 fotb.emptyElement(tem);
84 }
85 else
86 fotb.emptyElement(nic_);
87 }
88 }
hasNIC(const StringC & name) const89 bool hasNIC(const StringC &name) const {
90 return name == "gi" || name == "attributes";
91 }
setNIC(const StringC & name,const Value & value)92 void setNIC(const StringC &name, const Value &value) {
93 switch (name[0]) {
94 case 'g':
95 value.convertString(nic_.gi);
96 break;
97 case 'a':
98 value.convertStringPairList(nic_.attributes);
99 break;
100 }
101 }
copy() const102 ExtensionFlowObj *copy() const { return new EmptyElementFlowObj(*this); }
103 public:
EmptyElementFlowObj()104 EmptyElementFlowObj() {}
105 private:
106 ElementNIC nic_;
107 };
108 class ElementFlowObj : public TransformCompoundExtensionFlowObj {
start(TransformFOTBuilder & fotb,const NodePtr & nd) const109 void start(TransformFOTBuilder &fotb, const NodePtr &nd) const {
110 if (nic_.gi.size() > 0)
111 fotb.startElement(nic_);
112 else {
113 GroveString str;
114 if (nd && nd->getGi(str) == accessOK) {
115 ElementNIC tem(nic_);
116 tem.gi.assign(str.data(), str.size());
117 fotb.startElement(tem);
118 }
119 else
120 fotb.startElement(nic_);
121 }
122 }
end(TransformFOTBuilder & fotb) const123 void end(TransformFOTBuilder &fotb) const {
124 fotb.endElement();
125 }
hasNIC(const StringC & name) const126 bool hasNIC(const StringC &name) const {
127 return name == "gi" || name == "attributes";
128 }
setNIC(const StringC & name,const Value & value)129 void setNIC(const StringC &name, const Value &value) {
130 switch (name[0]) {
131 case 'g':
132 value.convertString(nic_.gi);
133 break;
134 case 'a':
135 value.convertStringPairList(nic_.attributes);
136 break;
137 }
138 }
copy() const139 ExtensionFlowObj *copy() const { return new ElementFlowObj(*this); }
140 public:
ElementFlowObj()141 ElementFlowObj() {}
142 private:
143 ElementNIC nic_;
144 };
145 class EntityFlowObj : public TransformCompoundExtensionFlowObj {
start(TransformFOTBuilder & fotb,const NodePtr &) const146 void start(TransformFOTBuilder &fotb, const NodePtr &) const {
147 fotb.startEntity(systemId_);
148 }
end(TransformFOTBuilder & fotb) const149 void end(TransformFOTBuilder &fotb) const {
150 fotb.endEntity();
151 }
hasNIC(const StringC & name) const152 bool hasNIC(const StringC &name) const {
153 return name == "system-id";
154 }
setNIC(const StringC & name,const Value & value)155 void setNIC(const StringC &name, const Value &value) {
156 value.convertString(systemId_);
157 }
copy() const158 ExtensionFlowObj *copy() const { return new EntityFlowObj(*this); }
159 public:
EntityFlowObj()160 EntityFlowObj() {};
161 private:
162 StringC systemId_;
163 };
164 class DocumentTypeFlowObj : public TransformExtensionFlowObj {
atomic(TransformFOTBuilder & fotb,const NodePtr & nd) const165 void atomic(TransformFOTBuilder &fotb, const NodePtr &nd) const {
166 fotb.documentType(nic_);
167 }
hasNIC(const StringC & name) const168 bool hasNIC(const StringC &name) const {
169 return name == "system-id" || name == "public-id" || name == "name";
170 }
setNIC(const StringC & name,const Value & value)171 void setNIC(const StringC &name, const Value &value) {
172 switch (name[0]) {
173 case 's':
174 value.convertString(nic_.systemId);
175 break;
176 case 'p':
177 value.convertString(nic_.publicId);
178 break;
179 case 'n':
180 value.convertString(nic_.name);
181 break;
182 }
183 }
copy() const184 ExtensionFlowObj *copy() const { return new DocumentTypeFlowObj(*this); }
185 public:
DocumentTypeFlowObj()186 DocumentTypeFlowObj() {}
187 private:
188 DocumentTypeNIC nic_;
189 };
190 TransformFOTBuilder(CmdLineApp *, bool xml);
191 ~TransformFOTBuilder();
192 void startElement(const ElementNIC &);
193 void endElement();
194 void emptyElement(const ElementNIC &);
195 void characters(const Char *s, size_t n);
196 void charactersFromNode(const NodePtr &, const Char *, size_t);
197 void processingInstruction(const StringC &);
198 void documentType(const DocumentTypeNIC &);
199 void formattingInstruction(const StringC &);
200 void entityRef(const StringC &);
201 void startEntity(const StringC &);
202 void endEntity();
203 void extension(const ExtensionFlowObj &fo, const NodePtr &);
204 void startExtensionSerial(const CompoundExtensionFlowObj &fo, const NodePtr &nd);
205 void endExtensionSerial(const CompoundExtensionFlowObj &fo);
206 void start();
207 void end();
208 void setPreserveSdata(bool);
209 private:
210 TransformFOTBuilder(const TransformFOTBuilder &);
211 void operator=(const TransformFOTBuilder &);
212
os()213 OutputCharStream &os() { return *os_; }
214 void attributes(const Vector<StringC> &atts);
flushPendingRe()215 void flushPendingRe() {
216 if (state_ == statePendingRe) {
217 os() << RE;
218 state_ = stateMiddle;
219 }
220 }
flushPendingReCharRef()221 void flushPendingReCharRef() {
222 if (state_ == statePendingRe) {
223 os() << " ";
224 state_ = stateMiddle;
225 }
226 }
227
228 CmdLineApp *app_;
229 OutputCharStream *os_;
230 Owner<OutputCharStream> topOs_;
231 Vector<StringC> openElements_;
232 StringC undefGi_;
233 struct OpenFile : Link {
234 ~OpenFile();
235 OutputCharStream *saveOs;
236 // fb must be before os so it gets destroyed afterwards
237 FileOutputByteStream fb;
238 Owner<OutputCharStream> os;
239 StringC systemId;
240 };
241 IList<OpenFile> openFileStack_;
242 bool xml_;
243 enum ReState {
244 stateMiddle,
245 stateStartOfElement,
246 statePendingRe
247 };
248 ReState state_;
249 bool preserveSdata_;
250 // Really Vector<bool>
251 StringC preserveSdataStack_;
252 };
253
makeTransformFOTBuilder(CmdLineApp * app,bool xml,const FOTBuilder::Extension * & ext)254 FOTBuilder *makeTransformFOTBuilder(CmdLineApp *app,
255 bool xml,
256 const FOTBuilder::Extension *&ext)
257 {
258 static const TransformFOTBuilder::ProcessingInstructionFlowObj pi;
259 static const TransformFOTBuilder::ElementFlowObj element;
260 static const TransformFOTBuilder::EmptyElementFlowObj emptyElement;
261 static const TransformFOTBuilder::EntityFlowObj entity;
262 static const TransformFOTBuilder::EntityRefFlowObj entityRef;
263 static const TransformFOTBuilder::DocumentTypeFlowObj documentType;
264 static const FOTBuilder::Extension extensions[] = {
265 {
266 "UNREGISTERED::James Clark//Flow Object Class::processing-instruction",
267 0,
268 0,
269 0,
270 0,
271 &pi
272 },
273 {
274 "UNREGISTERED::James Clark//Flow Object Class::element",
275 0,
276 0,
277 0,
278 0,
279 &element
280 },
281 {
282 "UNREGISTERED::James Clark//Flow Object Class::empty-element",
283 0,
284 0,
285 0,
286 0,
287 &emptyElement
288 },
289 {
290 "UNREGISTERED::James Clark//Flow Object Class::entity",
291 0,
292 0,
293 0,
294 0,
295 &entity
296 },
297 {
298 "UNREGISTERED::James Clark//Flow Object Class::entity-ref",
299 0,
300 0,
301 0,
302 0,
303 &entityRef
304 },
305 {
306 "UNREGISTERED::James Clark//Flow Object Class::document-type",
307 0,
308 0,
309 0,
310 0,
311 &documentType
312 },
313 {
314 "UNREGISTERED::James Clark//Characteristic::preserve-sdata?",
315 (void (FOTBuilder::*)(bool))&TransformFOTBuilder::setPreserveSdata,
316 0,
317 0,
318 0,
319 0
320 },
321 { 0 }
322 };
323 ext = extensions;
324 return new TransformFOTBuilder(app, xml);
325 }
326
327 static
outputNumericCharRef(OutputCharStream & os,Char c)328 void outputNumericCharRef(OutputCharStream &os, Char c)
329 {
330 os << "&#" << (unsigned long)c << ';';
331 }
332
TransformFOTBuilder(CmdLineApp * app,bool xml)333 TransformFOTBuilder::TransformFOTBuilder(CmdLineApp *app, bool xml)
334 : app_(app),
335 xml_(xml),
336 topOs_(new RecordOutputCharStream(app->makeStdOut())),
337 state_(stateMiddle),
338 preserveSdata_(0)
339 {
340 undefGi_ = app_->systemCharset().execToDesc("#UNDEF");
341 topOs_->setEscaper(outputNumericCharRef);
342 os_ = topOs_.pointer();
343 preserveSdataStack_ += 0;
344 }
345
~TransformFOTBuilder()346 TransformFOTBuilder::~TransformFOTBuilder()
347 {
348 }
349
contains(const StringC & str,Char c)350 static bool contains(const StringC &str, Char c)
351 {
352 for (size_t i = 0; i < str.size(); i++)
353 if (str[i] == c)
354 return 1;
355 return 0;
356 }
357
documentType(const DocumentTypeNIC & nic)358 void TransformFOTBuilder::documentType(const DocumentTypeNIC &nic)
359 {
360 flushPendingRe();
361 if (nic.name.size()) {
362 os() << "<!DOCTYPE " << nic.name;
363 if (nic.publicId.size())
364 os() << " PUBLIC \"" << nic.publicId << '"';
365 else
366 os() << " SYSTEM";
367 if (nic.systemId.size()) {
368 char quote = contains(nic.systemId, '"') ? '\'' : '"';
369 os() << quote << nic.systemId << quote;
370 }
371 os() << '>' << RE;
372 }
373 atomic();
374 }
375
attributes(const Vector<StringC> & atts)376 void TransformFOTBuilder::attributes(const Vector<StringC> &atts)
377 {
378 for (size_t i = 0; i < atts.size(); i += 2) {
379 os() << RE << atts[i] << '=';
380 const StringC &s = atts[i + 1];
381 if (!contains(s, '"'))
382 os() << '"' << s << '"';
383 else if (!contains(s, '\''))
384 os() << '\'' << s << '\'';
385 else {
386 os() << '"';
387 for (size_t j = 0; j < s.size(); j++) {
388 if (s[j] == '"') {
389 if (xml_)
390 os() << """;
391 else
392 outputNumericCharRef(os(), '"');
393 }
394 else
395 os().put(s[j]);
396 }
397 os() << '"';
398 }
399 }
400 }
401
startElement(const ElementNIC & nic)402 void TransformFOTBuilder::startElement(const ElementNIC &nic)
403 {
404 flushPendingRe();
405 os() << "<";
406 const StringC &s = nic.gi.size() == 0 ? undefGi_ : nic.gi;
407 os() << s;
408 attributes(nic.attributes);
409 os() << RE << '>';
410 openElements_.push_back(s);
411 start();
412 state_ = stateStartOfElement;
413 }
414
emptyElement(const ElementNIC & nic)415 void TransformFOTBuilder::emptyElement(const ElementNIC &nic)
416 {
417 flushPendingRe();
418 os() << "<";
419 const StringC &s = nic.gi.size() == 0 ? undefGi_ : nic.gi;
420 os() << s;
421 attributes(nic.attributes);
422 if (xml_)
423 os() << "/>";
424 else
425 os() << '>';
426 atomic();
427 state_ = stateMiddle;
428 }
429
endElement()430 void TransformFOTBuilder::endElement()
431 {
432 flushPendingReCharRef();
433 os() << "</" << openElements_.back();
434 os() << RE << '>';
435 openElements_.resize(openElements_.size() - 1);
436 end();
437 state_ = stateMiddle;
438 }
439
processingInstruction(const StringC & s)440 void TransformFOTBuilder::processingInstruction(const StringC &s)
441 {
442 flushPendingReCharRef();
443 os() << "<?" << s;
444 if (xml_)
445 os() << "?>";
446 else
447 os() << '>';
448 atomic();
449 }
450
formattingInstruction(const StringC & s)451 void TransformFOTBuilder::formattingInstruction(const StringC &s)
452 {
453 flushPendingRe();
454 os() << s;
455 }
456
entityRef(const StringC & s)457 void TransformFOTBuilder::entityRef(const StringC &s)
458 {
459 flushPendingRe();
460 os() << "&" << s << ";";
461 }
462
startEntity(const StringC & systemId)463 void TransformFOTBuilder::startEntity(const StringC &systemId)
464 {
465 flushPendingRe();
466 OpenFile *ofp = new OpenFile;
467 openFileStack_.insert(ofp);
468 ofp->systemId = systemId;
469 ofp->saveOs = os_;
470 String<CmdLineApp::AppChar> filename;
471 #ifdef SP_WIDE_SYSTEM
472 filename = systemId;
473 #else
474 filename = app_->codingSystem()->convertOut(systemId);
475 #endif
476 if (filename.size()) {
477 filename += 0;
478 if (!ofp->fb.open(filename.data())) {
479 app_->message(CmdLineApp::openFileErrorMessage(),
480 StringMessageArg(systemId),
481 ErrnoMessageArg(errno));
482 }
483 else {
484 ofp->os
485 = new RecordOutputCharStream(
486 new EncodeOutputCharStream(&ofp->fb,
487 app_->outputCodingSystem()));
488 ofp->os->setEscaper(outputNumericCharRef);
489 os_ = ofp->os.pointer();
490 }
491 }
492 }
493
endEntity()494 void TransformFOTBuilder::endEntity()
495 {
496 flushPendingRe();
497 OpenFile &of = *openFileStack_.head();
498 if (of.os) {
499 errno = 0;
500 of.os->flush();
501 if (!of.fb.close())
502 app_->message(CmdLineApp::closeFileErrorMessage(),
503 StringMessageArg(of.systemId),
504 ErrnoMessageArg(errno));
505 }
506 os_ = of.saveOs;
507 delete openFileStack_.get();
508 }
509
510 inline
operator <<(OutputCharStream & os,GroveString & str)511 OutputCharStream &operator<<(OutputCharStream &os, GroveString &str)
512 {
513 return os.write(str.data(), str.size());
514 }
515
charactersFromNode(const NodePtr & nd,const Char * s,size_t n)516 void TransformFOTBuilder::charactersFromNode(const NodePtr &nd, const Char *s, size_t n)
517 {
518 GroveString name;
519 if (preserveSdata_ && n == 1 && nd->getEntityName(name) == accessOK) {
520 flushPendingRe();
521 os() << "&" << name << ';';
522 }
523 else
524 TransformFOTBuilder::characters(s, n);
525 }
526
characters(const Char * s,size_t n)527 void TransformFOTBuilder::characters(const Char *s, size_t n)
528 {
529 if (n == 0)
530 return;
531 flushPendingRe();
532 if (state_ == stateStartOfElement && *s == RE) {
533 s++;
534 n--;
535 os() << " ";
536 if (n == 0) {
537 state_ = stateMiddle;
538 return;
539 }
540 }
541 if (s[n - 1] == RE) {
542 n--;
543 state_ = statePendingRe;
544 }
545 else
546 state_ = stateMiddle;
547 for (; n > 0; n--, s++) {
548 switch (*s) {
549 case '&':
550 if (xml_)
551 os() << "&";
552 else
553 outputNumericCharRef(os(), *s);
554 break;
555 case '<':
556 if (xml_)
557 os() << "<";
558 else
559 outputNumericCharRef(os(), *s);
560 break;
561 case '>':
562 if (xml_)
563 os() << ">";
564 else
565 outputNumericCharRef(os(), *s);
566 break;
567 default:
568 os().put(*s);
569 break;
570 }
571 }
572 }
573
extension(const ExtensionFlowObj & fo,const NodePtr & nd)574 void TransformFOTBuilder::extension(const ExtensionFlowObj &fo, const NodePtr &nd)
575 {
576 ((const TransformExtensionFlowObj &)fo).atomic(*this, nd);
577 }
578
startExtensionSerial(const CompoundExtensionFlowObj & fo,const NodePtr & nd)579 void TransformFOTBuilder::startExtensionSerial(const CompoundExtensionFlowObj &fo, const NodePtr &nd)
580 {
581 ((const TransformCompoundExtensionFlowObj &)fo).start(*this, nd);
582 }
583
endExtensionSerial(const CompoundExtensionFlowObj & fo)584 void TransformFOTBuilder::endExtensionSerial(const CompoundExtensionFlowObj &fo)
585 {
586 ((const TransformCompoundExtensionFlowObj &)fo).end(*this);
587 }
588
setPreserveSdata(bool b)589 void TransformFOTBuilder::setPreserveSdata(bool b)
590 {
591 preserveSdata_ = b;
592 }
593
start()594 void TransformFOTBuilder::start()
595 {
596 preserveSdataStack_ += Char(preserveSdata_);
597 }
598
end()599 void TransformFOTBuilder::end()
600 {
601 preserveSdataStack_.resize(preserveSdataStack_.size() - 1);
602 preserveSdata_ = bool(preserveSdataStack_[preserveSdataStack_.size() - 1]);
603 }
604
~OpenFile()605 TransformFOTBuilder::OpenFile::~OpenFile()
606 {
607 }
608
~DocumentTypeNIC()609 TransformFOTBuilder::DocumentTypeNIC::~DocumentTypeNIC()
610 {
611 }
612
~ElementNIC()613 TransformFOTBuilder::ElementNIC::~ElementNIC()
614 {
615 }
616
617 #ifdef DSSSL_NAMESPACE
618 }
619 #endif
620
621 #include "TransformFOTBuilder_inst.cxx"
622