1 /**
2 * Copyright (C) 2009 Cedric Tabin
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 */
18
19 #include "PrettyPrinter.h"
20
21 /*======================= FUNCTIONS ====================================================================*/
22
23 /* error reporting functions */
24 static void PP_ERROR(const char* fmt, ...) G_GNUC_PRINTF(1,2); /* prints an error message */
25
26 /* xml pretty printing functions */
27 static void putCharInBuffer(char charToAdd); /* put a char into the new char buffer */
28 static void putCharsInBuffer(const char* charsToAdd); /* put the chars into the new char buffer */
29 static void putNextCharsInBuffer(int nbChars); /* put the next nbChars of the input buffer into the new buffer */
30 static int readWhites(bool considerLineBreakAsWhite); /* read the next whites into the input buffer */
31 static char readNextChar(void); /* read the next char into the input buffer; */
32 static char getNextChar(void); /* returns the next char but do not increase the input buffer index (use readNextChar for that) */
33 static char getPreviousInsertedChar(void); /* returns the last inserted char into the new buffer */
34 static bool isWhite(char c); /* check if the specified char is a white */
35 static bool isSpace(char c); /* check if the specified char is a space */
36 static bool isLineBreak(char c); /* check if the specified char is a new line */
37 static bool isQuote(char c); /* check if the specified char is a quote (simple or double) */
38 static int putNewLine(void); /* put a new line into the new char buffer with the correct number of whites (indentation) */
39 static bool isInlineNodeAllowed(void); /* check if it is possible to have an inline node */
40 static bool isOnSingleLine(int skip, char stop1, char stop2); /* check if the current node data is on one line (for inlining) */
41 static void resetBackwardIndentation(bool resetLineBreak); /* reset the indentation for the current depth (just reset the index in fact) */
42
43 /* specific parsing functions */
44 static int processElements(void); /* returns the number of elements processed */
45 static void processElementAttribute(void); /* process on attribute of a node */
46 static void processElementAttributes(void); /* process all the attributes of a node */
47 static void processHeader(void); /* process the header <?xml version="..." ?> */
48 static void processNode(void); /* process an XML node */
49 static void processTextNode(void); /* process a text node */
50 static void processComment(void); /* process a comment */
51 static void processCDATA(void); /* process a CDATA node */
52 static void processDoctype(void); /* process a DOCTYPE node */
53 static void processDoctypeElement(void); /* process a DOCTYPE ELEMENT node */
54
55 /* debug function */
56 static void printError(const char *msg, ...) G_GNUC_PRINTF(1,2); /* just print a message like the printf method */
57 static void printDebugStatus(void); /* just print some variables into the console for debugging */
58
59 /*============================================ PRIVATE PROPERTIES ======================================*/
60
61 /* those are variables that are shared by the functions and
62 * shouldn't be altered. */
63
64 static int result; /* result of the pretty printing */
65 static char* xmlPrettyPrinted; /* new buffer for the formatted XML */
66 static int xmlPrettyPrintedLength; /* buffer size */
67 static int xmlPrettyPrintedIndex; /* buffer index (position of the next char to insert) */
68 static const char* inputBuffer; /* input buffer */
69 static int inputBufferLength; /* input buffer size */
70 static int inputBufferIndex; /* input buffer index (position of the next char to read into the input string) */
71 static int currentDepth; /* current depth (for indentation) */
72 static char* currentNodeName; /* current node name */
73 static bool appendIndentation; /* if the indentation must be added (with a line break before) */
74 static bool lastNodeOpen; /* defines if the last action was a not opening or not */
75 static PrettyPrintingOptions* options; /* options of PrettyPrinting */
76
77 /*============================================ GENERAL FUNCTIONS =======================================*/
78
/* Writes the printf-like formatted message on stderr, followed by a line feed. */
static void PP_ERROR(const char* fmt, ...)
{
    va_list args;

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);

    fputc('\n', stderr);
}
88
/**
 * Pretty-prints the XML text found in 'xml'.
 *
 * xml           : the text to format (the parser stops on a '\0' char)
 * xml_length    : number of chars of 'xml'
 * output        : on success, receives the newly allocated, '\0'-terminated
 *                 formatted text; the caller releases it with g_free()
 * output_length : on success, receives the length of '*output' without the
 *                 trailing '\0'
 * ppOptions     : the formatting options, or NULL to use the default ones
 *
 * Returns PRETTY_PRINTING_SUCCESS, PRETTY_PRINTING_EMPTY_XML when there is
 * nothing to format, or the error code set during the processing. 'output'
 * and 'output_length' are left untouched unless the call succeeds.
 */
int processXMLPrettyPrinting(const char *xml, int xml_length, char** output, int* output_length, PrettyPrintingOptions* ppOptions)
{
    bool freeOptions = FALSE;

    /* empty buffer, nothing to process */
    if (xml == NULL || xml_length == 0) { return PRETTY_PRINTING_EMPTY_XML; }

    /* no options given => work with temporary default ones */
    if (ppOptions == NULL)
    {
        ppOptions = createDefaultPrettyPrintingOptions();
        if (ppOptions == NULL) { return PRETTY_PRINTING_SYSTEM_ERROR; }
        freeOptions = TRUE;
    }

    /* initialize the variables */
    result = PRETTY_PRINTING_SUCCESS;
    options = ppOptions;
    currentNodeName = NULL;
    appendIndentation = FALSE;
    lastNodeOpen = FALSE;
    xmlPrettyPrintedIndex = 0;
    inputBufferIndex = 0;
    currentDepth = -1;

    inputBuffer = xml;
    inputBufferLength = xml_length;

    xmlPrettyPrintedLength = xml_length;
    xmlPrettyPrinted = (char*)g_try_malloc(sizeof(char)*(xml_length));
    if (xmlPrettyPrinted == NULL)
    {
        PP_ERROR("Allocation error (initialisation)");
        result = PRETTY_PRINTING_SYSTEM_ERROR;
    }
    else
    {
        /* go to the first char */
        readWhites(TRUE);

        /* process the pretty-printing */
        processElements();

        /* close the buffer */
        putCharInBuffer('\0');
    }

    /* adjust the final size; pointless for a buffer that will be dropped,
     * and a zero-sized reallocation would release the buffer */
    if (result == PRETTY_PRINTING_SUCCESS && xmlPrettyPrintedIndex > 0)
    {
        char* reallocated = (char*)g_try_realloc(xmlPrettyPrinted, xmlPrettyPrintedIndex);
        if (reallocated == NULL)
        {
            PP_ERROR("Allocation error (reallocation size is %d)", xmlPrettyPrintedIndex);
            result = PRETTY_PRINTING_SYSTEM_ERROR;
        }
        else
        {
            xmlPrettyPrinted = reallocated;
        }
    }

    /* if success, then update the values */
    if (result == PRETTY_PRINTING_SUCCESS)
    {
        *output = xmlPrettyPrinted;
        *output_length = xmlPrettyPrintedIndex-1; /* the '\0' is not in the length */
    }
    /* else clean the other values */
    else
    {
        g_free(xmlPrettyPrinted);
    }

    /* the default options were created here (the line break chars are a
     * g_strdup() copy), so they are released here, whatever the outcome */
    if (freeOptions)
    {
        g_free((gpointer)ppOptions->newLineChars);
        g_free(ppOptions);
    }

    /* do not keep references on the buffers once the call is over */
    xmlPrettyPrinted = NULL;
    inputBuffer = NULL;
    currentNodeName = NULL;
    options = NULL;

    /* and finally the result */
    return result;
}
165
createDefaultPrettyPrintingOptions(void)166 PrettyPrintingOptions* createDefaultPrettyPrintingOptions(void)
167 {
168 PrettyPrintingOptions* defaultOptions = (PrettyPrintingOptions*)g_try_malloc(sizeof(PrettyPrintingOptions));
169 if (defaultOptions == NULL)
170 {
171 PP_ERROR("Unable to allocate memory for PrettyPrintingOptions");
172 return NULL;
173 }
174
175 defaultOptions->newLineChars = g_strdup ("\r\n");
176 defaultOptions->indentChar = ' ';
177 defaultOptions->indentLength = 2;
178 defaultOptions->oneLineText = FALSE;
179 defaultOptions->inlineText = TRUE;
180 defaultOptions->oneLineComment = FALSE;
181 defaultOptions->inlineComment = TRUE;
182 defaultOptions->oneLineCdata = FALSE;
183 defaultOptions->inlineCdata = TRUE;
184 defaultOptions->emptyNodeStripping = TRUE;
185 defaultOptions->emptyNodeStrippingSpace = TRUE;
186 defaultOptions->forceEmptyNodeSplit = FALSE;
187 defaultOptions->trimLeadingWhites = TRUE;
188 defaultOptions->trimTrailingWhites = TRUE;
189 defaultOptions->alignComment = TRUE;
190 defaultOptions->alignText = TRUE;
191 defaultOptions->alignCdata = TRUE;
192
193 return defaultOptions;
194 }
195
/* Consumes nbChars chars of the input buffer and appends each of them to
 * the output buffer. Does nothing when nbChars is not positive. */
void putNextCharsInBuffer(int nbChars)
{
    int remaining = nbChars;
    while (remaining > 0)
    {
        putCharInBuffer(readNextChar());
        --remaining;
    }
}
205
putCharInBuffer(char charToAdd)206 void putCharInBuffer(char charToAdd)
207 {
208 /* check if the buffer is full and reallocation if needed */
209 if (xmlPrettyPrintedIndex >= xmlPrettyPrintedLength)
210 {
211 char* reallocated;
212
213 if (charToAdd == '\0') { ++xmlPrettyPrintedLength; }
214 else { xmlPrettyPrintedLength += inputBufferLength; }
215 reallocated = (char*)g_try_realloc(xmlPrettyPrinted, xmlPrettyPrintedLength);
216 if (reallocated == NULL) { PP_ERROR("Allocation error (char was %c)", charToAdd); return; }
217 xmlPrettyPrinted = reallocated;
218 }
219
220 /* putting the char and increase the index for the next one */
221 xmlPrettyPrinted[xmlPrettyPrintedIndex] = charToAdd;
222 ++xmlPrettyPrintedIndex;
223 }
224
/* Appends every char of the NUL-terminated string to the output buffer
 * (the terminating '\0' is not appended). */
void putCharsInBuffer(const char* charsToAdd)
{
    const char* cursor;
    for (cursor = charsToAdd ; *cursor != '\0' ; ++cursor)
    {
        putCharInBuffer(*cursor);
    }
}
234
getPreviousInsertedChar(void)235 char getPreviousInsertedChar(void)
236 {
237 return xmlPrettyPrinted[xmlPrettyPrintedIndex-1];
238 }
239
putNewLine(void)240 int putNewLine(void)
241 {
242 int spaces;
243 int i;
244
245 putCharsInBuffer(options->newLineChars);
246 spaces = currentDepth*options->indentLength;
247 for(i=0 ; i<spaces ; ++i)
248 {
249 putCharInBuffer(options->indentChar);
250 }
251
252 return spaces;
253 }
254
getNextChar(void)255 char getNextChar(void)
256 {
257 return inputBuffer[inputBufferIndex];
258 }
259
readNextChar(void)260 char readNextChar(void)
261 {
262 return inputBuffer[inputBufferIndex++];
263 }
264
readWhites(bool considerLineBreakAsWhite)265 int readWhites(bool considerLineBreakAsWhite)
266 {
267 int counter = 0;
268 while(isWhite(inputBuffer[inputBufferIndex]) &&
269 (!isLineBreak(inputBuffer[inputBufferIndex]) ||
270 considerLineBreakAsWhite))
271 {
272 ++counter;
273 ++inputBufferIndex;
274 }
275
276 return counter;
277 }
278
/* Tells if the char is a simple (') or a double (") quote. */
bool isQuote(char c)
{
    bool quote = (c == '\'');
    if (!quote) { quote = (c == '\"'); }
    return quote;
}
284
/* Tells if the char is a white, i.e. a space (see isSpace) or a line break
 * (see isLineBreak). */
bool isWhite(char c)
{
    return !(!isSpace(c) && !isLineBreak(c));
}
290
/* Tells if the char is a blank or a tabulation. */
bool isSpace(char c)
{
    const bool blank = (c == ' ');
    const bool tabulation = (c == '\t');
    return blank || tabulation;
}
296
/* Tells if the char is a line feed or a carriage return. */
bool isLineBreak(char c)
{
    return !(c != '\n' && c != '\r');
}
302
isInlineNodeAllowed(void)303 bool isInlineNodeAllowed(void)
304 {
305 int firstChar;
306 int secondChar;
307 int thirdChar;
308 int currentIndex;
309 char currentChar;
310
311 /* the last action was not an opening => inline not allowed */
312 if (!lastNodeOpen) { return FALSE; }
313
314 firstChar = getNextChar(); /* should be '<' or we are in a text node */
315 secondChar = inputBuffer[inputBufferIndex+1]; /* should be '!' */
316 thirdChar = inputBuffer[inputBufferIndex+2]; /* should be '-' or '[' */
317
318 /* loop through the content up to the next opening/closing node */
319 currentIndex = inputBufferIndex+1;
320 if (firstChar == '<')
321 {
322 char closingComment = '-';
323 char oldChar = ' ';
324 bool loop = TRUE;
325
326 /* another node is being open ==> no inline ! */
327 if (secondChar != '!') { return FALSE; }
328
329 /* okay we are in a comment/cdata node, so read until it is closed */
330
331 /* select the closing char */
332 if (thirdChar == '[') { closingComment = ']'; }
333
334 /* read until closing */
335 currentIndex += 3; /* that bypass meanless chars */
336 while (loop)
337 {
338 char current = inputBuffer[currentIndex];
339 if (current == closingComment && oldChar == closingComment) { loop = FALSE; } /* end of comment/cdata */
340 oldChar = current;
341 ++currentIndex;
342 }
343
344 /* okay now avoid blanks */
345 /* inputBuffer[index] is now '>' */
346 ++currentIndex;
347 while (isWhite(inputBuffer[currentIndex])) { ++currentIndex; }
348 }
349 else
350 {
351 /* this is a text node. Simply loop to the next '<' */
352 while (inputBuffer[currentIndex] != '<') { ++currentIndex; }
353 }
354
355 /* check what do we have now */
356 currentChar = inputBuffer[currentIndex];
357 if (currentChar == '<')
358 {
359 /* check if that is a closing node */
360 currentChar = inputBuffer[currentIndex+1];
361 if (currentChar == '/')
362 {
363 /* as we are in a correct XML (so far...), if the node is */
364 /* being directly closed, the inline is allowed !!! */
365 return TRUE;
366 }
367 }
368
369 /* inline not allowed... */
370 return FALSE;
371 }
372
isOnSingleLine(int skip,char stop1,char stop2)373 bool isOnSingleLine(int skip, char stop1, char stop2)
374 {
375 int currentIndex = inputBufferIndex+skip; /* skip the n first chars (in comment <!--) */
376 bool onSingleLine = TRUE;
377
378 char oldChar = inputBuffer[currentIndex];
379 char currentChar = inputBuffer[currentIndex+1];
380 while(onSingleLine && oldChar != stop1 && currentChar != stop2)
381 {
382 onSingleLine = !isLineBreak(oldChar);
383
384 ++currentIndex;
385 oldChar = currentChar;
386 currentChar = inputBuffer[currentIndex+1];
387
388 /**
389 * A line break inside the node has been reached. But we should check
390 * if there is something before the end of the node (otherwise, there
391 * are only spaces and it may be wanted to be considered as a single
392 * line). //TODO externalize an option for that ?
393 */
394 if (!onSingleLine)
395 {
396 while(oldChar != stop1 && currentChar != stop2)
397 {
398 /* okay there is something else => this is not on one line */
399 if (!isWhite(oldChar)) return FALSE;
400
401 ++currentIndex;
402 oldChar = currentChar;
403 currentChar = inputBuffer[currentIndex+1];
404 }
405
406 /* the end of the node has been reached with only whites. Then
407 * the node can be considered being one single line */
408 return TRUE;
409 }
410 }
411
412 return onSingleLine;
413 }
414
resetBackwardIndentation(bool resetLineBreak)415 void resetBackwardIndentation(bool resetLineBreak)
416 {
417 xmlPrettyPrintedIndex -= (currentDepth*options->indentLength);
418 if (resetLineBreak)
419 {
420 int len = strlen(options->newLineChars);
421 xmlPrettyPrintedIndex -= len;
422 }
423 }
424
425 /*#########################################################################################################################################*/
426 /*-----------------------------------------------------------------------------------------------------------------------------------------*/
427
428 /*-----------------------------------------------------------------------------------------------------------------------------------------*/
429 /*=============================================================== NODE FUNCTIONS ==========================================================*/
430 /*-----------------------------------------------------------------------------------------------------------------------------------------*/
431
432 /*-----------------------------------------------------------------------------------------------------------------------------------------*/
433 /*#########################################################################################################################################*/
434
processElements(void)435 int processElements(void)
436 {
437 int counter = 0;
438 bool loop = TRUE;
439 ++currentDepth;
440 while (loop && result == PRETTY_PRINTING_SUCCESS)
441 {
442 bool indentBackward;
443 char nextChar;
444
445 /* strip unused whites */
446 readWhites(TRUE);
447
448 nextChar = getNextChar();
449 if (nextChar == '\0') { return 0; } /* no more data to read */
450
451 /* put a new line with indentation */
452 if (appendIndentation) { putNewLine(); }
453
454 /* always append indentation (but need to store the state) */
455 indentBackward = appendIndentation;
456 appendIndentation = TRUE;
457
458 /* okay what do we have now ? */
459 if (nextChar != '<')
460 {
461 /* a simple text node */
462 processTextNode();
463 ++counter;
464 }
465 else /* some more check are needed */
466 {
467 nextChar = inputBuffer[inputBufferIndex+1];
468 if (nextChar == '!')
469 {
470 char oneMore = inputBuffer[inputBufferIndex+2];
471 if (oneMore == '-') { processComment(); ++counter; } /* a comment */
472 else if (oneMore == '[') { processCDATA(); ++counter; } /* cdata */
473 else if (oneMore == 'D') { processDoctype(); ++counter; } /* doctype <!DOCTYPE ... > */
474 else if (oneMore == 'E') { processDoctypeElement(); ++counter; } /* doctype element <!ELEMENT ... > */
475 else
476 {
477 printError("processElements : Invalid char '%c' afer '<!'", oneMore);
478 result = PRETTY_PRINTING_INVALID_CHAR_ERROR;
479 }
480 }
481 else if (nextChar == '/')
482 {
483 /* close a node => stop the loop !! */
484 loop = FALSE;
485 if (indentBackward)
486 {
487 /* INDEX HACKING */
488 xmlPrettyPrintedIndex -= options->indentLength;
489 }
490 }
491 else if (nextChar == '?')
492 {
493 /* this is a header */
494 processHeader();
495 }
496 else
497 {
498 /* a new node is open */
499 processNode();
500 ++counter;
501 }
502 }
503 }
504
505 --currentDepth;
506 return counter;
507 }
508
processElementAttribute(void)509 void processElementAttribute(void)
510 {
511 char quote;
512 char value;
513 /* process the attribute name */
514 char nextChar = readNextChar();
515 while (nextChar != '=')
516 {
517 putCharInBuffer(nextChar);
518 nextChar = readNextChar();
519 }
520
521 putCharInBuffer(nextChar); /* that's the '=' */
522
523 /* read the simple quote or double quote and put it into the buffer */
524 quote = readNextChar();
525 putCharInBuffer(quote);
526
527 /* process until the last quote */
528 value = readNextChar();
529 while(value != quote)
530 {
531 putCharInBuffer(value);
532 value = readNextChar();
533 }
534
535 /* simply add the last quote */
536 putCharInBuffer(quote);
537 }
538
processElementAttributes(void)539 void processElementAttributes(void)
540 {
541 bool loop = TRUE;
542 char current = getNextChar(); /* should not be a white */
543 if (isWhite(current))
544 {
545 printError("processElementAttributes : first char shouldn't be a white");
546 result = PRETTY_PRINTING_INVALID_CHAR_ERROR;
547 return;
548 }
549
550 while (loop)
551 {
552 char next;
553
554 readWhites(TRUE); /* strip the whites */
555
556 next = getNextChar(); /* don't read the last char (processed afterwards) */
557 if (next == '/') { loop = FALSE; } /* end of node */
558 else if (next == '>') { loop = FALSE; } /* end of tag */
559 else if (next == '?') { loop = FALSE; } /* end of header */
560 else
561 {
562 putCharInBuffer(' '); /* put only one space to separate attributes */
563 processElementAttribute();
564 }
565 }
566 }
567
processHeader(void)568 void processHeader(void)
569 {
570 int firstChar = inputBuffer[inputBufferIndex]; /* should be '<' */
571 int secondChar = inputBuffer[inputBufferIndex+1]; /* must be '?' */
572
573 if (firstChar != '<')
574 {
575 /* what ?????? invalid xml !!! */
576 printError("processHeader : first char should be '<' (not '%c')", firstChar);
577 result = PRETTY_PRINTING_INVALID_CHAR_ERROR; return;
578 }
579
580 if (secondChar == '?')
581 {
582 /* puts the '<' and '?' chars into the new buffer */
583 putNextCharsInBuffer(2);
584
585 while(!isWhite(getNextChar())) { putNextCharsInBuffer(1); }
586
587 readWhites(TRUE);
588 processElementAttributes();
589
590 /* puts the '?' and '>' chars into the new buffer */
591 putNextCharsInBuffer(2);
592 }
593 }
594
processNode(void)595 void processNode(void)
596 {
597 char closeChar;
598 int subElementsProcessed = 0;
599 char nextChar;
600 char* nodeName;
601 int nodeNameLength = 0;
602 int i;
603 int opening = readNextChar();
604 if (opening != '<')
605 {
606 printError("processNode : The first char should be '<' (not '%c')", opening);
607 result = PRETTY_PRINTING_INVALID_CHAR_ERROR;
608 return;
609 }
610
611 putCharInBuffer(opening);
612
613 /* read the node name */
614 while (!isWhite(getNextChar()) &&
615 getNextChar() != '>' && /* end of the tag */
616 getNextChar() != '/') /* tag is being closed */
617 {
618 putNextCharsInBuffer(1);
619 ++nodeNameLength;
620 }
621
622 /* store the name */
623 nodeName = (char*)g_try_malloc(sizeof(char)*nodeNameLength+1);
624 if (nodeName == NULL) { PP_ERROR("Allocation error (node name length is %d)", nodeNameLength); return ; }
625 nodeName[nodeNameLength] = '\0';
626 for (i=0 ; i<nodeNameLength ; ++i)
627 {
628 int tempIndex = xmlPrettyPrintedIndex-nodeNameLength+i;
629 nodeName[i] = xmlPrettyPrinted[tempIndex];
630 }
631
632 currentNodeName = nodeName; /* set the name for using in other methods */
633 lastNodeOpen = TRUE;
634
635 /* process the attributes */
636 readWhites(TRUE);
637 processElementAttributes();
638
639 /* process the end of the tag */
640 subElementsProcessed = 0;
641 nextChar = getNextChar(); /* should be either '/' or '>' */
642 if (nextChar == '/') /* the node is being closed immediatly */
643 {
644 /* closing node directly */
645 if (options->emptyNodeStripping || !options->forceEmptyNodeSplit)
646 {
647 if (options->emptyNodeStrippingSpace) { putCharInBuffer(' '); }
648 putNextCharsInBuffer(2);
649 }
650 /* split the closing nodes */
651 else
652 {
653 readNextChar(); /* removing '/' */
654 readNextChar(); /* removing '>' */
655
656 putCharInBuffer('>');
657 if (!options->inlineText)
658 {
659 /* no inline text => new line ! */
660 putNewLine();
661 }
662
663 putCharsInBuffer("</");
664 putCharsInBuffer(currentNodeName);
665 putCharInBuffer('>');
666 }
667
668 lastNodeOpen=FALSE;
669 return;
670 }
671 else if (nextChar == '>')
672 {
673 /* the tag is just closed (maybe some content) */
674 putNextCharsInBuffer(1);
675 subElementsProcessed = processElements();
676 }
677 else
678 {
679 printError("processNode : Invalid character '%c'", nextChar);
680 result = PRETTY_PRINTING_INVALID_CHAR_ERROR;
681 return;
682 }
683
684 /* if the code reaches this area, then the processElements has been called and we must
685 * close the opening tag */
686 closeChar = getNextChar();
687 if (closeChar != '<')
688 {
689 printError("processNode : Invalid character '%c' for closing tag (should be '<')", closeChar);
690 result = PRETTY_PRINTING_INVALID_CHAR_ERROR;
691 return;
692 }
693
694 do
695 {
696 closeChar = readNextChar();
697 putCharInBuffer(closeChar);
698 }
699 while(closeChar != '>');
700
701 /* there is no elements */
702 if (subElementsProcessed == 0)
703 {
704 /* the node will be stripped */
705 if (options->emptyNodeStripping)
706 {
707 /* because we have '<nodeName ...></nodeName>' */
708 xmlPrettyPrintedIndex -= nodeNameLength+4;
709 resetBackwardIndentation(TRUE);
710
711 if (options->emptyNodeStrippingSpace) { putCharInBuffer(' '); }
712 putCharsInBuffer("/>");
713 }
714 /* the closing tag will be put on the same line */
715 else if (options->inlineText)
716 {
717 /* correct the index because we have '</nodeName>' */
718 xmlPrettyPrintedIndex -= nodeNameLength+3;
719 resetBackwardIndentation(TRUE);
720
721 /* rewrite the node name */
722 putCharsInBuffer("</");
723 putCharsInBuffer(currentNodeName);
724 putCharInBuffer('>');
725 }
726 }
727
728 /* the node is closed */
729 lastNodeOpen = FALSE;
730
731 /* freeeeeeee !!! */
732 g_free(nodeName);
733 nodeName = NULL;
734 currentNodeName = NULL;
735 }
736
processComment(void)737 void processComment(void)
738 {
739 char lastChar;
740 bool loop = TRUE;
741 char oldChar;
742 bool inlineAllowed = FALSE;
743 if (options->inlineComment) { inlineAllowed = isInlineNodeAllowed(); }
744 if (inlineAllowed && !options->oneLineComment) { inlineAllowed = isOnSingleLine(4, '-', '-'); }
745 if (inlineAllowed) { resetBackwardIndentation(TRUE); }
746
747 putNextCharsInBuffer(4); /* add the chars '<!--' */
748
749 oldChar = '-';
750 while (loop)
751 {
752 char nextChar = readNextChar();
753 if (oldChar == '-' && nextChar == '-') /* comment is being closed */
754 {
755 loop = FALSE;
756 }
757
758 if (!isLineBreak(nextChar)) /* the comment simply continues */
759 {
760 if (options->oneLineComment && isSpace(nextChar))
761 {
762 /* removes all the unecessary spaces */
763 while(isSpace(getNextChar()))
764 {
765 nextChar = readNextChar();
766 }
767 putCharInBuffer(' ');
768 oldChar = ' ';
769 }
770 else
771 {
772 /* comment is left untouched */
773 putCharInBuffer(nextChar);
774 oldChar = nextChar;
775 }
776
777 if (!loop && options->alignComment) /* end of comment */
778 {
779 /* ensures the chars preceding the first '-' are all spaces (there are at least
780 * 5 spaces in front of the '-->' for the alignment with '<!--') */
781 bool onlySpaces = xmlPrettyPrinted[xmlPrettyPrintedIndex-3] == ' ' &&
782 xmlPrettyPrinted[xmlPrettyPrintedIndex-4] == ' ' &&
783 xmlPrettyPrinted[xmlPrettyPrintedIndex-5] == ' ' &&
784 xmlPrettyPrinted[xmlPrettyPrintedIndex-6] == ' ' &&
785 xmlPrettyPrinted[xmlPrettyPrintedIndex-7] == ' ';
786
787 /* if all the preceding chars are white, then go for replacement */
788 if (onlySpaces)
789 {
790 xmlPrettyPrintedIndex -= 7; /* remove indentation spaces */
791 putCharsInBuffer("--"); /* reset the first chars of '-->' */
792 }
793 }
794 }
795 else if (!options->oneLineComment && !inlineAllowed) /* oh ! there is a line break */
796 {
797 /* if the comments need to be aligned, just add 5 spaces */
798 if (options->alignComment)
799 {
800 int read = readWhites(FALSE); /* strip the whites and new line */
801 if (nextChar == '\r' && read == 0 && getNextChar() == '\n') /* handles the \r\n return line */
802 {
803 readNextChar();
804 readWhites(FALSE);
805 }
806
807 putNewLine(); /* put a new indentation line */
808 putCharsInBuffer(" "); /* align with <!-- */
809 oldChar = ' '; /* and update the last char */
810 }
811 else
812 {
813 putCharInBuffer(nextChar);
814 oldChar = nextChar;
815 }
816 }
817 else /* the comments must be inlined */
818 {
819 readWhites(TRUE); /* strip the whites and add a space if needed */
820 if (getPreviousInsertedChar() != ' ' &&
821 strncmp(xmlPrettyPrinted+xmlPrettyPrintedIndex-4, "<!--", 4) != 0) /* prevents adding a space at the beginning */
822 {
823 putCharInBuffer(' ');
824 oldChar = ' ';
825 }
826 }
827 }
828
829 lastChar = readNextChar(); /* should be '>' */
830 if (lastChar != '>')
831 {
832 printError("processComment : last char must be '>' (not '%c')", lastChar);
833 result = PRETTY_PRINTING_INVALID_CHAR_ERROR;
834 return;
835 }
836 putCharInBuffer(lastChar);
837
838 if (inlineAllowed) { appendIndentation = FALSE; }
839
840 /* there vas no node open */
841 lastNodeOpen = FALSE;
842 }
843
processTextNode(void)844 void processTextNode(void)
845 {
846 /* checks if inline is allowed */
847 bool inlineTextAllowed = FALSE;
848 if (options->inlineText) { inlineTextAllowed = isInlineNodeAllowed(); }
849 if (inlineTextAllowed && !options->oneLineText) { inlineTextAllowed = isOnSingleLine(0, '<', '/'); }
850 if (inlineTextAllowed || !options->alignText)
851 {
852 resetBackwardIndentation(TRUE); /* remove previous indentation */
853 if (!inlineTextAllowed) { putNewLine(); }
854 }
855
856 /* the leading whites are automatically stripped. So we re-add it */
857 if (!options->trimLeadingWhites)
858 {
859 int backwardIndex = inputBufferIndex-1;
860 while (isSpace(inputBuffer[backwardIndex]))
861 {
862 --backwardIndex; /* backward rolling */
863 }
864
865 /* now the input[backwardIndex] IS NOT a white. So we go to
866 * the next char... */
867 ++backwardIndex;
868
869 /* and then re-add the whites */
870 while (inputBuffer[backwardIndex] == ' ' ||
871 inputBuffer[backwardIndex] == '\t')
872 {
873 putCharInBuffer(inputBuffer[backwardIndex]);
874 ++backwardIndex;
875 }
876 }
877
878 /* process the text into the node */
879 while(getNextChar() != '<')
880 {
881 char nextChar = readNextChar();
882 if (isLineBreak(nextChar))
883 {
884 if (options->oneLineText)
885 {
886 readWhites(TRUE);
887
888 /* as we can put text on one line, remove the line break
889 * and replace it by a space but only if the previous
890 * char wasn't a space */
891 if (getPreviousInsertedChar() != ' ') { putCharInBuffer(' '); }
892 }
893 else if (options->alignText)
894 {
895 int read = readWhites(FALSE);
896 if (nextChar == '\r' && read == 0 && getNextChar() == '\n') /* handles the '\r\n' */
897 {
898 nextChar = readNextChar();
899 readWhites(FALSE);
900 }
901
902 /* put a new line only if the closing tag is not reached */
903 if (getNextChar() != '<')
904 {
905 putNewLine();
906 }
907 }
908 else
909 {
910 putCharInBuffer(nextChar);
911 }
912 }
913 else
914 {
915 putCharInBuffer(nextChar);
916 }
917 }
918
919 /* strip the trailing whites */
920 if (options->trimTrailingWhites)
921 {
922 while(getPreviousInsertedChar() == ' ' ||
923 getPreviousInsertedChar() == '\t')
924 {
925 --xmlPrettyPrintedIndex;
926 }
927 }
928
929 /* remove the indentation for the closing tag */
930 if (inlineTextAllowed) { appendIndentation = FALSE; }
931
932 /* there vas no node open */
933 lastNodeOpen = FALSE;
934 }
935
processCDATA(void)936 void processCDATA(void)
937 {
938 char lastChar;
939 bool loop = TRUE;
940 char oldChar;
941 bool inlineAllowed = FALSE;
942 if (options->inlineCdata) { inlineAllowed = isInlineNodeAllowed(); }
943 if (inlineAllowed && !options->oneLineCdata) { inlineAllowed = isOnSingleLine(9, ']', ']'); }
944 if (inlineAllowed) { resetBackwardIndentation(TRUE); }
945
946 putNextCharsInBuffer(9); /* putting the '<![CDATA[' into the buffer */
947
948 oldChar = '[';
949 while(loop)
950 {
951 char nextChar = readNextChar();
952 char nextChar2 = getNextChar();
953 if (oldChar == ']' && nextChar == ']' && nextChar2 == '>') { loop = FALSE; } /* end of cdata */
954
955 if (!isLineBreak(nextChar)) /* the cdata simply continues */
956 {
957 if (options->oneLineCdata && isSpace(nextChar))
958 {
959 /* removes all the unecessary spaces */
960 while(isSpace(nextChar2))
961 {
962 nextChar = readNextChar();
963 nextChar2 = getNextChar();
964 }
965
966 putCharInBuffer(' ');
967 oldChar = ' ';
968 }
969 else
970 {
971 /* comment is left untouched */
972 putCharInBuffer(nextChar);
973 oldChar = nextChar;
974 }
975
976 if (!loop && options->alignCdata) /* end of cdata */
977 {
978 /* ensures the chars preceding the first '-' are all spaces (there are at least
979 * 10 spaces in front of the ']]>' for the alignment with '<![CDATA[') */
980 bool onlySpaces = xmlPrettyPrinted[xmlPrettyPrintedIndex-3] == ' ' &&
981 xmlPrettyPrinted[xmlPrettyPrintedIndex-4] == ' ' &&
982 xmlPrettyPrinted[xmlPrettyPrintedIndex-5] == ' ' &&
983 xmlPrettyPrinted[xmlPrettyPrintedIndex-6] == ' ' &&
984 xmlPrettyPrinted[xmlPrettyPrintedIndex-7] == ' ' &&
985 xmlPrettyPrinted[xmlPrettyPrintedIndex-8] == ' ' &&
986 xmlPrettyPrinted[xmlPrettyPrintedIndex-9] == ' ' &&
987 xmlPrettyPrinted[xmlPrettyPrintedIndex-10] == ' ' &&
988 xmlPrettyPrinted[xmlPrettyPrintedIndex-11] == ' ';
989
990 /* if all the preceding chars are white, then go for replacement */
991 if (onlySpaces)
992 {
993 xmlPrettyPrintedIndex -= 11; /* remove indentation spaces */
994 putCharsInBuffer("]]"); /* reset the first chars of '-->' */
995 }
996 }
997 }
998 else if (!options->oneLineCdata && !inlineAllowed) /* line break */
999 {
1000 /* if the cdata need to be aligned, just add 9 spaces */
1001 if (options->alignCdata)
1002 {
1003 int read = readWhites(FALSE); /* strip the whites and new line */
1004 if (nextChar == '\r' && read == 0 && getNextChar() == '\n') /* handles the \r\n return line */
1005 {
1006 readNextChar();
1007 readWhites(FALSE);
1008 }
1009
1010 putNewLine(); /* put a new indentation line */
1011 putCharsInBuffer(" "); /* align with <![CDATA[ */
1012 oldChar = ' '; /* and update the last char */
1013 }
1014 else
1015 {
1016 putCharInBuffer(nextChar);
1017 oldChar = nextChar;
1018 }
1019 }
1020 else /* cdata are inlined */
1021 {
1022 readWhites(TRUE); /* strip the whites and add a space if necessary */
1023 if(getPreviousInsertedChar() != ' ' &&
1024 strncmp(xmlPrettyPrinted+xmlPrettyPrintedIndex-9, "<![CDATA[", 9) != 0) /* prevents adding a space at the beginning */
1025 {
1026 putCharInBuffer(' ');
1027 oldChar = ' ';
1028 }
1029 }
1030 }
1031
1032 /* if the cdata is inline, then all the trailing spaces are removed */
1033 if (options->oneLineCdata)
1034 {
1035 xmlPrettyPrintedIndex -= 2; /* because of the last ']]' inserted */
1036 while(isWhite(xmlPrettyPrinted[xmlPrettyPrintedIndex-1]))
1037 {
1038 --xmlPrettyPrintedIndex;
1039 }
1040 putCharsInBuffer("]]");
1041 }
1042
1043 /* finalize the cdata */
1044 lastChar = readNextChar(); /* should be '>' */
1045 if (lastChar != '>')
1046 {
1047 printError("processCDATA : last char must be '>' (not '%c')", lastChar);
1048 result = PRETTY_PRINTING_INVALID_CHAR_ERROR;
1049 return;
1050 }
1051
1052 putCharInBuffer(lastChar);
1053
1054 if (inlineAllowed) { appendIndentation = FALSE; }
1055
1056 /* there was no node open */
1057 lastNodeOpen = FALSE;
1058 }
1059
processDoctype(void)1060 void processDoctype(void)
1061 {
1062 bool loop = TRUE;
1063
1064 putNextCharsInBuffer(9); /* put the '<!DOCTYPE' into the buffer */
1065
1066 while(loop)
1067 {
1068 int nextChar;
1069
1070 readWhites(TRUE);
1071 putCharInBuffer(' '); /* only one space for the attributes */
1072
1073 nextChar = readNextChar();
1074 while(!isWhite(nextChar) &&
1075 !isQuote(nextChar) && /* begins a quoted text */
1076 nextChar != '=' && /* begins an attribute */
1077 nextChar != '>' && /* end of doctype */
1078 nextChar != '[') /* inner <!ELEMENT> types */
1079 {
1080 putCharInBuffer(nextChar);
1081 nextChar = readNextChar();
1082 }
1083
1084 if (isWhite(nextChar)) {} /* do nothing, just let the next loop do the job */
1085 else if (isQuote(nextChar) || nextChar == '=')
1086 {
1087 char quote;
1088
1089 if (nextChar == '=')
1090 {
1091 putCharInBuffer(nextChar);
1092 nextChar = readNextChar(); /* now we should have a quote */
1093
1094 if (!isQuote(nextChar))
1095 {
1096 printError("processDoctype : the next char should be a quote (not '%c')", nextChar);
1097 result = PRETTY_PRINTING_INVALID_CHAR_ERROR;
1098 return;
1099 }
1100 }
1101
1102 /* simply process the content */
1103 quote = nextChar;
1104 do
1105 {
1106 putCharInBuffer(nextChar);
1107 nextChar = readNextChar();
1108 }
1109 while (nextChar != quote);
1110 putCharInBuffer(nextChar); /* now the last char is the last quote */
1111 }
1112 else if (nextChar == '>') /* end of doctype */
1113 {
1114 putCharInBuffer(nextChar);
1115 loop = FALSE;
1116 }
1117 else /* the char is a '[' => not supported yet */
1118 {
1119 printError("DOCTYPE inner ELEMENT is currently not supported by PrettyPrinter\n");
1120 result = PRETTY_PRINTING_NOT_SUPPORTED_YET;
1121 loop = FALSE;
1122 }
1123 }
1124 }
1125
processDoctypeElement(void)1126 void processDoctypeElement(void)
1127 {
1128 printError("ELEMENT is currently not supported by PrettyPrinter\n");
1129 result = PRETTY_PRINTING_NOT_SUPPORTED_YET;
1130 }
1131
/**
 * Reports a printf-style formatted error message, then dumps the current
 * state of the buffers with printDebugStatus().
 *
 * When HAVE_GLIB is defined the message is logged through g_logv() at the
 * warning level; otherwise it is written to stderr followed by a line break.
 *
 * msg : printf-like format string
 * ... : the arguments referenced by the format string
 */
void printError(const char *msg, ...)
{
    va_list args;

    va_start(args, msg);
#ifdef HAVE_GLIB
    g_logv(G_LOG_DOMAIN, G_LOG_LEVEL_WARNING, msg, args);
#else
    vfprintf(stderr, msg, args);
    fputc('\n', stderr); /* terminate the line of the message */
#endif
    va_end(args);

    /* always follow the message with the state of the buffers */
    printDebugStatus();
}
1146
printDebugStatus(void)1147 void printDebugStatus(void)
1148 {
1149 #ifdef HAVE_GLIB
1150 g_debug("\n===== INPUT =====\n%s\n=================\ninputLength = %d\ninputIndex = %d\noutputLength = %d\noutputIndex = %d\n",
1151 inputBuffer,
1152 inputBufferLength,
1153 inputBufferIndex,
1154 xmlPrettyPrintedLength,
1155 xmlPrettyPrintedIndex);
1156 #else
1157 PP_ERROR("\n===== INPUT =====\n%s\n=================\ninputLength = %d\ninputIndex = %d\noutputLength = %d\noutputIndex = %d\n",
1158 inputBuffer,
1159 inputBufferLength,
1160 inputBufferIndex,
1161 xmlPrettyPrintedLength,
1162 xmlPrettyPrintedIndex);
1163 #endif
1164 }
1165