1 /**
2  *   Copyright (C) 2009  Cedric Tabin
3  *
4  *   This program is free software; you can redistribute it and/or modify
5  *   it under the terms of the GNU General Public License as published by
6  *   the Free Software Foundation; either version 2 of the License, or
7  *   (at your option) any later version.
8  *
9  *   This program is distributed in the hope that it will be useful,
10  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *   GNU General Public License for more details.
13  *
14  *   You should have received a copy of the GNU General Public License along
15  *   with this program; if not, write to the Free Software Foundation, Inc.,
16  *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17  */
18 
19 #include "PrettyPrinter.h"
20 
21 /*======================= FUNCTIONS ====================================================================*/
22 
/* error reporting functions */
static void PP_ERROR(const char* fmt, ...) G_GNUC_PRINTF(1,2);  /* prints a printf-style error message, followed by a line feed, on stderr */

/* xml pretty printing functions */
static void putCharInBuffer(char charToAdd);                     /* put a char into the new char buffer (the buffer grows when it is full) */
static void putCharsInBuffer(const char* charsToAdd);            /* put the chars of a NUL-terminated string into the new char buffer */
static void putNextCharsInBuffer(int nbChars);                   /* put the next nbChars of the input buffer into the new buffer */
static int readWhites(bool considerLineBreakAsWhite);            /* skip the next whites of the input buffer and return how many chars were skipped */
static char readNextChar(void);                                  /* read the next char from the input buffer and move the input index forward */
static char getNextChar(void);                                   /* returns the next char but do not increase the input buffer index (use readNextChar for that) */
static char getPreviousInsertedChar(void);                       /* returns the last inserted char into the new buffer */
static bool isWhite(char c);                                     /* check if the specified char is a white (a space or a line break) */
static bool isSpace(char c);                                     /* check if the specified char is a space or a tabulation */
static bool isLineBreak(char c);                                 /* check if the specified char is a line break ('\n' or '\r') */
static bool isQuote(char c);                                     /* check if the specified char is a quote (simple or double) */
static int putNewLine(void);                                     /* put a new line into the new char buffer with the correct number of whites (indentation); returns that number */
static bool isInlineNodeAllowed(void);                           /* check if it is possible to have an inline node */
static bool isOnSingleLine(int skip, char stop1, char stop2);    /* check if the current node data is on one line (for inlining) */
static void resetBackwardIndentation(bool resetLineBreak);       /* reset the indentation for the current depth (just reset the index in fact) */

/* specific parsing functions */
static int processElements(void);                                /* returns the number of elements processed */
static void processElementAttribute(void);                       /* process one attribute of a node */
static void processElementAttributes(void);                      /* process all the attributes of a node */
static void processHeader(void);                                 /* process the header <?xml version="..." ?> */
static void processNode(void);                                   /* process an XML node */
static void processTextNode(void);                               /* process a text node */
static void processComment(void);                                /* process a comment */
static void processCDATA(void);                                  /* process a CDATA node */
static void processDoctype(void);                                /* process a DOCTYPE node */
static void processDoctypeElement(void);                         /* process a DOCTYPE ELEMENT node */

/* debug function */
static void printError(const char *msg, ...) G_GNUC_PRINTF(1,2); /* just print a message like the printf method */
static void printDebugStatus(void);                              /* just print some variables into the console for debugging */
58 
59 /*============================================ PRIVATE PROPERTIES ======================================*/
60 
/* Those variables are shared by all the functions of this file. They are
 * (re)initialized at the beginning of processXMLPrettyPrinting and
 * shouldn't be altered from the outside. */

static int result;                                                /* result of the pretty printing (one of the PRETTY_PRINTING_* codes) */
static char* xmlPrettyPrinted;                                    /* new buffer for the formatted XML */
static int xmlPrettyPrintedLength;                                /* buffer size */
static int xmlPrettyPrintedIndex;                                 /* buffer index (position of the next char to insert) */
static const char* inputBuffer;                                   /* input buffer */
static int inputBufferLength;                                     /* input buffer size */
static int inputBufferIndex;                                      /* input buffer index (position of the next char to read into the input string) */
static int currentDepth;                                          /* current depth (for indentation) */
static char* currentNodeName;                                     /* current node name */
static bool appendIndentation;                                /* if the indentation must be added (with a line break before) */
static bool lastNodeOpen;                                     /* defines if the last action was a node opening or not */
static PrettyPrintingOptions* options;                            /* options of PrettyPrinting */
76 
77 /*============================================ GENERAL FUNCTIONS =======================================*/
78 
/**
 * Reports an error on stderr.
 *
 * The message is built like printf() would do from fmt and the
 * following arguments; a line feed is appended after it.
 */
static void PP_ERROR(const char* fmt, ...)
{
    va_list args;

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);

    fputc('\n', stderr);
}
88 
/**
 * Pretty-prints an XML document.
 *
 * xml           : NUL-terminated input document (the parsing helpers stop
 *                 on the '\0' char).
 * xml_length    : number of chars of the input; it is used as the initial
 *                 size (and growing step) of the output buffer.
 * output        : receives the formatted, NUL-terminated document on
 *                 success. The buffer is allocated with the GLib allocator
 *                 and belongs to the caller (release it with g_free).
 * output_length : receives the length of *output, '\0' excluded.
 * ppOptions     : formatting options; when NULL, default options are
 *                 created for the duration of the call.
 *
 * Returns PRETTY_PRINTING_SUCCESS, PRETTY_PRINTING_EMPTY_XML when there is
 * nothing to process, PRETTY_PRINTING_SYSTEM_ERROR on allocation failure or
 * the error code set by the parsing functions. *output and *output_length
 * are only written on success.
 */
int processXMLPrettyPrinting(const char *xml, int xml_length, char** output, int* output_length, PrettyPrintingOptions* ppOptions)
{
    bool freeOptions = FALSE;
    char* reallocated;

    /* empty buffer, nothing to process */
    if (xml == NULL || xml_length <= 0) { return PRETTY_PRINTING_EMPTY_XML; }

    /* fall back on the default options if the caller gave none */
    if (ppOptions == NULL)
    {
        ppOptions = createDefaultPrettyPrintingOptions();
        if (ppOptions == NULL) { return PRETTY_PRINTING_SYSTEM_ERROR; }
        freeOptions = TRUE;
    }

    /* initialize the variables */
    result = PRETTY_PRINTING_SUCCESS;
    options = ppOptions;
    currentNodeName = NULL;
    appendIndentation = FALSE;
    lastNodeOpen = FALSE;
    xmlPrettyPrintedIndex = 0;
    inputBufferIndex = 0;
    currentDepth = -1;

    inputBuffer = xml;
    inputBufferLength = xml_length;

    xmlPrettyPrintedLength = xml_length;
    xmlPrettyPrinted = (char*)g_try_malloc(sizeof(char)*(xml_length));
    if (xmlPrettyPrinted == NULL)
    {
        PP_ERROR("Allocation error (initialisation)");
        result = PRETTY_PRINTING_SYSTEM_ERROR;
        goto cleanup;
    }

    /* go to the first char */
    readWhites(TRUE);

    /* process the pretty-printing */
    processElements();

    /* close the buffer */
    putCharInBuffer('\0');

    /* the terminator is missing when the buffer could not be grown */
    if (result == PRETTY_PRINTING_SUCCESS &&
        (xmlPrettyPrintedIndex <= 0 || xmlPrettyPrinted[xmlPrettyPrintedIndex-1] != '\0'))
    {
        result = PRETTY_PRINTING_SYSTEM_ERROR;
    }

    /* something went wrong : the partial output is useless */
    if (result != PRETTY_PRINTING_SUCCESS)
    {
        g_free(xmlPrettyPrinted);
        goto cleanup;
    }

    /* adjust the final size */
    reallocated = (char*)g_try_realloc(xmlPrettyPrinted, xmlPrettyPrintedIndex);
    if (reallocated == NULL)
    {
        PP_ERROR("Allocation error (reallocation size is %d)", xmlPrettyPrintedIndex);
        g_free(xmlPrettyPrinted);
        result = PRETTY_PRINTING_SYSTEM_ERROR;
        goto cleanup;
    }

    /* hand the buffer over to the caller. The index is one past the '\0',
     * so the length without the terminator is index-1 */
    *output = reallocated;
    *output_length = xmlPrettyPrintedIndex-1;

cleanup:
    /* the default options (and their own strings) were created here */
    if (freeOptions)
    {
        g_free((void*)ppOptions->newLineChars);
        g_free(ppOptions);
    }

    /* never keep a reference on data owned by the caller or already freed */
    xmlPrettyPrinted = NULL;
    inputBuffer = NULL;
    currentNodeName = NULL;
    options = NULL;

    /* and finally the result */
    return result;
}
165 
createDefaultPrettyPrintingOptions(void)166 PrettyPrintingOptions* createDefaultPrettyPrintingOptions(void)
167 {
168     PrettyPrintingOptions* defaultOptions = (PrettyPrintingOptions*)g_try_malloc(sizeof(PrettyPrintingOptions));
169     if (defaultOptions == NULL)
170     {
171         PP_ERROR("Unable to allocate memory for PrettyPrintingOptions");
172         return NULL;
173     }
174 
175     defaultOptions->newLineChars = g_strdup ("\r\n");
176     defaultOptions->indentChar = ' ';
177     defaultOptions->indentLength = 2;
178     defaultOptions->oneLineText = FALSE;
179     defaultOptions->inlineText = TRUE;
180     defaultOptions->oneLineComment = FALSE;
181     defaultOptions->inlineComment = TRUE;
182     defaultOptions->oneLineCdata = FALSE;
183     defaultOptions->inlineCdata = TRUE;
184     defaultOptions->emptyNodeStripping = TRUE;
185     defaultOptions->emptyNodeStrippingSpace = TRUE;
186     defaultOptions->forceEmptyNodeSplit = FALSE;
187     defaultOptions->trimLeadingWhites = TRUE;
188     defaultOptions->trimTrailingWhites = TRUE;
189     defaultOptions->alignComment = TRUE;
190     defaultOptions->alignText = TRUE;
191     defaultOptions->alignCdata = TRUE;
192 
193     return defaultOptions;
194 }
195 
/**
 * Consumes nbChars chars of the input buffer and appends each of them
 * to the output buffer. Nothing happens when nbChars is not positive.
 */
void putNextCharsInBuffer(int nbChars)
{
    int remaining = nbChars;
    while (remaining > 0)
    {
        putCharInBuffer(readNextChar());
        --remaining;
    }
}
205 
putCharInBuffer(char charToAdd)206 void putCharInBuffer(char charToAdd)
207 {
208     /* check if the buffer is full and reallocation if needed */
209     if (xmlPrettyPrintedIndex >= xmlPrettyPrintedLength)
210     {
211         char* reallocated;
212 
213         if (charToAdd == '\0') { ++xmlPrettyPrintedLength; }
214         else { xmlPrettyPrintedLength += inputBufferLength; }
215         reallocated = (char*)g_try_realloc(xmlPrettyPrinted, xmlPrettyPrintedLength);
216         if (reallocated == NULL) { PP_ERROR("Allocation error (char was %c)", charToAdd); return; }
217         xmlPrettyPrinted = reallocated;
218     }
219 
220     /* putting the char and increase the index for the next one */
221     xmlPrettyPrinted[xmlPrettyPrintedIndex] = charToAdd;
222     ++xmlPrettyPrintedIndex;
223 }
224 
/**
 * Appends every char of the NUL-terminated string charsToAdd to the
 * output buffer (the terminating '\0' is not copied).
 */
void putCharsInBuffer(const char* charsToAdd)
{
    const char* cursor;
    for (cursor = charsToAdd ; *cursor != '\0' ; ++cursor)
    {
        putCharInBuffer(*cursor);
    }
}
234 
getPreviousInsertedChar(void)235 char getPreviousInsertedChar(void)
236 {
237     return xmlPrettyPrinted[xmlPrettyPrintedIndex-1];
238 }
239 
putNewLine(void)240 int putNewLine(void)
241 {
242     int spaces;
243     int i;
244 
245     putCharsInBuffer(options->newLineChars);
246     spaces = currentDepth*options->indentLength;
247     for(i=0 ; i<spaces ; ++i)
248     {
249         putCharInBuffer(options->indentChar);
250     }
251 
252     return spaces;
253 }
254 
getNextChar(void)255 char getNextChar(void)
256 {
257     return inputBuffer[inputBufferIndex];
258 }
259 
readNextChar(void)260 char readNextChar(void)
261 {
262     return inputBuffer[inputBufferIndex++];
263 }
264 
readWhites(bool considerLineBreakAsWhite)265 int readWhites(bool considerLineBreakAsWhite)
266 {
267     int counter = 0;
268     while(isWhite(inputBuffer[inputBufferIndex]) &&
269           (!isLineBreak(inputBuffer[inputBufferIndex]) ||
270            considerLineBreakAsWhite))
271     {
272         ++counter;
273         ++inputBufferIndex;
274     }
275 
276     return counter;
277 }
278 
/**
 * Tells if c is a quote char, simple (') or double (").
 */
bool isQuote(char c)
{
    const bool simpleQuote = (c == '\'');
    const bool doubleQuote = (c == '\"');
    return simpleQuote || doubleQuote;
}
284 
/**
 * Tells if c is a white, that is either a space/tabulation (isSpace)
 * or a line break (isLineBreak).
 */
bool isWhite(char c)
{
    if (isSpace(c)) { return isSpace(c); }
    return isLineBreak(c);
}
290 
/**
 * Tells if c is a horizontal white : a space or a tabulation.
 */
bool isSpace(char c)
{
    const bool blank = (c == ' ');
    const bool tabulation = (c == '\t');
    return blank || tabulation;
}
296 
/**
 * Tells if c is a line break char : line feed or carriage return.
 */
bool isLineBreak(char c)
{
    const bool lineFeed = (c == '\n');
    const bool carriageReturn = (c == '\r');
    return lineFeed || carriageReturn;
}
302 
isInlineNodeAllowed(void)303 bool isInlineNodeAllowed(void)
304 {
305     int firstChar;
306     int secondChar;
307     int thirdChar;
308     int currentIndex;
309     char currentChar;
310 
311     /* the last action was not an opening => inline not allowed */
312     if (!lastNodeOpen) { return FALSE; }
313 
314     firstChar = getNextChar(); /* should be '<' or we are in a text node */
315     secondChar = inputBuffer[inputBufferIndex+1]; /* should be '!' */
316     thirdChar = inputBuffer[inputBufferIndex+2]; /* should be '-' or '[' */
317 
318     /* loop through the content up to the next opening/closing node */
319     currentIndex = inputBufferIndex+1;
320     if (firstChar == '<')
321     {
322         char closingComment = '-';
323         char oldChar = ' ';
324         bool loop = TRUE;
325 
326         /* another node is being open ==> no inline ! */
327         if (secondChar != '!') { return FALSE; }
328 
329         /* okay we are in a comment/cdata node, so read until it is closed */
330 
331         /* select the closing char */
332         if (thirdChar == '[') { closingComment = ']'; }
333 
334         /* read until closing */
335         currentIndex += 3; /* that bypass meanless chars */
336         while (loop)
337         {
338             char current = inputBuffer[currentIndex];
339             if (current == closingComment && oldChar == closingComment) { loop = FALSE; } /* end of comment/cdata */
340             oldChar = current;
341             ++currentIndex;
342         }
343 
344         /* okay now avoid blanks */
345         /*  inputBuffer[index] is now '>' */
346         ++currentIndex;
347         while (isWhite(inputBuffer[currentIndex])) { ++currentIndex; }
348     }
349     else
350     {
351         /* this is a text node. Simply loop to the next '<' */
352         while (inputBuffer[currentIndex] != '<') { ++currentIndex; }
353     }
354 
355     /* check what do we have now */
356     currentChar = inputBuffer[currentIndex];
357     if (currentChar == '<')
358     {
359         /* check if that is a closing node */
360         currentChar = inputBuffer[currentIndex+1];
361         if (currentChar == '/')
362         {
363             /* as we are in a correct XML (so far...), if the node is  */
364             /* being directly closed, the inline is allowed !!! */
365             return TRUE;
366         }
367     }
368 
369     /* inline not allowed... */
370     return FALSE;
371 }
372 
isOnSingleLine(int skip,char stop1,char stop2)373 bool isOnSingleLine(int skip, char stop1, char stop2)
374 {
375     int currentIndex = inputBufferIndex+skip; /* skip the n first chars (in comment <!--) */
376     bool onSingleLine = TRUE;
377 
378     char oldChar = inputBuffer[currentIndex];
379     char currentChar = inputBuffer[currentIndex+1];
380     while(onSingleLine && oldChar != stop1 && currentChar != stop2)
381     {
382         onSingleLine = !isLineBreak(oldChar);
383 
384         ++currentIndex;
385         oldChar = currentChar;
386         currentChar = inputBuffer[currentIndex+1];
387 
388         /**
389          * A line break inside the node has been reached. But we should check
390          * if there is something before the end of the node (otherwise, there
391          * are only spaces and it may be wanted to be considered as a single
392          * line). //TODO externalize an option for that ?
393          */
394         if (!onSingleLine)
395         {
396             while(oldChar != stop1 && currentChar != stop2)
397             {
398                 /* okay there is something else => this is not on one line */
399                 if (!isWhite(oldChar)) return FALSE;
400 
401                 ++currentIndex;
402                 oldChar = currentChar;
403                 currentChar = inputBuffer[currentIndex+1];
404             }
405 
406             /* the end of the node has been reached with only whites. Then
407              * the node can be considered being one single line */
408             return TRUE;
409         }
410     }
411 
412     return onSingleLine;
413 }
414 
resetBackwardIndentation(bool resetLineBreak)415 void resetBackwardIndentation(bool resetLineBreak)
416 {
417     xmlPrettyPrintedIndex -= (currentDepth*options->indentLength);
418     if (resetLineBreak)
419     {
420         int len = strlen(options->newLineChars);
421         xmlPrettyPrintedIndex -= len;
422     }
423 }
424 
425 /*#########################################################################################################################################*/
426 /*-----------------------------------------------------------------------------------------------------------------------------------------*/
427 
428 /*-----------------------------------------------------------------------------------------------------------------------------------------*/
429 /*=============================================================== NODE FUNCTIONS ==========================================================*/
430 /*-----------------------------------------------------------------------------------------------------------------------------------------*/
431 
432 /*-----------------------------------------------------------------------------------------------------------------------------------------*/
433 /*#########################################################################################################################################*/
434 
processElements(void)435 int processElements(void)
436 {
437     int counter = 0;
438     bool loop = TRUE;
439     ++currentDepth;
440     while (loop && result == PRETTY_PRINTING_SUCCESS)
441     {
442         bool indentBackward;
443         char nextChar;
444 
445         /* strip unused whites */
446         readWhites(TRUE);
447 
448         nextChar = getNextChar();
449         if (nextChar == '\0') { return 0; } /* no more data to read */
450 
451         /* put a new line with indentation */
452         if (appendIndentation) { putNewLine(); }
453 
454         /* always append indentation (but need to store the state) */
455         indentBackward = appendIndentation;
456         appendIndentation = TRUE;
457 
458         /* okay what do we have now ? */
459         if (nextChar != '<')
460         {
461             /* a simple text node */
462             processTextNode();
463             ++counter;
464         }
465         else /* some more check are needed */
466         {
467             nextChar = inputBuffer[inputBufferIndex+1];
468             if (nextChar == '!')
469             {
470                 char oneMore = inputBuffer[inputBufferIndex+2];
471                 if (oneMore == '-') { processComment(); ++counter; } /* a comment */
472                 else if (oneMore == '[') { processCDATA(); ++counter; } /* cdata */
473                 else if (oneMore == 'D') { processDoctype(); ++counter; } /* doctype <!DOCTYPE ... > */
474                 else if (oneMore == 'E') { processDoctypeElement(); ++counter; } /* doctype element <!ELEMENT ... > */
475                 else
476                 {
477                     printError("processElements : Invalid char '%c' afer '<!'", oneMore);
478                     result = PRETTY_PRINTING_INVALID_CHAR_ERROR;
479                 }
480             }
481             else if (nextChar == '/')
482             {
483                 /* close a node => stop the loop !! */
484                 loop = FALSE;
485                 if (indentBackward)
486                 {
487                     /* INDEX HACKING */
488                     xmlPrettyPrintedIndex -= options->indentLength;
489                 }
490             }
491             else if (nextChar == '?')
492             {
493                 /* this is a header */
494                 processHeader();
495             }
496             else
497             {
498                 /* a new node is open */
499                 processNode();
500                 ++counter;
501             }
502         }
503     }
504 
505     --currentDepth;
506     return counter;
507 }
508 
processElementAttribute(void)509 void processElementAttribute(void)
510 {
511     char quote;
512     char value;
513     /* process the attribute name */
514     char nextChar = readNextChar();
515     while (nextChar != '=')
516     {
517         putCharInBuffer(nextChar);
518         nextChar = readNextChar();
519     }
520 
521     putCharInBuffer(nextChar); /* that's the '=' */
522 
523     /* read the simple quote or double quote and put it into the buffer */
524     quote = readNextChar();
525     putCharInBuffer(quote);
526 
527     /* process until the last quote */
528     value = readNextChar();
529     while(value != quote)
530     {
531         putCharInBuffer(value);
532         value = readNextChar();
533     }
534 
535     /* simply add the last quote */
536     putCharInBuffer(quote);
537 }
538 
processElementAttributes(void)539 void processElementAttributes(void)
540 {
541     bool loop = TRUE;
542     char current = getNextChar(); /* should not be a white */
543     if (isWhite(current))
544     {
545         printError("processElementAttributes : first char shouldn't be a white");
546         result = PRETTY_PRINTING_INVALID_CHAR_ERROR;
547         return;
548     }
549 
550     while (loop)
551     {
552         char next;
553 
554         readWhites(TRUE); /* strip the whites */
555 
556         next = getNextChar(); /* don't read the last char (processed afterwards) */
557         if (next == '/') { loop = FALSE; } /* end of node */
558         else if (next == '>') { loop = FALSE; } /* end of tag */
559         else if (next == '?') { loop = FALSE; } /* end of header */
560         else
561         {
562             putCharInBuffer(' '); /* put only one space to separate attributes */
563             processElementAttribute();
564         }
565     }
566 }
567 
processHeader(void)568 void processHeader(void)
569 {
570     int firstChar = inputBuffer[inputBufferIndex]; /* should be '<' */
571     int secondChar = inputBuffer[inputBufferIndex+1]; /* must be '?' */
572 
573     if (firstChar != '<')
574     {
575         /* what ?????? invalid xml !!! */
576         printError("processHeader : first char should be '<' (not '%c')", firstChar);
577         result = PRETTY_PRINTING_INVALID_CHAR_ERROR; return;
578     }
579 
580     if (secondChar == '?')
581     {
582         /* puts the '<' and '?' chars into the new buffer */
583         putNextCharsInBuffer(2);
584 
585         while(!isWhite(getNextChar())) { putNextCharsInBuffer(1); }
586 
587         readWhites(TRUE);
588         processElementAttributes();
589 
590         /* puts the '?' and '>' chars into the new buffer */
591         putNextCharsInBuffer(2);
592     }
593 }
594 
processNode(void)595 void processNode(void)
596 {
597     char closeChar;
598     int subElementsProcessed = 0;
599     char nextChar;
600     char* nodeName;
601     int nodeNameLength = 0;
602     int i;
603     int opening = readNextChar();
604     if (opening != '<')
605     {
606         printError("processNode : The first char should be '<' (not '%c')", opening);
607         result = PRETTY_PRINTING_INVALID_CHAR_ERROR;
608         return;
609     }
610 
611     putCharInBuffer(opening);
612 
613     /* read the node name */
614     while (!isWhite(getNextChar()) &&
615            getNextChar() != '>' &&  /* end of the tag */
616            getNextChar() != '/') /* tag is being closed */
617     {
618         putNextCharsInBuffer(1);
619         ++nodeNameLength;
620     }
621 
622     /* store the name */
623     nodeName = (char*)g_try_malloc(sizeof(char)*nodeNameLength+1);
624     if (nodeName == NULL) { PP_ERROR("Allocation error (node name length is %d)", nodeNameLength); return ; }
625     nodeName[nodeNameLength] = '\0';
626     for (i=0 ; i<nodeNameLength ; ++i)
627     {
628         int tempIndex = xmlPrettyPrintedIndex-nodeNameLength+i;
629         nodeName[i] = xmlPrettyPrinted[tempIndex];
630     }
631 
632     currentNodeName = nodeName; /* set the name for using in other methods */
633     lastNodeOpen = TRUE;
634 
635     /* process the attributes     */
636     readWhites(TRUE);
637     processElementAttributes();
638 
639     /* process the end of the tag */
640     subElementsProcessed = 0;
641     nextChar = getNextChar(); /* should be either '/' or '>' */
642     if (nextChar == '/') /* the node is being closed immediatly */
643     {
644         /* closing node directly */
645         if (options->emptyNodeStripping || !options->forceEmptyNodeSplit)
646         {
647             if (options->emptyNodeStrippingSpace) { putCharInBuffer(' '); }
648             putNextCharsInBuffer(2);
649         }
650         /* split the closing nodes */
651         else
652         {
653             readNextChar(); /* removing '/' */
654             readNextChar(); /* removing '>' */
655 
656             putCharInBuffer('>');
657             if (!options->inlineText)
658             {
659                 /* no inline text => new line ! */
660                 putNewLine();
661             }
662 
663             putCharsInBuffer("</");
664             putCharsInBuffer(currentNodeName);
665             putCharInBuffer('>');
666         }
667 
668         lastNodeOpen=FALSE;
669         return;
670     }
671     else if (nextChar == '>')
672     {
673         /* the tag is just closed (maybe some content) */
674         putNextCharsInBuffer(1);
675         subElementsProcessed = processElements();
676     }
677     else
678     {
679         printError("processNode : Invalid character '%c'", nextChar);
680         result = PRETTY_PRINTING_INVALID_CHAR_ERROR;
681         return;
682     }
683 
684     /* if the code reaches this area, then the processElements has been called and we must
685      * close the opening tag */
686     closeChar = getNextChar();
687     if (closeChar != '<')
688     {
689         printError("processNode : Invalid character '%c' for closing tag (should be '<')", closeChar);
690         result = PRETTY_PRINTING_INVALID_CHAR_ERROR;
691         return;
692     }
693 
694     do
695     {
696         closeChar = readNextChar();
697         putCharInBuffer(closeChar);
698     }
699     while(closeChar != '>');
700 
701     /* there is no elements */
702     if (subElementsProcessed == 0)
703     {
704         /* the node will be stripped */
705         if (options->emptyNodeStripping)
706         {
707             /* because we have '<nodeName ...></nodeName>' */
708             xmlPrettyPrintedIndex -= nodeNameLength+4;
709             resetBackwardIndentation(TRUE);
710 
711             if (options->emptyNodeStrippingSpace) { putCharInBuffer(' '); }
712             putCharsInBuffer("/>");
713         }
714         /* the closing tag will be put on the same line */
715         else if (options->inlineText)
716         {
717             /* correct the index because we have '</nodeName>' */
718             xmlPrettyPrintedIndex -= nodeNameLength+3;
719             resetBackwardIndentation(TRUE);
720 
721             /* rewrite the node name */
722             putCharsInBuffer("</");
723             putCharsInBuffer(currentNodeName);
724             putCharInBuffer('>');
725         }
726     }
727 
728     /* the node is closed */
729     lastNodeOpen = FALSE;
730 
731     /* freeeeeeee !!! */
732     g_free(nodeName);
733     nodeName = NULL;
734     currentNodeName = NULL;
735 }
736 
processComment(void)737 void processComment(void)
738 {
739     char lastChar;
740     bool loop = TRUE;
741     char oldChar;
742     bool inlineAllowed = FALSE;
743     if (options->inlineComment) { inlineAllowed = isInlineNodeAllowed(); }
744     if (inlineAllowed && !options->oneLineComment) { inlineAllowed = isOnSingleLine(4, '-', '-'); }
745     if (inlineAllowed) { resetBackwardIndentation(TRUE); }
746 
747     putNextCharsInBuffer(4); /* add the chars '<!--' */
748 
749     oldChar = '-';
750     while (loop)
751     {
752         char nextChar = readNextChar();
753         if (oldChar == '-' && nextChar == '-') /* comment is being closed */
754         {
755             loop = FALSE;
756         }
757 
758         if (!isLineBreak(nextChar)) /* the comment simply continues */
759         {
760             if (options->oneLineComment && isSpace(nextChar))
761             {
762                 /* removes all the unecessary spaces */
763                 while(isSpace(getNextChar()))
764                 {
765                     nextChar = readNextChar();
766                 }
767                 putCharInBuffer(' ');
768                 oldChar = ' ';
769             }
770             else
771             {
772                 /* comment is left untouched */
773                 putCharInBuffer(nextChar);
774                 oldChar = nextChar;
775             }
776 
777             if (!loop && options->alignComment) /* end of comment */
778             {
779                 /* ensures the chars preceding the first '-' are all spaces (there are at least
780                  * 5 spaces in front of the '-->' for the alignment with '<!--') */
781                 bool onlySpaces = xmlPrettyPrinted[xmlPrettyPrintedIndex-3] == ' ' &&
782                                   xmlPrettyPrinted[xmlPrettyPrintedIndex-4] == ' ' &&
783                                   xmlPrettyPrinted[xmlPrettyPrintedIndex-5] == ' ' &&
784                                   xmlPrettyPrinted[xmlPrettyPrintedIndex-6] == ' ' &&
785                                   xmlPrettyPrinted[xmlPrettyPrintedIndex-7] == ' ';
786 
787                 /* if all the preceding chars are white, then go for replacement */
788                 if (onlySpaces)
789                 {
790                     xmlPrettyPrintedIndex -= 7; /* remove indentation spaces */
791                     putCharsInBuffer("--"); /* reset the first chars of '-->' */
792                 }
793             }
794         }
795         else if (!options->oneLineComment && !inlineAllowed) /* oh ! there is a line break */
796         {
797             /* if the comments need to be aligned, just add 5 spaces */
798             if (options->alignComment)
799             {
800                 int read = readWhites(FALSE); /* strip the whites and new line */
801                 if (nextChar == '\r' && read == 0 && getNextChar() == '\n') /* handles the \r\n return line */
802                 {
803                     readNextChar();
804                     readWhites(FALSE);
805                 }
806 
807                 putNewLine(); /* put a new indentation line */
808                 putCharsInBuffer("     "); /* align with <!--  */
809                 oldChar = ' '; /* and update the last char */
810             }
811             else
812             {
813                 putCharInBuffer(nextChar);
814                 oldChar = nextChar;
815             }
816         }
817         else /* the comments must be inlined */
818         {
819             readWhites(TRUE); /* strip the whites and add a space if needed */
820             if (getPreviousInsertedChar() != ' ' &&
821                 strncmp(xmlPrettyPrinted+xmlPrettyPrintedIndex-4, "<!--", 4) != 0) /* prevents adding a space at the beginning  */
822             {
823                 putCharInBuffer(' ');
824                 oldChar = ' ';
825             }
826         }
827     }
828 
829     lastChar = readNextChar(); /* should be '>' */
830     if (lastChar != '>')
831     {
832         printError("processComment : last char must be '>' (not '%c')", lastChar);
833         result = PRETTY_PRINTING_INVALID_CHAR_ERROR;
834         return;
835     }
836     putCharInBuffer(lastChar);
837 
838     if (inlineAllowed) { appendIndentation = FALSE; }
839 
840     /* there vas no node open */
841     lastNodeOpen = FALSE;
842 }
843 
processTextNode(void)844 void processTextNode(void)
845 {
846     /* checks if inline is allowed */
847     bool inlineTextAllowed = FALSE;
848     if (options->inlineText) { inlineTextAllowed = isInlineNodeAllowed(); }
849     if (inlineTextAllowed && !options->oneLineText) { inlineTextAllowed = isOnSingleLine(0, '<', '/'); }
850     if (inlineTextAllowed || !options->alignText)
851     {
852         resetBackwardIndentation(TRUE); /* remove previous indentation */
853         if (!inlineTextAllowed) { putNewLine(); }
854     }
855 
856     /* the leading whites are automatically stripped. So we re-add it */
857     if (!options->trimLeadingWhites)
858     {
859         int backwardIndex = inputBufferIndex-1;
860         while (isSpace(inputBuffer[backwardIndex]))
861         {
862             --backwardIndex; /* backward rolling */
863         }
864 
865         /* now the input[backwardIndex] IS NOT a white. So we go to
866          * the next char... */
867         ++backwardIndex;
868 
869         /* and then re-add the whites */
870         while (inputBuffer[backwardIndex] == ' ' ||
871                inputBuffer[backwardIndex] == '\t')
872         {
873             putCharInBuffer(inputBuffer[backwardIndex]);
874             ++backwardIndex;
875         }
876     }
877 
878     /* process the text into the node */
879     while(getNextChar() != '<')
880     {
881         char nextChar = readNextChar();
882         if (isLineBreak(nextChar))
883         {
884             if (options->oneLineText)
885             {
886                 readWhites(TRUE);
887 
888                 /* as we can put text on one line, remove the line break
889                  * and replace it by a space but only if the previous
890                  * char wasn't a space */
891                 if (getPreviousInsertedChar() != ' ') { putCharInBuffer(' '); }
892             }
893             else if (options->alignText)
894             {
895                 int read = readWhites(FALSE);
896                 if (nextChar == '\r' && read == 0 && getNextChar() == '\n') /* handles the '\r\n' */
897                 {
898                    nextChar = readNextChar();
899                    readWhites(FALSE);
900                 }
901 
902                 /* put a new line only if the closing tag is not reached */
903                 if (getNextChar() != '<')
904                 {
905                     putNewLine();
906                 }
907             }
908             else
909             {
910                 putCharInBuffer(nextChar);
911             }
912         }
913         else
914         {
915             putCharInBuffer(nextChar);
916         }
917     }
918 
919     /* strip the trailing whites */
920     if (options->trimTrailingWhites)
921     {
922         while(getPreviousInsertedChar() == ' ' ||
923               getPreviousInsertedChar() == '\t')
924         {
925             --xmlPrettyPrintedIndex;
926         }
927     }
928 
929     /* remove the indentation for the closing tag */
930     if (inlineTextAllowed) { appendIndentation = FALSE; }
931 
932     /* there vas no node open */
933     lastNodeOpen = FALSE;
934 }
935 
processCDATA(void)936 void processCDATA(void)
937 {
938     char lastChar;
939     bool loop = TRUE;
940     char oldChar;
941     bool inlineAllowed = FALSE;
942     if (options->inlineCdata) { inlineAllowed = isInlineNodeAllowed(); }
943     if (inlineAllowed && !options->oneLineCdata) { inlineAllowed = isOnSingleLine(9, ']', ']'); }
944     if (inlineAllowed) { resetBackwardIndentation(TRUE); }
945 
946     putNextCharsInBuffer(9); /* putting the '<![CDATA[' into the buffer */
947 
948     oldChar = '[';
949     while(loop)
950     {
951         char nextChar = readNextChar();
952         char nextChar2 = getNextChar();
953         if (oldChar == ']' && nextChar == ']' && nextChar2 == '>') { loop = FALSE; } /* end of cdata */
954 
955         if (!isLineBreak(nextChar)) /* the cdata simply continues */
956         {
957             if (options->oneLineCdata && isSpace(nextChar))
958             {
959                 /* removes all the unecessary spaces */
960                 while(isSpace(nextChar2))
961                 {
962                     nextChar = readNextChar();
963                     nextChar2 = getNextChar();
964                 }
965 
966                 putCharInBuffer(' ');
967                 oldChar = ' ';
968             }
969             else
970             {
971                 /* comment is left untouched */
972                 putCharInBuffer(nextChar);
973                 oldChar = nextChar;
974             }
975 
976             if (!loop && options->alignCdata) /* end of cdata */
977             {
978                 /* ensures the chars preceding the first '-' are all spaces (there are at least
979                  * 10 spaces in front of the ']]>' for the alignment with '<![CDATA[') */
980                 bool onlySpaces = xmlPrettyPrinted[xmlPrettyPrintedIndex-3] == ' ' &&
981                                   xmlPrettyPrinted[xmlPrettyPrintedIndex-4] == ' ' &&
982                                   xmlPrettyPrinted[xmlPrettyPrintedIndex-5] == ' ' &&
983                                   xmlPrettyPrinted[xmlPrettyPrintedIndex-6] == ' ' &&
984                                   xmlPrettyPrinted[xmlPrettyPrintedIndex-7] == ' ' &&
985                                   xmlPrettyPrinted[xmlPrettyPrintedIndex-8] == ' ' &&
986                                   xmlPrettyPrinted[xmlPrettyPrintedIndex-9] == ' ' &&
987                                   xmlPrettyPrinted[xmlPrettyPrintedIndex-10] == ' ' &&
988                                   xmlPrettyPrinted[xmlPrettyPrintedIndex-11] == ' ';
989 
990                 /* if all the preceding chars are white, then go for replacement */
991                 if (onlySpaces)
992                 {
993                     xmlPrettyPrintedIndex -= 11; /* remove indentation spaces */
994                     putCharsInBuffer("]]"); /* reset the first chars of '-->' */
995                 }
996             }
997         }
998         else if (!options->oneLineCdata && !inlineAllowed) /* line break */
999         {
1000             /* if the cdata need to be aligned, just add 9 spaces */
1001             if (options->alignCdata)
1002             {
1003                 int read = readWhites(FALSE); /* strip the whites and new line */
1004                 if (nextChar == '\r' && read == 0 && getNextChar() == '\n') /* handles the \r\n return line */
1005                 {
1006                     readNextChar();
1007                     readWhites(FALSE);
1008                 }
1009 
1010                 putNewLine(); /* put a new indentation line */
1011                 putCharsInBuffer("         "); /* align with <![CDATA[ */
1012                 oldChar = ' '; /* and update the last char */
1013             }
1014             else
1015             {
1016                 putCharInBuffer(nextChar);
1017                 oldChar = nextChar;
1018             }
1019         }
1020         else /* cdata are inlined */
1021         {
1022             readWhites(TRUE); /* strip the whites and add a space if necessary */
1023             if(getPreviousInsertedChar() != ' ' &&
1024                strncmp(xmlPrettyPrinted+xmlPrettyPrintedIndex-9, "<![CDATA[", 9) != 0) /* prevents adding a space at the beginning  */
1025             {
1026                 putCharInBuffer(' ');
1027                 oldChar = ' ';
1028             }
1029         }
1030     }
1031 
1032     /* if the cdata is inline, then all the trailing spaces are removed */
1033     if (options->oneLineCdata)
1034     {
1035         xmlPrettyPrintedIndex -= 2; /* because of the last ']]' inserted */
1036         while(isWhite(xmlPrettyPrinted[xmlPrettyPrintedIndex-1]))
1037         {
1038             --xmlPrettyPrintedIndex;
1039         }
1040         putCharsInBuffer("]]");
1041     }
1042 
1043     /* finalize the cdata */
1044     lastChar = readNextChar(); /* should be '>' */
1045     if (lastChar != '>')
1046     {
1047         printError("processCDATA : last char must be '>' (not '%c')", lastChar);
1048         result = PRETTY_PRINTING_INVALID_CHAR_ERROR;
1049         return;
1050     }
1051 
1052     putCharInBuffer(lastChar);
1053 
1054     if (inlineAllowed) { appendIndentation = FALSE; }
1055 
1056     /* there was no node open */
1057     lastNodeOpen = FALSE;
1058 }
1059 
processDoctype(void)1060 void processDoctype(void)
1061 {
1062     bool loop = TRUE;
1063 
1064     putNextCharsInBuffer(9); /* put the '<!DOCTYPE' into the buffer */
1065 
1066     while(loop)
1067     {
1068         int nextChar;
1069 
1070         readWhites(TRUE);
1071         putCharInBuffer(' '); /* only one space for the attributes */
1072 
1073         nextChar = readNextChar();
1074         while(!isWhite(nextChar) &&
1075               !isQuote(nextChar) &&  /* begins a quoted text */
1076               nextChar != '=' && /* begins an attribute */
1077               nextChar != '>' &&  /* end of doctype */
1078               nextChar != '[') /* inner <!ELEMENT> types */
1079         {
1080             putCharInBuffer(nextChar);
1081             nextChar = readNextChar();
1082         }
1083 
1084         if (isWhite(nextChar)) {} /* do nothing, just let the next loop do the job */
1085         else if (isQuote(nextChar) || nextChar == '=')
1086         {
1087             char quote;
1088 
1089             if (nextChar == '=')
1090             {
1091                 putCharInBuffer(nextChar);
1092                 nextChar = readNextChar(); /* now we should have a quote */
1093 
1094                 if (!isQuote(nextChar))
1095                 {
1096                     printError("processDoctype : the next char should be a quote (not '%c')", nextChar);
1097                     result = PRETTY_PRINTING_INVALID_CHAR_ERROR;
1098                     return;
1099                 }
1100             }
1101 
1102             /* simply process the content */
1103             quote = nextChar;
1104             do
1105             {
1106                 putCharInBuffer(nextChar);
1107                 nextChar = readNextChar();
1108             }
1109             while (nextChar != quote);
1110             putCharInBuffer(nextChar); /* now the last char is the last quote */
1111         }
1112         else if (nextChar == '>') /* end of doctype */
1113         {
1114             putCharInBuffer(nextChar);
1115             loop = FALSE;
1116         }
1117         else /* the char is a '[' => not supported yet */
1118         {
1119             printError("DOCTYPE inner ELEMENT is currently not supported by PrettyPrinter\n");
1120             result = PRETTY_PRINTING_NOT_SUPPORTED_YET;
1121             loop = FALSE;
1122         }
1123     }
1124 }
1125 
processDoctypeElement(void)1126 void processDoctypeElement(void)
1127 {
1128     printError("ELEMENT is currently not supported by PrettyPrinter\n");
1129     result = PRETTY_PRINTING_NOT_SUPPORTED_YET;
1130 }
1131 
/*
 * Reports a printf-style formatted error message, then dumps the current
 * state of the buffers through printDebugStatus.
 *
 * With HAVE_GLIB the message is logged with g_logv at warning level;
 * otherwise it is written to stderr followed by a line break.
 *
 * msg : printf-like format string
 * ... : arguments matching the format string
 */
void printError(const char *msg, ...)
{
    va_list args;

    va_start(args, msg);
    #ifdef HAVE_GLIB
    g_logv(G_LOG_DOMAIN, G_LOG_LEVEL_WARNING, msg, args);
    #else
    vfprintf(stderr, msg, args);
    fputc('\n', stderr);
    #endif
    va_end(args);

    /* always follow the message with the buffers state */
    printDebugStatus();
}
1146 
printDebugStatus(void)1147 void printDebugStatus(void)
1148 {
1149     #ifdef HAVE_GLIB
1150     g_debug("\n===== INPUT =====\n%s\n=================\ninputLength = %d\ninputIndex = %d\noutputLength = %d\noutputIndex = %d\n",
1151             inputBuffer,
1152             inputBufferLength,
1153             inputBufferIndex,
1154             xmlPrettyPrintedLength,
1155             xmlPrettyPrintedIndex);
1156     #else
1157     PP_ERROR("\n===== INPUT =====\n%s\n=================\ninputLength = %d\ninputIndex = %d\noutputLength = %d\noutputIndex = %d\n",
1158             inputBuffer,
1159             inputBufferLength,
1160             inputBufferIndex,
1161             xmlPrettyPrintedLength,
1162             xmlPrettyPrintedIndex);
1163     #endif
1164 }
1165