1 /*  $Id: xml_elem.c,v 1.23 2004/11/21 23:40:40 mgrouch Exp $  */
2 
3 /*
4 
5 XMLStarlet: Command Line Toolkit to query/edit/check/transform XML documents
6 
7 Copyright (c) 2002-2004 Mikhail Grushinskiy.  All Rights Reserved.
8 
9 Permission is hereby granted, free of charge, to any person obtaining a copy
10 of this software and associated documentation files (the "Software"), to deal
11 in the Software without restriction, including without limitation the rights
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom the Software is
14 furnished to do so, subject to the following conditions:
15 
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18 
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 THE SOFTWARE.
26 
27 */
28 
#include <config.h>

#include <libxml/xmlstring.h>
#include <libxml/hash.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>

#include "xmlstar.h"
#include "escape.h"
38 
39 /* TODO:
40 
41    2. Option to display this only for nodes matching
42       an XPATH expression
43 
44       -p <xpath>
45 
46       so it will be able to deal with subtrees as well
47 
48 */
49 
/* Output mode switches for the 'el' command; all zero means plain path listing */
typedef struct _elOptions {
    int show_attr;            /* nonzero: also print one path/@attr line per attribute */
    int show_attr_and_val;    /* nonzero: append a [@attr='value' and ...] predicate */
    int sort_uniq;            /* nonzero: collect paths, print them sorted without duplicates */
    int check_depth;          /* in sort_uniq mode keep only depth < this; 0 means no limit */
} elOptions;
56 
57 
58 static elOptions elOps;
59 static xmlHashTablePtr uniq = NULL;
60 static xmlChar *curXPath = NULL;
61 
62 /**
63  *  Display usage syntax
64  */
65 void
elUsage(int argc,char ** argv,exit_status status)66 elUsage(int argc, char **argv, exit_status status)
67 {
68     extern void fprint_elem_usage(FILE* o, const char* argv0);
69     extern const char more_info[];
70     FILE *o = (status == EXIT_SUCCESS)? stdout : stderr;
71     fprint_elem_usage(o, argv[0]);
72     fprintf(o, "%s", more_info);
73     exit(status);
74 }
75 
76 /**
77  *  read file and print element paths
78  */
79 int
parse_xml_file(const char * filename)80 parse_xml_file(const char *filename)
81 {
82     int ret, prev_depth = 0;
83     xmlTextReaderPtr reader;
84 
85     for (reader = xmlReaderForFile(filename, NULL, 0);;)
86     {
87         int depth;
88         const xmlChar *name;
89         xmlReaderTypes type;
90 
91         if (!reader) {
92             fprintf(stderr, "couldn't read file '%s'\n", filename);
93             exit(EXIT_BAD_FILE);
94         }
95 
96         ret = xmlTextReaderRead(reader);
97         if (ret <= 0) break;
98         type = xmlTextReaderNodeType(reader);
99         depth = xmlTextReaderDepth(reader);
100         name = xmlTextReaderConstName(reader);
101 
102         if (type != XML_READER_TYPE_ELEMENT)
103             continue;
104 
105         while (curXPath && depth <= prev_depth)
106         {
107             xmlChar *slash = BAD_CAST strrchr((char*) curXPath, '/');
108             if (slash) *slash = '\0';
109             prev_depth--;
110         }
111         prev_depth = depth;
112 
113         if (depth > 0) curXPath = xmlStrcat(curXPath, BAD_CAST "/");
114         curXPath = xmlStrcat(curXPath, name);
115 
116         if (elOps.show_attr)
117         {
118             int have_attr;
119 
120             fprintf(stdout, "%s\n", curXPath);
121             for (have_attr = xmlTextReaderMoveToFirstAttribute(reader);
122                  have_attr;
123                  have_attr = xmlTextReaderMoveToNextAttribute(reader))
124             {
125                 const xmlChar *aname = xmlTextReaderConstName(reader);
126                 fprintf(stdout, "%s/@%s\n", curXPath, aname);
127             }
128         }
129         else if (elOps.show_attr_and_val)
130         {
131             fprintf(stdout, "%s", curXPath);
132             if (xmlTextReaderHasAttributes(reader))
133             {
134                 int have_attr, first = 1;
135                 fprintf(stdout, "[");
136                 for (have_attr = xmlTextReaderMoveToFirstAttribute(reader);
137                      have_attr;
138                      have_attr = xmlTextReaderMoveToNextAttribute(reader))
139                 {
140                     const xmlChar *aname = xmlTextReaderConstName(reader),
141                         *avalue = xmlTextReaderConstValue(reader);
142                     char quote;
143                     if (!first)
144                         fprintf(stdout, " and ");
145                     first = 0;
146 
147                     quote = xmlStrchr(avalue, '\'')? '"' : '\'';
148                     fprintf(stdout, "@%s=%c%s%c", aname, quote, avalue, quote);
149                 }
150                 fprintf(stdout, "]");
151             }
152             fprintf(stdout, "\n");
153         }
154         else if (elOps.sort_uniq)
155         {
156             if ((elOps.check_depth == 0) || (elOps.check_depth != 0 && depth < elOps.check_depth))
157             {
158                 xmlHashAddEntry(uniq, curXPath, (void*) 1);
159             }
160         }
161         else fprintf(stdout, "%s\n", curXPath);
162 
163     }
164 
165     return ret == -1? EXIT_LIB_ERROR : ret;
166 }
167 
168 /**
169  *  Initialize options values
170  */
171 void
elInitOptions(elOptions * ops)172 elInitOptions(elOptions *ops)
173 {
174     ops->show_attr = 0;
175     ops->show_attr_and_val = 0;
176     ops->sort_uniq = 0;
177     ops->check_depth = 0;
178 }
179 
180 typedef struct {
181     xmlChar **array;
182     int offset;
183 } ArrayDest;
184 
185 /**
186  * put @name into @data->array[@data->offset]
187  */
188 static void
hash_key_put(void * payload,void * data,xmlChar * name)189 hash_key_put(void *payload, void *data, xmlChar *name)
190 {
191     ArrayDest *dest = data;
192     dest->array[dest->offset++] = name;
193 }
194 
195 /**
196  * a compare function for qsort
197  * takes pointers to 2 xmlChar* and compares them
198  */
199 static int
compare_string_ptr(const void * p1,const void * p2)200 compare_string_ptr(const void *p1, const void *p2)
201 {
202     typedef xmlChar const *const xmlCChar;
203     xmlCChar *str1 = p1, *str2 = p2;
204     return xmlStrcmp(*str1, *str2);
205 }
206 
207 /**
208  *  This is the main function for 'el' option
209  */
210 int
elMain(int argc,char ** argv)211 elMain(int argc, char **argv)
212 {
213     int errorno = 0;
214     char* inp_file = "-";
215 
216     if (argc <= 1) elUsage(argc, argv, EXIT_BAD_ARGS);
217 
218     elInitOptions(&elOps);
219 
220     if (argc == 2)
221         errorno = parse_xml_file("-");
222     else
223     {
224         if (!strcmp(argv[2], "--help") || !strcmp(argv[2], "-h") ||
225             !strcmp(argv[2], "-?") || !strcmp(argv[2], "-Z"))
226         {
227             elUsage(argc, argv, EXIT_SUCCESS);
228         }
229         else if (!strcmp(argv[2], "-a"))
230         {
231             elOps.show_attr = 1;
232             if (argc >= 4) inp_file = argv[3];
233             errorno = parse_xml_file(inp_file);
234         }
235         else if (!strcmp(argv[2], "-v"))
236         {
237             elOps.show_attr_and_val = 1;
238             if (argc >= 4) inp_file = argv[3];
239             errorno = parse_xml_file(inp_file);
240         }
241         else if (!strcmp(argv[2], "-u"))
242         {
243             elOps.sort_uniq = 1;
244             if (argc >= 4) inp_file = argv[3];
245             uniq = xmlHashCreate(0);
246             errorno = parse_xml_file(inp_file);
247         }
248         else if (!strncmp(argv[2], "-d", 2))
249         {
250             elOps.check_depth = atoi(argv[2]+2);
251             /* printf("Checking depth (%d)\n", elOps.check_depth); */
252             elOps.sort_uniq = 1;
253             if (argc >= 4) inp_file = argv[3];
254             uniq = xmlHashCreate(0);
255             errorno = parse_xml_file(inp_file);
256         }
257         else if (argv[2][0] != '-')
258         {
259             errorno = parse_xml_file(argv[2]);
260         }
261         else
262             elUsage(argc, argv, EXIT_BAD_ARGS);
263     }
264 
265     if (uniq)
266     {
267         int i;
268         ArrayDest lines;
269         lines.array = xmlMalloc(sizeof(xmlChar*) * xmlHashSize(uniq));
270         lines.offset = 0;
271         xmlHashScan(uniq, hash_key_put, &lines);
272 
273         qsort(lines.array, lines.offset, sizeof(xmlChar*), compare_string_ptr);
274 
275         for (i = 0; i < lines.offset; i++)
276         {
277             printf("%s\n", lines.array[i]);
278         }
279 
280         xmlFree(lines.array);
281         xmlHashFree(uniq, NULL);
282     }
283 
284     return errorno;
285 }
286 
287