1 /* $Id: xml_elem.c,v 1.23 2004/11/21 23:40:40 mgrouch Exp $ */
2
3 /*
4
5 XMLStarlet: Command Line Toolkit to query/edit/check/transform XML documents
6
7 Copyright (c) 2002-2004 Mikhail Grushinskiy. All Rights Reserved.
8
9 Permission is hereby granted, free of charge, to any person obtaining a copy
10 of this software and associated documentation files (the "Software"), to deal
11 in the Software without restriction, including without limitation the rights
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom the Software is
14 furnished to do so, subject to the following conditions:
15
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 THE SOFTWARE.
26
27 */
28
#include <config.h>

#include <libxml/xmlstring.h>
#include <libxml/hash.h>
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>

#include "xmlstar.h"
#include "escape.h"
38
/* TODO:

   2. Add an option to display the element structure only for nodes
      matching an XPath expression:

        -p <xpath>

      so that the command is able to deal with subtrees as well.

*/
49
/**
 * Switches that select what the 'el' command prints
 */
typedef struct _elOptions {
    /* non-zero: print each element path followed by its attribute paths */
    int show_attr;
    /* non-zero: print each element path with its attributes and values */
    int show_attr_and_val;
    /* non-zero: collect paths and print them sorted, without duplicates */
    int sort_uniq;
    /* depth limit applied when collecting unique paths; 0 means no limit */
    int check_depth;
} elOptions;
56
57
58 static elOptions elOps;
59 static xmlHashTablePtr uniq = NULL;
60 static xmlChar *curXPath = NULL;
61
62 /**
63 * Display usage syntax
64 */
65 void
elUsage(int argc,char ** argv,exit_status status)66 elUsage(int argc, char **argv, exit_status status)
67 {
68 extern void fprint_elem_usage(FILE* o, const char* argv0);
69 extern const char more_info[];
70 FILE *o = (status == EXIT_SUCCESS)? stdout : stderr;
71 fprint_elem_usage(o, argv[0]);
72 fprintf(o, "%s", more_info);
73 exit(status);
74 }
75
76 /**
77 * read file and print element paths
78 */
79 int
parse_xml_file(const char * filename)80 parse_xml_file(const char *filename)
81 {
82 int ret, prev_depth = 0;
83 xmlTextReaderPtr reader;
84
85 for (reader = xmlReaderForFile(filename, NULL, 0);;)
86 {
87 int depth;
88 const xmlChar *name;
89 xmlReaderTypes type;
90
91 if (!reader) {
92 fprintf(stderr, "couldn't read file '%s'\n", filename);
93 exit(EXIT_BAD_FILE);
94 }
95
96 ret = xmlTextReaderRead(reader);
97 if (ret <= 0) break;
98 type = xmlTextReaderNodeType(reader);
99 depth = xmlTextReaderDepth(reader);
100 name = xmlTextReaderConstName(reader);
101
102 if (type != XML_READER_TYPE_ELEMENT)
103 continue;
104
105 while (curXPath && depth <= prev_depth)
106 {
107 xmlChar *slash = BAD_CAST strrchr((char*) curXPath, '/');
108 if (slash) *slash = '\0';
109 prev_depth--;
110 }
111 prev_depth = depth;
112
113 if (depth > 0) curXPath = xmlStrcat(curXPath, BAD_CAST "/");
114 curXPath = xmlStrcat(curXPath, name);
115
116 if (elOps.show_attr)
117 {
118 int have_attr;
119
120 fprintf(stdout, "%s\n", curXPath);
121 for (have_attr = xmlTextReaderMoveToFirstAttribute(reader);
122 have_attr;
123 have_attr = xmlTextReaderMoveToNextAttribute(reader))
124 {
125 const xmlChar *aname = xmlTextReaderConstName(reader);
126 fprintf(stdout, "%s/@%s\n", curXPath, aname);
127 }
128 }
129 else if (elOps.show_attr_and_val)
130 {
131 fprintf(stdout, "%s", curXPath);
132 if (xmlTextReaderHasAttributes(reader))
133 {
134 int have_attr, first = 1;
135 fprintf(stdout, "[");
136 for (have_attr = xmlTextReaderMoveToFirstAttribute(reader);
137 have_attr;
138 have_attr = xmlTextReaderMoveToNextAttribute(reader))
139 {
140 const xmlChar *aname = xmlTextReaderConstName(reader),
141 *avalue = xmlTextReaderConstValue(reader);
142 char quote;
143 if (!first)
144 fprintf(stdout, " and ");
145 first = 0;
146
147 quote = xmlStrchr(avalue, '\'')? '"' : '\'';
148 fprintf(stdout, "@%s=%c%s%c", aname, quote, avalue, quote);
149 }
150 fprintf(stdout, "]");
151 }
152 fprintf(stdout, "\n");
153 }
154 else if (elOps.sort_uniq)
155 {
156 if ((elOps.check_depth == 0) || (elOps.check_depth != 0 && depth < elOps.check_depth))
157 {
158 xmlHashAddEntry(uniq, curXPath, (void*) 1);
159 }
160 }
161 else fprintf(stdout, "%s\n", curXPath);
162
163 }
164
165 return ret == -1? EXIT_LIB_ERROR : ret;
166 }
167
168 /**
169 * Initialize options values
170 */
171 void
elInitOptions(elOptions * ops)172 elInitOptions(elOptions *ops)
173 {
174 ops->show_attr = 0;
175 ops->show_attr_and_val = 0;
176 ops->sort_uniq = 0;
177 ops->check_depth = 0;
178 }
179
180 typedef struct {
181 xmlChar **array;
182 int offset;
183 } ArrayDest;
184
185 /**
186 * put @name into @data->array[@data->offset]
187 */
188 static void
hash_key_put(void * payload,void * data,xmlChar * name)189 hash_key_put(void *payload, void *data, xmlChar *name)
190 {
191 ArrayDest *dest = data;
192 dest->array[dest->offset++] = name;
193 }
194
195 /**
196 * a compare function for qsort
197 * takes pointers to 2 xmlChar* and compares them
198 */
199 static int
compare_string_ptr(const void * p1,const void * p2)200 compare_string_ptr(const void *p1, const void *p2)
201 {
202 typedef xmlChar const *const xmlCChar;
203 xmlCChar *str1 = p1, *str2 = p2;
204 return xmlStrcmp(*str1, *str2);
205 }
206
207 /**
208 * This is the main function for 'el' option
209 */
210 int
elMain(int argc,char ** argv)211 elMain(int argc, char **argv)
212 {
213 int errorno = 0;
214 char* inp_file = "-";
215
216 if (argc <= 1) elUsage(argc, argv, EXIT_BAD_ARGS);
217
218 elInitOptions(&elOps);
219
220 if (argc == 2)
221 errorno = parse_xml_file("-");
222 else
223 {
224 if (!strcmp(argv[2], "--help") || !strcmp(argv[2], "-h") ||
225 !strcmp(argv[2], "-?") || !strcmp(argv[2], "-Z"))
226 {
227 elUsage(argc, argv, EXIT_SUCCESS);
228 }
229 else if (!strcmp(argv[2], "-a"))
230 {
231 elOps.show_attr = 1;
232 if (argc >= 4) inp_file = argv[3];
233 errorno = parse_xml_file(inp_file);
234 }
235 else if (!strcmp(argv[2], "-v"))
236 {
237 elOps.show_attr_and_val = 1;
238 if (argc >= 4) inp_file = argv[3];
239 errorno = parse_xml_file(inp_file);
240 }
241 else if (!strcmp(argv[2], "-u"))
242 {
243 elOps.sort_uniq = 1;
244 if (argc >= 4) inp_file = argv[3];
245 uniq = xmlHashCreate(0);
246 errorno = parse_xml_file(inp_file);
247 }
248 else if (!strncmp(argv[2], "-d", 2))
249 {
250 elOps.check_depth = atoi(argv[2]+2);
251 /* printf("Checking depth (%d)\n", elOps.check_depth); */
252 elOps.sort_uniq = 1;
253 if (argc >= 4) inp_file = argv[3];
254 uniq = xmlHashCreate(0);
255 errorno = parse_xml_file(inp_file);
256 }
257 else if (argv[2][0] != '-')
258 {
259 errorno = parse_xml_file(argv[2]);
260 }
261 else
262 elUsage(argc, argv, EXIT_BAD_ARGS);
263 }
264
265 if (uniq)
266 {
267 int i;
268 ArrayDest lines;
269 lines.array = xmlMalloc(sizeof(xmlChar*) * xmlHashSize(uniq));
270 lines.offset = 0;
271 xmlHashScan(uniq, hash_key_put, &lines);
272
273 qsort(lines.array, lines.offset, sizeof(xmlChar*), compare_string_ptr);
274
275 for (i = 0; i < lines.offset; i++)
276 {
277 printf("%s\n", lines.array[i]);
278 }
279
280 xmlFree(lines.array);
281 xmlHashFree(uniq, NULL);
282 }
283
284 return errorno;
285 }
286
287