1 /* Read an XML document from standard input and print
2 element declarations (if any) to standard output.
3 It must be used with Expat compiled for UTF-8 output.
4 __ __ _
5 ___\ \/ /_ __ __ _| |_
6 / _ \\ /| '_ \ / _` | __|
7 | __// \| |_) | (_| | |_
8 \___/_/\_\ .__/ \__,_|\__|
9 |_| XML parser
10
11 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
12 Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
13 Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net>
14 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
15 Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
16 Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
17 Copyright (c) 2019 Zhongyuan Zhou <zhouzhongyuan@huawei.com>
18 Copyright (c) 2024 Hanno Böck <hanno@gentoo.org>
19 Licensed under the MIT license:
20
21 Permission is hereby granted, free of charge, to any person obtaining
22 a copy of this software and associated documentation files (the
23 "Software"), to deal in the Software without restriction, including
24 without limitation the rights to use, copy, modify, merge, publish,
25 distribute, sublicense, and/or sell copies of the Software, and to permit
26 persons to whom the Software is furnished to do so, subject to the
27 following conditions:
28
29 The above copyright notice and this permission notice shall be included
30 in all copies or substantial portions of the Software.
31
32 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
33 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
34 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
35 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
36 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
37 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
38 USE OR OTHER DEALINGS IN THE SOFTWARE.
39 */
40
41 #include <stdbool.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <expat.h>
45
// printf length modifier that main() combines with "u" to print the value
// returned by XML_GetCurrentLineNumber(): "ll" when XML_LARGE_SIZE is
// defined, "l" otherwise.
// NOTE(review): presumably this mirrors the width Expat picks for its
// line number type in XML_LARGE_SIZE builds -- confirm against expat.h.
#if ! defined(XML_LARGE_SIZE)
#  define XML_FMT_INT_MOD "l"
#else
#  define XML_FMT_INT_MOD "ll"
#endif
51
// printf conversion specifier (without the leading '%') used in this file
// for element names and for the text returned by XML_ErrorString():
// "ls" (wide string) when XML_UNICODE_WCHAR_T is defined, "s" otherwise.
#if ! defined(XML_UNICODE_WCHAR_T)
#  define XML_FMT_STR "s"
#else
#  define XML_FMT_STR "ls"
#endif
57
58 // While traversing the XML_Content tree, we avoid recursion
59 // to not be vulnerable to a denial of service attack.
60 typedef struct StackStruct {
61 const XML_Content *model;
62 unsigned level;
63 struct StackStruct *prev;
64 } Stack;
65
66 static Stack *
stackPushMalloc(Stack * stackTop,const XML_Content * model,unsigned level)67 stackPushMalloc(Stack *stackTop, const XML_Content *model, unsigned level) {
68 Stack *const newStackTop = malloc(sizeof(Stack));
69 if (! newStackTop) {
70 return NULL;
71 }
72 newStackTop->model = model;
73 newStackTop->level = level;
74 newStackTop->prev = stackTop;
75 return newStackTop;
76 }
77
78 static Stack *
stackPopFree(Stack * stackTop)79 stackPopFree(Stack *stackTop) {
80 Stack *const newStackTop = stackTop->prev;
81 free(stackTop);
82 return newStackTop;
83 }
84
85 static char *
contentTypeName(enum XML_Content_Type contentType)86 contentTypeName(enum XML_Content_Type contentType) {
87 switch (contentType) {
88 case XML_CTYPE_EMPTY:
89 return "EMPTY";
90 case XML_CTYPE_ANY:
91 return "ANY";
92 case XML_CTYPE_MIXED:
93 return "MIXED";
94 case XML_CTYPE_NAME:
95 return "NAME";
96 case XML_CTYPE_CHOICE:
97 return "CHOICE";
98 case XML_CTYPE_SEQ:
99 return "SEQ";
100 default:
101 return "???";
102 }
103 }
104
105 static char *
contentQuantName(enum XML_Content_Quant contentQuant)106 contentQuantName(enum XML_Content_Quant contentQuant) {
107 switch (contentQuant) {
108 case XML_CQUANT_NONE:
109 return "NONE";
110 case XML_CQUANT_OPT:
111 return "OPT";
112 case XML_CQUANT_REP:
113 return "REP";
114 case XML_CQUANT_PLUS:
115 return "PLUS";
116 default:
117 return "???";
118 }
119 }
120
121 static void
dumpContentModelElement(const XML_Content * model,unsigned level,const XML_Content * root)122 dumpContentModelElement(const XML_Content *model, unsigned level,
123 const XML_Content *root) {
124 // Indent
125 unsigned u = 0;
126 for (; u < level; u++) {
127 printf(" ");
128 }
129
130 // Node
131 printf("[%u] type=%s(%u), quant=%s(%u)", (unsigned)(model - root),
132 contentTypeName(model->type), (unsigned int)model->type,
133 contentQuantName(model->quant), (unsigned int)model->quant);
134 if (model->name) {
135 printf(", name=\"%" XML_FMT_STR "\"", model->name);
136 } else {
137 printf(", name=NULL");
138 }
139 printf(", numchildren=%u", model->numchildren);
140 printf("\n");
141 }
142
143 static bool
dumpContentModel(const XML_Char * name,const XML_Content * root)144 dumpContentModel(const XML_Char *name, const XML_Content *root) {
145 printf("Element \"%" XML_FMT_STR "\":\n", name);
146 Stack *stackTop = stackPushMalloc(NULL, root, 1);
147 if (! stackTop) {
148 return false;
149 }
150
151 while (stackTop) {
152 const XML_Content *const model = stackTop->model;
153 const unsigned level = stackTop->level;
154
155 dumpContentModelElement(model, level, root);
156
157 stackTop = stackPopFree(stackTop);
158
159 for (size_t u = model->numchildren; u >= 1; u--) {
160 Stack *const newStackTop
161 = stackPushMalloc(stackTop, model->children + (u - 1), level + 1);
162 if (! newStackTop) {
163 // We ran out of memory, so let's free all memory allocated
164 // earlier in this function, to be leak-clean:
165 while (stackTop != NULL) {
166 stackTop = stackPopFree(stackTop);
167 }
168 return false;
169 }
170 stackTop = newStackTop;
171 }
172 }
173
174 printf("\n");
175 return true;
176 }
177
178 static void XMLCALL
handleElementDeclaration(void * userData,const XML_Char * name,XML_Content * model)179 handleElementDeclaration(void *userData, const XML_Char *name,
180 XML_Content *model) {
181 XML_Parser parser = (XML_Parser)userData;
182 const bool success = dumpContentModel(name, model);
183 XML_FreeContentModel(parser, model);
184 if (! success) {
185 XML_StopParser(parser, /* resumable= */ XML_FALSE);
186 }
187 }
188
189 int
main(void)190 main(void) {
191 XML_Parser parser = XML_ParserCreate(NULL);
192 int done;
193
194 if (! parser) {
195 fprintf(stderr, "Couldn't allocate memory for parser\n");
196 return 1;
197 }
198
199 XML_SetUserData(parser, parser);
200 XML_SetElementDeclHandler(parser, handleElementDeclaration);
201
202 do {
203 void *const buf = XML_GetBuffer(parser, BUFSIZ);
204 if (! buf) {
205 fprintf(stderr, "Couldn't allocate memory for buffer\n");
206 XML_ParserFree(parser);
207 return 1;
208 }
209
210 const size_t len = fread(buf, 1, BUFSIZ, stdin);
211
212 if (ferror(stdin)) {
213 fprintf(stderr, "Read error\n");
214 XML_ParserFree(parser);
215 return 1;
216 }
217
218 done = feof(stdin);
219
220 if (XML_ParseBuffer(parser, (int)len, done) == XML_STATUS_ERROR) {
221 enum XML_Error errorCode = XML_GetErrorCode(parser);
222 if (errorCode == XML_ERROR_ABORTED) {
223 errorCode = XML_ERROR_NO_MEMORY;
224 }
225 fprintf(stderr,
226 "Parse error at line %" XML_FMT_INT_MOD "u:\n%" XML_FMT_STR "\n",
227 XML_GetCurrentLineNumber(parser), XML_ErrorString(errorCode));
228 XML_ParserFree(parser);
229 return 1;
230 }
231 } while (! done);
232
233 XML_ParserFree(parser);
234 return 0;
235 }
236