1 /*
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
11 Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12 Copyright (c) 2004-2009 Karl Waclawek <karl@waclawek.net>
13 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
14 Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org>
15 Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
16 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
17 Copyright (c) 2020 Joe Orton <jorton@redhat.com>
18 Copyright (c) 2020 Kleber Tarcísio <klebertarcisio@yahoo.com.br>
19 Copyright (c) 2021 Tim Bray <tbray@textuality.com>
20 Copyright (c) 2022 Martin Ettl <ettl.martin78@googlemail.com>
21 Copyright (c) 2022 Sean McBride <sean@rogue-research.com>
22 Copyright (c) 2025 Alfonso Gregory <gfunni234@gmail.com>
23 Licensed under the MIT license:
24
25 Permission is hereby granted, free of charge, to any person obtaining
26 a copy of this software and associated documentation files (the
27 "Software"), to deal in the Software without restriction, including
28 without limitation the rights to use, copy, modify, merge, publish,
29 distribute, sublicense, and/or sell copies of the Software, and to permit
30 persons to whom the Software is furnished to do so, subject to the
31 following conditions:
32
33 The above copyright notice and this permission notice shall be included
34 in all copies or substantial portions of the Software.
35
36 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
37 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
38 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
39 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
40 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
41 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
42 USE OR OTHER DEALINGS IN THE SOFTWARE.
43 */
44
45 #include "expat_config.h"
46
47 #include <assert.h>
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <stddef.h>
51 #include <string.h>
52 #include <math.h> /* for isnan */
53 #include <errno.h>
54
55 #include "expat.h"
56 #include "codepage.h"
57 #include "internal.h" /* for UNUSED_P only */
58 #include "xmlfile.h"
59 #include "xmltchar.h"
60
61 #ifdef _MSC_VER
62 # include <crtdbg.h>
63 #endif
64
65 #ifdef XML_UNICODE
66 # include <wchar.h>
67 #endif
68
/* Process exit statuses of xmlwf.  The numeric values are user-visible:
   they are listed under "exit status" in the help text. */
enum ExitCode {
  XMLWF_EXIT_SUCCESS = 0,        /* input well-formed, output written */
  XMLWF_EXIT_INTERNAL_ERROR = 1, /* could not allocate data structures */
  XMLWF_EXIT_NOT_WELLFORMED = 2, /* one or more inputs not well-formed */
  XMLWF_EXIT_OUTPUT_ERROR = 3,   /* could not create an output file */
  XMLWF_EXIT_USAGE_ERROR = 4     /* command-line argument error */
};
76
77 /* Structures for handler user data */
78 typedef struct NotationList {
79 struct NotationList *next;
80 const XML_Char *notationName;
81 const XML_Char *systemId;
82 const XML_Char *publicId;
83 } NotationList;
84
85 typedef struct xmlwfUserData {
86 FILE *fp;
87 NotationList *notationListHead;
88 const XML_Char *currentDoctypeName;
89 } XmlwfUserData;
90
91 /* This ensures proper sorting. */
92
93 #define NSSEP T('\001')
94
95 static void XMLCALL
characterData(void * userData,const XML_Char * s,int len)96 characterData(void *userData, const XML_Char *s, int len) {
97 FILE *fp = ((XmlwfUserData *)userData)->fp;
98 for (; len > 0; --len, ++s) {
99 switch (*s) {
100 case T('&'):
101 fputts(T("&"), fp);
102 break;
103 case T('<'):
104 fputts(T("<"), fp);
105 break;
106 case T('>'):
107 fputts(T(">"), fp);
108 break;
109 #ifdef W3C14N
110 case 13:
111 fputts(T("
"), fp);
112 break;
113 #else
114 case T('"'):
115 fputts(T("""), fp);
116 break;
117 case 9:
118 case 10:
119 case 13:
120 ftprintf(fp, T("&#%d;"), *s);
121 break;
122 #endif
123 default:
124 puttc(*s, fp);
125 break;
126 }
127 }
128 }
129
130 static void
attributeValue(FILE * fp,const XML_Char * s)131 attributeValue(FILE *fp, const XML_Char *s) {
132 puttc(T('='), fp);
133 puttc(T('"'), fp);
134 assert(s);
135 for (;;) {
136 switch (*s) {
137 case 0:
138 case NSSEP:
139 puttc(T('"'), fp);
140 return;
141 case T('&'):
142 fputts(T("&"), fp);
143 break;
144 case T('<'):
145 fputts(T("<"), fp);
146 break;
147 case T('"'):
148 fputts(T("""), fp);
149 break;
150 #ifdef W3C14N
151 case 9:
152 fputts(T("	"), fp);
153 break;
154 case 10:
155 fputts(T("
"), fp);
156 break;
157 case 13:
158 fputts(T("
"), fp);
159 break;
160 #else
161 case T('>'):
162 fputts(T(">"), fp);
163 break;
164 case 9:
165 case 10:
166 case 13:
167 ftprintf(fp, T("&#%d;"), *s);
168 break;
169 #endif
170 default:
171 puttc(*s, fp);
172 break;
173 }
174 s++;
175 }
176 }
177
/* Lexicographically comparing UTF-8 encoded attribute values
   is equivalent to lexicographically comparing based on the character
   number. */
180
181 static int
attcmp(const void * att1,const void * att2)182 attcmp(const void *att1, const void *att2) {
183 return tcscmp(*(const XML_Char *const *)att1, *(const XML_Char *const *)att2);
184 }
185
186 static void XMLCALL
startElement(void * userData,const XML_Char * name,const XML_Char ** atts)187 startElement(void *userData, const XML_Char *name, const XML_Char **atts) {
188 int nAtts;
189 const XML_Char **p;
190 FILE *fp = ((XmlwfUserData *)userData)->fp;
191 puttc(T('<'), fp);
192 fputts(name, fp);
193
194 p = atts;
195 while (*p)
196 ++p;
197 nAtts = (int)((p - atts) >> 1);
198 if (nAtts > 1)
199 qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, attcmp);
200 while (*atts) {
201 puttc(T(' '), fp);
202 fputts(*atts++, fp);
203 attributeValue(fp, *atts);
204 atts++;
205 }
206 puttc(T('>'), fp);
207 }
208
209 static void XMLCALL
endElement(void * userData,const XML_Char * name)210 endElement(void *userData, const XML_Char *name) {
211 FILE *fp = ((XmlwfUserData *)userData)->fp;
212 puttc(T('<'), fp);
213 puttc(T('/'), fp);
214 fputts(name, fp);
215 puttc(T('>'), fp);
216 }
217
218 static int
nsattcmp(const void * p1,const void * p2)219 nsattcmp(const void *p1, const void *p2) {
220 const XML_Char *att1 = *(const XML_Char *const *)p1;
221 const XML_Char *att2 = *(const XML_Char *const *)p2;
222 int sep1 = (tcsrchr(att1, NSSEP) != 0);
223 int sep2 = (tcsrchr(att2, NSSEP) != 0);
224 if (sep1 != sep2)
225 return sep1 - sep2;
226 return tcscmp(att1, att2);
227 }
228
229 static void XMLCALL
startElementNS(void * userData,const XML_Char * name,const XML_Char ** atts)230 startElementNS(void *userData, const XML_Char *name, const XML_Char **atts) {
231 int nAtts;
232 int nsi;
233 const XML_Char **p;
234 FILE *fp = ((XmlwfUserData *)userData)->fp;
235 const XML_Char *sep;
236 puttc(T('<'), fp);
237
238 sep = tcsrchr(name, NSSEP);
239 if (sep) {
240 fputts(T("n1:"), fp);
241 fputts(sep + 1, fp);
242 fputts(T(" xmlns:n1"), fp);
243 attributeValue(fp, name);
244 nsi = 2;
245 } else {
246 fputts(name, fp);
247 nsi = 1;
248 }
249
250 p = atts;
251 while (*p)
252 ++p;
253 nAtts = (int)((p - atts) >> 1);
254 if (nAtts > 1)
255 qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, nsattcmp);
256 while (*atts) {
257 name = *atts++;
258 sep = tcsrchr(name, NSSEP);
259 puttc(T(' '), fp);
260 if (sep) {
261 ftprintf(fp, T("n%d:"), nsi);
262 fputts(sep + 1, fp);
263 } else
264 fputts(name, fp);
265 attributeValue(fp, *atts);
266 if (sep) {
267 ftprintf(fp, T(" xmlns:n%d"), nsi++);
268 attributeValue(fp, name);
269 }
270 atts++;
271 }
272 puttc(T('>'), fp);
273 }
274
275 static void XMLCALL
endElementNS(void * userData,const XML_Char * name)276 endElementNS(void *userData, const XML_Char *name) {
277 FILE *fp = ((XmlwfUserData *)userData)->fp;
278 const XML_Char *sep;
279 puttc(T('<'), fp);
280 puttc(T('/'), fp);
281 sep = tcsrchr(name, NSSEP);
282 if (sep) {
283 fputts(T("n1:"), fp);
284 fputts(sep + 1, fp);
285 } else
286 fputts(name, fp);
287 puttc(T('>'), fp);
288 }
289
290 #ifndef W3C14N
291
292 static void XMLCALL
processingInstruction(void * userData,const XML_Char * target,const XML_Char * data)293 processingInstruction(void *userData, const XML_Char *target,
294 const XML_Char *data) {
295 FILE *fp = ((XmlwfUserData *)userData)->fp;
296 puttc(T('<'), fp);
297 puttc(T('?'), fp);
298 fputts(target, fp);
299 puttc(T(' '), fp);
300 fputts(data, fp);
301 puttc(T('?'), fp);
302 puttc(T('>'), fp);
303 }
304
305 static XML_Char *
xcsdup(const XML_Char * s)306 xcsdup(const XML_Char *s) {
307 XML_Char *result;
308 int count = 0;
309 size_t numBytes;
310
311 /* Get the length of the string, including terminator */
312 while (s[count++] != 0) {
313 /* Do nothing */
314 }
315 numBytes = count * sizeof(XML_Char);
316 result = malloc(numBytes);
317 if (result == NULL)
318 return NULL;
319 memcpy(result, s, numBytes);
320 return result;
321 }
322
323 static void XMLCALL
startDoctypeDecl(void * userData,const XML_Char * doctypeName,const XML_Char * sysid,const XML_Char * publid,int has_internal_subset)324 startDoctypeDecl(void *userData, const XML_Char *doctypeName,
325 const XML_Char *sysid, const XML_Char *publid,
326 int has_internal_subset) {
327 XmlwfUserData *data = (XmlwfUserData *)userData;
328 UNUSED_P(sysid);
329 UNUSED_P(publid);
330 UNUSED_P(has_internal_subset);
331 data->currentDoctypeName = xcsdup(doctypeName);
332 }
333
334 static void
freeNotations(XmlwfUserData * data)335 freeNotations(XmlwfUserData *data) {
336 NotationList *notationListHead = data->notationListHead;
337
338 while (notationListHead != NULL) {
339 NotationList *next = notationListHead->next;
340 free((void *)notationListHead->notationName);
341 free((void *)notationListHead->systemId);
342 free((void *)notationListHead->publicId);
343 free(notationListHead);
344 notationListHead = next;
345 }
346 data->notationListHead = NULL;
347 }
348
349 static void
cleanupUserData(XmlwfUserData * userData)350 cleanupUserData(XmlwfUserData *userData) {
351 free((void *)userData->currentDoctypeName);
352 userData->currentDoctypeName = NULL;
353 freeNotations(userData);
354 }
355
356 static int
xcscmp(const XML_Char * xs,const XML_Char * xt)357 xcscmp(const XML_Char *xs, const XML_Char *xt) {
358 while (*xs != 0 && *xt != 0) {
359 if (*xs < *xt)
360 return -1;
361 if (*xs > *xt)
362 return 1;
363 xs++;
364 xt++;
365 }
366 if (*xs < *xt)
367 return -1;
368 if (*xs > *xt)
369 return 1;
370 return 0;
371 }
372
373 static int
notationCmp(const void * a,const void * b)374 notationCmp(const void *a, const void *b) {
375 const NotationList *const n1 = *(const NotationList *const *)a;
376 const NotationList *const n2 = *(const NotationList *const *)b;
377
378 return xcscmp(n1->notationName, n2->notationName);
379 }
380
381 static void XMLCALL
endDoctypeDecl(void * userData)382 endDoctypeDecl(void *userData) {
383 XmlwfUserData *data = (XmlwfUserData *)userData;
384 NotationList **notations;
385 int notationCount = 0;
386 NotationList *p;
387 int i;
388
389 /* How many notations do we have? */
390 for (p = data->notationListHead; p != NULL; p = p->next)
391 notationCount++;
392 if (notationCount == 0) {
393 /* Nothing to report */
394 goto cleanUp;
395 }
396
397 notations = malloc(notationCount * sizeof(NotationList *));
398 if (notations == NULL) {
399 fprintf(stderr, "Unable to sort notations");
400 goto cleanUp;
401 }
402
403 for (p = data->notationListHead, i = 0; i < notationCount; p = p->next, i++) {
404 notations[i] = p;
405 }
406 qsort(notations, notationCount, sizeof(NotationList *), notationCmp);
407
408 /* Output the DOCTYPE header */
409 fputts(T("<!DOCTYPE "), data->fp);
410 fputts(data->currentDoctypeName, data->fp);
411 fputts(T(" [\n"), data->fp);
412
413 /* Now the NOTATIONs */
414 for (i = 0; i < notationCount; i++) {
415 fputts(T("<!NOTATION "), data->fp);
416 fputts(notations[i]->notationName, data->fp);
417 if (notations[i]->publicId != NULL) {
418 fputts(T(" PUBLIC '"), data->fp);
419 fputts(notations[i]->publicId, data->fp);
420 puttc(T('\''), data->fp);
421 if (notations[i]->systemId != NULL) {
422 puttc(T(' '), data->fp);
423 puttc(T('\''), data->fp);
424 fputts(notations[i]->systemId, data->fp);
425 puttc(T('\''), data->fp);
426 }
427 } else if (notations[i]->systemId != NULL) {
428 fputts(T(" SYSTEM '"), data->fp);
429 fputts(notations[i]->systemId, data->fp);
430 puttc(T('\''), data->fp);
431 }
432 puttc(T('>'), data->fp);
433 puttc(T('\n'), data->fp);
434 }
435
436 /* Finally end the DOCTYPE */
437 fputts(T("]>\n"), data->fp);
438
439 free(notations);
440
441 cleanUp:
442 freeNotations(data);
443 free((void *)data->currentDoctypeName);
444 data->currentDoctypeName = NULL;
445 }
446
447 static void XMLCALL
notationDecl(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)448 notationDecl(void *userData, const XML_Char *notationName, const XML_Char *base,
449 const XML_Char *systemId, const XML_Char *publicId) {
450 XmlwfUserData *data = (XmlwfUserData *)userData;
451 NotationList *entry = malloc(sizeof(NotationList));
452 const char *errorMessage = "Unable to store NOTATION for output\n";
453
454 UNUSED_P(base);
455 if (entry == NULL) {
456 fputs(errorMessage, stderr);
457 return; /* Nothing we can really do about this */
458 }
459 entry->notationName = xcsdup(notationName);
460 if (entry->notationName == NULL) {
461 fputs(errorMessage, stderr);
462 free(entry);
463 return;
464 }
465 if (systemId != NULL) {
466 entry->systemId = xcsdup(systemId);
467 if (entry->systemId == NULL) {
468 fputs(errorMessage, stderr);
469 free((void *)entry->notationName);
470 free(entry);
471 return;
472 }
473 } else {
474 entry->systemId = NULL;
475 }
476 if (publicId != NULL) {
477 entry->publicId = xcsdup(publicId);
478 if (entry->publicId == NULL) {
479 fputs(errorMessage, stderr);
480 free((void *)entry->systemId); /* Safe if it's NULL */
481 free((void *)entry->notationName);
482 free(entry);
483 return;
484 }
485 } else {
486 entry->publicId = NULL;
487 }
488
489 entry->next = data->notationListHead;
490 data->notationListHead = entry;
491 }
492
493 #endif /* not W3C14N */
494
495 static void XMLCALL
defaultCharacterData(void * userData,const XML_Char * s,int len)496 defaultCharacterData(void *userData, const XML_Char *s, int len) {
497 UNUSED_P(s);
498 UNUSED_P(len);
499 XML_DefaultCurrent((XML_Parser)userData);
500 }
501
502 static void XMLCALL
defaultStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)503 defaultStartElement(void *userData, const XML_Char *name,
504 const XML_Char **atts) {
505 UNUSED_P(name);
506 UNUSED_P(atts);
507 XML_DefaultCurrent((XML_Parser)userData);
508 }
509
510 static void XMLCALL
defaultEndElement(void * userData,const XML_Char * name)511 defaultEndElement(void *userData, const XML_Char *name) {
512 UNUSED_P(name);
513 XML_DefaultCurrent((XML_Parser)userData);
514 }
515
516 static void XMLCALL
defaultProcessingInstruction(void * userData,const XML_Char * target,const XML_Char * data)517 defaultProcessingInstruction(void *userData, const XML_Char *target,
518 const XML_Char *data) {
519 UNUSED_P(target);
520 UNUSED_P(data);
521 XML_DefaultCurrent((XML_Parser)userData);
522 }
523
524 static void XMLCALL
nopCharacterData(void * userData,const XML_Char * s,int len)525 nopCharacterData(void *userData, const XML_Char *s, int len) {
526 UNUSED_P(userData);
527 UNUSED_P(s);
528 UNUSED_P(len);
529 }
530
531 static void XMLCALL
nopStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)532 nopStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
533 UNUSED_P(userData);
534 UNUSED_P(name);
535 UNUSED_P(atts);
536 }
537
538 static void XMLCALL
nopEndElement(void * userData,const XML_Char * name)539 nopEndElement(void *userData, const XML_Char *name) {
540 UNUSED_P(userData);
541 UNUSED_P(name);
542 }
543
544 static void XMLCALL
nopProcessingInstruction(void * userData,const XML_Char * target,const XML_Char * data)545 nopProcessingInstruction(void *userData, const XML_Char *target,
546 const XML_Char *data) {
547 UNUSED_P(userData);
548 UNUSED_P(target);
549 UNUSED_P(data);
550 }
551
552 static void XMLCALL
markup(void * userData,const XML_Char * s,int len)553 markup(void *userData, const XML_Char *s, int len) {
554 FILE *fp = ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp;
555 for (; len > 0; --len, ++s)
556 puttc(*s, fp);
557 }
558
559 static void
metaLocation(XML_Parser parser)560 metaLocation(XML_Parser parser) {
561 const XML_Char *uri = XML_GetBase(parser);
562 FILE *fp = ((XmlwfUserData *)XML_GetUserData(parser))->fp;
563 if (uri)
564 ftprintf(fp, T(" uri=\"%s\""), uri);
565 ftprintf(fp,
566 T(" byte=\"%") T(XML_FMT_INT_MOD) T("d\"") T(" nbytes=\"%d\"")
567 T(" line=\"%") T(XML_FMT_INT_MOD) T("u\"") T(" col=\"%")
568 T(XML_FMT_INT_MOD) T("u\""),
569 XML_GetCurrentByteIndex(parser), XML_GetCurrentByteCount(parser),
570 XML_GetCurrentLineNumber(parser),
571 XML_GetCurrentColumnNumber(parser));
572 }
573
574 static void
metaStartDocument(void * userData)575 metaStartDocument(void *userData) {
576 fputts(T("<document>\n"),
577 ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp);
578 }
579
580 static void
metaEndDocument(void * userData)581 metaEndDocument(void *userData) {
582 fputts(T("</document>\n"),
583 ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp);
584 }
585
586 static void XMLCALL
metaStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)587 metaStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
588 XML_Parser parser = (XML_Parser)userData;
589 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
590 FILE *fp = data->fp;
591 const XML_Char **specifiedAttsEnd
592 = atts + XML_GetSpecifiedAttributeCount(parser);
593 const XML_Char **idAttPtr;
594 int idAttIndex = XML_GetIdAttributeIndex(parser);
595 if (idAttIndex < 0)
596 idAttPtr = 0;
597 else
598 idAttPtr = atts + idAttIndex;
599
600 ftprintf(fp, T("<starttag name=\"%s\""), name);
601 metaLocation(parser);
602 if (*atts) {
603 fputts(T(">\n"), fp);
604 do {
605 ftprintf(fp, T("<attribute name=\"%s\" value=\""), atts[0]);
606 characterData(data, atts[1], (int)tcslen(atts[1]));
607 if (atts >= specifiedAttsEnd)
608 fputts(T("\" defaulted=\"yes\"/>\n"), fp);
609 else if (atts == idAttPtr)
610 fputts(T("\" id=\"yes\"/>\n"), fp);
611 else
612 fputts(T("\"/>\n"), fp);
613 } while (*(atts += 2));
614 fputts(T("</starttag>\n"), fp);
615 } else
616 fputts(T("/>\n"), fp);
617 }
618
619 static void XMLCALL
metaEndElement(void * userData,const XML_Char * name)620 metaEndElement(void *userData, const XML_Char *name) {
621 XML_Parser parser = (XML_Parser)userData;
622 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
623 FILE *fp = data->fp;
624 ftprintf(fp, T("<endtag name=\"%s\""), name);
625 metaLocation(parser);
626 fputts(T("/>\n"), fp);
627 }
628
629 static void XMLCALL
metaProcessingInstruction(void * userData,const XML_Char * target,const XML_Char * data)630 metaProcessingInstruction(void *userData, const XML_Char *target,
631 const XML_Char *data) {
632 XML_Parser parser = (XML_Parser)userData;
633 XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser);
634 FILE *fp = usrData->fp;
635 ftprintf(fp, T("<pi target=\"%s\" data=\""), target);
636 characterData(usrData, data, (int)tcslen(data));
637 puttc(T('"'), fp);
638 metaLocation(parser);
639 fputts(T("/>\n"), fp);
640 }
641
642 static void XMLCALL
metaComment(void * userData,const XML_Char * data)643 metaComment(void *userData, const XML_Char *data) {
644 XML_Parser parser = (XML_Parser)userData;
645 XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser);
646 FILE *fp = usrData->fp;
647 fputts(T("<comment data=\""), fp);
648 characterData(usrData, data, (int)tcslen(data));
649 puttc(T('"'), fp);
650 metaLocation(parser);
651 fputts(T("/>\n"), fp);
652 }
653
654 static void XMLCALL
metaStartCdataSection(void * userData)655 metaStartCdataSection(void *userData) {
656 XML_Parser parser = (XML_Parser)userData;
657 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
658 FILE *fp = data->fp;
659 fputts(T("<startcdata"), fp);
660 metaLocation(parser);
661 fputts(T("/>\n"), fp);
662 }
663
664 static void XMLCALL
metaEndCdataSection(void * userData)665 metaEndCdataSection(void *userData) {
666 XML_Parser parser = (XML_Parser)userData;
667 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
668 FILE *fp = data->fp;
669 fputts(T("<endcdata"), fp);
670 metaLocation(parser);
671 fputts(T("/>\n"), fp);
672 }
673
674 static void XMLCALL
metaCharacterData(void * userData,const XML_Char * s,int len)675 metaCharacterData(void *userData, const XML_Char *s, int len) {
676 XML_Parser parser = (XML_Parser)userData;
677 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
678 FILE *fp = data->fp;
679 fputts(T("<chars str=\""), fp);
680 characterData(data, s, len);
681 puttc(T('"'), fp);
682 metaLocation(parser);
683 fputts(T("/>\n"), fp);
684 }
685
686 static void XMLCALL
metaStartDoctypeDecl(void * userData,const XML_Char * doctypeName,const XML_Char * sysid,const XML_Char * pubid,int has_internal_subset)687 metaStartDoctypeDecl(void *userData, const XML_Char *doctypeName,
688 const XML_Char *sysid, const XML_Char *pubid,
689 int has_internal_subset) {
690 XML_Parser parser = (XML_Parser)userData;
691 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
692 FILE *fp = data->fp;
693 UNUSED_P(sysid);
694 UNUSED_P(pubid);
695 UNUSED_P(has_internal_subset);
696 ftprintf(fp, T("<startdoctype name=\"%s\""), doctypeName);
697 metaLocation(parser);
698 fputts(T("/>\n"), fp);
699 }
700
701 static void XMLCALL
metaEndDoctypeDecl(void * userData)702 metaEndDoctypeDecl(void *userData) {
703 XML_Parser parser = (XML_Parser)userData;
704 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
705 FILE *fp = data->fp;
706 fputts(T("<enddoctype"), fp);
707 metaLocation(parser);
708 fputts(T("/>\n"), fp);
709 }
710
711 static void XMLCALL
metaNotationDecl(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)712 metaNotationDecl(void *userData, const XML_Char *notationName,
713 const XML_Char *base, const XML_Char *systemId,
714 const XML_Char *publicId) {
715 XML_Parser parser = (XML_Parser)userData;
716 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
717 FILE *fp = data->fp;
718 UNUSED_P(base);
719 ftprintf(fp, T("<notation name=\"%s\""), notationName);
720 if (publicId)
721 ftprintf(fp, T(" public=\"%s\""), publicId);
722 if (systemId) {
723 fputts(T(" system=\""), fp);
724 characterData(data, systemId, (int)tcslen(systemId));
725 puttc(T('"'), fp);
726 }
727 metaLocation(parser);
728 fputts(T("/>\n"), fp);
729 }
730
731 static void XMLCALL
metaEntityDecl(void * userData,const XML_Char * entityName,int is_param,const XML_Char * value,int value_length,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId,const XML_Char * notationName)732 metaEntityDecl(void *userData, const XML_Char *entityName, int is_param,
733 const XML_Char *value, int value_length, const XML_Char *base,
734 const XML_Char *systemId, const XML_Char *publicId,
735 const XML_Char *notationName) {
736 XML_Parser parser = (XML_Parser)userData;
737 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
738 FILE *fp = data->fp;
739
740 UNUSED_P(is_param);
741 UNUSED_P(base);
742 if (value) {
743 ftprintf(fp, T("<entity name=\"%s\""), entityName);
744 metaLocation(parser);
745 puttc(T('>'), fp);
746 characterData(data, value, value_length);
747 fputts(T("</entity/>\n"), fp);
748 } else if (notationName) {
749 ftprintf(fp, T("<entity name=\"%s\""), entityName);
750 if (publicId)
751 ftprintf(fp, T(" public=\"%s\""), publicId);
752 fputts(T(" system=\""), fp);
753 characterData(data, systemId, (int)tcslen(systemId));
754 puttc(T('"'), fp);
755 ftprintf(fp, T(" notation=\"%s\""), notationName);
756 metaLocation(parser);
757 fputts(T("/>\n"), fp);
758 } else {
759 ftprintf(fp, T("<entity name=\"%s\""), entityName);
760 if (publicId)
761 ftprintf(fp, T(" public=\"%s\""), publicId);
762 fputts(T(" system=\""), fp);
763 characterData(data, systemId, (int)tcslen(systemId));
764 puttc(T('"'), fp);
765 metaLocation(parser);
766 fputts(T("/>\n"), fp);
767 }
768 }
769
770 static void XMLCALL
metaStartNamespaceDecl(void * userData,const XML_Char * prefix,const XML_Char * uri)771 metaStartNamespaceDecl(void *userData, const XML_Char *prefix,
772 const XML_Char *uri) {
773 XML_Parser parser = (XML_Parser)userData;
774 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
775 FILE *fp = data->fp;
776 fputts(T("<startns"), fp);
777 if (prefix)
778 ftprintf(fp, T(" prefix=\"%s\""), prefix);
779 if (uri) {
780 fputts(T(" ns=\""), fp);
781 characterData(data, uri, (int)tcslen(uri));
782 fputts(T("\"/>\n"), fp);
783 } else
784 fputts(T("/>\n"), fp);
785 }
786
787 static void XMLCALL
metaEndNamespaceDecl(void * userData,const XML_Char * prefix)788 metaEndNamespaceDecl(void *userData, const XML_Char *prefix) {
789 XML_Parser parser = (XML_Parser)userData;
790 XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
791 FILE *fp = data->fp;
792 if (! prefix)
793 fputts(T("<endns/>\n"), fp);
794 else
795 ftprintf(fp, T("<endns prefix=\"%s\"/>\n"), prefix);
796 }
797
798 static int XMLCALL
unknownEncodingConvert(void * data,const char * p)799 unknownEncodingConvert(void *data, const char *p) {
800 return codepageConvert(*(int *)data, p);
801 }
802
803 static int XMLCALL
unknownEncoding(void * userData,const XML_Char * name,XML_Encoding * info)804 unknownEncoding(void *userData, const XML_Char *name, XML_Encoding *info) {
805 int cp;
806 static const XML_Char prefixL[] = T("windows-");
807 static const XML_Char prefixU[] = T("WINDOWS-");
808 int i;
809
810 UNUSED_P(userData);
811 for (i = 0; prefixU[i]; i++)
812 if (name[i] != prefixU[i] && name[i] != prefixL[i])
813 return 0;
814
815 cp = 0;
816 for (; name[i]; i++) {
817 static const XML_Char digits[] = T("0123456789");
818 const XML_Char *s = tcschr(digits, name[i]);
819 if (! s)
820 return 0;
821 cp *= 10;
822 cp += (int)(s - digits);
823 if (cp >= 0x10000)
824 return 0;
825 }
826 if (! codepageMap(cp, info->map))
827 return 0;
828 info->convert = unknownEncodingConvert;
829 /* We could just cast the code page integer to a void *,
830 and avoid the use of release. */
831 info->release = free;
832 info->data = malloc(sizeof(int));
833 if (! info->data)
834 return 0;
835 *(int *)info->data = cp;
836 return 1;
837 }
838
839 static int XMLCALL
notStandalone(void * userData)840 notStandalone(void *userData) {
841 UNUSED_P(userData);
842 return 0;
843 }
844
845 static void
showVersion(XML_Char * prog)846 showVersion(XML_Char *prog) {
847 XML_Char *s = prog;
848 XML_Char ch;
849 const XML_Feature *features = XML_GetFeatureList();
850 while ((ch = *s) != 0) {
851 if (ch == '/'
852 #if defined(_WIN32)
853 || ch == '\\'
854 #endif
855 )
856 prog = s + 1;
857 ++s;
858 }
859 ftprintf(stdout, T("%s using %s\n"), prog, XML_ExpatVersion());
860 if (features != NULL && features[0].feature != XML_FEATURE_END) {
861 int i = 1;
862 ftprintf(stdout, T("%s"), features[0].name);
863 if (features[0].value)
864 ftprintf(stdout, T("=%ld"), features[0].value);
865 while (features[i].feature != XML_FEATURE_END) {
866 ftprintf(stdout, T(", %s"), features[i].name);
867 if (features[i].value)
868 ftprintf(stdout, T("=%ld"), features[i].value);
869 ++i;
870 }
871 ftprintf(stdout, T("\n"));
872 }
873 }
874
875 #if defined(__GNUC__)
876 __attribute__((noreturn))
877 #endif
878 static void
usage(const XML_Char * prog,int rc)879 usage(const XML_Char *prog, int rc) {
880 ftprintf(
881 stderr,
882 /* Generated with:
883 * $ xmlwf/xmlwf_helpgen.sh
884 * To update, change xmlwf/xmlwf_helpgen.py, then paste the output of
885 * xmlwf/xmlwf_helpgen.sh in here.
886 */
887 /* clang-format off */
888 T("usage:\n")
889 T(" %s [OPTIONS] [FILE ...]\n")
890 T(" %s -h|--help\n")
891 T(" %s -v|--version\n")
892 T("\n")
893 T("xmlwf - Determines if an XML document is well-formed\n")
894 T("\n")
895 T("positional arguments:\n")
896 T(" FILE file to process (default: STDIN)\n")
897 T("\n")
898 T("input control arguments:\n")
899 T(" -s print an error if the document is not [s]tandalone\n")
900 T(" -n enable [n]amespace processing\n")
901 T(" -p enable processing of external DTDs and [p]arameter entities\n")
902 T(" -x enable processing of e[x]ternal entities\n")
903 T(" (CAREFUL! This makes xmlwf vulnerable to external entity attacks (XXE).)\n")
904 T(" -e ENCODING override any in-document [e]ncoding declaration\n")
905 T(" -w enable support for [W]indows code pages\n")
906 T(" -r disable memory-mapping and use [r]ead calls instead\n")
907 T(" -g BYTES buffer size to request per call pair to XML_[G]etBuffer and read (default: 8 KiB)\n")
908 T(" -k when processing multiple files, [k]eep processing after first file with error\n")
909 T("\n")
910 T("output control arguments:\n")
911 T(" -d DIRECTORY output [d]estination directory\n")
912 T(" -c write a [c]opy of input XML, not canonical XML\n")
913 T(" -m write [m]eta XML, not canonical XML\n")
914 T(" -t write no XML output for [t]iming of plain parsing\n")
915 T(" -N enable adding doctype and [n]otation declarations\n")
916 T("\n")
917 T("amplification attack protection (e.g. billion laughs):\n")
918 T(" NOTE: If you ever need to increase these values for non-attack payload, please file a bug report.\n")
919 T("\n")
920 T(" -a FACTOR set maximum tolerated [a]mplification factor (default: 100.0)\n")
921 T(" -b BYTES set number of output [b]ytes needed to activate (default: 8 MiB/64 MiB)\n")
922 T("\n")
923 T("reparse deferral:\n")
924 T(" -q disable reparse deferral, and allow [q]uadratic parse runtime with large tokens\n")
925 T("\n")
926 T("info arguments:\n")
927 T(" -h, --help show this [h]elp message and exit\n")
928 T(" -v, --version show program's [v]ersion number and exit\n")
929 T("\n")
930 T("environment variables:\n")
931 T(" EXPAT_ACCOUNTING_DEBUG=(0|1|2|3)\n")
932 T(" Control verbosity of accounting debugging (default: 0)\n")
933 T(" EXPAT_ENTITY_DEBUG=(0|1)\n")
934 T(" Control verbosity of entity debugging (default: 0)\n")
935 T(" EXPAT_ENTROPY_DEBUG=(0|1)\n")
936 T(" Control verbosity of entropy debugging (default: 0)\n")
937 T(" EXPAT_MALLOC_DEBUG=(0|1|2)\n")
938 T(" Control verbosity of allocation tracker (default: 0)\n")
939 T("\n")
940 T("exit status:\n")
941 T(" 0 the input files are well-formed and the output (if requested) was written successfully\n")
942 T(" 1 could not allocate data structures, signals a serious problem with execution environment\n")
943 T(" 2 one or more input files were not well-formed\n")
944 T(" 3 could not create an output file\n")
945 T(" 4 command-line argument error\n")
946 T("\n")
947 T("xmlwf of libexpat is software libre, licensed under the MIT license.\n")
948 T("Please report bugs at https://github.com/libexpat/libexpat/issues -- thank you!\n")
949 , /* clang-format on */
950 prog, prog, prog);
951 exit(rc);
952 }
953
954 #if defined(__MINGW32__) && defined(XML_UNICODE)
955 /* Silence warning about missing prototype */
956 int wmain(int argc, XML_Char **argv);
957 #endif
958
/* Fetches the value of the option at argv[i][j] into constCharStarTarget.
   The value is the rest of the current argument when something follows the
   option letter, otherwise the next argument; when no next argument exists
   usage() is called, which exits.  Afterwards i refers to the argument
   following the value and j is reset to 0.  The index arguments i and j
   are modified and must be lvalues. */
#define XMLWF_SHIFT_ARG_INTO(constCharStarTarget, argc, argv, i, j)            \
  {                                                                            \
    if ((argv)[(i)][(j) + 1] != T('\0')) {                                     \
      (constCharStarTarget) = (argv)[(i)] + (j) + 1;                           \
    } else {                                                                   \
      if (++(i) == (argc)) {                                                   \
        usage((argv)[0], XMLWF_EXIT_USAGE_ERROR);                              \
        /* usage called exit(..), never gets here */                           \
      }                                                                        \
      (constCharStarTarget) = (argv)[(i)];                                     \
    }                                                                          \
    (i)++;                                                                     \
    (j) = 0;                                                                   \
  }
973
974 int
tmain(int argc,XML_Char ** argv)975 tmain(int argc, XML_Char **argv) {
976 int i, j;
977 const XML_Char *outputDir = NULL;
978 const XML_Char *encoding = NULL;
979 unsigned processFlags = XML_MAP_FILE;
980 int windowsCodePages = 0;
981 int outputType = 0;
982 int useNamespaces = 0;
983 int requireStandalone = 0;
984 int requiresNotations = 0;
985 int continueOnError = 0;
986
987 float attackMaximumAmplification = -1.0f; /* signaling "not set" */
988 unsigned long long attackThresholdBytes = 0;
989 XML_Bool attackThresholdGiven = XML_FALSE;
990
991 XML_Bool disableDeferral = XML_FALSE;
992
993 int exitCode = XMLWF_EXIT_SUCCESS;
994 enum XML_ParamEntityParsing paramEntityParsing
995 = XML_PARAM_ENTITY_PARSING_NEVER;
996 int useStdin = 0;
997 XmlwfUserData userData = {NULL, NULL, NULL};
998
999 #ifdef _MSC_VER
1000 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
1001 #endif
1002
1003 i = 1;
1004 j = 0;
1005 while (i < argc) {
1006 if (j == 0) {
1007 if (argv[i][0] != T('-'))
1008 break;
1009 if (argv[i][1] == T('-')) {
1010 if (argv[i][2] == T('\0')) {
1011 i++;
1012 break;
1013 } else if (tcscmp(argv[i] + 2, T("help")) == 0) {
1014 usage(argv[0], XMLWF_EXIT_SUCCESS);
1015 // usage called exit(..), never gets here
1016 } else if (tcscmp(argv[i] + 2, T("version")) == 0) {
1017 showVersion(argv[0]);
1018 return XMLWF_EXIT_SUCCESS;
1019 }
1020 }
1021 j++;
1022 }
1023 switch (argv[i][j]) {
1024 case T('r'):
1025 processFlags &= ~XML_MAP_FILE;
1026 j++;
1027 break;
1028 case T('s'):
1029 requireStandalone = 1;
1030 j++;
1031 break;
1032 case T('n'):
1033 useNamespaces = 1;
1034 j++;
1035 break;
1036 case T('p'):
1037 paramEntityParsing = XML_PARAM_ENTITY_PARSING_ALWAYS;
1038 /* fall through */
1039 case T('x'):
1040 processFlags |= XML_EXTERNAL_ENTITIES;
1041 j++;
1042 break;
1043 case T('w'):
1044 windowsCodePages = 1;
1045 j++;
1046 break;
1047 case T('m'):
1048 outputType = 'm';
1049 j++;
1050 break;
1051 case T('c'):
1052 outputType = 'c';
1053 useNamespaces = 0;
1054 j++;
1055 break;
1056 case T('t'):
1057 outputType = 't';
1058 j++;
1059 break;
1060 case T('N'):
1061 requiresNotations = 1;
1062 j++;
1063 break;
1064 case T('d'):
1065 XMLWF_SHIFT_ARG_INTO(outputDir, argc, argv, i, j);
1066 break;
1067 case T('e'):
1068 XMLWF_SHIFT_ARG_INTO(encoding, argc, argv, i, j);
1069 break;
1070 case T('h'):
1071 usage(argv[0], XMLWF_EXIT_SUCCESS);
1072 // usage called exit(..), never gets here
1073 case T('v'):
1074 showVersion(argv[0]);
1075 return XMLWF_EXIT_SUCCESS;
1076 case T('g'): {
1077 const XML_Char *valueText = NULL;
1078 XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
1079
1080 errno = 0;
1081 XML_Char *afterValueText = (XML_Char *)valueText;
1082 const long long read_size_bytes_candidate
1083 = tcstoull(valueText, &afterValueText, 10);
1084 if ((errno != 0) || (afterValueText[0] != T('\0'))
1085 || (read_size_bytes_candidate < 1)
1086 || (read_size_bytes_candidate > (INT_MAX / 2 + 1))) {
1087 // This prevents tperror(..) from reporting misleading "[..]: Success"
1088 errno = ERANGE;
1089 tperror(T("invalid buffer size") T(
1090 " (needs an integer from 1 to INT_MAX/2+1 i.e. 1,073,741,824 on most platforms)"));
1091 exit(XMLWF_EXIT_USAGE_ERROR);
1092 }
1093 g_read_size_bytes = (int)read_size_bytes_candidate;
1094 break;
1095 }
1096 case T('k'):
1097 continueOnError = 1;
1098 j++;
1099 break;
1100 case T('a'): {
1101 const XML_Char *valueText = NULL;
1102 XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
1103
1104 errno = 0;
1105 XML_Char *afterValueText = NULL;
1106 attackMaximumAmplification = tcstof(valueText, &afterValueText);
1107 if ((errno != 0) || (afterValueText[0] != T('\0'))
1108 || isnan(attackMaximumAmplification)
1109 || (attackMaximumAmplification < 1.0f)) {
1110 // This prevents tperror(..) from reporting misleading "[..]: Success"
1111 errno = ERANGE;
1112 tperror(T("invalid amplification limit") T(
1113 " (needs a floating point number greater or equal than 1.0)"));
1114 exit(XMLWF_EXIT_USAGE_ERROR);
1115 }
1116 #if XML_GE == 0
1117 ftprintf(stderr,
1118 T("Warning: Given amplification limit ignored")
1119 T(", xmlwf has been compiled without DTD/GE support.\n"));
1120 #endif
1121 break;
1122 }
1123 case T('b'): {
1124 const XML_Char *valueText = NULL;
1125 XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
1126
1127 errno = 0;
1128 XML_Char *afterValueText = (XML_Char *)valueText;
1129 attackThresholdBytes = tcstoull(valueText, &afterValueText, 10);
1130 if ((errno != 0) || (afterValueText[0] != T('\0'))) {
1131 // This prevents tperror(..) from reporting misleading "[..]: Success"
1132 errno = ERANGE;
1133 tperror(T("invalid ignore threshold")
1134 T(" (needs an integer from 0 to 2^64-1)"));
1135 exit(XMLWF_EXIT_USAGE_ERROR);
1136 }
1137 attackThresholdGiven = XML_TRUE;
1138 #if XML_GE == 0
1139 ftprintf(stderr,
1140 T("Warning: Given attack threshold ignored")
1141 T(", xmlwf has been compiled without DTD/GE support.\n"));
1142 #endif
1143 break;
1144 }
1145 case T('q'): {
1146 disableDeferral = XML_TRUE;
1147 j++;
1148 break;
1149 }
1150 case T('\0'):
1151 if (j > 1) {
1152 i++;
1153 j = 0;
1154 break;
1155 }
1156 /* fall through */
1157 default:
1158 usage(argv[0], XMLWF_EXIT_USAGE_ERROR);
1159 // usage called exit(..), never gets here
1160 }
1161 }
1162 if (i == argc) {
1163 useStdin = 1;
1164 processFlags &= ~XML_MAP_FILE;
1165 i--;
1166 }
1167 for (; i < argc; i++) {
1168 XML_Char *outName = 0;
1169 int result;
1170 XML_Parser parser;
1171 if (useNamespaces)
1172 parser = XML_ParserCreateNS(encoding, NSSEP);
1173 else
1174 parser = XML_ParserCreate(encoding);
1175
1176 if (! parser) {
1177 tperror(T("Could not instantiate parser"));
1178 exit(XMLWF_EXIT_INTERNAL_ERROR);
1179 }
1180
1181 if (attackMaximumAmplification != -1.0f) {
1182 #if XML_GE == 1
1183 XML_SetBillionLaughsAttackProtectionMaximumAmplification(
1184 parser, attackMaximumAmplification);
1185 XML_SetAllocTrackerMaximumAmplification(parser,
1186 attackMaximumAmplification);
1187 #endif
1188 }
1189 if (attackThresholdGiven) {
1190 #if XML_GE == 1
1191 XML_SetBillionLaughsAttackProtectionActivationThreshold(
1192 parser, attackThresholdBytes);
1193 XML_SetAllocTrackerActivationThreshold(parser, attackThresholdBytes);
1194 #else
1195 (void)attackThresholdBytes; // silence -Wunused-but-set-variable
1196 #endif
1197 }
1198
1199 if (disableDeferral) {
1200 const XML_Bool success = XML_SetReparseDeferralEnabled(parser, XML_FALSE);
1201 if (! success) {
1202 // This prevents tperror(..) from reporting misleading "[..]: Success"
1203 errno = EINVAL;
1204 tperror(T("Failed to disable reparse deferral"));
1205 exit(XMLWF_EXIT_INTERNAL_ERROR);
1206 }
1207 }
1208
1209 if (requireStandalone)
1210 XML_SetNotStandaloneHandler(parser, notStandalone);
1211 XML_SetParamEntityParsing(parser, paramEntityParsing);
1212 if (outputType == 't') {
1213 /* This is for doing timings; this gives a more realistic estimate of
1214 the parsing time. */
1215 outputDir = 0;
1216 XML_SetElementHandler(parser, nopStartElement, nopEndElement);
1217 XML_SetCharacterDataHandler(parser, nopCharacterData);
1218 XML_SetProcessingInstructionHandler(parser, nopProcessingInstruction);
1219 } else if (outputDir) {
1220 const XML_Char *delim = T("/");
1221 const XML_Char *file = useStdin ? T("STDIN") : argv[i];
1222 if (! useStdin) {
1223 /* Jump after last (back)slash */
1224 const XML_Char *lastDelim = tcsrchr(file, delim[0]);
1225 if (lastDelim)
1226 file = lastDelim + 1;
1227 #if defined(_WIN32)
1228 else {
1229 const XML_Char *winDelim = T("\\");
1230 lastDelim = tcsrchr(file, winDelim[0]);
1231 if (lastDelim) {
1232 file = lastDelim + 1;
1233 delim = winDelim;
1234 }
1235 }
1236 #endif
1237 }
1238 outName = (XML_Char *)malloc((tcslen(outputDir) + tcslen(file) + 2)
1239 * sizeof(XML_Char));
1240 if (! outName) {
1241 tperror(T("Could not allocate memory"));
1242 exit(XMLWF_EXIT_INTERNAL_ERROR);
1243 }
1244 tcscpy(outName, outputDir);
1245 tcscat(outName, delim);
1246 tcscat(outName, file);
1247 userData.fp = tfopen(outName, T("wb"));
1248 if (! userData.fp) {
1249 tperror(outName);
1250 exitCode = XMLWF_EXIT_OUTPUT_ERROR;
1251 free(outName);
1252 XML_ParserFree(parser);
1253 if (continueOnError) {
1254 continue;
1255 } else {
1256 break;
1257 }
1258 }
1259 setvbuf(userData.fp, NULL, _IOFBF, 16384);
1260 #ifdef XML_UNICODE
1261 puttc(0xFEFF, userData.fp);
1262 #endif
1263 XML_SetUserData(parser, &userData);
1264 switch (outputType) {
1265 case 'm':
1266 XML_UseParserAsHandlerArg(parser);
1267 XML_SetElementHandler(parser, metaStartElement, metaEndElement);
1268 XML_SetProcessingInstructionHandler(parser, metaProcessingInstruction);
1269 XML_SetCommentHandler(parser, metaComment);
1270 XML_SetCdataSectionHandler(parser, metaStartCdataSection,
1271 metaEndCdataSection);
1272 XML_SetCharacterDataHandler(parser, metaCharacterData);
1273 XML_SetDoctypeDeclHandler(parser, metaStartDoctypeDecl,
1274 metaEndDoctypeDecl);
1275 XML_SetEntityDeclHandler(parser, metaEntityDecl);
1276 XML_SetNotationDeclHandler(parser, metaNotationDecl);
1277 XML_SetNamespaceDeclHandler(parser, metaStartNamespaceDecl,
1278 metaEndNamespaceDecl);
1279 metaStartDocument(parser);
1280 break;
1281 case 'c':
1282 XML_UseParserAsHandlerArg(parser);
1283 XML_SetDefaultHandler(parser, markup);
1284 XML_SetElementHandler(parser, defaultStartElement, defaultEndElement);
1285 XML_SetCharacterDataHandler(parser, defaultCharacterData);
1286 XML_SetProcessingInstructionHandler(parser,
1287 defaultProcessingInstruction);
1288 break;
1289 default:
1290 if (useNamespaces)
1291 XML_SetElementHandler(parser, startElementNS, endElementNS);
1292 else
1293 XML_SetElementHandler(parser, startElement, endElement);
1294 XML_SetCharacterDataHandler(parser, characterData);
1295 #ifndef W3C14N
1296 XML_SetProcessingInstructionHandler(parser, processingInstruction);
1297 if (requiresNotations) {
1298 XML_SetDoctypeDeclHandler(parser, startDoctypeDecl, endDoctypeDecl);
1299 XML_SetNotationDeclHandler(parser, notationDecl);
1300 }
1301 #endif /* not W3C14N */
1302 break;
1303 }
1304 }
1305 if (windowsCodePages)
1306 XML_SetUnknownEncodingHandler(parser, unknownEncoding, 0);
1307 result = XML_ProcessFile(parser, useStdin ? NULL : argv[i], processFlags);
1308 if (outputDir) {
1309 if (outputType == 'm')
1310 metaEndDocument(parser);
1311 fclose(userData.fp);
1312 if (! result) {
1313 tremove(outName);
1314 }
1315 free(outName);
1316 }
1317 XML_ParserFree(parser);
1318 if (! result) {
1319 exitCode = XMLWF_EXIT_NOT_WELLFORMED;
1320 cleanupUserData(&userData);
1321 if (! continueOnError) {
1322 break;
1323 }
1324 }
1325 }
1326 return exitCode;
1327 }
1328