xref: /src/contrib/expat/xmlwf/xmlwf.c (revision ae04c7bbf065278687fa930e81a96767e9009d38)
1 /*
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
11    Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12    Copyright (c) 2004-2009 Karl Waclawek <karl@waclawek.net>
13    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
14    Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org>
15    Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
16    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
17    Copyright (c) 2020      Joe Orton <jorton@redhat.com>
18    Copyright (c) 2020      Kleber Tarcísio <klebertarcisio@yahoo.com.br>
19    Copyright (c) 2021      Tim Bray <tbray@textuality.com>
20    Copyright (c) 2022      Martin Ettl <ettl.martin78@googlemail.com>
21    Copyright (c) 2022      Sean McBride <sean@rogue-research.com>
22    Copyright (c) 2025      Alfonso Gregory <gfunni234@gmail.com>
23    Licensed under the MIT license:
24 
25    Permission is  hereby granted,  free of charge,  to any  person obtaining
26    a  copy  of  this  software   and  associated  documentation  files  (the
27    "Software"),  to  deal in  the  Software  without restriction,  including
28    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
29    distribute, sublicense, and/or sell copies of the Software, and to permit
30    persons  to whom  the Software  is  furnished to  do so,  subject to  the
31    following conditions:
32 
33    The above copyright  notice and this permission notice  shall be included
34    in all copies or substantial portions of the Software.
35 
36    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
37    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
38    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
39    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
40    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
41    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
42    USE OR OTHER DEALINGS IN THE SOFTWARE.
43 */
44 
45 #include "expat_config.h"
46 
47 #include <assert.h>
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <stddef.h>
51 #include <string.h>
52 #include <math.h> /* for isnan */
53 #include <errno.h>
54 
55 #include "expat.h"
56 #include "codepage.h"
57 #include "internal.h" /* for UNUSED_P only */
58 #include "xmlfile.h"
59 #include "xmltchar.h"
60 
61 #ifdef _MSC_VER
62 #  include <crtdbg.h>
63 #endif
64 
65 #ifdef XML_UNICODE
66 #  include <wchar.h>
67 #endif
68 
/* Process exit statuses used by xmlwf.  The values are spelled out
   explicitly because they are part of the command-line contract: the usage
   text in this file lists them by number. */
enum ExitCode {
  /* Input files were well-formed and any requested output was written. */
  XMLWF_EXIT_SUCCESS = 0,
  /* Data structures could not be allocated. */
  XMLWF_EXIT_INTERNAL_ERROR = 1,
  /* Going by the name: the input was not well-formed. */
  XMLWF_EXIT_NOT_WELLFORMED = 2,
  /* Going by the name: writing the output failed. */
  XMLWF_EXIT_OUTPUT_ERROR = 3,
  /* Going by the name: the command line was invalid. */
  XMLWF_EXIT_USAGE_ERROR = 4
};
76 
77 /* Structures for handler user data */
78 typedef struct NotationList {
79   struct NotationList *next;
80   const XML_Char *notationName;
81   const XML_Char *systemId;
82   const XML_Char *publicId;
83 } NotationList;
84 
85 typedef struct xmlwfUserData {
86   FILE *fp;
87   NotationList *notationListHead;
88   const XML_Char *currentDoctypeName;
89 } XmlwfUserData;
90 
91 /* NSSEP is the separator character that the namespace-aware handlers in
      this file search for (via tcsrchr) inside element and attribute names,
      and that attributeValue() treats as an end-of-value marker.
      Presumably it is also the namespace separator handed to the parser when
      it is created -- TODO confirm at the creation site.
      Using the lowest non-NUL character value ensures proper sorting. */
92 
93 #define NSSEP T('\001')
94 
95 static void XMLCALL
characterData(void * userData,const XML_Char * s,int len)96 characterData(void *userData, const XML_Char *s, int len) {
97   FILE *fp = ((XmlwfUserData *)userData)->fp;
98   for (; len > 0; --len, ++s) {
99     switch (*s) {
100     case T('&'):
101       fputts(T("&amp;"), fp);
102       break;
103     case T('<'):
104       fputts(T("&lt;"), fp);
105       break;
106     case T('>'):
107       fputts(T("&gt;"), fp);
108       break;
109 #ifdef W3C14N
110     case 13:
111       fputts(T("&#xD;"), fp);
112       break;
113 #else
114     case T('"'):
115       fputts(T("&quot;"), fp);
116       break;
117     case 9:
118     case 10:
119     case 13:
120       ftprintf(fp, T("&#%d;"), *s);
121       break;
122 #endif
123     default:
124       puttc(*s, fp);
125       break;
126     }
127   }
128 }
129 
130 static void
attributeValue(FILE * fp,const XML_Char * s)131 attributeValue(FILE *fp, const XML_Char *s) {
132   puttc(T('='), fp);
133   puttc(T('"'), fp);
134   assert(s);
135   for (;;) {
136     switch (*s) {
137     case 0:
138     case NSSEP:
139       puttc(T('"'), fp);
140       return;
141     case T('&'):
142       fputts(T("&amp;"), fp);
143       break;
144     case T('<'):
145       fputts(T("&lt;"), fp);
146       break;
147     case T('"'):
148       fputts(T("&quot;"), fp);
149       break;
150 #ifdef W3C14N
151     case 9:
152       fputts(T("&#x9;"), fp);
153       break;
154     case 10:
155       fputts(T("&#xA;"), fp);
156       break;
157     case 13:
158       fputts(T("&#xD;"), fp);
159       break;
160 #else
161     case T('>'):
162       fputts(T("&gt;"), fp);
163       break;
164     case 9:
165     case 10:
166     case 13:
167       ftprintf(fp, T("&#%d;"), *s);
168       break;
169 #endif
170     default:
171       puttc(*s, fp);
172       break;
173     }
174     s++;
175   }
176 }
177 
178 /* Lexicographically comparing UTF-8 encoded attribute values,
179 is equivalent to lexicographically comparing based on the character number. */
180 
181 static int
attcmp(const void * att1,const void * att2)182 attcmp(const void *att1, const void *att2) {
183   return tcscmp(*(const XML_Char *const *)att1, *(const XML_Char *const *)att2);
184 }
185 
186 static void XMLCALL
startElement(void * userData,const XML_Char * name,const XML_Char ** atts)187 startElement(void *userData, const XML_Char *name, const XML_Char **atts) {
188   int nAtts;
189   const XML_Char **p;
190   FILE *fp = ((XmlwfUserData *)userData)->fp;
191   puttc(T('<'), fp);
192   fputts(name, fp);
193 
194   p = atts;
195   while (*p)
196     ++p;
197   nAtts = (int)((p - atts) >> 1);
198   if (nAtts > 1)
199     qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, attcmp);
200   while (*atts) {
201     puttc(T(' '), fp);
202     fputts(*atts++, fp);
203     attributeValue(fp, *atts);
204     atts++;
205   }
206   puttc(T('>'), fp);
207 }
208 
209 static void XMLCALL
endElement(void * userData,const XML_Char * name)210 endElement(void *userData, const XML_Char *name) {
211   FILE *fp = ((XmlwfUserData *)userData)->fp;
212   puttc(T('<'), fp);
213   puttc(T('/'), fp);
214   fputts(name, fp);
215   puttc(T('>'), fp);
216 }
217 
218 static int
nsattcmp(const void * p1,const void * p2)219 nsattcmp(const void *p1, const void *p2) {
220   const XML_Char *att1 = *(const XML_Char *const *)p1;
221   const XML_Char *att2 = *(const XML_Char *const *)p2;
222   int sep1 = (tcsrchr(att1, NSSEP) != 0);
223   int sep2 = (tcsrchr(att2, NSSEP) != 0);
224   if (sep1 != sep2)
225     return sep1 - sep2;
226   return tcscmp(att1, att2);
227 }
228 
229 static void XMLCALL
startElementNS(void * userData,const XML_Char * name,const XML_Char ** atts)230 startElementNS(void *userData, const XML_Char *name, const XML_Char **atts) {
231   int nAtts;
232   int nsi;
233   const XML_Char **p;
234   FILE *fp = ((XmlwfUserData *)userData)->fp;
235   const XML_Char *sep;
236   puttc(T('<'), fp);
237 
238   sep = tcsrchr(name, NSSEP);
239   if (sep) {
240     fputts(T("n1:"), fp);
241     fputts(sep + 1, fp);
242     fputts(T(" xmlns:n1"), fp);
243     attributeValue(fp, name);
244     nsi = 2;
245   } else {
246     fputts(name, fp);
247     nsi = 1;
248   }
249 
250   p = atts;
251   while (*p)
252     ++p;
253   nAtts = (int)((p - atts) >> 1);
254   if (nAtts > 1)
255     qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, nsattcmp);
256   while (*atts) {
257     name = *atts++;
258     sep = tcsrchr(name, NSSEP);
259     puttc(T(' '), fp);
260     if (sep) {
261       ftprintf(fp, T("n%d:"), nsi);
262       fputts(sep + 1, fp);
263     } else
264       fputts(name, fp);
265     attributeValue(fp, *atts);
266     if (sep) {
267       ftprintf(fp, T(" xmlns:n%d"), nsi++);
268       attributeValue(fp, name);
269     }
270     atts++;
271   }
272   puttc(T('>'), fp);
273 }
274 
275 static void XMLCALL
endElementNS(void * userData,const XML_Char * name)276 endElementNS(void *userData, const XML_Char *name) {
277   FILE *fp = ((XmlwfUserData *)userData)->fp;
278   const XML_Char *sep;
279   puttc(T('<'), fp);
280   puttc(T('/'), fp);
281   sep = tcsrchr(name, NSSEP);
282   if (sep) {
283     fputts(T("n1:"), fp);
284     fputts(sep + 1, fp);
285   } else
286     fputts(name, fp);
287   puttc(T('>'), fp);
288 }
289 
290 #ifndef W3C14N
291 
292 static void XMLCALL
processingInstruction(void * userData,const XML_Char * target,const XML_Char * data)293 processingInstruction(void *userData, const XML_Char *target,
294                       const XML_Char *data) {
295   FILE *fp = ((XmlwfUserData *)userData)->fp;
296   puttc(T('<'), fp);
297   puttc(T('?'), fp);
298   fputts(target, fp);
299   puttc(T(' '), fp);
300   fputts(data, fp);
301   puttc(T('?'), fp);
302   puttc(T('>'), fp);
303 }
304 
305 static XML_Char *
xcsdup(const XML_Char * s)306 xcsdup(const XML_Char *s) {
307   XML_Char *result;
308   int count = 0;
309   size_t numBytes;
310 
311   /* Get the length of the string, including terminator */
312   while (s[count++] != 0) {
313     /* Do nothing */
314   }
315   numBytes = count * sizeof(XML_Char);
316   result = malloc(numBytes);
317   if (result == NULL)
318     return NULL;
319   memcpy(result, s, numBytes);
320   return result;
321 }
322 
323 static void XMLCALL
startDoctypeDecl(void * userData,const XML_Char * doctypeName,const XML_Char * sysid,const XML_Char * publid,int has_internal_subset)324 startDoctypeDecl(void *userData, const XML_Char *doctypeName,
325                  const XML_Char *sysid, const XML_Char *publid,
326                  int has_internal_subset) {
327   XmlwfUserData *data = (XmlwfUserData *)userData;
328   UNUSED_P(sysid);
329   UNUSED_P(publid);
330   UNUSED_P(has_internal_subset);
331   data->currentDoctypeName = xcsdup(doctypeName);
332 }
333 
334 static void
freeNotations(XmlwfUserData * data)335 freeNotations(XmlwfUserData *data) {
336   NotationList *notationListHead = data->notationListHead;
337 
338   while (notationListHead != NULL) {
339     NotationList *next = notationListHead->next;
340     free((void *)notationListHead->notationName);
341     free((void *)notationListHead->systemId);
342     free((void *)notationListHead->publicId);
343     free(notationListHead);
344     notationListHead = next;
345   }
346   data->notationListHead = NULL;
347 }
348 
349 static void
cleanupUserData(XmlwfUserData * userData)350 cleanupUserData(XmlwfUserData *userData) {
351   free((void *)userData->currentDoctypeName);
352   userData->currentDoctypeName = NULL;
353   freeNotations(userData);
354 }
355 
356 static int
xcscmp(const XML_Char * xs,const XML_Char * xt)357 xcscmp(const XML_Char *xs, const XML_Char *xt) {
358   while (*xs != 0 && *xt != 0) {
359     if (*xs < *xt)
360       return -1;
361     if (*xs > *xt)
362       return 1;
363     xs++;
364     xt++;
365   }
366   if (*xs < *xt)
367     return -1;
368   if (*xs > *xt)
369     return 1;
370   return 0;
371 }
372 
373 static int
notationCmp(const void * a,const void * b)374 notationCmp(const void *a, const void *b) {
375   const NotationList *const n1 = *(const NotationList *const *)a;
376   const NotationList *const n2 = *(const NotationList *const *)b;
377 
378   return xcscmp(n1->notationName, n2->notationName);
379 }
380 
381 static void XMLCALL
endDoctypeDecl(void * userData)382 endDoctypeDecl(void *userData) {
383   XmlwfUserData *data = (XmlwfUserData *)userData;
384   NotationList **notations;
385   int notationCount = 0;
386   NotationList *p;
387   int i;
388 
389   /* How many notations do we have? */
390   for (p = data->notationListHead; p != NULL; p = p->next)
391     notationCount++;
392   if (notationCount == 0) {
393     /* Nothing to report */
394     goto cleanUp;
395   }
396 
397   notations = malloc(notationCount * sizeof(NotationList *));
398   if (notations == NULL) {
399     fprintf(stderr, "Unable to sort notations");
400     goto cleanUp;
401   }
402 
403   for (p = data->notationListHead, i = 0; i < notationCount; p = p->next, i++) {
404     notations[i] = p;
405   }
406   qsort(notations, notationCount, sizeof(NotationList *), notationCmp);
407 
408   /* Output the DOCTYPE header */
409   fputts(T("<!DOCTYPE "), data->fp);
410   fputts(data->currentDoctypeName, data->fp);
411   fputts(T(" [\n"), data->fp);
412 
413   /* Now the NOTATIONs */
414   for (i = 0; i < notationCount; i++) {
415     fputts(T("<!NOTATION "), data->fp);
416     fputts(notations[i]->notationName, data->fp);
417     if (notations[i]->publicId != NULL) {
418       fputts(T(" PUBLIC '"), data->fp);
419       fputts(notations[i]->publicId, data->fp);
420       puttc(T('\''), data->fp);
421       if (notations[i]->systemId != NULL) {
422         puttc(T(' '), data->fp);
423         puttc(T('\''), data->fp);
424         fputts(notations[i]->systemId, data->fp);
425         puttc(T('\''), data->fp);
426       }
427     } else if (notations[i]->systemId != NULL) {
428       fputts(T(" SYSTEM '"), data->fp);
429       fputts(notations[i]->systemId, data->fp);
430       puttc(T('\''), data->fp);
431     }
432     puttc(T('>'), data->fp);
433     puttc(T('\n'), data->fp);
434   }
435 
436   /* Finally end the DOCTYPE */
437   fputts(T("]>\n"), data->fp);
438 
439   free(notations);
440 
441 cleanUp:
442   freeNotations(data);
443   free((void *)data->currentDoctypeName);
444   data->currentDoctypeName = NULL;
445 }
446 
447 static void XMLCALL
notationDecl(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)448 notationDecl(void *userData, const XML_Char *notationName, const XML_Char *base,
449              const XML_Char *systemId, const XML_Char *publicId) {
450   XmlwfUserData *data = (XmlwfUserData *)userData;
451   NotationList *entry = malloc(sizeof(NotationList));
452   const char *errorMessage = "Unable to store NOTATION for output\n";
453 
454   UNUSED_P(base);
455   if (entry == NULL) {
456     fputs(errorMessage, stderr);
457     return; /* Nothing we can really do about this */
458   }
459   entry->notationName = xcsdup(notationName);
460   if (entry->notationName == NULL) {
461     fputs(errorMessage, stderr);
462     free(entry);
463     return;
464   }
465   if (systemId != NULL) {
466     entry->systemId = xcsdup(systemId);
467     if (entry->systemId == NULL) {
468       fputs(errorMessage, stderr);
469       free((void *)entry->notationName);
470       free(entry);
471       return;
472     }
473   } else {
474     entry->systemId = NULL;
475   }
476   if (publicId != NULL) {
477     entry->publicId = xcsdup(publicId);
478     if (entry->publicId == NULL) {
479       fputs(errorMessage, stderr);
480       free((void *)entry->systemId); /* Safe if it's NULL */
481       free((void *)entry->notationName);
482       free(entry);
483       return;
484     }
485   } else {
486     entry->publicId = NULL;
487   }
488 
489   entry->next = data->notationListHead;
490   data->notationListHead = entry;
491 }
492 
493 #endif /* not W3C14N */
494 
495 static void XMLCALL
defaultCharacterData(void * userData,const XML_Char * s,int len)496 defaultCharacterData(void *userData, const XML_Char *s, int len) {
497   UNUSED_P(s);
498   UNUSED_P(len);
499   XML_DefaultCurrent((XML_Parser)userData);
500 }
501 
502 static void XMLCALL
defaultStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)503 defaultStartElement(void *userData, const XML_Char *name,
504                     const XML_Char **atts) {
505   UNUSED_P(name);
506   UNUSED_P(atts);
507   XML_DefaultCurrent((XML_Parser)userData);
508 }
509 
510 static void XMLCALL
defaultEndElement(void * userData,const XML_Char * name)511 defaultEndElement(void *userData, const XML_Char *name) {
512   UNUSED_P(name);
513   XML_DefaultCurrent((XML_Parser)userData);
514 }
515 
516 static void XMLCALL
defaultProcessingInstruction(void * userData,const XML_Char * target,const XML_Char * data)517 defaultProcessingInstruction(void *userData, const XML_Char *target,
518                              const XML_Char *data) {
519   UNUSED_P(target);
520   UNUSED_P(data);
521   XML_DefaultCurrent((XML_Parser)userData);
522 }
523 
524 static void XMLCALL
nopCharacterData(void * userData,const XML_Char * s,int len)525 nopCharacterData(void *userData, const XML_Char *s, int len) {
526   UNUSED_P(userData);
527   UNUSED_P(s);
528   UNUSED_P(len);
529 }
530 
531 static void XMLCALL
nopStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)532 nopStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
533   UNUSED_P(userData);
534   UNUSED_P(name);
535   UNUSED_P(atts);
536 }
537 
538 static void XMLCALL
nopEndElement(void * userData,const XML_Char * name)539 nopEndElement(void *userData, const XML_Char *name) {
540   UNUSED_P(userData);
541   UNUSED_P(name);
542 }
543 
544 static void XMLCALL
nopProcessingInstruction(void * userData,const XML_Char * target,const XML_Char * data)545 nopProcessingInstruction(void *userData, const XML_Char *target,
546                          const XML_Char *data) {
547   UNUSED_P(userData);
548   UNUSED_P(target);
549   UNUSED_P(data);
550 }
551 
552 static void XMLCALL
markup(void * userData,const XML_Char * s,int len)553 markup(void *userData, const XML_Char *s, int len) {
554   FILE *fp = ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp;
555   for (; len > 0; --len, ++s)
556     puttc(*s, fp);
557 }
558 
559 static void
metaLocation(XML_Parser parser)560 metaLocation(XML_Parser parser) {
561   const XML_Char *uri = XML_GetBase(parser);
562   FILE *fp = ((XmlwfUserData *)XML_GetUserData(parser))->fp;
563   if (uri)
564     ftprintf(fp, T(" uri=\"%s\""), uri);
565   ftprintf(fp,
566            T(" byte=\"%") T(XML_FMT_INT_MOD) T("d\"") T(" nbytes=\"%d\"")
567                T(" line=\"%") T(XML_FMT_INT_MOD) T("u\"") T(" col=\"%")
568                    T(XML_FMT_INT_MOD) T("u\""),
569            XML_GetCurrentByteIndex(parser), XML_GetCurrentByteCount(parser),
570            XML_GetCurrentLineNumber(parser),
571            XML_GetCurrentColumnNumber(parser));
572 }
573 
574 static void
metaStartDocument(void * userData)575 metaStartDocument(void *userData) {
576   fputts(T("<document>\n"),
577          ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp);
578 }
579 
580 static void
metaEndDocument(void * userData)581 metaEndDocument(void *userData) {
582   fputts(T("</document>\n"),
583          ((XmlwfUserData *)XML_GetUserData((XML_Parser)userData))->fp);
584 }
585 
586 static void XMLCALL
metaStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)587 metaStartElement(void *userData, const XML_Char *name, const XML_Char **atts) {
588   XML_Parser parser = (XML_Parser)userData;
589   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
590   FILE *fp = data->fp;
591   const XML_Char **specifiedAttsEnd
592       = atts + XML_GetSpecifiedAttributeCount(parser);
593   const XML_Char **idAttPtr;
594   int idAttIndex = XML_GetIdAttributeIndex(parser);
595   if (idAttIndex < 0)
596     idAttPtr = 0;
597   else
598     idAttPtr = atts + idAttIndex;
599 
600   ftprintf(fp, T("<starttag name=\"%s\""), name);
601   metaLocation(parser);
602   if (*atts) {
603     fputts(T(">\n"), fp);
604     do {
605       ftprintf(fp, T("<attribute name=\"%s\" value=\""), atts[0]);
606       characterData(data, atts[1], (int)tcslen(atts[1]));
607       if (atts >= specifiedAttsEnd)
608         fputts(T("\" defaulted=\"yes\"/>\n"), fp);
609       else if (atts == idAttPtr)
610         fputts(T("\" id=\"yes\"/>\n"), fp);
611       else
612         fputts(T("\"/>\n"), fp);
613     } while (*(atts += 2));
614     fputts(T("</starttag>\n"), fp);
615   } else
616     fputts(T("/>\n"), fp);
617 }
618 
619 static void XMLCALL
metaEndElement(void * userData,const XML_Char * name)620 metaEndElement(void *userData, const XML_Char *name) {
621   XML_Parser parser = (XML_Parser)userData;
622   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
623   FILE *fp = data->fp;
624   ftprintf(fp, T("<endtag name=\"%s\""), name);
625   metaLocation(parser);
626   fputts(T("/>\n"), fp);
627 }
628 
629 static void XMLCALL
metaProcessingInstruction(void * userData,const XML_Char * target,const XML_Char * data)630 metaProcessingInstruction(void *userData, const XML_Char *target,
631                           const XML_Char *data) {
632   XML_Parser parser = (XML_Parser)userData;
633   XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser);
634   FILE *fp = usrData->fp;
635   ftprintf(fp, T("<pi target=\"%s\" data=\""), target);
636   characterData(usrData, data, (int)tcslen(data));
637   puttc(T('"'), fp);
638   metaLocation(parser);
639   fputts(T("/>\n"), fp);
640 }
641 
642 static void XMLCALL
metaComment(void * userData,const XML_Char * data)643 metaComment(void *userData, const XML_Char *data) {
644   XML_Parser parser = (XML_Parser)userData;
645   XmlwfUserData *usrData = (XmlwfUserData *)XML_GetUserData(parser);
646   FILE *fp = usrData->fp;
647   fputts(T("<comment data=\""), fp);
648   characterData(usrData, data, (int)tcslen(data));
649   puttc(T('"'), fp);
650   metaLocation(parser);
651   fputts(T("/>\n"), fp);
652 }
653 
654 static void XMLCALL
metaStartCdataSection(void * userData)655 metaStartCdataSection(void *userData) {
656   XML_Parser parser = (XML_Parser)userData;
657   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
658   FILE *fp = data->fp;
659   fputts(T("<startcdata"), fp);
660   metaLocation(parser);
661   fputts(T("/>\n"), fp);
662 }
663 
664 static void XMLCALL
metaEndCdataSection(void * userData)665 metaEndCdataSection(void *userData) {
666   XML_Parser parser = (XML_Parser)userData;
667   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
668   FILE *fp = data->fp;
669   fputts(T("<endcdata"), fp);
670   metaLocation(parser);
671   fputts(T("/>\n"), fp);
672 }
673 
674 static void XMLCALL
metaCharacterData(void * userData,const XML_Char * s,int len)675 metaCharacterData(void *userData, const XML_Char *s, int len) {
676   XML_Parser parser = (XML_Parser)userData;
677   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
678   FILE *fp = data->fp;
679   fputts(T("<chars str=\""), fp);
680   characterData(data, s, len);
681   puttc(T('"'), fp);
682   metaLocation(parser);
683   fputts(T("/>\n"), fp);
684 }
685 
686 static void XMLCALL
metaStartDoctypeDecl(void * userData,const XML_Char * doctypeName,const XML_Char * sysid,const XML_Char * pubid,int has_internal_subset)687 metaStartDoctypeDecl(void *userData, const XML_Char *doctypeName,
688                      const XML_Char *sysid, const XML_Char *pubid,
689                      int has_internal_subset) {
690   XML_Parser parser = (XML_Parser)userData;
691   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
692   FILE *fp = data->fp;
693   UNUSED_P(sysid);
694   UNUSED_P(pubid);
695   UNUSED_P(has_internal_subset);
696   ftprintf(fp, T("<startdoctype name=\"%s\""), doctypeName);
697   metaLocation(parser);
698   fputts(T("/>\n"), fp);
699 }
700 
701 static void XMLCALL
metaEndDoctypeDecl(void * userData)702 metaEndDoctypeDecl(void *userData) {
703   XML_Parser parser = (XML_Parser)userData;
704   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
705   FILE *fp = data->fp;
706   fputts(T("<enddoctype"), fp);
707   metaLocation(parser);
708   fputts(T("/>\n"), fp);
709 }
710 
711 static void XMLCALL
metaNotationDecl(void * userData,const XML_Char * notationName,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)712 metaNotationDecl(void *userData, const XML_Char *notationName,
713                  const XML_Char *base, const XML_Char *systemId,
714                  const XML_Char *publicId) {
715   XML_Parser parser = (XML_Parser)userData;
716   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
717   FILE *fp = data->fp;
718   UNUSED_P(base);
719   ftprintf(fp, T("<notation name=\"%s\""), notationName);
720   if (publicId)
721     ftprintf(fp, T(" public=\"%s\""), publicId);
722   if (systemId) {
723     fputts(T(" system=\""), fp);
724     characterData(data, systemId, (int)tcslen(systemId));
725     puttc(T('"'), fp);
726   }
727   metaLocation(parser);
728   fputts(T("/>\n"), fp);
729 }
730 
731 static void XMLCALL
metaEntityDecl(void * userData,const XML_Char * entityName,int is_param,const XML_Char * value,int value_length,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId,const XML_Char * notationName)732 metaEntityDecl(void *userData, const XML_Char *entityName, int is_param,
733                const XML_Char *value, int value_length, const XML_Char *base,
734                const XML_Char *systemId, const XML_Char *publicId,
735                const XML_Char *notationName) {
736   XML_Parser parser = (XML_Parser)userData;
737   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
738   FILE *fp = data->fp;
739 
740   UNUSED_P(is_param);
741   UNUSED_P(base);
742   if (value) {
743     ftprintf(fp, T("<entity name=\"%s\""), entityName);
744     metaLocation(parser);
745     puttc(T('>'), fp);
746     characterData(data, value, value_length);
747     fputts(T("</entity/>\n"), fp);
748   } else if (notationName) {
749     ftprintf(fp, T("<entity name=\"%s\""), entityName);
750     if (publicId)
751       ftprintf(fp, T(" public=\"%s\""), publicId);
752     fputts(T(" system=\""), fp);
753     characterData(data, systemId, (int)tcslen(systemId));
754     puttc(T('"'), fp);
755     ftprintf(fp, T(" notation=\"%s\""), notationName);
756     metaLocation(parser);
757     fputts(T("/>\n"), fp);
758   } else {
759     ftprintf(fp, T("<entity name=\"%s\""), entityName);
760     if (publicId)
761       ftprintf(fp, T(" public=\"%s\""), publicId);
762     fputts(T(" system=\""), fp);
763     characterData(data, systemId, (int)tcslen(systemId));
764     puttc(T('"'), fp);
765     metaLocation(parser);
766     fputts(T("/>\n"), fp);
767   }
768 }
769 
770 static void XMLCALL
metaStartNamespaceDecl(void * userData,const XML_Char * prefix,const XML_Char * uri)771 metaStartNamespaceDecl(void *userData, const XML_Char *prefix,
772                        const XML_Char *uri) {
773   XML_Parser parser = (XML_Parser)userData;
774   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
775   FILE *fp = data->fp;
776   fputts(T("<startns"), fp);
777   if (prefix)
778     ftprintf(fp, T(" prefix=\"%s\""), prefix);
779   if (uri) {
780     fputts(T(" ns=\""), fp);
781     characterData(data, uri, (int)tcslen(uri));
782     fputts(T("\"/>\n"), fp);
783   } else
784     fputts(T("/>\n"), fp);
785 }
786 
787 static void XMLCALL
metaEndNamespaceDecl(void * userData,const XML_Char * prefix)788 metaEndNamespaceDecl(void *userData, const XML_Char *prefix) {
789   XML_Parser parser = (XML_Parser)userData;
790   XmlwfUserData *data = (XmlwfUserData *)XML_GetUserData(parser);
791   FILE *fp = data->fp;
792   if (! prefix)
793     fputts(T("<endns/>\n"), fp);
794   else
795     ftprintf(fp, T("<endns prefix=\"%s\"/>\n"), prefix);
796 }
797 
798 static int XMLCALL
unknownEncodingConvert(void * data,const char * p)799 unknownEncodingConvert(void *data, const char *p) {
800   return codepageConvert(*(int *)data, p);
801 }
802 
803 static int XMLCALL
unknownEncoding(void * userData,const XML_Char * name,XML_Encoding * info)804 unknownEncoding(void *userData, const XML_Char *name, XML_Encoding *info) {
805   int cp;
806   static const XML_Char prefixL[] = T("windows-");
807   static const XML_Char prefixU[] = T("WINDOWS-");
808   int i;
809 
810   UNUSED_P(userData);
811   for (i = 0; prefixU[i]; i++)
812     if (name[i] != prefixU[i] && name[i] != prefixL[i])
813       return 0;
814 
815   cp = 0;
816   for (; name[i]; i++) {
817     static const XML_Char digits[] = T("0123456789");
818     const XML_Char *s = tcschr(digits, name[i]);
819     if (! s)
820       return 0;
821     cp *= 10;
822     cp += (int)(s - digits);
823     if (cp >= 0x10000)
824       return 0;
825   }
826   if (! codepageMap(cp, info->map))
827     return 0;
828   info->convert = unknownEncodingConvert;
829   /* We could just cast the code page integer to a void *,
830   and avoid the use of release. */
831   info->release = free;
832   info->data = malloc(sizeof(int));
833   if (! info->data)
834     return 0;
835   *(int *)info->data = cp;
836   return 1;
837 }
838 
839 static int XMLCALL
notStandalone(void * userData)840 notStandalone(void *userData) {
841   UNUSED_P(userData);
842   return 0;
843 }
844 
845 static void
showVersion(XML_Char * prog)846 showVersion(XML_Char *prog) {
847   XML_Char *s = prog;
848   XML_Char ch;
849   const XML_Feature *features = XML_GetFeatureList();
850   while ((ch = *s) != 0) {
851     if (ch == '/'
852 #if defined(_WIN32)
853         || ch == '\\'
854 #endif
855     )
856       prog = s + 1;
857     ++s;
858   }
859   ftprintf(stdout, T("%s using %s\n"), prog, XML_ExpatVersion());
860   if (features != NULL && features[0].feature != XML_FEATURE_END) {
861     int i = 1;
862     ftprintf(stdout, T("%s"), features[0].name);
863     if (features[0].value)
864       ftprintf(stdout, T("=%ld"), features[0].value);
865     while (features[i].feature != XML_FEATURE_END) {
866       ftprintf(stdout, T(", %s"), features[i].name);
867       if (features[i].value)
868         ftprintf(stdout, T("=%ld"), features[i].value);
869       ++i;
870     }
871     ftprintf(stdout, T("\n"));
872   }
873 }
874 
875 #if defined(__GNUC__)
876 __attribute__((noreturn))
877 #endif
878 static void
usage(const XML_Char * prog,int rc)879 usage(const XML_Char *prog, int rc) {
880   ftprintf(
881       stderr,
882       /* Generated with:
883        * $ xmlwf/xmlwf_helpgen.sh
884        * To update, change xmlwf/xmlwf_helpgen.py, then paste the output of
885        * xmlwf/xmlwf_helpgen.sh in here.
886        */
887       /* clang-format off */
888       T("usage:\n")
889       T("  %s [OPTIONS] [FILE ...]\n")
890       T("  %s -h|--help\n")
891       T("  %s -v|--version\n")
892       T("\n")
893       T("xmlwf - Determines if an XML document is well-formed\n")
894       T("\n")
895       T("positional arguments:\n")
896       T("  FILE           file to process (default: STDIN)\n")
897       T("\n")
898       T("input control arguments:\n")
899       T("  -s             print an error if the document is not [s]tandalone\n")
900       T("  -n             enable [n]amespace processing\n")
901       T("  -p             enable processing of external DTDs and [p]arameter entities\n")
902       T("  -x             enable processing of e[x]ternal entities\n")
903       T("                 (CAREFUL! This makes xmlwf vulnerable to external entity attacks (XXE).)\n")
904       T("  -e ENCODING    override any in-document [e]ncoding declaration\n")
905       T("  -w             enable support for [W]indows code pages\n")
906       T("  -r             disable memory-mapping and use [r]ead calls instead\n")
907       T("  -g BYTES       buffer size to request per call pair to XML_[G]etBuffer and read (default: 8 KiB)\n")
908       T("  -k             when processing multiple files, [k]eep processing after first file with error\n")
909       T("\n")
910       T("output control arguments:\n")
911       T("  -d DIRECTORY   output [d]estination directory\n")
912       T("  -c             write a [c]opy of input XML, not canonical XML\n")
913       T("  -m             write [m]eta XML, not canonical XML\n")
914       T("  -t             write no XML output for [t]iming of plain parsing\n")
915       T("  -N             enable adding doctype and [n]otation declarations\n")
916       T("\n")
917       T("amplification attack protection (e.g. billion laughs):\n")
918       T("  NOTE: If you ever need to increase these values for non-attack payload, please file a bug report.\n")
919       T("\n")
920       T("  -a FACTOR      set maximum tolerated [a]mplification factor (default: 100.0)\n")
921       T("  -b BYTES       set number of output [b]ytes needed to activate (default: 8 MiB/64 MiB)\n")
922       T("\n")
923       T("reparse deferral:\n")
924       T("  -q             disable reparse deferral, and allow [q]uadratic parse runtime with large tokens\n")
925       T("\n")
926       T("info arguments:\n")
927       T("  -h, --help     show this [h]elp message and exit\n")
928       T("  -v, --version  show program's [v]ersion number and exit\n")
929       T("\n")
930       T("environment variables:\n")
931       T("  EXPAT_ACCOUNTING_DEBUG=(0|1|2|3)\n")
932       T("                 Control verbosity of accounting debugging (default: 0)\n")
933       T("  EXPAT_ENTITY_DEBUG=(0|1)\n")
934       T("                 Control verbosity of entity debugging (default: 0)\n")
935       T("  EXPAT_ENTROPY_DEBUG=(0|1)\n")
936       T("                 Control verbosity of entropy debugging (default: 0)\n")
937       T("  EXPAT_MALLOC_DEBUG=(0|1|2)\n")
938       T("                 Control verbosity of allocation tracker (default: 0)\n")
939       T("\n")
940       T("exit status:\n")
941       T("  0              the input files are well-formed and the output (if requested) was written successfully\n")
942       T("  1              could not allocate data structures, signals a serious problem with execution environment\n")
943       T("  2              one or more input files were not well-formed\n")
944       T("  3              could not create an output file\n")
945       T("  4              command-line argument error\n")
946       T("\n")
947       T("xmlwf of libexpat is software libre, licensed under the MIT license.\n")
948       T("Please report bugs at https://github.com/libexpat/libexpat/issues -- thank you!\n")
949       , /* clang-format on */
950       prog, prog, prog);
951   exit(rc);
952 }
953 
#if defined(__MINGW32__) && defined(XML_UNICODE)
/* Forward declaration of wmain, emitted only for MinGW builds with
 * XML_UNICODE defined.  Its sole purpose is to silence the compiler warning
 * about a missing prototype (presumably tmain below expands to wmain in this
 * configuration -- confirm against the tmain definition). */
int wmain(int argc, XML_Char **argv);
#endif
958 
/* Consume the value of a short option that takes one (e.g. "-d DIR" or
 * "-dDIR") and store a pointer to it in constCharStarTarget.
 *
 * On entry, argv[i][j] is the option letter.  If more characters follow the
 * letter inside the same argument, they are the value; otherwise the value is
 * the next argument, and usage(..) is called with XMLWF_EXIT_USAGE_ERROR when
 * there is no next argument.  Afterwards i indexes the argument following the
 * value and j is reset to 0.
 *
 * NOTE: i and j are modified and every parameter may be evaluated more than
 * once, so pass plain variables only.  The body is wrapped in
 * do { .. } while (0) so that the invocation plus its trailing semicolon
 * forms exactly one statement (safe in an unbraced if/else).
 */
#define XMLWF_SHIFT_ARG_INTO(constCharStarTarget, argc, argv, i, j)            \
  do {                                                                         \
    if ((argv)[(i)][(j) + 1] == T('\0')) {                                     \
      if (++(i) == (argc)) {                                                   \
        usage((argv)[0], XMLWF_EXIT_USAGE_ERROR);                              \
        /* usage called exit(..), never gets here */                           \
      }                                                                        \
      (constCharStarTarget) = (argv)[(i)];                                     \
    } else {                                                                   \
      (constCharStarTarget) = (argv)[(i)] + (j) + 1;                           \
    }                                                                          \
    (i)++;                                                                     \
    (j) = 0;                                                                   \
  } while (0)
973 
974 int
tmain(int argc,XML_Char ** argv)975 tmain(int argc, XML_Char **argv) {
976   int i, j;
977   const XML_Char *outputDir = NULL;
978   const XML_Char *encoding = NULL;
979   unsigned processFlags = XML_MAP_FILE;
980   int windowsCodePages = 0;
981   int outputType = 0;
982   int useNamespaces = 0;
983   int requireStandalone = 0;
984   int requiresNotations = 0;
985   int continueOnError = 0;
986 
987   float attackMaximumAmplification = -1.0f; /* signaling "not set" */
988   unsigned long long attackThresholdBytes = 0;
989   XML_Bool attackThresholdGiven = XML_FALSE;
990 
991   XML_Bool disableDeferral = XML_FALSE;
992 
993   int exitCode = XMLWF_EXIT_SUCCESS;
994   enum XML_ParamEntityParsing paramEntityParsing
995       = XML_PARAM_ENTITY_PARSING_NEVER;
996   int useStdin = 0;
997   XmlwfUserData userData = {NULL, NULL, NULL};
998 
999 #ifdef _MSC_VER
1000   _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
1001 #endif
1002 
1003   i = 1;
1004   j = 0;
1005   while (i < argc) {
1006     if (j == 0) {
1007       if (argv[i][0] != T('-'))
1008         break;
1009       if (argv[i][1] == T('-')) {
1010         if (argv[i][2] == T('\0')) {
1011           i++;
1012           break;
1013         } else if (tcscmp(argv[i] + 2, T("help")) == 0) {
1014           usage(argv[0], XMLWF_EXIT_SUCCESS);
1015           // usage called exit(..), never gets here
1016         } else if (tcscmp(argv[i] + 2, T("version")) == 0) {
1017           showVersion(argv[0]);
1018           return XMLWF_EXIT_SUCCESS;
1019         }
1020       }
1021       j++;
1022     }
1023     switch (argv[i][j]) {
1024     case T('r'):
1025       processFlags &= ~XML_MAP_FILE;
1026       j++;
1027       break;
1028     case T('s'):
1029       requireStandalone = 1;
1030       j++;
1031       break;
1032     case T('n'):
1033       useNamespaces = 1;
1034       j++;
1035       break;
1036     case T('p'):
1037       paramEntityParsing = XML_PARAM_ENTITY_PARSING_ALWAYS;
1038       /* fall through */
1039     case T('x'):
1040       processFlags |= XML_EXTERNAL_ENTITIES;
1041       j++;
1042       break;
1043     case T('w'):
1044       windowsCodePages = 1;
1045       j++;
1046       break;
1047     case T('m'):
1048       outputType = 'm';
1049       j++;
1050       break;
1051     case T('c'):
1052       outputType = 'c';
1053       useNamespaces = 0;
1054       j++;
1055       break;
1056     case T('t'):
1057       outputType = 't';
1058       j++;
1059       break;
1060     case T('N'):
1061       requiresNotations = 1;
1062       j++;
1063       break;
1064     case T('d'):
1065       XMLWF_SHIFT_ARG_INTO(outputDir, argc, argv, i, j);
1066       break;
1067     case T('e'):
1068       XMLWF_SHIFT_ARG_INTO(encoding, argc, argv, i, j);
1069       break;
1070     case T('h'):
1071       usage(argv[0], XMLWF_EXIT_SUCCESS);
1072       // usage called exit(..), never gets here
1073     case T('v'):
1074       showVersion(argv[0]);
1075       return XMLWF_EXIT_SUCCESS;
1076     case T('g'): {
1077       const XML_Char *valueText = NULL;
1078       XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
1079 
1080       errno = 0;
1081       XML_Char *afterValueText = (XML_Char *)valueText;
1082       const long long read_size_bytes_candidate
1083           = tcstoull(valueText, &afterValueText, 10);
1084       if ((errno != 0) || (afterValueText[0] != T('\0'))
1085           || (read_size_bytes_candidate < 1)
1086           || (read_size_bytes_candidate > (INT_MAX / 2 + 1))) {
1087         // This prevents tperror(..) from reporting misleading "[..]: Success"
1088         errno = ERANGE;
1089         tperror(T("invalid buffer size") T(
1090             " (needs an integer from 1 to INT_MAX/2+1 i.e. 1,073,741,824 on most platforms)"));
1091         exit(XMLWF_EXIT_USAGE_ERROR);
1092       }
1093       g_read_size_bytes = (int)read_size_bytes_candidate;
1094       break;
1095     }
1096     case T('k'):
1097       continueOnError = 1;
1098       j++;
1099       break;
1100     case T('a'): {
1101       const XML_Char *valueText = NULL;
1102       XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
1103 
1104       errno = 0;
1105       XML_Char *afterValueText = NULL;
1106       attackMaximumAmplification = tcstof(valueText, &afterValueText);
1107       if ((errno != 0) || (afterValueText[0] != T('\0'))
1108           || isnan(attackMaximumAmplification)
1109           || (attackMaximumAmplification < 1.0f)) {
1110         // This prevents tperror(..) from reporting misleading "[..]: Success"
1111         errno = ERANGE;
1112         tperror(T("invalid amplification limit") T(
1113             " (needs a floating point number greater or equal than 1.0)"));
1114         exit(XMLWF_EXIT_USAGE_ERROR);
1115       }
1116 #if XML_GE == 0
1117       ftprintf(stderr,
1118                T("Warning: Given amplification limit ignored")
1119                    T(", xmlwf has been compiled without DTD/GE support.\n"));
1120 #endif
1121       break;
1122     }
1123     case T('b'): {
1124       const XML_Char *valueText = NULL;
1125       XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
1126 
1127       errno = 0;
1128       XML_Char *afterValueText = (XML_Char *)valueText;
1129       attackThresholdBytes = tcstoull(valueText, &afterValueText, 10);
1130       if ((errno != 0) || (afterValueText[0] != T('\0'))) {
1131         // This prevents tperror(..) from reporting misleading "[..]: Success"
1132         errno = ERANGE;
1133         tperror(T("invalid ignore threshold")
1134                     T(" (needs an integer from 0 to 2^64-1)"));
1135         exit(XMLWF_EXIT_USAGE_ERROR);
1136       }
1137       attackThresholdGiven = XML_TRUE;
1138 #if XML_GE == 0
1139       ftprintf(stderr,
1140                T("Warning: Given attack threshold ignored")
1141                    T(", xmlwf has been compiled without DTD/GE support.\n"));
1142 #endif
1143       break;
1144     }
1145     case T('q'): {
1146       disableDeferral = XML_TRUE;
1147       j++;
1148       break;
1149     }
1150     case T('\0'):
1151       if (j > 1) {
1152         i++;
1153         j = 0;
1154         break;
1155       }
1156       /* fall through */
1157     default:
1158       usage(argv[0], XMLWF_EXIT_USAGE_ERROR);
1159       // usage called exit(..), never gets here
1160     }
1161   }
1162   if (i == argc) {
1163     useStdin = 1;
1164     processFlags &= ~XML_MAP_FILE;
1165     i--;
1166   }
1167   for (; i < argc; i++) {
1168     XML_Char *outName = 0;
1169     int result;
1170     XML_Parser parser;
1171     if (useNamespaces)
1172       parser = XML_ParserCreateNS(encoding, NSSEP);
1173     else
1174       parser = XML_ParserCreate(encoding);
1175 
1176     if (! parser) {
1177       tperror(T("Could not instantiate parser"));
1178       exit(XMLWF_EXIT_INTERNAL_ERROR);
1179     }
1180 
1181     if (attackMaximumAmplification != -1.0f) {
1182 #if XML_GE == 1
1183       XML_SetBillionLaughsAttackProtectionMaximumAmplification(
1184           parser, attackMaximumAmplification);
1185       XML_SetAllocTrackerMaximumAmplification(parser,
1186                                               attackMaximumAmplification);
1187 #endif
1188     }
1189     if (attackThresholdGiven) {
1190 #if XML_GE == 1
1191       XML_SetBillionLaughsAttackProtectionActivationThreshold(
1192           parser, attackThresholdBytes);
1193       XML_SetAllocTrackerActivationThreshold(parser, attackThresholdBytes);
1194 #else
1195       (void)attackThresholdBytes; // silence -Wunused-but-set-variable
1196 #endif
1197     }
1198 
1199     if (disableDeferral) {
1200       const XML_Bool success = XML_SetReparseDeferralEnabled(parser, XML_FALSE);
1201       if (! success) {
1202         // This prevents tperror(..) from reporting misleading "[..]: Success"
1203         errno = EINVAL;
1204         tperror(T("Failed to disable reparse deferral"));
1205         exit(XMLWF_EXIT_INTERNAL_ERROR);
1206       }
1207     }
1208 
1209     if (requireStandalone)
1210       XML_SetNotStandaloneHandler(parser, notStandalone);
1211     XML_SetParamEntityParsing(parser, paramEntityParsing);
1212     if (outputType == 't') {
1213       /* This is for doing timings; this gives a more realistic estimate of
1214          the parsing time. */
1215       outputDir = 0;
1216       XML_SetElementHandler(parser, nopStartElement, nopEndElement);
1217       XML_SetCharacterDataHandler(parser, nopCharacterData);
1218       XML_SetProcessingInstructionHandler(parser, nopProcessingInstruction);
1219     } else if (outputDir) {
1220       const XML_Char *delim = T("/");
1221       const XML_Char *file = useStdin ? T("STDIN") : argv[i];
1222       if (! useStdin) {
1223         /* Jump after last (back)slash */
1224         const XML_Char *lastDelim = tcsrchr(file, delim[0]);
1225         if (lastDelim)
1226           file = lastDelim + 1;
1227 #if defined(_WIN32)
1228         else {
1229           const XML_Char *winDelim = T("\\");
1230           lastDelim = tcsrchr(file, winDelim[0]);
1231           if (lastDelim) {
1232             file = lastDelim + 1;
1233             delim = winDelim;
1234           }
1235         }
1236 #endif
1237       }
1238       outName = (XML_Char *)malloc((tcslen(outputDir) + tcslen(file) + 2)
1239                                    * sizeof(XML_Char));
1240       if (! outName) {
1241         tperror(T("Could not allocate memory"));
1242         exit(XMLWF_EXIT_INTERNAL_ERROR);
1243       }
1244       tcscpy(outName, outputDir);
1245       tcscat(outName, delim);
1246       tcscat(outName, file);
1247       userData.fp = tfopen(outName, T("wb"));
1248       if (! userData.fp) {
1249         tperror(outName);
1250         exitCode = XMLWF_EXIT_OUTPUT_ERROR;
1251         free(outName);
1252         XML_ParserFree(parser);
1253         if (continueOnError) {
1254           continue;
1255         } else {
1256           break;
1257         }
1258       }
1259       setvbuf(userData.fp, NULL, _IOFBF, 16384);
1260 #ifdef XML_UNICODE
1261       puttc(0xFEFF, userData.fp);
1262 #endif
1263       XML_SetUserData(parser, &userData);
1264       switch (outputType) {
1265       case 'm':
1266         XML_UseParserAsHandlerArg(parser);
1267         XML_SetElementHandler(parser, metaStartElement, metaEndElement);
1268         XML_SetProcessingInstructionHandler(parser, metaProcessingInstruction);
1269         XML_SetCommentHandler(parser, metaComment);
1270         XML_SetCdataSectionHandler(parser, metaStartCdataSection,
1271                                    metaEndCdataSection);
1272         XML_SetCharacterDataHandler(parser, metaCharacterData);
1273         XML_SetDoctypeDeclHandler(parser, metaStartDoctypeDecl,
1274                                   metaEndDoctypeDecl);
1275         XML_SetEntityDeclHandler(parser, metaEntityDecl);
1276         XML_SetNotationDeclHandler(parser, metaNotationDecl);
1277         XML_SetNamespaceDeclHandler(parser, metaStartNamespaceDecl,
1278                                     metaEndNamespaceDecl);
1279         metaStartDocument(parser);
1280         break;
1281       case 'c':
1282         XML_UseParserAsHandlerArg(parser);
1283         XML_SetDefaultHandler(parser, markup);
1284         XML_SetElementHandler(parser, defaultStartElement, defaultEndElement);
1285         XML_SetCharacterDataHandler(parser, defaultCharacterData);
1286         XML_SetProcessingInstructionHandler(parser,
1287                                             defaultProcessingInstruction);
1288         break;
1289       default:
1290         if (useNamespaces)
1291           XML_SetElementHandler(parser, startElementNS, endElementNS);
1292         else
1293           XML_SetElementHandler(parser, startElement, endElement);
1294         XML_SetCharacterDataHandler(parser, characterData);
1295 #ifndef W3C14N
1296         XML_SetProcessingInstructionHandler(parser, processingInstruction);
1297         if (requiresNotations) {
1298           XML_SetDoctypeDeclHandler(parser, startDoctypeDecl, endDoctypeDecl);
1299           XML_SetNotationDeclHandler(parser, notationDecl);
1300         }
1301 #endif /* not W3C14N */
1302         break;
1303       }
1304     }
1305     if (windowsCodePages)
1306       XML_SetUnknownEncodingHandler(parser, unknownEncoding, 0);
1307     result = XML_ProcessFile(parser, useStdin ? NULL : argv[i], processFlags);
1308     if (outputDir) {
1309       if (outputType == 'm')
1310         metaEndDocument(parser);
1311       fclose(userData.fp);
1312       if (! result) {
1313         tremove(outName);
1314       }
1315       free(outName);
1316     }
1317     XML_ParserFree(parser);
1318     if (! result) {
1319       exitCode = XMLWF_EXIT_NOT_WELLFORMED;
1320       cleanupUserData(&userData);
1321       if (! continueOnError) {
1322         break;
1323       }
1324     }
1325   }
1326   return exitCode;
1327 }
1328