1 /*
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
11 Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12 Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net>
13 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
14 Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org>
15 Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
16 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
17 Copyright (c) 2021 Donghee Na <donghee.na@python.org>
18 Copyright (c) 2024 Hanno Böck <hanno@gentoo.org>
19 Licensed under the MIT license:
20
21 Permission is hereby granted, free of charge, to any person obtaining
22 a copy of this software and associated documentation files (the
23 "Software"), to deal in the Software without restriction, including
24 without limitation the rights to use, copy, modify, merge, publish,
25 distribute, sublicense, and/or sell copies of the Software, and to permit
26 persons to whom the Software is furnished to do so, subject to the
27 following conditions:
28
29 The above copyright notice and this permission notice shall be included
30 in all copies or substantial portions of the Software.
31
32 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
33 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
34 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
35 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
36 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
37 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
38 USE OR OTHER DEALINGS IN THE SOFTWARE.
39 */
40
41 #include "expat_config.h"
42
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <stddef.h>
46 #include <string.h>
47 #include <fcntl.h>
48
49 #ifdef _WIN32
50 # include "winconfig.h"
51 #endif
52
53 #include "expat.h"
54 #include "internal.h" /* for UNUSED_P only */
55 #include "xmlfile.h"
56 #include "xmltchar.h"
57 #include "filemap.h"
58
/* Low-level "read" primitive, plus the integer types of its byte-count
   result (signed) and its request-size argument (unsigned), chosen per
   platform. */
#if ! defined(_MSC_VER) /* POSIX */
#  include <unistd.h>
/* ssize_t read(int fildes, void *buf, size_t nbyte);
   https://pubs.opengroup.org/onlinepubs/009695399/functions/read.html */
#  define EXPAT_read read
#  define EXPAT_read_count_t ssize_t
#  define EXPAT_read_req_t size_t
#else /* Microsoft C runtime */
#  include <io.h>
/* int _read(int fd, void *buffer, unsigned int count);
   https://msdn.microsoft.com/en-us/library/wyssk1bs(v=vs.100).aspx */
#  define EXPAT_read _read
#  define EXPAT_read_count_t int
#  define EXPAT_read_req_t unsigned int
#endif
73
/* Make O_BINARY usable in open() flags everywhere: borrow the underscore
   spelling when that is the only one available, and otherwise define it
   as 0 so that OR-ing it into the flags changes nothing. */
#if ! defined(O_BINARY) && defined(_O_BINARY)
#  define O_BINARY _O_BINARY
#endif
#if ! defined(O_BINARY)
#  define O_BINARY 0
#endif
81
/* Chunk size, in bytes, requested from the parser buffer and from read()
   on each iteration of the streaming loop.  Defaults to 8 KiB; the
   variable has external linkage. */
int g_read_size_bytes = 8 * 1024;
83
84 typedef struct {
85 XML_Parser parser;
86 int *retPtr;
87 } PROCESS_ARGS;
88
89 static int processStream(const XML_Char *filename, XML_Parser parser);
90
91 static void
reportError(XML_Parser parser,const XML_Char * filename)92 reportError(XML_Parser parser, const XML_Char *filename) {
93 enum XML_Error code = XML_GetErrorCode(parser);
94 const XML_Char *message = XML_ErrorString(code);
95 if (message)
96 ftprintf(stdout,
97 T("%s") T(":%") T(XML_FMT_INT_MOD) T("u") T(":%")
98 T(XML_FMT_INT_MOD) T("u") T(": %s\n"),
99 filename, XML_GetCurrentLineNumber(parser),
100 XML_GetCurrentColumnNumber(parser), message);
101 else
102 ftprintf(stderr, T("%s: (unknown message %u)\n"), filename,
103 (unsigned int)code);
104 }
105
106 /* This implementation will give problems on files larger than INT_MAX. */
107 static void
processFile(const void * data,size_t size,const XML_Char * filename,void * args)108 processFile(const void *data, size_t size, const XML_Char *filename,
109 void *args) {
110 XML_Parser parser = ((PROCESS_ARGS *)args)->parser;
111 int *retPtr = ((PROCESS_ARGS *)args)->retPtr;
112 if (XML_Parse(parser, (const char *)data, (int)size, 1) == XML_STATUS_ERROR) {
113 reportError(parser, filename);
114 *retPtr = 0;
115 } else
116 *retPtr = 1;
117 }
118
#ifdef _WIN32

/* Return 1 if c lies in 'a'..'z' or 'A'..'Z', 0 otherwise. */
static int
isAsciiLetter(XML_Char c) {
  if (c >= T('a') && c <= T('z'))
    return 1;
  return c >= T('A') && c <= T('Z');
}

#endif /* _WIN32 */
127
128 static const XML_Char *
resolveSystemId(const XML_Char * base,const XML_Char * systemId,XML_Char ** toFree)129 resolveSystemId(const XML_Char *base, const XML_Char *systemId,
130 XML_Char **toFree) {
131 XML_Char *s;
132 *toFree = 0;
133 if (! base || *systemId == T('/')
134 #if defined(_WIN32)
135 || *systemId == T('\\')
136 || (isAsciiLetter(systemId[0]) && systemId[1] == T(':'))
137 #endif
138 )
139 return systemId;
140 *toFree = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2)
141 * sizeof(XML_Char));
142 if (! *toFree)
143 return systemId;
144 tcscpy(*toFree, base);
145 s = *toFree;
146 if (tcsrchr(s, T('/')))
147 s = tcsrchr(s, T('/')) + 1;
148 #if defined(_WIN32)
149 if (tcsrchr(s, T('\\')))
150 s = tcsrchr(s, T('\\')) + 1;
151 #endif
152 tcscpy(s, systemId);
153 return *toFree;
154 }
155
156 static int
externalEntityRefFilemap(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)157 externalEntityRefFilemap(XML_Parser parser, const XML_Char *context,
158 const XML_Char *base, const XML_Char *systemId,
159 const XML_Char *publicId) {
160 int result;
161 XML_Char *s;
162 const XML_Char *filename;
163 XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
164 int filemapRes;
165 PROCESS_ARGS args;
166 UNUSED_P(publicId);
167 args.retPtr = &result;
168 args.parser = entParser;
169 filename = resolveSystemId(base, systemId, &s);
170 XML_SetBase(entParser, filename);
171 filemapRes = filemap(filename, processFile, &args);
172 switch (filemapRes) {
173 case 0:
174 result = 0;
175 break;
176 case 2:
177 ftprintf(stderr,
178 T("%s: file too large for memory-mapping")
179 T(", switching to streaming\n"),
180 filename);
181 result = processStream(filename, entParser);
182 break;
183 }
184 free(s);
185 XML_ParserFree(entParser);
186 return result;
187 }
188
189 static int
processStream(const XML_Char * filename,XML_Parser parser)190 processStream(const XML_Char *filename, XML_Parser parser) {
191 /* passing NULL for filename means read input from stdin */
192 int fd = 0; /* 0 is the fileno for stdin */
193
194 if (filename != NULL) {
195 fd = topen(filename, O_BINARY | O_RDONLY);
196 if (fd < 0) {
197 tperror(filename);
198 return 0;
199 }
200 }
201 for (;;) {
202 EXPAT_read_count_t nread;
203 char *buf = (char *)XML_GetBuffer(parser, g_read_size_bytes);
204 if (! buf) {
205 if (filename != NULL)
206 close(fd);
207 ftprintf(stderr, T("%s: out of memory\n"),
208 filename != NULL ? filename : T("xmlwf"));
209 return 0;
210 }
211 nread = EXPAT_read(fd, buf, (EXPAT_read_req_t)g_read_size_bytes);
212 if (nread < 0) {
213 tperror(filename != NULL ? filename : T("STDIN"));
214 if (filename != NULL)
215 close(fd);
216 return 0;
217 }
218 if (XML_ParseBuffer(parser, (int)nread, nread == 0) == XML_STATUS_ERROR) {
219 reportError(parser, filename != NULL ? filename : T("STDIN"));
220 if (filename != NULL)
221 close(fd);
222 return 0;
223 }
224 if (nread == 0) {
225 if (filename != NULL)
226 close(fd);
227 break;
228 ;
229 }
230 }
231 return 1;
232 }
233
234 static int
externalEntityRefStream(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)235 externalEntityRefStream(XML_Parser parser, const XML_Char *context,
236 const XML_Char *base, const XML_Char *systemId,
237 const XML_Char *publicId) {
238 XML_Char *s;
239 const XML_Char *filename;
240 int ret;
241 XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
242 UNUSED_P(publicId);
243 filename = resolveSystemId(base, systemId, &s);
244 XML_SetBase(entParser, filename);
245 ret = processStream(filename, entParser);
246 free(s);
247 XML_ParserFree(entParser);
248 return ret;
249 }
250
251 int
XML_ProcessFile(XML_Parser parser,const XML_Char * filename,unsigned flags)252 XML_ProcessFile(XML_Parser parser, const XML_Char *filename, unsigned flags) {
253 int result;
254
255 if (! XML_SetBase(parser, filename)) {
256 ftprintf(stderr, T("%s: out of memory"), filename);
257 exit(1);
258 }
259
260 if (flags & XML_EXTERNAL_ENTITIES)
261 XML_SetExternalEntityRefHandler(parser, (flags & XML_MAP_FILE)
262 ? externalEntityRefFilemap
263 : externalEntityRefStream);
264 if (flags & XML_MAP_FILE) {
265 int filemapRes;
266 PROCESS_ARGS args;
267 args.retPtr = &result;
268 args.parser = parser;
269 filemapRes = filemap(filename, processFile, &args);
270 switch (filemapRes) {
271 case 0:
272 result = 0;
273 break;
274 case 2:
275 ftprintf(stderr,
276 T("%s: file too large for memory-mapping")
277 T(", switching to streaming\n"),
278 filename);
279 result = processStream(filename, parser);
280 break;
281 }
282 } else
283 result = processStream(filename, parser);
284 return result;
285 }
286