1 /*
2 * Copyright 2024-2025 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9 #include <stdio.h>
10 #include <string.h>
11 #include <openssl/bio.h>
12 #include <openssl/evp.h>
13 #include <openssl/rand.h>
14
15 #include "testutil.h"
16
/*
 * Long non-base64 prefix line: '#' followed by 2046 'o' characters and a
 * terminating NUL (2048 bytes in total).  The contents are filled in by
 * setup_tests().
 */
static char gunk[2048];
19
/* Parameters of a single base64 decode test */
typedef struct {
    /* Text placed before the data; followed by a newline when non-empty */
    char *prefix;
    /* Verbatim encoded input, or NULL to encode generated octets instead */
    char *encoded;
    /* Number of raw octets, i.e. the expected decoded length */
    unsigned int bytes;
    /* Count of extra 'A' code points appended after the encoded data */
    int trunc;
    /* Text placed after the data; followed by a newline when non-empty */
    char *suffix;
    /* Non-zero: source BIO signals retry on empty, input is fed in two steps */
    int retry;
    /* Non-zero: decode with BIO_FLAGS_BASE64_NO_NL set */
    int no_nl;
} test_case;
29
/* Upper bound on generated raw input: 640kB */
#define BUFMAX      0xa0000
/* Soft end-of-data marker; the leading '-' mirrors PEM and MIME boundaries */
#define sEOF        "-EOF"
/* Short leading content that the decoder is expected to skip */
#define junk        "#foo"

/* Retry indication of the source BIO; distinct from -1 and other results */
#define EOF_RETURN  (-1729)
/* Number of base input lengths */
#define NLEN        6
/* Number of prefix/suffix variants */
#define NVAR        5
/*
 * Number of padding/truncation cases per variant.
 *
 * The junk-suffix variant is not combined with padding or truncated groups:
 * decoding typically stops with an error before the suffix is seen, and with
 * retriable BIOs the suffix may never be examined once padding was detected.
 * Hence every variant but the last gets all NPAD cases, and the last variant
 * gets only the unpadded one.
 */
#define NPAD        6
#define NVARPAD     ((NVAR - 1) * NPAD + 1)
44
45 static char *prefixes[NVAR] = { "", junk, gunk, "", "" };
46 static char *suffixes[NVAR] = { "", "", "", sEOF, junk };
47 static unsigned lengths[6] = { 0, 3, 48, 192, 768, 1536 };
48 static unsigned linelengths[] = {
49 4, 8, 16, 28, 40, 64, 80, 128, 256, 512, 1023, 0
50 };
51 static unsigned wscnts[] = { 0, 1, 2, 4, 8, 16, 0xFFFF };
52
53 /* Generate `len` random octets */
genbytes(unsigned len)54 static unsigned char *genbytes(unsigned len)
55 {
56 unsigned char *buf = NULL;
57
58 if (len > 0 && len <= BUFMAX && (buf = OPENSSL_malloc(len)) != NULL)
59 RAND_bytes(buf, len);
60
61 return buf;
62 }
63
64 /* Append one base64 codepoint, adding newlines after every `llen` bytes */
memout(BIO * mem,char c,int llen,int * pos)65 static int memout(BIO *mem, char c, int llen, int *pos)
66 {
67 if (BIO_write(mem, &c, 1) != 1)
68 return 0;
69 if (++*pos == llen) {
70 *pos = 0;
71 c = '\n';
72 if (BIO_write(mem, &c, 1) != 1)
73 return 0;
74 }
75 return 1;
76 }
77
78 /* Encode and append one 6-bit slice, randomly prepending some whitespace */
memoutws(BIO * mem,char c,unsigned wscnt,unsigned llen,int * pos)79 static int memoutws(BIO *mem, char c, unsigned wscnt, unsigned llen, int *pos)
80 {
81 if (wscnt > 0
82 && (test_random() % llen) < wscnt
83 && memout(mem, ' ', llen, pos) == 0)
84 return 0;
85 return memout(mem, c, llen, pos);
86 }
87
88 /*
89 * Encode an octet string in base64, approximately `llen` bytes per line,
90 * with up to roughly `wscnt` additional space characters inserted at random
91 * before some of the base64 code points.
92 */
encode(unsigned const char * buf,unsigned buflen,char * encoded,int trunc,unsigned llen,unsigned wscnt,BIO * mem)93 static int encode(unsigned const char *buf, unsigned buflen, char *encoded,
94 int trunc, unsigned llen, unsigned wscnt, BIO *mem)
95 {
96 static const unsigned char b64[65] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
97 int pos = 0;
98 char nl = '\n';
99
100 /* Use a verbatim encoding when provided */
101 if (encoded != NULL) {
102 int elen = strlen(encoded);
103
104 return BIO_write(mem, encoded, elen) == elen;
105 }
106
107 /* Encode full 3-octet groups */
108 while (buflen > 2) {
109 unsigned long v = buf[0] << 16 | buf[1] << 8 | buf[2];
110
111 if (memoutws(mem, b64[v >> 18], wscnt, llen, &pos) == 0
112 || memoutws(mem, b64[(v >> 12) & 0x3f], wscnt, llen, &pos) == 0
113 || memoutws(mem, b64[(v >> 6) & 0x3f], wscnt, llen, &pos) == 0
114 || memoutws(mem, b64[v & 0x3f], wscnt, llen, &pos) == 0)
115 return 0;
116 buf += 3;
117 buflen -= 3;
118 }
119
120 /* Encode and pad final 1 or 2 octet group */
121 if (buflen == 2) {
122 unsigned long v = buf[0] << 8 | buf[1];
123
124 if (memoutws(mem, b64[(v >> 10) & 0x3f], wscnt, llen, &pos) == 0
125 || memoutws(mem, b64[(v >> 4) & 0x3f], wscnt, llen, &pos) == 0
126 || memoutws(mem, b64[(v & 0xf) << 2], wscnt, llen, &pos) == 0
127 || memoutws(mem, '=', wscnt, llen, &pos) == 0)
128 return 0;
129 } else if (buflen == 1) {
130 unsigned long v = buf[0];
131
132 if (memoutws(mem, b64[v >> 2], wscnt, llen, &pos) == 0
133 || memoutws(mem, b64[(v & 0x3) << 4], wscnt, llen, &pos) == 0
134 || memoutws(mem, '=', wscnt, llen, &pos) == 0
135 || memoutws(mem, '=', wscnt, llen, &pos) == 0)
136 return 0;
137 }
138
139 while (trunc-- > 0)
140 if (memoutws(mem, 'A', wscnt, llen, &pos) == 0)
141 return 0;
142
143 /* Terminate last line */
144 if (pos > 0 && BIO_write(mem, &nl, 1) != 1)
145 return 0;
146
147 return 1;
148 }
149
genb64(char * prefix,char * suffix,unsigned const char * buf,unsigned buflen,int trunc,char * encoded,unsigned llen,unsigned wscnt,char ** out)150 static int genb64(char *prefix, char *suffix, unsigned const char *buf,
151 unsigned buflen, int trunc, char *encoded, unsigned llen,
152 unsigned wscnt, char **out)
153 {
154 int preflen = strlen(prefix);
155 int sufflen = strlen(suffix);
156 int outlen;
157 char newline = '\n';
158 BUF_MEM *bptr;
159 BIO *mem = BIO_new(BIO_s_mem());
160
161 if (mem == NULL)
162 return -1;
163
164 if ((*prefix && (BIO_write(mem, prefix, preflen) != preflen || BIO_write(mem, &newline, 1) != 1))
165 || encode(buf, buflen, encoded, trunc, llen, wscnt, mem) <= 0
166 || (*suffix && (BIO_write(mem, suffix, sufflen) != sufflen || BIO_write(mem, &newline, 1) != 1))) {
167 BIO_free(mem);
168 return -1;
169 }
170
171 /* Orphan the memory BIO's data buffer */
172 BIO_get_mem_ptr(mem, &bptr);
173 *out = bptr->data;
174 outlen = bptr->length;
175 bptr->data = NULL;
176 (void)BIO_set_close(mem, BIO_NOCLOSE);
177 BIO_free(mem);
178 BUF_MEM_free(bptr);
179
180 return outlen;
181 }
182
/*
 * Run one decode test described by `t`, with the generated input laid out
 * using encoded line length `llen` and whitespace count `wscnt`.
 *
 * The input is produced by genb64(), fed to a memory BIO (in two steps when
 * `t->retry` is set) and read back through a base64 filter BIO.  Depending
 * on the test parameters either a decode error is expected on the final
 * read, or the decoded octets must match the raw input exactly.
 *
 * Returns 0 when the outcome is as expected, -1 otherwise (including setup
 * failures such as running out of memory).
 */
static int test_bio_base64_run(test_case *t, int llen, int wscnt)
{
    unsigned char *raw = NULL;
    unsigned char *out = NULL;
    unsigned out_len;
    char *encoded = NULL;
    int elen;
    BIO *bio = NULL, *b64 = NULL;
    int n, n1, n2, off;
    int ret = -1;

    /*
     * Pre-encoded data always encodes NUL octets.  If all we care about is the
     * length, and not the payload, use random bytes.
     */
    if (t->encoded != NULL)
        raw = OPENSSL_zalloc(t->bytes);
    else
        raw = genbytes(t->bytes);

    if (raw == NULL && t->bytes > 0) {
        TEST_error("out of memory");
        goto end;
    }

    /* Leave ample room beyond the expected decoded length */
    out_len = t->bytes + 1024;
    out = OPENSSL_malloc(out_len);
    if (out == NULL) {
        TEST_error("out of memory");
        goto end;
    }

    elen = genb64(t->prefix, t->suffix, raw, t->bytes, t->trunc, t->encoded,
                  llen, wscnt, &encoded);
    if (elen < 0 || (bio = BIO_new(BIO_s_mem())) == NULL) {
        TEST_error("out of memory");
        goto end;
    }
    BIO_set_mem_eof_return(bio, t->retry ? EOF_RETURN : 0);

    /*
     * When the source bio is retriable, exercise retries by writing the input
     * to the underlying BIO in two steps (the first half, then the rest) and
     * trying to decode some data after each write.
     */
    n1 = t->retry ? elen / 2 : elen;
    if (n1 > 0 && BIO_write(bio, encoded, n1) != n1) {
        TEST_error("Failed to write encoded input");
        goto end;
    }

    if ((b64 = BIO_new(BIO_f_base64())) == NULL) {
        TEST_error("out of memory");
        goto end;
    }
    if (t->no_nl)
        BIO_set_flags(b64, BIO_FLAGS_BASE64_NO_NL);
    /* From here on `bio` is owned by the chain headed by `b64` */
    BIO_push(b64, bio);

    n = BIO_read(b64, out, out_len);

    if (n1 < elen) {
        /* Append the rest of the input, and read again */
        if (BIO_write(bio, encoded + n1, elen - n1) != elen - n1) {
            TEST_error("Failed to write encoded input");
            goto end;
        }
        if (n > 0) {
            n2 = BIO_read(b64, out + n, out_len - n);
            if (n2 > 0)
                n += n2;
        } else if (n == EOF_RETURN) {
            n = BIO_read(b64, out, out_len);
        }
    }

    /* Turn retry-related negative results to normal (0) EOF */
    if (n == EOF_RETURN)
        n = 0;

    /* Turn off retries */
    if (t->retry)
        BIO_set_mem_eof_return(bio, 0);

    if (n >= (int)out_len) {
        /* Should not happen, given extra space in out_len */
        TEST_error("Unexpectedly long decode output");
        goto end;
    }

    /*
     * Perform the last read, checking its result.  An earlier error leaves
     * `n` negative; it must not be used as an offset into `out`.
     */
    off = n > 0 ? n : 0;
    ret = BIO_read(b64, out + off, out_len - off);

    /*
     * Expect an error to be detected with:
     *
     * - truncated groups,
     * - non-base64 suffixes (other than soft EOF) for non-empty or oneline
     *   input
     * - non-base64 prefixes in NO_NL mode
     *
     * Otherwise, check the decoded content
     */
    if (t->trunc > 0
        || ((t->bytes > 0 || t->no_nl) && *t->suffix && *t->suffix != '-')
        || (t->no_nl && *t->prefix)) {
        if ((ret = ret < 0 ? 0 : -1) != 0)
            TEST_error("Final read result was non-negative");
    } else if (ret != 0
               || n != (int)t->bytes
               || (n > 0 && memcmp(raw, out, n) != 0)) {
        TEST_error("Failed to decode expected data");
        ret = -1;
    }

 end:
    /* Freeing the chain also frees `bio`, once it has been pushed */
    if (b64 != NULL)
        BIO_free_all(b64);
    else
        BIO_free(bio);
    OPENSSL_free(out);
    OPENSSL_free(raw);
    OPENSSL_free(encoded);

    return ret;
}
306
generic_case(test_case * t,int verbose)307 static int generic_case(test_case *t, int verbose)
308 {
309 unsigned *llen;
310 unsigned *wscnt;
311 int ok = 1;
312
313 for (llen = linelengths; *llen > 0; ++llen) {
314 for (wscnt = wscnts; *wscnt * 2 < *llen; ++wscnt) {
315 int extra = t->no_nl ? 64 : 0;
316
317 /*
318 * Use a longer line for NO_NL tests, in particular, eventually
319 * exceeding 1k bytes.
320 */
321 if (test_bio_base64_run(t, *llen + extra, *wscnt) != 0)
322 ok = 0;
323
324 if (verbose) {
325 fprintf(stderr, "bio_base64_test: ok=%d", ok);
326 if (*t->prefix)
327 fprintf(stderr, ", prefix='%s'", t->prefix);
328 if (t->encoded)
329 fprintf(stderr, ", data='%s'", t->encoded);
330 else
331 fprintf(stderr, ", datalen=%u", t->bytes);
332 if (t->trunc)
333 fprintf(stderr, ", trunc=%d", t->trunc);
334 if (*t->suffix)
335 fprintf(stderr, ", suffix='%s'", t->suffix);
336 fprintf(stderr, ", linelen=%u", *llen);
337 fprintf(stderr, ", wscount=%u", *wscnt);
338 if (t->retry)
339 fprintf(stderr, ", retriable");
340 if (t->no_nl)
341 fprintf(stderr, ", oneline");
342 fputc('\n', stderr);
343 }
344
345 /* For verbatim input no effect from varying llen or wscnt */
346 if (t->encoded)
347 return ok;
348 }
349 /*
350 * Longer 'llen' has no effect once we're sure to not have multiple
351 * lines of data
352 */
353 if (*llen > t->bytes + (t->bytes >> 1))
354 break;
355 }
356 return ok;
357 }
358
/*
 * Divide `i` by `m` using unsigned arithmetic: store the quotient in `*q`
 * and return the remainder.  `m` must be non-zero.
 */
static int quotrem(int i, unsigned int m, int *q)
{
    unsigned int dividend = (unsigned int)i;
    unsigned int quotient = dividend / m;

    *q = (int)quotient;
    return (int)(dividend - quotient * m);
}
364
test_bio_base64_generated(int idx)365 static int test_bio_base64_generated(int idx)
366 {
367 test_case t;
368 int variant;
369 int lencase;
370 int padcase;
371 int q = idx;
372
373 lencase = quotrem(q, NLEN, &q);
374 variant = quotrem(q, NVARPAD, &q);
375 padcase = quotrem(variant, NPAD, &variant);
376 t.retry = quotrem(q, 2, &q);
377 t.no_nl = quotrem(q, 2, &q);
378
379 if (q != 0) {
380 fprintf(stderr, "Test index out of range: %d", idx);
381 return 0;
382 }
383
384 t.prefix = prefixes[variant];
385 t.encoded = NULL;
386 t.bytes = lengths[lencase];
387 t.trunc = 0;
388 if (padcase && padcase < 3)
389 t.bytes += padcase;
390 else if (padcase >= 3)
391 t.trunc = padcase - 2;
392 t.suffix = suffixes[variant];
393
394 if (padcase != 0 && (*t.suffix && *t.suffix != '-')) {
395 TEST_error("Unexpected suffix test after padding");
396 return 0;
397 }
398
399 return generic_case(&t, 0);
400 }
401
test_bio_base64_corner_case_bug(int idx)402 static int test_bio_base64_corner_case_bug(int idx)
403 {
404 test_case t;
405 int q = idx;
406
407 t.retry = quotrem(q, 2, &q);
408 t.no_nl = quotrem(q, 2, &q);
409
410 if (q != 0) {
411 fprintf(stderr, "Test index out of range: %d", idx);
412 return 0;
413 }
414
415 /* 9 bytes of skipped non-base64 input + newline */
416 t.prefix = "#foo\n#bar";
417
418 /* 9 bytes on 2nd and subsequent lines */
419 t.encoded = "A\nAAA\nAAAA\n";
420 t.suffix = "";
421
422 /* Expected decode length */
423 t.bytes = 6;
424 t.trunc = 0; /* ignored */
425
426 return generic_case(&t, 0);
427 }
428
setup_tests(void)429 int setup_tests(void)
430 {
431 int numidx;
432
433 memset(gunk, 'o', sizeof(gunk));
434 gunk[0] = '#';
435 gunk[sizeof(gunk) - 1] = '\0';
436
437 /*
438 * Test 5 variants of prefix or suffix
439 *
440 * - both empty
441 * - short junk prefix
442 * - long gunk prefix (> internal BIO 1k buffer size),
443 * - soft EOF suffix
444 * - junk suffix (expect to detect an error)
445 *
446 * For 6 input lengths of randomly generated raw input:
447 *
448 * 0, 3, 48, 192, 768 and 1536
449 *
450 * corresponding to encoded lengths (plus linebreaks and ignored
451 * whitespace) of:
452 *
453 * 0, 4, 64, 256, 1024 and 2048
454 *
455 * Followed by zero, one or two additional bytes that may involve padding,
456 * or else (truncation) 1, 2 or 3 bytes with missing padding.
457 * Only the first four variants make sense with padding or truncated
458 * groups.
459 *
460 * With two types of underlying BIO
461 *
462 * - Non-retriable underlying BIO
463 * - Retriable underlying BIO
464 *
465 * And with/without the BIO_FLAGS_BASE64_NO_NL flag, where now an error is
466 * expected with the junk and gunk prefixes, however, but the "soft EOF"
467 * suffix is still accepted.
468 *
469 * Internally, each test may loop over a range of encoded line lengths and
470 * whitespace average "densities".
471 */
472 numidx = NLEN * (NVAR * NPAD - NPAD + 1) * 2 * 2;
473 ADD_ALL_TESTS(test_bio_base64_generated, numidx);
474
475 /*
476 * Corner case in original code that skips ignored input, when the ignored
477 * length is one byte longer than the total of the second and later lines
478 * of valid input in the first 1k bytes of input. No content variants,
479 * just BIO retry status and oneline flags vary.
480 */
481 numidx = 2 * 2;
482 ADD_ALL_TESTS(test_bio_base64_corner_case_bug, numidx);
483
484 return 1;
485 }
486