xref: /src/crypto/openssl/test/bio_base64_test.c (revision f25b8c9fb4f58cf61adb47d7570abe7caa6d385d)
1 /*
2  * Copyright 2024-2025 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the Apache License 2.0 (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9 #include <stdio.h>
10 #include <string.h>
11 #include <openssl/bio.h>
12 #include <openssl/evp.h>
13 #include <openssl/rand.h>
14 
15 #include "testutil.h"
16 
17 /* 2047 bytes of "#ooooooooo..." + NUL terminator */
18 static char gunk[2048];
19 
/*
 * Description of a single decode scenario: the text surrounding the base64
 * payload, how the payload is obtained, and how the BIOs are configured.
 */
20 typedef struct {
21     char *prefix; /* Written, followed by a newline, before the payload; "" for none */
22     char *encoded; /* Verbatim encoded payload, or NULL to encode random octets */
23     unsigned bytes; /* Raw octet count, also the expected decoded length */
24     int trunc; /* Extra 'A' code points appended after the payload */
25     char *suffix; /* Written, followed by a newline, after the payload; "" for none */
26     int retry; /* Non-zero: source BIO reports EOF_RETURN at EOF, input fed in two parts */
27     int no_nl; /* Non-zero: set BIO_FLAGS_BASE64_NO_NL on the base64 filter */
28 } test_case;
29 
30 #define BUFMAX 0xa0000 /* Encode at most 640kB. */
31 #define sEOF "-EOF" /* '-' as in PEM and MIME boundaries */
32 #define junk "#foo" /* Skipped initial content */
33 
34 #define EOF_RETURN (-1729) /* Distinct from -1, etc., internal results */
35 #define NLEN 6 /* Number of base input lengths, see lengths[] */
36 #define NVAR 5 /* Number of prefix/suffix variants, see prefixes[] and suffixes[] */
37 /*
38  * Junk suffixed variants don't make sense with padding or truncated groups
39  * because we will typically stop with an error before seeing the suffix, but
40  * with retriable BIOs may never look at the suffix after detecting padding.
41  */
/*
 * NPAD counts the "pad cases" decoded in test_bio_base64_generated(): 0 leaves
 * the input length alone, 1 and 2 add that many raw octets, 3 to 5 append one
 * to three truncating code points.  NVARPAD is the number of (variant, pad
 * case) combinations: every pad case for the first NVAR - 1 variants, and
 * only pad case 0 for the last (junk suffix) variant.
 */
42 #define NPAD 6
43 #define NVARPAD (NVAR * NPAD - NPAD + 1)
44 
/* Variant `i` pairs prefixes[i] with suffixes[i] */
45 static char *prefixes[NVAR] = { "", junk, gunk, "", "" };
46 static char *suffixes[NVAR] = { "", "", "", sEOF, junk };
/* Base raw input lengths, all multiples of three */
47 static unsigned lengths[6] = { 0, 3, 48, 192, 768, 1536 };
/* Encoded line lengths to try; the trailing 0 terminates the list */
48 static unsigned linelengths[] = {
49     4, 8, 16, 28, 40, 64, 80, 128, 256, 512, 1023, 0
50 };
/*
 * Whitespace counts to try.  generic_case() stops scanning once twice the
 * count reaches the line length, so 0xFFFF acts as the list terminator.
 */
51 static unsigned wscnts[] = { 0, 1, 2, 4, 8, 16, 0xFFFF };
52 
53 /* Generate `len` random octets */
genbytes(unsigned len)54 static unsigned char *genbytes(unsigned len)
55 {
56     unsigned char *buf = NULL;
57 
58     if (len > 0 && len <= BUFMAX && (buf = OPENSSL_malloc(len)) != NULL)
59         RAND_bytes(buf, len);
60 
61     return buf;
62 }
63 
64 /* Append one base64 codepoint, adding newlines after every `llen` bytes */
memout(BIO * mem,char c,int llen,int * pos)65 static int memout(BIO *mem, char c, int llen, int *pos)
66 {
67     if (BIO_write(mem, &c, 1) != 1)
68         return 0;
69     if (++*pos == llen) {
70         *pos = 0;
71         c = '\n';
72         if (BIO_write(mem, &c, 1) != 1)
73             return 0;
74     }
75     return 1;
76 }
77 
78 /* Encode and append one 6-bit slice, randomly prepending some whitespace */
memoutws(BIO * mem,char c,unsigned wscnt,unsigned llen,int * pos)79 static int memoutws(BIO *mem, char c, unsigned wscnt, unsigned llen, int *pos)
80 {
81     if (wscnt > 0
82         && (test_random() % llen) < wscnt
83         && memout(mem, ' ', llen, pos) == 0)
84         return 0;
85     return memout(mem, c, llen, pos);
86 }
87 
88 /*
89  * Encode an octet string in base64, approximately `llen` bytes per line,
90  * with up to roughly `wscnt` additional space characters inserted at random
91  * before some of the base64 code points.
92  */
encode(unsigned const char * buf,unsigned buflen,char * encoded,int trunc,unsigned llen,unsigned wscnt,BIO * mem)93 static int encode(unsigned const char *buf, unsigned buflen, char *encoded,
94     int trunc, unsigned llen, unsigned wscnt, BIO *mem)
95 {
96     static const unsigned char b64[65] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
97     int pos = 0;
98     char nl = '\n';
99 
100     /* Use a verbatim encoding when provided */
101     if (encoded != NULL) {
102         int elen = strlen(encoded);
103 
104         return BIO_write(mem, encoded, elen) == elen;
105     }
106 
107     /* Encode full 3-octet groups */
108     while (buflen > 2) {
109         unsigned long v = buf[0] << 16 | buf[1] << 8 | buf[2];
110 
111         if (memoutws(mem, b64[v >> 18], wscnt, llen, &pos) == 0
112             || memoutws(mem, b64[(v >> 12) & 0x3f], wscnt, llen, &pos) == 0
113             || memoutws(mem, b64[(v >> 6) & 0x3f], wscnt, llen, &pos) == 0
114             || memoutws(mem, b64[v & 0x3f], wscnt, llen, &pos) == 0)
115             return 0;
116         buf += 3;
117         buflen -= 3;
118     }
119 
120     /* Encode and pad final 1 or 2 octet group */
121     if (buflen == 2) {
122         unsigned long v = buf[0] << 8 | buf[1];
123 
124         if (memoutws(mem, b64[(v >> 10) & 0x3f], wscnt, llen, &pos) == 0
125             || memoutws(mem, b64[(v >> 4) & 0x3f], wscnt, llen, &pos) == 0
126             || memoutws(mem, b64[(v & 0xf) << 2], wscnt, llen, &pos) == 0
127             || memoutws(mem, '=', wscnt, llen, &pos) == 0)
128             return 0;
129     } else if (buflen == 1) {
130         unsigned long v = buf[0];
131 
132         if (memoutws(mem, b64[v >> 2], wscnt, llen, &pos) == 0
133             || memoutws(mem, b64[(v & 0x3) << 4], wscnt, llen, &pos) == 0
134             || memoutws(mem, '=', wscnt, llen, &pos) == 0
135             || memoutws(mem, '=', wscnt, llen, &pos) == 0)
136             return 0;
137     }
138 
139     while (trunc-- > 0)
140         if (memoutws(mem, 'A', wscnt, llen, &pos) == 0)
141             return 0;
142 
143     /* Terminate last line */
144     if (pos > 0 && BIO_write(mem, &nl, 1) != 1)
145         return 0;
146 
147     return 1;
148 }
149 
genb64(char * prefix,char * suffix,unsigned const char * buf,unsigned buflen,int trunc,char * encoded,unsigned llen,unsigned wscnt,char ** out)150 static int genb64(char *prefix, char *suffix, unsigned const char *buf,
151     unsigned buflen, int trunc, char *encoded, unsigned llen,
152     unsigned wscnt, char **out)
153 {
154     int preflen = strlen(prefix);
155     int sufflen = strlen(suffix);
156     int outlen;
157     char newline = '\n';
158     BUF_MEM *bptr;
159     BIO *mem = BIO_new(BIO_s_mem());
160 
161     if (mem == NULL)
162         return -1;
163 
164     if ((*prefix && (BIO_write(mem, prefix, preflen) != preflen || BIO_write(mem, &newline, 1) != 1))
165         || encode(buf, buflen, encoded, trunc, llen, wscnt, mem) <= 0
166         || (*suffix && (BIO_write(mem, suffix, sufflen) != sufflen || BIO_write(mem, &newline, 1) != 1))) {
167         BIO_free(mem);
168         return -1;
169     }
170 
171     /* Orphan the memory BIO's data buffer */
172     BIO_get_mem_ptr(mem, &bptr);
173     *out = bptr->data;
174     outlen = bptr->length;
175     bptr->data = NULL;
176     (void)BIO_set_close(mem, BIO_NOCLOSE);
177     BIO_free(mem);
178     BUF_MEM_free(bptr);
179 
180     return outlen;
181 }
182 
/*
 * Run one decode scenario: build the encoded input described by `t` with
 * line length `llen` and whitespace count `wscnt`, feed it through a base64
 * filter BIO stacked on a memory BIO, and compare the outcome with what the
 * scenario is expected to produce.
 *
 * Returns 0 when the observed behaviour matches the expectation, and -1
 * otherwise (including resource failures while setting up the test).
 */
static int test_bio_base64_run(test_case *t, int llen, int wscnt)
{
    unsigned char *raw;
    unsigned char *out = NULL;
    unsigned out_len;
    char *encoded = NULL;
    int elen;
    BIO *bio = NULL, *b64 = NULL;
    int n, n1, n2, off;
    int ret = -1;

    /*
     * Pre-encoded data always encodes NUL octets.  If all we care about is the
     * length, and not the payload, use random bytes.
     */
    if (t->encoded != NULL)
        raw = OPENSSL_zalloc(t->bytes);
    else
        raw = genbytes(t->bytes);

    if (raw == NULL && t->bytes > 0) {
        TEST_error("out of memory");
        return -1;
    }

    /* Leave generous headroom beyond the expected decoded length */
    out_len = t->bytes + 1024;
    if ((out = OPENSSL_malloc(out_len)) == NULL) {
        TEST_error("out of memory");
        goto end;
    }

    elen = genb64(t->prefix, t->suffix, raw, t->bytes, t->trunc, t->encoded,
        llen, wscnt, &encoded);
    if (elen < 0 || (bio = BIO_new(BIO_s_mem())) == NULL) {
        TEST_error("out of memory");
        goto end;
    }
    BIO_set_mem_eof_return(bio, t->retry ? EOF_RETURN : 0);

    /*
     * When the source bio is retriable, exercise retries by writing the input
     * to the underlying BIO in two steps (the first half, then the rest) and
     * trying to decode some data after each write.
     */
    n1 = t->retry ? elen / 2 : elen;
    if (n1 > 0 && BIO_write(bio, encoded, n1) != n1) {
        TEST_error("Failed to write encoded input");
        goto end;
    }

    if ((b64 = BIO_new(BIO_f_base64())) == NULL) {
        TEST_error("out of memory");
        goto end;
    }
    if (t->no_nl)
        BIO_set_flags(b64, BIO_FLAGS_BASE64_NO_NL);
    /* From here on `bio` is part of the b64 chain and is freed with it */
    BIO_push(b64, bio);

    n = BIO_read(b64, out, out_len);

    if (n1 < elen) {
        /* Append the rest of the input, and read again */
        if (BIO_write(bio, encoded + n1, elen - n1) != elen - n1) {
            TEST_error("Failed to write encoded input");
            goto end;
        }
        if (n > 0) {
            n2 = BIO_read(b64, out + n, out_len - n);
            if (n2 > 0)
                n += n2;
        } else if (n == EOF_RETURN) {
            n = BIO_read(b64, out, out_len);
        }
    }

    /* Turn retry-related negative results to normal (0) EOF */
    if (n == EOF_RETURN)
        n = 0;

    /* Turn off retries */
    if (t->retry)
        BIO_set_mem_eof_return(bio, 0);

    if (n >= (int)out_len) {
        /*
         * Should not happen, given extra space in out_len.  Fail outright so
         * that the expected-error branch below cannot turn this into a pass.
         */
        TEST_error("Unexpectedly long decode output");
        goto end;
    }

    /*
     * Perform the last read, checking its result.  A negative `n` records an
     * earlier error and must not be used as a buffer offset.
     */
    off = n > 0 ? n : 0;
    ret = BIO_read(b64, out + off, out_len - off);

    /*
     * Expect an error to be detected with:
     *
     * - truncated groups,
     * - non-base64 suffixes (other than soft EOF) for non-empty or oneline
     *   input
     * - non-base64 prefixes in NO_NL mode
     *
     * Otherwise, check the decoded content
     */
    if (t->trunc > 0
        || ((t->bytes > 0 || t->no_nl) && *t->suffix && *t->suffix != '-')
        || (t->no_nl && *t->prefix)) {
        if (ret < 0) {
            ret = 0;
        } else {
            TEST_error("Final read result was non-negative");
            ret = -1;
        }
    } else if (ret != 0
        || n != (int)t->bytes
        || (n > 0 && memcmp(raw, out, n) != 0)) {
        TEST_error("Failed to decode expected data");
        ret = -1;
    }

end:
    /* Before the push the source BIO is still separately owned */
    if (b64 != NULL)
        BIO_free_all(b64);
    else
        BIO_free(bio);
    OPENSSL_free(out);
    OPENSSL_free(raw);
    OPENSSL_free(encoded);

    return ret;
}
306 
generic_case(test_case * t,int verbose)307 static int generic_case(test_case *t, int verbose)
308 {
309     unsigned *llen;
310     unsigned *wscnt;
311     int ok = 1;
312 
313     for (llen = linelengths; *llen > 0; ++llen) {
314         for (wscnt = wscnts; *wscnt * 2 < *llen; ++wscnt) {
315             int extra = t->no_nl ? 64 : 0;
316 
317             /*
318              * Use a longer line for NO_NL tests, in particular, eventually
319              * exceeding 1k bytes.
320              */
321             if (test_bio_base64_run(t, *llen + extra, *wscnt) != 0)
322                 ok = 0;
323 
324             if (verbose) {
325                 fprintf(stderr, "bio_base64_test: ok=%d", ok);
326                 if (*t->prefix)
327                     fprintf(stderr, ", prefix='%s'", t->prefix);
328                 if (t->encoded)
329                     fprintf(stderr, ", data='%s'", t->encoded);
330                 else
331                     fprintf(stderr, ", datalen=%u", t->bytes);
332                 if (t->trunc)
333                     fprintf(stderr, ", trunc=%d", t->trunc);
334                 if (*t->suffix)
335                     fprintf(stderr, ", suffix='%s'", t->suffix);
336                 fprintf(stderr, ", linelen=%u", *llen);
337                 fprintf(stderr, ", wscount=%u", *wscnt);
338                 if (t->retry)
339                     fprintf(stderr, ", retriable");
340                 if (t->no_nl)
341                     fprintf(stderr, ", oneline");
342                 fputc('\n', stderr);
343             }
344 
345             /* For verbatim input no effect from varying llen or wscnt */
346             if (t->encoded)
347                 return ok;
348         }
349         /*
350          * Longer 'llen' has no effect once we're sure to not have multiple
351          * lines of data
352          */
353         if (*llen > t->bytes + (t->bytes >> 1))
354             break;
355     }
356     return ok;
357 }
358 
/*
 * Divide `i` by `m` using unsigned arithmetic: the quotient is stored in
 * `*q` and the remainder is returned.  `m` must be non-zero.
 */
static int quotrem(int i, unsigned int m, int *q)
{
    unsigned int dividend = (unsigned int)i;

    *q = (int)(dividend / m);
    return (int)(dividend % m);
}
364 
test_bio_base64_generated(int idx)365 static int test_bio_base64_generated(int idx)
366 {
367     test_case t;
368     int variant;
369     int lencase;
370     int padcase;
371     int q = idx;
372 
373     lencase = quotrem(q, NLEN, &q);
374     variant = quotrem(q, NVARPAD, &q);
375     padcase = quotrem(variant, NPAD, &variant);
376     t.retry = quotrem(q, 2, &q);
377     t.no_nl = quotrem(q, 2, &q);
378 
379     if (q != 0) {
380         fprintf(stderr, "Test index out of range: %d", idx);
381         return 0;
382     }
383 
384     t.prefix = prefixes[variant];
385     t.encoded = NULL;
386     t.bytes = lengths[lencase];
387     t.trunc = 0;
388     if (padcase && padcase < 3)
389         t.bytes += padcase;
390     else if (padcase >= 3)
391         t.trunc = padcase - 2;
392     t.suffix = suffixes[variant];
393 
394     if (padcase != 0 && (*t.suffix && *t.suffix != '-')) {
395         TEST_error("Unexpected suffix test after padding");
396         return 0;
397     }
398 
399     return generic_case(&t, 0);
400 }
401 
test_bio_base64_corner_case_bug(int idx)402 static int test_bio_base64_corner_case_bug(int idx)
403 {
404     test_case t;
405     int q = idx;
406 
407     t.retry = quotrem(q, 2, &q);
408     t.no_nl = quotrem(q, 2, &q);
409 
410     if (q != 0) {
411         fprintf(stderr, "Test index out of range: %d", idx);
412         return 0;
413     }
414 
415     /* 9 bytes of skipped non-base64 input + newline */
416     t.prefix = "#foo\n#bar";
417 
418     /* 9 bytes on 2nd and subsequent lines */
419     t.encoded = "A\nAAA\nAAAA\n";
420     t.suffix = "";
421 
422     /* Expected decode length */
423     t.bytes = 6;
424     t.trunc = 0; /* ignored */
425 
426     return generic_case(&t, 0);
427 }
428 
setup_tests(void)429 int setup_tests(void)
430 {
431     int numidx;
432 
433     memset(gunk, 'o', sizeof(gunk));
434     gunk[0] = '#';
435     gunk[sizeof(gunk) - 1] = '\0';
436 
437     /*
438      * Test 5 variants of prefix or suffix
439      *
440      *  - both empty
441      *  - short junk prefix
442      *  - long gunk prefix (> internal BIO 1k buffer size),
443      *  - soft EOF suffix
444      *  - junk suffix (expect to detect an error)
445      *
446      * For 6 input lengths of randomly generated raw input:
447      *
448      *  0, 3, 48, 192, 768 and 1536
449      *
450      * corresponding to encoded lengths (plus linebreaks and ignored
451      * whitespace) of:
452      *
453      *  0, 4, 64, 256, 1024 and 2048
454      *
455      * Followed by zero, one or two additional bytes that may involve padding,
456      * or else (truncation) 1, 2 or 3 bytes with missing padding.
457      * Only the first four variants make sense with padding or truncated
458      * groups.
459      *
460      * With two types of underlying BIO
461      *
462      *  - Non-retriable underlying BIO
463      *  - Retriable underlying BIO
464      *
465      * And with/without the BIO_FLAGS_BASE64_NO_NL flag, where now an error is
466      * expected with the junk and gunk prefixes, however, but the "soft EOF"
467      * suffix is still accepted.
468      *
469      * Internally, each test may loop over a range of encoded line lengths and
470      * whitespace average "densities".
471      */
472     numidx = NLEN * (NVAR * NPAD - NPAD + 1) * 2 * 2;
473     ADD_ALL_TESTS(test_bio_base64_generated, numidx);
474 
475     /*
476      * Corner case in original code that skips ignored input, when the ignored
477      * length is one byte longer than the total of the second and later lines
478      * of valid input in the first 1k bytes of input.  No content variants,
479      * just BIO retry status and oneline flags vary.
480      */
481     numidx = 2 * 2;
482     ADD_ALL_TESTS(test_bio_base64_corner_case_bug, numidx);
483 
484     return 1;
485 }
486