1 /* $Id: dba.c,v 1.11 2025/09/24 13:13:30 schwarze Exp $ */
2 /*
3 * Copyright (c) 2016, 2017, 2025 Ingo Schwarze <schwarze@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 *
17 * Allocation-based version of the mandoc database, for read-write access.
18 * The interface is defined in "dba.h".
19 */
20 #include "config.h"
21
22 #include <sys/types.h>
23 #if HAVE_ENDIAN
24 #include <endian.h>
25 #elif HAVE_SYS_ENDIAN
26 #include <sys/endian.h>
27 #elif HAVE_NTOHL
28 #include <arpa/inet.h>
29 #endif
30 #include <errno.h>
31 #include <stddef.h>
32 #include <stdint.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36
37 #include "mandoc_aux.h"
38 #include "mandoc_ohash.h"
39 #include "mansearch.h"
40 #include "dba_write.h"
41 #include "dba_array.h"
42 #include "dba.h"
43
/*
 * One entry in the hash table of a single macro key:
 * a macro value together with the pages referring to it.
 * Entries are allocated with room for the value string right
 * behind the structure, and the value is what the hash table
 * looks entries up by (see get_macro_entry()).
 */
struct macro_entry {
	struct dba_array	*pages;	  /* pages using this value */
	char			 value[]; /* NUL-terminated value string */
};
48
/* File-local helpers for the pages table. */
static void	*prepend(const char *instr, char inbyte);
static void	 dba_pages_write(struct dba_array *pages);
static int	 compare_names(const void *vp1, const void *vp2);
static int	 compare_strings(const void *vp1, const void *vp2);

/* File-local helpers for the macro tables. */
static struct macro_entry
		*get_macro_entry(struct ohash *macro, const char *value,
		    int32_t np);
static void	 dba_macros_write(struct dba_array *macros);
static void	 dba_macro_write(struct ohash *macro);
static int	 compare_entries(const void *vp1, const void *vp2);
59
60
61 /*** top-level functions **********************************************/
62
63 struct dba *
dba_new(int32_t npages)64 dba_new(int32_t npages)
65 {
66 struct dba *dba;
67 struct ohash *macro;
68 int32_t im;
69
70 dba = mandoc_malloc(sizeof(*dba));
71 dba->pages = dba_array_new(npages, DBA_GROW);
72 dba->macros = dba_array_new(MACRO_MAX, 0);
73 for (im = 0; im < MACRO_MAX; im++) {
74 macro = mandoc_malloc(sizeof(*macro));
75 mandoc_ohash_init(macro, 4,
76 offsetof(struct macro_entry, value));
77 dba_array_set(dba->macros, im, macro);
78 }
79 return dba;
80 }
81
82 void
dba_free(struct dba * dba)83 dba_free(struct dba *dba)
84 {
85 struct dba_array *page;
86 struct ohash *macro;
87 struct macro_entry *entry;
88 unsigned int slot;
89
90 dba_array_FOREACH(dba->macros, macro) {
91 for (entry = ohash_first(macro, &slot); entry != NULL;
92 entry = ohash_next(macro, &slot)) {
93 dba_array_free(entry->pages);
94 free(entry);
95 }
96 ohash_delete(macro);
97 free(macro);
98 }
99 dba_array_free(dba->macros);
100
101 dba_array_undel(dba->pages);
102 dba_array_FOREACH(dba->pages, page) {
103 dba_array_free(dba_array_get(page, DBP_NAME));
104 dba_array_free(dba_array_get(page, DBP_SECT));
105 dba_array_free(dba_array_get(page, DBP_ARCH));
106 free(dba_array_get(page, DBP_DESC));
107 dba_array_free(dba_array_get(page, DBP_FILE));
108 dba_array_free(page);
109 }
110 dba_array_free(dba->pages);
111
112 free(dba);
113 }
114
115 /*
116 * Write the complete mandoc database to disk; the format is:
117 * - One integer each for magic and version.
118 * - One pointer each to the macros table and to the final magic.
119 * - The pages table.
120 * - The macros table.
121 * - And at the very end, the magic integer again.
122 */
123 int
dba_write(const char * fname,struct dba * dba)124 dba_write(const char *fname, struct dba *dba)
125 {
126 int save_errno;
127 int32_t pos_end, pos_macros, pos_macros_ptr;
128
129 if (dba_open(fname) == -1)
130 return -1;
131 dba_int_write(MANDOCDB_MAGIC);
132 dba_int_write(MANDOCDB_VERSION);
133 pos_macros_ptr = dba_skip(1, 2);
134 dba_pages_write(dba->pages);
135 pos_macros = dba_tell();
136 dba_macros_write(dba->macros);
137 pos_end = dba_tell();
138 dba_int_write(MANDOCDB_MAGIC);
139 dba_seek(pos_macros_ptr);
140 dba_int_write(pos_macros);
141 dba_int_write(pos_end);
142 if (dba_close() == -1) {
143 save_errno = errno;
144 unlink(fname);
145 errno = save_errno;
146 return -1;
147 }
148 return 0;
149 }
150
151
152 /*** functions for handling pages *************************************/
153
154 /*
155 * Create a new page and append it to the pages table.
156 */
157 struct dba_array *
dba_page_new(struct dba_array * pages,const char * arch,const char * desc,const char * file,enum form form)158 dba_page_new(struct dba_array *pages, const char *arch,
159 const char *desc, const char *file, enum form form)
160 {
161 struct dba_array *page, *entry;
162
163 page = dba_array_new(DBP_MAX, 0);
164 entry = dba_array_new(1, DBA_STR | DBA_GROW);
165 dba_array_add(page, entry);
166 entry = dba_array_new(1, DBA_STR | DBA_GROW);
167 dba_array_add(page, entry);
168 if (arch != NULL && *arch != '\0') {
169 entry = dba_array_new(1, DBA_STR | DBA_GROW);
170 dba_array_add(entry, (void *)arch);
171 } else
172 entry = NULL;
173 dba_array_add(page, entry);
174 dba_array_add(page, mandoc_strdup(desc));
175 entry = dba_array_new(1, DBA_STR | DBA_GROW);
176 dba_array_add(entry, prepend(file, form));
177 dba_array_add(page, entry);
178 dba_array_add(pages, page);
179 return page;
180 }
181
182 /*
183 * Add a section, architecture, or file name to an existing page.
184 * Passing the NULL pointer for the architecture makes the page MI.
185 * In that case, any earlier or later architectures are ignored.
186 */
187 void
dba_page_add(struct dba_array * page,int32_t ie,const char * str)188 dba_page_add(struct dba_array *page, int32_t ie, const char *str)
189 {
190 struct dba_array *entries;
191 char *entry;
192
193 entries = dba_array_get(page, ie);
194 if (ie == DBP_ARCH) {
195 if (entries == NULL)
196 return;
197 if (str == NULL || *str == '\0') {
198 dba_array_free(entries);
199 dba_array_set(page, DBP_ARCH, NULL);
200 return;
201 }
202 }
203 if (*str == '\0')
204 return;
205 dba_array_FOREACH(entries, entry) {
206 if (ie == DBP_FILE && *entry < ' ')
207 entry++;
208 if (strcmp(entry, str) == 0)
209 return;
210 }
211 dba_array_add(entries, (void *)str);
212 }
213
214 /*
215 * Add an additional name to an existing page.
216 */
217 void
dba_page_alias(struct dba_array * page,const char * name,uint64_t mask)218 dba_page_alias(struct dba_array *page, const char *name, uint64_t mask)
219 {
220 struct dba_array *entries;
221 char *entry;
222 char maskbyte;
223
224 if (*name == '\0')
225 return;
226 maskbyte = mask & NAME_MASK;
227 entries = dba_array_get(page, DBP_NAME);
228 dba_array_FOREACH(entries, entry) {
229 if (strcmp(entry + 1, name) == 0) {
230 *entry |= maskbyte;
231 return;
232 }
233 }
234 dba_array_add(entries, prepend(name, maskbyte));
235 }
236
/*
 * Build the string consisting of inbyte followed by instr.
 * The result lives in one static buffer that only ever grows,
 * so it is overwritten by the next call and must not be freed;
 * callers have to use or copy it right away.
 */
static void *
prepend(const char *instr, char inbyte)
{
	static char	*buf = NULL;
	static size_t	 bufsz = 0;
	size_t		 need;

	/* The input including its NUL; the extra byte comes on top. */

	need = strlen(instr) + 1;
	if (bufsz < need) {
		buf = mandoc_realloc(buf, need + 1);
		bufsz = need;
	}
	buf[0] = inbyte;
	memcpy(&buf[1], instr, need);
	return buf;
}
256
257 /*
258 * Write the pages table to disk; the format is:
259 * - One integer containing the number of pages.
260 * - For each page, five pointers to the names, sections,
261 * architectures, description, and file names of the page.
262 * MI pages write 0 instead of the architecture pointer.
263 * - One list each for names, sections, architectures, descriptions and
264 * file names. The description for each page ends with a NUL byte.
265 * For all the other lists, each string ends with a NUL byte,
266 * and the last string for a page ends with two NUL bytes.
267 * - To assure alignment of following integers,
268 * the end is padded with NUL bytes up to a multiple of four bytes.
269 */
270 static void
dba_pages_write(struct dba_array * pages)271 dba_pages_write(struct dba_array *pages)
272 {
273 struct dba_array *page, *entry;
274 int32_t pos_pages, pos_end;
275
276 pos_pages = dba_array_writelen(pages, 5);
277 dba_array_FOREACH(pages, page) {
278 dba_array_setpos(page, DBP_NAME, dba_tell());
279 entry = dba_array_get(page, DBP_NAME);
280 dba_array_sort(entry, compare_names);
281 dba_array_writelst(entry);
282 }
283 dba_array_FOREACH(pages, page) {
284 dba_array_setpos(page, DBP_SECT, dba_tell());
285 entry = dba_array_get(page, DBP_SECT);
286 dba_array_sort(entry, compare_strings);
287 dba_array_writelst(entry);
288 }
289 dba_array_FOREACH(pages, page) {
290 if ((entry = dba_array_get(page, DBP_ARCH)) != NULL) {
291 dba_array_setpos(page, DBP_ARCH, dba_tell());
292 dba_array_sort(entry, compare_strings);
293 dba_array_writelst(entry);
294 } else
295 dba_array_setpos(page, DBP_ARCH, 0);
296 }
297 dba_array_FOREACH(pages, page) {
298 dba_array_setpos(page, DBP_DESC, dba_tell());
299 dba_str_write(dba_array_get(page, DBP_DESC));
300 }
301 dba_array_FOREACH(pages, page) {
302 dba_array_setpos(page, DBP_FILE, dba_tell());
303 dba_array_writelst(dba_array_get(page, DBP_FILE));
304 }
305 pos_end = dba_align();
306 dba_seek(pos_pages);
307 dba_array_FOREACH(pages, page)
308 dba_array_writepos(page);
309 dba_seek(pos_end);
310 }
311
/*
 * Sort callback for name lists; both arguments point to pointers
 * to strings starting with a mask byte.
 * Larger mask bytes sort first.  Among equal mask bytes, names are
 * ordered ignoring case, and names only differing in case are
 * ordered with strcmp(3).
 */
static int
compare_names(const void *vp1, const void *vp2)
{
	const char	*lhs, *rhs;
	int		 rc;

	lhs = *(const char * const *)vp1;
	rhs = *(const char * const *)vp2;

	/* Reversed on purpose: descending by mask byte. */
	rc = rhs[0] - lhs[0];
	if (rc != 0)
		return rc;

	rc = strcasecmp(lhs + 1, rhs + 1);
	if (rc != 0)
		return rc;

	return strcmp(lhs + 1, rhs + 1);
}
324
/*
 * Sort callback for lists of plain strings; both arguments
 * point to pointers to strings, which are ordered with strcmp(3).
 */
static int
compare_strings(const void *vp1, const void *vp2)
{
	const char * const	*lhs = vp1;
	const char * const	*rhs = vp2;

	return strcmp(*lhs, *rhs);
}
334
335 /*** functions for handling macros ************************************/
336
337 /*
338 * In the hash table for a single macro, look up an entry by
339 * the macro value or add an empty one if it doesn't exist yet.
340 */
341 static struct macro_entry *
get_macro_entry(struct ohash * macro,const char * value,int32_t np)342 get_macro_entry(struct ohash *macro, const char *value, int32_t np)
343 {
344 struct macro_entry *entry;
345 size_t len;
346 unsigned int slot;
347
348 slot = ohash_qlookup(macro, value);
349 if ((entry = ohash_find(macro, slot)) == NULL) {
350 len = strlen(value) + 1;
351 entry = mandoc_malloc(sizeof(*entry) + len);
352 memcpy(&entry->value, value, len);
353 entry->pages = dba_array_new(np, DBA_GROW);
354 ohash_insert(macro, slot, entry);
355 }
356 return entry;
357 }
358
359 /*
360 * In addition to get_macro_entry(), add multiple page references,
361 * converting them from the on-disk format (byte offsets in the file)
362 * to page pointers in memory.
363 */
364 void
dba_macro_new(struct dba * dba,int32_t im,const char * value,const int32_t * pp)365 dba_macro_new(struct dba *dba, int32_t im, const char *value,
366 const int32_t *pp)
367 {
368 struct macro_entry *entry;
369 const int32_t *ip;
370 int32_t np;
371
372 np = 0;
373 for (ip = pp; *ip; ip++)
374 np++;
375
376 entry = get_macro_entry(dba_array_get(dba->macros, im), value, np);
377 for (ip = pp; *ip; ip++)
378 dba_array_add(entry->pages, dba_array_get(dba->pages,
379 be32toh(*ip) / 5 / sizeof(*ip) - 1));
380 }
381
382 /*
383 * In addition to get_macro_entry(), add one page reference,
384 * directly taking the in-memory page pointer as an argument.
385 */
386 void
dba_macro_add(struct dba_array * macros,int32_t im,const char * value,struct dba_array * page)387 dba_macro_add(struct dba_array *macros, int32_t im, const char *value,
388 struct dba_array *page)
389 {
390 struct macro_entry *entry;
391
392 if (*value == '\0')
393 return;
394 entry = get_macro_entry(dba_array_get(macros, im), value, 1);
395 dba_array_add(entry->pages, page);
396 }
397
398 /*
399 * Write the macros table to disk; the format is:
400 * - The number of macro tables (actually, MACRO_MAX).
401 * - That number of pointers to the individual macro tables.
402 * - The individual macro tables.
403 */
404 static void
dba_macros_write(struct dba_array * macros)405 dba_macros_write(struct dba_array *macros)
406 {
407 struct ohash *macro;
408 int32_t im, pos_macros, pos_end;
409
410 pos_macros = dba_array_writelen(macros, 1);
411 im = 0;
412 dba_array_FOREACH(macros, macro) {
413 dba_array_setpos(macros, im++, dba_tell());
414 dba_macro_write(macro);
415 }
416 pos_end = dba_tell();
417 dba_seek(pos_macros);
418 dba_array_writepos(macros);
419 dba_seek(pos_end);
420 }
421
422 /*
423 * Write one individual macro table to disk; the format is:
424 * - The number of entries in the table.
425 * - For each entry, two pointers, the first one to the value
426 * and the second one to the list of pages.
427 * - A list of values, each ending in a NUL byte.
428 * - To assure alignment of following integers,
429 * padding with NUL bytes up to a multiple of four bytes.
430 * - A list of pointers to pages, each list ending in a 0 integer.
431 */
432 static void
dba_macro_write(struct ohash * macro)433 dba_macro_write(struct ohash *macro)
434 {
435 struct macro_entry **entries, *entry;
436 struct dba_array *page;
437 int32_t *kpos, *dpos;
438 unsigned int ie, ne, slot;
439 int use;
440 int32_t addr, pos_macro, pos_end;
441
442 /* Temporary storage for filtering and sorting. */
443
444 ne = ohash_entries(macro);
445 entries = mandoc_reallocarray(NULL, ne, sizeof(*entries));
446 kpos = mandoc_reallocarray(NULL, ne, sizeof(*kpos));
447 dpos = mandoc_reallocarray(NULL, ne, sizeof(*dpos));
448
449 /* Build a list of non-empty entries and sort it. */
450
451 ne = 0;
452 for (entry = ohash_first(macro, &slot); entry != NULL;
453 entry = ohash_next(macro, &slot)) {
454 use = 0;
455 dba_array_FOREACH(entry->pages, page)
456 if (dba_array_getpos(page))
457 use = 1;
458 if (use)
459 entries[ne++] = entry;
460 }
461 qsort(entries, ne, sizeof(*entries), compare_entries);
462
463 /* Number of entries, and space for the pointer pairs. */
464
465 dba_int_write(ne);
466 pos_macro = dba_skip(2, ne);
467
468 /* String table. */
469
470 for (ie = 0; ie < ne; ie++) {
471 kpos[ie] = dba_tell();
472 dba_str_write(entries[ie]->value);
473 }
474 dba_align();
475
476 /* Pages table. */
477
478 for (ie = 0; ie < ne; ie++) {
479 dpos[ie] = dba_tell();
480 dba_array_FOREACH(entries[ie]->pages, page)
481 if ((addr = dba_array_getpos(page)))
482 dba_int_write(addr);
483 dba_int_write(0);
484 }
485 pos_end = dba_tell();
486
487 /* Fill in the pointer pairs. */
488
489 dba_seek(pos_macro);
490 for (ie = 0; ie < ne; ie++) {
491 dba_int_write(kpos[ie]);
492 dba_int_write(dpos[ie]);
493 }
494 dba_seek(pos_end);
495
496 free(entries);
497 free(kpos);
498 free(dpos);
499 }
500
501 static int
compare_entries(const void * vp1,const void * vp2)502 compare_entries(const void *vp1, const void *vp2)
503 {
504 const struct macro_entry *ep1, *ep2;
505
506 ep1 = *(const struct macro_entry * const *)vp1;
507 ep2 = *(const struct macro_entry * const *)vp2;
508 return strcmp(ep1->value, ep2->value);
509 }
510