1 /* $Id: man_html.c,v 1.188 2025/06/26 17:06:34 schwarze Exp $ */
2 /*
3 * Copyright (c) 2013-2020,2022-2023,2025 Ingo Schwarze <schwarze@openbsd.org>
4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * HTML formatter for man(7) used by mandoc(1).
19 */
20 #include "config.h"
21
22 #include <sys/types.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29
30 #include "mandoc_aux.h"
31 #include "mandoc.h"
32 #include "roff.h"
33 #include "man.h"
34 #include "out.h"
35 #include "html.h"
36 #include "main.h"
37
/*
 * Common parameter list shared by the node printer and by all
 * per-macro handlers in this file: the document metadata (man),
 * the syntax tree node being formatted (n), and the HTML output
 * state (h).  Function bodies refer to these three names directly.
 */
#define	MAN_ARGS	  const struct roff_meta *man, \
			  struct roff_node *n, \
			  struct html *h
41
/*
 * Pair of handlers associated with one man(7) macro.
 * In the code shown here, only the pre handler is ever called:
 * print_man_node() runs it before the children of a node and
 * descends into the children only if it returns non-zero.
 */
struct	man_html_act {
	int		(*pre)(MAN_ARGS);	/* may be NULL */
	int		(*post)(MAN_ARGS);	/* NULL in every table entry */
};
46
/*
 * Forward declarations of the file-local helpers and of the
 * per-macro pre handlers referenced by the dispatch table below.
 */
static	void		  print_man_head(const struct roff_meta *,
				struct html *);
static	void		  print_man_nodelist(MAN_ARGS);
static	void		  print_man_node(MAN_ARGS);
static	char		  list_continues(const struct roff_node *,
				const struct roff_node *);
static	int		  man_B_pre(MAN_ARGS);
static	int		  man_IP_pre(MAN_ARGS);
static	int		  man_I_pre(MAN_ARGS);
static	int		  man_MR_pre(MAN_ARGS);
static	int		  man_OP_pre(MAN_ARGS);
static	int		  man_PP_pre(MAN_ARGS);
static	int		  man_RS_pre(MAN_ARGS);
static	int		  man_SH_pre(MAN_ARGS);
static	int		  man_SM_pre(MAN_ARGS);
static	int		  man_SY_pre(MAN_ARGS);
static	int		  man_UR_pre(MAN_ARGS);
static	int		  man_alt_pre(MAN_ARGS);
static	int		  man_ign_pre(MAN_ARGS);
static	int		  man_in_pre(MAN_ARGS);
static	void		  man_root_post(const struct roff_meta *,
				struct html *);
static	void		  man_root_pre(const struct roff_meta *,
				struct html *);
71
/*
 * Dispatch table of handlers, one slot per man(7) macro.
 * print_man_node() indexes it with (n->tok - MAN_TH), so the
 * slots have to stay in the order of the macro tokens, starting
 * at MAN_TH; the trailing comment on each line names the macro
 * the slot is meant for.
 * A NULL pre handler means that no macro-specific output is
 * generated and that the children of the node are formatted.
 */
static	const struct man_html_act man_html_acts[MAN_MAX - MAN_TH] = {
	{ NULL, NULL }, /* TH */
	{ man_SH_pre, NULL }, /* SH */
	{ man_SH_pre, NULL }, /* SS */
	{ man_IP_pre, NULL }, /* TP */
	{ man_IP_pre, NULL }, /* TQ */
	{ man_PP_pre, NULL }, /* LP */
	{ man_PP_pre, NULL }, /* PP */
	{ man_PP_pre, NULL }, /* P */
	{ man_IP_pre, NULL }, /* IP */
	{ man_PP_pre, NULL }, /* HP */
	{ man_SM_pre, NULL }, /* SM */
	{ man_SM_pre, NULL }, /* SB */
	{ man_alt_pre, NULL }, /* BI */
	{ man_alt_pre, NULL }, /* IB */
	{ man_alt_pre, NULL }, /* BR */
	{ man_alt_pre, NULL }, /* RB */
	{ NULL, NULL }, /* R */
	{ man_B_pre, NULL }, /* B */
	{ man_I_pre, NULL }, /* I */
	{ man_alt_pre, NULL }, /* IR */
	{ man_alt_pre, NULL }, /* RI */
	{ NULL, NULL }, /* RE */
	{ man_RS_pre, NULL }, /* RS */
	{ man_ign_pre, NULL }, /* DT */
	{ man_ign_pre, NULL }, /* UC */
	{ man_ign_pre, NULL }, /* PD */
	{ man_ign_pre, NULL }, /* AT */
	{ man_in_pre, NULL }, /* in */
	{ man_SY_pre, NULL }, /* SY */
	{ NULL, NULL }, /* YS */
	{ man_OP_pre, NULL }, /* OP */
	{ NULL, NULL }, /* EX */
	{ NULL, NULL }, /* EE */
	{ man_UR_pre, NULL }, /* UR */
	{ NULL, NULL }, /* UE */
	{ man_UR_pre, NULL }, /* MT */
	{ NULL, NULL }, /* ME */
	{ man_MR_pre, NULL }, /* MR */
};
112
113
114 void
html_man(void * arg,const struct roff_meta * man)115 html_man(void *arg, const struct roff_meta *man)
116 {
117 struct html *h;
118 struct roff_node *n;
119 struct tag *t;
120
121 h = (struct html *)arg;
122 n = man->first->child;
123
124 if ((h->oflags & HTML_FRAGMENT) == 0) {
125 print_gen_decls(h);
126 print_otag(h, TAG_HTML, "");
127 t = print_otag(h, TAG_HEAD, "");
128 print_man_head(man, h);
129 print_tagq(h, t);
130 if (n != NULL && n->type == ROFFT_COMMENT)
131 print_gen_comment(h, n);
132 print_otag(h, TAG_BODY, "");
133 }
134
135 man_root_pre(man, h);
136 t = print_otag(h, TAG_MAIN, "c", "manual-text");
137 print_man_nodelist(man, n, h);
138 print_tagq(h, t);
139 man_root_post(man, h);
140 print_tagq(h, NULL);
141 }
142
143 static void
print_man_head(const struct roff_meta * man,struct html * h)144 print_man_head(const struct roff_meta *man, struct html *h)
145 {
146 char *cp;
147
148 print_gen_head(h);
149 mandoc_asprintf(&cp, "%s(%s)", man->title, man->msec);
150 print_otag(h, TAG_TITLE, "");
151 print_text(h, cp);
152 free(cp);
153 }
154
155 static void
print_man_nodelist(MAN_ARGS)156 print_man_nodelist(MAN_ARGS)
157 {
158 while (n != NULL) {
159 print_man_node(man, n, h);
160 n = n->next;
161 }
162 }
163
/*
 * Format a single syntax tree node and, unless its handler
 * returns zero, all of its children.
 * Text, equation and table nodes are printed directly; roff(7)
 * requests (tokens below ROFF_MAX) are handed to roff_html_pre();
 * man(7) macros are dispatched through the man_html_acts table.
 * Before returning, elements opened while formatting the node
 * are closed again, relative to the element that was current
 * when formatting of the node started.
 */
static void
print_man_node(MAN_ARGS)
{
	struct tag	*t;	/* element that was current on entry */
	int		 child;	/* whether to descend into children */

	/* Comments and nodes flagged NODE_NOPRT produce no output. */
	if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
		return;

	/*
	 * Switch the output to the fill mode the node asks for.
	 * In no-fill mode, a node starting an input line also starts
	 * an output line, except for the fi request itself and for
	 * nodes directly following YS.
	 * NOTE(review): html_fillmode() presumably returns the mode
	 * that was active before the call - confirm.
	 */
	if ((n->flags & NODE_NOFILL) == 0)
		html_fillmode(h, ROFF_fi);
	else if (html_fillmode(h, ROFF_nf) == ROFF_nf &&
	    n->tok != ROFF_fi && n->flags & NODE_LINE &&
	    (n->prev == NULL || n->prev->tok != MAN_YS))
		print_endline(h);

	child = 1;
	switch (n->type) {
	case ROFFT_TEXT:
		/* An empty text node only ends the output line. */
		if (*n->string == '\0') {
			print_endline(h);
			return;
		}
		/*
		 * A text line starting with a blank forces a line
		 * break unless HTML_NONEWLINE is set; otherwise,
		 * a closing delimiter gets no space in front of it.
		 */
		if (*n->string == ' ' && n->flags & NODE_LINE &&
		    (h->flags & HTML_NONEWLINE) == 0)
			print_otag(h, TAG_BR, "");
		else if (n->flags & NODE_DELIMC)
			h->flags |= HTML_NOSPACE;
		/*
		 * Remember the current element and take a reference
		 * to it; the reference is dropped again below.
		 */
		t = h->tag;
		t->refcnt++;
		print_text(h, n->string);
		break;
	case ROFFT_EQN:
		t = h->tag;
		t->refcnt++;
		print_eqn(h, n->eqn);
		break;
	case ROFFT_TBL:
		/*
		 * This will take care of initialising all of the table
		 * state data for the first table, then tearing it down
		 * for the last one.
		 */
		print_tbl(h, n->span);
		return;
	default:
		/*
		 * Close out scope of font prior to opening a macro
		 * scope.
		 */
		if (h->metac != ESCAPE_FONTROMAN) {
			h->metal = h->metac;
			h->metac = ESCAPE_FONTROMAN;
		}

		/*
		 * Close out the current table, if it's open, and unset
		 * the "meta" table state. This will be reopened on the
		 * next table element.
		 */
		if (h->tblt != NULL)
			print_tblclose(h);
		t = h->tag;
		t->refcnt++;
		/*
		 * roff(7) requests are not handled by this file and
		 * their children are not formatted here.
		 */
		if (n->tok < ROFF_MAX) {
			roff_html_pre(h, n);
			t->refcnt--;
			print_stagq(h, t);
			return;
		}
		/* Everything else must be a man(7) macro. */
		assert(n->tok >= MAN_TH && n->tok < MAN_MAX);
		if (man_html_acts[n->tok - MAN_TH].pre != NULL)
			child = (*man_html_acts[n->tok - MAN_TH].pre)(man,
			    n, h);
		break;
	}

	if (child && n->child != NULL)
		print_man_nodelist(man, n->child, h);

	/* This will automatically close out any font scope. */
	t->refcnt--;
	if (n->type == ROFFT_BLOCK &&
	    (n->tok == MAN_IP || n->tok == MAN_TP || n->tok == MAN_TQ)) {
		/*
		 * Find the enclosing list element by following the
		 * chain of elements starting at the current one.
		 * This relies on a DL or UL element being open.
		 */
		t = h->tag;
		while (t->tag != TAG_DL && t->tag != TAG_UL)
			t = t->next;
		/*
		 * Close the list if no further item of the same type
		 * follows; otherwise, close the item only.
		 */
		if (list_continues(n, roff_node_next(n)) == '\0') {
			print_tagq(h, t);
			t = NULL;
		}
	}
	/* Skipped only if the whole list was just closed above. */
	if (t != NULL)
		print_stagq(h, t);
}
263
264 static void
man_root_pre(const struct roff_meta * man,struct html * h)265 man_root_pre(const struct roff_meta *man, struct html *h)
266 {
267 struct tag *t;
268 char *title;
269
270 assert(man->title);
271 assert(man->msec);
272 mandoc_asprintf(&title, "%s(%s)", man->title, man->msec);
273
274 t = print_otag(h, TAG_DIV, "cr?", "head", "doc-pageheader",
275 "aria-label", "Manual header line");
276
277 print_otag(h, TAG_SPAN, "c", "head-ltitle");
278 print_text(h, title);
279 print_stagq(h, t);
280
281 print_otag(h, TAG_SPAN, "c", "head-vol");
282 if (man->vol != NULL)
283 print_text(h, man->vol);
284 print_stagq(h, t);
285
286 print_otag(h, TAG_SPAN, "c", "head-rtitle");
287 print_text(h, title);
288 print_tagq(h, t);
289 free(title);
290 }
291
292 static void
man_root_post(const struct roff_meta * man,struct html * h)293 man_root_post(const struct roff_meta *man, struct html *h)
294 {
295 struct tag *t;
296 char *title;
297
298 assert(man->title != NULL);
299 if (man->msec == NULL)
300 title = mandoc_strdup(man->title);
301 else
302 mandoc_asprintf(&title, "%s(%s)", man->title, man->msec);
303
304 t = print_otag(h, TAG_DIV, "cr?", "foot", "doc-pagefooter",
305 "aria-label", "Manual footer line");
306
307 print_otag(h, TAG_SPAN, "c", "foot-left");
308 if (man->os != NULL)
309 print_text(h, man->os);
310 print_stagq(h, t);
311
312 print_otag(h, TAG_SPAN, "c", "foot-date");
313 print_text(h, man->date);
314 print_stagq(h, t);
315
316 print_otag(h, TAG_SPAN, "c", "foot-right");
317 print_text(h, title);
318 print_tagq(h, t);
319 free(title);
320 }
321
322 static int
man_SH_pre(MAN_ARGS)323 man_SH_pre(MAN_ARGS)
324 {
325 const char *class;
326 enum htmltag tag;
327
328 if (n->tok == MAN_SH) {
329 tag = TAG_H2;
330 class = "Sh";
331 } else {
332 tag = TAG_H3;
333 class = "Ss";
334 }
335 switch (n->type) {
336 case ROFFT_BLOCK:
337 html_close_paragraph(h);
338 print_otag(h, TAG_SECTION, "c", class);
339 break;
340 case ROFFT_HEAD:
341 print_otag_id(h, tag, class, n);
342 break;
343 case ROFFT_BODY:
344 break;
345 default:
346 abort();
347 }
348 return 1;
349 }
350
351 static int
man_alt_pre(MAN_ARGS)352 man_alt_pre(MAN_ARGS)
353 {
354 const struct roff_node *nn;
355 struct tag *t;
356 int i;
357 enum htmltag fp;
358
359 for (i = 0, nn = n->child; nn != NULL; nn = nn->next, i++) {
360 switch (n->tok) {
361 case MAN_BI:
362 fp = i % 2 ? TAG_I : TAG_B;
363 break;
364 case MAN_IB:
365 fp = i % 2 ? TAG_B : TAG_I;
366 break;
367 case MAN_RI:
368 fp = i % 2 ? TAG_I : TAG_MAX;
369 break;
370 case MAN_IR:
371 fp = i % 2 ? TAG_MAX : TAG_I;
372 break;
373 case MAN_BR:
374 fp = i % 2 ? TAG_MAX : TAG_B;
375 break;
376 case MAN_RB:
377 fp = i % 2 ? TAG_B : TAG_MAX;
378 break;
379 default:
380 abort();
381 }
382
383 if (i)
384 h->flags |= HTML_NOSPACE;
385
386 if (fp != TAG_MAX)
387 t = print_otag(h, fp, "");
388
389 print_text(h, nn->string);
390
391 if (fp != TAG_MAX)
392 print_tagq(h, t);
393 }
394 return 0;
395 }
396
397 static int
man_SM_pre(MAN_ARGS)398 man_SM_pre(MAN_ARGS)
399 {
400 print_otag(h, TAG_SMALL, "");
401 if (n->tok == MAN_SB)
402 print_otag(h, TAG_B, "");
403 return 1;
404 }
405
406 static int
man_PP_pre(MAN_ARGS)407 man_PP_pre(MAN_ARGS)
408 {
409 switch (n->type) {
410 case ROFFT_BLOCK:
411 html_close_paragraph(h);
412 break;
413 case ROFFT_HEAD:
414 return 0;
415 case ROFFT_BODY:
416 if (n->child != NULL &&
417 (n->child->flags & NODE_NOFILL) == 0)
418 print_otag(h, TAG_P, "c",
419 n->tok == MAN_HP ? "Pp HP" : "Pp");
420 break;
421 default:
422 abort();
423 }
424 return 1;
425 }
426
427 static char
list_continues(const struct roff_node * n1,const struct roff_node * n2)428 list_continues(const struct roff_node *n1, const struct roff_node *n2)
429 {
430 const char *s1, *s2;
431 char c1, c2;
432
433 if (n1 == NULL || n1->type != ROFFT_BLOCK ||
434 n2 == NULL || n2->type != ROFFT_BLOCK)
435 return '\0';
436 if ((n1->tok == MAN_TP || n1->tok == MAN_TQ) &&
437 (n2->tok == MAN_TP || n2->tok == MAN_TQ))
438 return ' ';
439 if (n1->tok != MAN_IP || n2->tok != MAN_IP)
440 return '\0';
441 n1 = n1->head->child;
442 n2 = n2->head->child;
443 s1 = n1 == NULL ? "" : n1->string;
444 s2 = n2 == NULL ? "" : n2->string;
445 c1 = strcmp(s1, "*") == 0 ? '*' :
446 strcmp(s1, "\\-") == 0 ? '-' :
447 strcmp(s1, "\\(bu") == 0 ? 'b' :
448 strcmp(s1, "\\[bu]") == 0 ? 'b' : ' ';
449 c2 = strcmp(s2, "*") == 0 ? '*' :
450 strcmp(s2, "\\-") == 0 ? '-' :
451 strcmp(s2, "\\(bu") == 0 ? 'b' :
452 strcmp(s2, "\\[bu]") == 0 ? 'b' : ' ';
453 return c1 != c2 ? '\0' : c1 == 'b' ? '*' : c1;
454 }
455
456 static int
man_IP_pre(MAN_ARGS)457 man_IP_pre(MAN_ARGS)
458 {
459 struct roff_node *nn;
460 const char *list_class;
461 enum htmltag list_elem, body_elem;
462 char list_type;
463
464 nn = n->type == ROFFT_BLOCK ? n : n->parent;
465 list_type = list_continues(roff_node_prev(nn), nn);
466 if (list_type == '\0') {
467 /* Start a new list. */
468 list_type = list_continues(nn, roff_node_next(nn));
469 if (list_type == '\0')
470 list_type = ' ';
471 switch (list_type) {
472 case ' ':
473 list_class = "Bl-tag";
474 list_elem = TAG_DL;
475 break;
476 case '*':
477 list_class = "Bl-bullet";
478 list_elem = TAG_UL;
479 break;
480 case '-':
481 list_class = "Bl-dash";
482 list_elem = TAG_UL;
483 break;
484 default:
485 abort();
486 }
487 } else {
488 /* Continue a list that was started earlier. */
489 list_class = NULL;
490 list_elem = TAG_MAX;
491 }
492 body_elem = list_type == ' ' ? TAG_DD : TAG_LI;
493
494 switch (n->type) {
495 case ROFFT_BLOCK:
496 html_close_paragraph(h);
497 if (list_elem != TAG_MAX)
498 print_otag(h, list_elem, "c", list_class);
499 return 1;
500 case ROFFT_HEAD:
501 if (body_elem == TAG_LI)
502 return 0;
503 print_otag_id(h, TAG_DT, NULL, n);
504 break;
505 case ROFFT_BODY:
506 print_otag(h, body_elem, "");
507 return 1;
508 default:
509 abort();
510 }
511 switch(n->tok) {
512 case MAN_IP: /* Only print the first header element. */
513 if (n->child != NULL)
514 print_man_node(man, n->child, h);
515 break;
516 case MAN_TP: /* Only print next-line header elements. */
517 case MAN_TQ:
518 nn = n->child;
519 while (nn != NULL && (NODE_LINE & nn->flags) == 0)
520 nn = nn->next;
521 while (nn != NULL) {
522 print_man_node(man, nn, h);
523 nn = nn->next;
524 }
525 break;
526 default:
527 abort();
528 }
529 return 0;
530 }
531
532 static int
man_MR_pre(MAN_ARGS)533 man_MR_pre(MAN_ARGS)
534 {
535 struct tag *t;
536 const char *name, *section, *suffix;
537 char *label;
538
539 html_setfont(h, ESCAPE_FONTROMAN);
540 name = section = suffix = label = NULL;
541 if (n->child != NULL) {
542 name = n->child->string;
543 if (n->child->next != NULL) {
544 section = n->child->next->string;
545 mandoc_asprintf(&label,
546 "%s, section %s", name, section);
547 if (n->child->next->next != NULL)
548 suffix = n->child->next->next->string;
549 }
550 }
551
552 if (name != NULL && section != NULL && h->base_man1 != NULL)
553 t = print_otag(h, TAG_A, "chM?", "Xr",
554 name, section, "aria-label", label);
555 else
556 t = print_otag(h, TAG_A, "c?", "Xr", "aria-label", label);
557
558 free(label);
559 if (name != NULL) {
560 print_text(h, name);
561 h->flags |= HTML_NOSPACE;
562 }
563 print_text(h, "(");
564 h->flags |= HTML_NOSPACE;
565 if (section != NULL) {
566 print_text(h, section);
567 h->flags |= HTML_NOSPACE;
568 }
569 print_text(h, ")");
570 print_tagq(h, t);
571 if (suffix != NULL) {
572 h->flags |= HTML_NOSPACE;
573 print_text(h, suffix);
574 }
575 return 0;
576 }
577
578 static int
man_OP_pre(MAN_ARGS)579 man_OP_pre(MAN_ARGS)
580 {
581 struct tag *tt;
582
583 print_text(h, "[");
584 h->flags |= HTML_NOSPACE;
585 tt = print_otag(h, TAG_SPAN, "c", "Op");
586
587 if ((n = n->child) != NULL) {
588 print_otag(h, TAG_B, "");
589 print_text(h, n->string);
590 }
591
592 print_stagq(h, tt);
593
594 if (n != NULL && n->next != NULL) {
595 print_otag(h, TAG_I, "");
596 print_text(h, n->next->string);
597 }
598
599 print_stagq(h, tt);
600 h->flags |= HTML_NOSPACE;
601 print_text(h, "]");
602 return 0;
603 }
604
605 static int
man_B_pre(MAN_ARGS)606 man_B_pre(MAN_ARGS)
607 {
608 print_otag(h, TAG_B, "");
609 return 1;
610 }
611
612 static int
man_I_pre(MAN_ARGS)613 man_I_pre(MAN_ARGS)
614 {
615 print_otag(h, TAG_I, "");
616 return 1;
617 }
618
619 static int
man_in_pre(MAN_ARGS)620 man_in_pre(MAN_ARGS)
621 {
622 print_otag(h, TAG_BR, "");
623 return 0;
624 }
625
626 static int
man_ign_pre(MAN_ARGS)627 man_ign_pre(MAN_ARGS)
628 {
629 return 0;
630 }
631
632 static int
man_RS_pre(MAN_ARGS)633 man_RS_pre(MAN_ARGS)
634 {
635 switch (n->type) {
636 case ROFFT_BLOCK:
637 html_close_paragraph(h);
638 break;
639 case ROFFT_HEAD:
640 return 0;
641 case ROFFT_BODY:
642 print_otag(h, TAG_DIV, "c", "Bd-indent");
643 break;
644 default:
645 abort();
646 }
647 return 1;
648 }
649
650 static int
man_SY_pre(MAN_ARGS)651 man_SY_pre(MAN_ARGS)
652 {
653 switch (n->type) {
654 case ROFFT_BLOCK:
655 html_close_paragraph(h);
656 print_otag(h, TAG_TABLE, "c", "Nm");
657 print_otag(h, TAG_TR, "");
658 break;
659 case ROFFT_HEAD:
660 print_otag(h, TAG_TD, "");
661 print_otag(h, TAG_CODE, "c", "Nm");
662 break;
663 case ROFFT_BODY:
664 print_otag(h, TAG_TD, "");
665 break;
666 default:
667 abort();
668 }
669 return 1;
670 }
671
672 static int
man_UR_pre(MAN_ARGS)673 man_UR_pre(MAN_ARGS)
674 {
675 char *cp;
676
677 n = n->child;
678 assert(n->type == ROFFT_HEAD);
679 if (n->child != NULL) {
680 assert(n->child->type == ROFFT_TEXT);
681 if (n->tok == MAN_MT) {
682 mandoc_asprintf(&cp, "mailto:%s", n->child->string);
683 print_otag(h, TAG_A, "ch", "Mt", cp);
684 free(cp);
685 } else
686 print_otag(h, TAG_A, "ch", "Lk", n->child->string);
687 }
688
689 assert(n->next->type == ROFFT_BODY);
690 if (n->next->child != NULL)
691 n = n->next;
692
693 print_man_nodelist(man, n->child, h);
694 return 0;
695 }
696