1 /* $Id: term.c,v 1.294 2025/08/01 14:59:39 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010-2022, 2025 Ingo Schwarze <schwarze@openbsd.org>
4 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdint.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "out.h"
32 #include "term.h"
33 #include "main.h"
34
/* Helper functions private to this file, in alphabetical order. */
static	void	 adjbuf(struct termp_col *, size_t);
static	void	 bufferc(struct termp *, char);
static	size_t	 cond_width(const struct termp *, int, int *);
static	void	 encode(struct termp *, const char *, size_t);
static	void	 encode1(struct termp *, int);
static	void	 endline(struct termp *);
static	void	 term_field(struct termp *, size_t, size_t);
static	void	 term_fill(struct termp *, size_t *, size_t *, size_t);
44
45
46 void
term_setcol(struct termp * p,size_t maxtcol)47 term_setcol(struct termp *p, size_t maxtcol)
48 {
49 if (maxtcol > p->maxtcol) {
50 p->tcols = mandoc_recallocarray(p->tcols,
51 p->maxtcol, maxtcol, sizeof(*p->tcols));
52 p->maxtcol = maxtcol;
53 }
54 p->lasttcol = maxtcol - 1;
55 p->tcol = p->tcols;
56 }
57
58 void
term_free(struct termp * p)59 term_free(struct termp *p)
60 {
61 term_tab_free();
62 for (p->tcol = p->tcols; p->tcol < p->tcols + p->maxtcol; p->tcol++)
63 free(p->tcol->buf);
64 free(p->tcols);
65 free(p->fontq);
66 free(p);
67 }
68
69 void
term_begin(struct termp * p,term_margin head,term_margin foot,const struct roff_meta * arg)70 term_begin(struct termp *p, term_margin head,
71 term_margin foot, const struct roff_meta *arg)
72 {
73
74 p->headf = head;
75 p->footf = foot;
76 p->argf = arg;
77 (*p->begin)(p);
78 }
79
80 void
term_end(struct termp * p)81 term_end(struct termp *p)
82 {
83
84 (*p->end)(p);
85 }
86
87 /*
88 * Flush a chunk of text. By default, break the output line each time
89 * the right margin is reached, and continue output on the next line
90 * at the same offset as the chunk itself. By default, also break the
91 * output line at the end of the chunk. There are many flags modifying
92 * this behaviour, see the comments in the body of the function.
93 */
94 void
term_flushln(struct termp * p)95 term_flushln(struct termp *p)
96 {
97 /* Widths in basic units. */
98 size_t vbl; /* Whitespace to prepend to the output. */
99 size_t vbr; /* Actual visual position of the end of field. */
100 size_t vfield; /* Desired visual field width. */
101 size_t vtarget; /* Desired visual position of the right margin. */
102
103 /* Bytes. */
104 size_t ic; /* Byte index in the input buffer. */
105 size_t nbr; /* Number of bytes to print in this field. */
106
107 /*
108 * Normally, start writing at the left margin, but with the
109 * NOPAD flag, start writing at the current position instead.
110 */
111
112 vbl = (p->flags & TERMP_NOPAD) || p->tcol->offset < p->viscol ?
113 0 : p->tcol->offset - p->viscol;
114 if (p->minbl > 0 && vbl < term_len(p, p->minbl))
115 vbl = term_len(p, p->minbl);
116
117 if ((p->flags & TERMP_MULTICOL) == 0)
118 p->tcol->col = 0;
119
120 /* Loop over output lines. */
121
122 for (;;) {
123 vfield = p->tcol->rmargin > p->viscol + vbl ?
124 p->tcol->rmargin - p->viscol - vbl : 0;
125
126 /*
127 * Normally, break the line at the the right margin
128 * of the field, but with the NOBREAK flag, only
129 * break it at the max right margin of the screen,
130 * and with the BRNEVER flag, never break it at all.
131 */
132
133 vtarget = (p->flags & TERMP_NOBREAK) == 0 ? vfield :
134 p->maxrmargin > p->viscol + vbl ?
135 p->maxrmargin - p->viscol - vbl : 0;
136
137 /*
138 * Figure out how much text will fit in the field.
139 * If there is whitespace only, print nothing.
140 */
141
142 term_fill(p, &nbr, &vbr,
143 p->flags & TERMP_BRNEVER ? SIZE_MAX / 2 : vtarget);
144 if (nbr == 0)
145 break;
146
147 /*
148 * With the CENTER or RIGHT flag, increase the indentation
149 * to center the text between the left and right margins
150 * or to adjust it to the right margin, respectively.
151 */
152
153 if (vbr < vtarget) {
154 if (p->flags & TERMP_CENTER)
155 vbl += (vtarget - vbr) / 2;
156 else if (p->flags & TERMP_RIGHT)
157 vbl += vtarget - vbr;
158 }
159
160 /* Finally, print the field content. */
161
162 term_field(p, vbl, nbr);
163 if (vbr < vtarget)
164 p->tcol->taboff += vbr;
165 else
166 p->tcol->taboff += vtarget;
167 p->tcol->taboff += term_len(p, 1);
168
169 /*
170 * If there is no text left in the field, exit the loop.
171 * If the BRTRSP flag is set, consider trailing
172 * whitespace significant when deciding whether
173 * the field fits or not.
174 */
175
176 for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) {
177 switch (p->tcol->buf[ic]) {
178 case '\t':
179 if (p->flags & TERMP_BRTRSP)
180 vbr = term_tab_next(vbr);
181 continue;
182 case ' ':
183 if (p->flags & TERMP_BRTRSP)
184 vbr += term_len(p, 1);
185 continue;
186 case '\n':
187 case ASCII_NBRZW:
188 case ASCII_BREAK:
189 case ASCII_TABREF:
190 continue;
191 default:
192 break;
193 }
194 break;
195 }
196 if (ic == p->tcol->lastcol)
197 break;
198
199 /*
200 * At the location of an automatic line break, input
201 * space characters are consumed by the line break.
202 */
203
204 while (p->tcol->col < p->tcol->lastcol &&
205 p->tcol->buf[p->tcol->col] == ' ')
206 p->tcol->col++;
207
208 /*
209 * In multi-column mode, leave the rest of the text
210 * in the buffer to be handled by a subsequent
211 * invocation, such that the other columns of the
212 * table can be handled first.
213 * In single-column mode, simply break the line.
214 */
215
216 if (p->flags & TERMP_MULTICOL)
217 return;
218
219 endline(p);
220
221 /*
222 * Normally, start the next line at the same indentation
223 * as this one, but with the BRIND flag, start it at the
224 * right margin instead. This is used together with
225 * NOBREAK for the tags in various kinds of tagged lists.
226 */
227
228 vbl = p->flags & TERMP_BRIND ?
229 p->tcol->rmargin : p->tcol->offset;
230 }
231
232 /* Reset output state in preparation for the next field. */
233
234 p->col = p->tcol->col = p->tcol->lastcol = 0;
235 p->minbl = p->trailspace;
236 p->flags &= ~(TERMP_BACKAFTER | TERMP_BACKBEFORE | TERMP_NOPAD);
237
238 if (p->flags & TERMP_MULTICOL)
239 return;
240
241 /*
242 * The HANG flag means that the next field
243 * always follows on the same line.
244 * The NOBREAK flag means that the next field
245 * follows on the same line unless the field was overrun.
246 * Normally, break the line at the end of each field.
247 */
248
249 if ((p->flags & TERMP_HANG) == 0 &&
250 ((p->flags & TERMP_NOBREAK) == 0 ||
251 vbr + term_len(p, p->trailspace) > vfield + term_len(p, 1) / 2))
252 endline(p);
253 }
254
255 /*
256 * Store the number of input bytes to print in this field in *nbr
257 * and their total visual width in basic units in *vbr.
258 * If there is only whitespace in the field, both remain zero.
259 * The desired visual width of the field is provided by vtarget.
260 * If the first word is longer, the field will be overrun.
261 */
262 static void
term_fill(struct termp * p,size_t * nbr,size_t * vbr,size_t vtarget)263 term_fill(struct termp *p, size_t *nbr, size_t *vbr, size_t vtarget)
264 {
265 /* Widths in basic units. */
266 size_t vis; /* Visual position of the current character. */
267 size_t vn; /* Visual position of the next character. */
268 size_t enw; /* Width of an EN unit. */
269 int taboff; /* Temporary offset for literal tabs. */
270
271 size_t ic; /* Byte index in the input buffer. */
272 int breakline; /* Break at the end of this word. */
273 int graph; /* Last character was non-blank. */
274
275 *nbr = *vbr = vis = 0;
276 breakline = graph = 0;
277 taboff = p->tcol->taboff;
278 enw = (*p->getwidth)(p, ' ');
279 vtarget += enw / 2;
280 for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) {
281 switch (p->tcol->buf[ic]) {
282 case '\b': /* Escape \o (overstrike) or backspace markup. */
283 assert(ic > 0);
284 vis -= (*p->getwidth)(p, p->tcol->buf[ic - 1]);
285 continue;
286
287 case ' ':
288 case ASCII_BREAK: /* Escape \: (breakpoint). */
289 vn = vis;
290 if (p->tcol->buf[ic] == ' ')
291 vn += enw;
292 /* Can break at the end of a word. */
293 if (breakline || vn > vtarget)
294 break;
295 if (graph) {
296 *nbr = ic;
297 *vbr = vis;
298 graph = 0;
299 }
300 vis = vn;
301 continue;
302
303 case '\n': /* Escape \p (break at the end of the word). */
304 breakline = 1;
305 continue;
306
307 case ASCII_HYPH: /* Breakable hyphen. */
308 graph = 1;
309 /*
310 * We are about to decide whether to break the
311 * line or not, so we no longer need this hyphen
312 * to be marked as breakable. Put back a real
313 * hyphen such that we get the correct width.
314 */
315 p->tcol->buf[ic] = '-';
316 vis += (*p->getwidth)(p, '-');
317 if (vis > vtarget) {
318 ic++;
319 break;
320 }
321 *nbr = ic + 1;
322 *vbr = vis;
323 continue;
324
325 case ASCII_TABREF:
326 taboff = -vis - enw;
327 continue;
328
329 default:
330 switch (p->tcol->buf[ic]) {
331 case '\t':
332 if (taboff < 0 && (size_t)-taboff > vis)
333 vis = 0;
334 else
335 vis += taboff;
336 vis = term_tab_next(vis);
337 vis -= taboff;
338 break;
339 case ASCII_NBRZW: /* Non-breakable zero-width. */
340 break;
341 case ASCII_NBRSP: /* Non-breakable space. */
342 p->tcol->buf[ic] = ' ';
343 /* FALLTHROUGH */
344 default: /* Printable character. */
345 vis += (*p->getwidth)(p, p->tcol->buf[ic]);
346 break;
347 }
348 graph = 1;
349 if (vis > vtarget && *nbr > 0)
350 return;
351 continue;
352 }
353 break;
354 }
355
356 /*
357 * If the last word extends to the end of the field without any
358 * trailing whitespace, the loop could not check yet whether it
359 * can remain on this line. So do the check now.
360 */
361
362 if (graph && (vis <= vtarget || *nbr == 0)) {
363 *nbr = ic;
364 *vbr = vis;
365 }
366 }
367
368 /*
369 * Print the contents of one field
370 * with an indentation of vbl basic units
371 * and an input string length of nbr bytes.
372 */
373 static void
term_field(struct termp * p,size_t vbl,size_t nbr)374 term_field(struct termp *p, size_t vbl, size_t nbr)
375 {
376 /* Widths in basic units. */
377 size_t vis; /* Visual position of the current character. */
378 size_t vt; /* Visual position including tab offset. */
379 size_t dv; /* Visual width of the current character. */
380 int taboff; /* Temporary offset for literal tabs. */
381
382 size_t ic; /* Byte position in the input buffer. */
383
384 vis = 0;
385 taboff = p->tcol->taboff;
386 for (ic = p->tcol->col; ic < nbr; ic++) {
387
388 /*
389 * To avoid the printing of trailing whitespace,
390 * do not print whitespace right away, only count it.
391 */
392
393 switch (p->tcol->buf[ic]) {
394 case '\n':
395 case ASCII_BREAK:
396 case ASCII_NBRZW:
397 continue;
398 case ASCII_TABREF:
399 taboff = -vis - (*p->getwidth)(p, ' ');
400 continue;
401 case '\t':
402 case ' ':
403 case ASCII_NBRSP:
404 if (p->tcol->buf[ic] == '\t') {
405 if (taboff < 0 && (size_t)-taboff > vis)
406 vt = 0;
407 else
408 vt = vis + taboff;
409 dv = term_tab_next(vt) - vt;
410 } else
411 dv = (*p->getwidth)(p, ' ');
412 vbl += dv;
413 vis += dv;
414 continue;
415 default:
416 break;
417 }
418
419 /*
420 * We found a non-blank character to print,
421 * so write preceding white space now.
422 */
423
424 if (vbl > 0) {
425 (*p->advance)(p, vbl);
426 vbl = 0;
427 }
428
429 /* Print the character and adjust the visual position. */
430
431 (*p->letter)(p, p->tcol->buf[ic]);
432 if (p->tcol->buf[ic] == '\b') {
433 dv = (*p->getwidth)(p, p->tcol->buf[ic - 1]);
434 p->viscol -= dv;
435 vis -= dv;
436 } else {
437 dv = (*p->getwidth)(p, p->tcol->buf[ic]);
438 p->viscol += dv;
439 vis += dv;
440 }
441 }
442 p->tcol->col = nbr;
443 }
444
445 /*
446 * Print the margin character, if one is configured,
447 * and end the output line.
448 */
449 static void
endline(struct termp * p)450 endline(struct termp *p)
451 {
452 if ((p->flags & (TERMP_NEWMC | TERMP_ENDMC)) == TERMP_ENDMC) {
453 p->mc = NULL;
454 p->flags &= ~TERMP_ENDMC;
455 }
456 if (p->mc != NULL) {
457 if (p->viscol > 0 && p->viscol <= p->maxrmargin)
458 (*p->advance)(p,
459 p->maxrmargin - p->viscol + term_len(p, 1));
460 p->flags |= TERMP_NOBUF | TERMP_NOSPACE;
461 term_word(p, p->mc);
462 p->flags &= ~(TERMP_NOBUF | TERMP_NEWMC);
463 }
464 (*p->endline)(p);
465 }
466
467 /*
468 * A newline only breaks an existing line; it won't assert vertical
469 * space. All data in the output buffer is flushed prior to the newline
470 * assertion.
471 */
472 void
term_newln(struct termp * p)473 term_newln(struct termp *p)
474 {
475 p->flags |= TERMP_NOSPACE;
476 if (p->tcol->lastcol || p->viscol)
477 term_flushln(p);
478 p->tcol->taboff = 0;
479 }
480
481 /*
482 * Asserts a vertical space (a full, empty line-break between lines).
483 * Note that if used twice, this will cause two blank spaces and so on.
484 * All data in the output buffer is flushed prior to the newline
485 * assertion.
486 */
487 void
term_vspace(struct termp * p)488 term_vspace(struct termp *p)
489 {
490
491 term_newln(p);
492 if (0 < p->skipvsp)
493 p->skipvsp--;
494 else
495 (*p->endline)(p);
496 }
497
498 /* Swap current and previous font; for \fP and .ft P */
499 void
term_fontlast(struct termp * p)500 term_fontlast(struct termp *p)
501 {
502 enum termfont f;
503
504 f = p->fontl;
505 p->fontl = p->fontq[p->fonti];
506 p->fontq[p->fonti] = f;
507 }
508
509 /* Set font, save current, discard previous; for \f, .ft, and man(7). */
510 void
term_fontrepl(struct termp * p,enum termfont f)511 term_fontrepl(struct termp *p, enum termfont f)
512 {
513 p->fontl = p->fontq[p->fonti];
514 if (p->fontibi && f == TERMFONT_UNDER)
515 f = TERMFONT_BI;
516 p->fontq[p->fonti] = f;
517 }
518
519 /* Set font, save previous; for mdoc(7), eqn(7), and tbl(7). */
520 void
term_fontpush(struct termp * p,enum termfont f)521 term_fontpush(struct termp *p, enum termfont f)
522 {
523 enum termfont fl;
524
525 fl = p->fontq[p->fonti];
526 if (++p->fonti == p->fontsz) {
527 p->fontsz += 8;
528 p->fontq = mandoc_reallocarray(p->fontq,
529 p->fontsz, sizeof(*p->fontq));
530 }
531 p->fontq[p->fonti] = fl;
532 term_fontrepl(p, f);
533 }
534
535 /* Flush to make the saved pointer current again. */
536 void
term_fontpopq(struct termp * p,int i)537 term_fontpopq(struct termp *p, int i)
538 {
539 assert(i >= 0);
540 if (p->fonti > i)
541 p->fonti = i;
542 }
543
544 /* Pop one font off the stack. */
545 void
term_fontpop(struct termp * p)546 term_fontpop(struct termp *p)
547 {
548 assert(p->fonti > 0);
549 p->fonti--;
550 }
551
552 /*
553 * Handle pwords, partial words, which may be either a single word or a
554 * phrase that cannot be broken down (such as a literal string). This
555 * handles word styling.
556 */
557 void
term_word(struct termp * p,const char * word)558 term_word(struct termp *p, const char *word)
559 {
560 struct roffsu su;
561 const char nbrsp[2] = { ASCII_NBRSP, 0 };
562 const char *seq; /* Escape sequence argument. */
563 const char *cp; /* String to be printed. */
564 size_t csz; /* String length in basic units. */
565 size_t lsz; /* Line width in basic units. */
566 size_t ssz; /* Substring length in bytes. */
567 int sz; /* Argument length in bytes. */
568 int uc; /* Unicode codepoint number. */
569 int bu; /* Width in basic units. */
570 enum mandoc_esc esc;
571
572 if ((p->flags & TERMP_NOBUF) == 0) {
573 if ((p->flags & TERMP_NOSPACE) == 0) {
574 if ((p->flags & TERMP_KEEP) == 0) {
575 bufferc(p, ' ');
576 if (p->flags & TERMP_SENTENCE)
577 bufferc(p, ' ');
578 } else
579 bufferc(p, ASCII_NBRSP);
580 }
581 if (p->flags & TERMP_PREKEEP)
582 p->flags |= TERMP_KEEP;
583 if (p->flags & TERMP_NONOSPACE)
584 p->flags |= TERMP_NOSPACE;
585 else
586 p->flags &= ~TERMP_NOSPACE;
587 p->flags &= ~(TERMP_SENTENCE | TERMP_NONEWLINE);
588 p->skipvsp = 0;
589 }
590
591 while ('\0' != *word) {
592 if ('\\' != *word) {
593 if (TERMP_NBRWORD & p->flags) {
594 if (' ' == *word) {
595 encode(p, nbrsp, 1);
596 word++;
597 continue;
598 }
599 ssz = strcspn(word, "\\ ");
600 } else
601 ssz = strcspn(word, "\\");
602 encode(p, word, ssz);
603 word += (int)ssz;
604 continue;
605 }
606
607 word++;
608 esc = mandoc_escape(&word, &seq, &sz);
609 switch (esc) {
610 case ESCAPE_UNICODE:
611 uc = mchars_num2uc(seq + 1, sz - 1);
612 break;
613 case ESCAPE_NUMBERED:
614 uc = mchars_num2char(seq, sz);
615 if (uc >= 0)
616 break;
617 bufferc(p, ASCII_NBRZW);
618 continue;
619 case ESCAPE_SPECIAL:
620 if (p->enc == TERMENC_ASCII) {
621 cp = mchars_spec2str(seq, sz, &ssz);
622 if (cp != NULL)
623 encode(p, cp, ssz);
624 else
625 bufferc(p, ASCII_NBRZW);
626 } else {
627 uc = mchars_spec2cp(seq, sz);
628 if (uc > 0)
629 encode1(p, uc);
630 else
631 bufferc(p, ASCII_NBRZW);
632 }
633 continue;
634 case ESCAPE_UNDEF:
635 uc = *seq;
636 break;
637 case ESCAPE_FONTBOLD:
638 case ESCAPE_FONTCB:
639 term_fontrepl(p, TERMFONT_BOLD);
640 continue;
641 case ESCAPE_FONTITALIC:
642 case ESCAPE_FONTCI:
643 term_fontrepl(p, TERMFONT_UNDER);
644 continue;
645 case ESCAPE_FONTBI:
646 term_fontrepl(p, TERMFONT_BI);
647 continue;
648 case ESCAPE_FONT:
649 case ESCAPE_FONTCR:
650 case ESCAPE_FONTROMAN:
651 term_fontrepl(p, TERMFONT_NONE);
652 continue;
653 case ESCAPE_FONTPREV:
654 term_fontlast(p);
655 continue;
656 case ESCAPE_BREAK:
657 bufferc(p, '\n');
658 continue;
659 case ESCAPE_NOSPACE:
660 if (p->flags & TERMP_BACKAFTER)
661 p->flags &= ~TERMP_BACKAFTER;
662 else if (*word == '\0')
663 p->flags |= (TERMP_NOSPACE | TERMP_NONEWLINE);
664 continue;
665 case ESCAPE_DEVICE:
666 if (p->type == TERMTYPE_PDF)
667 encode(p, "pdf", 3);
668 else if (p->type == TERMTYPE_PS)
669 encode(p, "ps", 2);
670 else if (p->enc == TERMENC_ASCII)
671 encode(p, "ascii", 5);
672 else
673 encode(p, "utf8", 4);
674 continue;
675 case ESCAPE_HORIZ:
676 if (p->flags & TERMP_BACKAFTER) {
677 p->flags &= ~TERMP_BACKAFTER;
678 continue;
679 }
680 if (*seq == '|') {
681 seq++;
682 bu = -term_len(p, p->col);
683 } else
684 bu = 0;
685 if (a2roffsu(seq, &su, SCALE_EM) == NULL)
686 continue;
687 bu += term_hspan(p, &su);
688 if (bu >= 0) {
689 while (bu > 0) {
690 bu -= term_len(p, 1);
691 if (p->flags & TERMP_BACKBEFORE)
692 p->flags &= ~TERMP_BACKBEFORE;
693 else
694 bufferc(p, ASCII_NBRSP);
695 }
696 continue;
697 }
698 if (p->flags & TERMP_BACKBEFORE) {
699 p->flags &= ~TERMP_BACKBEFORE;
700 assert(p->col > 1);
701 p->col--;
702 }
703 if (term_len(p, p->col) >= (size_t)(-bu)) {
704 p->col -= -bu / term_len(p, 1);
705 } else {
706 bu += term_len(p, p->col);
707 p->col = 0;
708 if (p->tcol->offset > (size_t)(-bu)) {
709 p->ti += bu;
710 p->tcol->offset += bu;
711 } else {
712 p->ti -= p->tcol->offset;
713 p->tcol->offset = 0;
714 }
715 }
716 continue;
717 case ESCAPE_HLINE:
718 if ((cp = a2roffsu(seq, &su, SCALE_EM)) == NULL)
719 continue;
720 bu = term_hspan(p, &su);
721 if (bu <= 0) {
722 if (p->tcol->rmargin <= p->tcol->offset)
723 continue;
724 lsz = p->tcol->rmargin - p->tcol->offset;
725 } else
726 lsz = bu;
727 if (*cp == seq[-1])
728 uc = -1;
729 else if (*cp == '\\') {
730 seq = cp + 1;
731 esc = mandoc_escape(&seq, &cp, &sz);
732 switch (esc) {
733 case ESCAPE_UNICODE:
734 uc = mchars_num2uc(cp + 1, sz - 1);
735 break;
736 case ESCAPE_NUMBERED:
737 uc = mchars_num2char(cp, sz);
738 break;
739 case ESCAPE_SPECIAL:
740 uc = mchars_spec2cp(cp, sz);
741 break;
742 case ESCAPE_UNDEF:
743 uc = *seq;
744 break;
745 default:
746 uc = -1;
747 break;
748 }
749 } else
750 uc = *cp;
751 if (uc < 0x20 || (uc > 0x7E && uc < 0xA0))
752 uc = '_';
753 if (p->enc == TERMENC_ASCII) {
754 cp = ascii_uc2str(uc);
755 csz = term_strlen(p, cp);
756 ssz = strlen(cp);
757 } else
758 csz = (*p->getwidth)(p, uc);
759 while (lsz > 0) {
760 if (p->enc == TERMENC_ASCII)
761 encode(p, cp, ssz);
762 else
763 encode1(p, uc);
764 if (lsz > csz)
765 lsz -= csz;
766 else
767 lsz = 0;
768 }
769 continue;
770 case ESCAPE_SKIPCHAR:
771 p->flags |= TERMP_BACKAFTER;
772 continue;
773 case ESCAPE_OVERSTRIKE:
774 cp = seq + sz;
775 while (seq < cp) {
776 if (*seq == '\\') {
777 mandoc_escape(&seq, NULL, NULL);
778 continue;
779 }
780 encode1(p, *seq++);
781 if (seq < cp) {
782 if (p->flags & TERMP_BACKBEFORE)
783 p->flags |= TERMP_BACKAFTER;
784 else
785 p->flags |= TERMP_BACKBEFORE;
786 }
787 }
788 /* Trim trailing backspace/blank pair. */
789 if (p->tcol->lastcol > 2 &&
790 (p->tcol->buf[p->tcol->lastcol - 1] == ' ' ||
791 p->tcol->buf[p->tcol->lastcol - 1] == '\t'))
792 p->tcol->lastcol -= 2;
793 if (p->col > p->tcol->lastcol)
794 p->col = p->tcol->lastcol;
795 continue;
796 case ESCAPE_IGNORE:
797 bufferc(p, ASCII_NBRZW);
798 continue;
799 default:
800 continue;
801 }
802
803 /*
804 * Common handling for Unicode and numbered
805 * character escape sequences.
806 */
807
808 if (p->enc == TERMENC_ASCII) {
809 cp = ascii_uc2str(uc);
810 encode(p, cp, strlen(cp));
811 } else {
812 if ((uc < 0x20 && uc != 0x09) ||
813 (uc > 0x7E && uc < 0xA0))
814 uc = 0xFFFD;
815 encode1(p, uc);
816 }
817 }
818 p->flags &= ~TERMP_NBRWORD;
819 }
820
821 static void
adjbuf(struct termp_col * c,size_t sz)822 adjbuf(struct termp_col *c, size_t sz)
823 {
824 if (c->maxcols == 0)
825 c->maxcols = 1024;
826 while (c->maxcols <= sz)
827 c->maxcols <<= 2;
828 c->buf = mandoc_reallocarray(c->buf, c->maxcols, sizeof(*c->buf));
829 }
830
831 static void
bufferc(struct termp * p,char c)832 bufferc(struct termp *p, char c)
833 {
834 if (p->flags & TERMP_NOBUF) {
835 (*p->letter)(p, c);
836 return;
837 }
838 if (p->col + 1 >= p->tcol->maxcols)
839 adjbuf(p->tcol, p->col + 1);
840 if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP))
841 p->tcol->buf[p->col] = c;
842 if (p->tcol->lastcol < ++p->col)
843 p->tcol->lastcol = p->col;
844 }
845
846 void
term_tab_ref(struct termp * p)847 term_tab_ref(struct termp *p)
848 {
849 if (p->tcol->lastcol && p->tcol->lastcol <= p->col &&
850 (p->flags & TERMP_NOBUF) == 0)
851 bufferc(p, ASCII_TABREF);
852 }
853
854 /*
855 * See encode().
856 * Do this for a single (probably unicode) value.
857 * Does not check for non-decorated glyphs.
858 */
859 static void
encode1(struct termp * p,int c)860 encode1(struct termp *p, int c)
861 {
862 enum termfont f;
863
864 if (p->flags & TERMP_NOBUF) {
865 (*p->letter)(p, c);
866 return;
867 }
868
869 if (p->col + 7 >= p->tcol->maxcols)
870 adjbuf(p->tcol, p->col + 7);
871
872 f = (c == ASCII_HYPH || c > 127 || isgraph(c)) ?
873 p->fontq[p->fonti] : TERMFONT_NONE;
874
875 if (p->flags & TERMP_BACKBEFORE) {
876 if (p->tcol->buf[p->col - 1] == ' ' ||
877 p->tcol->buf[p->col - 1] == '\t')
878 p->col--;
879 else
880 p->tcol->buf[p->col++] = '\b';
881 p->flags &= ~TERMP_BACKBEFORE;
882 }
883 if (f == TERMFONT_UNDER || f == TERMFONT_BI) {
884 p->tcol->buf[p->col++] = '_';
885 p->tcol->buf[p->col++] = '\b';
886 }
887 if (f == TERMFONT_BOLD || f == TERMFONT_BI) {
888 if (c == ASCII_HYPH)
889 p->tcol->buf[p->col++] = '-';
890 else
891 p->tcol->buf[p->col++] = c;
892 p->tcol->buf[p->col++] = '\b';
893 }
894 if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP))
895 p->tcol->buf[p->col] = c;
896 if (p->tcol->lastcol < ++p->col)
897 p->tcol->lastcol = p->col;
898 if (p->flags & TERMP_BACKAFTER) {
899 p->flags |= TERMP_BACKBEFORE;
900 p->flags &= ~TERMP_BACKAFTER;
901 }
902 }
903
904 static void
encode(struct termp * p,const char * word,size_t sz)905 encode(struct termp *p, const char *word, size_t sz)
906 {
907 size_t i;
908
909 if (p->flags & TERMP_NOBUF) {
910 for (i = 0; i < sz; i++)
911 (*p->letter)(p, word[i]);
912 return;
913 }
914
915 if (p->col + 2 + (sz * 5) >= p->tcol->maxcols)
916 adjbuf(p->tcol, p->col + 2 + (sz * 5));
917
918 for (i = 0; i < sz; i++) {
919 if (ASCII_HYPH == word[i] ||
920 isgraph((unsigned char)word[i]))
921 encode1(p, word[i]);
922 else {
923 if (p->tcol->lastcol <= p->col ||
924 (word[i] != ' ' && word[i] != ASCII_NBRSP))
925 p->tcol->buf[p->col] = word[i];
926 p->col++;
927
928 /*
929 * Postpone the effect of \z while handling
930 * an overstrike sequence from ascii_uc2str().
931 */
932
933 if (word[i] == '\b' &&
934 (p->flags & TERMP_BACKBEFORE)) {
935 p->flags &= ~TERMP_BACKBEFORE;
936 p->flags |= TERMP_BACKAFTER;
937 }
938 }
939 }
940 if (p->tcol->lastcol < p->col)
941 p->tcol->lastcol = p->col;
942 }
943
944 void
term_setwidth(struct termp * p,const char * wstr)945 term_setwidth(struct termp *p, const char *wstr)
946 {
947 struct roffsu su;
948 int iop, width;
949
950 iop = 0;
951 width = 0;
952 if (NULL != wstr) {
953 switch (*wstr) {
954 case '+':
955 iop = 1;
956 wstr++;
957 break;
958 case '-':
959 iop = -1;
960 wstr++;
961 break;
962 default:
963 break;
964 }
965 if (a2roffsu(wstr, &su, SCALE_MAX) != NULL)
966 width = term_hspan(p, &su);
967 else
968 iop = 0;
969 }
970 (*p->setwidth)(p, iop, width);
971 }
972
973 size_t
term_len(const struct termp * p,size_t sz)974 term_len(const struct termp *p, size_t sz)
975 {
976 return (*p->getwidth)(p, ' ') * sz;
977 }
978
979 static size_t
cond_width(const struct termp * p,int c,int * skip)980 cond_width(const struct termp *p, int c, int *skip)
981 {
982 if (*skip) {
983 (*skip) = 0;
984 return 0;
985 } else
986 return (*p->getwidth)(p, c);
987 }
988
989 size_t
term_strlen(const struct termp * p,const char * cp)990 term_strlen(const struct termp *p, const char *cp)
991 {
992 const char *seq; /* Escape sequence argument. */
993 const char *rhs; /* String to be printed. */
994
995 /* Widths in basic units. */
996 size_t sz; /* Return value. */
997 size_t this_sz; /* Individual char for overstrike. */
998 size_t max_sz; /* Result of overstrike. */
999
1000 /* Numbers of bytes. */
1001 size_t rsz; /* Substring length in bytes. */
1002 size_t i; /* Byte index in substring. */
1003 int ssz; /* Argument length in bytes. */
1004 int skip; /* Number of bytes to skip. */
1005
1006 int uc; /* Unicode codepoint number. */
1007 enum mandoc_esc esc;
1008
1009 static const char rej[] = { '\\', ASCII_NBRSP, ASCII_NBRZW,
1010 ASCII_BREAK, ASCII_HYPH, ASCII_TABREF, '\0' };
1011
1012 /*
1013 * Account for escaped sequences within string length
1014 * calculations. This follows the logic in term_word() as we
1015 * must calculate the width of produced strings.
1016 */
1017
1018 sz = 0;
1019 skip = 0;
1020 while ('\0' != *cp) {
1021 rsz = strcspn(cp, rej);
1022 for (i = 0; i < rsz; i++)
1023 sz += cond_width(p, *cp++, &skip);
1024
1025 switch (*cp) {
1026 case '\\':
1027 cp++;
1028 rhs = NULL;
1029 esc = mandoc_escape(&cp, &seq, &ssz);
1030 switch (esc) {
1031 case ESCAPE_UNICODE:
1032 uc = mchars_num2uc(seq + 1, ssz - 1);
1033 break;
1034 case ESCAPE_NUMBERED:
1035 uc = mchars_num2char(seq, ssz);
1036 if (uc < 0)
1037 continue;
1038 break;
1039 case ESCAPE_SPECIAL:
1040 if (p->enc == TERMENC_ASCII) {
1041 rhs = mchars_spec2str(seq, ssz, &rsz);
1042 if (rhs != NULL)
1043 break;
1044 } else {
1045 uc = mchars_spec2cp(seq, ssz);
1046 if (uc > 0)
1047 sz += cond_width(p, uc, &skip);
1048 }
1049 continue;
1050 case ESCAPE_UNDEF:
1051 uc = *seq;
1052 break;
1053 case ESCAPE_DEVICE:
1054 if (p->type == TERMTYPE_PDF) {
1055 rhs = "pdf";
1056 rsz = 3;
1057 } else if (p->type == TERMTYPE_PS) {
1058 rhs = "ps";
1059 rsz = 2;
1060 } else if (p->enc == TERMENC_ASCII) {
1061 rhs = "ascii";
1062 rsz = 5;
1063 } else {
1064 rhs = "utf8";
1065 rsz = 4;
1066 }
1067 break;
1068 case ESCAPE_SKIPCHAR:
1069 skip = 1;
1070 continue;
1071 case ESCAPE_OVERSTRIKE:
1072 max_sz = 0;
1073 rhs = seq + ssz;
1074 while (seq < rhs) {
1075 if (*seq == '\\') {
1076 mandoc_escape(&seq, NULL, NULL);
1077 continue;
1078 }
1079 this_sz = (*p->getwidth)(p, *seq++);
1080 if (max_sz < this_sz)
1081 max_sz = this_sz;
1082 }
1083 sz += max_sz;
1084 continue;
1085 default:
1086 continue;
1087 }
1088
1089 /*
1090 * Common handling for Unicode and numbered
1091 * character escape sequences.
1092 */
1093
1094 if (rhs == NULL) {
1095 if (p->enc == TERMENC_ASCII) {
1096 rhs = ascii_uc2str(uc);
1097 rsz = strlen(rhs);
1098 } else {
1099 if ((uc < 0x20 && uc != 0x09) ||
1100 (uc > 0x7E && uc < 0xA0))
1101 uc = 0xFFFD;
1102 sz += cond_width(p, uc, &skip);
1103 continue;
1104 }
1105 }
1106
1107 if (skip) {
1108 skip = 0;
1109 break;
1110 }
1111
1112 /*
1113 * Common handling for all escape sequences
1114 * printing more than one character.
1115 */
1116
1117 for (i = 0; i < rsz; i++)
1118 sz += (*p->getwidth)(p, *rhs++);
1119 break;
1120 case ASCII_NBRSP:
1121 sz += cond_width(p, ' ', &skip);
1122 cp++;
1123 break;
1124 case ASCII_HYPH:
1125 sz += cond_width(p, '-', &skip);
1126 cp++;
1127 break;
1128 default:
1129 break;
1130 }
1131 }
1132
1133 return sz;
1134 }
1135
1136 int
term_vspan(const struct termp * p,const struct roffsu * su)1137 term_vspan(const struct termp *p, const struct roffsu *su)
1138 {
1139 double r;
1140 int ri;
1141
1142 switch (su->unit) {
1143 case SCALE_BU:
1144 r = su->scale / 40.0;
1145 break;
1146 case SCALE_CM:
1147 r = su->scale * 6.0 / 2.54;
1148 break;
1149 case SCALE_FS:
1150 r = su->scale * 65536.0 / 40.0;
1151 break;
1152 case SCALE_IN:
1153 r = su->scale * 6.0;
1154 break;
1155 case SCALE_MM:
1156 r = su->scale * 0.006;
1157 break;
1158 case SCALE_PC:
1159 r = su->scale;
1160 break;
1161 case SCALE_PT:
1162 r = su->scale / 12.0;
1163 break;
1164 case SCALE_EN:
1165 case SCALE_EM:
1166 r = su->scale * 0.6;
1167 break;
1168 case SCALE_VS:
1169 r = su->scale;
1170 break;
1171 default:
1172 abort();
1173 }
1174 ri = r > 0.0 ? r + 0.4995 : r - 0.4995;
1175 return ri < 66 ? ri : 1;
1176 }
1177
1178 /*
1179 * Convert a scaling width to basic units.
1180 */
1181 int
term_hspan(const struct termp * p,const struct roffsu * su)1182 term_hspan(const struct termp *p, const struct roffsu *su)
1183 {
1184 return (*p->hspan)(p, su);
1185 }
1186