1f9002b85SWarner Losh### ==================================================================== 2f9002b85SWarner Losh### @Awk-file{ 3f9002b85SWarner Losh### author = "Nelson H. F. Beebe", 4f9002b85SWarner Losh### version = "1.00", 5f9002b85SWarner Losh### date = "09 October 1996", 6f9002b85SWarner Losh### time = "15:57:06 MDT", 7f9002b85SWarner Losh### filename = "journal-toc.awk", 8f9002b85SWarner Losh### address = "Center for Scientific Computing 9f9002b85SWarner Losh### Department of Mathematics 10f9002b85SWarner Losh### University of Utah 11f9002b85SWarner Losh### Salt Lake City, UT 84112 12f9002b85SWarner Losh### USA", 13f9002b85SWarner Losh### telephone = "+1 801 581 5254", 14f9002b85SWarner Losh### FAX = "+1 801 581 4148", 15f9002b85SWarner Losh### URL = "http://www.math.utah.edu/~beebe", 16f9002b85SWarner Losh### checksum = "25092 977 3357 26493", 17f9002b85SWarner Losh### email = "beebe@math.utah.edu (Internet)", 18f9002b85SWarner Losh### codetable = "ISO/ASCII", 19f9002b85SWarner Losh### keywords = "BibTeX, bibliography, HTML, journal table of 20f9002b85SWarner Losh### contents", 21f9002b85SWarner Losh### supported = "yes", 22f9002b85SWarner Losh### docstring = "Create a journal cover table of contents from 23f9002b85SWarner Losh### <at>Article{...} entries in a journal BibTeX 24f9002b85SWarner Losh### .bib file for checking the bibliography 25f9002b85SWarner Losh### database against the actual journal covers. 26f9002b85SWarner Losh### The output can be either plain text, or HTML. 
27f9002b85SWarner Losh### 28f9002b85SWarner Losh### Usage: 29f9002b85SWarner Losh### bibclean -max-width 0 BibTeX-file(s) | \ 30f9002b85SWarner Losh### bibsort -byvolume | \ 31f9002b85SWarner Losh### awk -f journal-toc.awk \ 32f9002b85SWarner Losh### [-v HTML=nnn] [-v INDENT=nnn] \ 33f9002b85SWarner Losh### [-v BIBFILEURL=url] >foo.toc 34f9002b85SWarner Losh### 35f9002b85SWarner Losh### or if the bibliography is already sorted 36f9002b85SWarner Losh### by volume, 37f9002b85SWarner Losh### 38f9002b85SWarner Losh### bibclean -max-width 0 BibTeX-file(s) | \ 39f9002b85SWarner Losh### awk -f journal-toc.awk \ 40f9002b85SWarner Losh### [-v HTML=nnn] [-v INDENT=nnn] \ 41f9002b85SWarner Losh### [-v BIBFILEURL=url] >foo.toc 42f9002b85SWarner Losh### 43f9002b85SWarner Losh### A non-zero value of the command-line option, 44f9002b85SWarner Losh### HTML=nnn, results in HTML output instead of 45f9002b85SWarner Losh### the default plain ASCII text (corresponding 46f9002b85SWarner Losh### to HTML=0). The 47f9002b85SWarner Losh### 48f9002b85SWarner Losh### The INDENT=nnn command-line option specifies 49f9002b85SWarner Losh### the number of blanks to indent each logical 50f9002b85SWarner Losh### level of HTML. The default is INDENT=4. 51f9002b85SWarner Losh### INDENT=0 suppresses indentation. The INDENT 52f9002b85SWarner Losh### option has no effect when the default HTML=0 53f9002b85SWarner Losh### (plain text output) option is in effect. 54f9002b85SWarner Losh### 55f9002b85SWarner Losh### When HTML output is selected, the 56f9002b85SWarner Losh### BIBFILEURL=url command-line option provides a 57f9002b85SWarner Losh### way to request hypertext links from table of 58f9002b85SWarner Losh### contents page numbers to the complete BibTeX 59f9002b85SWarner Losh### entry for the article. 
These links are 60f9002b85SWarner Losh### created by appending a sharp (#) and the 61f9002b85SWarner Losh### citation label to the BIBFILEURL value, which 62f9002b85SWarner Losh### conforms with the practice of 63f9002b85SWarner Losh### bibtex-to-html.awk. 64f9002b85SWarner Losh### 65f9002b85SWarner Losh### The HTML output form may be useful as a more 66f9002b85SWarner Losh### compact representation of journal article 67f9002b85SWarner Losh### bibliography data than the original BibTeX 68f9002b85SWarner Losh### file provides. Of course, the 69f9002b85SWarner Losh### table-of-contents format provides less 70f9002b85SWarner Losh### information, and is considerably more 71f9002b85SWarner Losh### troublesome for a computer program to parse. 72f9002b85SWarner Losh### 73f9002b85SWarner Losh### When URL key values are provided, they will 74f9002b85SWarner Losh### be used to create hypertext links around 75f9002b85SWarner Losh### article titles. This supports journals that 76f9002b85SWarner Losh### provide article contents on the World-Wide 77f9002b85SWarner Losh### Web. 78f9002b85SWarner Losh### 79f9002b85SWarner Losh### For parsing simplicity, this program requires 80f9002b85SWarner Losh### that BibTeX 81f9002b85SWarner Losh### 82f9002b85SWarner Losh### key = "value" 83f9002b85SWarner Losh### 84f9002b85SWarner Losh### and 85f9002b85SWarner Losh### 86f9002b85SWarner Losh### @String{name = "value"} 87f9002b85SWarner Losh### 88f9002b85SWarner Losh### specifications be entirely contained on 89f9002b85SWarner Losh### single lines, which is readily provided by 90f9002b85SWarner Losh### the `bibclean -max-width 0' filter. It also 91f9002b85SWarner Losh### requires that bibliography entries begin and 92f9002b85SWarner Losh### end at the start of a line, and that 93f9002b85SWarner Losh### quotation marks, rather than balanced braces, 94f9002b85SWarner Losh### delimit string values. 
This is a 95f9002b85SWarner Losh### conventional format that again can be 96f9002b85SWarner Losh### guaranteed by bibclean. 97f9002b85SWarner Losh### 98f9002b85SWarner Losh### This program requires `new' awk, as described 99f9002b85SWarner Losh### in the book 100f9002b85SWarner Losh### 101f9002b85SWarner Losh### Alfred V. Aho, Brian W. Kernighan, and 102f9002b85SWarner Losh### Peter J. Weinberger, 103f9002b85SWarner Losh### ``The AWK Programming Language'', 104f9002b85SWarner Losh### Addison-Wesley (1988), ISBN 105f9002b85SWarner Losh### 0-201-07981-X, 106f9002b85SWarner Losh### 107f9002b85SWarner Losh### such as provided by programs named (GNU) 108f9002b85SWarner Losh### gawk, nawk, and recent AT&T awk. 109f9002b85SWarner Losh### 110f9002b85SWarner Losh### The checksum field above contains a CRC-16 111f9002b85SWarner Losh### checksum as the first value, followed by the 112f9002b85SWarner Losh### equivalent of the standard UNIX wc (word 113f9002b85SWarner Losh### count) utility output of lines, words, and 114f9002b85SWarner Losh### characters. 
###                  This is produced by Robert
###                  Solovay's checksum utility.",
###  }
### ====================================================================

BEGIN                                           { initialize() }

/^ *@ *[Ss][Tt][Rr][Ii][Nn][Gg] *{/             { do_String(); next }

/^ *@ *[Pp][Rr][Ee][Aa][Mm][Bb][Ll][Ee]/        { next }

/^ *@ *[Aa][Rr][Tt][Ii][Cc][Ll][Ee]/            { do_Article(); next }

/^ *@/                                          { do_Other(); next }

/^ *author *= *\"/                              { do_author(); next }

/^ *journal *= */                               { do_journal(); next }

/^ *volume *= *\"/                              { do_volume(); next }

/^ *number *= *\"/                              { do_number(); next }

/^ *year *= *\"/                                { do_year(); next }

/^ *month *= */                                 { do_month(); next }

/^ *title *= *\"/                               { do_title(); next }

/^ *pages *= *\"/                               { do_pages(); next }

/^ *URL *= *\"/                                 { do_URL(); next }

/^ *} *$/                                       { if (In_Article) do_end_entry(); next }

END                                             { terminate() }


########################################################################
# NB: The programming conventions for variables in this program are:   #
#       UPPERCASE               global constants and user options      #
#       Initialuppercase        global variables                       #
#       lowercase               local variables                        #
# Any deviation is an error!                                           #
########################################################################


# Start a new @Article entry: remember its citation label (the text
# between the opening brace and the trailing comma of the current
# line) and clear every per-entry field so that nothing leaks in from
# the previous entry.
function do_Article()
{
    In_Article = 1

    Citation_label = $0
    sub(/^[^\{]*{/,"",Citation_label)   # discard up to and including open brace
    sub(/ *, *$/,"",Citation_label)     # discard trailing comma and space

    Author = ""
    Title = ""
    Journal = ""
    Volume = ""
    Number = ""
    Month = ""
    Year = ""
    Pages = ""
    Url = ""
}


# Record the author list of the current entry, converted by
# TeX_to_HTML().
function do_author()
{
    Author = TeX_to_HTML(get_value($0))
}


# Finish the current @Article: start a new issue heading if the issue
# changed, then print one table-of-contents line per author, with the
# title and page reference on the last author's line.
#
# k, n, and parts are local work variables.
function do_end_entry( k,n,parts)
{
    # The entry is complete.  Without this reset, any later line that
    # consists only of a closing brace (for example, the end of a
    # multi-line @Preamble, which does not pass through do_Other())
    # would print this article a second time.
    In_Article = 0

    n = split(Author,parts," and ")

    # An issue is identified by volume AND number: comparing the number
    # alone misses the boundary between consecutive volumes whose
    # issues carry the same number (or no number at all).
    # Last_issue_volume is private to this function.
    if ((Last_number != Number) || (Last_issue_volume != Volume))
    {
        Last_issue_volume = Volume
        do_new_issue()
    }

    for (k = 1; k < n; ++k)             # all authors but the last
        print_toc_line(parts[k] " and", "", "")

    Title_prefix = html_begin_title()
    Title_suffix = html_end_title()

    if (html_length(Title) <= (MAX_TITLE_CHARS + MIN_LEADERS)) # complete title fits on line
        print_toc_line(parts[n], Title, html_begin_pages() Pages html_end_pages())
    else                                # need to split long title over multiple lines
        do_long_title(parts[n], Title, html_begin_pages() Pages html_end_pages())
}

# Record the journal name of the current entry.  The value is either a
# quoted string or an abbreviation; an abbreviation that was defined
# by an earlier @String{...} is replaced by its expansion.
function do_journal()
{
    if ($0 ~ /[=] *"/)                  # have journal = "quoted journal name",
        Journal = get_value($0)
    else                                # have journal = journal-abbreviation,
    {
        Journal = get_abbrev($0)
        if (Journal in String)          # replace abbrev by its expansion
            Journal = String[Journal]
    }
    gsub(/\\-/,"",Journal)              # remove discretionary hyphens
}


# Print a title that is too long for one table-of-contents line as
# several lines.  The author appears only on the first line, and the
# page reference only on the last.
#
# last_title and n are local work variables.
function do_long_title(author,title,pages, last_title,n)
{
    title = trim(title)                 # discard leading and trailing space
    while (length(title) > 0)
    {
        n = html_breakpoint(title,MAX_TITLE_CHARS+MIN_LEADERS)
        last_title = substr(title,1,n)
        title = substr(title,n+1)
        sub(/^ +/,"",title)             # discard any leading space
        print_toc_line(author, last_title, (length(title) == 0) ? pages : "")
        author = ""
    }
}


# Record the month of the current entry.  Quoted values and bare
# abbreviations are both accepted; combinations joined with
# `# "\slash" #' or `# "--" #' are normalized to "a / b", and each
# part found in Month_expansion is replaced by its expansion.
#
# k, n, and parts are local work variables.
function do_month( k,n,parts)
{
    Month = ($0 ~ /[=] *"/) ? get_value($0) : get_abbrev($0)
    gsub(/[\"]/,"",Month)
    gsub(/ *# *\\slash *# */," / ",Month)
    gsub(/ *# *-+ *# */," / ",Month)
    n = split(Month,parts," */ *")
    Month = ""
    for (k = 1; k <= n; ++k)
        Month = Month ((k > 1) ? " / " : "") \
            ((parts[k] in Month_expansion) ? Month_expansion[parts[k]] : parts[k])
}


# Print the heading for a new journal issue, in HTML or plain text
# according to the HTML option, and remember the issue number (and,
# in HTML mode, the volume) just started.
function do_new_issue()
{
    Last_number = Number
    if (HTML)
    {
        if (Last_volume != Volume)
        {
            Last_volume = Volume
            print_line(prefix(2) "<BR>")
        }
        html_end_toc()
        html_begin_issue()
        print_line(prefix(2) Journal "<BR>")
    }
    else
    {
        print_line("")
        print_line(Journal)
    }

    print_line(strip_html(vol_no_month_year()))

    if (HTML)
    {
        html_end_issue()
        html_toc_entry()
        html_begin_toc()
    }
    else
        print_line("")
}


# Record the issue number of the current entry.
function do_number()
{
    Number = get_value($0)
}


# Note that the entry just started is not an @Article, so that its
# closing brace does not produce a table-of-contents line.
function do_Other()
{
    In_Article = 0
}


# Record the page range of the current entry, dropping an unknown
# ending page written as "--??".
function do_pages()
{
    Pages = get_value($0)
    sub(/--[?][?]/,"",Pages)
}


# Remember an @String{name = "value"} definition in String[name].
# NB: this modifies $0.
function do_String()
{
    sub(/^[^\{]*\{/,"",$0)              # discard up to and including open brace
    sub(/\} *$/,"",$0)                  # discard from optional whitespace and trailing brace to end of line
    String[get_key($0)] = get_value($0)
}


# Record the title of the current entry, converted by TeX_to_HTML().
function do_title()
{
    Title = TeX_to_HTML(get_value($0))
}


# Record the URL of the current entry.  Only the first of several
# comma- or semicolon-separated URLs is kept.
#
# parts is a local work variable.
function do_URL( parts)
{
    Url = get_value($0)
    split(Url,parts,"[,;]")             # in case we have multiple URLs
    Url = trim(parts[1])
}


# Record the volume of the current entry.
function do_volume()
{
    Volume = get_value($0)
}


# Record the year of the current entry.
function do_year()
{
    Year = get_value($0)
}


function get_abbrev(s)
{   # return abbrev from ``key = abbrev,''
    sub(/^[^=]*= */,"",s)               # discard text up to start of non-blank value
    sub(/ *,? *$/,"",s)                 # discard trailing optional whitespace,
                                        # optional comma, and optional space
    return (s)
}


function get_key(s)
{   # return key from ``key = "value",''
    sub(/^ */,"",s)                     # discard leading space
    sub(/ *=.*$/,"",s)                  # discard everything after key

    return (s)
}


function get_value(s)
{   # return value from ``key = "value",''
    sub(/^[^\"]*\" */,"",s)             # discard text up to start of non-blank value
    sub(/ *\",? *$/,"",s)               # discard trailing optional whitespace, quote,
                                        # optional comma, and optional space
    return (s)
}


# Return s with braced TeX accent sequences such as {\'e} replaced by
# the corresponding ISO Latin 1 SGML entity (&eacute;).
#
# Three points matter in the patterns and replacements below:
#   * the TeX grave accent is a backquote, so those patterns must
#     contain a literal ` character (a backslash-letter pair in its
#     place would be read as a regular-expression escape and never
#     match TeX input);
#   * each replacement starts with \\& so that gsub() emits a literal
#     ampersand instead of the matched text;
#   * the replacements are pure ASCII entity names, never literal
#     accented characters, so that the output stays 7-bit clean.
function html_accents(s)
{
    if (index(s,"\\") > 0)              # important optimization
    {
        # Convert common lower-case accented letters according to the
        # table on p. 169 of Peter Flynn's ``The World Wide Web
        # Handbook'', International Thomson Computer Press, 1995, ISBN
        # 1-85032-205-8.  The official table of ISO Latin 1 SGML
        # entities used in HTML can be found in the file
        # /usr/local/lib/html-check/lib/ISOlat1.sgml (your path
        # may differ).

        gsub(/{\\`a}/,    "\\&agrave;", s)
        gsub(/{\\'a}/,    "\\&aacute;", s)
        gsub(/{\\[\^]a}/, "\\&acirc;",  s)
        gsub(/{\\~a}/,    "\\&atilde;", s)
        gsub(/{\\\"a}/,   "\\&auml;",   s)
        gsub(/{\\aa}/,    "\\&aring;",  s)
        gsub(/{\\ae}/,    "\\&aelig;",  s)

        gsub(/{\\c{c}}/,  "\\&ccedil;", s)

        gsub(/{\\`e}/,    "\\&egrave;", s)
        gsub(/{\\'e}/,    "\\&eacute;", s)
        gsub(/{\\[\^]e}/, "\\&ecirc;",  s)
        gsub(/{\\\"e}/,   "\\&euml;",   s)

        gsub(/{\\`i}/,    "\\&igrave;", s)
        gsub(/{\\'i}/,    "\\&iacute;", s)
        gsub(/{\\[\^]i}/, "\\&icirc;",  s)
        gsub(/{\\\"i}/,   "\\&iuml;",   s)

        # ignore eth and thorn

        gsub(/{\\~n}/,    "\\&ntilde;", s)

        gsub(/{\\`o}/,    "\\&ograve;", s)
        gsub(/{\\'o}/,    "\\&oacute;", s)
        gsub(/{\\[\^]o}/, "\\&ocirc;",  s)
        gsub(/{\\~o}/,    "\\&otilde;", s)
        gsub(/{\\\"o}/,   "\\&ouml;",   s)
        gsub(/{\\o}/,     "\\&oslash;", s)

        gsub(/{\\`u}/,    "\\&ugrave;", s)
        gsub(/{\\'u}/,    "\\&uacute;", s)
        gsub(/{\\[\^]u}/, "\\&ucirc;",  s)
        gsub(/{\\\"u}/,   "\\&uuml;",   s)

        gsub(/{\\'y}/,    "\\&yacute;", s)
        gsub(/{\\\"y}/,   "\\&yuml;",   s)

        # Now do the same for upper-case accents

        gsub(/{\\`A}/,    "\\&Agrave;", s)
        gsub(/{\\'A}/,    "\\&Aacute;", s)
        gsub(/{\\[\^]A}/, "\\&Acirc;",  s)
        gsub(/{\\~A}/,    "\\&Atilde;", s)
        gsub(/{\\\"A}/,   "\\&Auml;",   s)
        gsub(/{\\AA}/,    "\\&Aring;",  s)
        gsub(/{\\AE}/,    "\\&AElig;",  s)

        gsub(/{\\c{C}}/,  "\\&Ccedil;", s)

        gsub(/{\\`E}/,    "\\&Egrave;", s)
        gsub(/{\\'E}/,    "\\&Eacute;", s)
        gsub(/{\\[\^]E}/, "\\&Ecirc;",  s)
        gsub(/{\\\"E}/,   "\\&Euml;",   s)

        gsub(/{\\`I}/,    "\\&Igrave;", s)
        gsub(/{\\'I}/,    "\\&Iacute;", s)
        gsub(/{\\[\^]I}/, "\\&Icirc;",  s)
        gsub(/{\\\"I}/,   "\\&Iuml;",   s)

        # ignore eth and thorn

        gsub(/{\\~N}/,    "\\&Ntilde;", s)

        gsub(/{\\`O}/,    "\\&Ograve;", s)
        gsub(/{\\'O}/,    "\\&Oacute;", s)
        gsub(/{\\[\^]O}/, "\\&Ocirc;",  s)
        gsub(/{\\~O}/,    "\\&Otilde;", s)
        gsub(/{\\\"O}/,   "\\&Ouml;",   s)
        gsub(/{\\O}/,     "\\&Oslash;", s)

        gsub(/{\\`U}/,    "\\&Ugrave;", s)
        gsub(/{\\'U}/,    "\\&Uacute;", s)
        gsub(/{\\[\^]U}/, "\\&Ucirc;",  s)
        gsub(/{\\\"U}/,   "\\&Uuml;",   s)

        gsub(/{\\'Y}/,    "\\&Yacute;", s)

        gsub(/{\\ss}/,    "\\&szlig;",  s)

        # Others not mentioned in Flynn's book
        gsub(/{\\'\\i}/,  "\\&iacute;", s)
        gsub(/{\\'\\j}/,  "j",          s)      # no Latin 1 entity for accented j
    }
    return (s)
}


# Open the HTML heading for a new issue: a rule, then <H1> and a
# named anchor built from html_label().  html_end_issue() closes both.
function html_begin_issue()
{
    print_line("")
    print_line(prefix(2) "<HR>")
    print_line("")
    print_line(prefix(2) "<H1>")
    print_line(prefix(3) "<A NAME=\"" html_label() "\">")
}


# Return the opening tag of the link from a page number to the BibTeX
# entry (BIBFILEURL#Citation_label), or an empty string when HTML
# output is off or BIBFILEURL was not given.
function html_begin_pages()
{
    return ((HTML && (BIBFILEURL != "")) ? ("<A HREF=\"" BIBFILEURL "#" Citation_label "\">") : "")
}


# Start a <PRE> environment and remember that it is open.
function html_begin_pre()
{
    In_PRE = 1
    print_line("<PRE>")
}


# Return the opening tag of the link around an article title, or an
# empty string when HTML output is off or the entry has no URL.
function html_begin_title()
{
    return ((HTML && (Url != "")) ? ("<A HREF=\"" Url "\">") : "")
}


# Begin the table-of-contents listing of an issue, first closing any
# listing that is still open.
function html_begin_toc()
{
    html_end_toc()
    html_begin_pre()
}


# Print the saved body lines Body[1..BodyLines].
#
# k is a local work variable.
function html_body( k)
{
    for (k = 1; k <= BodyLines; ++k)
        print Body[k]
}


function html_breakpoint(title,maxlength, break_after,k)
{
    # Return the largest character position in title AFTER which we
    # can break the title across lines, without exceeding maxlength
    # visible characters.
    if (html_length(title) > maxlength) # then need to split title across lines
    {
        # In the presence of HTML markup, the initialization of
        # k here is complicated, because we need to advance it
        # until html_length(title) is at least maxlength,
        # without invoking the expensive html_length() function
        # too frequently.  The need to split the title makes the
        # alternative of delayed insertion of HTML markup much
        # more complicated.
        break_after = 0
        for (k = min(maxlength,length(title)); k < length(title); ++k)
        {
            if (substr(title,k+1,1) == " ")
            {                           # could break after position k
                if (html_length(substr(title,1,k)) <= maxlength)
                    break_after = k
                else                    # advanced too far, retreat back to last break_after
                    break
            }
        }
        if (break_after == 0)           # no breakpoint found by forward scan
        {                               # so switch to backward scan
            for (k = min(maxlength,length(title)) - 1; \
                (k > 0) && (substr(title,k+1,1) != " "); --k)
                ;                       # find space at which to break title
            if (k < 1)                  # no break point found
                k = length(title)       # so must print entire string
        }
        else
            k = break_after
    }
    else                                # title fits on one line
        k = length(title)
    return (k)
}

# Close the anchor and heading opened by html_begin_issue().
function html_end_issue()
{
    print_line(prefix(3) "</A>")
    print_line(prefix(2) "</H1>")
}


# Return the closing tag that matches html_begin_pages().
function html_end_pages()
{
    return ((HTML && (BIBFILEURL != "")) ? "</A>" : "")
}


# Close the <PRE> environment if one is open.
function html_end_pre()
{
    if (In_PRE)
    {
        print_line("</PRE>")
        In_PRE = 0
    }
}


# Return the closing tag that matches html_begin_title().
function html_end_title()
{
    return ((HTML && (Url != "")) ? "</A>" : "")
}


# End the table-of-contents listing of an issue.
function html_end_toc()
{
    html_end_pre()
}


# Return s with TeX font groups converted according to Font_decl_map
# (declaration form, {\xxx ...}) and Font_cmd_map (command form,
# \xxx{...}).  A map value of "toupper" upper-cases the argument, an
# empty value just drops the font switch, and any other value is used
# as an HTML tag name wrapped around the argument.
#
# Brace groups that are not font groups are left in place, and the
# text after their opening brace is still scanned, so that a font
# group later in the string (or nested inside such a group) is also
# converted.
#
# arg, control_word, k, level, n, open_brace, and last are local work
# variables.
function html_fonts(s, arg,control_word,k,level,n,open_brace,last)
{
    open_brace = index(s,"{")
    if (open_brace > 0)                 # important optimization
    {
        level = 1
        for (k = open_brace + 1; (level != 0) && (k <= length(s)); ++k)
        {
            if (substr(s,k,1) == "{")
                level++
            else if (substr(s,k,1) == "}")
                level--
        }

        # {...} is now found at open_brace ... (k-1).  When the group
        # is balanced (level == 0), position k-1 holds its closing
        # brace, which must not become part of the argument; for an
        # unbalanced group the argument runs to the end of s.
        last = (level == 0) ? (k - 2) : (k - 1)

        for (control_word in Font_decl_map)     # look for {\xxx ...}
        {
            if (substr(s,open_brace+1,length(control_word)+1) ~ \
                ("\\" control_word "[^A-Za-z]"))
            {
                n = open_brace + 1 + length(control_word)
                arg = trim(substr(s,n,last - n + 1))
                if (Font_decl_map[control_word] == "toupper")   # arg -> ARG
                    arg = toupper(arg)
                else if (Font_decl_map[control_word] != "")     # arg -> <TAG>arg</TAG>
                    arg = "<" Font_decl_map[control_word] ">" arg "</" Font_decl_map[control_word] ">"
                return (substr(s,1,open_brace-1) arg html_fonts(substr(s,k)))
            }
        }
        for (control_word in Font_cmd_map)      # look for \xxx{...}
        {
            if (substr(s,open_brace - length(control_word),length(control_word)) ~ \
                ("\\" control_word))
            {
                n = open_brace + 1
                arg = trim(substr(s,n,last - n + 1))
                if (Font_cmd_map[control_word] == "toupper")    # arg -> ARG
                    arg = toupper(arg)
                else if (Font_cmd_map[control_word] != "")      # arg -> <TAG>arg</TAG>
                    arg = "<" Font_cmd_map[control_word] ">" arg "</" Font_cmd_map[control_word] ">"
                n = open_brace - length(control_word) - 1
                return (substr(s,1,n) arg html_fonts(substr(s,k)))
            }
        }

        # Not a font group: keep the text through this open brace
        # and go on looking for font groups in the rest of the string.
        return (substr(s,1,open_brace) html_fonts(substr(s,open_brace+1)))
    }
    return (s)
}


# Print the HTML prologue: provenance comments, DOCTYPE, <HEAD> with
# the journal name as title, and the opening <BODY> tag.  Sets the
# globals USER, HOSTNAME, DATE, and PERSONAL_NAME as a side effect.
#
# cmd and safe_user are local work variables.
function html_header( cmd,safe_user)
{
    USER = ENVIRON["USER"]
    if (USER == "")
        USER = ENVIRON["LOGNAME"]
    if (USER == "")
        USER = "????"

    # Each command pipe is closed after its single read so that no
    # child process or file descriptor is left open.
    cmd = "hostname"
    cmd | getline HOSTNAME
    close(cmd)

    cmd = "date"
    cmd | getline DATE
    close(cmd)

    # USER comes from the environment and is pasted into shell command
    # lines below, so reduce it to characters that are safe inside
    # single quotes and harmless to the shell.
    safe_user = USER
    gsub(/[^A-Za-z0-9._-]/,"",safe_user)

    if (safe_user != "")
    {
        # Try the NIS password map first; stderr is discarded because
        # ypcat is missing or fails on hosts that do not run NIS.
        cmd = "ypcat passwd 2>/dev/null | grep '^" safe_user ":' | awk -F: '{print $5}'"
        cmd | getline PERSONAL_NAME
        close(cmd)

        if (PERSONAL_NAME == "")        # fall back to the local password file
        {
            cmd = "grep '^" safe_user ":' /etc/passwd | awk -F: '{print $5}'"
            cmd | getline PERSONAL_NAME
            close(cmd)
        }
    }

    print "<!-- WARNING: Do NOT edit this file.  It was converted from -->"
    print "<!-- BibTeX format to HTML by journal-toc.awk version " VERSION_NUMBER " " VERSION_DATE " -->"
    print "<!-- on " DATE " -->"
    print "<!-- for " PERSONAL_NAME " (" USER "@" HOSTNAME ") -->"
    print ""
    print ""
    print "<!DOCTYPE HTML public \"-//IETF//DTD HTML//EN\">"
    print ""
    print "<HTML>"
    print prefix(1) "<HEAD>"
    print prefix(2) "<TITLE>"
    print prefix(3) Journal
    print prefix(2) "</TITLE>"
    print prefix(2) "<LINK REV=\"made\" HREF=\"mailto:" USER "@" HOSTNAME "\">"
    print prefix(1) "</HEAD>"
    print ""
    print prefix(1) "<BODY>"
}


# Return the anchor name of the current issue, built from volume,
# number, month, and year, with every character outside
# A-Za-z0-9():,;./- removed.
#
# label is a local work variable.
function html_label( label)
{
    label = Volume "(" Number "):" Month ":" Year
    gsub(/[^A-Za-z0-9():,;.\/\-]/,"",label)
    return (label)
}
function html_length(s)
{
    # Return the visible length of s.  When HTML output is active, SGML
    # tags and named/numeric-looking entities (&name;) are removed from
    # the local copy of s before it is measured; otherwise the raw
    # length is returned.

    if (HTML)
    {
        gsub(/<\/?[^>]*>/,"",s)         # remove SGML tags
        gsub(/&[A-Za-z0-9]+;/,"",s)     # remove SGML entities
    }
    return (length(s))
}


function html_toc()
{
    # Print the <H1> heading for the issue index, followed by the
    # accumulated HTML_TOC text built up by html_toc_entry().

    print prefix(2) "<H1>"
    print prefix(3) "Table of contents for issues of " Journal
    print prefix(2) "</H1>"
    print HTML_TOC
}


function html_toc_entry()
{
    # Append one hypertext link line to the global HTML_TOC string.  The
    # link target is the anchor name produced by html_label() and the
    # link text is produced by vol_no_month_year().

    HTML_TOC = HTML_TOC " <A HREF=\"#" html_label() "\">"
    HTML_TOC = HTML_TOC vol_no_month_year()
    HTML_TOC = HTML_TOC "</A><BR>" "\n"
}


function html_trailer()
{
    # Close any open preformatted section, then close BODY and HTML.

    html_end_pre()
    print prefix(1) "</BODY>"
    print "</HTML>"
}


function initialize()
{
    # Set program-wide defaults and lookup tables.  HTML and INDENT may
    # have been preset (e.g. with -v on the command line); unset values
    # receive defaults here.

    # NB: Update these when the program changes
    VERSION_DATE = "[09-Oct-1996]"
    VERSION_NUMBER = "1.00"

    HTML = (HTML == "") ? 0 : (0 + HTML)    # force a numeric value

    if (INDENT == "")
        INDENT = 4

    if (HTML == 0)
        INDENT = 0      # indentation suppressed in ASCII mode

    LEADERS = " . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ."

    MAX_TITLE_CHARS = 36    # 36 produces a 79-char output line when there is
                            # just an initial page number.  If this is
                            # increased, the LEADERS string may need to be
                            # lengthened.

    MIN_LEADERS = 4         # Minimum number of characters from LEADERS
                            # required when leaders are used.  The total
                            # number of characters that can appear in a
                            # title line is MAX_TITLE_CHARS + MIN_LEADERS.
                            # Leaders are omitted when the title length is
                            # between MAX_TITLE_CHARS and this sum.

    # Must be at least MIN_LEADERS characters long, so build it from
    # MIN_LEADERS instead of relying on a hand-counted literal.
    MIN_LEADERS_SPACE = sprintf("%" MIN_LEADERS "s", "")

    Month_expansion["jan"] = "January"
    Month_expansion["feb"] = "February"
    Month_expansion["mar"] = "March"
    Month_expansion["apr"] = "April"
    Month_expansion["may"] = "May"
    Month_expansion["jun"] = "June"
    Month_expansion["jul"] = "July"
    Month_expansion["aug"] = "August"
    Month_expansion["sep"] = "September"
    Month_expansion["oct"] = "October"
    Month_expansion["nov"] = "November"
    Month_expansion["dec"] = "December"

    # TeX font commands (\cmd{...}).  An empty value and the value
    # "toupper" are special markers rather than tag names; how they are
    # interpreted is up to the code that reads this table.
    Font_cmd_map["\\emph"] = "EM"
    Font_cmd_map["\\textbf"] = "B"
    Font_cmd_map["\\textit"] = "I"
    Font_cmd_map["\\textmd"] = ""
    Font_cmd_map["\\textrm"] = ""
    Font_cmd_map["\\textsc"] = "toupper"
    Font_cmd_map["\\textsl"] = "I"
    Font_cmd_map["\\texttt"] = "TT"     # was "t", which is not an HTML element;
                                        # now agrees with Font_decl_map["\\tt"]
    Font_cmd_map["\\textup"] = ""

    # TeX font declarations ({\decl ...}), same value conventions.
    Font_decl_map["\\bf"] = "B"
    Font_decl_map["\\em"] = "EM"
    Font_decl_map["\\it"] = "I"
    Font_decl_map["\\rm"] = ""
    Font_decl_map["\\sc"] = "toupper"
    Font_decl_map["\\sf"] = ""
    Font_decl_map["\\tt"] = "TT"
    Font_decl_map["\\itshape"] = "I"
    Font_decl_map["\\upshape"] = ""
    Font_decl_map["\\slshape"] = "I"
    Font_decl_map["\\scshape"] = "toupper"
    Font_decl_map["\\mdseries"] = ""
    Font_decl_map["\\bfseries"] = "B"
    Font_decl_map["\\rmfamily"] = ""
    Font_decl_map["\\sffamily"] = ""
    Font_decl_map["\\ttfamily"] = "TT"
}


function min(a,b)
{
    # Return the smaller of a and b (b when they compare equal).

    return (a < b) ? a : b
}


function prefix(level)
{
    # Return a prefix of up to 60 blanks: INDENT * level of them, or
    # none at all while In_PRE is set.

    if (In_PRE)
        return ("")
    else
        return (substr(sprintf("%60s", ""), 1, INDENT * level))
}


function print_line(line)
{
    # Emit one output line: stored in Body[] while HTML is nonzero,
    # printed immediately otherwise.

    if (HTML)           # must buffer in memory so that we can accumulate TOC
        Body[++BodyLines] = line
    else
        print line
}


function print_toc_line(author,title,pages, extra,leaders,n,t)
{
    # Format and emit one table-of-contents line.
    #
    #   author, title   text for the two columns
    #   pages           page field; empty for every title line except the
    #                   last one of an entry
    #
    # The globals Title_prefix and Title_suffix are wrapped around the
    # title and are cleared afterwards.
    #
    # When we have a multiline title, the hypertext link goes only
    # on the first line.  A multiline hypertext link looks awful
    # because of long underlines under the leading indentation.
    #
    # NOTE(review): with the blanks as written, the page-number format
    # yields a 77-character line, not the 79 quoted at MAX_TITLE_CHARS;
    # runs of blanks in these format strings may have been collapsed --
    # confirm against the upstream file.

    if (pages == "")    # then no leaders needed in title lines other than last one
        t = sprintf("%31s %s%s%s", author, Title_prefix, title, Title_suffix)
    else                # last title line, with page number
    {
        n = html_length(title)      # potentially expensive
        extra = n % 2               # extra space for aligned leader dots
        if (n <= MAX_TITLE_CHARS)   # then need leaders
            leaders = substr(LEADERS, 1, MAX_TITLE_CHARS + MIN_LEADERS - extra - \
                             min(MAX_TITLE_CHARS,n))
        else                        # title (almost) fills line, so no leaders
            leaders = substr(MIN_LEADERS_SPACE,1, \
                             (MAX_TITLE_CHARS + MIN_LEADERS - extra - n))
        t = sprintf("%31s %s%s%s%s%s %4s", \
                    author, Title_prefix, title, Title_suffix, \
                    (extra ? " " : ""), leaders, pages)
    }

    Title_prefix = ""   # forget any hypertext
    Title_suffix = ""   # link material

    # Efficiency note: an earlier version accumulated the body in a
    # single scalar like this: "Body = Body t".  Profiling revealed
    # this statement as the major hot spot, and the change to array
    # storage made the program more than twice as fast.  This
    # suggests that awk might benefit from an optimization of
    # "s = s t" that uses realloc() instead of malloc().
    if (HTML)
        Body[++BodyLines] = t
    else
        print t
}


function protect_SGML_characters(s)
{
    # Return s with the four SGML reserved characters replaced by their
    # entities.  In a gsub() replacement a bare "&" means "the matched
    # text", so each literal ampersand is written as "\\&".

    gsub(/&/,"\\&amp;",s)       # NB: this one MUST be first
    gsub(/</,"\\&lt;",s)
    gsub(/>/,"\\&gt;",s)
    gsub(/\"/,"\\&quot;",s)
    return (s)
}


function strip_braces(s)
{   # strip non-backslashed braces from s and return the result

    return (strip_char(strip_char(s,"{"),"}"))
}


function strip_char(s,c, k)
{   # strip non-backslashed instances of c from s, and return the result
    k = index(s,c)
    if (k > 0)          # then found the character
    {
        if (substr(s,k-1,1) != "\\")    # then not backslashed char
            s = substr(s,1,k-1) strip_char(substr(s,k+1),c) # so remove it (recursively)
        else            # preserve backslashed char, and continue with the
                        # text that follows it (the previous code passed
                        # (s,k+1,c), which rescanned all of s for the
                        # string k+1 and duplicated the prefix)
            s = substr(s,1,k) strip_char(substr(s,k+1),c)
    }
    return (s)
}


function strip_html(s)
{
    # Return s with all SGML/HTML tags removed.

    gsub(/<\/?[^>]*>/,"",s)
    return (s)
}


function terminate()
{
    # End-of-run processing.  In HTML mode, emit the buffered document
    # in order: header, issue index, body, trailer.  Nothing is done in
    # plain-text mode.

    if (HTML)
    {
        html_end_pre()

        HTML = 0        # NB: stop line buffering
        html_header()
        html_toc()
        html_body()
        html_trailer()
    }
}


function TeX_to_HTML(s, k,n,parts)
{
    # Translate a TeX string for output.  The text is split at dollar
    # signs; odd-numbered parts (non-math) go through
    # TeX_to_HTML_nonmath() and strip_braces(), even-numbered parts
    # (math) go through TeX_to_HTML_math().

    # First convert SGML reserved characters to SGML entities.  Only
    # >, < and " are handled here; TeX ampersands (\&) are converted by
    # the math and non-math helpers.
    if (HTML)
    {
        gsub(/>/, "\\&gt;", s)
        gsub(/</, "\\&lt;", s)
        gsub(/"/, "\\&quot;", s)
    }

    gsub(/[$][$]/,"$$$",s)      # change display math to triple dollars for split
    n = split(s,parts,/[$]/)    # split into non-math (odd) and math (even) parts

    s = ""
    for (k = 1; k <= n; ++k)    # unbrace non-math part, leaving math mode intact
        s = s ((k > 1) ? "$" : "") \
            ((k % 2) ? strip_braces(TeX_to_HTML_nonmath(parts[k])) : \
                       TeX_to_HTML_math(parts[k]))

    gsub(/[$][$][$]/,"$$",s)    # restore display math

    return (s)
}


function TeX_to_HTML_math(s)
{
    # Mostly a dummy for now, but HTML 3 could support some math translation

    gsub(/\\&/,"\\&amp;",s)     # reduce TeX ampersands to SGML entities

    return (s)
}


function TeX_to_HTML_nonmath(s)
{
    # Translate the TeX markup in a non-math text fragment.  Nothing is
    # done unless s contains a backslash.

    if (index(s,"\\") > 0)      # important optimization
    {
        gsub(/\\slash +/,"/",s)         # replace TeX slashes with conventional ones
        gsub(/ *\\emdash +/," --- ",s)  # replace BibNet emdashes with conventional ones
        gsub(/\\%/,"%",s)               # reduce TeX percents to conventional ones
        gsub(/\\[$]/,"$",s)             # reduce TeX dollars to conventional ones
        gsub(/\\#/,"#",s)               # reduce TeX sharps to conventional ones

        if (HTML)       # translate TeX markup to HTML
        {
            gsub(/\\&/,"\\&amp;",s)     # reduce TeX ampersands to SGML entities
            s = html_accents(s)
            s = html_fonts(s)
        }
        else            # plain ASCII text output: discard all TeX markup
        {
            gsub(/\\\&/, "\\&", s)      # reduce TeX ampersands to conventional ones

            gsub(/\\[a-z][a-z] +/,"",s) # remove TeX font changes
            gsub(/\\[^A-Za-z]/,"",s)    # remove remaining TeX control symbols
        }
    }
    return (s)
}


function trim(s)
{
    # Return s without leading and trailing blanks and tabs.

    gsub(/^[ \t]+/,"",s)
    gsub(/[ \t]+$/,"",s)
    return (s)
}


function vol_no_month_year()
{
    # Return the issue heading text built from the globals Volume,
    # Number, Month and Year, each passed through wrap().

    return ("Volume " wrap(Volume) ", Number " wrap(Number) ", " wrap(Month) ", " wrap(Year))
}


function wrap(value)
{
    # Return value enclosed in <STRONG>...</STRONG> when HTML is
    # nonzero, and unchanged otherwise.

    return (HTML ? ("<STRONG>" value "</STRONG>") : value)
}