1 /* Generate assembler source containing symbol information 2 * 3 * Copyright 2002 by Kai Germaschewski 4 * 5 * This software may be used and distributed according to the terms 6 * of the GNU General Public License, incorporated herein by reference. 7 * 8 * Usage: kallsyms [--all-symbols] in.map > out.S 9 * 10 * Table compression uses all the unused char codes on the symbols and 11 * maps these to the most used substrings (tokens). For instance, it might 12 * map char code 0xF7 to represent "write_" and then in every symbol where 13 * "write_" appears it can be replaced by 0xF7, saving 5 bytes. 14 * The used codes themselves are also placed in the table so that the 15 * decompresion can work without "special cases". 16 * Applied to kernel symbols, this usually produces a compression ratio 17 * of about 50%. 18 * 19 */ 20 21 #include <errno.h> 22 #include <getopt.h> 23 #include <stdbool.h> 24 #include <stdio.h> 25 #include <stdlib.h> 26 #include <string.h> 27 #include <ctype.h> 28 #include <limits.h> 29 30 #include <xalloc.h> 31 32 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) 33 34 #define KSYM_NAME_LEN 512 35 36 struct sym_entry { 37 unsigned long long addr; 38 unsigned int len; 39 unsigned int seq; 40 unsigned char sym[]; 41 }; 42 43 struct addr_range { 44 const char *start_sym, *end_sym; 45 unsigned long long start, end; 46 }; 47 48 static unsigned long long _text; 49 static unsigned long long relative_base; 50 static struct addr_range text_ranges[] = { 51 { "_stext", "_etext" }, 52 { "_sinittext", "_einittext" }, 53 }; 54 #define text_range_text (&text_ranges[0]) 55 #define text_range_inittext (&text_ranges[1]) 56 57 static struct sym_entry **table; 58 static unsigned int table_size, table_cnt; 59 static int all_symbols; 60 61 static int token_profit[0x10000]; 62 63 /* the table that holds the result of the compression */ 64 static unsigned char best_table[256][2]; 65 static unsigned char best_table_len[256]; 66 67 68 static void usage(void) 69 { 70 fprintf(stderr, "Usage: kallsyms [--all-symbols] in.map > out.S\n"); 71 exit(1); 72 } 73 74 static char *sym_name(const struct sym_entry *s) 75 { 76 return (char *)s->sym + 1; 77 } 78 79 static bool is_ignored_symbol(const char *name, char type) 80 { 81 if (type == 'u' || type == 'n') 82 return true; 83 84 if (toupper(type) == 'A') { 85 /* Keep these useful absolute symbols */ 86 if (strcmp(name, "__kernel_syscall_via_break") && 87 strcmp(name, "__kernel_syscall_via_epc") && 88 strcmp(name, "__kernel_sigtramp") && 89 strcmp(name, "__gp")) 90 return true; 91 } 92 93 return false; 94 } 95 96 static void check_symbol_range(const char *sym, unsigned long long addr, 97 struct addr_range *ranges, int entries) 98 { 99 size_t i; 100 struct addr_range *ar; 101 102 for (i = 0; i < entries; ++i) { 103 ar = &ranges[i]; 104 105 if (strcmp(sym, ar->start_sym) == 0) { 106 ar->start = addr; 107 return; 108 } else if (strcmp(sym, ar->end_sym) == 0) { 109 ar->end = addr; 110 return; 111 } 112 } 113 } 114 115 static struct sym_entry *read_symbol(FILE *in, char **buf, size_t *buf_len) 116 { 117 char *name, type, *p; 118 unsigned long long addr; 119 size_t len; 120 ssize_t readlen; 121 struct sym_entry *sym; 122 123 errno = 0; 124 readlen = getline(buf, buf_len, in); 125 if (readlen < 0) { 126 if (errno) { 127 perror("read_symbol"); 128 exit(EXIT_FAILURE); 129 } 130 return NULL; 131 } 132 133 if ((*buf)[readlen - 1] == '\n') 134 (*buf)[readlen - 1] = 0; 135 136 addr = strtoull(*buf, &p, 16); 137 138 if (*buf == p || *p++ != ' ' || !isascii((type = *p++)) || *p++ != ' ') { 139 fprintf(stderr, "line format error\n"); 140 exit(EXIT_FAILURE); 141 } 142 143 name = p; 144 len = strlen(name); 145 146 if (len >= KSYM_NAME_LEN) { 147 fprintf(stderr, "Symbol %s too long for kallsyms (%zu >= %d).\n" 148 "Please increase KSYM_NAME_LEN both in kernel and kallsyms.c\n", 149 name, len, KSYM_NAME_LEN); 150 return NULL; 151 } 152 153 if (strcmp(name, "_text") == 0) 154 _text = addr; 155 156 /* Ignore most absolute/undefined (?) symbols. */ 157 if (is_ignored_symbol(name, type)) 158 return NULL; 159 160 check_symbol_range(name, addr, text_ranges, ARRAY_SIZE(text_ranges)); 161 162 /* include the type field in the symbol name, so that it gets 163 * compressed together */ 164 len++; 165 166 sym = xmalloc(sizeof(*sym) + len + 1); 167 sym->addr = addr; 168 sym->len = len; 169 sym->sym[0] = type; 170 strcpy(sym_name(sym), name); 171 172 return sym; 173 } 174 175 static int symbol_in_range(const struct sym_entry *s, 176 const struct addr_range *ranges, int entries) 177 { 178 size_t i; 179 const struct addr_range *ar; 180 181 for (i = 0; i < entries; ++i) { 182 ar = &ranges[i]; 183 184 if (s->addr >= ar->start && s->addr <= ar->end) 185 return 1; 186 } 187 188 return 0; 189 } 190 191 static bool string_starts_with(const char *s, const char *prefix) 192 { 193 return strncmp(s, prefix, strlen(prefix)) == 0; 194 } 195 196 static int symbol_valid(const struct sym_entry *s) 197 { 198 const char *name = sym_name(s); 199 200 /* if --all-symbols is not specified, then symbols outside the text 201 * and inittext sections are discarded */ 202 if (!all_symbols) { 203 /* 204 * Symbols starting with __start and __stop are used to denote 205 * section boundaries, and should always be included: 206 */ 207 if (string_starts_with(name, "__start_") || 208 string_starts_with(name, "__stop_")) 209 return 1; 210 211 if (symbol_in_range(s, text_ranges, 212 ARRAY_SIZE(text_ranges)) == 0) 213 return 0; 214 /* Corner case. Discard any symbols with the same value as 215 * _etext _einittext; they can move between pass 1 and 2 when 216 * the kallsyms data are added. If these symbols move then 217 * they may get dropped in pass 2, which breaks the kallsyms 218 * rules. 219 */ 220 if ((s->addr == text_range_text->end && 221 strcmp(name, text_range_text->end_sym)) || 222 (s->addr == text_range_inittext->end && 223 strcmp(name, text_range_inittext->end_sym))) 224 return 0; 225 } 226 227 return 1; 228 } 229 230 /* remove all the invalid symbols from the table */ 231 static void shrink_table(void) 232 { 233 unsigned int i, pos; 234 235 pos = 0; 236 for (i = 0; i < table_cnt; i++) { 237 if (symbol_valid(table[i])) { 238 if (pos != i) 239 table[pos] = table[i]; 240 pos++; 241 } else { 242 free(table[i]); 243 } 244 } 245 table_cnt = pos; 246 } 247 248 static void read_map(const char *in) 249 { 250 FILE *fp; 251 struct sym_entry *sym; 252 char *buf = NULL; 253 size_t buflen = 0; 254 255 fp = fopen(in, "r"); 256 if (!fp) { 257 perror(in); 258 exit(1); 259 } 260 261 while (!feof(fp)) { 262 sym = read_symbol(fp, &buf, &buflen); 263 if (!sym) 264 continue; 265 266 sym->seq = table_cnt; 267 268 if (table_cnt >= table_size) { 269 table_size += 10000; 270 table = xrealloc(table, sizeof(*table) * table_size); 271 } 272 273 table[table_cnt++] = sym; 274 } 275 276 free(buf); 277 fclose(fp); 278 } 279 280 static void output_label(const char *label) 281 { 282 printf(".globl %s\n", label); 283 printf("\tALGN\n"); 284 printf("%s:\n", label); 285 } 286 287 /* uncompress a compressed symbol. When this function is called, the best table 288 * might still be compressed itself, so the function needs to be recursive */ 289 static int expand_symbol(const unsigned char *data, int len, char *result) 290 { 291 int c, rlen, total=0; 292 293 while (len) { 294 c = *data; 295 /* if the table holds a single char that is the same as the one 296 * we are looking for, then end the search */ 297 if (best_table[c][0]==c && best_table_len[c]==1) { 298 *result++ = c; 299 total++; 300 } else { 301 /* if not, recurse and expand */ 302 rlen = expand_symbol(best_table[c], best_table_len[c], result); 303 total += rlen; 304 result += rlen; 305 } 306 data++; 307 len--; 308 } 309 *result=0; 310 311 return total; 312 } 313 314 static int compare_names(const void *a, const void *b) 315 { 316 int ret; 317 const struct sym_entry *sa = *(const struct sym_entry **)a; 318 const struct sym_entry *sb = *(const struct sym_entry **)b; 319 320 ret = strcmp(sym_name(sa), sym_name(sb)); 321 if (!ret) { 322 if (sa->addr > sb->addr) 323 return 1; 324 else if (sa->addr < sb->addr) 325 return -1; 326 327 /* keep old order */ 328 return (int)(sa->seq - sb->seq); 329 } 330 331 return ret; 332 } 333 334 static void sort_symbols_by_name(void) 335 { 336 qsort(table, table_cnt, sizeof(table[0]), compare_names); 337 } 338 339 static void write_src(void) 340 { 341 unsigned int i, k, off; 342 unsigned int best_idx[256]; 343 unsigned int *markers, markers_cnt; 344 char buf[KSYM_NAME_LEN]; 345 346 printf("#include <asm/bitsperlong.h>\n"); 347 printf("#if BITS_PER_LONG == 64\n"); 348 printf("#define PTR .quad\n"); 349 printf("#define ALGN .balign 8\n"); 350 printf("#else\n"); 351 printf("#define PTR .long\n"); 352 printf("#define ALGN .balign 4\n"); 353 printf("#endif\n"); 354 355 printf("\t.section .rodata, \"a\"\n"); 356 357 output_label("kallsyms_num_syms"); 358 printf("\t.long\t%u\n", table_cnt); 359 printf("\n"); 360 361 /* table of offset markers, that give the offset in the compressed stream 362 * every 256 symbols */ 363 markers_cnt = (table_cnt + 255) / 256; 364 markers = xmalloc(sizeof(*markers) * markers_cnt); 365 366 output_label("kallsyms_names"); 367 off = 0; 368 for (i = 0; i < table_cnt; i++) { 369 if ((i & 0xFF) == 0) 370 markers[i >> 8] = off; 371 table[i]->seq = i; 372 373 /* There cannot be any symbol of length zero. */ 374 if (table[i]->len == 0) { 375 fprintf(stderr, "kallsyms failure: " 376 "unexpected zero symbol length\n"); 377 exit(EXIT_FAILURE); 378 } 379 380 /* Only lengths that fit in up-to-two-byte ULEB128 are supported. */ 381 if (table[i]->len > 0x3FFF) { 382 fprintf(stderr, "kallsyms failure: " 383 "unexpected huge symbol length\n"); 384 exit(EXIT_FAILURE); 385 } 386 387 /* Encode length with ULEB128. */ 388 if (table[i]->len <= 0x7F) { 389 /* Most symbols use a single byte for the length. */ 390 printf("\t.byte 0x%02x", table[i]->len); 391 off += table[i]->len + 1; 392 } else { 393 /* "Big" symbols use two bytes. */ 394 printf("\t.byte 0x%02x, 0x%02x", 395 (table[i]->len & 0x7F) | 0x80, 396 (table[i]->len >> 7) & 0x7F); 397 off += table[i]->len + 2; 398 } 399 for (k = 0; k < table[i]->len; k++) 400 printf(", 0x%02x", table[i]->sym[k]); 401 402 /* 403 * Now that we wrote out the compressed symbol name, restore the 404 * original name and print it in the comment. 405 */ 406 expand_symbol(table[i]->sym, table[i]->len, buf); 407 strcpy((char *)table[i]->sym, buf); 408 printf("\t/* %s */\n", table[i]->sym); 409 } 410 printf("\n"); 411 412 output_label("kallsyms_markers"); 413 for (i = 0; i < markers_cnt; i++) 414 printf("\t.long\t%u\n", markers[i]); 415 printf("\n"); 416 417 free(markers); 418 419 output_label("kallsyms_token_table"); 420 off = 0; 421 for (i = 0; i < 256; i++) { 422 best_idx[i] = off; 423 expand_symbol(best_table[i], best_table_len[i], buf); 424 printf("\t.asciz\t\"%s\"\n", buf); 425 off += strlen(buf) + 1; 426 } 427 printf("\n"); 428 429 output_label("kallsyms_token_index"); 430 for (i = 0; i < 256; i++) 431 printf("\t.short\t%d\n", best_idx[i]); 432 printf("\n"); 433 434 output_label("kallsyms_offsets"); 435 436 for (i = 0; i < table_cnt; i++) { 437 /* 438 * Use the offset relative to the lowest value 439 * encountered of all relative symbols, and emit 440 * non-relocatable fixed offsets that will be fixed 441 * up at runtime. 442 */ 443 444 long long offset; 445 446 offset = table[i]->addr - relative_base; 447 if (offset < 0 || offset > UINT_MAX) { 448 fprintf(stderr, "kallsyms failure: " 449 "relative symbol value %#llx out of range\n", 450 table[i]->addr); 451 exit(EXIT_FAILURE); 452 } 453 printf("\t.long\t%#x\t/* %s */\n", (int)offset, table[i]->sym); 454 } 455 printf("\n"); 456 457 output_label("kallsyms_relative_base"); 458 /* Provide proper symbols relocatability by their '_text' relativeness. */ 459 if (_text <= relative_base) 460 printf("\tPTR\t_text + %#llx\n", relative_base - _text); 461 else 462 printf("\tPTR\t_text - %#llx\n", _text - relative_base); 463 printf("\n"); 464 465 sort_symbols_by_name(); 466 output_label("kallsyms_seqs_of_names"); 467 for (i = 0; i < table_cnt; i++) 468 printf("\t.byte 0x%02x, 0x%02x, 0x%02x\t/* %s */\n", 469 (unsigned char)(table[i]->seq >> 16), 470 (unsigned char)(table[i]->seq >> 8), 471 (unsigned char)(table[i]->seq >> 0), 472 table[i]->sym); 473 printf("\n"); 474 } 475 476 477 /* table lookup compression functions */ 478 479 /* count all the possible tokens in a symbol */ 480 static void learn_symbol(const unsigned char *symbol, int len) 481 { 482 int i; 483 484 for (i = 0; i < len - 1; i++) 485 token_profit[ symbol[i] + (symbol[i + 1] << 8) ]++; 486 } 487 488 /* decrease the count for all the possible tokens in a symbol */ 489 static void forget_symbol(const unsigned char *symbol, int len) 490 { 491 int i; 492 493 for (i = 0; i < len - 1; i++) 494 token_profit[ symbol[i] + (symbol[i + 1] << 8) ]--; 495 } 496 497 /* do the initial token count */ 498 static void build_initial_token_table(void) 499 { 500 unsigned int i; 501 502 for (i = 0; i < table_cnt; i++) 503 learn_symbol(table[i]->sym, table[i]->len); 504 } 505 506 static unsigned char *find_token(unsigned char *str, int len, 507 const unsigned char *token) 508 { 509 int i; 510 511 for (i = 0; i < len - 1; i++) { 512 if (str[i] == token[0] && str[i+1] == token[1]) 513 return &str[i]; 514 } 515 return NULL; 516 } 517 518 /* replace a given token in all the valid symbols. Use the sampled symbols 519 * to update the counts */ 520 static void compress_symbols(const unsigned char *str, int idx) 521 { 522 unsigned int i, len, size; 523 unsigned char *p1, *p2; 524 525 for (i = 0; i < table_cnt; i++) { 526 527 len = table[i]->len; 528 p1 = table[i]->sym; 529 530 /* find the token on the symbol */ 531 p2 = find_token(p1, len, str); 532 if (!p2) continue; 533 534 /* decrease the counts for this symbol's tokens */ 535 forget_symbol(table[i]->sym, len); 536 537 size = len; 538 539 do { 540 *p2 = idx; 541 p2++; 542 size -= (p2 - p1); 543 memmove(p2, p2 + 1, size); 544 p1 = p2; 545 len--; 546 547 if (size < 2) break; 548 549 /* find the token on the symbol */ 550 p2 = find_token(p1, size, str); 551 552 } while (p2); 553 554 table[i]->len = len; 555 556 /* increase the counts for this symbol's new tokens */ 557 learn_symbol(table[i]->sym, len); 558 } 559 } 560 561 /* search the token with the maximum profit */ 562 static int find_best_token(void) 563 { 564 int i, best, bestprofit; 565 566 bestprofit=-10000; 567 best = 0; 568 569 for (i = 0; i < 0x10000; i++) { 570 if (token_profit[i] > bestprofit) { 571 best = i; 572 bestprofit = token_profit[i]; 573 } 574 } 575 return best; 576 } 577 578 /* this is the core of the algorithm: calculate the "best" table */ 579 static void optimize_result(void) 580 { 581 int i, best; 582 583 /* using the '\0' symbol last allows compress_symbols to use standard 584 * fast string functions */ 585 for (i = 255; i >= 0; i--) { 586 587 /* if this table slot is empty (it is not used by an actual 588 * original char code */ 589 if (!best_table_len[i]) { 590 591 /* find the token with the best profit value */ 592 best = find_best_token(); 593 if (token_profit[best] == 0) 594 break; 595 596 /* place it in the "best" table */ 597 best_table_len[i] = 2; 598 best_table[i][0] = best & 0xFF; 599 best_table[i][1] = (best >> 8) & 0xFF; 600 601 /* replace this token in all the valid symbols */ 602 compress_symbols(best_table[i], i); 603 } 604 } 605 } 606 607 /* start by placing the symbols that are actually used on the table */ 608 static void insert_real_symbols_in_table(void) 609 { 610 unsigned int i, j, c; 611 612 for (i = 0; i < table_cnt; i++) { 613 for (j = 0; j < table[i]->len; j++) { 614 c = table[i]->sym[j]; 615 best_table[c][0]=c; 616 best_table_len[c]=1; 617 } 618 } 619 } 620 621 static void optimize_token_table(void) 622 { 623 build_initial_token_table(); 624 625 insert_real_symbols_in_table(); 626 627 optimize_result(); 628 } 629 630 /* guess for "linker script provide" symbol */ 631 static int may_be_linker_script_provide_symbol(const struct sym_entry *se) 632 { 633 const char *symbol = sym_name(se); 634 int len = se->len - 1; 635 636 if (len < 8) 637 return 0; 638 639 if (symbol[0] != '_' || symbol[1] != '_') 640 return 0; 641 642 /* __start_XXXXX */ 643 if (!memcmp(symbol + 2, "start_", 6)) 644 return 1; 645 646 /* __stop_XXXXX */ 647 if (!memcmp(symbol + 2, "stop_", 5)) 648 return 1; 649 650 /* __end_XXXXX */ 651 if (!memcmp(symbol + 2, "end_", 4)) 652 return 1; 653 654 /* __XXXXX_start */ 655 if (!memcmp(symbol + len - 6, "_start", 6)) 656 return 1; 657 658 /* __XXXXX_end */ 659 if (!memcmp(symbol + len - 4, "_end", 4)) 660 return 1; 661 662 return 0; 663 } 664 665 static int compare_symbols(const void *a, const void *b) 666 { 667 const struct sym_entry *sa = *(const struct sym_entry **)a; 668 const struct sym_entry *sb = *(const struct sym_entry **)b; 669 int wa, wb; 670 671 /* sort by address first */ 672 if (sa->addr > sb->addr) 673 return 1; 674 if (sa->addr < sb->addr) 675 return -1; 676 677 /* sort by "weakness" type */ 678 wa = (sa->sym[0] == 'w') || (sa->sym[0] == 'W'); 679 wb = (sb->sym[0] == 'w') || (sb->sym[0] == 'W'); 680 if (wa != wb) 681 return wa - wb; 682 683 /* sort by "linker script provide" type */ 684 wa = may_be_linker_script_provide_symbol(sa); 685 wb = may_be_linker_script_provide_symbol(sb); 686 if (wa != wb) 687 return wa - wb; 688 689 /* sort by the number of prefix underscores */ 690 wa = strspn(sym_name(sa), "_"); 691 wb = strspn(sym_name(sb), "_"); 692 if (wa != wb) 693 return wa - wb; 694 695 /* sort by initial order, so that other symbols are left undisturbed */ 696 return sa->seq - sb->seq; 697 } 698 699 static void sort_symbols(void) 700 { 701 qsort(table, table_cnt, sizeof(table[0]), compare_symbols); 702 } 703 704 /* find the minimum non-absolute symbol address */ 705 static void record_relative_base(void) 706 { 707 /* 708 * The table is sorted by address. 709 * Take the first symbol value. 710 */ 711 if (table_cnt) 712 relative_base = table[0]->addr; 713 } 714 715 int main(int argc, char **argv) 716 { 717 while (1) { 718 static const struct option long_options[] = { 719 {"all-symbols", no_argument, &all_symbols, 1}, 720 {}, 721 }; 722 723 int c = getopt_long(argc, argv, "", long_options, NULL); 724 725 if (c == -1) 726 break; 727 if (c != 0) 728 usage(); 729 } 730 731 if (optind >= argc) 732 usage(); 733 734 read_map(argv[optind]); 735 shrink_table(); 736 sort_symbols(); 737 record_relative_base(); 738 optimize_token_table(); 739 write_src(); 740 741 return 0; 742 } 743