/* Commandline diff utility to test diff implementations. */
/*
 * Copyright (c) 2018 Martin Pieuchot
 * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <arraylist.h>
#include <diff_main.h>
#include <diff_output.h>

/*
 * Selects which diff configuration diffreg() uses.  main() picks the
 * value from the command line options.
 */
enum diffreg_algo {
	/* Default when no algorithm option is given. */
	DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE = 0,
	/* Selected by -Q. */
	DIFFREG_ALGO_MYERS_THEN_PATIENCE = 1,
	/* Selected by -P. */
	DIFFREG_ALGO_PATIENCE = 2,
	/* Selected by -T. */
	DIFFREG_ALGO_NONE = 3,
};

439eb461aaSDag-Erling Smørgrav __dead void usage(void);
449eb461aaSDag-Erling Smørgrav int diffreg(char *, char *, enum diffreg_algo, bool, bool, bool,
459eb461aaSDag-Erling Smørgrav int, bool);
469eb461aaSDag-Erling Smørgrav FILE * openfile(const char *, char **, struct stat *);
479eb461aaSDag-Erling Smørgrav
489eb461aaSDag-Erling Smørgrav __dead void
usage(void)499eb461aaSDag-Erling Smørgrav usage(void)
509eb461aaSDag-Erling Smørgrav {
519eb461aaSDag-Erling Smørgrav fprintf(stderr,
529eb461aaSDag-Erling Smørgrav "usage: %s [-apPQTwe] [-U n] file1 file2\n"
539eb461aaSDag-Erling Smørgrav "\n"
549eb461aaSDag-Erling Smørgrav " -a Treat input as ASCII even if binary data is detected\n"
559eb461aaSDag-Erling Smørgrav " -p Show function prototypes in hunk headers\n"
569eb461aaSDag-Erling Smørgrav " -P Use Patience Diff (slower but often nicer)\n"
579eb461aaSDag-Erling Smørgrav " -Q Use forward-Myers for small files, otherwise Patience\n"
589eb461aaSDag-Erling Smørgrav " -T Trivial algo: detect similar start and end only\n"
599eb461aaSDag-Erling Smørgrav " -w Ignore Whitespace\n"
609eb461aaSDag-Erling Smørgrav " -U n Number of Context Lines\n"
619eb461aaSDag-Erling Smørgrav " -e Produce ed script output\n"
629eb461aaSDag-Erling Smørgrav , getprogname());
639eb461aaSDag-Erling Smørgrav exit(1);
649eb461aaSDag-Erling Smørgrav }
659eb461aaSDag-Erling Smørgrav
669eb461aaSDag-Erling Smørgrav int
main(int argc,char * argv[])679eb461aaSDag-Erling Smørgrav main(int argc, char *argv[])
689eb461aaSDag-Erling Smørgrav {
699eb461aaSDag-Erling Smørgrav int ch, rc;
709eb461aaSDag-Erling Smørgrav bool force_text = false;
719eb461aaSDag-Erling Smørgrav bool ignore_whitespace = false;
729eb461aaSDag-Erling Smørgrav bool show_function_prototypes = false;
739eb461aaSDag-Erling Smørgrav bool edscript = false;
749eb461aaSDag-Erling Smørgrav int context_lines = 3;
759eb461aaSDag-Erling Smørgrav enum diffreg_algo algo = DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE;
769eb461aaSDag-Erling Smørgrav
779eb461aaSDag-Erling Smørgrav while ((ch = getopt(argc, argv, "apPQTwU:e")) != -1) {
789eb461aaSDag-Erling Smørgrav switch (ch) {
799eb461aaSDag-Erling Smørgrav case 'a':
809eb461aaSDag-Erling Smørgrav force_text = true;
819eb461aaSDag-Erling Smørgrav break;
829eb461aaSDag-Erling Smørgrav case 'p':
839eb461aaSDag-Erling Smørgrav show_function_prototypes = true;
849eb461aaSDag-Erling Smørgrav break;
859eb461aaSDag-Erling Smørgrav case 'P':
869eb461aaSDag-Erling Smørgrav algo = DIFFREG_ALGO_PATIENCE;
879eb461aaSDag-Erling Smørgrav break;
889eb461aaSDag-Erling Smørgrav case 'Q':
899eb461aaSDag-Erling Smørgrav algo = DIFFREG_ALGO_MYERS_THEN_PATIENCE;
909eb461aaSDag-Erling Smørgrav break;
919eb461aaSDag-Erling Smørgrav case 'T':
929eb461aaSDag-Erling Smørgrav algo = DIFFREG_ALGO_NONE;
939eb461aaSDag-Erling Smørgrav break;
949eb461aaSDag-Erling Smørgrav case 'w':
959eb461aaSDag-Erling Smørgrav ignore_whitespace = true;
969eb461aaSDag-Erling Smørgrav break;
979eb461aaSDag-Erling Smørgrav case 'U':
989eb461aaSDag-Erling Smørgrav context_lines = atoi(optarg);
999eb461aaSDag-Erling Smørgrav break;
1009eb461aaSDag-Erling Smørgrav case 'e':
1019eb461aaSDag-Erling Smørgrav edscript = true;
1029eb461aaSDag-Erling Smørgrav break;
1039eb461aaSDag-Erling Smørgrav default:
1049eb461aaSDag-Erling Smørgrav usage();
1059eb461aaSDag-Erling Smørgrav }
1069eb461aaSDag-Erling Smørgrav }
1079eb461aaSDag-Erling Smørgrav
1089eb461aaSDag-Erling Smørgrav argc -= optind;
1099eb461aaSDag-Erling Smørgrav argv += optind;
1109eb461aaSDag-Erling Smørgrav
1119eb461aaSDag-Erling Smørgrav if (argc != 2)
1129eb461aaSDag-Erling Smørgrav usage();
1139eb461aaSDag-Erling Smørgrav
1149eb461aaSDag-Erling Smørgrav rc = diffreg(argv[0], argv[1], algo, force_text, ignore_whitespace,
1159eb461aaSDag-Erling Smørgrav show_function_prototypes, context_lines, edscript);
1169eb461aaSDag-Erling Smørgrav if (rc != DIFF_RC_OK) {
1179eb461aaSDag-Erling Smørgrav fprintf(stderr, "diff: %s\n", strerror(rc));
1189eb461aaSDag-Erling Smørgrav return 1;
1199eb461aaSDag-Erling Smørgrav }
1209eb461aaSDag-Erling Smørgrav return 0;
1219eb461aaSDag-Erling Smørgrav }
1229eb461aaSDag-Erling Smørgrav
1239eb461aaSDag-Erling Smørgrav const struct diff_algo_config myers_then_patience;
1249eb461aaSDag-Erling Smørgrav const struct diff_algo_config myers_then_myers_divide;
1259eb461aaSDag-Erling Smørgrav const struct diff_algo_config patience;
1269eb461aaSDag-Erling Smørgrav const struct diff_algo_config myers_divide;
1279eb461aaSDag-Erling Smørgrav
1289eb461aaSDag-Erling Smørgrav const struct diff_algo_config myers_then_patience = (struct diff_algo_config){
1299eb461aaSDag-Erling Smørgrav .impl = diff_algo_myers,
1309eb461aaSDag-Erling Smørgrav .permitted_state_size = 1024 * 1024 * sizeof(int),
1319eb461aaSDag-Erling Smørgrav .fallback_algo = &patience,
1329eb461aaSDag-Erling Smørgrav };
1339eb461aaSDag-Erling Smørgrav
1349eb461aaSDag-Erling Smørgrav const struct diff_algo_config myers_then_myers_divide =
1359eb461aaSDag-Erling Smørgrav (struct diff_algo_config){
1369eb461aaSDag-Erling Smørgrav .impl = diff_algo_myers,
1379eb461aaSDag-Erling Smørgrav .permitted_state_size = 1024 * 1024 * sizeof(int),
1389eb461aaSDag-Erling Smørgrav .fallback_algo = &myers_divide,
1399eb461aaSDag-Erling Smørgrav };
1409eb461aaSDag-Erling Smørgrav
1419eb461aaSDag-Erling Smørgrav const struct diff_algo_config patience = (struct diff_algo_config){
1429eb461aaSDag-Erling Smørgrav .impl = diff_algo_patience,
1439eb461aaSDag-Erling Smørgrav /* After subdivision, do Patience again: */
1449eb461aaSDag-Erling Smørgrav .inner_algo = &patience,
1459eb461aaSDag-Erling Smørgrav /* If subdivision failed, do Myers Divide et Impera: */
1469eb461aaSDag-Erling Smørgrav .fallback_algo = &myers_then_myers_divide,
1479eb461aaSDag-Erling Smørgrav };
1489eb461aaSDag-Erling Smørgrav
1499eb461aaSDag-Erling Smørgrav const struct diff_algo_config myers_divide = (struct diff_algo_config){
1509eb461aaSDag-Erling Smørgrav .impl = diff_algo_myers_divide,
1519eb461aaSDag-Erling Smørgrav /* When division succeeded, start from the top: */
1529eb461aaSDag-Erling Smørgrav .inner_algo = &myers_then_myers_divide,
1539eb461aaSDag-Erling Smørgrav /* (fallback_algo = NULL implies diff_algo_none). */
1549eb461aaSDag-Erling Smørgrav };
1559eb461aaSDag-Erling Smørgrav
1569eb461aaSDag-Erling Smørgrav const struct diff_algo_config no_algo = (struct diff_algo_config){
1579eb461aaSDag-Erling Smørgrav .impl = diff_algo_none,
1589eb461aaSDag-Erling Smørgrav };
1599eb461aaSDag-Erling Smørgrav
1609eb461aaSDag-Erling Smørgrav /* If the state for a forward-Myers is small enough, use Myers, otherwise first
1619eb461aaSDag-Erling Smørgrav * do a Myers-divide. */
1629eb461aaSDag-Erling Smørgrav const struct diff_config diff_config_myers_then_myers_divide = {
1639eb461aaSDag-Erling Smørgrav .atomize_func = diff_atomize_text_by_line,
1649eb461aaSDag-Erling Smørgrav .algo = &myers_then_myers_divide,
1659eb461aaSDag-Erling Smørgrav };
1669eb461aaSDag-Erling Smørgrav
1679eb461aaSDag-Erling Smørgrav /* If the state for a forward-Myers is small enough, use Myers, otherwise first
1689eb461aaSDag-Erling Smørgrav * do a Patience. */
1699eb461aaSDag-Erling Smørgrav const struct diff_config diff_config_myers_then_patience = {
1709eb461aaSDag-Erling Smørgrav .atomize_func = diff_atomize_text_by_line,
1719eb461aaSDag-Erling Smørgrav .algo = &myers_then_patience,
1729eb461aaSDag-Erling Smørgrav };
1739eb461aaSDag-Erling Smørgrav
1749eb461aaSDag-Erling Smørgrav /* Directly force Patience as a first divider of the source file. */
1759eb461aaSDag-Erling Smørgrav const struct diff_config diff_config_patience = {
1769eb461aaSDag-Erling Smørgrav .atomize_func = diff_atomize_text_by_line,
1779eb461aaSDag-Erling Smørgrav .algo = &patience,
1789eb461aaSDag-Erling Smørgrav };
1799eb461aaSDag-Erling Smørgrav
1809eb461aaSDag-Erling Smørgrav /* Directly force Patience as a first divider of the source file. */
1819eb461aaSDag-Erling Smørgrav const struct diff_config diff_config_no_algo = {
1829eb461aaSDag-Erling Smørgrav .atomize_func = diff_atomize_text_by_line,
1839eb461aaSDag-Erling Smørgrav };
1849eb461aaSDag-Erling Smørgrav
1859eb461aaSDag-Erling Smørgrav int
diffreg(char * file1,char * file2,enum diffreg_algo algo,bool force_text,bool ignore_whitespace,bool show_function_prototypes,int context_lines,bool edscript)1869eb461aaSDag-Erling Smørgrav diffreg(char *file1, char *file2, enum diffreg_algo algo, bool force_text,
1879eb461aaSDag-Erling Smørgrav bool ignore_whitespace, bool show_function_prototypes, int context_lines,
1889eb461aaSDag-Erling Smørgrav bool edscript)
1899eb461aaSDag-Erling Smørgrav {
1909eb461aaSDag-Erling Smørgrav char *str1, *str2;
1919eb461aaSDag-Erling Smørgrav FILE *f1, *f2;
1929eb461aaSDag-Erling Smørgrav struct stat st1, st2;
1939eb461aaSDag-Erling Smørgrav struct diff_input_info info = {
1949eb461aaSDag-Erling Smørgrav .left_path = file1,
1959eb461aaSDag-Erling Smørgrav .right_path = file2,
1969eb461aaSDag-Erling Smørgrav };
1979eb461aaSDag-Erling Smørgrav struct diff_data left = {}, right = {};
1989eb461aaSDag-Erling Smørgrav struct diff_result *result = NULL;
1999eb461aaSDag-Erling Smørgrav int rc;
2009eb461aaSDag-Erling Smørgrav const struct diff_config *cfg;
2019eb461aaSDag-Erling Smørgrav int diff_flags = 0;
2029eb461aaSDag-Erling Smørgrav
2039eb461aaSDag-Erling Smørgrav switch (algo) {
2049eb461aaSDag-Erling Smørgrav default:
2059eb461aaSDag-Erling Smørgrav case DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE:
2069eb461aaSDag-Erling Smørgrav cfg = &diff_config_myers_then_myers_divide;
2079eb461aaSDag-Erling Smørgrav break;
2089eb461aaSDag-Erling Smørgrav case DIFFREG_ALGO_MYERS_THEN_PATIENCE:
2099eb461aaSDag-Erling Smørgrav cfg = &diff_config_myers_then_patience;
2109eb461aaSDag-Erling Smørgrav break;
2119eb461aaSDag-Erling Smørgrav case DIFFREG_ALGO_PATIENCE:
2129eb461aaSDag-Erling Smørgrav cfg = &diff_config_patience;
2139eb461aaSDag-Erling Smørgrav break;
2149eb461aaSDag-Erling Smørgrav case DIFFREG_ALGO_NONE:
2159eb461aaSDag-Erling Smørgrav cfg = &diff_config_no_algo;
2169eb461aaSDag-Erling Smørgrav break;
2179eb461aaSDag-Erling Smørgrav }
2189eb461aaSDag-Erling Smørgrav
2199eb461aaSDag-Erling Smørgrav f1 = openfile(file1, &str1, &st1);
2209eb461aaSDag-Erling Smørgrav f2 = openfile(file2, &str2, &st2);
2219eb461aaSDag-Erling Smørgrav
2229eb461aaSDag-Erling Smørgrav if (force_text)
2239eb461aaSDag-Erling Smørgrav diff_flags |= DIFF_FLAG_FORCE_TEXT_DATA;
2249eb461aaSDag-Erling Smørgrav if (ignore_whitespace)
2259eb461aaSDag-Erling Smørgrav diff_flags |= DIFF_FLAG_IGNORE_WHITESPACE;
2269eb461aaSDag-Erling Smørgrav if (show_function_prototypes)
2279eb461aaSDag-Erling Smørgrav diff_flags |= DIFF_FLAG_SHOW_PROTOTYPES;
2289eb461aaSDag-Erling Smørgrav
2299eb461aaSDag-Erling Smørgrav rc = diff_atomize_file(&left, cfg, f1, str1, st1.st_size, diff_flags);
2309eb461aaSDag-Erling Smørgrav if (rc)
2319eb461aaSDag-Erling Smørgrav goto done;
2329eb461aaSDag-Erling Smørgrav rc = diff_atomize_file(&right, cfg, f2, str2, st2.st_size, diff_flags);
2339eb461aaSDag-Erling Smørgrav if (rc)
2349eb461aaSDag-Erling Smørgrav goto done;
2359eb461aaSDag-Erling Smørgrav
2369eb461aaSDag-Erling Smørgrav result = diff_main(cfg, &left, &right);
2379eb461aaSDag-Erling Smørgrav #if 0
2389eb461aaSDag-Erling Smørgrav rc = diff_output_plain(stdout, &info, result);
2399eb461aaSDag-Erling Smørgrav #else
2409eb461aaSDag-Erling Smørgrav if (edscript)
2419eb461aaSDag-Erling Smørgrav rc = diff_output_edscript(NULL, stdout, &info, result);
2429eb461aaSDag-Erling Smørgrav else {
2439eb461aaSDag-Erling Smørgrav rc = diff_output_unidiff(NULL, stdout, &info, result,
2449eb461aaSDag-Erling Smørgrav context_lines);
2459eb461aaSDag-Erling Smørgrav }
2469eb461aaSDag-Erling Smørgrav #endif
2479eb461aaSDag-Erling Smørgrav done:
2489eb461aaSDag-Erling Smørgrav diff_result_free(result);
2499eb461aaSDag-Erling Smørgrav diff_data_free(&left);
2509eb461aaSDag-Erling Smørgrav diff_data_free(&right);
2519eb461aaSDag-Erling Smørgrav if (str1)
2529eb461aaSDag-Erling Smørgrav munmap(str1, st1.st_size);
2539eb461aaSDag-Erling Smørgrav if (str2)
2549eb461aaSDag-Erling Smørgrav munmap(str2, st2.st_size);
2559eb461aaSDag-Erling Smørgrav fclose(f1);
2569eb461aaSDag-Erling Smørgrav fclose(f2);
2579eb461aaSDag-Erling Smørgrav
2589eb461aaSDag-Erling Smørgrav return rc;
2599eb461aaSDag-Erling Smørgrav }
2609eb461aaSDag-Erling Smørgrav
/*
 * Open path for reading and fill *st with its fstat() data.
 *
 * Unless DIFF_NO_MMAP is defined, the whole file is also mapped
 * read-only and *p is set to the mapping.  If mapping fails or is
 * compiled out, *p is set to NULL and the caller falls back on file I/O
 * through the returned stream.
 *
 * Returns the open stream.  If the file cannot be opened or stat'ed the
 * program exits with status 2 via err(), so NULL is never returned.
 */
FILE *
openfile(const char *path, char **p, struct stat *st)
{
	FILE *f;
	int fd;

	f = fopen(path, "r");
	if (f == NULL)
		err(2, "%s", path);

	fd = fileno(f);
	if (fstat(fd, st) == -1)
		err(2, "%s", path);

#ifndef DIFF_NO_MMAP
	*p = mmap(NULL, st->st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	if (*p == MAP_FAILED)
		*p = NULL;	/* fall back on file I/O */
#else
	*p = NULL;		/* mmap disabled: always use file I/O */
#endif

	return f;
}
