1 /*
2 * Copyright (c) 2018 Martin Pieuchot
3 * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18 #include <sys/types.h>
19 #include <sys/capsicum.h>
20 #ifndef DIFF_NO_MMAP
21 #include <sys/mman.h>
22 #endif
23 #include <sys/stat.h>
24
25 #include <capsicum_helpers.h>
26 #include <err.h>
27 #include <fcntl.h>
28 #include <stdbool.h>
29 #include <stdint.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <time.h>
34 #include <unistd.h>
35
36 #include "pr.h"
37 #include "diff.h"
38 #include <arraylist.h>
39 #include <diff_main.h>
40 #include <diff_output.h>
41
/* Builds the "name<TAB>timestamp" label used when no explicit label is set. */
const char	*format_label(const char *oldlabel, struct stat *stb);

/*
 * Identifies which diff_config preset diffreg_new() picks in its switch.
 * The enumerators keep their numeric values 0..3.
 */
enum diffreg_algo {
	DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE = 0,
	DIFFREG_ALGO_MYERS_THEN_PATIENCE,
	DIFFREG_ALGO_PATIENCE,
	DIFFREG_ALGO_NONE,
};

/* libdiff-backed comparison of two files; returns a D_* result code. */
int		 diffreg_new(char *file1, char *file2, int flags, int capsicum);
/* Opens (and, where possible, maps) one input file. */
FILE		*openfile(const char *path, char **p, struct stat *st);
53
54 static const struct diff_algo_config myers_then_patience;
55 static const struct diff_algo_config myers_then_myers_divide;
56 static const struct diff_algo_config patience;
57 static const struct diff_algo_config myers_divide;
58
59 static const struct diff_algo_config myers_then_patience = {
60 .impl = diff_algo_myers,
61 .permitted_state_size = 1024 * 1024 * sizeof(int),
62 .fallback_algo = &patience,
63 };
64
65 static const struct diff_algo_config myers_then_myers_divide =
66 (struct diff_algo_config){
67 .impl = diff_algo_myers,
68 .permitted_state_size = 1024 * 1024 * sizeof(int),
69 .fallback_algo = &myers_divide,
70 };
71
72 static const struct diff_algo_config patience = {
73 .impl = diff_algo_patience,
74 /* After subdivision, do Patience again: */
75 .inner_algo = &patience,
76 /* If subdivision failed, do Myers Divide et Impera: */
77 .fallback_algo = &myers_then_myers_divide,
78 };
79
80 static const struct diff_algo_config myers_divide = {
81 .impl = diff_algo_myers_divide,
82 /* When division succeeded, start from the top: */
83 .inner_algo = &myers_then_myers_divide,
84 /* (fallback_algo = NULL implies diff_algo_none). */
85 };
86
87 static const struct diff_algo_config none = {
88 .impl = diff_algo_none,
89 };
90
91 /* If the state for a forward-Myers is small enough, use Myers, otherwise first
92 * do a Myers-divide. */
93 static const struct diff_config diff_config_myers_then_myers_divide = {
94 .atomize_func = diff_atomize_text_by_line,
95 .algo = &myers_then_myers_divide,
96 };
97
98 /* If the state for a forward-Myers is small enough, use Myers, otherwise first
99 * do a Patience. */
100 static const struct diff_config diff_config_myers_then_patience = {
101 .atomize_func = diff_atomize_text_by_line,
102 .algo = &myers_then_patience,
103 };
104
105 /* Directly force Patience as a first divider of the source file. */
106 static const struct diff_config diff_config_patience = {
107 .atomize_func = diff_atomize_text_by_line,
108 .algo = &patience,
109 };
110
111 /* Directly force Patience as a first divider of the source file. */
112 static const struct diff_config diff_config_none = {
113 .atomize_func = diff_atomize_text_by_line,
114 .algo = &none,
115 };
116
/*
 * Build a header label for a file: the given name, a tab, and the file's
 * modification time rendered as "YYYY-MM-DD HH:MM:SS.nnnnnnnnn +zzzz" in
 * local time.
 *
 * oldlabel: name placed in front of the timestamp.
 * stb:      stat data supplying the modification time (st_mtime/st_mtim).
 *
 * Returns a newly allocated string; the caller owns it and should free()
 * it.  If the modification time cannot be converted to local time the
 * label is just a copy of oldlabel.  Allocation failure terminates the
 * program through err(2).
 */
const char *
format_label(const char *oldlabel, struct stat *stb)
{
	const char *time_format = "%Y-%m-%d %H:%M:%S";
	char *newlabel;
	char buf[256];
	char end[10];
	struct tm tm;
	int nsec = stb->st_mtim.tv_nsec;
	size_t newlabellen, timelen, endlen;

	if (localtime_r(&stb->st_mtime, &tm) == NULL) {
		/* strftime() must not see a NULL tm; use the bare name. */
		if ((newlabel = strdup(oldlabel)) == NULL)
			err(2, NULL);
		return newlabel;
	}

	/* A zero return leaves the buffer unspecified, so terminate it. */
	timelen = strftime(buf, sizeof(buf), time_format, &tm);
	if (timelen == 0)
		buf[0] = '\0';
	endlen = strftime(end, sizeof(end), "%z", &tm);
	if (endlen == 0)
		end[0] = '\0';

	/*
	 * The new label is the length of the time, old label and timezone,
	 * 9 characters for nanoseconds, and 4 more for the tab, the period,
	 * the space and the terminating NUL.
	 */
	newlabellen = timelen + strlen(oldlabel) + endlen + 9 + 4;
	newlabel = calloc(newlabellen, sizeof(char));
	if (newlabel == NULL)
		err(2, NULL);

	/*
	 * No trailing newline: the buffer never had room for one, so the
	 * resulting string is unchanged.
	 */
	snprintf(newlabel, newlabellen, "%s\t%s.%.9d %s",
	    oldlabel, buf, nsec, end);

	return newlabel;
}
145
146 int
diffreg_new(char * file1,char * file2,int flags,int capsicum)147 diffreg_new(char *file1, char *file2, int flags, int capsicum)
148 {
149 char *str1, *str2;
150 FILE *f1, *f2;
151 struct pr *pr = NULL;
152 struct stat st1, st2;
153 struct diff_input_info info;
154 struct diff_data left = {}, right = {};
155 struct diff_result *result = NULL;
156 bool force_text, have_binary;
157 int rc, atomizer_flags, rflags, diff_flags = 0;
158 int context_lines = diff_context;
159 const struct diff_config *cfg;
160 enum diffreg_algo algo;
161 cap_rights_t rights_ro;
162 int ret;
163
164 algo = DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE;
165
166 switch (algo) {
167 default:
168 case DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE:
169 cfg = &diff_config_myers_then_myers_divide;
170 break;
171 case DIFFREG_ALGO_MYERS_THEN_PATIENCE:
172 cfg = &diff_config_myers_then_patience;
173 break;
174 case DIFFREG_ALGO_PATIENCE:
175 cfg = &diff_config_patience;
176 break;
177 case DIFFREG_ALGO_NONE:
178 cfg = &diff_config_none;
179 break;
180 }
181
182 f1 = openfile(file1, &str1, &st1);
183 f2 = openfile(file2, &str2, &st2);
184
185 if (flags & D_PAGINATION)
186 pr = start_pr(file1, file2);
187
188 if (capsicum) {
189 cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
190 if (caph_rights_limit(fileno(f1), &rights_ro) < 0)
191 err(2, "unable to limit rights on: %s", file1);
192 if (caph_rights_limit(fileno(f2), &rights_ro) < 0)
193 err(2, "unable to limit rights on: %s", file2);
194 if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
195 /* stdin has already been limited */
196 if (caph_limit_stderr() == -1)
197 err(2, "unable to limit stderr");
198 if (caph_limit_stdout() == -1)
199 err(2, "unable to limit stdout");
200 } else if (caph_limit_stdio() == -1)
201 err(2, "unable to limit stdio");
202 caph_cache_catpages();
203 caph_cache_tzdata();
204 if (caph_enter() < 0)
205 err(2, "unable to enter capability mode");
206 }
207 /*
208 * If we have been given a label use that for the paths, if not format
209 * the path with the files modification time.
210 */
211 info.flags = 0;
212 info.left_path = (label[0] != NULL) ?
213 label[0] : format_label(file1, &stb1);
214 info.right_path = (label[1] != NULL) ?
215 label[1] : format_label(file2, &stb2);
216
217 if (flags & D_FORCEASCII)
218 diff_flags |= DIFF_FLAG_FORCE_TEXT_DATA;
219 if (flags & D_IGNOREBLANKS)
220 diff_flags |= DIFF_FLAG_IGNORE_WHITESPACE;
221 if (flags & D_PROTOTYPE)
222 diff_flags |= DIFF_FLAG_SHOW_PROTOTYPES;
223
224 ret = diff_atomize_file(&left, cfg, f1, (uint8_t *)str1, st1.st_size,
225 diff_flags);
226 if (ret != DIFF_RC_OK) {
227 warnc(ret, "%s", file1);
228 rc = D_ERROR;
229 status |= 2;
230 goto done;
231 }
232 ret = diff_atomize_file(&right, cfg, f2, (uint8_t *)str2, st2.st_size,
233 diff_flags);
234 if (ret != DIFF_RC_OK) {
235 warnc(ret, "%s", file2);
236 rc = D_ERROR;
237 status |= 2;
238 goto done;
239 }
240
241 result = diff_main(cfg, &left, &right);
242 if (result->rc != DIFF_RC_OK) {
243 rc = D_ERROR;
244 status |= 2;
245 goto done;
246 }
247 /*
248 * If there wasn't an error, but we don't have any printable chunks
249 * then the files must match.
250 */
251 if (!diff_result_contains_printable_chunks(result)) {
252 rc = D_SAME;
253 goto done;
254 }
255
256 atomizer_flags = (result->left->atomizer_flags | result->right->atomizer_flags);
257 rflags = (result->left->root->diff_flags | result->right->root->diff_flags);
258 force_text = (rflags & DIFF_FLAG_FORCE_TEXT_DATA);
259 have_binary = (atomizer_flags & DIFF_ATOMIZER_FOUND_BINARY_DATA);
260
261 if (have_binary && !force_text) {
262 rc = D_BINARY;
263 status |= 1;
264 goto done;
265 }
266
267 if (color)
268 diff_output_set_colors(color, del_code, add_code);
269 if (diff_format == D_NORMAL) {
270 rc = diff_output_plain(NULL, stdout, &info, result, false);
271 } else if (diff_format == D_EDIT) {
272 rc = diff_output_edscript(NULL, stdout, &info, result);
273 } else {
274 rc = diff_output_unidiff(NULL, stdout, &info, result,
275 context_lines);
276 }
277 if (rc != DIFF_RC_OK) {
278 rc = D_ERROR;
279 status |= 2;
280 } else {
281 rc = D_DIFFER;
282 status |= 1;
283 }
284 done:
285 if (pr != NULL)
286 stop_pr(pr);
287 diff_result_free(result);
288 diff_data_free(&left);
289 diff_data_free(&right);
290 #ifndef DIFF_NO_MMAP
291 if (str1)
292 munmap(str1, st1.st_size);
293 if (str2)
294 munmap(str2, st2.st_size);
295 #endif
296 fclose(f1);
297 fclose(f2);
298
299 return rc;
300 }
301
/*
 * Open `path' for reading ("-" selects stdin) and stat it into *st.
 * Unless DIFF_NO_MMAP is defined, also try to map the file read-only:
 * *p receives the mapping, or NULL when mapping is unavailable and the
 * caller has to fall back on file I/O through the returned stream.
 * A failed open or fstat terminates the program via err(2).
 */
FILE *
openfile(const char *path, char **p, struct stat *st)
{
	FILE *fp;
	int fd;

	fp = (strcmp(path, "-") == 0) ? stdin : fopen(path, "r");
	if (fp == NULL)
		err(2, "%s", path);

	fd = fileno(fp);
	if (fstat(fd, st) == -1)
		err(2, "%s", path);

	/* Default to file I/O; only a successful mapping overrides this. */
	*p = NULL;
#ifndef DIFF_NO_MMAP
	{
		void *map;

		map = mmap(NULL, st->st_size, PROT_READ, MAP_PRIVATE, fd, 0);
		if (map != MAP_FAILED)
			*p = map;
	}
#endif

	return fp;
}
326
327 bool
can_libdiff(int flags)328 can_libdiff(int flags)
329 {
330 /* libdiff's atomizer can only deal with files */
331 if (!S_ISREG(stb1.st_mode) || !S_ISREG(stb2.st_mode))
332 return false;
333
334 /* Is this one of the supported input/output modes for diffreg_new? */
335 if ((flags == 0 || !(flags & ~D_NEWALGO_FLAGS)) &&
336 ignore_pats == NULL && (
337 diff_format == D_NORMAL ||
338 #if 0
339 diff_format == D_EDIT ||
340 #endif
341 diff_format == D_UNIFIED) &&
342 (diff_algorithm == D_DIFFMYERS || diff_algorithm == D_DIFFPATIENCE)) {
343 return true;
344 }
345
346 /* Fallback to using stone. */
347 return false;
348 }
349