xref: /kvm-unit-tests/scripts/get_maintainer.pl (revision 23749665ff61f476aa80171a398f06a9e5bd0a6c)
1#!/usr/bin/env perl
2# SPDX-License-Identifier: GPL-2.0
3#
4# (c) 2007, Joe Perches <joe@perches.com>
5#           created from checkpatch.pl
6#
7# Print selected MAINTAINERS information for
8# the files modified in a patch or for a file
9#
10# usage: perl scripts/get_maintainer.pl [OPTIONS] <patch>
11#        perl scripts/get_maintainer.pl [OPTIONS] -f <file>
12
13use warnings;
14use strict;
15
16my $P = $0;
17my $V = '0.26';
18
19use Getopt::Long qw(:config no_auto_abbrev);
20use Cwd;
21use File::Find;
22use File::Spec::Functions;
23
24my $cur_path = fastgetcwd() . '/';
25my $lk_path = "./";
26my $email = 1;
27my $email_usename = 1;
28my $email_maintainer = 1;
29my $email_reviewer = 1;
30my $email_fixes = 1;
31my $email_list = 1;
32my $email_moderated_list = 1;
33my $email_subscriber_list = 0;
34my $email_git = 0;
35my $email_git_all_signature_types = 0;
36my $email_git_blame = 0;
37my $email_git_blame_signatures = 1;
38my $email_git_fallback = 1;
39my $email_git_min_signatures = 1;
40my $email_git_max_maintainers = 5;
41my $email_git_min_percent = 5;
42my $email_git_since = "1-year-ago";
43my $email_hg_since = "-365";
44my $interactive = 0;
45my $email_remove_duplicates = 1;
46my $email_use_mailmap = 1;
47my $output_multiline = 1;
48my $output_separator = ", ";
49my $output_roles = 0;
50my $output_rolestats = 1;
51my $output_section_maxlen = 50;
52my $scm = 0;
53my $tree = 1;
54my $web = 0;
55my $subsystem = 0;
56my $status = 0;
57my $letters = "";
58my $keywords = 1;
59my $sections = 0;
60my $email_file_emails = 0;
61my $from_filename = 0;
62my $pattern_depth = 0;
63my $self_test = undef;
64my $version = 0;
65my $help = 0;
66my $find_maintainer_files = 0;
67my $maintainer_path;
68my $vcs_used = 0;
69
70my $exit = 0;
71
72my @files = ();
73my @fixes = ();			# If a patch description includes Fixes: lines
74my @range = ();
75my @keyword_tvi = ();
76my @file_emails = ();
77
78my %commit_author_hash;
79my %commit_signer_hash;
80
81# Signature types of people who are either
82# 	a) responsible for the code in question, or
83# 	b) familiar enough with it to give relevant feedback
84my @signature_tags = ();
85push(@signature_tags, "Signed-off-by:");
86push(@signature_tags, "Reviewed-by:");
87push(@signature_tags, "Acked-by:");
88
89my $signature_pattern = "\(" . join("|", @signature_tags) . "\)";
90
91# rfc822 email address - preloaded methods go here.
92my $rfc822_lwsp = "(?:(?:\\r\\n)?[ \\t])";
93my $rfc822_char = '[\\000-\\377]';
94
95# VCS command support: class-like functions and strings
96
97my %VCS_cmds;
98
99my %VCS_cmds_git = (
100    "execute_cmd" => \&git_execute_cmd,
101    "available" => '(which("git") ne "") && (-e ".git")',
102    "find_signers_cmd" =>
103	"git log --no-color --follow --since=\$email_git_since " .
104	    '--numstat --no-merges ' .
105	    '--format="GitCommit: %H%n' .
106		      'GitAuthor: %an <%ae>%n' .
107		      'GitDate: %aD%n' .
108		      'GitSubject: %s%n' .
109		      '%b%n"' .
110	    " -- \$file",
111    "find_commit_signers_cmd" =>
112	"git log --no-color " .
113	    '--numstat ' .
114	    '--format="GitCommit: %H%n' .
115		      'GitAuthor: %an <%ae>%n' .
116		      'GitDate: %aD%n' .
117		      'GitSubject: %s%n' .
118		      '%b%n"' .
119	    " -1 \$commit",
120    "find_commit_author_cmd" =>
121	"git log --no-color " .
122	    '--numstat ' .
123	    '--format="GitCommit: %H%n' .
124		      'GitAuthor: %an <%ae>%n' .
125		      'GitDate: %aD%n' .
126		      'GitSubject: %s%n"' .
127	    " -1 \$commit",
128    "blame_range_cmd" => "git blame -l -L \$diff_start,+\$diff_length \$file",
129    "blame_file_cmd" => "git blame -l \$file",
130    "commit_pattern" => "^GitCommit: ([0-9a-f]{40,40})",
131    "blame_commit_pattern" => "^([0-9a-f]+) ",
132    "author_pattern" => "^GitAuthor: (.*)",
133    "subject_pattern" => "^GitSubject: (.*)",
134    "stat_pattern" => "^(\\d+)\\t(\\d+)\\t\$file\$",
135    "file_exists_cmd" => "git ls-files \$file",
136    "list_files_cmd" => "git ls-files \$file",
137);
138
139my %VCS_cmds_hg = (
140    "execute_cmd" => \&hg_execute_cmd,
141    "available" => '(which("hg") ne "") && (-d ".hg")',
142    "find_signers_cmd" =>
143	"hg log --date=\$email_hg_since " .
144	    "--template='HgCommit: {node}\\n" .
145	                "HgAuthor: {author}\\n" .
146			"HgSubject: {desc}\\n'" .
147	    " -- \$file",
148    "find_commit_signers_cmd" =>
149	"hg log " .
150	    "--template='HgSubject: {desc}\\n'" .
151	    " -r \$commit",
152    "find_commit_author_cmd" =>
153	"hg log " .
154	    "--template='HgCommit: {node}\\n" .
155		        "HgAuthor: {author}\\n" .
156			"HgSubject: {desc|firstline}\\n'" .
157	    " -r \$commit",
158    "blame_range_cmd" => "",		# not supported
159    "blame_file_cmd" => "hg blame -n \$file",
160    "commit_pattern" => "^HgCommit: ([0-9a-f]{40,40})",
161    "blame_commit_pattern" => "^([ 0-9a-f]+):",
162    "author_pattern" => "^HgAuthor: (.*)",
163    "subject_pattern" => "^HgSubject: (.*)",
164    "stat_pattern" => "^(\\d+)\t(\\d+)\t\$file\$",
165    "file_exists_cmd" => "hg files \$file",
166    "list_files_cmd" => "hg manifest -R \$file",
167);
168
169my $conf = which_conf(".get_maintainer.conf");
170if (-f $conf) {
171    my @conf_args;
172    open(my $conffile, '<', "$conf")
173	or warn "$P: Can't find a readable .get_maintainer.conf file $!\n";
174
175    while (<$conffile>) {
176	my $line = $_;
177
178	$line =~ s/\s*\n?$//g;
179	$line =~ s/^\s*//g;
180	$line =~ s/\s+/ /g;
181
182	next if ($line =~ m/^\s*#/);
183	next if ($line =~ m/^\s*$/);
184
185	my @words = split(" ", $line);
186	foreach my $word (@words) {
187	    last if ($word =~ m/^#/);
188	    push (@conf_args, $word);
189	}
190    }
191    close($conffile);
192    unshift(@ARGV, @conf_args) if @conf_args;
193}
194
195my @ignore_emails = ();
196my $ignore_file = which_conf(".get_maintainer.ignore");
197if (-f $ignore_file) {
198    open(my $ignore, '<', "$ignore_file")
199	or warn "$P: Can't find a readable .get_maintainer.ignore file $!\n";
200    while (<$ignore>) {
201	my $line = $_;
202
203	$line =~ s/\s*\n?$//;
204	$line =~ s/^\s*//;
205	$line =~ s/\s+$//;
206	$line =~ s/#.*$//;
207
208	next if ($line =~ m/^\s*$/);
209	if (rfc822_valid($line)) {
210	    push(@ignore_emails, $line);
211	}
212    }
213    close($ignore);
214}
215
216if ($#ARGV > 0) {
217    foreach (@ARGV) {
218        if ($_ =~ /^-{1,2}self-test(?:=|$)/) {
219            die "$P: using --self-test does not allow any other option or argument\n";
220        }
221    }
222}
223
224if (!GetOptions(
225		'email!' => \$email,
226		'git!' => \$email_git,
227		'git-all-signature-types!' => \$email_git_all_signature_types,
228		'git-blame!' => \$email_git_blame,
229		'git-blame-signatures!' => \$email_git_blame_signatures,
230		'git-fallback!' => \$email_git_fallback,
231		'git-min-signatures=i' => \$email_git_min_signatures,
232		'git-max-maintainers=i' => \$email_git_max_maintainers,
233		'git-min-percent=i' => \$email_git_min_percent,
234		'git-since=s' => \$email_git_since,
235		'hg-since=s' => \$email_hg_since,
236		'i|interactive!' => \$interactive,
237		'remove-duplicates!' => \$email_remove_duplicates,
238		'mailmap!' => \$email_use_mailmap,
239		'm!' => \$email_maintainer,
240		'r!' => \$email_reviewer,
241		'n!' => \$email_usename,
242		'l!' => \$email_list,
243		'fixes!' => \$email_fixes,
244		'moderated!' => \$email_moderated_list,
245		's!' => \$email_subscriber_list,
246		'multiline!' => \$output_multiline,
247		'roles!' => \$output_roles,
248		'rolestats!' => \$output_rolestats,
249		'separator=s' => \$output_separator,
250		'subsystem!' => \$subsystem,
251		'status!' => \$status,
252		'scm!' => \$scm,
253		'tree!' => \$tree,
254		'web!' => \$web,
255		'letters=s' => \$letters,
256		'pattern-depth=i' => \$pattern_depth,
257		'k|keywords!' => \$keywords,
258		'sections!' => \$sections,
259		'fe|file-emails!' => \$email_file_emails,
260		'f|file' => \$from_filename,
261		'find-maintainer-files' => \$find_maintainer_files,
262		'mpath|maintainer-path=s' => \$maintainer_path,
263		'self-test:s' => \$self_test,
264		'v|version' => \$version,
265		'h|help|usage' => \$help,
266		)) {
267    die "$P: invalid argument - use --help if necessary\n";
268}
269
270if ($help != 0) {
271    usage();
272    exit 0;
273}
274
275if ($version != 0) {
276    print("${P} ${V}\n");
277    exit 0;
278}
279
280if (defined $self_test) {
281    read_all_maintainer_files();
282    self_test();
283    exit 0;
284}
285
286if (-t STDIN && !@ARGV) {
287    # We're talking to a terminal, but have no command line arguments.
288    die "$P: missing patchfile or -f file - use --help if necessary\n";
289}
290
291$output_multiline = 0 if ($output_separator ne ", ");
292$output_rolestats = 1 if ($interactive);
293$output_roles = 1 if ($output_rolestats);
294
295if ($sections || $letters ne "") {
296    $sections = 1;
297    $email = 0;
298    $email_list = 0;
299    $scm = 0;
300    $status = 0;
301    $subsystem = 0;
302    $web = 0;
303    $keywords = 0;
304    $interactive = 0;
305} else {
306    my $selections = $email + $scm + $status + $subsystem + $web;
307    if ($selections == 0) {
308	die "$P:  Missing required option: email, scm, status, subsystem or web\n";
309    }
310}
311
312if ($email &&
313    ($email_maintainer + $email_reviewer +
314     $email_list + $email_subscriber_list +
315     $email_git + $email_git_blame) == 0) {
316    die "$P: Please select at least 1 email option\n";
317}
318
319## Read MAINTAINERS for type/value pairs
320
321my @typevalue = ();
322my %keyword_hash;
323my @mfiles = ();
324my @self_test_info = ();
325
326sub read_maintainer_file {
327    my ($file) = @_;
328
329    open (my $maint, '<', "$file")
330	or die "$P: Can't open MAINTAINERS file '$file': $!\n";
331    my $i = 1;
332    while (<$maint>) {
333	my $line = $_;
334	chomp $line;
335
336	if ($line =~ m/^([A-Z]):\s*(.*)/) {
337	    my $type = $1;
338	    my $value = $2;
339
340	    ##Filename pattern matching
341	    if ($type eq "F" || $type eq "X") {
342		$value =~ s@\.@\\\.@g;       ##Convert . to \.
343		$value =~ s/\*/\.\*/g;       ##Convert * to .*
344		$value =~ s/\?/\./g;         ##Convert ? to .
345		##if pattern is a directory and it lacks a trailing slash, add one
346		if ((-d $value)) {
347		    $value =~ s@([^/])$@$1/@;
348		}
349	    } elsif ($type eq "K") {
350		$keyword_hash{@typevalue} = $value;
351	    }
352	    push(@typevalue, "$type:$value");
353	} elsif (!(/^\s*$/ || /^\s*\#/)) {
354	    push(@typevalue, $line);
355	}
356	if (defined $self_test) {
357	    push(@self_test_info, {file=>$file, linenr=>$i, line=>$line});
358	}
359	$i++;
360    }
361    close($maint);
362}
363
364sub find_is_maintainer_file {
365    my ($file) = $_;
366    return if ($file !~ m@/MAINTAINERS$@);
367    $file = $File::Find::name;
368    return if (! -f $file);
369    push(@mfiles, $file);
370}
371
372sub find_ignore_git {
373    return grep { $_ !~ /^\.git$/; } @_;
374}
375
376read_all_maintainer_files();
377
378sub read_all_maintainer_files {
379    my $path = "${lk_path}MAINTAINERS";
380    if (defined $maintainer_path) {
381	$path = $maintainer_path;
382	# Perl Cookbook tilde expansion if necessary
383	$path =~ s@^~([^/]*)@ $1 ? (getpwnam($1))[7] : ( $ENV{HOME} || $ENV{LOGDIR} || (getpwuid($<))[7])@ex;
384    }
385
386    if (-d $path) {
387	$path .= '/' if ($path !~ m@/$@);
388	if ($find_maintainer_files) {
389	    find( { wanted => \&find_is_maintainer_file,
390		    preprocess => \&find_ignore_git,
391		    no_chdir => 1,
392		}, "$path");
393	} else {
394	    opendir(DIR, "$path") or die $!;
395	    my @files = readdir(DIR);
396	    closedir(DIR);
397	    foreach my $file (@files) {
398		push(@mfiles, "$path$file") if ($file !~ /^\./);
399	    }
400	}
401    } elsif (-f "$path") {
402	push(@mfiles, "$path");
403    } else {
404	die "$P: MAINTAINER file not found '$path'\n";
405    }
406    die "$P: No MAINTAINER files found in '$path'\n" if (scalar(@mfiles) == 0);
407    foreach my $file (@mfiles) {
408	read_maintainer_file("$file");
409    }
410}
411
412sub maintainers_in_file {
413    my ($file) = @_;
414
415    return if ($file =~ m@\bMAINTAINERS$@);
416
417    if (-f $file && ($email_file_emails || $file =~ /\.yaml$/)) {
418	open(my $f, '<', $file)
419	    or die "$P: Can't open $file: $!\n";
420	my $text = do { local($/) ; <$f> };
421	close($f);
422
423	my @poss_addr = $text =~ m$[A-Za-zÀ-ÿ\"\' \,\.\+-]*\s*[\,]*\s*[\(\<\{]{0,1}[A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+\.[A-Za-z0-9]+[\)\>\}]{0,1}$g;
424	push(@file_emails, clean_file_emails(@poss_addr));
425    }
426}
427
428#
429# Read mail address map
430#
431
432my $mailmap;
433
434read_mailmap();
435
436sub read_mailmap {
437    $mailmap = {
438	names => {},
439	addresses => {}
440    };
441
442    return if (!$email_use_mailmap || !(-f "${lk_path}.mailmap"));
443
444    open(my $mailmap_file, '<', "${lk_path}.mailmap")
445	or warn "$P: Can't open .mailmap: $!\n";
446
447    while (<$mailmap_file>) {
448	s/#.*$//; #strip comments
449	s/^\s+|\s+$//g; #trim
450
451	next if (/^\s*$/); #skip empty lines
452	#entries have one of the following formats:
453	# name1 <mail1>
454	# <mail1> <mail2>
455	# name1 <mail1> <mail2>
456	# name1 <mail1> name2 <mail2>
457	# (see man git-shortlog)
458
459	if (/^([^<]+)<([^>]+)>$/) {
460	    my $real_name = $1;
461	    my $address = $2;
462
463	    $real_name =~ s/\s+$//;
464	    ($real_name, $address) = parse_email("$real_name <$address>");
465	    $mailmap->{names}->{$address} = $real_name;
466
467	} elsif (/^<([^>]+)>\s*<([^>]+)>$/) {
468	    my $real_address = $1;
469	    my $wrong_address = $2;
470
471	    $mailmap->{addresses}->{$wrong_address} = $real_address;
472
473	} elsif (/^(.+)<([^>]+)>\s*<([^>]+)>$/) {
474	    my $real_name = $1;
475	    my $real_address = $2;
476	    my $wrong_address = $3;
477
478	    $real_name =~ s/\s+$//;
479	    ($real_name, $real_address) =
480		parse_email("$real_name <$real_address>");
481	    $mailmap->{names}->{$wrong_address} = $real_name;
482	    $mailmap->{addresses}->{$wrong_address} = $real_address;
483
484	} elsif (/^(.+)<([^>]+)>\s*(.+)\s*<([^>]+)>$/) {
485	    my $real_name = $1;
486	    my $real_address = $2;
487	    my $wrong_name = $3;
488	    my $wrong_address = $4;
489
490	    $real_name =~ s/\s+$//;
491	    ($real_name, $real_address) =
492		parse_email("$real_name <$real_address>");
493
494	    $wrong_name =~ s/\s+$//;
495	    ($wrong_name, $wrong_address) =
496		parse_email("$wrong_name <$wrong_address>");
497
498	    my $wrong_email = format_email($wrong_name, $wrong_address, 1);
499	    $mailmap->{names}->{$wrong_email} = $real_name;
500	    $mailmap->{addresses}->{$wrong_email} = $real_address;
501	}
502    }
503    close($mailmap_file);
504}
505
506## use the filenames on the command line or find the filenames in the patchfiles
507
508if (!@ARGV) {
509    push(@ARGV, "&STDIN");
510}
511
512foreach my $file (@ARGV) {
513    if ($file ne "&STDIN") {
514	$file = canonpath($file);
515	##if $file is a directory and it lacks a trailing slash, add one
516	if ((-d $file)) {
517	    $file =~ s@([^/])$@$1/@;
518	} elsif (!(-f $file)) {
519	    die "$P: file '${file}' not found\n";
520	}
521    }
522    if ($from_filename && (vcs_exists() && !vcs_file_exists($file))) {
523	warn "$P: file '$file' not found in version control $!\n";
524    }
525    if ($from_filename || ($file ne "&STDIN" && vcs_file_exists($file))) {
526	$file =~ s/^\Q${cur_path}\E//;	#strip any absolute path
527	$file =~ s/^\Q${lk_path}\E//;	#or the path to the lk tree
528	push(@files, $file);
529	if ($file ne "MAINTAINERS" && -f $file && $keywords) {
530	    open(my $f, '<', $file)
531		or die "$P: Can't open $file: $!\n";
532	    my $text = do { local($/) ; <$f> };
533	    close($f);
534	    if ($keywords) {
535		foreach my $line (keys %keyword_hash) {
536		    if ($text =~ m/$keyword_hash{$line}/x) {
537			push(@keyword_tvi, $line);
538		    }
539		}
540	    }
541	}
542    } else {
543	my $file_cnt = @files;
544	my $lastfile;
545
546	open(my $patch, "< $file")
547	    or die "$P: Can't open $file: $!\n";
548
549	# We can check arbitrary information before the patch
550	# like the commit message, mail headers, etc...
551	# This allows us to match arbitrary keywords against any part
552	# of a git format-patch generated file (subject tags, etc...)
553
554	my $patch_prefix = "";			#Parsing the intro
555
556	while (<$patch>) {
557	    my $patch_line = $_;
558	    if (m/^ mode change [0-7]+ => [0-7]+ (\S+)\s*$/) {
559		my $filename = $1;
560		push(@files, $filename);
561	    } elsif (m/^rename (?:from|to) (\S+)\s*$/) {
562		my $filename = $1;
563		push(@files, $filename);
564	    } elsif (m/^diff --git a\/(\S+) b\/(\S+)\s*$/) {
565		my $filename1 = $1;
566		my $filename2 = $2;
567		push(@files, $filename1);
568		push(@files, $filename2);
569	    } elsif (m/^Fixes:\s+([0-9a-fA-F]{6,40})/) {
570		push(@fixes, $1) if ($email_fixes);
571	    } elsif (m/^\+\+\+\s+(\S+)/ or m/^---\s+(\S+)/) {
572		my $filename = $1;
573		$filename =~ s@^[^/]*/@@;
574		$filename =~ s@\n@@;
575		$lastfile = $filename;
576		push(@files, $filename);
577		$patch_prefix = "^[+-].*";	#Now parsing the actual patch
578	    } elsif (m/^\@\@ -(\d+),(\d+)/) {
579		if ($email_git_blame) {
580		    push(@range, "$lastfile:$1:$2");
581		}
582	    } elsif ($keywords) {
583		foreach my $line (keys %keyword_hash) {
584		    if ($patch_line =~ m/${patch_prefix}$keyword_hash{$line}/x) {
585			push(@keyword_tvi, $line);
586		    }
587		}
588	    }
589	}
590	close($patch);
591
592	if ($file_cnt == @files) {
593	    warn "$P: file '${file}' doesn't appear to be a patch.  "
594		. "Add -f to options?\n";
595	}
596	@files = sort_and_uniq(@files);
597    }
598}
599
600@file_emails = uniq(@file_emails);
601@fixes = uniq(@fixes);
602
603my %email_hash_name;
604my %email_hash_address;
605my @email_to = ();
606my %hash_list_to;
607my @list_to = ();
608my @scm = ();
609my @web = ();
610my @subsystem = ();
611my @status = ();
612my %deduplicate_name_hash = ();
613my %deduplicate_address_hash = ();
614
615my @maintainers = get_maintainers();
616if (@maintainers) {
617    @maintainers = merge_email(@maintainers);
618    output(@maintainers);
619}
620
621if ($scm) {
622    @scm = uniq(@scm);
623    output(@scm);
624}
625
626if ($status) {
627    @status = uniq(@status);
628    output(@status);
629}
630
631if ($subsystem) {
632    @subsystem = uniq(@subsystem);
633    output(@subsystem);
634}
635
636if ($web) {
637    @web = uniq(@web);
638    output(@web);
639}
640
641exit($exit);
642
643sub self_test {
644    my @lsfiles = ();
645    my @good_links = ();
646    my @bad_links = ();
647    my @section_headers = ();
648    my $index = 0;
649
650    @lsfiles = vcs_list_files($lk_path);
651
652    for my $x (@self_test_info) {
653	$index++;
654
655	## Section header duplication and missing section content
656	if (($self_test eq "" || $self_test =~ /\bsections\b/) &&
657	    $x->{line} =~ /^\S[^:]/ &&
658	    defined $self_test_info[$index] &&
659	    $self_test_info[$index]->{line} =~ /^([A-Z]):\s*\S/) {
660	    my $has_S = 0;
661	    my $has_F = 0;
662	    my $has_ML = 0;
663	    my $status = "";
664	    if (grep(m@^\Q$x->{line}\E@, @section_headers)) {
665		print("$x->{file}:$x->{linenr}: warning: duplicate section header\t$x->{line}\n");
666	    } else {
667		push(@section_headers, $x->{line});
668	    }
669	    my $nextline = $index;
670	    while (defined $self_test_info[$nextline] &&
671		   $self_test_info[$nextline]->{line} =~ /^([A-Z]):\s*(\S.*)/) {
672		my $type = $1;
673		my $value = $2;
674		if ($type eq "S") {
675		    $has_S = 1;
676		    $status = $value;
677		} elsif ($type eq "F" || $type eq "N") {
678		    $has_F = 1;
679		} elsif ($type eq "M" || $type eq "R" || $type eq "L") {
680		    $has_ML = 1;
681		}
682		$nextline++;
683	    }
684	    if (!$has_ML && $status !~ /orphan|obsolete/i) {
685		print("$x->{file}:$x->{linenr}: warning: section without email address\t$x->{line}\n");
686	    }
687	    if (!$has_S) {
688		print("$x->{file}:$x->{linenr}: warning: section without status \t$x->{line}\n");
689	    }
690	    if (!$has_F) {
691		print("$x->{file}:$x->{linenr}: warning: section without file pattern\t$x->{line}\n");
692	    }
693	}
694
695	next if ($x->{line} !~ /^([A-Z]):\s*(.*)/);
696
697	my $type = $1;
698	my $value = $2;
699
700	## Filename pattern matching
701	if (($type eq "F" || $type eq "X") &&
702	    ($self_test eq "" || $self_test =~ /\bpatterns\b/)) {
703	    $value =~ s@\.@\\\.@g;       ##Convert . to \.
704	    $value =~ s/\*/\.\*/g;       ##Convert * to .*
705	    $value =~ s/\?/\./g;         ##Convert ? to .
706	    ##if pattern is a directory and it lacks a trailing slash, add one
707	    if ((-d $value)) {
708		$value =~ s@([^/])$@$1/@;
709	    }
710	    if (!grep(m@^$value@, @lsfiles)) {
711		print("$x->{file}:$x->{linenr}: warning: no file matches\t$x->{line}\n");
712	    }
713
714	## Link reachability
715	} elsif (($type eq "W" || $type eq "Q" || $type eq "B") &&
716		 $value =~ /^https?:/ &&
717		 ($self_test eq "" || $self_test =~ /\blinks\b/)) {
718	    next if (grep(m@^\Q$value\E$@, @good_links));
719	    my $isbad = 0;
720	    if (grep(m@^\Q$value\E$@, @bad_links)) {
721	        $isbad = 1;
722	    } else {
723		my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $value`;
724		if ($? == 0) {
725		    push(@good_links, $value);
726		} else {
727		    push(@bad_links, $value);
728		    $isbad = 1;
729		}
730	    }
731	    if ($isbad) {
732	        print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
733	    }
734
735	## SCM reachability
736	} elsif ($type eq "T" &&
737		 ($self_test eq "" || $self_test =~ /\bscm\b/)) {
738	    next if (grep(m@^\Q$value\E$@, @good_links));
739	    my $isbad = 0;
740	    if (grep(m@^\Q$value\E$@, @bad_links)) {
741	        $isbad = 1;
742            } elsif ($value !~ /^(?:git|quilt|hg)\s+\S/) {
743		print("$x->{file}:$x->{linenr}: warning: malformed entry\t$x->{line}\n");
744	    } elsif ($value =~ /^git\s+(\S+)(\s+([^\(]+\S+))?/) {
745		my $url = $1;
746		my $branch = "";
747		$branch = $3 if $3;
748		my $output = `git ls-remote --exit-code -h "$url" $branch > /dev/null 2>&1`;
749		if ($? == 0) {
750		    push(@good_links, $value);
751		} else {
752		    push(@bad_links, $value);
753		    $isbad = 1;
754		}
755	    } elsif ($value =~ /^(?:quilt|hg)\s+(https?:\S+)/) {
756		my $url = $1;
757		my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $url`;
758		if ($? == 0) {
759		    push(@good_links, $value);
760		} else {
761		    push(@bad_links, $value);
762		    $isbad = 1;
763		}
764	    }
765	    if ($isbad) {
766		print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
767	    }
768	}
769    }
770}
771
772sub ignore_email_address {
773    my ($address) = @_;
774
775    foreach my $ignore (@ignore_emails) {
776	return 1 if ($ignore eq $address);
777    }
778
779    return 0;
780}
781
782sub range_is_maintained {
783    my ($start, $end) = @_;
784
785    for (my $i = $start; $i < $end; $i++) {
786	my $line = $typevalue[$i];
787	if ($line =~ m/^([A-Z]):\s*(.*)/) {
788	    my $type = $1;
789	    my $value = $2;
790	    if ($type eq 'S') {
791		if ($value =~ /(maintain|support)/i) {
792		    return 1;
793		}
794	    }
795	}
796    }
797    return 0;
798}
799
800sub range_has_maintainer {
801    my ($start, $end) = @_;
802
803    for (my $i = $start; $i < $end; $i++) {
804	my $line = $typevalue[$i];
805	if ($line =~ m/^([A-Z]):\s*(.*)/) {
806	    my $type = $1;
807	    my $value = $2;
808	    if ($type eq 'M') {
809		return 1;
810	    }
811	}
812    }
813    return 0;
814}
815
816sub get_maintainers {
817    %email_hash_name = ();
818    %email_hash_address = ();
819    %commit_author_hash = ();
820    %commit_signer_hash = ();
821    @email_to = ();
822    %hash_list_to = ();
823    @list_to = ();
824    @scm = ();
825    @web = ();
826    @subsystem = ();
827    @status = ();
828    %deduplicate_name_hash = ();
829    %deduplicate_address_hash = ();
830    if ($email_git_all_signature_types) {
831	$signature_pattern = "(.+?)[Bb][Yy]:";
832    } else {
833	$signature_pattern = "\(" . join("|", @signature_tags) . "\)";
834    }
835
836    # Find responsible parties
837
838    my %exact_pattern_match_hash = ();
839
840    foreach my $file (@files) {
841
842	my %hash;
843	my $tvi = find_first_section();
844	while ($tvi < @typevalue) {
845	    my $start = find_starting_index($tvi);
846	    my $end = find_ending_index($tvi);
847	    my $exclude = 0;
848	    my $i;
849
850	    #Do not match excluded file patterns
851
852	    for ($i = $start; $i < $end; $i++) {
853		my $line = $typevalue[$i];
854		if ($line =~ m/^([A-Z]):\s*(.*)/) {
855		    my $type = $1;
856		    my $value = $2;
857		    if ($type eq 'X') {
858			if (file_match_pattern($file, $value)) {
859			    $exclude = 1;
860			    last;
861			}
862		    }
863		}
864	    }
865
866	    if (!$exclude) {
867		for ($i = $start; $i < $end; $i++) {
868		    my $line = $typevalue[$i];
869		    if ($line =~ m/^([A-Z]):\s*(.*)/) {
870			my $type = $1;
871			my $value = $2;
872			if ($type eq 'F') {
873			    if (file_match_pattern($file, $value)) {
874				my $value_pd = ($value =~ tr@/@@);
875				my $file_pd = ($file  =~ tr@/@@);
876				$value_pd++ if (substr($value,-1,1) ne "/");
877				$value_pd = -1 if ($value =~ /^\.\*/);
878				if ($value_pd >= $file_pd &&
879				    range_is_maintained($start, $end) &&
880				    range_has_maintainer($start, $end)) {
881				    $exact_pattern_match_hash{$file} = 1;
882				}
883				if ($pattern_depth == 0 ||
884				    (($file_pd - $value_pd) < $pattern_depth)) {
885				    $hash{$tvi} = $value_pd;
886				}
887			    }
888			} elsif ($type eq 'N') {
889			    if ($file =~ m/$value/x) {
890				$hash{$tvi} = 0;
891			    }
892			}
893		    }
894		}
895	    }
896	    $tvi = $end + 1;
897	}
898
899	foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
900	    add_categories($line);
901	    if ($sections) {
902		my $i;
903		my $start = find_starting_index($line);
904		my $end = find_ending_index($line);
905		for ($i = $start; $i < $end; $i++) {
906		    my $line = $typevalue[$i];
907		    if ($line =~ /^[FX]:/) {		##Restore file patterns
908			$line =~ s/([^\\])\.([^\*])/$1\?$2/g;
909			$line =~ s/([^\\])\.$/$1\?/g;	##Convert . back to ?
910			$line =~ s/\\\./\./g;       	##Convert \. to .
911			$line =~ s/\.\*/\*/g;       	##Convert .* to *
912		    }
913		    my $count = $line =~ s/^([A-Z]):/$1:\t/g;
914		    if ($letters eq "" || (!$count || $letters =~ /$1/i)) {
915			print("$line\n");
916		    }
917		}
918		print("\n");
919	    }
920	}
921
922	maintainers_in_file($file);
923    }
924
925    if ($keywords) {
926	@keyword_tvi = sort_and_uniq(@keyword_tvi);
927	foreach my $line (@keyword_tvi) {
928	    add_categories($line);
929	}
930    }
931
932    foreach my $email (@email_to, @list_to) {
933	$email->[0] = deduplicate_email($email->[0]);
934    }
935
936    foreach my $file (@files) {
937	if ($email &&
938	    ($email_git ||
939	     ($email_git_fallback &&
940	      $file !~ /MAINTAINERS$/ &&
941	      !$exact_pattern_match_hash{$file}))) {
942	    vcs_file_signoffs($file);
943	}
944	if ($email && $email_git_blame) {
945	    vcs_file_blame($file);
946	}
947    }
948
949    if ($email) {
950	foreach my $email (@file_emails) {
951	    my ($name, $address) = parse_email($email);
952
953	    my $tmp_email = format_email($name, $address, $email_usename);
954	    push_email_address($tmp_email, '');
955	    add_role($tmp_email, 'in file');
956	}
957    }
958
959    foreach my $fix (@fixes) {
960	vcs_add_commit_signers($fix, "blamed_fixes");
961    }
962
963    my @to = ();
964    if ($email || $email_list) {
965	if ($email) {
966	    @to = (@to, @email_to);
967	}
968	if ($email_list) {
969	    @to = (@to, @list_to);
970	}
971    }
972
973    if ($interactive) {
974	@to = interactive_get_maintainers(\@to);
975    }
976
977    return @to;
978}
979
980sub file_match_pattern {
981    my ($file, $pattern) = @_;
982    if (substr($pattern, -1) eq "/") {
983	if ($file =~ m@^$pattern@) {
984	    return 1;
985	}
986    } else {
987	if ($file =~ m@^$pattern@) {
988	    my $s1 = ($file =~ tr@/@@);
989	    my $s2 = ($pattern =~ tr@/@@);
990	    if ($s1 == $s2) {
991		return 1;
992	    }
993	}
994    }
995    return 0;
996}
997
998sub usage {
999    print <<EOT;
1000usage: $P [options] patchfile
1001       $P [options] -f file|directory
1002version: $V
1003
1004MAINTAINER field selection options:
1005  --email => print email address(es) if any
1006    --git => include recent git \*-by: signers
1007    --git-all-signature-types => include signers regardless of signature type
1008        or use only ${signature_pattern} signers (default: $email_git_all_signature_types)
1009    --git-fallback => use git when no exact MAINTAINERS pattern (default: $email_git_fallback)
1010    --git-min-signatures => number of signatures required (default: $email_git_min_signatures)
1011    --git-max-maintainers => maximum maintainers to add (default: $email_git_max_maintainers)
1012    --git-min-percent => minimum percentage of commits required (default: $email_git_min_percent)
1013    --git-blame => use git blame to find modified commits for patch or file
1014    --git-blame-signatures => when used with --git-blame, also include all commit signers
1015    --git-since => git history to use (default: $email_git_since)
1016    --hg-since => hg history to use (default: $email_hg_since)
1017    --interactive => display a menu (mostly useful if used with the --git option)
1018    --m => include maintainer(s) if any
1019    --r => include reviewer(s) if any
1020    --n => include name 'Full Name <addr\@domain.tld>'
1021    --l => include list(s) if any
1022    --moderated => include moderated lists(s) if any (default: true)
1023    --s => include subscriber only list(s) if any (default: false)
1024    --remove-duplicates => minimize duplicate email names/addresses
1025    --roles => show roles (status:subsystem, git-signer, list, etc...)
1026    --rolestats => show roles and statistics (commits/total_commits, %)
1027    --file-emails => add email addresses found in -f file (default: 0 (off))
1028    --fixes => for patches, add signatures of commits with 'Fixes: <commit>' (default: 1 (on))
1029  --scm => print SCM tree(s) if any
1030  --status => print status if any
1031  --subsystem => print subsystem name if any
1032  --web => print website(s) if any
1033
1034Output type options:
1035  --separator [, ] => separator for multiple entries on 1 line
1036    using --separator also sets --nomultiline if --separator is not [, ]
1037  --multiline => print 1 entry per line
1038
1039Other options:
1040  --pattern-depth => Number of pattern directory traversals (default: 0 (all))
1041  --keywords => scan patch for keywords (default: $keywords)
1042  --sections => print all of the subsystem sections with pattern matches
1043  --letters => print all matching 'letter' types from all matching sections
1044  --mailmap => use .mailmap file (default: $email_use_mailmap)
1045  --no-tree => run without a kvm-user-space tree
1046  --self-test => show potential issues with MAINTAINERS file content
1047  --version => show version
1048  --help => show this help information
1049
1050Default options:
1051  [--email --tree --nogit --git-fallback --m --r --n --l --multiline
1052   --pattern-depth=0 --remove-duplicates --rolestats]
1053
1054Notes:
1055  Using "-f directory" may give unexpected results:
1056      Used with "--git", git signators for _all_ files in and below
1057          directory are examined as git recurses directories.
1058          Any specified X: (exclude) pattern matches are _not_ ignored.
1059      Used with "--nogit", directory is used as a pattern match,
1060          no individual file within the directory or subdirectory
1061          is matched.
1062      Used with "--git-blame", does not iterate all files in directory
1063  Using "--git-blame" is slow and may add old committers and authors
1064      that are no longer active maintainers to the output.
1065  Using "--roles" or "--rolestats" with git send-email --cc-cmd or any
1066      other automated tools that expect only ["name"] <email address>
1067      may not work because of additional output after <email address>.
1068  Using "--rolestats" and "--git-blame" shows the #/total=% commits,
1069      not the percentage of the entire file authored.  # of commits is
1070      not a good measure of amount of code authored.  1 major commit may
1071      contain a thousand lines, 5 trivial commits may modify a single line.
1072  If git is not installed, but mercurial (hg) is installed and an .hg
1073      repository exists, the following options apply to mercurial:
1074          --git,
1075          --git-min-signatures, --git-max-maintainers, --git-min-percent, and
1076          --git-blame
1077      Use --hg-since not --git-since to control date selection
1078  File ".get_maintainer.conf", if it exists in the kvm-unit-tests source root
1079      directory, can change whatever get_maintainer defaults are desired.
1080      Entries in this file can be any command line argument.
1081      This file is prepended to any additional command line arguments.
1082      Multiple lines and # comments are allowed.
1083  Most options have both positive and negative forms.
1084      The negative forms for --<foo> are --no<foo> and --no-<foo>.
1085
1086EOT
1087}
1088
1089sub parse_email {
1090    my ($formatted_email) = @_;
1091
1092    my $name = "";
1093    my $address = "";
1094
1095    if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) {
1096	$name = $1;
1097	$address = $2;
1098    } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) {
1099	$address = $1;
1100    } elsif ($formatted_email =~ /^(.+\@\S*).*$/) {
1101	$address = $1;
1102    }
1103
1104    $name =~ s/^\s+|\s+$//g;
1105    $name =~ s/^\"|\"$//g;
1106    $address =~ s/^\s+|\s+$//g;
1107
1108    if ($name =~ /[^\w \-]/i) {  	 ##has "must quote" chars
1109	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
1110	$name = "\"$name\"";
1111    }
1112
1113    return ($name, $address);
1114}
1115
1116sub format_email {
1117    my ($name, $address, $usename) = @_;
1118
1119    my $formatted_email;
1120
1121    $name =~ s/^\s+|\s+$//g;
1122    $name =~ s/^\"|\"$//g;
1123    $address =~ s/^\s+|\s+$//g;
1124
1125    if ($name =~ /[^\w \-]/i) {          ##has "must quote" chars
1126	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
1127	$name = "\"$name\"";
1128    }
1129
1130    if ($usename) {
1131	if ("$name" eq "") {
1132	    $formatted_email = "$address";
1133	} else {
1134	    $formatted_email = "$name <$address>";
1135	}
1136    } else {
1137	$formatted_email = $address;
1138    }
1139
1140    return $formatted_email;
1141}
1142
1143sub find_first_section {
1144    my $index = 0;
1145
1146    while ($index < @typevalue) {
1147	my $tv = $typevalue[$index];
1148	if (($tv =~ m/^([A-Z]):\s*(.*)/)) {
1149	    last;
1150	}
1151	$index++;
1152    }
1153
1154    return $index;
1155}
1156
1157sub find_starting_index {
1158    my ($index) = @_;
1159
1160    while ($index > 0) {
1161	my $tv = $typevalue[$index];
1162	if (!($tv =~ m/^([A-Z]):\s*(.*)/)) {
1163	    last;
1164	}
1165	$index--;
1166    }
1167
1168    return $index;
1169}
1170
1171sub find_ending_index {
1172    my ($index) = @_;
1173
1174    while ($index < @typevalue) {
1175	my $tv = $typevalue[$index];
1176	if (!($tv =~ m/^([A-Z]):\s*(.*)/)) {
1177	    last;
1178	}
1179	$index++;
1180    }
1181
1182    return $index;
1183}
1184
1185sub get_subsystem_name {
1186    my ($index) = @_;
1187
1188    my $start = find_starting_index($index);
1189
1190    my $subsystem = $typevalue[$start];
1191    if ($output_section_maxlen && length($subsystem) > $output_section_maxlen) {
1192	$subsystem = substr($subsystem, 0, $output_section_maxlen - 3);
1193	$subsystem =~ s/\s*$//;
1194	$subsystem = $subsystem . "...";
1195    }
1196    return $subsystem;
1197}
1198
1199sub get_maintainer_role {
1200    my ($index) = @_;
1201
1202    my $i;
1203    my $start = find_starting_index($index);
1204    my $end = find_ending_index($index);
1205
1206    my $role = "unknown";
1207    my $subsystem = get_subsystem_name($index);
1208
1209    for ($i = $start + 1; $i < $end; $i++) {
1210	my $tv = $typevalue[$i];
1211	if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1212	    my $ptype = $1;
1213	    my $pvalue = $2;
1214	    if ($ptype eq "S") {
1215		$role = $pvalue;
1216	    }
1217	}
1218    }
1219
1220    $role = lc($role);
1221    if      ($role eq "supported") {
1222	$role = "supporter";
1223    } elsif ($role eq "maintained") {
1224	$role = "maintainer";
1225    } elsif ($role eq "odd fixes") {
1226	$role = "odd fixer";
1227    } elsif ($role eq "orphan") {
1228	$role = "orphan minder";
1229    } elsif ($role eq "obsolete") {
1230	$role = "obsolete minder";
1231    } elsif ($role eq "buried alive in reporters") {
1232	$role = "chief penguin";
1233    }
1234
1235    return $role . ":" . $subsystem;
1236}
1237
1238sub get_list_role {
1239    my ($index) = @_;
1240
1241    my $subsystem = get_subsystem_name($index);
1242
1243    if ($subsystem eq "THE REST") {
1244	$subsystem = "";
1245    }
1246
1247    return $subsystem;
1248}
1249
1250sub add_categories {
1251    my ($index) = @_;
1252
1253    my $i;
1254    my $start = find_starting_index($index);
1255    my $end = find_ending_index($index);
1256
1257    push(@subsystem, $typevalue[$start]);
1258
1259    for ($i = $start + 1; $i < $end; $i++) {
1260	my $tv = $typevalue[$i];
1261	if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1262	    my $ptype = $1;
1263	    my $pvalue = $2;
1264	    if ($ptype eq "L") {
1265		my $list_address = $pvalue;
1266		my $list_additional = "";
1267		my $list_role = get_list_role($i);
1268
1269		if ($list_role ne "") {
1270		    $list_role = ":" . $list_role;
1271		}
1272		if ($list_address =~ m/([^\s]+)\s+(.*)$/) {
1273		    $list_address = $1;
1274		    $list_additional = $2;
1275		}
1276		if ($list_additional =~ m/subscribers-only/) {
1277		    if ($email_subscriber_list) {
1278			if (!$hash_list_to{lc($list_address)}) {
1279			    $hash_list_to{lc($list_address)} = 1;
1280			    push(@list_to, [$list_address,
1281					    "subscriber list${list_role}"]);
1282			}
1283		    }
1284		} else {
1285		    if ($email_list) {
1286			if (!$hash_list_to{lc($list_address)}) {
1287			    if ($list_additional =~ m/moderated/) {
1288				if ($email_moderated_list) {
1289				    $hash_list_to{lc($list_address)} = 1;
1290				    push(@list_to, [$list_address,
1291						    "moderated list${list_role}"]);
1292				}
1293			    } else {
1294				$hash_list_to{lc($list_address)} = 1;
1295				push(@list_to, [$list_address,
1296						"open list${list_role}"]);
1297			    }
1298			}
1299		    }
1300		}
1301	    } elsif ($ptype eq "M") {
1302		if ($email_maintainer) {
1303		    my $role = get_maintainer_role($i);
1304		    push_email_addresses($pvalue, $role);
1305		}
1306	    } elsif ($ptype eq "R") {
1307		if ($email_reviewer) {
1308		    my $subsystem = get_subsystem_name($i);
1309		    push_email_addresses($pvalue, "reviewer:$subsystem");
1310		}
1311	    } elsif ($ptype eq "T") {
1312		push(@scm, $pvalue);
1313	    } elsif ($ptype eq "W") {
1314		push(@web, $pvalue);
1315	    } elsif ($ptype eq "S") {
1316		push(@status, $pvalue);
1317	    }
1318	}
1319    }
1320}
1321
1322sub email_inuse {
1323    my ($name, $address) = @_;
1324
1325    return 1 if (($name eq "") && ($address eq ""));
1326    return 1 if (($name ne "") && exists($email_hash_name{lc($name)}));
1327    return 1 if (($address ne "") && exists($email_hash_address{lc($address)}));
1328
1329    return 0;
1330}
1331
1332sub push_email_address {
1333    my ($line, $role) = @_;
1334
1335    my ($name, $address) = parse_email($line);
1336
1337    if ($address eq "") {
1338	return 0;
1339    }
1340
1341    if (!$email_remove_duplicates) {
1342	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1343    } elsif (!email_inuse($name, $address)) {
1344	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1345	$email_hash_name{lc($name)}++ if ($name ne "");
1346	$email_hash_address{lc($address)}++;
1347    }
1348
1349    return 1;
1350}
1351
1352sub push_email_addresses {
1353    my ($address, $role) = @_;
1354
1355    my @address_list = ();
1356
1357    if (rfc822_valid($address)) {
1358	push_email_address($address, $role);
1359    } elsif (@address_list = rfc822_validlist($address)) {
1360	my $array_count = shift(@address_list);
1361	while (my $entry = shift(@address_list)) {
1362	    push_email_address($entry, $role);
1363	}
1364    } else {
1365	if (!push_email_address($address, $role)) {
1366	    warn("Invalid MAINTAINERS address: '" . $address . "'\n");
1367	}
1368    }
1369}
1370
1371sub add_role {
1372    my ($line, $role) = @_;
1373
1374    my ($name, $address) = parse_email($line);
1375    my $email = format_email($name, $address, $email_usename);
1376
1377    foreach my $entry (@email_to) {
1378	if ($email_remove_duplicates) {
1379	    my ($entry_name, $entry_address) = parse_email($entry->[0]);
1380	    if (($name eq $entry_name || $address eq $entry_address)
1381		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1382	    ) {
1383		if ($entry->[1] eq "") {
1384		    $entry->[1] = "$role";
1385		} else {
1386		    $entry->[1] = "$entry->[1],$role";
1387		}
1388	    }
1389	} else {
1390	    if ($email eq $entry->[0]
1391		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1392	    ) {
1393		if ($entry->[1] eq "") {
1394		    $entry->[1] = "$role";
1395		} else {
1396		    $entry->[1] = "$entry->[1],$role";
1397		}
1398	    }
1399	}
1400    }
1401}
1402
1403sub which {
1404    my ($bin) = @_;
1405
1406    foreach my $path (split(/:/, $ENV{PATH})) {
1407	if (-e "$path/$bin") {
1408	    return "$path/$bin";
1409	}
1410    }
1411
1412    return "";
1413}
1414
1415sub which_conf {
1416    my ($conf) = @_;
1417
1418    foreach my $path (split(/:/, ".:$ENV{HOME}:.scripts")) {
1419	if (-e "$path/$conf") {
1420	    return "$path/$conf";
1421	}
1422    }
1423
1424    return "";
1425}
1426
1427sub mailmap_email {
1428    my ($line) = @_;
1429
1430    my ($name, $address) = parse_email($line);
1431    my $email = format_email($name, $address, 1);
1432    my $real_name = $name;
1433    my $real_address = $address;
1434
1435    if (exists $mailmap->{names}->{$email} ||
1436	exists $mailmap->{addresses}->{$email}) {
1437	if (exists $mailmap->{names}->{$email}) {
1438	    $real_name = $mailmap->{names}->{$email};
1439	}
1440	if (exists $mailmap->{addresses}->{$email}) {
1441	    $real_address = $mailmap->{addresses}->{$email};
1442	}
1443    } else {
1444	if (exists $mailmap->{names}->{$address}) {
1445	    $real_name = $mailmap->{names}->{$address};
1446	}
1447	if (exists $mailmap->{addresses}->{$address}) {
1448	    $real_address = $mailmap->{addresses}->{$address};
1449	}
1450    }
1451    return format_email($real_name, $real_address, 1);
1452}
1453
1454sub mailmap {
1455    my (@addresses) = @_;
1456
1457    my @mapped_emails = ();
1458    foreach my $line (@addresses) {
1459	push(@mapped_emails, mailmap_email($line));
1460    }
1461    merge_by_realname(@mapped_emails) if ($email_use_mailmap);
1462    return @mapped_emails;
1463}
1464
1465sub merge_by_realname {
1466    my %address_map;
1467    my (@emails) = @_;
1468
1469    foreach my $email (@emails) {
1470	my ($name, $address) = parse_email($email);
1471	if (exists $address_map{$name}) {
1472	    $address = $address_map{$name};
1473	    $email = format_email($name, $address, 1);
1474	} else {
1475	    $address_map{$name} = $address;
1476	}
1477    }
1478}
1479
1480sub git_execute_cmd {
1481    my ($cmd) = @_;
1482    my @lines = ();
1483
1484    my $output = `$cmd`;
1485    $output =~ s/^\s*//gm;
1486    @lines = split("\n", $output);
1487
1488    return @lines;
1489}
1490
1491sub hg_execute_cmd {
1492    my ($cmd) = @_;
1493    my @lines = ();
1494
1495    my $output = `$cmd`;
1496    @lines = split("\n", $output);
1497
1498    return @lines;
1499}
1500
1501sub extract_formatted_signatures {
1502    my (@signature_lines) = @_;
1503
1504    my @type = @signature_lines;
1505
1506    s/\s*(.*):.*/$1/ for (@type);
1507
1508    # cut -f2- -d":"
1509    s/\s*.*:\s*(.+)\s*/$1/ for (@signature_lines);
1510
1511## Reformat email addresses (with names) to avoid badly written signatures
1512
1513    foreach my $signer (@signature_lines) {
1514	$signer = deduplicate_email($signer);
1515    }
1516
1517    return (\@type, \@signature_lines);
1518}
1519
1520sub vcs_find_signers {
1521    my ($cmd, $file) = @_;
1522    my $commits;
1523    my @lines = ();
1524    my @signatures = ();
1525    my @authors = ();
1526    my @stats = ();
1527
1528    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1529
1530    my $pattern = $VCS_cmds{"commit_pattern"};
1531    my $author_pattern = $VCS_cmds{"author_pattern"};
1532    my $stat_pattern = $VCS_cmds{"stat_pattern"};
1533
1534    $stat_pattern =~ s/(\$\w+)/$1/eeg;		#interpolate $stat_pattern
1535
1536    $commits = grep(/$pattern/, @lines);	# of commits
1537
1538    @authors = grep(/$author_pattern/, @lines);
1539    @signatures = grep(/^[ \t]*${signature_pattern}.*\@.*$/, @lines);
1540    @stats = grep(/$stat_pattern/, @lines);
1541
1542#    print("stats: <@stats>\n");
1543
1544    return (0, \@signatures, \@authors, \@stats) if !@signatures;
1545
1546    save_commits_by_author(@lines) if ($interactive);
1547    save_commits_by_signer(@lines) if ($interactive);
1548
1549    my ($author_ref, $authors_ref) = extract_formatted_signatures(@authors);
1550    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
1551
1552    return ($commits, $signers_ref, $authors_ref, \@stats);
1553}
1554
1555sub vcs_find_author {
1556    my ($cmd) = @_;
1557    my @lines = ();
1558
1559    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1560
1561    return @lines if !@lines;
1562
1563    my @authors = ();
1564    foreach my $line (@lines) {
1565	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
1566	    my $author = $1;
1567	    my ($name, $address) = parse_email($author);
1568	    $author = format_email($name, $address, 1);
1569	    push(@authors, $author);
1570	}
1571    }
1572
1573    save_commits_by_author(@lines) if ($interactive);
1574    save_commits_by_signer(@lines) if ($interactive);
1575
1576    return @authors;
1577}
1578
1579sub vcs_save_commits {
1580    my ($cmd) = @_;
1581    my @lines = ();
1582    my @commits = ();
1583
1584    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1585
1586    foreach my $line (@lines) {
1587	if ($line =~ m/$VCS_cmds{"blame_commit_pattern"}/) {
1588	    push(@commits, $1);
1589	}
1590    }
1591
1592    return @commits;
1593}
1594
1595sub vcs_blame {
1596    my ($file) = @_;
1597    my $cmd;
1598    my @commits = ();
1599
1600    return @commits if (!(-f $file));
1601
1602    if (@range && $VCS_cmds{"blame_range_cmd"} eq "") {
1603	my @all_commits = ();
1604
1605	$cmd = $VCS_cmds{"blame_file_cmd"};
1606	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1607	@all_commits = vcs_save_commits($cmd);
1608
1609	foreach my $file_range_diff (@range) {
1610	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1611	    my $diff_file = $1;
1612	    my $diff_start = $2;
1613	    my $diff_length = $3;
1614	    next if ("$file" ne "$diff_file");
1615	    for (my $i = $diff_start; $i < $diff_start + $diff_length; $i++) {
1616		push(@commits, $all_commits[$i]);
1617	    }
1618	}
1619    } elsif (@range) {
1620	foreach my $file_range_diff (@range) {
1621	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1622	    my $diff_file = $1;
1623	    my $diff_start = $2;
1624	    my $diff_length = $3;
1625	    next if ("$file" ne "$diff_file");
1626	    $cmd = $VCS_cmds{"blame_range_cmd"};
1627	    $cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1628	    push(@commits, vcs_save_commits($cmd));
1629	}
1630    } else {
1631	$cmd = $VCS_cmds{"blame_file_cmd"};
1632	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1633	@commits = vcs_save_commits($cmd);
1634    }
1635
1636    foreach my $commit (@commits) {
1637	$commit =~ s/^\^//g;
1638    }
1639
1640    return @commits;
1641}
1642
1643my $printed_novcs = 0;
1644sub vcs_exists {
1645    %VCS_cmds = %VCS_cmds_git;
1646    return 1 if eval $VCS_cmds{"available"};
1647    %VCS_cmds = %VCS_cmds_hg;
1648    return 2 if eval $VCS_cmds{"available"};
1649    %VCS_cmds = ();
1650    if (!$printed_novcs) {
1651	warn("$P: No supported VCS found.  Add --nogit to options?\n");
1652	warn("Using a git repository produces better results.\n");
1653	warn("Try kvm-unit-tests's latest git repository using:\n");
1654	warn("git clone https://gitlab.com/kvm-unit-tests/kvm-unit-tests\n");
1655	$printed_novcs = 1;
1656    }
1657    return 0;
1658}
1659
1660sub vcs_is_git {
1661    vcs_exists();
1662    return $vcs_used == 1;
1663}
1664
1665sub vcs_is_hg {
1666    return $vcs_used == 2;
1667}
1668
1669sub vcs_add_commit_signers {
1670    return if (!vcs_exists());
1671
1672    my ($commit, $desc) = @_;
1673    my $commit_count = 0;
1674    my $commit_authors_ref;
1675    my $commit_signers_ref;
1676    my $stats_ref;
1677    my @commit_authors = ();
1678    my @commit_signers = ();
1679    my $cmd;
1680
1681    $cmd = $VCS_cmds{"find_commit_signers_cmd"};
1682    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
1683
1684    ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, "");
1685    @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
1686    @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
1687
1688    foreach my $signer (@commit_signers) {
1689	$signer = deduplicate_email($signer);
1690    }
1691
1692    vcs_assign($desc, 1, @commit_signers);
1693}
1694
1695sub interactive_get_maintainers {
1696    my ($list_ref) = @_;
1697    my @list = @$list_ref;
1698
1699    vcs_exists();
1700
1701    my %selected;
1702    my %authored;
1703    my %signed;
1704    my $count = 0;
1705    my $maintained = 0;
1706    foreach my $entry (@list) {
1707	$maintained = 1 if ($entry->[1] =~ /^(maintainer|supporter)/i);
1708	$selected{$count} = 1;
1709	$authored{$count} = 0;
1710	$signed{$count} = 0;
1711	$count++;
1712    }
1713
1714    #menu loop
1715    my $done = 0;
1716    my $print_options = 0;
1717    my $redraw = 1;
1718    while (!$done) {
1719	$count = 0;
1720	if ($redraw) {
1721	    printf STDERR "\n%1s %2s %-65s",
1722			  "*", "#", "email/list and role:stats";
1723	    if ($email_git ||
1724		($email_git_fallback && !$maintained) ||
1725		$email_git_blame) {
1726		print STDERR "auth sign";
1727	    }
1728	    print STDERR "\n";
1729	    foreach my $entry (@list) {
1730		my $email = $entry->[0];
1731		my $role = $entry->[1];
1732		my $sel = "";
1733		$sel = "*" if ($selected{$count});
1734		my $commit_author = $commit_author_hash{$email};
1735		my $commit_signer = $commit_signer_hash{$email};
1736		my $authored = 0;
1737		my $signed = 0;
1738		$authored++ for (@{$commit_author});
1739		$signed++ for (@{$commit_signer});
1740		printf STDERR "%1s %2d %-65s", $sel, $count + 1, $email;
1741		printf STDERR "%4d %4d", $authored, $signed
1742		    if ($authored > 0 || $signed > 0);
1743		printf STDERR "\n     %s\n", $role;
1744		if ($authored{$count}) {
1745		    my $commit_author = $commit_author_hash{$email};
1746		    foreach my $ref (@{$commit_author}) {
1747			print STDERR "     Author: @{$ref}[1]\n";
1748		    }
1749		}
1750		if ($signed{$count}) {
1751		    my $commit_signer = $commit_signer_hash{$email};
1752		    foreach my $ref (@{$commit_signer}) {
1753			print STDERR "     @{$ref}[2]: @{$ref}[1]\n";
1754		    }
1755		}
1756
1757		$count++;
1758	    }
1759	}
1760	my $date_ref = \$email_git_since;
1761	$date_ref = \$email_hg_since if (vcs_is_hg());
1762	if ($print_options) {
1763	    $print_options = 0;
1764	    if (vcs_exists()) {
1765		print STDERR <<EOT
1766
1767Version Control options:
1768g  use git history      [$email_git]
1769gf use git-fallback     [$email_git_fallback]
1770b  use git blame        [$email_git_blame]
1771bs use blame signatures [$email_git_blame_signatures]
1772c# minimum commits      [$email_git_min_signatures]
1773%# min percent          [$email_git_min_percent]
1774d# history to use       [$$date_ref]
1775x# max maintainers      [$email_git_max_maintainers]
1776t  all signature types  [$email_git_all_signature_types]
1777m  use .mailmap         [$email_use_mailmap]
1778EOT
1779	    }
1780	    print STDERR <<EOT
1781
1782Additional options:
17830  toggle all
1784tm toggle maintainers
1785tg toggle git entries
1786tl toggle open list entries
1787ts toggle subscriber list entries
1788f  emails in file       [$email_file_emails]
1789k  keywords in file     [$keywords]
1790r  remove duplicates    [$email_remove_duplicates]
1791p# pattern match depth  [$pattern_depth]
1792EOT
1793	}
1794	print STDERR
1795"\n#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): ";
1796
1797	my $input = <STDIN>;
1798	chomp($input);
1799
1800	$redraw = 1;
1801	my $rerun = 0;
1802	my @wish = split(/[, ]+/, $input);
1803	foreach my $nr (@wish) {
1804	    $nr = lc($nr);
1805	    my $sel = substr($nr, 0, 1);
1806	    my $str = substr($nr, 1);
1807	    my $val = 0;
1808	    $val = $1 if $str =~ /^(\d+)$/;
1809
1810	    if ($sel eq "y") {
1811		$interactive = 0;
1812		$done = 1;
1813		$output_rolestats = 0;
1814		$output_roles = 0;
1815		last;
1816	    } elsif ($nr =~ /^\d+$/ && $nr > 0 && $nr <= $count) {
1817		$selected{$nr - 1} = !$selected{$nr - 1};
1818	    } elsif ($sel eq "*" || $sel eq '^') {
1819		my $toggle = 0;
1820		$toggle = 1 if ($sel eq '*');
1821		for (my $i = 0; $i < $count; $i++) {
1822		    $selected{$i} = $toggle;
1823		}
1824	    } elsif ($sel eq "0") {
1825		for (my $i = 0; $i < $count; $i++) {
1826		    $selected{$i} = !$selected{$i};
1827		}
1828	    } elsif ($sel eq "t") {
1829		if (lc($str) eq "m") {
1830		    for (my $i = 0; $i < $count; $i++) {
1831			$selected{$i} = !$selected{$i}
1832			    if ($list[$i]->[1] =~ /^(maintainer|supporter)/i);
1833		    }
1834		} elsif (lc($str) eq "g") {
1835		    for (my $i = 0; $i < $count; $i++) {
1836			$selected{$i} = !$selected{$i}
1837			    if ($list[$i]->[1] =~ /^(author|commit|signer)/i);
1838		    }
1839		} elsif (lc($str) eq "l") {
1840		    for (my $i = 0; $i < $count; $i++) {
1841			$selected{$i} = !$selected{$i}
1842			    if ($list[$i]->[1] =~ /^(open list)/i);
1843		    }
1844		} elsif (lc($str) eq "s") {
1845		    for (my $i = 0; $i < $count; $i++) {
1846			$selected{$i} = !$selected{$i}
1847			    if ($list[$i]->[1] =~ /^(subscriber list)/i);
1848		    }
1849		}
1850	    } elsif ($sel eq "a") {
1851		if ($val > 0 && $val <= $count) {
1852		    $authored{$val - 1} = !$authored{$val - 1};
1853		} elsif ($str eq '*' || $str eq '^') {
1854		    my $toggle = 0;
1855		    $toggle = 1 if ($str eq '*');
1856		    for (my $i = 0; $i < $count; $i++) {
1857			$authored{$i} = $toggle;
1858		    }
1859		}
1860	    } elsif ($sel eq "s") {
1861		if ($val > 0 && $val <= $count) {
1862		    $signed{$val - 1} = !$signed{$val - 1};
1863		} elsif ($str eq '*' || $str eq '^') {
1864		    my $toggle = 0;
1865		    $toggle = 1 if ($str eq '*');
1866		    for (my $i = 0; $i < $count; $i++) {
1867			$signed{$i} = $toggle;
1868		    }
1869		}
1870	    } elsif ($sel eq "o") {
1871		$print_options = 1;
1872		$redraw = 1;
1873	    } elsif ($sel eq "g") {
1874		if ($str eq "f") {
1875		    bool_invert(\$email_git_fallback);
1876		} else {
1877		    bool_invert(\$email_git);
1878		}
1879		$rerun = 1;
1880	    } elsif ($sel eq "b") {
1881		if ($str eq "s") {
1882		    bool_invert(\$email_git_blame_signatures);
1883		} else {
1884		    bool_invert(\$email_git_blame);
1885		}
1886		$rerun = 1;
1887	    } elsif ($sel eq "c") {
1888		if ($val > 0) {
1889		    $email_git_min_signatures = $val;
1890		    $rerun = 1;
1891		}
1892	    } elsif ($sel eq "x") {
1893		if ($val > 0) {
1894		    $email_git_max_maintainers = $val;
1895		    $rerun = 1;
1896		}
1897	    } elsif ($sel eq "%") {
1898		if ($str ne "" && $val >= 0) {
1899		    $email_git_min_percent = $val;
1900		    $rerun = 1;
1901		}
1902	    } elsif ($sel eq "d") {
1903		if (vcs_is_git()) {
1904		    $email_git_since = $str;
1905		} elsif (vcs_is_hg()) {
1906		    $email_hg_since = $str;
1907		}
1908		$rerun = 1;
1909	    } elsif ($sel eq "t") {
1910		bool_invert(\$email_git_all_signature_types);
1911		$rerun = 1;
1912	    } elsif ($sel eq "f") {
1913		bool_invert(\$email_file_emails);
1914		$rerun = 1;
1915	    } elsif ($sel eq "r") {
1916		bool_invert(\$email_remove_duplicates);
1917		$rerun = 1;
1918	    } elsif ($sel eq "m") {
1919		bool_invert(\$email_use_mailmap);
1920		read_mailmap();
1921		$rerun = 1;
1922	    } elsif ($sel eq "k") {
1923		bool_invert(\$keywords);
1924		$rerun = 1;
1925	    } elsif ($sel eq "p") {
1926		if ($str ne "" && $val >= 0) {
1927		    $pattern_depth = $val;
1928		    $rerun = 1;
1929		}
1930	    } elsif ($sel eq "h" || $sel eq "?") {
1931		print STDERR <<EOT
1932
1933Interactive mode allows you to select the various maintainers, submitters,
1934commit signers and mailing lists that could be CC'd on a patch.
1935
1936Any *'d entry is selected.
1937
1938If you have git or hg installed, you can choose to summarize the commit
1939history of files in the patch.  Also, each line of the current file can
1940be matched to its commit author and that commits signers with blame.
1941
1942Various knobs exist to control the length of time for active commit
1943tracking, the maximum number of commit authors and signers to add,
1944and such.
1945
1946Enter selections at the prompt until you are satisfied that the selected
1947maintainers are appropriate.  You may enter multiple selections separated
1948by either commas or spaces.
1949
1950EOT
1951	    } else {
1952		print STDERR "invalid option: '$nr'\n";
1953		$redraw = 0;
1954	    }
1955	}
1956	if ($rerun) {
1957	    print STDERR "git-blame can be very slow, please have patience..."
1958		if ($email_git_blame);
1959	    goto &get_maintainers;
1960	}
1961    }
1962
1963    #drop not selected entries
1964    $count = 0;
1965    my @new_emailto = ();
1966    foreach my $entry (@list) {
1967	if ($selected{$count}) {
1968	    push(@new_emailto, $list[$count]);
1969	}
1970	$count++;
1971    }
1972    return @new_emailto;
1973}
1974
1975sub bool_invert {
1976    my ($bool_ref) = @_;
1977
1978    if ($$bool_ref) {
1979	$$bool_ref = 0;
1980    } else {
1981	$$bool_ref = 1;
1982    }
1983}
1984
1985sub deduplicate_email {
1986    my ($email) = @_;
1987
1988    my $matched = 0;
1989    my ($name, $address) = parse_email($email);
1990    $email = format_email($name, $address, 1);
1991    $email = mailmap_email($email);
1992
1993    return $email if (!$email_remove_duplicates);
1994
1995    ($name, $address) = parse_email($email);
1996
1997    if ($name ne "" && $deduplicate_name_hash{lc($name)}) {
1998	$name = $deduplicate_name_hash{lc($name)}->[0];
1999	$address = $deduplicate_name_hash{lc($name)}->[1];
2000	$matched = 1;
2001    } elsif ($deduplicate_address_hash{lc($address)}) {
2002	$name = $deduplicate_address_hash{lc($address)}->[0];
2003	$address = $deduplicate_address_hash{lc($address)}->[1];
2004	$matched = 1;
2005    }
2006    if (!$matched) {
2007	$deduplicate_name_hash{lc($name)} = [ $name, $address ];
2008	$deduplicate_address_hash{lc($address)} = [ $name, $address ];
2009    }
2010    $email = format_email($name, $address, 1);
2011    $email = mailmap_email($email);
2012    return $email;
2013}
2014
2015sub save_commits_by_author {
2016    my (@lines) = @_;
2017
2018    my @authors = ();
2019    my @commits = ();
2020    my @subjects = ();
2021
2022    foreach my $line (@lines) {
2023	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
2024	    my $author = $1;
2025	    $author = deduplicate_email($author);
2026	    push(@authors, $author);
2027	}
2028	push(@commits, $1) if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
2029	push(@subjects, $1) if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
2030    }
2031
2032    for (my $i = 0; $i < @authors; $i++) {
2033	my $exists = 0;
2034	foreach my $ref(@{$commit_author_hash{$authors[$i]}}) {
2035	    if (@{$ref}[0] eq $commits[$i] &&
2036		@{$ref}[1] eq $subjects[$i]) {
2037		$exists = 1;
2038		last;
2039	    }
2040	}
2041	if (!$exists) {
2042	    push(@{$commit_author_hash{$authors[$i]}},
2043		 [ ($commits[$i], $subjects[$i]) ]);
2044	}
2045    }
2046}
2047
2048sub save_commits_by_signer {
2049    my (@lines) = @_;
2050
2051    my $commit = "";
2052    my $subject = "";
2053
2054    foreach my $line (@lines) {
2055	$commit = $1 if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
2056	$subject = $1 if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
2057	if ($line =~ /^[ \t]*${signature_pattern}.*\@.*$/) {
2058	    my @signatures = ($line);
2059	    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
2060	    my @types = @$types_ref;
2061	    my @signers = @$signers_ref;
2062
2063	    my $type = $types[0];
2064	    my $signer = $signers[0];
2065
2066	    $signer = deduplicate_email($signer);
2067
2068	    my $exists = 0;
2069	    foreach my $ref(@{$commit_signer_hash{$signer}}) {
2070		if (@{$ref}[0] eq $commit &&
2071		    @{$ref}[1] eq $subject &&
2072		    @{$ref}[2] eq $type) {
2073		    $exists = 1;
2074		    last;
2075		}
2076	    }
2077	    if (!$exists) {
2078		push(@{$commit_signer_hash{$signer}},
2079		     [ ($commit, $subject, $type) ]);
2080	    }
2081	}
2082    }
2083}
2084
2085sub vcs_assign {
2086    my ($role, $divisor, @lines) = @_;
2087
2088    my %hash;
2089    my $count = 0;
2090
2091    return if (@lines <= 0);
2092
2093    if ($divisor <= 0) {
2094	warn("Bad divisor in " . (caller(0))[3] . ": $divisor\n");
2095	$divisor = 1;
2096    }
2097
2098    @lines = mailmap(@lines);
2099
2100    return if (@lines <= 0);
2101
2102    @lines = sort(@lines);
2103
2104    # uniq -c
2105    $hash{$_}++ for @lines;
2106
2107    # sort -rn
2108    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
2109	my $sign_offs = $hash{$line};
2110	my $percent = $sign_offs * 100 / $divisor;
2111
2112	$percent = 100 if ($percent > 100);
2113	next if (ignore_email_address($line));
2114	$count++;
2115	last if ($sign_offs < $email_git_min_signatures ||
2116		 $count > $email_git_max_maintainers ||
2117		 $percent < $email_git_min_percent);
2118	push_email_address($line, '');
2119	if ($output_rolestats) {
2120	    my $fmt_percent = sprintf("%.0f", $percent);
2121	    add_role($line, "$role:$sign_offs/$divisor=$fmt_percent%");
2122	} else {
2123	    add_role($line, $role);
2124	}
2125    }
2126}
2127
2128sub vcs_file_signoffs {
2129    my ($file) = @_;
2130
2131    my $authors_ref;
2132    my $signers_ref;
2133    my $stats_ref;
2134    my @authors = ();
2135    my @signers = ();
2136    my @stats = ();
2137    my $commits;
2138
2139    $vcs_used = vcs_exists();
2140    return if (!$vcs_used);
2141
2142    my $cmd = $VCS_cmds{"find_signers_cmd"};
2143    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
2144
2145    ($commits, $signers_ref, $authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2146
2147    @signers = @{$signers_ref} if defined $signers_ref;
2148    @authors = @{$authors_ref} if defined $authors_ref;
2149    @stats = @{$stats_ref} if defined $stats_ref;
2150
2151#    print("commits: <$commits>\nsigners:<@signers>\nauthors: <@authors>\nstats: <@stats>\n");
2152
2153    foreach my $signer (@signers) {
2154	$signer = deduplicate_email($signer);
2155    }
2156
2157    vcs_assign("commit_signer", $commits, @signers);
2158    vcs_assign("authored", $commits, @authors);
2159    if ($#authors == $#stats) {
2160	my $stat_pattern = $VCS_cmds{"stat_pattern"};
2161	$stat_pattern =~ s/(\$\w+)/$1/eeg;	#interpolate $stat_pattern
2162
2163	my $added = 0;
2164	my $deleted = 0;
2165	for (my $i = 0; $i <= $#stats; $i++) {
2166	    if ($stats[$i] =~ /$stat_pattern/) {
2167		$added += $1;
2168		$deleted += $2;
2169	    }
2170	}
2171	my @tmp_authors = uniq(@authors);
2172	foreach my $author (@tmp_authors) {
2173	    $author = deduplicate_email($author);
2174	}
2175	@tmp_authors = uniq(@tmp_authors);
2176	my @list_added = ();
2177	my @list_deleted = ();
2178	foreach my $author (@tmp_authors) {
2179	    my $auth_added = 0;
2180	    my $auth_deleted = 0;
2181	    for (my $i = 0; $i <= $#stats; $i++) {
2182		if ($author eq deduplicate_email($authors[$i]) &&
2183		    $stats[$i] =~ /$stat_pattern/) {
2184		    $auth_added += $1;
2185		    $auth_deleted += $2;
2186		}
2187	    }
2188	    for (my $i = 0; $i < $auth_added; $i++) {
2189		push(@list_added, $author);
2190	    }
2191	    for (my $i = 0; $i < $auth_deleted; $i++) {
2192		push(@list_deleted, $author);
2193	    }
2194	}
2195	vcs_assign("added_lines", $added, @list_added);
2196	vcs_assign("removed_lines", $deleted, @list_deleted);
2197    }
2198}
2199
2200sub vcs_file_blame {
2201    my ($file) = @_;
2202
2203    my @signers = ();
2204    my @all_commits = ();
2205    my @commits = ();
2206    my $total_commits;
2207    my $total_lines;
2208
2209    $vcs_used = vcs_exists();
2210    return if (!$vcs_used);
2211
2212    @all_commits = vcs_blame($file);
2213    @commits = uniq(@all_commits);
2214    $total_commits = @commits;
2215    $total_lines = @all_commits;
2216
2217    if ($email_git_blame_signatures) {
2218	if (vcs_is_hg()) {
2219	    my $commit_count;
2220	    my $commit_authors_ref;
2221	    my $commit_signers_ref;
2222	    my $stats_ref;
2223	    my @commit_authors = ();
2224	    my @commit_signers = ();
2225	    my $commit = join(" -r ", @commits);
2226	    my $cmd;
2227
2228	    $cmd = $VCS_cmds{"find_commit_signers_cmd"};
2229	    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2230
2231	    ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2232	    @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
2233	    @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
2234
2235	    push(@signers, @commit_signers);
2236	} else {
2237	    foreach my $commit (@commits) {
2238		my $commit_count;
2239		my $commit_authors_ref;
2240		my $commit_signers_ref;
2241		my $stats_ref;
2242		my @commit_authors = ();
2243		my @commit_signers = ();
2244		my $cmd;
2245
2246		$cmd = $VCS_cmds{"find_commit_signers_cmd"};
2247		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2248
2249		($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2250		@commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
2251		@commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
2252
2253		push(@signers, @commit_signers);
2254	    }
2255	}
2256    }
2257
2258    if ($from_filename) {
2259	if ($output_rolestats) {
2260	    my @blame_signers;
2261	    if (vcs_is_hg()) {{		# Double brace for last exit
2262		my $commit_count;
2263		my @commit_signers = ();
2264		@commits = uniq(@commits);
2265		@commits = sort(@commits);
2266		my $commit = join(" -r ", @commits);
2267		my $cmd;
2268
2269		$cmd = $VCS_cmds{"find_commit_author_cmd"};
2270		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2271
2272		my @lines = ();
2273
2274		@lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
2275
2276		last if !@lines;
2277
2278		my @authors = ();
2279		foreach my $line (@lines) {
2280		    if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
2281			my $author = $1;
2282			$author = deduplicate_email($author);
2283			push(@authors, $author);
2284		    }
2285		}
2286
2287		save_commits_by_author(@lines) if ($interactive);
2288		save_commits_by_signer(@lines) if ($interactive);
2289
2290		push(@signers, @authors);
2291	    }}
2292	    else {
2293		foreach my $commit (@commits) {
2294		    my $i;
2295		    my $cmd = $VCS_cmds{"find_commit_author_cmd"};
2296		    $cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd
2297		    my @author = vcs_find_author($cmd);
2298		    next if !@author;
2299
2300		    my $formatted_author = deduplicate_email($author[0]);
2301
2302		    my $count = grep(/$commit/, @all_commits);
2303		    for ($i = 0; $i < $count ; $i++) {
2304			push(@blame_signers, $formatted_author);
2305		    }
2306		}
2307	    }
2308	    if (@blame_signers) {
2309		vcs_assign("authored lines", $total_lines, @blame_signers);
2310	    }
2311	}
2312	foreach my $signer (@signers) {
2313	    $signer = deduplicate_email($signer);
2314	}
2315	vcs_assign("commits", $total_commits, @signers);
2316    } else {
2317	foreach my $signer (@signers) {
2318	    $signer = deduplicate_email($signer);
2319	}
2320	vcs_assign("modified commits", $total_commits, @signers);
2321    }
2322}
2323
2324sub vcs_file_exists {
2325    my ($file) = @_;
2326
2327    my $exists;
2328
2329    my $vcs_used = vcs_exists();
2330    return 0 if (!$vcs_used);
2331
2332    my $cmd = $VCS_cmds{"file_exists_cmd"};
2333    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
2334    $cmd .= " 2>&1";
2335    $exists = &{$VCS_cmds{"execute_cmd"}}($cmd);
2336
2337    return 0 if ($? != 0);
2338
2339    return $exists;
2340}
2341
2342sub vcs_list_files {
2343    my ($file) = @_;
2344
2345    my @lsfiles = ();
2346
2347    my $vcs_used = vcs_exists();
2348    return 0 if (!$vcs_used);
2349
2350    my $cmd = $VCS_cmds{"list_files_cmd"};
2351    $cmd =~ s/(\$\w+)/$1/eeg;   # interpolate $cmd
2352    @lsfiles = &{$VCS_cmds{"execute_cmd"}}($cmd);
2353
2354    return () if ($? != 0);
2355
2356    return @lsfiles;
2357}
2358
2359sub uniq {
2360    my (@parms) = @_;
2361
2362    my %saw;
2363    @parms = grep(!$saw{$_}++, @parms);
2364    return @parms;
2365}
2366
2367sub sort_and_uniq {
2368    my (@parms) = @_;
2369
2370    my %saw;
2371    @parms = sort @parms;
2372    @parms = grep(!$saw{$_}++, @parms);
2373    return @parms;
2374}
2375
2376sub clean_file_emails {
2377    my (@file_emails) = @_;
2378    my @fmt_emails = ();
2379
2380    foreach my $email (@file_emails) {
2381	$email =~ s/[\(\<\{]{0,1}([A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+)[\)\>\}]{0,1}/\<$1\>/g;
2382	my ($name, $address) = parse_email($email);
2383	if ($name eq '"[,\.]"') {
2384	    $name = "";
2385	}
2386
2387	my @nw = split(/[^A-Za-zÀ-ÿ\'\,\.\+-]/, $name);
2388	if (@nw > 2) {
2389	    my $first = $nw[@nw - 3];
2390	    my $middle = $nw[@nw - 2];
2391	    my $last = $nw[@nw - 1];
2392
2393	    if (((length($first) == 1 && $first =~ m/[A-Za-z]/) ||
2394		 (length($first) == 2 && substr($first, -1) eq ".")) ||
2395		(length($middle) == 1 ||
2396		 (length($middle) == 2 && substr($middle, -1) eq "."))) {
2397		$name = "$first $middle $last";
2398	    } else {
2399		$name = "$middle $last";
2400	    }
2401	}
2402
2403	if (substr($name, -1) =~ /[,\.]/) {
2404	    $name = substr($name, 0, length($name) - 1);
2405	} elsif (substr($name, -2) =~ /[,\.]"/) {
2406	    $name = substr($name, 0, length($name) - 2) . '"';
2407	}
2408
2409	if (substr($name, 0, 1) =~ /[,\.]/) {
2410	    $name = substr($name, 1, length($name) - 1);
2411	} elsif (substr($name, 0, 2) =~ /"[,\.]/) {
2412	    $name = '"' . substr($name, 2, length($name) - 2);
2413	}
2414
2415	my $fmt_email = format_email($name, $address, $email_usename);
2416	push(@fmt_emails, $fmt_email);
2417    }
2418    return @fmt_emails;
2419}
2420
2421sub merge_email {
2422    my @lines;
2423    my %saw;
2424
2425    for (@_) {
2426	my ($address, $role) = @$_;
2427	if (!$saw{$address}) {
2428	    if ($output_roles) {
2429		push(@lines, "$address ($role)");
2430	    } else {
2431		push(@lines, $address);
2432	    }
2433	    $saw{$address} = 1;
2434	}
2435    }
2436
2437    return @lines;
2438}
2439
2440sub output {
2441    my (@parms) = @_;
2442
2443    if ($output_multiline) {
2444	foreach my $line (@parms) {
2445	    print("${line}\n");
2446	}
2447    } else {
2448	print(join($output_separator, @parms));
2449	print("\n");
2450    }
2451}
2452
2453my $rfc822re;
2454
2455sub make_rfc822re {
2456#   Basic lexical tokens are specials, domain_literal, quoted_string, atom, and
2457#   comment.  We must allow for rfc822_lwsp (or comments) after each of these.
2458#   This regexp will only work on addresses which have had comments stripped
2459#   and replaced with rfc822_lwsp.
2460
2461    my $specials = '()<>@,;:\\\\".\\[\\]';
2462    my $controls = '\\000-\\037\\177';
2463
2464    my $dtext = "[^\\[\\]\\r\\\\]";
2465    my $domain_literal = "\\[(?:$dtext|\\\\.)*\\]$rfc822_lwsp*";
2466
2467    my $quoted_string = "\"(?:[^\\\"\\r\\\\]|\\\\.|$rfc822_lwsp)*\"$rfc822_lwsp*";
2468
2469#   Use zero-width assertion to spot the limit of an atom.  A simple
2470#   $rfc822_lwsp* causes the regexp engine to hang occasionally.
2471    my $atom = "[^$specials $controls]+(?:$rfc822_lwsp+|\\Z|(?=[\\[\"$specials]))";
2472    my $word = "(?:$atom|$quoted_string)";
2473    my $localpart = "$word(?:\\.$rfc822_lwsp*$word)*";
2474
2475    my $sub_domain = "(?:$atom|$domain_literal)";
2476    my $domain = "$sub_domain(?:\\.$rfc822_lwsp*$sub_domain)*";
2477
2478    my $addr_spec = "$localpart\@$rfc822_lwsp*$domain";
2479
2480    my $phrase = "$word*";
2481    my $route = "(?:\@$domain(?:,\@$rfc822_lwsp*$domain)*:$rfc822_lwsp*)";
2482    my $route_addr = "\\<$rfc822_lwsp*$route?$addr_spec\\>$rfc822_lwsp*";
2483    my $mailbox = "(?:$addr_spec|$phrase$route_addr)";
2484
2485    my $group = "$phrase:$rfc822_lwsp*(?:$mailbox(?:,\\s*$mailbox)*)?;\\s*";
2486    my $address = "(?:$mailbox|$group)";
2487
2488    return "$rfc822_lwsp*$address";
2489}
2490
2491sub rfc822_strip_comments {
2492    my $s = shift;
2493#   Recursively remove comments, and replace with a single space.  The simpler
2494#   regexps in the Email Addressing FAQ are imperfect - they will miss escaped
2495#   chars in atoms, for example.
2496
2497    while ($s =~ s/^((?:[^"\\]|\\.)*
2498                    (?:"(?:[^"\\]|\\.)*"(?:[^"\\]|\\.)*)*)
2499                    \((?:[^()\\]|\\.)*\)/$1 /osx) {}
2500    return $s;
2501}
2502
2503#   valid: returns true if the parameter is an RFC822 valid address
2504#
2505sub rfc822_valid {
2506    my $s = rfc822_strip_comments(shift);
2507
2508    if (!$rfc822re) {
2509        $rfc822re = make_rfc822re();
2510    }
2511
2512    return $s =~ m/^$rfc822re$/so && $s =~ m/^$rfc822_char*$/;
2513}
2514
2515#   validlist: In scalar context, returns true if the parameter is an RFC822
2516#              valid list of addresses.
2517#
2518#              In list context, returns an empty list on failure (an invalid
2519#              address was found); otherwise a list whose first element is the
2520#              number of addresses found and whose remaining elements are the
2521#              addresses.  This is needed to disambiguate failure (invalid)
2522#              from success with no addresses found, because an empty string is
2523#              a valid list.
2524
2525sub rfc822_validlist {
2526    my $s = rfc822_strip_comments(shift);
2527
2528    if (!$rfc822re) {
2529        $rfc822re = make_rfc822re();
2530    }
2531    # * null list items are valid according to the RFC
2532    # * the '1' business is to aid in distinguishing failure from no results
2533
2534    my @r;
2535    if ($s =~ m/^(?:$rfc822re)?(?:,(?:$rfc822re)?)*$/so &&
2536	$s =~ m/^$rfc822_char*$/) {
2537        while ($s =~ m/(?:^|,$rfc822_lwsp*)($rfc822re)/gos) {
2538            push(@r, $1);
2539        }
2540        return wantarray ? (scalar(@r), @r) : 1;
2541    }
2542    return wantarray ? () : 0;
2543}
2544