1#!/usr/bin/env perl
2# SPDX-License-Identifier: GPL-2.0
3#
4# (c) 2007, Joe Perches <joe@perches.com>
5#           created from checkpatch.pl
6#
7# Print selected MAINTAINERS information for
8# the files modified in a patch or for a file
9#
10# usage: perl scripts/get_maintainer.pl [OPTIONS] <patch>
11#        perl scripts/get_maintainer.pl [OPTIONS] -f <file>
12
13use warnings;
14use strict;
15
16my $P = $0;
17my $V = '0.26';
18
19use Getopt::Long qw(:config no_auto_abbrev);
20use Cwd;
21use File::Find;
22use File::Spec::Functions;
23use open qw(:std :encoding(UTF-8));
24
25my $cur_path = fastgetcwd() . '/';
26my $lk_path = "./";
27my $email = 1;
28my $email_usename = 1;
29my $email_maintainer = 1;
30my $email_reviewer = 1;
31my $email_fixes = 1;
32my $email_list = 1;
33my $email_moderated_list = 1;
34my $email_subscriber_list = 0;
35my $email_git_penguin_chiefs = 0;
36my $email_git = 0;
37my $email_git_all_signature_types = 0;
38my $email_git_blame = 0;
39my $email_git_blame_signatures = 1;
40my $email_git_fallback = 1;
41my $email_git_min_signatures = 1;
42my $email_git_max_maintainers = 5;
43my $email_git_min_percent = 5;
44my $email_git_since = "1-year-ago";
45my $email_hg_since = "-365";
46my $interactive = 0;
47my $email_remove_duplicates = 1;
48my $email_use_mailmap = 1;
49my $output_multiline = 1;
50my $output_separator = ", ";
51my $output_roles = 0;
52my $output_rolestats = 1;
53my $output_substatus = undef;
54my $output_section_maxlen = 50;
55my $scm = 0;
56my $tree = 1;
57my $web = 0;
58my $bug = 0;
59my $subsystem = 0;
60my $status = 0;
61my $letters = "";
62my $keywords = 1;
63my $keywords_in_file = 0;
64my $sections = 0;
65my $email_file_emails = 0;
66my $from_filename = 0;
67my $pattern_depth = 0;
68my $self_test = undef;
69my $version = 0;
70my $help = 0;
71my $find_maintainer_files = 0;
72my $maintainer_path;
73my $vcs_used = 0;
74
75my $exit = 0;
76
77my @files = ();
78my @fixes = ();			# If a patch description includes Fixes: lines
79my @range = ();
80my @keyword_tvi = ();
81my @file_emails = ();
82
83my %commit_author_hash;
84my %commit_signer_hash;
85
86my @penguin_chief = ();
87push(@penguin_chief, "Linus Torvalds:torvalds\@linux-foundation.org");
88#Andrew wants in on most everything - 2009/01/14
89#push(@penguin_chief, "Andrew Morton:akpm\@linux-foundation.org");
90
91my @penguin_chief_names = ();
92foreach my $chief (@penguin_chief) {
93    if ($chief =~ m/^(.*):(.*)/) {
94	my $chief_name = $1;
95	my $chief_addr = $2;
96	push(@penguin_chief_names, $chief_name);
97    }
98}
99my $penguin_chiefs = "\(" . join("|", @penguin_chief_names) . "\)";
100
101# Signature types of people who are either
102# 	a) responsible for the code in question, or
103# 	b) familiar enough with it to give relevant feedback
104my @signature_tags = ();
105push(@signature_tags, "Signed-off-by:");
106push(@signature_tags, "Reviewed-by:");
107push(@signature_tags, "Acked-by:");
108
109my $signature_pattern = "\(" . join("|", @signature_tags) . "\)";
110
111# rfc822 email address - preloaded methods go here.
112my $rfc822_lwsp = "(?:(?:\\r\\n)?[ \\t])";
113my $rfc822_char = '[\\000-\\377]';
114
115# VCS command support: class-like functions and strings
116
117my %VCS_cmds;
118
119my %VCS_cmds_git = (
120    "execute_cmd" => \&git_execute_cmd,
121    "available" => '(which("git") ne "") && (-e ".git")',
122    "find_signers_cmd" =>
123	"git log --no-color --follow --since=\$email_git_since " .
124	    '--numstat --no-merges ' .
125	    '--format="GitCommit: %H%n' .
126		      'GitAuthor: %an <%ae>%n' .
127		      'GitDate: %aD%n' .
128		      'GitSubject: %s%n' .
129		      '%b%n"' .
130	    " -- \$file",
131    "find_commit_signers_cmd" =>
132	"git log --no-color " .
133	    '--numstat ' .
134	    '--format="GitCommit: %H%n' .
135		      'GitAuthor: %an <%ae>%n' .
136		      'GitDate: %aD%n' .
137		      'GitSubject: %s%n' .
138		      '%b%n"' .
139	    " -1 \$commit",
140    "find_commit_author_cmd" =>
141	"git log --no-color " .
142	    '--numstat ' .
143	    '--format="GitCommit: %H%n' .
144		      'GitAuthor: %an <%ae>%n' .
145		      'GitDate: %aD%n' .
146		      'GitSubject: %s%n"' .
147	    " -1 \$commit",
148    "blame_range_cmd" => "git blame -l -L \$diff_start,+\$diff_length \$file",
149    "blame_file_cmd" => "git blame -l \$file",
150    "commit_pattern" => "^GitCommit: ([0-9a-f]{40,40})",
151    "blame_commit_pattern" => "^([0-9a-f]+) ",
152    "author_pattern" => "^GitAuthor: (.*)",
153    "subject_pattern" => "^GitSubject: (.*)",
154    "stat_pattern" => "^(\\d+)\\t(\\d+)\\t\$file\$",
155    "file_exists_cmd" => "git ls-files \$file",
156    "list_files_cmd" => "git ls-files \$file",
157);
158
159my %VCS_cmds_hg = (
160    "execute_cmd" => \&hg_execute_cmd,
161    "available" => '(which("hg") ne "") && (-d ".hg")',
162    "find_signers_cmd" =>
163	"hg log --date=\$email_hg_since " .
164	    "--template='HgCommit: {node}\\n" .
165	                "HgAuthor: {author}\\n" .
166			"HgSubject: {desc}\\n'" .
167	    " -- \$file",
168    "find_commit_signers_cmd" =>
169	"hg log " .
170	    "--template='HgSubject: {desc}\\n'" .
171	    " -r \$commit",
172    "find_commit_author_cmd" =>
173	"hg log " .
174	    "--template='HgCommit: {node}\\n" .
175		        "HgAuthor: {author}\\n" .
176			"HgSubject: {desc|firstline}\\n'" .
177	    " -r \$commit",
178    "blame_range_cmd" => "",		# not supported
179    "blame_file_cmd" => "hg blame -n \$file",
180    "commit_pattern" => "^HgCommit: ([0-9a-f]{40,40})",
181    "blame_commit_pattern" => "^([ 0-9a-f]+):",
182    "author_pattern" => "^HgAuthor: (.*)",
183    "subject_pattern" => "^HgSubject: (.*)",
184    "stat_pattern" => "^(\\d+)\t(\\d+)\t\$file\$",
185    "file_exists_cmd" => "hg files \$file",
186    "list_files_cmd" => "hg manifest -R \$file",
187);
188
189my $conf = which_conf(".get_maintainer.conf");
190if (-f $conf) {
191    my @conf_args;
192    open(my $conffile, '<', "$conf")
193	or warn "$P: Can't find a readable .get_maintainer.conf file $!\n";
194
195    while (<$conffile>) {
196	my $line = $_;
197
198	$line =~ s/\s*\n?$//g;
199	$line =~ s/^\s*//g;
200	$line =~ s/\s+/ /g;
201
202	next if ($line =~ m/^\s*#/);
203	next if ($line =~ m/^\s*$/);
204
205	my @words = split(" ", $line);
206	foreach my $word (@words) {
207	    last if ($word =~ m/^#/);
208	    push (@conf_args, $word);
209	}
210    }
211    close($conffile);
212    unshift(@ARGV, @conf_args) if @conf_args;
213}
214
215my @ignore_emails = ();
216my $ignore_file = which_conf(".get_maintainer.ignore");
217if (-f $ignore_file) {
218    open(my $ignore, '<', "$ignore_file")
219	or warn "$P: Can't find a readable .get_maintainer.ignore file $!\n";
220    while (<$ignore>) {
221	my $line = $_;
222
223	$line =~ s/\s*\n?$//;
224	$line =~ s/^\s*//;
225	$line =~ s/\s+$//;
226	$line =~ s/#.*$//;
227
228	next if ($line =~ m/^\s*$/);
229	if (rfc822_valid($line)) {
230	    push(@ignore_emails, $line);
231	}
232    }
233    close($ignore);
234}
235
236if ($#ARGV > 0) {
237    foreach (@ARGV) {
238        if ($_ =~ /^-{1,2}self-test(?:=|$)/) {
239            die "$P: using --self-test does not allow any other option or argument\n";
240        }
241    }
242}
243
244if (!GetOptions(
245		'email!' => \$email,
246		'git!' => \$email_git,
247		'git-all-signature-types!' => \$email_git_all_signature_types,
248		'git-blame!' => \$email_git_blame,
249		'git-blame-signatures!' => \$email_git_blame_signatures,
250		'git-fallback!' => \$email_git_fallback,
251		'git-chief-penguins!' => \$email_git_penguin_chiefs,
252		'git-min-signatures=i' => \$email_git_min_signatures,
253		'git-max-maintainers=i' => \$email_git_max_maintainers,
254		'git-min-percent=i' => \$email_git_min_percent,
255		'git-since=s' => \$email_git_since,
256		'hg-since=s' => \$email_hg_since,
257		'i|interactive!' => \$interactive,
258		'remove-duplicates!' => \$email_remove_duplicates,
259		'mailmap!' => \$email_use_mailmap,
260		'm!' => \$email_maintainer,
261		'r!' => \$email_reviewer,
262		'n!' => \$email_usename,
263		'l!' => \$email_list,
264		'fixes!' => \$email_fixes,
265		'moderated!' => \$email_moderated_list,
266		's!' => \$email_subscriber_list,
267		'multiline!' => \$output_multiline,
268		'roles!' => \$output_roles,
269		'rolestats!' => \$output_rolestats,
270		'separator=s' => \$output_separator,
271		'subsystem!' => \$subsystem,
272		'status!' => \$status,
273		'substatus!' => \$output_substatus,
274		'scm!' => \$scm,
275		'tree!' => \$tree,
276		'web!' => \$web,
277		'bug!' => \$bug,
278		'letters=s' => \$letters,
279		'pattern-depth=i' => \$pattern_depth,
280		'k|keywords!' => \$keywords,
281		'kf|keywords-in-file!' => \$keywords_in_file,
282		'sections!' => \$sections,
283		'fe|file-emails!' => \$email_file_emails,
284		'f|file' => \$from_filename,
285		'find-maintainer-files' => \$find_maintainer_files,
286		'mpath|maintainer-path=s' => \$maintainer_path,
287		'self-test:s' => \$self_test,
288		'v|version' => \$version,
289		'h|help|usage' => \$help,
290		)) {
291    die "$P: invalid argument - use --help if necessary\n";
292}
293
294if ($help != 0) {
295    usage();
296    exit 0;
297}
298
299if ($version != 0) {
300    print("${P} ${V}\n");
301    exit 0;
302}
303
304if (defined $self_test) {
305    read_all_maintainer_files();
306    self_test();
307    exit 0;
308}
309
310if (-t STDIN && !@ARGV) {
311    # We're talking to a terminal, but have no command line arguments.
312    die "$P: missing patchfile or -f file - use --help if necessary\n";
313}
314
315$output_multiline = 0 if ($output_separator ne ", ");
316$output_rolestats = 1 if ($interactive);
317$output_roles = 1 if ($output_rolestats);
318
319if (!defined $output_substatus) {
320    $output_substatus = $email && $output_roles && -t STDOUT;
321}
322
323if ($sections || $letters ne "") {
324    $sections = 1;
325    $email = 0;
326    $email_list = 0;
327    $scm = 0;
328    $status = 0;
329    $subsystem = 0;
330    $web = 0;
331    $bug = 0;
332    $keywords = 0;
333    $keywords_in_file = 0;
334    $interactive = 0;
335} else {
336    my $selections = $email + $scm + $status + $subsystem + $web + $bug;
337    if ($selections == 0) {
338	die "$P:  Missing required option: email, scm, status, subsystem, web or bug\n";
339    }
340}
341
342if ($email &&
343    ($email_maintainer + $email_reviewer +
344     $email_list + $email_subscriber_list +
345     $email_git + $email_git_penguin_chiefs + $email_git_blame) == 0) {
346    die "$P: Please select at least 1 email option\n";
347}
348
349if ($tree && !top_of_kernel_tree($lk_path)) {
350    die "$P: The current directory does not appear to be "
351	. "a linux kernel source tree.\n";
352}
353
354## Read MAINTAINERS for type/value pairs
355
356my @typevalue = ();
357my %keyword_hash;
358my @mfiles = ();
359my @self_test_info = ();
360
361sub read_maintainer_file {
362    my ($file) = @_;
363
364    open (my $maint, '<', "$file")
365	or die "$P: Can't open MAINTAINERS file '$file': $!\n";
366    my $i = 1;
367    while (<$maint>) {
368	my $line = $_;
369	chomp $line;
370
371	if ($line =~ m/^([A-Z]):\s*(.*)/) {
372	    my $type = $1;
373	    my $value = $2;
374
375	    ##Filename pattern matching
376	    if ($type eq "F" || $type eq "X") {
377		$value =~ s@\.@\\\.@g;       ##Convert . to \.
378		$value =~ s/\*/\.\*/g;       ##Convert * to .*
379		$value =~ s/\?/\./g;         ##Convert ? to .
380		##if pattern is a directory and it lacks a trailing slash, add one
381		if ((-d $value)) {
382		    $value =~ s@([^/])$@$1/@;
383		}
384	    } elsif ($type eq "K") {
385		$keyword_hash{@typevalue} = $value;
386	    }
387	    push(@typevalue, "$type:$value");
388	} elsif (!(/^\s*$/ || /^\s*\#/)) {
389	    push(@typevalue, $line);
390	}
391	if (defined $self_test) {
392	    push(@self_test_info, {file=>$file, linenr=>$i, line=>$line});
393	}
394	$i++;
395    }
396    close($maint);
397}
398
399sub find_is_maintainer_file {
400    my ($file) = $_;
401    return if ($file !~ m@/MAINTAINERS$@);
402    $file = $File::Find::name;
403    return if (! -f $file);
404    push(@mfiles, $file);
405}
406
407sub find_ignore_git {
408    return grep { $_ !~ /^\.git$/; } @_;
409}
410
411read_all_maintainer_files();
412
413sub read_all_maintainer_files {
414    my $path = "${lk_path}MAINTAINERS";
415    if (defined $maintainer_path) {
416	$path = $maintainer_path;
417	# Perl Cookbook tilde expansion if necessary
418	$path =~ s@^~([^/]*)@ $1 ? (getpwnam($1))[7] : ( $ENV{HOME} || $ENV{LOGDIR} || (getpwuid($<))[7])@ex;
419    }
420
421    if (-d $path) {
422	$path .= '/' if ($path !~ m@/$@);
423	if ($find_maintainer_files) {
424	    find( { wanted => \&find_is_maintainer_file,
425		    preprocess => \&find_ignore_git,
426		    no_chdir => 1,
427		}, "$path");
428	} else {
429	    opendir(DIR, "$path") or die $!;
430	    my @files = readdir(DIR);
431	    closedir(DIR);
432	    foreach my $file (@files) {
433		push(@mfiles, "$path$file") if ($file !~ /^\./);
434	    }
435	}
436    } elsif (-f "$path") {
437	push(@mfiles, "$path");
438    } else {
439	die "$P: MAINTAINER file not found '$path'\n";
440    }
441    die "$P: No MAINTAINER files found in '$path'\n" if (scalar(@mfiles) == 0);
442    foreach my $file (@mfiles) {
443	read_maintainer_file("$file");
444    }
445}
446
447sub maintainers_in_file {
448    my ($file) = @_;
449
450    return if ($file =~ m@\bMAINTAINERS$@);
451
452    if (-f $file && ($email_file_emails || $file =~ /\.yaml$/)) {
453	open(my $f, '<', $file)
454	    or die "$P: Can't open $file: $!\n";
455	my $text = do { local($/) ; <$f> };
456	close($f);
457
458	my @poss_addr = $text =~ m$[\p{L}\"\' \,\.\+-]*\s*[\,]*\s*[\(\<\{]{0,1}[A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+\.[A-Za-z0-9]+[\)\>\}]{0,1}$g;
459	push(@file_emails, clean_file_emails(@poss_addr));
460    }
461}
462
463#
464# Read mail address map
465#
466
467my $mailmap;
468
469read_mailmap();
470
471sub read_mailmap {
472    $mailmap = {
473	names => {},
474	addresses => {}
475    };
476
477    return if (!$email_use_mailmap || !(-f "${lk_path}.mailmap"));
478
479    open(my $mailmap_file, '<', "${lk_path}.mailmap")
480	or warn "$P: Can't open .mailmap: $!\n";
481
482    while (<$mailmap_file>) {
483	s/#.*$//; #strip comments
484	s/^\s+|\s+$//g; #trim
485
486	next if (/^\s*$/); #skip empty lines
487	#entries have one of the following formats:
488	# name1 <mail1>
489	# <mail1> <mail2>
490	# name1 <mail1> <mail2>
491	# name1 <mail1> name2 <mail2>
492	# (see man git-shortlog)
493
494	if (/^([^<]+)<([^>]+)>$/) {
495	    my $real_name = $1;
496	    my $address = $2;
497
498	    $real_name =~ s/\s+$//;
499	    ($real_name, $address) = parse_email("$real_name <$address>");
500	    $mailmap->{names}->{$address} = $real_name;
501
502	} elsif (/^<([^>]+)>\s*<([^>]+)>$/) {
503	    my $real_address = $1;
504	    my $wrong_address = $2;
505
506	    $mailmap->{addresses}->{$wrong_address} = $real_address;
507
508	} elsif (/^(.+)<([^>]+)>\s*<([^>]+)>$/) {
509	    my $real_name = $1;
510	    my $real_address = $2;
511	    my $wrong_address = $3;
512
513	    $real_name =~ s/\s+$//;
514	    ($real_name, $real_address) =
515		parse_email("$real_name <$real_address>");
516	    $mailmap->{names}->{$wrong_address} = $real_name;
517	    $mailmap->{addresses}->{$wrong_address} = $real_address;
518
519	} elsif (/^(.+)<([^>]+)>\s*(.+)\s*<([^>]+)>$/) {
520	    my $real_name = $1;
521	    my $real_address = $2;
522	    my $wrong_name = $3;
523	    my $wrong_address = $4;
524
525	    $real_name =~ s/\s+$//;
526	    ($real_name, $real_address) =
527		parse_email("$real_name <$real_address>");
528
529	    $wrong_name =~ s/\s+$//;
530	    ($wrong_name, $wrong_address) =
531		parse_email("$wrong_name <$wrong_address>");
532
533	    my $wrong_email = format_email($wrong_name, $wrong_address, 1);
534	    $mailmap->{names}->{$wrong_email} = $real_name;
535	    $mailmap->{addresses}->{$wrong_email} = $real_address;
536	}
537    }
538    close($mailmap_file);
539}
540
541## use the filenames on the command line or find the filenames in the patchfiles
542
543if (!@ARGV) {
544    push(@ARGV, "&STDIN");
545}
546
547foreach my $file (@ARGV) {
548    if ($file ne "&STDIN") {
549	$file = canonpath($file);
550	##if $file is a directory and it lacks a trailing slash, add one
551	if ((-d $file)) {
552	    $file =~ s@([^/])$@$1/@;
553	} elsif (!(-f $file)) {
554	    die "$P: file '${file}' not found\n";
555	}
556    }
557    if ($from_filename && (vcs_exists() && !vcs_file_exists($file))) {
558	warn "$P: file '$file' not found in version control $!\n";
559    }
560    if ($from_filename || ($file ne "&STDIN" && vcs_file_exists($file))) {
561	$file =~ s/^\Q${cur_path}\E//;	#strip any absolute path
562	$file =~ s/^\Q${lk_path}\E//;	#or the path to the lk tree
563	push(@files, $file);
564	if ($file ne "MAINTAINERS" && -f $file && $keywords && $keywords_in_file) {
565	    open(my $f, '<', $file)
566		or die "$P: Can't open $file: $!\n";
567	    my $text = do { local($/) ; <$f> };
568	    close($f);
569	    foreach my $line (keys %keyword_hash) {
570		if ($text =~ m/$keyword_hash{$line}/x) {
571		    push(@keyword_tvi, $line);
572		}
573	    }
574	}
575    } else {
576	my $file_cnt = @files;
577	my $lastfile;
578
579	open(my $patch, "< $file")
580	    or die "$P: Can't open $file: $!\n";
581
582	# We can check arbitrary information before the patch
583	# like the commit message, mail headers, etc...
584	# This allows us to match arbitrary keywords against any part
585	# of a git format-patch generated file (subject tags, etc...)
586
587	my $patch_prefix = "";			#Parsing the intro
588
589	while (<$patch>) {
590	    my $patch_line = $_;
591	    if (m/^ mode change [0-7]+ => [0-7]+ (\S+)\s*$/) {
592		my $filename = $1;
593		push(@files, $filename);
594	    } elsif (m/^rename (?:from|to) (\S+)\s*$/) {
595		my $filename = $1;
596		push(@files, $filename);
597	    } elsif (m/^diff --git a\/(\S+) b\/(\S+)\s*$/) {
598		my $filename1 = $1;
599		my $filename2 = $2;
600		push(@files, $filename1);
601		push(@files, $filename2);
602	    } elsif (m/^Fixes:\s+([0-9a-fA-F]{6,40})/) {
603		push(@fixes, $1) if ($email_fixes);
604	    } elsif (m/^\+\+\+\s+(\S+)/ or m/^---\s+(\S+)/) {
605		my $filename = $1;
606		$filename =~ s@^[^/]*/@@;
607		$filename =~ s@\n@@;
608		$lastfile = $filename;
609		push(@files, $filename);
610		$patch_prefix = "^[+-].*";	#Now parsing the actual patch
611	    } elsif (m/^\@\@ -(\d+),(\d+)/) {
612		if ($email_git_blame) {
613		    push(@range, "$lastfile:$1:$2");
614		}
615	    } elsif ($keywords) {
616		foreach my $line (keys %keyword_hash) {
617		    if ($patch_line =~ m/${patch_prefix}$keyword_hash{$line}/x) {
618			push(@keyword_tvi, $line);
619		    }
620		}
621	    }
622	}
623	close($patch);
624
625	if ($file_cnt == @files) {
626	    warn "$P: file '${file}' doesn't appear to be a patch.  "
627		. "Add -f to options?\n";
628	}
629	@files = sort_and_uniq(@files);
630    }
631}
632
633@file_emails = uniq(@file_emails);
634@fixes = uniq(@fixes);
635
636my %email_hash_name;
637my %email_hash_address;
638my @email_to = ();
639my %hash_list_to;
640my @list_to = ();
641my @scm = ();
642my @web = ();
643my @bug = ();
644my @subsystem = ();
645my @status = ();
646my @substatus = ();
647my %deduplicate_name_hash = ();
648my %deduplicate_address_hash = ();
649
650my @maintainers = get_maintainers();
651if (@maintainers) {
652    @maintainers = merge_email(@maintainers);
653    output(@maintainers);
654}
655
656if ($scm) {
657    @scm = uniq(@scm);
658    output(@scm);
659}
660
661if ($output_substatus) {
662    @substatus = uniq(@substatus);
663    output(@substatus);
664}
665
666if ($status) {
667    @status = uniq(@status);
668    output(@status);
669}
670
671if ($subsystem) {
672    @subsystem = uniq(@subsystem);
673    output(@subsystem);
674}
675
676if ($web) {
677    @web = uniq(@web);
678    output(@web);
679}
680
681if ($bug) {
682    @bug = uniq(@bug);
683    output(@bug);
684}
685
686exit($exit);
687
688sub self_test {
689    my @lsfiles = ();
690    my @good_links = ();
691    my @bad_links = ();
692    my @section_headers = ();
693    my $index = 0;
694
695    @lsfiles = vcs_list_files($lk_path);
696
697    for my $x (@self_test_info) {
698	$index++;
699
700	## Section header duplication and missing section content
701	if (($self_test eq "" || $self_test =~ /\bsections\b/) &&
702	    $x->{line} =~ /^\S[^:]/ &&
703	    defined $self_test_info[$index] &&
704	    $self_test_info[$index]->{line} =~ /^([A-Z]):\s*\S/) {
705	    my $has_S = 0;
706	    my $has_F = 0;
707	    my $has_ML = 0;
708	    my $status = "";
709	    if (grep(m@^\Q$x->{line}\E@, @section_headers)) {
710		print("$x->{file}:$x->{linenr}: warning: duplicate section header\t$x->{line}\n");
711	    } else {
712		push(@section_headers, $x->{line});
713	    }
714	    my $nextline = $index;
715	    while (defined $self_test_info[$nextline] &&
716		   $self_test_info[$nextline]->{line} =~ /^([A-Z]):\s*(\S.*)/) {
717		my $type = $1;
718		my $value = $2;
719		if ($type eq "S") {
720		    $has_S = 1;
721		    $status = $value;
722		} elsif ($type eq "F" || $type eq "N") {
723		    $has_F = 1;
724		} elsif ($type eq "M" || $type eq "R" || $type eq "L") {
725		    $has_ML = 1;
726		}
727		$nextline++;
728	    }
729	    if (!$has_ML && $status !~ /orphan|obsolete/i) {
730		print("$x->{file}:$x->{linenr}: warning: section without email address\t$x->{line}\n");
731	    }
732	    if (!$has_S) {
733		print("$x->{file}:$x->{linenr}: warning: section without status \t$x->{line}\n");
734	    }
735	    if (!$has_F) {
736		print("$x->{file}:$x->{linenr}: warning: section without file pattern\t$x->{line}\n");
737	    }
738	}
739
740	next if ($x->{line} !~ /^([A-Z]):\s*(.*)/);
741
742	my $type = $1;
743	my $value = $2;
744
745	## Filename pattern matching
746	if (($type eq "F" || $type eq "X") &&
747	    ($self_test eq "" || $self_test =~ /\bpatterns\b/)) {
748	    $value =~ s@\.@\\\.@g;       ##Convert . to \.
749	    $value =~ s/\*/\.\*/g;       ##Convert * to .*
750	    $value =~ s/\?/\./g;         ##Convert ? to .
751	    ##if pattern is a directory and it lacks a trailing slash, add one
752	    if ((-d $value)) {
753		$value =~ s@([^/])$@$1/@;
754	    }
755	    if (!grep(m@^$value@, @lsfiles)) {
756		print("$x->{file}:$x->{linenr}: warning: no file matches\t$x->{line}\n");
757	    }
758
759	## Link reachability
760	} elsif (($type eq "W" || $type eq "Q" || $type eq "B") &&
761		 $value =~ /^https?:/ &&
762		 ($self_test eq "" || $self_test =~ /\blinks\b/)) {
763	    next if (grep(m@^\Q$value\E$@, @good_links));
764	    my $isbad = 0;
765	    if (grep(m@^\Q$value\E$@, @bad_links)) {
766	        $isbad = 1;
767	    } else {
768		my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $value`;
769		if ($? == 0) {
770		    push(@good_links, $value);
771		} else {
772		    push(@bad_links, $value);
773		    $isbad = 1;
774		}
775	    }
776	    if ($isbad) {
777	        print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
778	    }
779
780	## SCM reachability
781	} elsif ($type eq "T" &&
782		 ($self_test eq "" || $self_test =~ /\bscm\b/)) {
783	    next if (grep(m@^\Q$value\E$@, @good_links));
784	    my $isbad = 0;
785	    if (grep(m@^\Q$value\E$@, @bad_links)) {
786	        $isbad = 1;
787            } elsif ($value !~ /^(?:git|quilt|hg)\s+\S/) {
788		print("$x->{file}:$x->{linenr}: warning: malformed entry\t$x->{line}\n");
789	    } elsif ($value =~ /^git\s+(\S+)(\s+([^\(]+\S+))?/) {
790		my $url = $1;
791		my $branch = "";
792		$branch = $3 if $3;
793		my $output = `git ls-remote --exit-code -h "$url" $branch > /dev/null 2>&1`;
794		if ($? == 0) {
795		    push(@good_links, $value);
796		} else {
797		    push(@bad_links, $value);
798		    $isbad = 1;
799		}
800	    } elsif ($value =~ /^(?:quilt|hg)\s+(https?:\S+)/) {
801		my $url = $1;
802		my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $url`;
803		if ($? == 0) {
804		    push(@good_links, $value);
805		} else {
806		    push(@bad_links, $value);
807		    $isbad = 1;
808		}
809	    }
810	    if ($isbad) {
811		print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
812	    }
813	}
814    }
815}
816
817sub ignore_email_address {
818    my ($address) = @_;
819
820    foreach my $ignore (@ignore_emails) {
821	return 1 if ($ignore eq $address);
822    }
823
824    return 0;
825}
826
827sub range_is_maintained {
828    my ($start, $end) = @_;
829
830    for (my $i = $start; $i < $end; $i++) {
831	my $line = $typevalue[$i];
832	if ($line =~ m/^([A-Z]):\s*(.*)/) {
833	    my $type = $1;
834	    my $value = $2;
835	    if ($type eq 'S') {
836		if ($value =~ /(maintain|support)/i) {
837		    return 1;
838		}
839	    }
840	}
841    }
842    return 0;
843}
844
845sub range_has_maintainer {
846    my ($start, $end) = @_;
847
848    for (my $i = $start; $i < $end; $i++) {
849	my $line = $typevalue[$i];
850	if ($line =~ m/^([A-Z]):\s*(.*)/) {
851	    my $type = $1;
852	    my $value = $2;
853	    if ($type eq 'M') {
854		return 1;
855	    }
856	}
857    }
858    return 0;
859}
860
861sub get_maintainers {
862    %email_hash_name = ();
863    %email_hash_address = ();
864    %commit_author_hash = ();
865    %commit_signer_hash = ();
866    @email_to = ();
867    %hash_list_to = ();
868    @list_to = ();
869    @scm = ();
870    @web = ();
871    @bug = ();
872    @subsystem = ();
873    @status = ();
874    @substatus = ();
875    %deduplicate_name_hash = ();
876    %deduplicate_address_hash = ();
877    if ($email_git_all_signature_types) {
878	$signature_pattern = "(.+?)[Bb][Yy]:";
879    } else {
880	$signature_pattern = "\(" . join("|", @signature_tags) . "\)";
881    }
882
883    # Find responsible parties
884
885    my %exact_pattern_match_hash = ();
886
887    foreach my $file (@files) {
888
889	my %hash;
890	my $tvi = find_first_section();
891	while ($tvi < @typevalue) {
892	    my $start = find_starting_index($tvi);
893	    my $end = find_ending_index($tvi);
894	    my $exclude = 0;
895	    my $i;
896
897	    #Do not match excluded file patterns
898
899	    for ($i = $start; $i < $end; $i++) {
900		my $line = $typevalue[$i];
901		if ($line =~ m/^([A-Z]):\s*(.*)/) {
902		    my $type = $1;
903		    my $value = $2;
904		    if ($type eq 'X') {
905			if (file_match_pattern($file, $value)) {
906			    $exclude = 1;
907			    last;
908			}
909		    }
910		}
911	    }
912
913	    if (!$exclude) {
914		for ($i = $start; $i < $end; $i++) {
915		    my $line = $typevalue[$i];
916		    if ($line =~ m/^([A-Z]):\s*(.*)/) {
917			my $type = $1;
918			my $value = $2;
919			if ($type eq 'F') {
920			    if (file_match_pattern($file, $value)) {
921				my $value_pd = ($value =~ tr@/@@);
922				my $file_pd = ($file  =~ tr@/@@);
923				$value_pd++ if (substr($value,-1,1) ne "/");
924				$value_pd = -1 if ($value =~ /^\.\*/);
925				if ($value_pd >= $file_pd &&
926				    range_is_maintained($start, $end) &&
927				    range_has_maintainer($start, $end)) {
928				    $exact_pattern_match_hash{$file} = 1;
929				}
930				if ($pattern_depth == 0 ||
931				    (($file_pd - $value_pd) < $pattern_depth)) {
932				    $hash{$tvi} = $value_pd;
933				}
934			    }
935			} elsif ($type eq 'N') {
936			    if ($file =~ m/$value/x) {
937				$hash{$tvi} = 0;
938			    }
939			}
940		    }
941		}
942	    }
943	    $tvi = $end + 1;
944	}
945
946	foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
947	    add_categories($line, "");
948	    if ($sections) {
949		my $i;
950		my $start = find_starting_index($line);
951		my $end = find_ending_index($line);
952		for ($i = $start; $i < $end; $i++) {
953		    my $line = $typevalue[$i];
954		    if ($line =~ /^[FX]:/) {		##Restore file patterns
955			$line =~ s/([^\\])\.([^\*])/$1\?$2/g;
956			$line =~ s/([^\\])\.$/$1\?/g;	##Convert . back to ?
957			$line =~ s/\\\./\./g;       	##Convert \. to .
958			$line =~ s/\.\*/\*/g;       	##Convert .* to *
959		    }
960		    my $count = $line =~ s/^([A-Z]):/$1:\t/g;
961		    if ($letters eq "" || (!$count || $letters =~ /$1/i)) {
962			print("$line\n");
963		    }
964		}
965		print("\n");
966	    }
967	}
968
969	maintainers_in_file($file);
970    }
971
972    if ($keywords) {
973	@keyword_tvi = sort_and_uniq(@keyword_tvi);
974	foreach my $line (@keyword_tvi) {
975	    add_categories($line, ":Keyword:$keyword_hash{$line}");
976	}
977    }
978
979    foreach my $email (@email_to, @list_to) {
980	$email->[0] = deduplicate_email($email->[0]);
981    }
982
983    foreach my $file (@files) {
984	if ($email &&
985	    ($email_git ||
986	     ($email_git_fallback &&
987	      $file !~ /MAINTAINERS$/ &&
988	      !$exact_pattern_match_hash{$file}))) {
989	    vcs_file_signoffs($file);
990	}
991	if ($email && $email_git_blame) {
992	    vcs_file_blame($file);
993	}
994    }
995
996    if ($email) {
997	foreach my $chief (@penguin_chief) {
998	    if ($chief =~ m/^(.*):(.*)/) {
999		my $email_address;
1000
1001		$email_address = format_email($1, $2, $email_usename);
1002		if ($email_git_penguin_chiefs) {
1003		    push(@email_to, [$email_address, 'chief penguin']);
1004		} else {
1005		    @email_to = grep($_->[0] !~ /${email_address}/, @email_to);
1006		}
1007	    }
1008	}
1009
1010	foreach my $email (@file_emails) {
1011	    $email = mailmap_email($email);
1012	    my ($name, $address) = parse_email($email);
1013
1014	    my $tmp_email = format_email($name, $address, $email_usename);
1015	    push_email_address($tmp_email, '');
1016	    add_role($tmp_email, 'in file');
1017	}
1018    }
1019
1020    foreach my $fix (@fixes) {
1021	vcs_add_commit_signers($fix, "blamed_fixes");
1022    }
1023
1024    my @to = ();
1025    if ($email || $email_list) {
1026	if ($email) {
1027	    @to = (@to, @email_to);
1028	}
1029	if ($email_list) {
1030	    @to = (@to, @list_to);
1031	}
1032    }
1033
1034    if ($interactive) {
1035	@to = interactive_get_maintainers(\@to);
1036    }
1037
1038    return @to;
1039}
1040
1041sub file_match_pattern {
1042    my ($file, $pattern) = @_;
1043    if (substr($pattern, -1) eq "/") {
1044	if ($file =~ m@^$pattern@) {
1045	    return 1;
1046	}
1047    } else {
1048	if ($file =~ m@^$pattern@) {
1049	    my $s1 = ($file =~ tr@/@@);
1050	    my $s2 = ($pattern =~ tr@/@@);
1051	    if ($s1 == $s2) {
1052		return 1;
1053	    }
1054	}
1055    }
1056    return 0;
1057}
1058
1059sub usage {
1060    print <<EOT;
1061usage: $P [options] patchfile
1062       $P [options] -f file|directory
1063version: $V
1064
1065MAINTAINER field selection options:
1066  --email => print email address(es) if any
1067    --git => include recent git \*-by: signers
1068    --git-all-signature-types => include signers regardless of signature type
1069        or use only ${signature_pattern} signers (default: $email_git_all_signature_types)
1070    --git-fallback => use git when no exact MAINTAINERS pattern (default: $email_git_fallback)
1071    --git-chief-penguins => include ${penguin_chiefs}
1072    --git-min-signatures => number of signatures required (default: $email_git_min_signatures)
1073    --git-max-maintainers => maximum maintainers to add (default: $email_git_max_maintainers)
1074    --git-min-percent => minimum percentage of commits required (default: $email_git_min_percent)
1075    --git-blame => use git blame to find modified commits for patch or file
1076    --git-blame-signatures => when used with --git-blame, also include all commit signers
1077    --git-since => git history to use (default: $email_git_since)
1078    --hg-since => hg history to use (default: $email_hg_since)
1079    --interactive => display a menu (mostly useful if used with the --git option)
1080    --m => include maintainer(s) if any
1081    --r => include reviewer(s) if any
1082    --n => include name 'Full Name <addr\@domain.tld>'
1083    --l => include list(s) if any
1084    --moderated => include moderated lists(s) if any (default: true)
1085    --s => include subscriber only list(s) if any (default: false)
1086    --remove-duplicates => minimize duplicate email names/addresses
1087    --roles => show roles (role:subsystem, git-signer, list, etc...)
1088    --rolestats => show roles and statistics (commits/total_commits, %)
1089    --substatus => show subsystem status if not Maintained (default: match --roles when output is tty)"
1090    --file-emails => add email addresses found in -f file (default: 0 (off))
1091    --fixes => for patches, add signatures of commits with 'Fixes: <commit>' (default: 1 (on))
1092  --scm => print SCM tree(s) if any
1093  --status => print status if any
1094  --subsystem => print subsystem name if any
1095  --web => print website(s) if any
1096  --bug => print bug reporting info if any
1097
1098Output type options:
1099  --separator [, ] => separator for multiple entries on 1 line
1100    using --separator also sets --nomultiline if --separator is not [, ]
1101  --multiline => print 1 entry per line
1102
1103Other options:
1104  --pattern-depth => Number of pattern directory traversals (default: 0 (all))
1105  --keywords => scan patch for keywords (default: $keywords)
1106  --keywords-in-file => scan file for keywords (default: $keywords_in_file)
1107  --sections => print all of the subsystem sections with pattern matches
1108  --letters => print all matching 'letter' types from all matching sections
1109  --mailmap => use .mailmap file (default: $email_use_mailmap)
1110  --no-tree => run without a kernel tree
1111  --self-test => show potential issues with MAINTAINERS file content
1112  --version => show version
1113  --help => show this help information
1114
1115Default options:
1116  [--email --tree --nogit --git-fallback --m --r --n --l --multiline
1117   --pattern-depth=0 --remove-duplicates --rolestats --keywords]
1118
1119Notes:
1120  Using "-f directory" may give unexpected results:
1121      Used with "--git", git signators for _all_ files in and below
1122          directory are examined as git recurses directories.
1123          Any specified X: (exclude) pattern matches are _not_ ignored.
1124      Used with "--nogit", directory is used as a pattern match,
1125          no individual file within the directory or subdirectory
1126          is matched.
1127      Used with "--git-blame", does not iterate all files in directory
1128  Using "--git-blame" is slow and may add old committers and authors
1129      that are no longer active maintainers to the output.
1130  Using "--roles" or "--rolestats" with git send-email --cc-cmd or any
1131      other automated tools that expect only ["name"] <email address>
1132      may not work because of additional output after <email address>.
1133  Using "--rolestats" and "--git-blame" shows the #/total=% commits,
1134      not the percentage of the entire file authored.  # of commits is
1135      not a good measure of amount of code authored.  1 major commit may
1136      contain a thousand lines, 5 trivial commits may modify a single line.
1137  If git is not installed, but mercurial (hg) is installed and an .hg
1138      repository exists, the following options apply to mercurial:
1139          --git,
1140          --git-min-signatures, --git-max-maintainers, --git-min-percent, and
1141          --git-blame
1142      Use --hg-since not --git-since to control date selection
1143  File ".get_maintainer.conf", if it exists in the linux kernel source root
1144      directory, can change whatever get_maintainer defaults are desired.
1145      Entries in this file can be any command line argument.
1146      This file is prepended to any additional command line arguments.
1147      Multiple lines and # comments are allowed.
1148  Most options have both positive and negative forms.
1149      The negative forms for --<foo> are --no<foo> and --no-<foo>.
1150
1151EOT
1152}
1153
1154sub top_of_kernel_tree {
1155    my ($lk_path) = @_;
1156
1157    if ($lk_path ne "" && substr($lk_path,length($lk_path)-1,1) ne "/") {
1158	$lk_path .= "/";
1159    }
1160    if (   (-f "${lk_path}COPYING")
1161	&& (-f "${lk_path}CREDITS")
1162	&& (-f "${lk_path}Kbuild")
1163	&& (-e "${lk_path}MAINTAINERS")
1164	&& (-f "${lk_path}Makefile")
1165	&& (-f "${lk_path}README")
1166	&& (-d "${lk_path}Documentation")
1167	&& (-d "${lk_path}arch")
1168	&& (-d "${lk_path}include")
1169	&& (-d "${lk_path}drivers")
1170	&& (-d "${lk_path}fs")
1171	&& (-d "${lk_path}init")
1172	&& (-d "${lk_path}ipc")
1173	&& (-d "${lk_path}kernel")
1174	&& (-d "${lk_path}lib")
1175	&& (-d "${lk_path}scripts")) {
1176	return 1;
1177    }
1178    return 0;
1179}
1180
1181sub escape_name {
1182    my ($name) = @_;
1183
1184    if ($name =~ /[^\w \-]/ai) {  	 ##has "must quote" chars
1185	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
1186	$name = "\"$name\"";
1187    }
1188
1189    return $name;
1190}
1191
1192sub parse_email {
1193    my ($formatted_email) = @_;
1194
1195    my $name = "";
1196    my $address = "";
1197
1198    if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) {
1199	$name = $1;
1200	$address = $2;
1201    } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) {
1202	$address = $1;
1203    } elsif ($formatted_email =~ /^(.+\@\S*).*$/) {
1204	$address = $1;
1205    }
1206
1207    $name =~ s/^\s+|\s+$//g;
1208    $name =~ s/^\"|\"$//g;
1209    $name = escape_name($name);
1210    $address =~ s/^\s+|\s+$//g;
1211
1212    return ($name, $address);
1213}
1214
1215sub format_email {
1216    my ($name, $address, $usename) = @_;
1217
1218    my $formatted_email;
1219
1220    $name =~ s/^\s+|\s+$//g;
1221    $name =~ s/^\"|\"$//g;
1222    $name = escape_name($name);
1223    $address =~ s/^\s+|\s+$//g;
1224
1225    if ($usename) {
1226	if ("$name" eq "") {
1227	    $formatted_email = "$address";
1228	} else {
1229	    $formatted_email = "$name <$address>";
1230	}
1231    } else {
1232	$formatted_email = $address;
1233    }
1234
1235    return $formatted_email;
1236}
1237
1238sub find_first_section {
1239    my $index = 0;
1240
1241    while ($index < @typevalue) {
1242	my $tv = $typevalue[$index];
1243	if (($tv =~ m/^([A-Z]):\s*(.*)/)) {
1244	    last;
1245	}
1246	$index++;
1247    }
1248
1249    return $index;
1250}
1251
1252sub find_starting_index {
1253    my ($index) = @_;
1254
1255    while ($index > 0) {
1256	my $tv = $typevalue[$index];
1257	if (!($tv =~ m/^([A-Z]):\s*(.*)/)) {
1258	    last;
1259	}
1260	$index--;
1261    }
1262
1263    return $index;
1264}
1265
1266sub find_ending_index {
1267    my ($index) = @_;
1268
1269    while ($index < @typevalue) {
1270	my $tv = $typevalue[$index];
1271	if (!($tv =~ m/^([A-Z]):\s*(.*)/)) {
1272	    last;
1273	}
1274	$index++;
1275    }
1276
1277    return $index;
1278}
1279
1280sub get_subsystem_name {
1281    my ($index) = @_;
1282
1283    my $start = find_starting_index($index);
1284
1285    my $subsystem = $typevalue[$start];
1286    if ($output_section_maxlen && length($subsystem) > $output_section_maxlen) {
1287	$subsystem = substr($subsystem, 0, $output_section_maxlen - 3);
1288	$subsystem =~ s/\s*$//;
1289	$subsystem = $subsystem . "...";
1290    }
1291    return $subsystem;
1292}
1293
1294sub get_maintainer_role {
1295    my ($index) = @_;
1296
1297    my $i;
1298    my $start = find_starting_index($index);
1299    my $end = find_ending_index($index);
1300
1301    my $role = "maintainer";
1302    my $subsystem = get_subsystem_name($index);
1303    my $status = "unknown";
1304
1305    for ($i = $start + 1; $i < $end; $i++) {
1306	my $tv = $typevalue[$i];
1307	if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1308	    my $ptype = $1;
1309	    my $pvalue = $2;
1310	    if ($ptype eq "S") {
1311		$status = $pvalue;
1312	    }
1313	}
1314    }
1315
1316    $status = lc($status);
1317    if ($status eq "buried alive in reporters") {
1318	$role = "chief penguin";
1319    }
1320
1321    return $role . ":" . $subsystem;
1322}
1323
1324sub get_list_role {
1325    my ($index) = @_;
1326
1327    my $subsystem = get_subsystem_name($index);
1328
1329    if ($subsystem eq "THE REST") {
1330	$subsystem = "";
1331    }
1332
1333    return $subsystem;
1334}
1335
1336sub add_categories {
1337    my ($index, $suffix) = @_;
1338
1339    my $i;
1340    my $start = find_starting_index($index);
1341    my $end = find_ending_index($index);
1342
1343    my $subsystem = $typevalue[$start];
1344    push(@subsystem, $subsystem);
1345    my $status = "Unknown";
1346
1347    for ($i = $start + 1; $i < $end; $i++) {
1348	my $tv = $typevalue[$i];
1349	if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1350	    my $ptype = $1;
1351	    my $pvalue = $2;
1352	    if ($ptype eq "L") {
1353		my $list_address = $pvalue;
1354		my $list_additional = "";
1355		my $list_role = get_list_role($i);
1356
1357		if ($list_role ne "") {
1358		    $list_role = ":" . $list_role;
1359		}
1360		if ($list_address =~ m/([^\s]+)\s+(.*)$/) {
1361		    $list_address = $1;
1362		    $list_additional = $2;
1363		}
1364		if ($list_additional =~ m/subscribers-only/) {
1365		    if ($email_subscriber_list) {
1366			if (!$hash_list_to{lc($list_address)}) {
1367			    $hash_list_to{lc($list_address)} = 1;
1368			    push(@list_to, [$list_address,
1369					    "subscriber list${list_role}" . $suffix]);
1370			}
1371		    }
1372		} else {
1373		    if ($email_list) {
1374			if (!$hash_list_to{lc($list_address)}) {
1375			    if ($list_additional =~ m/moderated/) {
1376				if ($email_moderated_list) {
1377				    $hash_list_to{lc($list_address)} = 1;
1378				    push(@list_to, [$list_address,
1379						    "moderated list${list_role}" . $suffix]);
1380				}
1381			    } else {
1382				$hash_list_to{lc($list_address)} = 1;
1383				push(@list_to, [$list_address,
1384						"open list${list_role}" . $suffix]);
1385			    }
1386			}
1387		    }
1388		}
1389	    } elsif ($ptype eq "M") {
1390		if ($email_maintainer) {
1391		    my $role = get_maintainer_role($i);
1392		    push_email_addresses($pvalue, $role . $suffix);
1393		}
1394	    } elsif ($ptype eq "R") {
1395		if ($email_reviewer) {
1396		    my $subs = get_subsystem_name($i);
1397		    push_email_addresses($pvalue, "reviewer:$subs" . $suffix);
1398		}
1399	    } elsif ($ptype eq "T") {
1400		push(@scm, $pvalue . $suffix);
1401	    } elsif ($ptype eq "W") {
1402		push(@web, $pvalue . $suffix);
1403	    } elsif ($ptype eq "B") {
1404		push(@bug, $pvalue . $suffix);
1405	    } elsif ($ptype eq "S") {
1406		push(@status, $pvalue . $suffix);
1407		$status = $pvalue;
1408	    }
1409	}
1410    }
1411
1412    if ($subsystem ne "THE REST" and $status ne "Maintained") {
1413	push(@substatus, $subsystem . " status: " . $status . $suffix)
1414    }
1415}
1416
1417sub email_inuse {
1418    my ($name, $address) = @_;
1419
1420    return 1 if (($name eq "") && ($address eq ""));
1421    return 1 if (($name ne "") && exists($email_hash_name{lc($name)}));
1422    return 1 if (($address ne "") && exists($email_hash_address{lc($address)}));
1423
1424    return 0;
1425}
1426
1427sub push_email_address {
1428    my ($line, $role) = @_;
1429
1430    my ($name, $address) = parse_email($line);
1431
1432    if ($address eq "") {
1433	return 0;
1434    }
1435
1436    if (!$email_remove_duplicates) {
1437	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1438    } elsif (!email_inuse($name, $address)) {
1439	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1440	$email_hash_name{lc($name)}++ if ($name ne "");
1441	$email_hash_address{lc($address)}++;
1442    }
1443
1444    return 1;
1445}
1446
1447sub push_email_addresses {
1448    my ($address, $role) = @_;
1449
1450    my @address_list = ();
1451
1452    if (rfc822_valid($address)) {
1453	push_email_address($address, $role);
1454    } elsif (@address_list = rfc822_validlist($address)) {
1455	my $array_count = shift(@address_list);
1456	while (my $entry = shift(@address_list)) {
1457	    push_email_address($entry, $role);
1458	}
1459    } else {
1460	if (!push_email_address($address, $role)) {
1461	    warn("Invalid MAINTAINERS address: '" . $address . "'\n");
1462	}
1463    }
1464}
1465
1466sub add_role {
1467    my ($line, $role) = @_;
1468
1469    my ($name, $address) = parse_email($line);
1470    my $email = format_email($name, $address, $email_usename);
1471
1472    foreach my $entry (@email_to) {
1473	if ($email_remove_duplicates) {
1474	    my ($entry_name, $entry_address) = parse_email($entry->[0]);
1475	    if (($name eq $entry_name || $address eq $entry_address)
1476		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1477	    ) {
1478		if ($entry->[1] eq "") {
1479		    $entry->[1] = "$role";
1480		} else {
1481		    $entry->[1] = "$entry->[1],$role";
1482		}
1483	    }
1484	} else {
1485	    if ($email eq $entry->[0]
1486		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1487	    ) {
1488		if ($entry->[1] eq "") {
1489		    $entry->[1] = "$role";
1490		} else {
1491		    $entry->[1] = "$entry->[1],$role";
1492		}
1493	    }
1494	}
1495    }
1496}
1497
1498sub which {
1499    my ($bin) = @_;
1500
1501    foreach my $path (split(/:/, $ENV{PATH})) {
1502	if (-e "$path/$bin") {
1503	    return "$path/$bin";
1504	}
1505    }
1506
1507    return "";
1508}
1509
1510sub which_conf {
1511    my ($conf) = @_;
1512
1513    foreach my $path (split(/:/, ".:$ENV{HOME}:.scripts")) {
1514	if (-e "$path/$conf") {
1515	    return "$path/$conf";
1516	}
1517    }
1518
1519    return "";
1520}
1521
1522sub mailmap_email {
1523    my ($line) = @_;
1524
1525    my ($name, $address) = parse_email($line);
1526    my $email = format_email($name, $address, 1);
1527    my $real_name = $name;
1528    my $real_address = $address;
1529
1530    if (exists $mailmap->{names}->{$email} ||
1531	exists $mailmap->{addresses}->{$email}) {
1532	if (exists $mailmap->{names}->{$email}) {
1533	    $real_name = $mailmap->{names}->{$email};
1534	}
1535	if (exists $mailmap->{addresses}->{$email}) {
1536	    $real_address = $mailmap->{addresses}->{$email};
1537	}
1538    } else {
1539	if (exists $mailmap->{names}->{$address}) {
1540	    $real_name = $mailmap->{names}->{$address};
1541	}
1542	if (exists $mailmap->{addresses}->{$address}) {
1543	    $real_address = $mailmap->{addresses}->{$address};
1544	}
1545    }
1546    return format_email($real_name, $real_address, 1);
1547}
1548
1549sub mailmap {
1550    my (@addresses) = @_;
1551
1552    my @mapped_emails = ();
1553    foreach my $line (@addresses) {
1554	push(@mapped_emails, mailmap_email($line));
1555    }
1556    merge_by_realname(@mapped_emails) if ($email_use_mailmap);
1557    return @mapped_emails;
1558}
1559
1560sub merge_by_realname {
1561    my %address_map;
1562    my (@emails) = @_;
1563
1564    foreach my $email (@emails) {
1565	my ($name, $address) = parse_email($email);
1566	if (exists $address_map{$name}) {
1567	    $address = $address_map{$name};
1568	    $email = format_email($name, $address, 1);
1569	} else {
1570	    $address_map{$name} = $address;
1571	}
1572    }
1573}
1574
1575sub git_execute_cmd {
1576    my ($cmd) = @_;
1577    my @lines = ();
1578
1579    my $output = `$cmd`;
1580    $output =~ s/^\s*//gm;
1581    @lines = split("\n", $output);
1582
1583    return @lines;
1584}
1585
1586sub hg_execute_cmd {
1587    my ($cmd) = @_;
1588    my @lines = ();
1589
1590    my $output = `$cmd`;
1591    @lines = split("\n", $output);
1592
1593    return @lines;
1594}
1595
1596sub extract_formatted_signatures {
1597    my (@signature_lines) = @_;
1598
1599    my @type = @signature_lines;
1600
1601    s/\s*(.*):.*/$1/ for (@type);
1602
1603    # cut -f2- -d":"
1604    s/\s*.*:\s*(.+)\s*/$1/ for (@signature_lines);
1605
1606## Reformat email addresses (with names) to avoid badly written signatures
1607
1608    foreach my $signer (@signature_lines) {
1609	$signer = deduplicate_email($signer);
1610    }
1611
1612    return (\@type, \@signature_lines);
1613}
1614
1615sub vcs_find_signers {
1616    my ($cmd, $file) = @_;
1617    my $commits;
1618    my @lines = ();
1619    my @signatures = ();
1620    my @authors = ();
1621    my @stats = ();
1622
1623    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1624
1625    my $pattern = $VCS_cmds{"commit_pattern"};
1626    my $author_pattern = $VCS_cmds{"author_pattern"};
1627    my $stat_pattern = $VCS_cmds{"stat_pattern"};
1628
1629    $stat_pattern =~ s/(\$\w+)/$1/eeg;		#interpolate $stat_pattern
1630
1631    $commits = grep(/$pattern/, @lines);	# of commits
1632
1633    @authors = grep(/$author_pattern/, @lines);
1634    @signatures = grep(/^[ \t]*${signature_pattern}.*\@.*$/, @lines);
1635    @stats = grep(/$stat_pattern/, @lines);
1636
1637#    print("stats: <@stats>\n");
1638
1639    return (0, \@signatures, \@authors, \@stats) if !@signatures;
1640
1641    save_commits_by_author(@lines) if ($interactive);
1642    save_commits_by_signer(@lines) if ($interactive);
1643
1644    if (!$email_git_penguin_chiefs) {
1645	@signatures = grep(!/${penguin_chiefs}/i, @signatures);
1646    }
1647
1648    my ($author_ref, $authors_ref) = extract_formatted_signatures(@authors);
1649    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
1650
1651    return ($commits, $signers_ref, $authors_ref, \@stats);
1652}
1653
1654sub vcs_find_author {
1655    my ($cmd) = @_;
1656    my @lines = ();
1657
1658    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1659
1660    if (!$email_git_penguin_chiefs) {
1661	@lines = grep(!/${penguin_chiefs}/i, @lines);
1662    }
1663
1664    return @lines if !@lines;
1665
1666    my @authors = ();
1667    foreach my $line (@lines) {
1668	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
1669	    my $author = $1;
1670	    my ($name, $address) = parse_email($author);
1671	    $author = format_email($name, $address, 1);
1672	    push(@authors, $author);
1673	}
1674    }
1675
1676    save_commits_by_author(@lines) if ($interactive);
1677    save_commits_by_signer(@lines) if ($interactive);
1678
1679    return @authors;
1680}
1681
1682sub vcs_save_commits {
1683    my ($cmd) = @_;
1684    my @lines = ();
1685    my @commits = ();
1686
1687    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1688
1689    foreach my $line (@lines) {
1690	if ($line =~ m/$VCS_cmds{"blame_commit_pattern"}/) {
1691	    push(@commits, $1);
1692	}
1693    }
1694
1695    return @commits;
1696}
1697
1698sub vcs_blame {
1699    my ($file) = @_;
1700    my $cmd;
1701    my @commits = ();
1702
1703    return @commits if (!(-f $file));
1704
1705    if (@range && $VCS_cmds{"blame_range_cmd"} eq "") {
1706	my @all_commits = ();
1707
1708	$cmd = $VCS_cmds{"blame_file_cmd"};
1709	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1710	@all_commits = vcs_save_commits($cmd);
1711
1712	foreach my $file_range_diff (@range) {
1713	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1714	    my $diff_file = $1;
1715	    my $diff_start = $2;
1716	    my $diff_length = $3;
1717	    next if ("$file" ne "$diff_file");
1718	    for (my $i = $diff_start; $i < $diff_start + $diff_length; $i++) {
1719		push(@commits, $all_commits[$i]);
1720	    }
1721	}
1722    } elsif (@range) {
1723	foreach my $file_range_diff (@range) {
1724	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1725	    my $diff_file = $1;
1726	    my $diff_start = $2;
1727	    my $diff_length = $3;
1728	    next if ("$file" ne "$diff_file");
1729	    $cmd = $VCS_cmds{"blame_range_cmd"};
1730	    $cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1731	    push(@commits, vcs_save_commits($cmd));
1732	}
1733    } else {
1734	$cmd = $VCS_cmds{"blame_file_cmd"};
1735	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1736	@commits = vcs_save_commits($cmd);
1737    }
1738
1739    foreach my $commit (@commits) {
1740	$commit =~ s/^\^//g;
1741    }
1742
1743    return @commits;
1744}
1745
1746my $printed_novcs = 0;
1747sub vcs_exists {
1748    %VCS_cmds = %VCS_cmds_git;
1749    return 1 if eval $VCS_cmds{"available"};
1750    %VCS_cmds = %VCS_cmds_hg;
1751    return 2 if eval $VCS_cmds{"available"};
1752    %VCS_cmds = ();
1753    if (!$printed_novcs && $email_git) {
1754	warn("$P: No supported VCS found.  Add --nogit to options?\n");
1755	warn("Using a git repository produces better results.\n");
1756	warn("Try Linus Torvalds' latest git repository using:\n");
1757	warn("git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git\n");
1758	$printed_novcs = 1;
1759    }
1760    return 0;
1761}
1762
1763sub vcs_is_git {
1764    vcs_exists();
1765    return $vcs_used == 1;
1766}
1767
1768sub vcs_is_hg {
1769    return $vcs_used == 2;
1770}
1771
1772sub vcs_add_commit_signers {
1773    return if (!vcs_exists());
1774
1775    my ($commit, $desc) = @_;
1776    my $commit_count = 0;
1777    my $commit_authors_ref;
1778    my $commit_signers_ref;
1779    my $stats_ref;
1780    my @commit_authors = ();
1781    my @commit_signers = ();
1782    my $cmd;
1783
1784    $cmd = $VCS_cmds{"find_commit_signers_cmd"};
1785    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
1786
1787    ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, "");
1788    @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
1789    @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
1790
1791    foreach my $signer (@commit_signers) {
1792	$signer = deduplicate_email($signer);
1793    }
1794
1795    vcs_assign($desc, 1, @commit_signers);
1796}
1797
1798sub interactive_get_maintainers {
1799    my ($list_ref) = @_;
1800    my @list = @$list_ref;
1801
1802    vcs_exists();
1803
1804    my %selected;
1805    my %authored;
1806    my %signed;
1807    my $count = 0;
1808    my $maintained = 0;
1809    foreach my $entry (@list) {
1810	$maintained = 1 if ($entry->[1] =~ /^(maintainer|supporter)/i);
1811	$selected{$count} = 1;
1812	$authored{$count} = 0;
1813	$signed{$count} = 0;
1814	$count++;
1815    }
1816
1817    #menu loop
1818    my $done = 0;
1819    my $print_options = 0;
1820    my $redraw = 1;
1821    while (!$done) {
1822	$count = 0;
1823	if ($redraw) {
1824	    printf STDERR "\n%1s %2s %-65s",
1825			  "*", "#", "email/list and role:stats";
1826	    if ($email_git ||
1827		($email_git_fallback && !$maintained) ||
1828		$email_git_blame) {
1829		print STDERR "auth sign";
1830	    }
1831	    print STDERR "\n";
1832	    foreach my $entry (@list) {
1833		my $email = $entry->[0];
1834		my $role = $entry->[1];
1835		my $sel = "";
1836		$sel = "*" if ($selected{$count});
1837		my $commit_author = $commit_author_hash{$email};
1838		my $commit_signer = $commit_signer_hash{$email};
1839		my $authored = 0;
1840		my $signed = 0;
1841		$authored++ for (@{$commit_author});
1842		$signed++ for (@{$commit_signer});
1843		printf STDERR "%1s %2d %-65s", $sel, $count + 1, $email;
1844		printf STDERR "%4d %4d", $authored, $signed
1845		    if ($authored > 0 || $signed > 0);
1846		printf STDERR "\n     %s\n", $role;
1847		if ($authored{$count}) {
1848		    my $commit_author = $commit_author_hash{$email};
1849		    foreach my $ref (@{$commit_author}) {
1850			print STDERR "     Author: @{$ref}[1]\n";
1851		    }
1852		}
1853		if ($signed{$count}) {
1854		    my $commit_signer = $commit_signer_hash{$email};
1855		    foreach my $ref (@{$commit_signer}) {
1856			print STDERR "     @{$ref}[2]: @{$ref}[1]\n";
1857		    }
1858		}
1859
1860		$count++;
1861	    }
1862	}
1863	my $date_ref = \$email_git_since;
1864	$date_ref = \$email_hg_since if (vcs_is_hg());
1865	if ($print_options) {
1866	    $print_options = 0;
1867	    if (vcs_exists()) {
1868		print STDERR <<EOT
1869
1870Version Control options:
1871g  use git history      [$email_git]
1872gf use git-fallback     [$email_git_fallback]
1873b  use git blame        [$email_git_blame]
1874bs use blame signatures [$email_git_blame_signatures]
1875c# minimum commits      [$email_git_min_signatures]
1876%# min percent          [$email_git_min_percent]
1877d# history to use       [$$date_ref]
1878x# max maintainers      [$email_git_max_maintainers]
1879t  all signature types  [$email_git_all_signature_types]
1880m  use .mailmap         [$email_use_mailmap]
1881EOT
1882	    }
1883	    print STDERR <<EOT
1884
1885Additional options:
18860  toggle all
1887tm toggle maintainers
1888tg toggle git entries
1889tl toggle open list entries
1890ts toggle subscriber list entries
1891f  emails in file       [$email_file_emails]
1892k  keywords in file     [$keywords]
1893r  remove duplicates    [$email_remove_duplicates]
1894p# pattern match depth  [$pattern_depth]
1895EOT
1896	}
1897	print STDERR
1898"\n#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): ";
1899
1900	my $input = <STDIN>;
1901	chomp($input);
1902
1903	$redraw = 1;
1904	my $rerun = 0;
1905	my @wish = split(/[, ]+/, $input);
1906	foreach my $nr (@wish) {
1907	    $nr = lc($nr);
1908	    my $sel = substr($nr, 0, 1);
1909	    my $str = substr($nr, 1);
1910	    my $val = 0;
1911	    $val = $1 if $str =~ /^(\d+)$/;
1912
1913	    if ($sel eq "y") {
1914		$interactive = 0;
1915		$done = 1;
1916		$output_rolestats = 0;
1917		$output_roles = 0;
1918		$output_substatus = 0;
1919		last;
1920	    } elsif ($nr =~ /^\d+$/ && $nr > 0 && $nr <= $count) {
1921		$selected{$nr - 1} = !$selected{$nr - 1};
1922	    } elsif ($sel eq "*" || $sel eq '^') {
1923		my $toggle = 0;
1924		$toggle = 1 if ($sel eq '*');
1925		for (my $i = 0; $i < $count; $i++) {
1926		    $selected{$i} = $toggle;
1927		}
1928	    } elsif ($sel eq "0") {
1929		for (my $i = 0; $i < $count; $i++) {
1930		    $selected{$i} = !$selected{$i};
1931		}
1932	    } elsif ($sel eq "t") {
1933		if (lc($str) eq "m") {
1934		    for (my $i = 0; $i < $count; $i++) {
1935			$selected{$i} = !$selected{$i}
1936			    if ($list[$i]->[1] =~ /^(maintainer|supporter)/i);
1937		    }
1938		} elsif (lc($str) eq "g") {
1939		    for (my $i = 0; $i < $count; $i++) {
1940			$selected{$i} = !$selected{$i}
1941			    if ($list[$i]->[1] =~ /^(author|commit|signer)/i);
1942		    }
1943		} elsif (lc($str) eq "l") {
1944		    for (my $i = 0; $i < $count; $i++) {
1945			$selected{$i} = !$selected{$i}
1946			    if ($list[$i]->[1] =~ /^(open list)/i);
1947		    }
1948		} elsif (lc($str) eq "s") {
1949		    for (my $i = 0; $i < $count; $i++) {
1950			$selected{$i} = !$selected{$i}
1951			    if ($list[$i]->[1] =~ /^(subscriber list)/i);
1952		    }
1953		}
1954	    } elsif ($sel eq "a") {
1955		if ($val > 0 && $val <= $count) {
1956		    $authored{$val - 1} = !$authored{$val - 1};
1957		} elsif ($str eq '*' || $str eq '^') {
1958		    my $toggle = 0;
1959		    $toggle = 1 if ($str eq '*');
1960		    for (my $i = 0; $i < $count; $i++) {
1961			$authored{$i} = $toggle;
1962		    }
1963		}
1964	    } elsif ($sel eq "s") {
1965		if ($val > 0 && $val <= $count) {
1966		    $signed{$val - 1} = !$signed{$val - 1};
1967		} elsif ($str eq '*' || $str eq '^') {
1968		    my $toggle = 0;
1969		    $toggle = 1 if ($str eq '*');
1970		    for (my $i = 0; $i < $count; $i++) {
1971			$signed{$i} = $toggle;
1972		    }
1973		}
1974	    } elsif ($sel eq "o") {
1975		$print_options = 1;
1976		$redraw = 1;
1977	    } elsif ($sel eq "g") {
1978		if ($str eq "f") {
1979		    bool_invert(\$email_git_fallback);
1980		} else {
1981		    bool_invert(\$email_git);
1982		}
1983		$rerun = 1;
1984	    } elsif ($sel eq "b") {
1985		if ($str eq "s") {
1986		    bool_invert(\$email_git_blame_signatures);
1987		} else {
1988		    bool_invert(\$email_git_blame);
1989		}
1990		$rerun = 1;
1991	    } elsif ($sel eq "c") {
1992		if ($val > 0) {
1993		    $email_git_min_signatures = $val;
1994		    $rerun = 1;
1995		}
1996	    } elsif ($sel eq "x") {
1997		if ($val > 0) {
1998		    $email_git_max_maintainers = $val;
1999		    $rerun = 1;
2000		}
2001	    } elsif ($sel eq "%") {
2002		if ($str ne "" && $val >= 0) {
2003		    $email_git_min_percent = $val;
2004		    $rerun = 1;
2005		}
2006	    } elsif ($sel eq "d") {
2007		if (vcs_is_git()) {
2008		    $email_git_since = $str;
2009		} elsif (vcs_is_hg()) {
2010		    $email_hg_since = $str;
2011		}
2012		$rerun = 1;
2013	    } elsif ($sel eq "t") {
2014		bool_invert(\$email_git_all_signature_types);
2015		$rerun = 1;
2016	    } elsif ($sel eq "f") {
2017		bool_invert(\$email_file_emails);
2018		$rerun = 1;
2019	    } elsif ($sel eq "r") {
2020		bool_invert(\$email_remove_duplicates);
2021		$rerun = 1;
2022	    } elsif ($sel eq "m") {
2023		bool_invert(\$email_use_mailmap);
2024		read_mailmap();
2025		$rerun = 1;
2026	    } elsif ($sel eq "k") {
2027		bool_invert(\$keywords);
2028		$rerun = 1;
2029	    } elsif ($sel eq "p") {
2030		if ($str ne "" && $val >= 0) {
2031		    $pattern_depth = $val;
2032		    $rerun = 1;
2033		}
2034	    } elsif ($sel eq "h" || $sel eq "?") {
2035		print STDERR <<EOT
2036
2037Interactive mode allows you to select the various maintainers, submitters,
2038commit signers and mailing lists that could be CC'd on a patch.
2039
2040Any *'d entry is selected.
2041
2042If you have git or hg installed, you can choose to summarize the commit
2043history of files in the patch.  Also, each line of the current file can
2044be matched to its commit author and that commits signers with blame.
2045
2046Various knobs exist to control the length of time for active commit
2047tracking, the maximum number of commit authors and signers to add,
2048and such.
2049
2050Enter selections at the prompt until you are satisfied that the selected
2051maintainers are appropriate.  You may enter multiple selections separated
2052by either commas or spaces.
2053
2054EOT
2055	    } else {
2056		print STDERR "invalid option: '$nr'\n";
2057		$redraw = 0;
2058	    }
2059	}
2060	if ($rerun) {
2061	    print STDERR "git-blame can be very slow, please have patience..."
2062		if ($email_git_blame);
2063	    goto &get_maintainers;
2064	}
2065    }
2066
2067    #drop not selected entries
2068    $count = 0;
2069    my @new_emailto = ();
2070    foreach my $entry (@list) {
2071	if ($selected{$count}) {
2072	    push(@new_emailto, $list[$count]);
2073	}
2074	$count++;
2075    }
2076    return @new_emailto;
2077}
2078
2079sub bool_invert {
2080    my ($bool_ref) = @_;
2081
2082    if ($$bool_ref) {
2083	$$bool_ref = 0;
2084    } else {
2085	$$bool_ref = 1;
2086    }
2087}
2088
2089sub deduplicate_email {
2090    my ($email) = @_;
2091
2092    my $matched = 0;
2093    my ($name, $address) = parse_email($email);
2094    $email = format_email($name, $address, 1);
2095    $email = mailmap_email($email);
2096
2097    return $email if (!$email_remove_duplicates);
2098
2099    ($name, $address) = parse_email($email);
2100
2101    if ($name ne "" && $deduplicate_name_hash{lc($name)}) {
2102	$name = $deduplicate_name_hash{lc($name)}->[0];
2103	$address = $deduplicate_name_hash{lc($name)}->[1];
2104	$matched = 1;
2105    } elsif ($deduplicate_address_hash{lc($address)}) {
2106	$name = $deduplicate_address_hash{lc($address)}->[0];
2107	$address = $deduplicate_address_hash{lc($address)}->[1];
2108	$matched = 1;
2109    }
2110    if (!$matched) {
2111	$deduplicate_name_hash{lc($name)} = [ $name, $address ];
2112	$deduplicate_address_hash{lc($address)} = [ $name, $address ];
2113    }
2114    $email = format_email($name, $address, 1);
2115    $email = mailmap_email($email);
2116    return $email;
2117}
2118
2119sub save_commits_by_author {
2120    my (@lines) = @_;
2121
2122    my @authors = ();
2123    my @commits = ();
2124    my @subjects = ();
2125
2126    foreach my $line (@lines) {
2127	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
2128	    my $author = $1;
2129	    $author = deduplicate_email($author);
2130	    push(@authors, $author);
2131	}
2132	push(@commits, $1) if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
2133	push(@subjects, $1) if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
2134    }
2135
2136    for (my $i = 0; $i < @authors; $i++) {
2137	my $exists = 0;
2138	foreach my $ref(@{$commit_author_hash{$authors[$i]}}) {
2139	    if (@{$ref}[0] eq $commits[$i] &&
2140		@{$ref}[1] eq $subjects[$i]) {
2141		$exists = 1;
2142		last;
2143	    }
2144	}
2145	if (!$exists) {
2146	    push(@{$commit_author_hash{$authors[$i]}},
2147		 [ ($commits[$i], $subjects[$i]) ]);
2148	}
2149    }
2150}
2151
2152sub save_commits_by_signer {
2153    my (@lines) = @_;
2154
2155    my $commit = "";
2156    my $subject = "";
2157
2158    foreach my $line (@lines) {
2159	$commit = $1 if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
2160	$subject = $1 if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
2161	if ($line =~ /^[ \t]*${signature_pattern}.*\@.*$/) {
2162	    my @signatures = ($line);
2163	    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
2164	    my @types = @$types_ref;
2165	    my @signers = @$signers_ref;
2166
2167	    my $type = $types[0];
2168	    my $signer = $signers[0];
2169
2170	    $signer = deduplicate_email($signer);
2171
2172	    my $exists = 0;
2173	    foreach my $ref(@{$commit_signer_hash{$signer}}) {
2174		if (@{$ref}[0] eq $commit &&
2175		    @{$ref}[1] eq $subject &&
2176		    @{$ref}[2] eq $type) {
2177		    $exists = 1;
2178		    last;
2179		}
2180	    }
2181	    if (!$exists) {
2182		push(@{$commit_signer_hash{$signer}},
2183		     [ ($commit, $subject, $type) ]);
2184	    }
2185	}
2186    }
2187}
2188
2189sub vcs_assign {
2190    my ($role, $divisor, @lines) = @_;
2191
2192    my %hash;
2193    my $count = 0;
2194
2195    return if (@lines <= 0);
2196
2197    if ($divisor <= 0) {
2198	warn("Bad divisor in " . (caller(0))[3] . ": $divisor\n");
2199	$divisor = 1;
2200    }
2201
2202    @lines = mailmap(@lines);
2203
2204    return if (@lines <= 0);
2205
2206    @lines = sort(@lines);
2207
2208    # uniq -c
2209    $hash{$_}++ for @lines;
2210
2211    # sort -rn
2212    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
2213	my $sign_offs = $hash{$line};
2214	my $percent = $sign_offs * 100 / $divisor;
2215
2216	$percent = 100 if ($percent > 100);
2217	next if (ignore_email_address($line));
2218	$count++;
2219	last if ($sign_offs < $email_git_min_signatures ||
2220		 $count > $email_git_max_maintainers ||
2221		 $percent < $email_git_min_percent);
2222	push_email_address($line, '');
2223	if ($output_rolestats) {
2224	    my $fmt_percent = sprintf("%.0f", $percent);
2225	    add_role($line, "$role:$sign_offs/$divisor=$fmt_percent%");
2226	} else {
2227	    add_role($line, $role);
2228	}
2229    }
2230}
2231
2232sub vcs_file_signoffs {
2233    my ($file) = @_;
2234
2235    my $authors_ref;
2236    my $signers_ref;
2237    my $stats_ref;
2238    my @authors = ();
2239    my @signers = ();
2240    my @stats = ();
2241    my $commits;
2242
2243    $vcs_used = vcs_exists();
2244    return if (!$vcs_used);
2245
2246    my $cmd = $VCS_cmds{"find_signers_cmd"};
2247    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
2248
2249    ($commits, $signers_ref, $authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2250
2251    @signers = @{$signers_ref} if defined $signers_ref;
2252    @authors = @{$authors_ref} if defined $authors_ref;
2253    @stats = @{$stats_ref} if defined $stats_ref;
2254
2255#    print("commits: <$commits>\nsigners:<@signers>\nauthors: <@authors>\nstats: <@stats>\n");
2256
2257    foreach my $signer (@signers) {
2258	$signer = deduplicate_email($signer);
2259    }
2260
2261    vcs_assign("commit_signer", $commits, @signers);
2262    vcs_assign("authored", $commits, @authors);
2263    if ($#authors == $#stats) {
2264	my $stat_pattern = $VCS_cmds{"stat_pattern"};
2265	$stat_pattern =~ s/(\$\w+)/$1/eeg;	#interpolate $stat_pattern
2266
2267	my $added = 0;
2268	my $deleted = 0;
2269	for (my $i = 0; $i <= $#stats; $i++) {
2270	    if ($stats[$i] =~ /$stat_pattern/) {
2271		$added += $1;
2272		$deleted += $2;
2273	    }
2274	}
2275	my @tmp_authors = uniq(@authors);
2276	foreach my $author (@tmp_authors) {
2277	    $author = deduplicate_email($author);
2278	}
2279	@tmp_authors = uniq(@tmp_authors);
2280	my @list_added = ();
2281	my @list_deleted = ();
2282	foreach my $author (@tmp_authors) {
2283	    my $auth_added = 0;
2284	    my $auth_deleted = 0;
2285	    for (my $i = 0; $i <= $#stats; $i++) {
2286		if ($author eq deduplicate_email($authors[$i]) &&
2287		    $stats[$i] =~ /$stat_pattern/) {
2288		    $auth_added += $1;
2289		    $auth_deleted += $2;
2290		}
2291	    }
2292	    for (my $i = 0; $i < $auth_added; $i++) {
2293		push(@list_added, $author);
2294	    }
2295	    for (my $i = 0; $i < $auth_deleted; $i++) {
2296		push(@list_deleted, $author);
2297	    }
2298	}
2299	vcs_assign("added_lines", $added, @list_added);
2300	vcs_assign("removed_lines", $deleted, @list_deleted);
2301    }
2302}
2303
2304sub vcs_file_blame {
2305    my ($file) = @_;
2306
2307    my @signers = ();
2308    my @all_commits = ();
2309    my @commits = ();
2310    my $total_commits;
2311    my $total_lines;
2312
2313    $vcs_used = vcs_exists();
2314    return if (!$vcs_used);
2315
2316    @all_commits = vcs_blame($file);
2317    @commits = uniq(@all_commits);
2318    $total_commits = @commits;
2319    $total_lines = @all_commits;
2320
2321    if ($email_git_blame_signatures) {
2322	if (vcs_is_hg()) {
2323	    my $commit_count;
2324	    my $commit_authors_ref;
2325	    my $commit_signers_ref;
2326	    my $stats_ref;
2327	    my @commit_authors = ();
2328	    my @commit_signers = ();
2329	    my $commit = join(" -r ", @commits);
2330	    my $cmd;
2331
2332	    $cmd = $VCS_cmds{"find_commit_signers_cmd"};
2333	    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2334
2335	    ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2336	    @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
2337	    @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
2338
2339	    push(@signers, @commit_signers);
2340	} else {
2341	    foreach my $commit (@commits) {
2342		my $commit_count;
2343		my $commit_authors_ref;
2344		my $commit_signers_ref;
2345		my $stats_ref;
2346		my @commit_authors = ();
2347		my @commit_signers = ();
2348		my $cmd;
2349
2350		$cmd = $VCS_cmds{"find_commit_signers_cmd"};
2351		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2352
2353		($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2354		@commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
2355		@commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
2356
2357		push(@signers, @commit_signers);
2358	    }
2359	}
2360    }
2361
2362    if ($from_filename) {
2363	if ($output_rolestats) {
2364	    my @blame_signers;
2365	    if (vcs_is_hg()) {{		# Double brace for last exit
2366		my $commit_count;
2367		my @commit_signers = ();
2368		@commits = uniq(@commits);
2369		@commits = sort(@commits);
2370		my $commit = join(" -r ", @commits);
2371		my $cmd;
2372
2373		$cmd = $VCS_cmds{"find_commit_author_cmd"};
2374		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2375
2376		my @lines = ();
2377
2378		@lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
2379
2380		if (!$email_git_penguin_chiefs) {
2381		    @lines = grep(!/${penguin_chiefs}/i, @lines);
2382		}
2383
2384		last if !@lines;
2385
2386		my @authors = ();
2387		foreach my $line (@lines) {
2388		    if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
2389			my $author = $1;
2390			$author = deduplicate_email($author);
2391			push(@authors, $author);
2392		    }
2393		}
2394
2395		save_commits_by_author(@lines) if ($interactive);
2396		save_commits_by_signer(@lines) if ($interactive);
2397
2398		push(@signers, @authors);
2399	    }}
2400	    else {
2401		foreach my $commit (@commits) {
2402		    my $i;
2403		    my $cmd = $VCS_cmds{"find_commit_author_cmd"};
2404		    $cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd
2405		    my @author = vcs_find_author($cmd);
2406		    next if !@author;
2407
2408		    my $formatted_author = deduplicate_email($author[0]);
2409
2410		    my $count = grep(/$commit/, @all_commits);
2411		    for ($i = 0; $i < $count ; $i++) {
2412			push(@blame_signers, $formatted_author);
2413		    }
2414		}
2415	    }
2416	    if (@blame_signers) {
2417		vcs_assign("authored lines", $total_lines, @blame_signers);
2418	    }
2419	}
2420	foreach my $signer (@signers) {
2421	    $signer = deduplicate_email($signer);
2422	}
2423	vcs_assign("commits", $total_commits, @signers);
2424    } else {
2425	foreach my $signer (@signers) {
2426	    $signer = deduplicate_email($signer);
2427	}
2428	vcs_assign("modified commits", $total_commits, @signers);
2429    }
2430}
2431
2432sub vcs_file_exists {
2433    my ($file) = @_;
2434
2435    my $exists;
2436
2437    my $vcs_used = vcs_exists();
2438    return 0 if (!$vcs_used);
2439
2440    my $cmd = $VCS_cmds{"file_exists_cmd"};
2441    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
2442    $cmd .= " 2>&1";
2443    $exists = &{$VCS_cmds{"execute_cmd"}}($cmd);
2444
2445    return 0 if ($? != 0);
2446
2447    return $exists;
2448}
2449
2450sub vcs_list_files {
2451    my ($file) = @_;
2452
2453    my @lsfiles = ();
2454
2455    my $vcs_used = vcs_exists();
2456    return 0 if (!$vcs_used);
2457
2458    my $cmd = $VCS_cmds{"list_files_cmd"};
2459    $cmd =~ s/(\$\w+)/$1/eeg;   # interpolate $cmd
2460    @lsfiles = &{$VCS_cmds{"execute_cmd"}}($cmd);
2461
2462    return () if ($? != 0);
2463
2464    return @lsfiles;
2465}
2466
2467sub uniq {
2468    my (@parms) = @_;
2469
2470    my %saw;
2471    @parms = grep(!$saw{$_}++, @parms);
2472    return @parms;
2473}
2474
2475sub sort_and_uniq {
2476    my (@parms) = @_;
2477
2478    my %saw;
2479    @parms = sort @parms;
2480    @parms = grep(!$saw{$_}++, @parms);
2481    return @parms;
2482}
2483
2484sub clean_file_emails {
2485    my (@file_emails) = @_;
2486    my @fmt_emails = ();
2487
2488    foreach my $email (@file_emails) {
2489	$email =~ s/[\(\<\{]{0,1}([A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+)[\)\>\}]{0,1}/\<$1\>/g;
2490	my ($name, $address) = parse_email($email);
2491
2492	# Strip quotes for easier processing, format_email will add them back
2493	$name =~ s/^"(.*)"$/$1/;
2494
2495	# Split into name-like parts and remove stray punctuation particles
2496	my @nw = split(/[^\p{L}\'\,\.\+-]/, $name);
2497	@nw = grep(!/^[\'\,\.\+-]$/, @nw);
2498
2499	# Make a best effort to extract the name, and only the name, by taking
2500	# only the last two names, or in the case of obvious initials, the last
2501	# three names.
2502	if (@nw > 2) {
2503	    my $first = $nw[@nw - 3];
2504	    my $middle = $nw[@nw - 2];
2505	    my $last = $nw[@nw - 1];
2506
2507	    if (((length($first) == 1 && $first =~ m/\p{L}/) ||
2508		 (length($first) == 2 && substr($first, -1) eq ".")) ||
2509		(length($middle) == 1 ||
2510		 (length($middle) == 2 && substr($middle, -1) eq "."))) {
2511		$name = "$first $middle $last";
2512	    } else {
2513		$name = "$middle $last";
2514	    }
2515	} else {
2516	    $name = "@nw";
2517	}
2518
2519	if (substr($name, -1) =~ /[,\.]/) {
2520	    $name = substr($name, 0, length($name) - 1);
2521	}
2522
2523	if (substr($name, 0, 1) =~ /[,\.]/) {
2524	    $name = substr($name, 1, length($name) - 1);
2525	}
2526
2527	my $fmt_email = format_email($name, $address, $email_usename);
2528	push(@fmt_emails, $fmt_email);
2529    }
2530    return @fmt_emails;
2531}
2532
2533sub merge_email {
2534    my @lines;
2535    my %saw;
2536
2537    for (@_) {
2538	my ($address, $role) = @$_;
2539	if (!$saw{$address}) {
2540	    if ($output_roles) {
2541		push(@lines, "$address ($role)");
2542	    } else {
2543		push(@lines, $address);
2544	    }
2545	    $saw{$address} = 1;
2546	}
2547    }
2548
2549    return @lines;
2550}
2551
2552sub output {
2553    my (@parms) = @_;
2554
2555    if ($output_multiline) {
2556	foreach my $line (@parms) {
2557	    print("${line}\n");
2558	}
2559    } else {
2560	print(join($output_separator, @parms));
2561	print("\n");
2562    }
2563}
2564
2565my $rfc822re;
2566
2567sub make_rfc822re {
2568#   Basic lexical tokens are specials, domain_literal, quoted_string, atom, and
2569#   comment.  We must allow for rfc822_lwsp (or comments) after each of these.
2570#   This regexp will only work on addresses which have had comments stripped
2571#   and replaced with rfc822_lwsp.
2572
2573    my $specials = '()<>@,;:\\\\".\\[\\]';
2574    my $controls = '\\000-\\037\\177';
2575
2576    my $dtext = "[^\\[\\]\\r\\\\]";
2577    my $domain_literal = "\\[(?:$dtext|\\\\.)*\\]$rfc822_lwsp*";
2578
2579    my $quoted_string = "\"(?:[^\\\"\\r\\\\]|\\\\.|$rfc822_lwsp)*\"$rfc822_lwsp*";
2580
2581#   Use zero-width assertion to spot the limit of an atom.  A simple
2582#   $rfc822_lwsp* causes the regexp engine to hang occasionally.
2583    my $atom = "[^$specials $controls]+(?:$rfc822_lwsp+|\\Z|(?=[\\[\"$specials]))";
2584    my $word = "(?:$atom|$quoted_string)";
2585    my $localpart = "$word(?:\\.$rfc822_lwsp*$word)*";
2586
2587    my $sub_domain = "(?:$atom|$domain_literal)";
2588    my $domain = "$sub_domain(?:\\.$rfc822_lwsp*$sub_domain)*";
2589
2590    my $addr_spec = "$localpart\@$rfc822_lwsp*$domain";
2591
2592    my $phrase = "$word*";
2593    my $route = "(?:\@$domain(?:,\@$rfc822_lwsp*$domain)*:$rfc822_lwsp*)";
2594    my $route_addr = "\\<$rfc822_lwsp*$route?$addr_spec\\>$rfc822_lwsp*";
2595    my $mailbox = "(?:$addr_spec|$phrase$route_addr)";
2596
2597    my $group = "$phrase:$rfc822_lwsp*(?:$mailbox(?:,\\s*$mailbox)*)?;\\s*";
2598    my $address = "(?:$mailbox|$group)";
2599
2600    return "$rfc822_lwsp*$address";
2601}
2602
2603sub rfc822_strip_comments {
2604    my $s = shift;
2605#   Recursively remove comments, and replace with a single space.  The simpler
2606#   regexps in the Email Addressing FAQ are imperfect - they will miss escaped
2607#   chars in atoms, for example.
2608
2609    while ($s =~ s/^((?:[^"\\]|\\.)*
2610                    (?:"(?:[^"\\]|\\.)*"(?:[^"\\]|\\.)*)*)
2611                    \((?:[^()\\]|\\.)*\)/$1 /osx) {}
2612    return $s;
2613}
2614
2615#   valid: returns true if the parameter is an RFC822 valid address
2616#
2617sub rfc822_valid {
2618    my $s = rfc822_strip_comments(shift);
2619
2620    if (!$rfc822re) {
2621        $rfc822re = make_rfc822re();
2622    }
2623
2624    return $s =~ m/^$rfc822re$/so && $s =~ m/^$rfc822_char*$/;
2625}
2626
2627#   validlist: In scalar context, returns true if the parameter is an RFC822
2628#              valid list of addresses.
2629#
2630#              In list context, returns an empty list on failure (an invalid
2631#              address was found); otherwise a list whose first element is the
2632#              number of addresses found and whose remaining elements are the
2633#              addresses.  This is needed to disambiguate failure (invalid)
2634#              from success with no addresses found, because an empty string is
2635#              a valid list.
2636
2637sub rfc822_validlist {
2638    my $s = rfc822_strip_comments(shift);
2639
2640    if (!$rfc822re) {
2641        $rfc822re = make_rfc822re();
2642    }
2643    # * null list items are valid according to the RFC
2644    # * the '1' business is to aid in distinguishing failure from no results
2645
2646    my @r;
2647    if ($s =~ m/^(?:$rfc822re)?(?:,(?:$rfc822re)?)*$/so &&
2648	$s =~ m/^$rfc822_char*$/) {
2649        while ($s =~ m/(?:^|,$rfc822_lwsp*)($rfc822re)/gos) {
2650            push(@r, $1);
2651        }
2652        return wantarray ? (scalar(@r), @r) : 1;
2653    }
2654    return wantarray ? () : 0;
2655}
2656