#!/usr/bin/env perl

# SPDX-License-Identifier: MIT
#
# Copyright (c) 2025, Rob Norris <robn@despairlabs.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

use 5.010;
use warnings;
use strict;

# All files known to git are either "tagged" or "untagged". Tagged files are
# expected to have a license tag, while untagged files are expected to _not_
# have a license tag. There is no "optional" tag; all files are either "tagged"
# or "untagged".
#
# Whether or not a file is tagged or untagged is determined using the patterns
# in $tagged_patterns and $untagged_patterns and the following sequence:
#
# - if the file's full path is explicitly listed in $tagged_patterns, then the
#   file is tagged.
#
# - if the file's full path is explicitly listed in $untagged_patterns, then
#   the file is untagged.
#
# - if the filename matches a pattern in $tagged_patterns, and does not match a
#   pattern in $untagged_patterns, then the file is tagged
#
# - otherwise, the file is untagged.
#
# The patterns do a simple glob-like match over the entire path relative to the
# root of the git repo (no leading /). '*' matches anything at that point,
# across path fragments. '?' matches a single character.

# Paths and glob patterns for files that must carry a license tag. One entry
# per line; blank lines and '#' comments are dropped when the list is parsed.
# An entry containing neither '*' nor '?' is treated as an exact file path.
my $tagged_patterns = q(
    # Compiled source files
    *.c
    *.h
    *.S

    # Python files, eg test suite drivers, libzfs bindings
    *.py
    *.py.in

    # Various support scripts
    *.sh
    *.pl

    # Test suite
    *.ksh
    *.ksh.in
    *.kshlib
    *.kshlib.in
    *.shlib

    # Test suite data files
    *.run
    *.cfg
    *.cfg.in
    *.fio
    *.lua
    *.zcp

    # Manpages
    man/man?/*.?
    man/man?/*.?.in

    # Build system
    *.ac
    *.am
    *.m4

    # Unsuffixed programs (or generated of same)
    cmd/zarcstat.in
    cmd/zarcsummary
    cmd/dbufstat.in
    cmd/zilstat.in
    cmd/zpool/zpool.d/*
    etc/init.d/zfs-import.in
    etc/init.d/zfs-load-key.in
    etc/init.d/zfs-mount.in
    etc/init.d/zfs-share.in
    etc/init.d/zfs-zed.in
    etc/zfs/zfs-functions.in
    scripts/objtool-wrapper.in

    # Misc items that have clear licensing info but aren't easily matched,
    # or are the first of a class that we aren't ready to match yet.
    module/lua/README.zfs
    scripts/kmodtool
    tests/zfs-tests/tests/functional/inheritance/README.config
    tests/zfs-tests/tests/functional/inheritance/README.state
    cmd/zed/zed.d/statechange-notify.sh
);

# Paths and glob patterns for files that must NOT carry a license tag. Same
# format as $tagged_patterns. A glob listed here takes precedence over a
# matching glob in $tagged_patterns; an exact path in $tagged_patterns takes
# precedence over everything here.
my $untagged_patterns = q(
    # Exclude CI tooling as it's not interesting for overall project
    # licensing.
    .github/*

    # Everything below this has unclear licensing. Work is happening to
    # identify and update them. Once one gains a tag it should be removed
    # from this list.

    cmd/zed/zed.d/*.sh
    cmd/zpool/zpool.d/*

    contrib/coverity/model.c
    include/libzdb.h
    include/os/freebsd/spl/sys/inttypes.h
    include/os/freebsd/spl/sys/mode.h
    include/os/freebsd/spl/sys/trace.h
    include/os/freebsd/zfs/sys/trace_zfs.h
    include/os/freebsd/zfs/sys/zpl.h
    include/os/linux/kernel/linux/page_compat.h
    lib/libspl/include/sys/string.h
    lib/libzdb/libzdb.c
    lib/libzpool/include/sys/trace_zfs.h
    module/lua/setjmp/setjmp.S
    module/lua/setjmp/setjmp_ppc.S
    module/zstd/include/sparc_compat.h
    module/zstd/zstd_sparc.c
    tests/zfs-tests/cmd/cp_files.c
    tests/zfs-tests/cmd/zed_fd_spill-zedlet.c
    tests/zfs-tests/tests/functional/tmpfile/tmpfile_001_pos.c
    tests/zfs-tests/tests/functional/tmpfile/tmpfile_002_pos.c
    tests/zfs-tests/tests/functional/tmpfile/tmpfile_003_pos.c
    tests/zfs-tests/tests/functional/tmpfile/tmpfile_test.c

    contrib/bpftrace/zfs-trace.sh
    contrib/pyzfs/docs/source/conf.py
    contrib/pyzfs/libzfs_core/test/__init__.py
    contrib/pyzfs/setup.py.in
    contrib/zcp/autosnap.lua
    scripts/commitcheck.sh
    scripts/man-dates.sh
    scripts/mancheck.sh
    scripts/paxcheck.sh
    scripts/zfs-helpers.sh
    scripts/zfs-tests-color.sh
    scripts/zfs.sh
    scripts/zimport.sh
    tests/zfs-tests/callbacks/zfs_failsafe.ksh
    tests/zfs-tests/include/commands.cfg
    tests/zfs-tests/include/tunables.cfg
    tests/zfs-tests/include/zpool_script.shlib
    tests/zfs-tests/tests/functional/mv_files/random_creation.ksh
);

# For files expected to have a license tag, these are the acceptable tags by
# path. A file in one of these paths with a tag not listed here must be in the
# override list below. If the file is not in any of these paths, then
# $default_license_tags is used.
my $default_license_tags = [
    'CDDL-1.0', '0BSD', 'BSD-2-Clause', 'BSD-3-Clause', 'MIT'
];

# Flat list of (path prefix => arrayref of acceptable tags) pairs. When more
# than one prefix matches a file, the longest matching prefix wins.
my @path_license_tags = (
    # Conventional wisdom is that the Linux SPL must be GPL2+ for
    # kernel compatibility.
    'module/os/linux/spl' => ['GPL-2.0-or-later'],
    'include/os/linux/spl' => ['GPL-2.0-or-later'],

    # Third-party code should keep its original license
    'module/zstd/lib' => ['BSD-3-Clause OR GPL-2.0-only'],
    'module/lua' => ['MIT'],

    # lua/setjmp is platform-specific code sourced from various places
    'module/lua/setjmp' => $default_license_tags,

    # Some of the fletcher modules are dual-licensed
    'module/zcommon/zfs_fletcher' =>
        ['BSD-2-Clause OR GPL-2.0-only', 'CDDL-1.0'],

    'module/icp' => ['Apache-2.0', 'CDDL-1.0'],
    'contrib/icp' => ['Apache-2.0', 'CDDL-1.0'],

    # Python bindings are always Apache-2.0; CDDL is available for build
    # files in that dir.
    'contrib/pyzfs' => ['Apache-2.0', 'CDDL-1.0'],

    # Common licenses for autoconf macros; some of these are complex
    # with exceptions, so we don't have a "generic" list as such, just
    # a list of all the ones currently in use.
    'config' => [
        'CDDL-1.0', 'LGPL-2.1-or-later', 'FSFAP', 'FSFULLR',
        'GPL-2.0-or-later WITH Autoconf-exception-generic',
        'GPL-3.0-or-later WITH Autoconf-exception-macro',
    ],
);

# This is a list of "special case" license tags that are in use in the tree,
# and the files where they occur. These exist for a variety of reasons, and
# generally should not be used for new code. If you need to bring in code that
# has a different license from the acceptable ones listed above, then you will
# also need to add it here, with rationale provided and approval given in your
# PR.
my %override_file_license_tags = (

    # SPDX have repeatedly rejected the creation of a tag for a public
    # domain dedication, as not all dedications are clear and unambiguous
    # in their meaning and not all jurisdictions permit relinquishing a
    # copyright anyway.
    #
    # A reasonably common workaround appears to be to create a local
    # (project-specific) identifier to convey whatever meaning the project
    # wishes it to. To cover OpenZFS' use of third-party code with a
    # public domain dedication, we use this custom tag.
    #
    # Further reading:
    #   https://github.com/spdx/old-wiki/blob/main/Pages/Legal%20Team/Decisions/Dealing%20with%20Public%20Domain%20within%20SPDX%20Files.md
    #   https://spdx.github.io/spdx-spec/v2.3/other-licensing-information-detected/
    #   https://cr.yp.to/spdx.html
    #
    'LicenseRef-OpenZFS-ThirdParty-PublicDomain' => [qw(
        include/sys/skein.h
        module/icp/algs/skein/skein_block.c
        module/icp/algs/skein/skein.c
        module/icp/algs/skein/skein_impl.h
        module/icp/algs/skein/skein_iv.c
        module/icp/algs/skein/skein_port.h
        module/zfs/vdev_draid_rand.c
    )],

    # Legacy inclusions
    'Brian-Gladman-3-Clause' => [qw(
        module/icp/asm-x86_64/aes/aestab.h
        module/icp/asm-x86_64/aes/aesopt.h
        module/icp/asm-x86_64/aes/aeskey.c
        module/icp/asm-x86_64/aes/aes_amd64.S
    )],
    'OpenSSL-standalone' => [qw(
        module/icp/asm-x86_64/aes/aes_aesni.S
    )],

    # Legacy inclusions of BSD-2-Clause files in Linux SPL.
    'BSD-2-Clause' => [qw(
        include/os/linux/spl/sys/debug.h
        module/os/linux/spl/spl-zone.c
    )],

    # Temporary overrides for things that have the wrong license for
    # their path. Work is underway to understand and resolve these.
    'GPL-2.0-or-later' => [qw(
        include/os/freebsd/spl/sys/kstat.h
        include/os/freebsd/spl/sys/sunddi.h
    )],
    'CDDL-1.0' => [qw(
        include/os/linux/spl/sys/errno.h
        include/os/linux/spl/sys/ia32/asm_linkage.h
        include/os/linux/spl/sys/misc.h
        include/os/linux/spl/sys/procfs_list.h
        include/os/linux/spl/sys/trace.h
        include/os/linux/spl/sys/trace_spl.h
        include/os/linux/spl/sys/trace_taskq.h
        include/os/linux/spl/sys/wmsum.h
        module/os/linux/spl/spl-procfs-list.c
        module/os/linux/spl/spl-trace.c
        module/lua/README.zfs
    )],
);

##########

# Parse a newline-separated pattern list (the format used by $tagged_patterns
# and $untagged_patterns).
#
# Leading/trailing whitespace and '#' comments are stripped from every line,
# and empty lines are skipped. Each remaining entry is either:
#
#  - a literal path (no '?' or '*'), collected into a lookup hash; or
#  - a glob, converted to a regex fragment where '?' matches one character
#    and '*' matches any run of characters (including '/').
#
# Returns a two-element list: a compiled regex that matches a whole path
# against any of the globs, and a hashref whose keys are the literal paths.
sub setup_patterns {
    my ($patterns) = @_;

    my @globs;
    my %literal;

    for my $pat (split "\n", $patterns) {
        # remove leading/trailing whitespace and comments
        $pat =~ s/(?:^\s*|\s*(?:#.*)?$)//g;

        # skip (now-)empty lines
        next if $pat eq '';

        # if the "pattern" has no metachars, then it's a literal file
        # path and gets matched a bit more strongly
        unless ($pat =~ m/[?*]/) {
            $literal{$pat} = 1;
            next;
        }

        # Glob to regex conversion. Split on the glob metachars (keeping
        # them), translate those, and quote every literal run in between
        # so that no regex metachar in a path can change the match.
        my $fragment = '';
        for my $piece (split /([?*])/, $pat) {
            $fragment .= $piece eq '?' ? '.'
                       : $piece eq '*' ? '.*'
                       :                 quotemeta($piece);
        }
        push @globs, $fragment;
    }

    # With no globs at all this only matches the empty string, ie no path.
    my $re = join '|', @globs;
    return (qr/\A(?:$re)\z/, \%literal);
}

my ($tagged_re, $tagged_files) = setup_patterns($tagged_patterns);
my ($untagged_re, $untagged_files) = setup_patterns($untagged_patterns);

# Decide whether $file is expected to carry a license tag. Returns true if
# so, false if not.
#
# Explicitly listed paths are removed from $tagged_files/$untagged_files as
# they are seen, so that whatever is left once the scan is finished can be
# reported as stale. As a consequence this must be called at most once per
# file.
sub file_is_tagged {
    my ($file) = @_;

    # explicitly tagged
    return 1 if delete $tagged_files->{$file};

    # explicitly untagged
    return 0 if delete $untagged_files->{$file};

    # must match tagged patterns and not match untagged patterns
    return $file =~ $tagged_re && $file !~ $untagged_re;
}

# Invert the override table into a file => tag lookup. Entries are removed
# as they are used, so leftovers can be reported once the scan is finished.
my %override_tags;
for my $tag (keys %override_file_license_tags) {
    for my $file (@{ $override_file_license_tags{$tag} }) {
        $override_tags{$file} = $tag;
    }
}

##########

my $rc = 0;

# Get a list of all files known to git. This is a crude way of avoiding any
# build artifacts that have tags embedded in them. -z gives NUL-terminated,
# unquoted names, so unusually-named files are not mangled by git's path
# quoting and then silently skipped by the -f test below.
my $git_listing = qx(git ls-tree --name-only -r -z HEAD);
die "$0: couldn't list files with git ls-tree (status $?)\n" if $? != 0;
my @git_files = sort split /\0/, $git_listing;

# Scan all files and work out if their tags are correct.
for my $file (@git_files) {
    # Ignore non-files. git can store other types of objects (submodule
    # dirs, symlinks, etc) that aren't interesting for licensing.
    next unless -f $file && ! -l $file;

    # Open the file, and extract its license tag. We only check the first
    # 4K of each file because many of these files are large, binary, or
    # both. For a typical source file that means the tag should be found
    # within the first ~50 lines.
    open my $fh, '<', $file or die "$0: couldn't open $file: $!\n";
    my $nbytes = read $fh, my $buf, 4096;
    die "$0: couldn't read $file: $!\n" if !defined $nbytes;
    close $fh;

    # The tag must run to the end of its line; the first one found wins.
    my ($tag) =
        $buf =~ m/\bSPDX-License-Identifier: ([A-Za-z0-9_\-\. ]+)$/m;

    # Decide if the file should have a tag at all
    my $tagged = file_is_tagged($file);

    # If no license tag is wanted, there's not much left to do
    if (!$tagged) {
        if (defined $tag) {
            # untagged file has a tag, pattern change required
            say "unexpected license tag: $file";
            $rc = 1;
        }
        next;
    }

    # If a tag is required, but doesn't have one, warn and loop.
    if (!defined $tag) {
        say "missing license tag: $file";
        $rc = 1;
        next;
    }

    # Determine the set of valid license tags for this file. Start with
    # the defaults.
    my $tags = $default_license_tags;

    if (exists $override_tags{$file}) {
        # File has an explicit override, use it.
        $tags = [delete $override_tags{$file}];
    }
    else {
        # Work through the path tag sets, taking the set with the
        # most precise match. If no sets match, we fall through and
        # are left with the default set.
        #
        # This is a plain string-prefix test rather than a directory
        # test, so a prefix matches any path that begins with that
        # string.
        my $matchlen = 0;
        for (my $n = 0; $n < @path_license_tags; $n += 2) {
            my ($path, $t) = @path_license_tags[$n, $n + 1];
            next unless index($file, $path) == 0;
            next unless length($path) > $matchlen;
            $tags = $t;
            $matchlen = length($path);
        }
    }

    # Confirm the file's tag is in the set, and warn if not.
    unless (grep { $_ eq $tag } @$tags) {
        say "invalid license tag: $file";
        say " (got $tag; expected: @$tags)";
        $rc = 1;
    }
}

##########

# List any files explicitly listed as tagged or untagged that we didn't see.
# Likely the file was removed from the repo but not from our lists.
#
# NOTE(review): override entries are only consumed for files that are tagged
# and actually have a tag, so an overridden file that failed one of the
# earlier checks is also reported by the last loop here.

for my $file (sort keys %$tagged_files) {
    say "explicitly tagged file not on disk: $file";
    $rc = 1;
}
for my $file (sort keys %$untagged_files) {
    say "explicitly untagged file not on disk: $file";
    $rc = 1;
}
for my $file (sort keys %override_tags) {
    say "explicitly overridden file not on disk: $file";
    $rc = 1;
}

exit $rc;