xref: /src/share/mk/meta2deps.sh (revision b1bebaaba9b9c0ddfe503c43ca8e9e3917ee2c57)
1#!/bin/sh
2
3# NAME:
4#	meta2deps.sh - extract useful info from .meta files
5#
6# SYNOPSIS:
7#	meta2deps.sh SB="SB" "meta" ...
8#
9# DESCRIPTION:
#	This script looks at each "meta" file and extracts the
11#	information needed to deduce build and src dependencies.
12#
13#	To do this, we extract the 'CWD' record as well as all the
14#	syscall traces which describe 'R'ead, 'C'hdir and 'E'xec
15#	syscalls.
16#
17#	The typical meta file looks like::
18#.nf
19#
20#	# Meta data file "path"
21#	CMD "command-line"
22#	CWD "cwd"
23#	TARGET "target"
24#	-- command output --
25#	-- filemon acquired metadata --
26#	# buildmon version 2
27#	V 2
28#	E "pid" "path"
29#	R "pid" "path"
30#	C "pid" "cwd"
31#	R "pid" "path"
32#	X "pid" "status"
33#.fi
34#
35#	The fact that all the syscall entry lines start with a single
#	character makes these files quite easy to process using sed(1).
37#
38#	To simplify the logic the 'CWD' line is made to look like a
39#	normal 'C'hdir entry, and "cwd" is remembered so that it can
40#	be prefixed to any "path" which is not absolute.
41#
42#	If the "path" being read ends in '.srcrel' it is the content
43#	of (actually the first line of) that file that we are
44#	interested in.
45#
46#	Any "path" which lies outside of the sandbox "SB" is generally
47#	not of interest and is ignored.
48#
#	The output is a set of absolute paths with "SB" like:
50#.nf
51#
52#	$SB/obj-i386/bsd/gnu/lib/csu
53#	$SB/obj-i386/bsd/gnu/lib/libgcc
54#	$SB/obj-i386/bsd/include
55#	$SB/obj-i386/bsd/lib/csu/i386-elf
56#	$SB/obj-i386/bsd/lib/libc
57#	$SB/src/bsd/include
58#	$SB/src/bsd/sys/i386/include
59#	$SB/src/bsd/sys/sys
60#	$SB/src/pan-release/rtsock
61#	$SB/src/pfe-shared/include/jnx
62#.fi
63#
64#	Which can then be further processed by 'gendirdeps.mk'
65#
66#	If we are passed 'DPDEPS='"dpdeps", then for each src file
67#	outside of "CURDIR" we read, we output a line like:
68#.nf
69#
70#	DPDEPS_$path += $RELDIR
71#.fi
72#
#	with "$path" getting turned into reldirs, so that we can end
74#	up with a list of all the directories which depend on each src
75#	file in another directory.  This can allow for efficient yet
76#	complete testing of changes.
77
78
79# RCSid:
80#	$Id: meta2deps.sh,v 1.26 2025/12/08 17:34:02 sjg Exp $
81
82# SPDX-License-Identifier: BSD-2-Clause
83#
84# Copyright (c) 2011-2025, Simon J. Gerraty
85# Copyright (c) 2010-2013, Juniper Networks, Inc.
86# All rights reserved.
87#
88# Redistribution and use in source and binary forms, with or without
89# modification, are permitted provided that the following conditions
90# are met:
91# 1. Redistributions of source code must retain the above copyright
92#    notice, this list of conditions and the following disclaimer.
93# 2. Redistributions in binary form must reproduce the above copyright
94#    notice, this list of conditions and the following disclaimer in the
95#    documentation and/or other materials provided with the distribution.
96#
97# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
98# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
99# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
100# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
101# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
102# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
103# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
104# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
105# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
106# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
107# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
108
# Turn on command tracing when the comma-wrapped value of DEBUG_SH
# contains ",meta2deps".
case ",${DEBUG_SH}," in
*,meta2deps*)
    set -x
    ;;
esac
112
# meta2src meta ...
#	Print the sorted, unique paths of the .c, .h, .y and .l files
#	that the given meta files record as 'R'ead.
meta2src() {
    # /dev/null gives cat an operand even when no files are passed,
    # so it never falls back to reading stdin
    cat /dev/null "$@" \
	| sed -n '/^R .*\.[chyl]$/s,^..[0-9]* ,,p' \
	| sort -u
}
118
# meta2dirs meta ...
#	Print the sorted, unique directory part of every 'R'ead path
#	that contains a '/' and whose last component has an extension
#	starting with a lower-case letter or digit.
meta2dirs() {
    # /dev/null gives cat an operand even when no files are passed,
    # so it never falls back to reading stdin
    cat /dev/null "$@" \
	| sed -n '/^R .*\/.*\.[a-z0-9][^\/]*$/s,^..[0-9]* \(.*\)/[^/]*$,\1,p' \
	| sort -u
}
124
# add_list ['|'] [-s suffix] name value ...
#	Append each "value" (with "suffix" appended, if given) to the
#	list stored in the variable called "name", skipping any entry
#	that is already in the list.
#
#	Entries are separated by a space, or by '|' when a leading
#	'|' argument is given.  The separator and -s must come
#	before "name".
#
#	Note: sep, suffix, name, list and top are ordinary (global)
#	shell variables and are overwritten by every call.
add_list() {
    sep=' '
    suffix=
    while :
    do
	case "$1" in
	"|") sep="$1"; shift;;
	-s) suffix="$2"; shift 2;;
	*) break;;
	esac
    done
    name=$1
    shift
    # fetch the current value of the variable named by $name
    eval "list=\$$name"
    for top in "$@"
    do
	# wrap both sides in separators so only whole entries match
	case "$sep$list$sep" in
	*"$sep$top$suffix$sep"*) continue;;
	esac
	list="${list:+$list$sep}$top$suffix"
    done
    # Store by reference: the eval only sees "name=$list", so the
    # list content is never re-parsed.  Embedding the value in the
    # eval string would expand any '$', '"', '`' or '\' in it.
    eval "$name=\$list"
}
148
# Some Linux systems have deprecated egrep in favor of grep -E,
# but not every system supports grep -E.
# Probe egrep: unless it prints exactly the matched line (nothing
# else on stdout or stderr), replace it with a grep -E wrapper.
if [ "$(echo bmake | egrep 'a|b' 2>&1)" != bmake ]; then
    egrep() { grep -E "$@"; }
fi
155
# _excludes_f
#	Filter stdin to stdout, dropping every line that matches the
#	extended regular expression in $EXCLUDES.
_excludes_f() {
    # -e keeps a pattern that starts with '-' from being taken
    # as an option
    egrep -v -e "$EXCLUDES"
}
159
# error message ...
#	Print "ERROR: " followed by the arguments on stderr,
#	then exit the current shell with status 1.
error() {
    echo "ERROR: $*" >&2
    exit 1
}
164
# meta2deps [VAR=value ...] [options] meta ...
#
#	Read the given "meta" files and print the sorted, unique
#	object directories, qualified .dirdep entries and src
#	directories that they reference.
#	An argument of the form @file names a file listing meta files.
#
#	Leading arguments:
#	VAR=value	exported into the environment (eg. SB=, DPDEPS=)
#	-a arch		sets MACHINE_ARCH
#	-m machine	sets MACHINE
#	-C curdir	sets CURDIR
#	-H host_target	sets HOST_TARGET
#	-S srctop	added to SRCTOPS
#	-O objroot	added to OBJROOTS
#	-X regex	added to EXCLUDES; input lines matching it are dropped
#	-R reldir	sets RELDIR
#	-T target_spec	sets TARGET_SPEC
#
#	If both DPDEPS and RELDIR are set, "DPDEPS_<src> += $RELDIR"
#	lines are also written to the file named by DPDEPS.
#
#	Exits with status 1 (via error) if the filemon data is
#	missing, truncated or otherwise inconsistent.
meta2deps() {
    DPDEPS=
    SRCTOPS=$SRCTOP
    OBJROOTS=
    EXCLUDES=
    # consume VAR=value and option arguments;
    # the first other word starts the list of meta files
    while :
    do
	case "$1" in
	*=*) eval export "$1"; shift;;
	-a) MACHINE_ARCH=$2; shift 2;;
	-m) MACHINE=$2; shift 2;;
	-C) CURDIR=$2; shift 2;;
	-H) HOST_TARGET=$2; shift 2;;
	-S) add_list SRCTOPS $2; shift 2;;
	-O) add_list OBJROOTS $2; shift 2;;
	# the '|' separator must come before the list name,
	# otherwise add_list treats it as a value and EXCLUDES
	# ends up as "| regex" which matches every line
	-X) add_list '|' EXCLUDES $2; shift 2;;
	-R) RELDIR=$2; shift 2;;
	-T) TARGET_SPEC=$2; shift 2;;
	*) break;;
	esac
    done

    # _ht must start empty (it is only set for MACHINE=host)
    # so that it cannot be inherited from the environment
    _ht= _o=
    case "$MACHINE" in
    host) _ht=$HOST_TARGET;;
    esac

    # reduce each objroot that mentions the machine to its prefix
    # by stripping the trailing $_ht, $TARGET_SPEC or $MACHINE
    for o in $OBJROOTS
    do
	case "$MACHINE,/$o/" in
	host,*$HOST_TARGET*) ;;
	*$MACHINE*|*${TARGET_SPEC:-$MACHINE}*) ;;
	*) add_list _o $o; continue;;
	esac
	for x in $_ht $TARGET_SPEC $MACHINE
	do
	    case "$o" in
	    "") continue;;
	    */$x/) add_list _o ${o%$x/}; o=;;
	    */$x) add_list _o ${o%$x}; o=;;
	    *$x/) add_list _o ${o%$x/}; o=;;
	    *$x) add_list _o ${o%$x}; o=;;
	    esac
	done
    done
    OBJROOTS="$_o"

    # default OBJTOP from the first objroot
    case "$OBJTOP" in
    "")
	for o in $OBJROOTS
	do
	    OBJTOP=$o${TARGET_SPEC:-$MACHINE}
	    break
	done
	;;
    esac
    # glob patterns used below to recognize src and obj paths
    # NOTE(review): a '|' that results from expanding $src_re or
    # $obj_re in a case pattern is matched literally, it is not an
    # alternation, so more than one entry in SRCTOPS or OBJROOTS
    # may not match as intended - confirm.
    src_re=
    obj_re=
    add_list '|' -s '/*' src_re $SRCTOPS
    add_list '|' -s '*' obj_re $OBJROOTS

    # DPDEPS output needs RELDIR
    [ -z "$RELDIR" ] && unset DPDEPS
    tf=/tmp/m2d$$-$USER
    rm -f $tf.*
    trap 'rm -f $tf.*; trap 0' 0

    > $tf.dirdep
    > $tf.qual
    > $tf.srcdep
    > $tf.srcrel
    > $tf.dpdeps

    seenit=
    seensrc=
    lpid=
    case "$EXCLUDES" in
    "") _excludes=cat;;
    *) _excludes=_excludes_f;;
    esac
    # The pipeline below concatenates the meta files, keeps only
    # the records of interest (CWD is rewritten as "C C cwd" and
    # single quotes are removed), applies the excludes and parses
    # what is left in a subshell.
    # handle @list files
    case "$@" in
    *@[!.]*)
	for f in "$@"
	do
	    case "$f" in
	    *.meta) cat $f;;
	    @*) xargs cat < ${f#@};;
	    *) cat $f;;
	    esac
	done
	;;
    *) cat /dev/null "$@";;
    esac 2> /dev/null |
    sed -e 's,^CWD,C C,;/^[#CREFLMVWX] /!d' -e "s,',,g" |
    $_excludes | ( version=no epids= xpids= eof_token=no
    while read op pid path path2
    do
	: op=$op pid=$pid path=$path path2=$path2
	# first a sanity check - filemon on Linux is not very reliable
	# path2 should only be non-empty for op L or M
	# and it should not contain spaces.
	# It will also be non-empty for # Meta line
	# which tells us which meta_file we are processing
	case "$op,$path2" in
	\#*,*.meta) # new file, reset some vars
	    version=no epids= xpids= eof_token=no lpid=
	    meta_file=`set -- $path2; echo $2`
	    continue
	    ;;
	\#*) ;;			# ok
	[LM],) error "missing path2 in: '$op $pid $path'";;
	[LMX],*" "*) error "wrong number of words in: '$op $pid $path $path2'";;
	*,|[LMX],*) ;;		# ok
	*) error "wrong number of words in: '$op $pid $path $path2'";;
	esac
	# we track cwd and ldir (of interest) per pid
	# CWD is bmake's cwd
	: lpid=$lpid,pid=$pid
	case "$lpid,$pid" in
	,C) CWD=$path cwd=$path ldir=$path
	    if [ -z "$SB" ]; then
		SB=`echo $CWD | sed 's,/obj.*,,'`
	    fi
	    SRCTOP=${SRCTOP:-$SB/src}
	    # a second CWD record while version is still 0 means
	    # the previous data had no V record
	    case "$version" in
	    no) ;;		# ignore
	    0) error "no filemon data: $meta_file";;
	    *) ;;
	    esac
	    version=0
	    case "$eof_token" in
	    no) ;;		# ignore
	    0) error "truncated filemon data: $meta_file";;
	    esac
	    eof_token=0
	    continue
	    ;;
	$pid,$pid) ;;
	*,[1-9]*)
	    # Switching to a different (numeric) pid: save ldir for
	    # the previous pid and load the state of the new one.
	    # The pattern must also match while lpid is still empty,
	    # otherwise this arm could never be reached.
	    case "$lpid" in
	    "") ;;
	    *) eval "ldir_$lpid=\$ldir";;
	    esac
	    eval "ldir=\${ldir_$pid:-\$CWD} cwd=\${cwd_$pid:-\$CWD}"
	    lpid=$pid
	    ;;
	esac

	: op=$op path=$path
	case "$op,$path" in
	V,*) version=$pid; continue;;
	W,*srcrel|*.dirdep) continue;;
	C,*)
	    case "$path" in
	    /*) cwd=$path;;
	    *) cwd=`cd $cwd/$path 2> /dev/null && /bin/pwd`;;
	    esac
	    # watch out for temp dirs that no longer exist
	    test -d ${cwd:-/dev/null/no/such} || cwd=$CWD
	    eval cwd_$pid=$cwd
	    continue
	    ;;
	F,*) # $path is new pid
	    eval cwd_$path=$cwd ldir_$path=$ldir
	    continue
	    ;;
	\#,bye) eof_token=1; continue;;
	\#*) continue;;
	*)  dir=${path%/*}
	    case "$op" in
	    E)	# setid apps get no tracing so we won't see eXit
		case `'ls' -l $path 2> /dev/null | sed 's, .*,,'` in
		*s*) ;;
		*) epids="$epids $pid";;
		esac
		;;
	    X) xpids="$xpids $pid"; continue;;
	    esac
	    case "$path" in
	    $src_re|$obj_re) ;;
	    /*/stage/*) ;;
	    /*) continue;;
	    *)
		# relative path: try it against ldir then cwd
		# (and for ../ links, against path2 and its dir)
		rlist="$ldir/$path $cwd/$path"
		case "$op,$path" in
		[ML],../*) rlist="$rlist $path2/$path `dirname $path2`/$path";;
		esac
		for path in $rlist
		do
		    test -e $path && break
		done
		dir=${path%/*}
		;;
	    esac
	    ;;
	esac
	# avoid repeating ourselves...
	case "$DPDEPS,$seensrc," in
	,*)
	    case ",$seenit," in
	    *,$dir,*) continue;;
	    esac
	    ;;
	*,$path,*) continue;;
	esac
	# canonicalize if needed
	case "/$dir/" in
	*/../*|*/./*)
	    rdir=$dir
	    dir=`cd $dir 2> /dev/null && /bin/pwd`
	    seen="$rdir,$dir"
	    ;;
	*)  seen=$dir;;
	esac
	case "$dir" in
	${CURDIR:-.}|"") continue;;
	$src_re)
	    # avoid repeating ourselves...
	    case "$DPDEPS,$seensrc," in
	    ,*)
		case ",$seenit," in
		*,$dir,*) continue;;
		esac
		;;
	    esac
	    ;;
	*)
	    case ",$seenit," in
	    *,$dir,*) continue;;
	    esac
	    ;;
	esac
	# a directory is only remembered as ldir for this pid
	if [ -d $path ]; then
	    case "$path" in
	    */..) ldir=${dir%/*};;
	    *) ldir=$path;;
	    esac
	    continue
	fi
	[ -f $path ] || continue
	case "$dir" in
	$CWD) continue;;		# ignore
	$src_re)
	    seenit="$seenit,$seen"
	    echo $dir >> $tf.srcdep
	    case "$DPDEPS,$RELDIR,$seensrc," in
	    ,*) ;;
	    *)	seensrc="$seensrc,$path"
		echo "DPDEPS_$dir/${path##*/} += $RELDIR" >> $tf.dpdeps
		;;
	    esac
	    continue
	    ;;
	esac
	# if there is a .dirdep we cannot skip
	# just because we've seen the dir before.
	if [ -s $path.dirdep ]; then
	    # this file contains:
	    # '# ${RELDIR}.<machine>'
	    echo $path.dirdep >> $tf.qual
	    continue
	elif [ -s $dir.dirdep ]; then
	    echo $dir.dirdep >> $tf.qual
	    seenit="$seenit,$seen"
	    continue
	fi
	seenit="$seenit,$seen"
	case "$dir" in
	$obj_re)
	    echo $dir;;
	esac
    done > $tf.dirdep
    # checks for the last meta file processed
    : version=$version
    case "$version" in
    0) error "no filemon data: $meta_file";;
    esac
    : eof_token=$eof_token
    case "$eof_token" in
    0) error "truncated filemon data: $meta_file";;
    esac
    # every pid that did an Exec should also have an eXit record
    for p in $epids
    do
	: p=$p
	case " $xpids " in
	*" $p "*) ;;
	*) error "missing eXit for pid $p: $meta_file";;
	esac
    done ) || exit 1
    # _nl stays "echo" only if none of the files below had content
    _nl=echo
    for f in $tf.dirdep $tf.qual $tf.srcdep
    do
	[ -s $f ] || continue
	case $f in
	*qual) # a list of .dirdep files
	    # we can prefix everything with $OBJTOP to
	    # tell gendirdeps.mk that these are
	    # DIRDEP entries, since they are already
	    # qualified with .<machine> as needed.
	    # We strip .$MACHINE though
	    xargs cat < $f | sort -u |
	    sed "s,^# ,,;s,^,$OBJTOP/,;s,\.${TARGET_SPEC:-$MACHINE}\$,,;s,\.$MACHINE\$,,"
	    ;;
	*)  sort -u $f;;
	esac
	_nl=:
    done
    if [ -s $tf.dpdeps ]; then
	case "$DPDEPS" in
	*/*) ;;
	*) echo > $DPDEPS;;		# the echo is needed!
	esac
	sort -u $tf.dpdeps |
	sed "s,${SRCTOP}/,,;s,${SB_BACKING_SB:-$SB}/src/,," >> $DPDEPS
    fi
    # ensure we produce _something_ else egrep -v gets upset
    $_nl
}
482
# Run the function that matches the name this script was invoked
# as; "$0" gets a leading '/' so that a bare name matches too.
case "/$0" in
*/meta2dep*)
    meta2deps "$@"
    ;;
*/meta2dirs*)
    meta2dirs "$@"
    ;;
*/meta2src*)
    meta2src "$@"
    ;;
esac
488