xref: /qemu/scripts/oss-fuzz/minimize_qtest_trace.py (revision 247ab240c2aa391c611a5cf7b79226b89722d53e)
1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3
4"""
5This takes a crashing qtest trace and tries to remove superflous operations
6"""
7
8import sys
9import os
10import subprocess
11import time
12import struct
13
14QEMU_ARGS = None
15QEMU_PATH = None
16TIMEOUT = 5
17CRASH_TOKEN = None
18
19write_suffix_lookup = {"b": (1, "B"),
20                       "w": (2, "H"),
21                       "l": (4, "L"),
22                       "q": (8, "Q")}
23
24def usage():
25    sys.exit("""\
26Usage: QEMU_PATH="/path/to/qemu" QEMU_ARGS="args" {} input_trace output_trace
27By default, will try to use the second-to-last line in the output to identify
28whether the crash occred. Optionally, manually set a string that idenitifes the
29crash by setting CRASH_TOKEN=
30""".format((sys.argv[0])))
31
32deduplication_note = """\n\
33Note: While trimming the input, sometimes the mutated trace triggers a different
34type crash but indicates the same bug. Under this situation, our minimizer is
35incapable of recognizing and stopped from removing it. In the future, we may
36use a more sophisticated crash case deduplication method.
37\n"""
38
39def check_if_trace_crashes(trace, path):
40    with open(path, "w") as tracefile:
41        tracefile.write("".join(trace))
42
43    rc = subprocess.Popen("timeout -s 9 {timeout}s {qemu_path} {qemu_args} 2>&1\
44    < {trace_path}".format(timeout=TIMEOUT,
45                           qemu_path=QEMU_PATH,
46                           qemu_args=QEMU_ARGS,
47                           trace_path=path),
48                          shell=True,
49                          stdin=subprocess.PIPE,
50                          stdout=subprocess.PIPE,
51                          encoding="utf-8")
52    global CRASH_TOKEN
53    if CRASH_TOKEN is None:
54        try:
55            outs, _ = rc.communicate(timeout=5)
56            CRASH_TOKEN = " ".join(outs.splitlines()[-2].split()[0:3])
57        except subprocess.TimeoutExpired:
58            print("subprocess.TimeoutExpired")
59            return False
60        print("Identifying Crashes by this string: {}".format(CRASH_TOKEN))
61        global deduplication_note
62        print(deduplication_note)
63        return True
64
65    for line in iter(rc.stdout.readline, ""):
66        if "CLOSED" in line:
67            return False
68        if CRASH_TOKEN in line:
69            return True
70
71    print("\nWarning:")
72    print("  There is no 'CLOSED'or CRASH_TOKEN in the stdout of subprocess.")
73    print("  Usually this indicates a different type of crash.\n")
74    return False
75
76
77def remove_lines(newtrace, outpath):
78    remove_step = 1
79    i = 0
80    while i < len(newtrace):
81        # 1.) Try to remove lines completely and reproduce the crash.
82        # If it works, we're done.
83        if (i+remove_step) >= len(newtrace):
84            remove_step = 1
85        prior = newtrace[i:i+remove_step]
86        for j in range(i, i+remove_step):
87            newtrace[j] = ""
88        print("Removing {lines} ...\n".format(lines=prior))
89        if check_if_trace_crashes(newtrace, outpath):
90            i += remove_step
91            # Double the number of lines to remove for next round
92            remove_step *= 2
93            continue
94        # Failed to remove multiple IOs, fast recovery
95        if remove_step > 1:
96            for j in range(i, i+remove_step):
97                newtrace[j] = prior[j-i]
98            remove_step = 1
99            continue
100        newtrace[i] = prior[0] # remove_step = 1
101
102        # 2.) Try to replace write{bwlq} commands with a write addr, len
103        # command. Since this can require swapping endianness, try both LE and
104        # BE options. We do this, so we can "trim" the writes in (3)
105
106        if (newtrace[i].startswith("write") and not
107            newtrace[i].startswith("write ")):
108            suffix = newtrace[i].split()[0][-1]
109            assert(suffix in write_suffix_lookup)
110            addr = int(newtrace[i].split()[1], 16)
111            value = int(newtrace[i].split()[2], 16)
112            for endianness in ['<', '>']:
113                data = struct.pack("{end}{size}".format(end=endianness,
114                                   size=write_suffix_lookup[suffix][1]),
115                                   value)
116                newtrace[i] = "write {addr} {size} 0x{data}\n".format(
117                    addr=hex(addr),
118                    size=hex(write_suffix_lookup[suffix][0]),
119                    data=data.hex())
120                if(check_if_trace_crashes(newtrace, outpath)):
121                    break
122            else:
123                newtrace[i] = prior[0]
124
125        # 3.) If it is a qtest write command: write addr len data, try to split
126        # it into two separate write commands. If splitting the data operand
127        # from length/2^n bytes to the left does not work, try to move the pivot
128        # to the right side, then add one to n, until length/2^n == 0. The idea
129        # is to prune unneccessary bytes from long writes, while accommodating
130        # arbitrary MemoryRegion access sizes and alignments.
131
132        # This algorithm will fail under some rare situations.
133        # e.g., xxxxxxxxxuxxxxxx (u is the unnecessary byte)
134
135        if newtrace[i].startswith("write "):
136            addr = int(newtrace[i].split()[1], 16)
137            length = int(newtrace[i].split()[2], 16)
138            data = newtrace[i].split()[3][2:]
139            if length > 1:
140                leftlength = int(length/2)
141                rightlength = length - leftlength
142                newtrace.insert(i+1, "")
143                power = 1
144                while leftlength > 0:
145                    newtrace[i] = "write {addr} {size} 0x{data}\n".format(
146                            addr=hex(addr),
147                            size=hex(leftlength),
148                            data=data[:leftlength*2])
149                    newtrace[i+1] = "write {addr} {size} 0x{data}\n".format(
150                            addr=hex(addr+leftlength),
151                            size=hex(rightlength),
152                            data=data[leftlength*2:])
153                    if check_if_trace_crashes(newtrace, outpath):
154                        break
155                    # move the pivot to right side
156                    if leftlength < rightlength:
157                        rightlength, leftlength = leftlength, rightlength
158                        continue
159                    power += 1
160                    leftlength = int(length/pow(2, power))
161                    rightlength = length - leftlength
162                if check_if_trace_crashes(newtrace, outpath):
163                    i -= 1
164                else:
165                    newtrace[i] = prior[0]
166                    del newtrace[i+1]
167        i += 1
168
169
170def minimize_trace(inpath, outpath):
171    global TIMEOUT
172    with open(inpath) as f:
173        trace = f.readlines()
174    start = time.time()
175    if not check_if_trace_crashes(trace, outpath):
176        sys.exit("The input qtest trace didn't cause a crash...")
177    end = time.time()
178    print("Crashed in {} seconds".format(end-start))
179    TIMEOUT = (end-start)*5
180    print("Setting the timeout for {} seconds".format(TIMEOUT))
181
182    newtrace = trace[:]
183
184    # remove lines
185    old_len = len(newtrace) + 1
186    while(old_len > len(newtrace)):
187        old_len = len(newtrace)
188        remove_lines(newtrace, outpath)
189        newtrace = list(filter(lambda s: s != "", newtrace))
190
191    assert(check_if_trace_crashes(newtrace, outpath))
192
193
194if __name__ == '__main__':
195    if len(sys.argv) < 3:
196        usage()
197
198    QEMU_PATH = os.getenv("QEMU_PATH")
199    QEMU_ARGS = os.getenv("QEMU_ARGS")
200    if QEMU_PATH is None or QEMU_ARGS is None:
201        usage()
202    # if "accel" not in QEMU_ARGS:
203    #     QEMU_ARGS += " -accel qtest"
204    CRASH_TOKEN = os.getenv("CRASH_TOKEN")
205    QEMU_ARGS += " -qtest stdio -monitor none -serial none "
206    minimize_trace(sys.argv[1], sys.argv[2])
207