From 681153b954e195c9ba999177a133ab58de2b40d3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Th=C3=A9ophile=20Bastian?= <contact@tobast.fr>
Date: Fri, 29 Mar 2019 19:26:58 +0100
Subject: [PATCH] Script to check csmith synth validity

---
 csmith/check_generated_eh_frame.py | 184 +++++++++++++++++++++++++++++
 csmith/csmith_gen.sh               |   1 +
 2 files changed, 185 insertions(+)
 create mode 100644 csmith/check_generated_eh_frame.py

diff --git a/csmith/check_generated_eh_frame.py b/csmith/check_generated_eh_frame.py
new file mode 100644
index 0000000..94d1e8e
--- /dev/null
+++ b/csmith/check_generated_eh_frame.py
@@ -0,0 +1,184 @@
+import sys
+
+
+class NotFDE(Exception):
+    """Raised when a parsed entry header does not describe an FDE."""
+
+
+def parse_fde_head(line):
+    """Return (pc_beg, pc_end) from an FDE header line; raise NotFDE otherwise."""
+    fields = line.strip().split()
+    assert len(fields) >= 2
+    if fields[1] == "ZERO":
+        raise NotFDE
+    assert len(fields) >= 4
+    if fields[3] != "FDE":  # the entry type is the fourth field
+        raise NotFDE
+    assert len(fields) == 6
+    # The last field is a 3-character prefix followed by "<hex>..<hex>".
+    lo, hi = fields[5][3:].split("..")
+    # Both bounds are written in hexadecimal.
+    return int(lo, 16), int(hi, 16)
+
+
+def parse_fde_row(line, ra_col):
+    """Extract the "LOC" (parsed as hex), "CFA" and "ra" cells of a table row."""
+    cells = line.split()  # split() already discards surrounding whitespace
+    assert len(cells) > ra_col  # ra is the rightmost useful column
+    return {"LOC": int(cells[0], 16), "CFA": cells[1], "ra": cells[ra_col]}
+
+
+def clean_rows(rows):
+    """Drop every row that equals the row kept just before it.
+
+    An empty input is returned as is; otherwise a new list is built.
+    """
+    if not rows:
+        return rows
+    merged = [rows[0]]
+    merged.extend(cur for prev, cur in zip(rows, rows[1:]) if cur != prev)
+    return merged
+
+
+def parse_fde(lines):
+    """Build the {"beg", "end", "rows"} dict for one entry, or None if not an FDE.
+
+    `lines` holds the header line, then a column-title line, then table rows.
+    """
+    assert len(lines) > 0
+    try:
+        pc_beg, pc_end = parse_fde_head(lines[0])
+    except NotFDE:
+        return None
+
+    table = [{"LOC": 0, "CFA": "rsp+8", "ra": "c-8"}]  # Implicit CIE row
+    if len(lines) >= 2:  # Has content
+        ra_col = lines[1].split().index("ra")  # position of the "ra" column
+        table.extend(parse_fde_row(line, ra_col) for line in lines[2:])
+
+    return {"beg": pc_beg, "end": pc_end, "rows": clean_rows(table)}
+
+
+def parse_eh_frame(handle):
+    """Parse the FDEs read from `handle` up to a "===" line or EOF, sorted by "beg"."""
+    blocks, cur_lines = [], []
+    for line in handle:
+        line = line.strip()
+        if line == "===":
+            break
+        if line and not line.startswith("Contents of"):
+            cur_lines.append(line)
+        elif not line and cur_lines:  # a blank line closes the current entry
+            blocks.append(cur_lines)
+            cur_lines = []
+    if cur_lines:  # keep a last entry that no blank line terminated
+        blocks.append(cur_lines)
+    # parse_fde yields None for entries that are not FDEs; drop those.
+    fdes = [fde for fde in map(parse_fde, blocks) if fde]
+    # Sort on both exits so the "===" and EOF paths return the same ordering.
+    return sorted(fdes, key=lambda fde: fde["beg"])
+
+
+def match_segments(orig_eh, synth_eh):
+    """Pair original and synthesized FDEs that start at the same address.
+
+    An original FDE having a row whose CFA is "exp" is handled as PLT: it is
+    never paired, it counts as matched, and so does every synthesized FDE that
+    shares its start or lies within its range.
+
+    Returns `(pairs, unmatched_orig, unmatched_synth)`.
+    """
+    pairs = []
+    orig_seen = [False] * len(orig_eh)
+    synth_seen = [False] * len(synth_eh)
+    for orig_id, orig_fde in enumerate(orig_eh):
+        is_plt = any(row["CFA"] == "exp" for row in orig_fde["rows"])
+        if is_plt:
+            orig_seen[orig_id] = True  # plt -- fake match
+        for synth_id, synth_fde in enumerate(synth_eh):
+            same_beg = orig_fde["beg"] == synth_fde["beg"]
+            if is_plt:
+                if same_beg or (
+                    orig_fde["beg"] <= synth_fde["beg"]
+                    and synth_fde["end"] <= orig_fde["end"]
+                ):
+                    synth_seen[synth_id] = True  # PLT -- fake match
+                continue
+            if not same_beg:
+                continue
+            if synth_seen[synth_id]:
+                print("Multiple matches (synth)")
+            if orig_seen[orig_id]:
+                print(
+                    "Multiple matches (orig) {}--{}".format(
+                        hex(orig_fde["beg"]), hex(orig_fde["end"])
+                    )
+                )
+                continue
+            orig_seen[orig_id] = True
+            synth_seen[synth_id] = True
+            pairs.append((orig_fde, synth_fde))
+
+    unmatched_orig = [fde for fde, seen in zip(orig_eh, orig_seen) if not seen]
+    unmatched_synth = [fde for fde, seen in zip(synth_eh, synth_seen) if not seen]
+    return pairs, unmatched_orig, unmatched_synth
+
+
+def fde_pos(fde):
+    return "--".join(hex(fde[key]) for key in ("beg", "end"))  # "<beg>--<end>" in hex
+
+
+def dump_light_fdes(fdes):
+    for pos in map(fde_pos, fdes):  # one "FDE: <beg>--<end>" line per entry
+        print("FDE: {}".format(pos))
+
+
+def match_fde(orig, synth):
+    """Tell whether two FDEs agree on "CFA" and "ra" along their merged rows.
+
+    Rows of both FDEs are walked in increasing "LOC" order, each side keeping
+    the values of its latest row. Once all rows sharing a "LOC" are applied,
+    both sides must hold equal values; the first difference is printed and
+    False is returned.
+    """
+    sides = (orig["rows"], synth["rows"])
+    state = [{"CFA": rows[0]["CFA"], "ra": rows[0]["ra"]} for rows in sides]
+
+    # The sort is stable: on equal "LOC", orig rows stay ahead of synth rows.
+    events = sorted(
+        ((side, row) for side, rows in enumerate(sides) for row in rows),
+        key=lambda event: event[1]["LOC"],
+    )
+
+    # Index 0 of `state` tracks orig, index 1 tracks synth.
+    for pos, (side, row) in enumerate(events):
+        state[side] = {"CFA": row["CFA"], "ra": row["ra"]}
+        upcoming = events[pos + 1 : pos + 2]
+        if upcoming and upcoming[0][1]["LOC"] == row["LOC"]:
+            continue  # more rows at this address: compare after the last one
+        if state[0] != state[1]:
+            print("Mis {} ; {}".format(state[0], state[1]))
+            return False
+
+    return True
+
+
+def main():
+    """Compare the two dumps read in turn from stdin and print a report."""
+    orig_eh, synth_eh = parse_eh_frame(sys.stdin), parse_eh_frame(sys.stdin)
+    matched, unmatched_orig, unmatched_synth = match_segments(orig_eh, synth_eh)
+    print(len(matched), len(unmatched_orig), len(unmatched_synth))
+    dump_light_fdes(unmatched_orig)
+    print("==")
+    dump_light_fdes(unmatched_synth)
+    failed = 0
+    for orig, synth in matched:
+        if match_fde(orig, synth):
+            continue
+        print("MISMATCH: {} ; {}".format(fde_pos(orig), fde_pos(synth)))
+        failed += 1
+    print("TOTAL: {}/{}".format(failed, len(matched)))
+
+
+if __name__ == "__main__":  # run the comparison only when executed as a script
+    main()
diff --git a/csmith/csmith_gen.sh b/csmith/csmith_gen.sh
index e89b090..adb6fa1 100755
--- a/csmith/csmith_gen.sh
+++ b/csmith/csmith_gen.sh
@@ -17,6 +17,7 @@ for _num in $(seq 1 $NB_TESTS); do
     echo -ne "\r>>> $num.c          "
     path="$DIR/$num"
     csmith > "$path.c"
+    sed -i 's/^static \(.* func_\)/\1/g' "$path.c"
     echo -ne "\r>>> $num.bin          "
     gcc -O2 -I/usr/include/csmith-2.3.0/ -w "$path.c" -o "$path.orig.bin"
     objcopy --remove-section '.eh_frame' --remove-section '.eh_frame_hdr' \