dwarf-synthesis/csmith/check_generated_eh_frame.py

#!/usr/bin/env python3
import sys
import argparse

verbose = False


class NotFDE(Exception):
    pass


def func_name(infos, symtb):
    for sym in symtb:
        if infos["beg"] == symtb[sym][0]:
            return sym
    return None


def parse_fde_head(line):
    spl = line.strip().split()
    assert len(spl) >= 2
    if spl[1] == "ZERO":
        raise NotFDE
    assert len(spl) >= 4
    typ = spl[3]
    if typ != "FDE":
        raise NotFDE
    assert len(spl) == 6
    pc_range = spl[5][3:]
    pc_beg, pc_end = map(lambda x: int(x, 16), pc_range.split(".."))

    return pc_beg, pc_end


def parse_fde_row(line, ra_col, rbp_col):
    vals = list(map(lambda x: x.strip(), line.split()))
    assert len(vals) > ra_col  # ra is the rightmost useful column
    out = {
        "LOC": int(vals[0], 16),
        "CFA": vals[1],
        "rbp": vals[rbp_col] if rbp_col else "u",
        "ra": vals[ra_col],
    }
    return out


def clean_rows(rows):
    # Merge equivalent contiguous rows
    if not rows:
        return rows
    assert len(rows) > 0
    out_rows = [rows[0]]
    for row in rows[1:]:
        if not row == out_rows[-1]:
            out_rows.append(row)
    return out_rows


def parse_fde(lines):
    assert len(lines) > 0
    try:
        pc_beg, pc_end = parse_fde_head(lines[0])
    except NotFDE:
        return

    rows = [{"LOC": 0, "CFA": "rsp+8", "rbp": "u", "ra": "c-8"}]  # Implicit CIE row

    if len(lines) >= 2:  # Has content
        head_row = list(map(lambda x: x.strip(), lines[1].split()))
        ra_col = head_row.index("ra")
        try:
            rbp_col = head_row.index("rbp")
        except ValueError:
            rbp_col = None

        for line in lines[2:]:
            rows.append(parse_fde_row(line, ra_col, rbp_col))

    return {"beg": pc_beg, "end": pc_end, "rows": clean_rows(rows)}


def parse_eh_frame(handle, symtb):
    output = []
    cur_lines = []
    for line in handle:
        line = line.strip()
        if line == "===":
            return output
        if line.startswith("Contents of"):
            continue
        if line == "":
            if cur_lines != []:
                infos = parse_fde(cur_lines)
                if infos:
                    symname = func_name(infos, symtb)
                    if symname not in ["_start", "__libc_csu_init"]:
                        # These functions have weird instructions
                        output.append(infos)
                cur_lines = []
        else:
            cur_lines.append(line)
    return sorted(output, key=lambda x: x["beg"])


def match_segments(orig_eh, synth_eh):
    out = []
    matches = [[False] * len(orig_eh), [False] * len(synth_eh)]
    for orig_id, orig_fde in enumerate(orig_eh):
        is_plt = False
        for row in orig_fde["rows"]:
            if row["CFA"] == "exp":
                is_plt = True

        for synth_id, synth_fde in enumerate(synth_eh):
            if orig_fde["beg"] == synth_fde["beg"]:
                if is_plt:
                    matches[1][synth_id] = True  # PLT -- fake match
                    continue
                if matches[1][synth_id]:
                    if verbose:
                        print("Multiple matches (synth)")
                if matches[0][orig_id]:
                    if verbose:
                        print(
                            "Multiple matches (orig) {}--{}".format(
                                hex(orig_fde["beg"]), hex(orig_fde["end"])
                            )
                        )
                else:
                    matches[0][orig_id] = True
                    matches[1][synth_id] = True
                    out.append((orig_fde, synth_fde))
            elif (
                is_plt
                and orig_fde["beg"] <= synth_fde["beg"]
                and synth_fde["end"] <= orig_fde["end"]
            ):
                matches[1][synth_id] = True  # PLT -- fake match
        if is_plt:
            matches[0][orig_id] = True  # plt -- fake match

    unmatched_orig, unmatched_synth = [], []
    for orig_id, orig_match in enumerate(matches[0]):
        if not orig_match:
            unmatched_orig.append(orig_eh[orig_id])
    for synth_id, synth_match in enumerate(matches[1]):
        if not synth_match:
            unmatched_synth.append(synth_eh[synth_id])
    return out, unmatched_orig, unmatched_synth


def fde_pos(fde):
    return "{}--{}".format(hex(fde["beg"]), hex(fde["end"]))


def dump_light_fdes(fdes):
    for fde in fdes:
        print("FDE: {}".format(fde_pos(fde)))


def match_fde(orig, synth):
    def vals_of(row):
        return {"CFA": row["CFA"], "ra": row["ra"], "rbp": row["rbp"]}

    def loc_of(rch):
        return rch[1]["LOC"]

    rows = [orig["rows"], synth["rows"]]
    cur_val = [vals_of(rows[0][0]), vals_of(rows[1][0])]

    rowchanges = []
    for typ in [0, 1]:
        for row in rows[typ]:
            rowchanges.append((typ, row))
    rowchanges.sort(key=loc_of)

    mismatch_count = 0
    for rowid, rowch in enumerate(rowchanges):
        typ, row = rowch[0], rowch[1]
        cur_val[typ] = vals_of(row)
        if len(rowchanges) > rowid + 1 and loc_of(rowch) == loc_of(
            rowchanges[rowid + 1]
        ):
            continue
        if cur_val[0] != cur_val[1]:
            if verbose:
                print(
                    "Mismatch {}: {} ; {}".format(
                        hex(row["LOC"]), cur_val[0], cur_val[1]
                    )
                )
            mismatch_count += 1

    return mismatch_count


def parse_sym_table(handle):
    def readint(x):
        if x.startswith("0x"):
            return int(x[2:], 16)
        return int(x)

    out_map = {}
    for line in handle:
        line = line.strip()
        if line == "===":
            break

        spl = list(map(lambda x: x.strip(), line.split()))
        loc = int(spl[1], 16)
        size = readint(spl[2])
        name = spl[7]
        out_map[name] = (loc, size)
    return out_map


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Display verbose results"
    )
    parser.add_argument(
        "test_name",
        action="store",
        help="Base path of the test case (eg. some_test/01)",
    )
    return parser.parse_args()


def main():
    global verbose
    parser_args = parse_args()
    test_name = parser_args.test_name
    verbose = parser_args.verbose
    symtb = parse_sym_table(sys.stdin)
    orig_eh = parse_eh_frame(sys.stdin, symtb)
    synth_eh = parse_eh_frame(sys.stdin, symtb)
    matched, unmatched_orig, unmatched_synth = match_segments(orig_eh, synth_eh)
    # dump_light_fdes(unmatched_orig)
    # dump_light_fdes(unmatched_synth)

    mismatches = 0
    for (orig, synth) in matched:
        mismatches += match_fde(orig, synth)
    reports = []
    if mismatches > 0:
        reports.append("{} mismatches".format(mismatches))
    if unmatched_orig:
        reports.append("{} unmatched (orig)".format(len(unmatched_orig)))
    if unmatched_synth:
        reports.append("{} unmatched (synth)".format(len(unmatched_synth)))

    if reports:
        print("{}: {}".format(test_name, "; ".join(reports)))


if __name__ == "__main__":
    main()