dwarf-synthesis/csmith/check_generated_eh_frame.py
Théophile Bastian 11fa3fab40 Check_gen: patch for flat vs pyramid clang tables
clang generates tables like this

      LOC           CFA      rbx   rbp   r14   r15   ra
   00000000000028a0 rsp+8    u     u     u     u     c-8
   00000000000028a1 rsp+16   u     u     u     u     c-8
   00000000000028a3 rsp+24   u     u     u     u     c-8
   00000000000028a5 rsp+32   u     u     u     u     c-8
   00000000000028a6 rsp+40   u     u     u     u     c-8
   00000000000028aa rsp+64   c-40  c-16  c-32  c-24  c-8
2019-04-05 16:32:00 +02:00

381 lines
11 KiB
Python
Executable file

#!/usr/bin/env python3
import sys
import argparse
# Module-wide verbosity flag. main() overwrites it from the -v/--verbose
# option; match_segments() and match_fde() read it to decide whether to print
# per-item diagnostics.
verbose = False
class NotFDE(Exception):
    """Raised by parse_fde_head when an entry header does not describe an FDE."""
def func_name(infos, symtb):
    """Return the name of the symbol that starts where the FDE starts.

    Args:
        infos: FDE description; only its ``"beg"`` entry is read.
        symtb: mapping from symbol name to a sequence whose first item is the
            symbol's start address.

    Returns:
        The first symbol name (in iteration order of ``symtb``) whose start
        address equals ``infos["beg"]``, or ``None`` when there is none.
    """
    matching = (name for name in symtb if symtb[name][0] == infos["beg"])
    return next(matching, None)
def parse_fde_head(line):
    """Extract the PC range from the header line of a frame entry.

    The line is split on whitespace. The second field being ``ZERO`` or the
    fourth field being anything other than ``FDE`` means the entry is not an
    FDE. Otherwise the line must have exactly six fields, the last one being a
    three-character prefix followed by ``<hex>..<hex>``.
    (Presumably this is the header format printed by readelf's interpreted
    frame dump -- confirm against the script producing the input.)

    Args:
        line: header line of the entry.

    Returns:
        ``(pc_beg, pc_end)`` as integers.

    Raises:
        NotFDE: the entry is a zero terminator or is not of type FDE.
        AssertionError: the line has an unexpected number of fields.
    """
    fields = line.split()
    assert len(fields) >= 2
    if fields[1] == "ZERO":
        raise NotFDE
    assert len(fields) >= 4
    if fields[3] != "FDE":
        raise NotFDE
    assert len(fields) == 6
    # Drop the 3-character prefix of the last field, keep "beg..end".
    bounds = fields[5][3:].split("..")
    pc_beg, pc_end = (int(bound, 16) for bound in bounds)
    return pc_beg, pc_end
def detect_clang_flat_to_pyramid(rows):
    """Artificially repair clang's flat callee-saved saving to a gcc-like pyramid.

    E.g., clang will generate

        LOC              CFA      rbx   rbp   ra
        0000000000007180 rsp+8    u     u     c-8
        0000000000007181 rsp+16   u     u     c-8
        0000000000007182 rsp+24   u     u     c-8
        0000000000007189 rsp+7632 c-24  c-16  c-8

    while we would wish to have

        LOC              CFA      rbx   rbp   ra
        0000000000007180 rsp+8    u     u     c-8
        0000000000007181 rsp+16   u     c-16  c-8
        0000000000007182 rsp+24   c-24  c-16  c-8
        0000000000007189 rsp+7632 c-24  c-16  c-8

    The repair strategy is as follows:

    - ignore the implicit first row;
    - starting at row 1 (and, if that fails, at row 2), find the run of rows
      whose CFA is ``rsp+N`` with N growing by exactly 8 from row to row and
      whose address grows by at most 2 from row to row (push is 2 bytes
      long);
    - the row right after that run is the "flatness row". For every register
      R (every column other than LOC, CFA and ra) defined there as ``c-x``,
      where ``rsp+x`` is the CFA of row k' of the run, redefine R as ``c-x``
      in rows [k'; flatness row[.

    Nothing is changed when the run has fewer than two rows, when no row
    follows it, when a register of the flatness row is not of the form
    ``c-x``, or when x is not a multiple of 8 away from the run's first CFA.

    Args:
        rows: list of row dicts (``"LOC"`` as int, ``"CFA"`` and register
            rules as strings). Modified in place when a repair applies.

    Returns:
        The same ``rows`` list, repaired or untouched.
    """
    ignored_cols = ("LOC", "CFA", "ra")

    def try_starting_at(start_row):
        """Attempt the repair with the run starting at ``start_row``.

        Returns ``(rows, modified)``.
        """
        if len(rows) < start_row + 1:  # Ensure we have at least the start row
            return rows, False

        # The run must start on an rsp-relative CFA. Check the start row
        # itself: checking row 1 unconditionally let a non-"rsp+" CFA at
        # row 2 (e.g. "exp") reach the int() conversion below.
        start_cfa = rows[start_row]["CFA"]
        if start_cfa[:4] != "rsp+":
            return rows, False
        first_cfa = int(start_cfa[4:])

        prev_cfa = first_cfa
        prev_loc = rows[start_row]["LOC"]
        flatness_row_id = start_row
        for row in rows[start_row + 1:]:
            # NOTE(review): an earlier version scanned the register columns
            # here and issued a `break` on the first one that was not "u",
            # but that `break` only left the scanning loop, so register
            # contents never influenced the run detection. That behaviour is
            # kept; confirm whether stopping the run was intended.
            cfa = row["CFA"]
            if cfa[:4] != "rsp+":
                break
            if int(cfa[4:]) != prev_cfa + 8:
                break
            prev_cfa += 8
            loc = row["LOC"]
            if loc > prev_loc + 2:
                break
            prev_loc = loc
            flatness_row_id += 1
        # Step from the last row of the run to the row following it.
        flatness_row_id += 1

        if flatness_row_id - start_row <= 1 or flatness_row_id >= len(rows):
            return rows, False  # nothing to change

        flatness_row = rows[flatness_row_id]
        # Collect every change first so that a late bail-out leaves the rows
        # untouched.
        reg_changes = {}
        for reg in flatness_row:
            if reg in ignored_cols:
                continue
            rule = flatness_row[reg]
            if rule[:2] != "c-":
                return rows, False  # Not a flat_to_pyramid after all
            rule_offset_rectified = int(rule[2:]) - first_cfa
            if rule_offset_rectified % 8 != 0:
                return rows, False
            # Each row of the run adds 8 to the CFA, so the offset tells in
            # which row the register's slot was pushed.
            row_change_id = rule_offset_rectified // 8 + start_row
            reg_changes[reg] = (row_change_id, rule)

        for reg, (change_from, rule) in reg_changes.items():
            for row in rows[change_from:flatness_row_id]:
                row[reg] = rule

        return rows, True

    for start_row in [1, 2]:
        mod_rows, modified = try_starting_at(start_row)
        if modified:
            return mod_rows
    return rows
def parse_fde_row(line, reg_cols):
    """Turn one table line of an FDE into a row dict.

    Args:
        line: whitespace-separated table line; the first field is the
            location in hexadecimal, the second is the CFA rule.
        reg_cols: mapping from register name to the index of its column in
            the line. Must contain ``"ra"``.

    Returns:
        A dict with ``"LOC"`` (int), ``"CFA"`` (str) and one string entry per
        register of ``reg_cols``; ``"rbp"`` defaults to ``"u"`` when the table
        has no such column.

    Raises:
        AssertionError: the line has no field at the ``ra`` column.
    """
    cells = line.split()
    assert len(cells) > reg_cols["ra"]  # ra is the rightmost useful column
    parsed = {"LOC": int(cells[0], 16), "CFA": cells[1]}
    parsed.update((reg, cells[col]) for reg, col in reg_cols.items())
    parsed.setdefault("rbp", "u")
    return parsed
def clean_rows(rows):
    """Keep only the compared columns and merge equivalent contiguous rows.

    Every row is reduced to the ``LOC``, ``CFA``, ``rbp`` and ``ra`` columns
    (those that are present). A row whose reduced content, location aside, is
    identical to the previously kept row carries no new information and is
    dropped, so that the result only holds rows where something actually
    changes.

    The comparison is done on the reduced rows and ignores ``LOC``: comparing
    the raw rows, location included, would never find two rows equivalent.

    Args:
        rows: list of row dicts. Neither the list nor its dicts are modified.

    Returns:
        A new list of new, reduced row dicts (``rows`` itself when empty).
    """
    if not rows:
        return rows

    kept_cols = ("LOC", "CFA", "rbp", "ra")

    def state_of(row):
        # Everything but the location: what the row says about unwinding.
        return {col: rule for col, rule in row.items() if col != "LOC"}

    out_rows = []
    for row in rows:
        reduced = {col: rule for col, rule in row.items() if col in kept_cols}
        if out_rows and state_of(reduced) == state_of(out_rows[-1]):
            continue
        out_rows.append(reduced)
    return out_rows
def parse_fde(lines):
    """Parse the lines of one frame entry into an FDE description.

    Args:
        lines: non-empty list of lines: the entry header, then (optionally)
            the table's column-header line, then the table rows.

    Returns:
        ``None`` when the header is not an FDE header; otherwise a dict with
        ``"beg"`` and ``"end"`` (the PC range) and ``"rows"``, the table rows
        preceded by an implicit initial row, after the clang flat-table repair
        and the row clean-up.

    Raises:
        AssertionError: ``lines`` is empty (or a line is malformed).
    """
    assert len(lines) > 0
    try:
        pc_beg, pc_end = parse_fde_head(lines[0])
    except NotFDE:
        return None

    # Implicit CIE row
    rows = [{"LOC": 0, "CFA": "rsp+8", "rbp": "u", "ra": "c-8"}]

    if len(lines) >= 2:  # Has content
        # Remember at which column each register of the table sits.
        reg_cols = {
            name: position
            for position, name in enumerate(lines[1].split())
            if name not in ["LOC", "CFA"]
        }
        rows.extend(parse_fde_row(line, reg_cols) for line in lines[2:])

    rows = detect_clang_flat_to_pyramid(rows)
    return {"beg": pc_beg, "end": pc_end, "rows": clean_rows(rows)}
def parse_eh_frame(handle, symtb):
    """Read one frame dump from ``handle`` and return its FDEs.

    Lines are consumed until a ``===`` separator line (consumed as well) or
    the end of the input. Lines starting with ``Contents of`` are skipped;
    blank lines separate entries. Each entry is handed to ``parse_fde``;
    entries that are not FDEs are ignored, as are the FDEs of ``_start`` and
    ``__libc_csu_init``.

    The result is sorted by start address whichever way the dump ends, and an
    entry that is not followed by a blank line before the separator or the
    end of the input is still taken into account.

    Args:
        handle: iterable of lines (e.g. an open text stream).
        symtb: symbol table, as produced by ``parse_sym_table``.

    Returns:
        The list of FDE descriptions, sorted by their ``"beg"`` address.
    """
    # These functions have weird instructions
    skipped_symbols = ("_start", "__libc_csu_init")
    output = []

    def record(block_lines):
        # Parse one blank-line-delimited entry and keep it when relevant.
        if not block_lines:
            return
        infos = parse_fde(block_lines)
        if infos and func_name(infos, symtb) not in skipped_symbols:
            output.append(infos)

    cur_lines = []
    for line in handle:
        line = line.strip()
        if line == "===":
            break
        if line.startswith("Contents of"):
            continue
        if line == "":
            record(cur_lines)
            cur_lines = []
        else:
            cur_lines.append(line)
    # Flush an entry that was not terminated by a blank line.
    record(cur_lines)

    return sorted(output, key=lambda fde: fde["beg"])
def match_segments(orig_eh, synth_eh):
    """Pair original and synthesized FDEs that start at the same address.

    An original FDE having a row whose CFA is ``"exp"`` is treated as a PLT
    entry: it is never paired, it is itself considered matched, and so is
    every synthesized FDE that starts at the same address or lies within its
    range.

    For the other original FDEs, the first synthesized FDE with the same
    start address is paired with it; further candidates are only reported
    (when the module-level ``verbose`` flag is set).

    Args:
        orig_eh: list of original FDE descriptions.
        synth_eh: list of synthesized FDE descriptions.

    Returns:
        ``(pairs, unmatched_orig, unmatched_synth)`` where ``pairs`` is a list
        of ``(orig_fde, synth_fde)`` tuples and the two other lists hold the
        FDEs left without a match, in input order.
    """
    pairs = []
    orig_seen = [False] * len(orig_eh)
    synth_seen = [False] * len(synth_eh)

    for o_idx, o_fde in enumerate(orig_eh):
        plt_like = any(row["CFA"] == "exp" for row in o_fde["rows"])

        for s_idx, s_fde in enumerate(synth_eh):
            same_start = o_fde["beg"] == s_fde["beg"]

            if plt_like:
                if same_start or (
                    o_fde["beg"] <= s_fde["beg"] and s_fde["end"] <= o_fde["end"]
                ):
                    synth_seen[s_idx] = True  # PLT -- fake match
                continue

            if not same_start:
                continue

            if synth_seen[s_idx] and verbose:
                print("Multiple matches (synth)")

            if not orig_seen[o_idx]:
                orig_seen[o_idx] = True
                synth_seen[s_idx] = True
                pairs.append((o_fde, s_fde))
            elif verbose:
                print(
                    "Multiple matches (orig) {}--{}".format(
                        hex(o_fde["beg"]), hex(o_fde["end"])
                    )
                )

        if plt_like:
            orig_seen[o_idx] = True  # plt -- fake match

    unmatched_orig = [fde for fde, seen in zip(orig_eh, orig_seen) if not seen]
    unmatched_synth = [fde for fde, seen in zip(synth_eh, synth_seen) if not seen]
    return pairs, unmatched_orig, unmatched_synth
def fde_pos(fde):
    """Format the PC range of an FDE as ``<hex beg>--<hex end>``."""
    return "--".join((hex(fde["beg"]), hex(fde["end"])))
def dump_light_fdes(fdes):
    """Print one ``FDE: <range>`` line per FDE of ``fdes`` (debugging aid)."""
    for position in map(fde_pos, fdes):
        print("FDE: {}".format(position))
def match_fde(orig, synth):
    """Count the locations at which two FDEs disagree.

    The rows of both FDEs are replayed in increasing ``LOC`` order (original
    rows first on ties). After all the rows sharing a location have been
    applied, the current ``CFA``, ``ra`` and ``rbp`` rules of both sides are
    compared; every location where they differ counts as one mismatch and is
    printed when the module-level ``verbose`` flag is set.

    Args:
        orig: original FDE description; its ``"rows"`` must be non-empty.
        synth: synthesized FDE description; its ``"rows"`` must be non-empty.

    Returns:
        The number of mismatching locations.
    """

    def tracked(row):
        # The only columns that take part in the comparison.
        return {"CFA": row["CFA"], "ra": row["ra"], "rbp": row["rbp"]}

    tables = [orig["rows"], synth["rows"]]
    state = [tracked(tables[0][0]), tracked(tables[1][0])]

    # (side, row) events, stably sorted by location.
    events = sorted(
        [(side, row) for side, table in enumerate(tables) for row in table],
        key=lambda event: event[1]["LOC"],
    )

    mismatch_count = 0
    for index, (side, row) in enumerate(events):
        state[side] = tracked(row)

        # Wait until every row of this location has been applied.
        has_next = index + 1 < len(events)
        if has_next and events[index + 1][1]["LOC"] == row["LOC"]:
            continue

        if state[0] == state[1]:
            continue
        if verbose:
            print(
                "Mismatch {}: {} ; {}".format(
                    hex(row["LOC"]), state[0], state[1]
                )
            )
        mismatch_count += 1
    return mismatch_count
def parse_sym_table(handle):
    """Read a symbol table from ``handle`` up to the ``===`` separator line.

    Each line is split on whitespace: the second field is the symbol address
    in hexadecimal, the third its size (decimal, or hexadecimal when prefixed
    with ``0x``) and the eighth its name.
    (Presumably these are ``readelf -s`` symbol lines -- confirm against the
    script producing the input.)

    Args:
        handle: iterable of lines (e.g. an open text stream).

    Returns:
        A dict mapping each symbol name to its ``(address, size)`` pair; a
        later line overrides an earlier one with the same name.
    """
    symbols = {}
    for raw_line in handle:
        entry = raw_line.strip()
        if entry == "===":
            break
        fields = entry.split()
        address = int(fields[1], 16)
        size_text = fields[2]
        if size_text.startswith("0x"):
            size = int(size_text[2:], 16)
        else:
            size = int(size_text)
        symbols[fields[7]] = (address, size)
    return symbols
def parse_args():
    """Parse the command line.

    Returns:
        The argparse namespace, with ``verbose`` (bool, set by
        ``-v``/``--verbose``) and ``test_name`` (the positional base path of
        the test case).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "-v",
        "--verbose",
        help="Display verbose results",
        action="store_true",
    )
    # "store" is argparse's default action for a positional argument.
    cli.add_argument(
        "test_name",
        help="Base path of the test case (eg. some_test/01)",
    )
    return cli.parse_args()
def main():
    """Compare an original and a synthesized frame dump read from stdin.

    Standard input is expected to hold, in this order, the symbol table, the
    original dump and the synthesized dump, each read by its own parser.
    A single summary line, prefixed with the test name, is printed when there
    is something to report: mismatching locations, unmatched original FDEs
    (only if at least one of them has more than one row) and unmatched
    synthesized FDEs. Nothing is printed otherwise.
    """
    global verbose

    cli_args = parse_args()
    verbose = cli_args.verbose
    test_name = cli_args.test_name

    symtb = parse_sym_table(sys.stdin)
    orig_eh = parse_eh_frame(sys.stdin, symtb)
    synth_eh = parse_eh_frame(sys.stdin, symtb)

    matched, unmatched_orig, unmatched_synth = match_segments(orig_eh, synth_eh)

    mismatches = sum(match_fde(orig, synth) for orig, synth in matched)

    reports = []
    if mismatches > 0:
        reports.append("{} mismatches".format(mismatches))

    # Unmatched original FDEs are only worth reporting when at least one of
    # them has more than a single row.
    if unmatched_orig and any(len(fde["rows"]) > 1 for fde in unmatched_orig):
        reports.append(
            "{} unmatched (orig): {}".format(
                len(unmatched_orig), ", ".join(map(fde_pos, unmatched_orig))
            )
        )

    if unmatched_synth:
        reports.append(
            "{} unmatched (synth): {}".format(
                len(unmatched_synth), ", ".join(map(fde_pos, unmatched_synth))
            )
        )

    if reports:
        print("{}: {}".format(test_name, "; ".join(reports)))
# Run the checker only when executed as a script, not when imported.
if __name__ == "__main__":
    main()