Check_gen: patch for flat vs pyramid clang tables

clang generates tables like this LOC CFA rbx rbp r14 r15 ra 00000000000028a0 rsp+8 u u u u c-8 00000000000028a1 rsp+16 u u u u c-8 00000000000028a3 rsp+24 u u u u c-8 00000000000028a5 rsp+32 u u u u c-8 00000000000028a6 rsp+40 u u u u c-8 00000000000028aa rsp+64 c-40 c-16 c-32 c-24 c-8
2019-04-05 16:32:00 +02:00 · 2019-04-05 16:32:00 +02:00 · 11fa3fab40
commit 11fa3fab40
parent 344ac84ef3
1 changed files with 140 additions and 17 deletions
--- a/csmith/check_generated_eh_frame.py
+++ b/csmith/check_generated_eh_frame.py
@ -32,15 +32,105 @@ def parse_fde_head(line):
    return pc_beg, pc_end


-def parse_fde_row(line, ra_col, rbp_col):
+def detect_clang_flat_to_pyramid(rows):
+    """ Artificially repair clang flat callee-saved saving to a gcc pyramid-like shape.
+
+    Eg., clang will generate
+       LOC           CFA      rbx   rbp   ra
+    0000000000007180 rsp+8    u     u     c-8
+    0000000000007181 rsp+16   u     u     c-8
+    0000000000007182 rsp+24   u     u     c-8
+    0000000000007189 rsp+7632 c-24  c-16  c-8
+
+
+    while we would wish to have
+       LOC           CFA      rbx   rbp   ra
+    0000000000007180 rsp+8    u     u     c-8
+    0000000000007181 rsp+16   u     c-16  c-8
+    0000000000007182 rsp+24   c-24  c-16  c-8
+    0000000000007189 rsp+7632 c-24  c-16  c-8
+
+    The repair strategy is as follows:
+    - ignore the implicit first row
+    - find the first k lines such that only CFA changes from line to line, with a delta
+      of +8, with address delta of 2. (push is 2 bytes long)
+    - for every callee-saved R that concerns us and that is defined at line k+1 with
+      offset c-x, while rsp+x is the CFA of line k' <= k, redefine R as c-k in lines
+      [k'; k[
+    """
+
+    def try_starting_at(start_row):
+        if len(rows) < start_row + 1:  # Ensure we have at least the start row
+            return rows, False
+
+        flatness_row_id = start_row
+        if rows[1]["CFA"][:4] != "rsp+":
+            return rows, False
+        first_cfa = int(rows[start_row]["CFA"][4:])
+        prev_cfa = first_cfa
+        prev_loc = rows[start_row]["LOC"]
+        for row in rows[start_row + 1 :]:
+            for reg in row:
+                if reg not in ["LOC", "CFA", "ra"] and row[reg] != "u":
+                    break
+            cfa = row["CFA"]
+            if cfa[:4] != "rsp+":
+                break
+            cfa_offset = int(cfa[4:])
+            if cfa_offset != prev_cfa + 8:
+                break
+            prev_cfa += 8
+            loc = row["LOC"]
+            if loc > prev_loc + 2:
+                break
+            prev_loc = loc
+            flatness_row_id += 1
+        flatness_row_id += 1
+        if flatness_row_id - start_row <= 1 or flatness_row_id >= len(rows):
+            return rows, False  # nothing to change
+        flatness_row = rows[flatness_row_id]
+
+        reg_changes = {}
+        for reg in flatness_row:
+            if reg in ["LOC", "CFA", "ra"]:
+                continue
+            rule = flatness_row[reg]
+            if rule[:2] != "c-":
+                return rows, False  # Not a flat_to_pyramid after all
+            rule_offset = int(rule[2:])
+            rule_offset_rectified = rule_offset - first_cfa
+            if rule_offset_rectified % 8 != 0:
+                return rows, False
+            row_change_id = rule_offset_rectified // 8 + start_row
+            reg_changes[reg] = (row_change_id, rule)
+
+        for reg in reg_changes:
+            change_from, rule = reg_changes[reg]
+            for row in rows[change_from:flatness_row_id]:
+                row[reg] = rule
+
+        return rows, True
+
+    for start_row in [1, 2]:
+        mod_rows, modified = try_starting_at(start_row)
+        if modified:
+            return mod_rows
+    return rows
+
+
+def parse_fde_row(line, reg_cols):
    vals = list(map(lambda x: x.strip(), line.split()))
-    assert len(vals) > ra_col  # ra is the rightmost useful column
-    out = {
-        "LOC": int(vals[0], 16),
-        "CFA": vals[1],
-        "rbp": vals[rbp_col] if rbp_col else "u",
-        "ra": vals[ra_col],
-    }
+    assert len(vals) > reg_cols["ra"]  # ra is the rightmost useful column
+
+    out = {"LOC": int(vals[0], 16), "CFA": vals[1]}
+
+    for reg in reg_cols:
+        col_id = reg_cols[reg]
+        out[reg] = vals[col_id]
+
+    if "rbp" not in out:
+        out["rbp"] = "u"
+
    return out


@ -52,7 +142,14 @@ def clean_rows(rows):
    out_rows = [rows[0]]
    for row in rows[1:]:
        if not row == out_rows[-1]:
-            out_rows.append(row)
+            filtered_row = row
+            filter_out = []
+            for reg in filtered_row:
+                if reg not in ["LOC", "CFA", "rbp", "ra"]:
+                    filter_out.append(reg)
+            for reg in filter_out:
+                filtered_row.pop(reg)
+            out_rows.append(filtered_row)
    return out_rows


@ -67,14 +164,24 @@ def parse_fde(lines):

    if len(lines) >= 2:  # Has content
        head_row = list(map(lambda x: x.strip(), lines[1].split()))
-        ra_col = head_row.index("ra")
-        try:
-            rbp_col = head_row.index("rbp")
-        except ValueError:
-            rbp_col = None
+        reg_cols = {}
+        for pos, reg in enumerate(head_row):
+            if reg not in ["LOC", "CFA"]:
+                reg_cols[reg] = pos

        for line in lines[2:]:
-            rows.append(parse_fde_row(line, ra_col, rbp_col))
+            rows.append(parse_fde_row(line, reg_cols))
+
+    # if pc_beg == 0x1160:
+    #     print("===== FDE: {}..{} ====".format(hex(pc_beg), hex(pc_end)))
+    #     print("BEFORE:")
+    #     for row in rows:
+    #         print(row)
+    rows = detect_clang_flat_to_pyramid(rows)
+    # if pc_beg == 0x1160:
+    #     print("AFTER:")
+    #     for row in rows:
+    #         print(row)

    return {"beg": pc_beg, "end": pc_end, "rows": clean_rows(rows)}

@ -246,9 +353,25 @@ def main():
    if mismatches > 0:
        reports.append("{} mismatches".format(mismatches))
    if unmatched_orig:
-        reports.append("{} unmatched (orig)".format(len(unmatched_orig)))
+        worth_reporting = False
+        for unmatched in unmatched_orig:
+            if len(unmatched["rows"]) > 1:
+                worth_reporting = True
+                break
+        if worth_reporting:
+            unmatched_addrs = [fde_pos(fde) for fde in unmatched_orig]
+            reports.append(
+                "{} unmatched (orig): {}".format(
+                    len(unmatched_orig), ", ".join(unmatched_addrs)
+                )
+            )
    if unmatched_synth:
-        reports.append("{} unmatched (synth)".format(len(unmatched_synth)))
+        unmatched_addrs = [fde_pos(fde) for fde in unmatched_synth]
+        reports.append(
+            "{} unmatched (synth): {}".format(
+                len(unmatched_synth), ", ".join(unmatched_addrs)
+            )
+        )

    if reports:
        print("{}: {}".format(test_name, "; ".join(reports)))