stats: various modifications
This commit is contained in:
parent
216e442f5b
commit
6629de9a3e
7 changed files with 369 additions and 142 deletions
3
stats/.gitignore
vendored
3
stats/.gitignore
vendored
|
@ -1,2 +1,3 @@
|
|||
venv
|
||||
elf_data
|
||||
elf_data*
|
||||
gathered
|
||||
|
|
|
@ -18,6 +18,7 @@ class Config:
|
|||
|
||||
elif args.feature == 'sample':
|
||||
self.size = int(args.size)
|
||||
self.output = args.output
|
||||
|
||||
elif args.feature == 'analyze':
|
||||
self.data_file = args.data_file
|
||||
|
@ -93,9 +94,9 @@ def main():
|
|||
stats_accu = gather_stats.gather_system_files(
|
||||
config,
|
||||
sample_size=config.size)
|
||||
stats_accu.dump(config.output)
|
||||
|
||||
elif config.feature == 'analyze':
|
||||
# TODO
|
||||
print("Not implemented", file=sys.stderr)
|
||||
stats_accu = StatsAccumulator.load(config.data_file)
|
||||
sys.exit(1)
|
||||
|
|
|
@ -1,122 +1,98 @@
|
|||
from elftools.common.exceptions import DWARFError
|
||||
from pyelftools_overlay import system_elfs, get_cfi
|
||||
from elftools.dwarf import callframe
|
||||
import multiprocessing
|
||||
import signal
|
||||
import concurrent.futures
|
||||
import random
|
||||
|
||||
|
||||
from stats_accu import \
|
||||
StatsAccumulator, SingleFdeData, \
|
||||
RegsList, FdeData, DwarfInstr
|
||||
StatsAccumulator, SingleFdeData, FdeData, DwarfInstr
|
||||
|
||||
|
||||
class FilesProcessor(multiprocessing.Process):
|
||||
def __init__(self, elf_list, shared_queue):
|
||||
super().__init__()
|
||||
self.stop_processing = False
|
||||
self.processed_counter = 0
|
||||
self.elf_list = elf_list
|
||||
self.shared_queue = shared_queue
|
||||
class ProcessWrapper:
|
||||
def __init__(self, fct):
|
||||
self._fct = fct
|
||||
|
||||
def stop_processing_now(self):
|
||||
self.stop_processing = True
|
||||
def __call__(self, elf_descr):
|
||||
try:
|
||||
path, elftype = elf_descr
|
||||
|
||||
def run(self):
|
||||
pos = 0
|
||||
for descr in self.elf_list:
|
||||
if self.stop_processing:
|
||||
break
|
||||
self.process_single_file(descr, pos)
|
||||
pos += 1
|
||||
print("Processing {}…".format(path))
|
||||
|
||||
print("=== Finished {} ===".format(self.name))
|
||||
return 0
|
||||
cfi = get_cfi(path)
|
||||
if not cfi:
|
||||
return None
|
||||
|
||||
def process_single_file(self, elf_descr, pos_in_list):
|
||||
if self.stop_processing:
|
||||
return
|
||||
|
||||
elf_path, elf_type = elf_descr
|
||||
|
||||
self.processed_counter += 1
|
||||
print('[{}, {}/{}] {}'.format(
|
||||
self.shared_queue.qsize(),
|
||||
pos_in_list + 1,
|
||||
len(self.elf_list),
|
||||
elf_path))
|
||||
self.process_file(elf_path, elf_type)
|
||||
|
||||
def process_file(self, path, elftype):
|
||||
''' Process a single file '''
|
||||
|
||||
cfi = get_cfi(path)
|
||||
if not cfi:
|
||||
return self._fct(path, elftype, cfi)
|
||||
except DWARFError:
|
||||
return None
|
||||
|
||||
data = FdeData()
|
||||
|
||||
for entry in cfi:
|
||||
if isinstance(entry, callframe.CIE): # Is a CIE
|
||||
self.process_cie(entry, data)
|
||||
elif isinstance(entry, callframe.FDE): # Is a FDE
|
||||
self.process_fde(entry, data)
|
||||
def process_wrapper(fct):
|
||||
return ProcessWrapper(fct)
|
||||
|
||||
out = SingleFdeData(path, elftype, data)
|
||||
self.shared_queue.put(out)
|
||||
|
||||
def incr_cell(self, table, key):
|
||||
''' Increments table[key], or sets it to 1 if unset '''
|
||||
if key in table:
|
||||
table[key] += 1
|
||||
@process_wrapper
def process_elf(path, elftype, cfi):
    ''' Gather FDE statistics for one ELF file.

    `cfi` is the iterable of call frame entries of the file at `path`.
    Every CIE is handed to `process_cie` and every FDE to `process_fde`,
    both accumulating into one fresh `FdeData`; entries of any other kind
    are ignored. Returns the `SingleFdeData` built from `path`, `elftype`
    and the accumulated data.
    '''

    fde_stats = FdeData()

    for cfi_entry in cfi:
        if isinstance(cfi_entry, callframe.CIE):
            process_cie(cfi_entry, fde_stats)
        elif isinstance(cfi_entry, callframe.FDE):
            process_fde(cfi_entry, fde_stats)

    return SingleFdeData(path, elftype, fde_stats)
|
||||
|
||||
|
||||
def incr_cell(table, key):
    ''' Add one to table[key], treating a missing key as a count of 0 '''
    table[key] = table.get(key, 0) + 1
|
||||
|
||||
|
||||
def process_cie(cie, data):
    ''' Process a CIE: intentionally a no-op, no statistic uses CIEs '''
    return None
|
||||
|
||||
|
||||
def process_fde(fde, data):
    ''' Fold one FDE into the statistics held by `data`.

    Bumps `data.fde_count`, records the number of rows of the decoded
    table in the `data.fde_with_lines` histogram, then feeds the CFA rule
    and every integer-keyed (register) column of each row to
    `process_reg`.
    '''
    data.fde_count += 1

    rows = fde.get_decoded().table
    incr_cell(data.fde_with_lines, len(rows))

    for row in rows:
        process_reg(data.regs.cfa, row['cfa'])
        # Non-integer keys ('cfa' and friends) are not register columns.
        for column in row:
            if not isinstance(column, int):
                continue
            process_reg(data.regs.regs[column], row[column])
|
||||
|
||||
|
||||
def process_reg(out_reg, reg_def):
|
||||
''' Process a register '''
|
||||
if isinstance(reg_def, callframe.CFARule):
|
||||
if reg_def.reg is not None:
|
||||
out_reg.regs[reg_def.reg] += 1
|
||||
else:
|
||||
table[key] = 1
|
||||
|
||||
def process_cie(self, cie, data):
|
||||
''' Process a CIE '''
|
||||
pass # Nothing needed from a CIE
|
||||
|
||||
def process_fde(self, fde, data):
|
||||
''' Process a FDE '''
|
||||
data.fde_count += 1
|
||||
|
||||
decoded = fde.get_decoded()
|
||||
row_count = len(decoded.table)
|
||||
self.incr_cell(data.fde_with_lines, row_count)
|
||||
|
||||
for row in decoded.table:
|
||||
self.process_reg(data.regs.cfa, row['cfa'])
|
||||
for entry in row:
|
||||
if isinstance(entry, int):
|
||||
self.process_reg(data.regs.regs[entry], row[entry])
|
||||
|
||||
def process_reg(self, out_reg, reg_def):
|
||||
''' Process a register '''
|
||||
if isinstance(reg_def, callframe.CFARule):
|
||||
if reg_def.reg is not None:
|
||||
out_reg.regs[reg_def.reg] += 1
|
||||
else:
|
||||
pass # TODO exprs
|
||||
else:
|
||||
self.incr_cell(out_reg.instrs, DwarfInstr.of_pyelf(reg_def.type))
|
||||
if reg_def.type == callframe.RegisterRule.REGISTER:
|
||||
out_reg.regs[reg_def.arg] += 1
|
||||
elif (reg_def.type == callframe.RegisterRule.EXPRESSION) \
|
||||
or (reg_def.type == callframe.RegisterRule.VAL_EXPRESSION):
|
||||
pass # TODO exprs
|
||||
pass # TODO exprs
|
||||
else:
|
||||
incr_cell(out_reg.instrs, DwarfInstr.of_pyelf(reg_def.type))
|
||||
if reg_def.type == callframe.RegisterRule.REGISTER:
|
||||
out_reg.regs[reg_def.arg] += 1
|
||||
elif (reg_def.type == callframe.RegisterRule.EXPRESSION) \
|
||||
or (reg_def.type == callframe.RegisterRule.VAL_EXPRESSION):
|
||||
pass # TODO exprs
|
||||
|
||||
|
||||
def gather_system_files(config, sample_size=None):
|
||||
stats_accu = StatsAccumulator()
|
||||
processors = []
|
||||
|
||||
def signal_graceful_exit(sig, frame):
|
||||
''' Stop gracefully now '''
|
||||
nonlocal processors
|
||||
|
||||
print("Stopping after this ELF…")
|
||||
for processor in processors:
|
||||
processor.stop_processing_now()
|
||||
|
||||
signal.signal(signal.SIGINT, signal_graceful_exit)
|
||||
|
||||
elf_list = []
|
||||
for elf_path in system_elfs():
|
||||
|
@ -126,46 +102,46 @@ def gather_system_files(config, sample_size=None):
|
|||
elf_list_sampled = random.sample(elf_list, sample_size)
|
||||
elf_list = elf_list_sampled
|
||||
|
||||
elf_count = len(elf_list)
|
||||
elf_per_process = elf_count // config.cores
|
||||
elf_list_slices = []
|
||||
for i in range(config.cores - 1):
|
||||
elf_list_slices.append(
|
||||
elf_list[i * elf_per_process : (i+1) * elf_per_process])
|
||||
elf_list_slices.append(
|
||||
elf_list[(config.cores - 1) * elf_per_process
|
||||
: config.cores * elf_per_process])
|
||||
|
||||
shared_queue = multiprocessing.Queue(elf_count)
|
||||
|
||||
for elf_range in elf_list_slices:
|
||||
processors.append(FilesProcessor(elf_range, shared_queue))
|
||||
|
||||
if config.cores > 1:
|
||||
for processor in processors:
|
||||
processor.start()
|
||||
|
||||
while True:
|
||||
for processor in processors:
|
||||
if processor.is_alive():
|
||||
print("== Waiting {} ({} {}) ==".format(
|
||||
processor.name, processor.exitcode,
|
||||
processor.is_alive()))
|
||||
processor.join(timeout=1)
|
||||
if processor.exitcode is None:
|
||||
break # Loop around
|
||||
print("== Joined {} ==".format(processor.name))
|
||||
|
||||
terminated = True
|
||||
for processor in processors:
|
||||
if processor.exitcode is None:
|
||||
terminated = False
|
||||
if terminated:
|
||||
break
|
||||
with concurrent.futures.ProcessPoolExecutor(max_workers=config.cores)\
|
||||
as executor:
|
||||
for fde in executor.map(process_elf, elf_list):
|
||||
stats_accu.add_fde(fde)
|
||||
else:
|
||||
processors[0].run() # run(), not start(): in the same thread
|
||||
|
||||
while not shared_queue.empty(): # Reliable because everything is joined
|
||||
stats_accu.add_fde(shared_queue.get_nowait())
|
||||
for elf in elf_list:
|
||||
stats_accu.add_fde(process_elf(elf))
|
||||
|
||||
return stats_accu
|
||||
|
||||
|
||||
def map_system_files(mapper, sample_size=None, cores=None, include=None,
                     elflist=None):
    ''' Map `mapper` over a list of ELF files.

    `mapper` must take (path, elf_type, cfi); it is wrapped with
    `process_wrapper` before being applied to each ELF descriptor.

    When `elflist` is given it is used as-is. Otherwise the list is built
    from `system_elfs()`, optionally reduced to a random sample of
    `sample_size` entries, and then extended with one `(path, None)`
    descriptor per entry of `include`.

    With `cores` > 1 the mapping goes through a process pool of that many
    workers; otherwise (including `cores=None`) the builtin `map` is used.

    Returns `(results, elf_list)`, where `results` is the iterator
    produced by the chosen `map`.
    '''
    worker_count = 1 if cores is None else cores
    extra_paths = [] if include is None else include

    wrapped_mapper = process_wrapper(mapper)

    if elflist is not None:
        elf_list = elflist
    else:
        elf_list = list(system_elfs())

        if sample_size is not None:
            elf_list = random.sample(elf_list, sample_size)

        elf_list += [(extra_path, None) for extra_path in extra_paths]

    if worker_count > 1:
        pool = concurrent.futures.ProcessPoolExecutor(
            max_workers=worker_count)
        with pool as executor:
            out = executor.map(wrapped_mapper, elf_list)
    else:
        out = map(wrapped_mapper, elf_list)

    return out, elf_list
|
||||
|
|
228
stats/helpers.py
Normal file
228
stats/helpers.py
Normal file
|
@ -0,0 +1,228 @@
|
|||
from elftools.dwarf import callframe
|
||||
import gather_stats
|
||||
import itertools
|
||||
import functools
|
||||
|
||||
# Register names indexed by register number.
# NOTE(review): the order looks like the System V x86-64 DWARF register
# numbering (16 being the return address column) -- confirm.
ID_TO_REG = [
    'RAX', 'RDX', 'RCX', 'RBX',
    'RSI', 'RDI', 'RBP', 'RSP',
    'R8', 'R9', 'R10', 'R11',
    'R12', 'R13', 'R14', 'R15',
    'RIP',
]

# Reverse mapping, register name -> register number.
REGS_IDS = {reg_name: reg_id for reg_id, reg_name in enumerate(ID_TO_REG)}

# Numbers of the registers treated as handled by the analysis below.
HANDLED_REGS = [REGS_IDS[reg_name]
                for reg_name in ('RIP', 'RSP', 'RBP', 'RBX')]

ONLY_HANDLED_REGS = True  # only analyze the columns of handled registers

# The one multi-operation expression accepted as handled. Decoded with the
# DWARF opcode table it reads: breg7 8; breg16 0; lit15; and; lit11; ge;
# lit3; shl; plus. Presumably the CFA expression used for PLT entries.
PLT_EXPR = [119, 8, 128, 0, 63, 26, 59, 42, 51, 36, 34]
|
||||
|
||||
|
||||
def accumulate_regs(reg_list):
    ''' Sum per-register counter lists position by position.

    Returns a list of 17 totals; input lists shorter than 17 only
    contribute to their leading positions.
    '''
    totals = [0] * 17
    for counters in reg_list:
        for reg_id, count in enumerate(counters):
            totals[reg_id] += count

    return totals
|
||||
|
||||
|
||||
def filter_none(lst):
    ''' Lazily yield the truthy elements of `lst`.

    Despite the name, every falsy element is dropped (None, but also 0,
    empty strings and empty containers).
    '''
    yield from filter(None, lst)
|
||||
|
||||
|
||||
def deco_filter_none(fct):
    ''' Decorator: pass `fct` its single iterable argument stripped of
    falsy elements (see `filter_none`).

    The returned wrapper keeps the name, docstring and other metadata of
    `fct`, so decorated functions stay recognisable in tracebacks and
    introspection.
    '''
    @functools.wraps(fct)
    def wrap(lst):
        return fct(filter_none(lst))
    return wrap
|
||||
|
||||
|
||||
class FdeProcessor:
    ''' Callable adapter running a per-FDE function over a whole CFI.

    `fct` is called as `fct(path, fde, decoded)` for every FDE entry.
    The optional `reducer` receives the list of per-FDE results and
    collapses it into a single value.
    '''

    def __init__(self, fct, reducer=None):
        self._fct = fct
        self._reducer = reducer

    def __call__(self, path, elftype, cfi):
        ''' Return the list of per-FDE results for `cfi`.

        Non-FDE entries are skipped and `elftype` is not used. When a
        reducer is set and there are at least two results, the list is
        replaced by the one-element list holding the reduced value.
        '''
        results = [
            self._fct(path, fde, fde.get_decoded())
            for fde in cfi
            if isinstance(fde, callframe.FDE)
        ]

        if self._reducer is None or len(results) < 2:
            return results
        return [self._reducer(results)]
|
||||
|
||||
|
||||
class FdeProcessorReduced:
    ''' Decorator object binding a reducer ahead of the per-FDE function.

    Calling an instance with a function returns an `FdeProcessor` built
    from that function and the stored reducer.
    '''

    def __init__(self, reducer):
        self._reducer = reducer

    def __call__(self, fct):
        return FdeProcessor(fct, reducer=self._reducer)
|
||||
|
||||
|
||||
def fde_processor(fct):
    ''' Decorator: wrap a per-FDE function in an `FdeProcessor` that has
    no reducer '''
    return FdeProcessor(fct, reducer=None)
|
||||
|
||||
|
||||
def fde_processor_reduced(reducer):
    ''' Decorator factory: like `fde_processor`, but the per-FDE results
    are collapsed with `reducer` '''
    decorator = FdeProcessorReduced(reducer)
    return decorator
|
||||
|
||||
|
||||
def is_handled_expr(expr):
    ''' Tell whether the expression `expr` (a sequence of ints) is handled.

    Handled expressions are exactly `PLT_EXPR`, or a two-element
    expression whose first element lies in 0x70..0x89 and designates,
    once 0x70 is subtracted, one of `HANDLED_REGS`.
    '''
    if expr == PLT_EXPR:
        return True

    # NOTE(review): 0x70 is DW_OP_breg0, so this looks like a
    # "register + offset" expression; the DW_OP_bregN range runs up to
    # 0x8f, the 0x89 bound is kept as found.
    return (
        len(expr) == 2
        and 0x70 <= expr[0] <= 0x89
        and (expr[0] - 0x70) in HANDLED_REGS
    )
|
||||
|
||||
|
||||
# @fde_processor
def find_non_cfa(path, fde, decoded):
    ''' Count, for one decoded FDE, the rules that are (not) handled.

    Returns the tuple
    `(regs_seen, non_handled_regs, non_handled_exp, rule_type, cfa_dat,
    problematic_paths)` where:
      * `regs_seen` counts offset/val_offset and register rules seen on
        analysed register columns;
      * `non_handled_regs` counts CFA or register rules that refer to a
        register outside `HANDLED_REGS`;
      * `non_handled_exp` counts expressions rejected by
        `is_handled_expr`;
      * `rule_type` maps each `RegisterRule` kind to its number of
        occurrences on analysed register columns;
      * `cfa_dat` is `[cfa rules seen, cfa rules that are expressions]`;
      * `problematic_paths` contains `path` if anything unhandled was met.

    Side effect: every unhandled register expression is appended to
    `/tmp/exprs`.
    '''
    rules = callframe.RegisterRule
    offset_rules = (rules.OFFSET, rules.VAL_OFFSET)
    expr_rules = (rules.EXPRESSION, rules.VAL_EXPRESSION)

    regs_seen = 0
    non_handled_regs = 0
    non_handled_exp = 0
    cfa_dat = [0, 0]  # Seen, expr
    rule_type = dict.fromkeys((
        rules.UNDEFINED,
        rules.SAME_VALUE,
        rules.OFFSET,
        rules.VAL_OFFSET,
        rules.REGISTER,
        rules.EXPRESSION,
        rules.VAL_EXPRESSION,
        rules.ARCHITECTURAL,
    ), 0)
    problematic_paths = set()

    for row in decoded.table:
        for entry in row:
            reg_def = row[entry]

            if entry == 'cfa':
                cfa_dat[0] += 1
                if reg_def.expr:
                    cfa_dat[1] += 1
                    if not is_handled_expr(reg_def.expr):
                        non_handled_exp += 1
                        problematic_paths.add(path)
                elif reg_def and reg_def.reg not in HANDLED_REGS:
                    non_handled_regs += 1
                    problematic_paths.add(path)

            # Only integer keys are register columns (skips CFA and PC).
            if not isinstance(entry, int):
                continue

            if ONLY_HANDLED_REGS and entry not in HANDLED_REGS:
                continue

            reg_rule = reg_def.type
            rule_type[reg_rule] += 1

            if reg_rule in offset_rules:
                regs_seen += 1  # CFA
            elif reg_rule == rules.REGISTER:
                regs_seen += 1
                if reg_def.arg not in HANDLED_REGS:
                    problematic_paths.add(path)
                    non_handled_regs += 1
            elif reg_rule in expr_rules:
                expr = reg_def.arg
                if not is_handled_expr(expr):
                    problematic_paths.add(path)
                    # Keep a trace of the expression for later inspection.
                    with open('/tmp/exprs', 'a') as handle:
                        handle.write('[{} - {}] {}\n'.format(
                            path, fde.offset,
                            ', '.join(map(hex, expr))))
                    non_handled_exp += 1

    return (regs_seen, non_handled_regs, non_handled_exp, rule_type, cfa_dat,
            problematic_paths)
|
||||
|
||||
|
||||
def reduce_non_cfa(lst):
    ''' Merge an iterable of `find_non_cfa`-shaped 6-tuples into one.

    Each element is `(regs_seen, non_handled_regs, non_handled_exp,
    rule_type, cfa_dat, problematic_paths)`. Counters are added, the
    `rule_type` dicts are added key by key (over the keys of the
    accumulator), the `cfa_dat` lists are added position by position and
    the path sets are united.

    The inputs are never modified: every merge step builds new
    containers. With a single element that element is returned as-is.

    Raises TypeError (from `functools.reduce`) when `lst` is empty.
    '''
    def merge_dict(d1, d2):
        # Build a new dict rather than updating d1 in place, so the first
        # element of `lst` is not silently altered.
        return {key: d1[key] + d2[key] for key in d1}

    def merge_list(l1, l2):
        # Both lists are expected to have the same length.
        return [left + right for left, right in zip(l1, l2)]

    def merge_elts(accu, elt):
        accu_regs, accu_nh, accu_exp, accu_rt, accu_cfa, accu_paths = accu
        elt_regs, elt_nh, elt_exp, elt_rt, elt_cfa, elt_paths = elt
        return (
            accu_regs + elt_regs,
            accu_nh + elt_nh,
            accu_exp + elt_exp,
            merge_dict(accu_rt, elt_rt),
            merge_list(accu_cfa, elt_cfa),
            accu_paths.union(elt_paths),
        )

    return functools.reduce(merge_elts, lst)
|
||||
|
||||
|
||||
@deco_filter_none
def flatten_non_cfa(result):
    ''' Collapse per-file lists of `find_non_cfa` tuples into one summary.

    `result` is an iterable of iterables of 6-tuples; they are chained
    and merged with `reduce_non_cfa`. The returned tuple is:
      0. registers seen,
      1. (non-handled registers, registers seen + CFA rules that are not
         expressions),
      2. (non-handled expressions, 'EXPRESSION' rules + CFA expressions),
      3. the per-rule-kind counters,
      4. a dict with the CFA counters 'seen', 'expr' and 'offset',
      5. the set of problematic paths.
    '''
    merged = reduce_non_cfa(itertools.chain.from_iterable(result))
    regs_seen, bad_regs, bad_exprs, rule_counts, cfa_dat, paths = merged

    cfa_seen, cfa_expr = cfa_dat[0], cfa_dat[1]
    out_cfa = {
        'seen': cfa_seen,
        'expr': cfa_expr,
        'offset': cfa_seen - cfa_expr,
    }

    return (
        regs_seen,
        (bad_regs, regs_seen + out_cfa['offset']),
        (bad_exprs, rule_counts['EXPRESSION'] + out_cfa['expr']),
        rule_counts,
        out_cfa,
        paths,
    )
|
|
@ -6,6 +6,11 @@ from stats_accu import ElfType
|
|||
import os
|
||||
|
||||
|
||||
ELF_BLACKLIST = [
|
||||
'/usr/lib/libavcodec.so',
|
||||
]
|
||||
|
||||
|
||||
def get_cfi(path):
|
||||
''' Get the CFI entries from the ELF at the provided path '''
|
||||
|
||||
|
@ -14,6 +19,7 @@ def get_cfi(path):
|
|||
elf_file = ELFFile(file_handle)
|
||||
|
||||
if not elf_file.has_dwarf_info():
|
||||
print("No DWARF")
|
||||
return None
|
||||
|
||||
dw_info = elf_file.get_dwarf_info()
|
||||
|
@ -22,12 +28,19 @@ def get_cfi(path):
|
|||
elif dw_info.has_EH_CFI():
|
||||
cfis = dw_info.EH_CFI_entries()
|
||||
else:
|
||||
print("No CFI")
|
||||
return None
|
||||
except ELFError:
|
||||
print("ELF Error")
|
||||
return None
|
||||
except DWARFError:
|
||||
print("DWARF Error")
|
||||
return None
|
||||
except PermissionError:
|
||||
print("Permission Error")
|
||||
return None
|
||||
except KeyError:
|
||||
print("Key Error")
|
||||
return None
|
||||
|
||||
return cfis
|
||||
|
@ -70,6 +83,9 @@ def system_elfs():
|
|||
continue
|
||||
|
||||
canonical_name = readlink_rec(direntry.path)
|
||||
for blacked in ELF_BLACKLIST:
|
||||
if canonical_name.startswith(blacked):
|
||||
continue
|
||||
if canonical_name in seen_elfs:
|
||||
continue
|
||||
|
||||
|
@ -79,10 +95,16 @@ def system_elfs():
|
|||
magic_bytes = handle.read(4)
|
||||
if magic_bytes != b'\x7fELF':
|
||||
valid_elf = False
|
||||
elf_class = handle.read(1)
|
||||
if elf_class != b'\x02': # ELF64
|
||||
valid_elf = False
|
||||
except Exception:
|
||||
continue
|
||||
if not valid_elf:
|
||||
continue
|
||||
|
||||
if not os.path.isfile(canonical_name):
|
||||
continue
|
||||
|
||||
seen_elfs.add(canonical_name)
|
||||
yield (canonical_name, elftype)
|
||||
|
|
|
@ -1,2 +1 @@
|
|||
git+https://github.com/eliben/pyelftools
|
||||
git+https://github.com/uqfoundation/pathos
|
||||
|
|
|
@ -239,7 +239,8 @@ class StatsAccumulator:
|
|||
self.fdes = []
|
||||
|
||||
def add_fde(self, fde_data):
|
||||
self.fdes.append(fde_data)
|
||||
if fde_data:
|
||||
self.fdes.append(fde_data)
|
||||
|
||||
def get_fdes(self):
|
||||
return self.fdes
|
||||
|
@ -250,7 +251,6 @@ class StatsAccumulator:
|
|||
|
||||
def dump(self, path):
|
||||
dict_form = [fde.dump() for fde in self.fdes]
|
||||
print(dict_form)
|
||||
with open(path, 'w') as handle:
|
||||
handle.write(json.dumps(dict_form))
|
||||
|
||||
|
|
Loading…
Reference in a new issue