Levenshtein: enhance output

This commit is contained in:
Théophile Bastian 2020-05-10 16:31:08 +02:00
parent fa19f99d27
commit d71c1190d4

View file

@ -11,6 +11,8 @@ class Levenshtein:
"I": (0, -1), "I": (0, -1),
} }
auto_coalesce = False
def __init__(self, pre, post): def __init__(self, pre, post):
self.pre = pre self.pre = pre
self.post = post self.post = post
@ -118,9 +120,12 @@ class Levenshtein:
if cur_op == "S": if cur_op == "S":
pre_val = self.pre[pre_pos] pre_val = self.pre[pre_pos]
post_val = self.post[post_pos] post_val = self.post[post_pos]
if pre_val == post_val:
cur_op = "L"
ops.append( ops.append(
( (
cur_op, cur_op,
(pre_pos, post_pos),
(pre_val, post_val), (pre_val, post_val),
self.subst_cost(pre_val, post_val, cost_pre, cost_post), self.subst_cost(pre_val, post_val, cost_pre, cost_post),
) )
@ -128,27 +133,75 @@ class Levenshtein:
if cur_op == "I": if cur_op == "I":
post_val = self.post[post_pos] post_val = self.post[post_pos]
ops.append( ops.append(
(cur_op, post_val, self.insert_cost(post_val, cost_pre, cost_post)) (
cur_op,
post_pos,
post_val,
self.insert_cost(post_val, cost_pre, cost_post),
)
) )
if cur_op == "D": if cur_op == "D":
pre_val = self.pre[pre_pos] pre_val = self.pre[pre_pos]
ops.append( ops.append(
(cur_op, pre_val, self.del_cost(pre_val, cost_pre, cost_post)) (
cur_op,
pre_pos,
pre_val,
self.del_cost(pre_val, cost_pre, cost_post),
)
) )
ops = ops[::-1]
if self.auto_coalesce:
ops = self.coalesce_ops(ops)
cached_result = { cached_result = {
"count": self.work_matrix[-1][-1], "count": self.work_matrix[-1][-1],
"ops": ops[::-1], "ops": ops,
} }
self.get_full_cache()[self.cache_key()] = cached_result self.get_full_cache()[self.cache_key()] = cached_result
return cached_result return cached_result
@staticmethod
def coalesce_ops(in_ops):
    """Merge runs of consecutive operations of the same kind.

    Args:
        in_ops: iterable of ``(op, pos, val, cost)`` tuples, in order.

    Returns:
        A list of ``(op, (pos, span), vals, cost)`` tuples, one per run of
        identical consecutive ``op``:

        * ``pos`` is the position of the first operation of the run,
        * ``span`` is the number of operations merged into the run,
        * ``vals`` is the concatenation of the run's values; for ``"S"``
          operations, whose value is a ``(pre, post)`` pair, both
          components are concatenated separately,
        * ``cost`` is the sum of the run's costs.

        An empty input yields an empty list.
    """
    out_ops = []
    # No run is open until the first operation has been seen; tracking this
    # explicitly avoids flushing an empty placeholder run into the output.
    in_run = False
    coal_op = None
    coal_pos = None
    coal_span = 0
    coal_vals = None
    coal_cost = 0

    for op, pos, val, cost in in_ops:
        if in_run and op == coal_op:
            # Same kind as the open run: extend it.
            coal_span += 1
            if op == "S":
                pre, post = val
                coal_pre, coal_post = coal_vals
                coal_vals = (coal_pre + pre, coal_post + post)
            else:
                # NOTE(review): any non-"S" value is concatenated as a whole,
                # so a tuple-valued op would get its tuples joined end to
                # end rather than component-wise -- confirm this is intended.
                # `+` (not `+=`) so that a mutable first value coming from
                # the caller is never modified in place.
                coal_vals = coal_vals + val
            coal_cost += cost
        else:
            # Kind changed: flush the open run (if any) and start a new one.
            if in_run:
                out_ops.append(
                    (coal_op, (coal_pos, coal_span), coal_vals, coal_cost)
                )
            in_run = True
            coal_op = op
            coal_pos = pos
            coal_span = 1
            coal_vals = val
            coal_cost = cost

    if in_run:
        out_ops.append((coal_op, (coal_pos, coal_span), coal_vals, coal_cost))
    return out_ops
class InlineLevenshtein(Levenshtein): class InlineLevenshtein(Levenshtein):
""" Levenshtein distance for edition of a single line """ """ Levenshtein distance for edition of a single line """
auto_coalesce = True
def insert_cost(self, to_insert, pre_idx, post_idx): def insert_cost(self, to_insert, pre_idx, post_idx):
return 1 return 1