Levenshtein: enhance output
This commit is contained in:
parent
fa19f99d27
commit
d71c1190d4
1 changed file with 56 additions and 3 deletions
|
@ -11,6 +11,8 @@ class Levenshtein:
|
|||
"I": (0, -1),
|
||||
}
|
||||
|
||||
auto_coalesce = False
|
||||
|
||||
def __init__(self, pre, post):
|
||||
self.pre = pre
|
||||
self.post = post
|
||||
|
@ -118,9 +120,12 @@ class Levenshtein:
|
|||
if cur_op == "S":
|
||||
pre_val = self.pre[pre_pos]
|
||||
post_val = self.post[post_pos]
|
||||
if pre_val == post_val:
|
||||
cur_op = "L"
|
||||
ops.append(
|
||||
(
|
||||
cur_op,
|
||||
(pre_pos, post_pos),
|
||||
(pre_val, post_val),
|
||||
self.subst_cost(pre_val, post_val, cost_pre, cost_post),
|
||||
)
|
||||
|
@ -128,27 +133,75 @@ class Levenshtein:
|
|||
if cur_op == "I":
|
||||
post_val = self.post[post_pos]
|
||||
ops.append(
|
||||
(cur_op, post_val, self.insert_cost(post_val, cost_pre, cost_post))
|
||||
(
|
||||
cur_op,
|
||||
post_pos,
|
||||
post_val,
|
||||
self.insert_cost(post_val, cost_pre, cost_post),
|
||||
)
|
||||
)
|
||||
if cur_op == "D":
|
||||
pre_val = self.pre[pre_pos]
|
||||
ops.append(
|
||||
(cur_op, pre_val, self.del_cost(pre_val, cost_pre, cost_post))
|
||||
(
|
||||
cur_op,
|
||||
pre_pos,
|
||||
pre_val,
|
||||
self.del_cost(pre_val, cost_pre, cost_post),
|
||||
)
|
||||
)
|
||||
|
||||
ops = ops[::-1]
|
||||
if self.auto_coalesce:
|
||||
ops = self.coalesce_ops(ops)
|
||||
|
||||
cached_result = {
|
||||
"count": self.work_matrix[-1][-1],
|
||||
"ops": ops[::-1],
|
||||
"ops": ops,
|
||||
}
|
||||
|
||||
self.get_full_cache()[self.cache_key()] = cached_result
|
||||
|
||||
return cached_result
|
||||
|
||||
@staticmethod
def coalesce_ops(in_ops):
    """Merge consecutive operations of the same kind into single entries.

    Args:
        in_ops: iterable of ``(op, pos, val, cost)`` tuples. For the
            pair-valued ops "S" and "L", ``val`` is a
            ``(pre_val, post_val)`` pair; for any other op it is a single
            value supporting ``+``.

    Returns:
        A list of ``(op, (pos, span), vals, cost)`` tuples, one per run of
        identical consecutive ops, where ``pos`` is the position of the
        first op of the run, ``span`` the number of ops merged, ``vals``
        the concatenated values and ``cost`` the summed costs. An empty
        input yields an empty list.
    """
    out_ops = []

    coal_op = None
    coal_pos = None
    coal_span = 0  # 0 means "no run started yet"
    coal_vals = None
    coal_cost = 0

    for op, pos, val, cost in in_ops:
        if coal_span and op == coal_op:
            # Same kind as the current run: extend it.
            coal_span += 1
            if op in ("S", "L"):
                # Pair-valued ops: concatenate each side separately so the
                # result is still a (pre, post) pair.
                pre, post = val
                coal_pre, coal_post = coal_vals
                coal_vals = (coal_pre + pre, coal_post + post)
            else:
                coal_vals += val
            coal_cost += cost
            continue

        # A different kind of op starts: flush the finished run, if any.
        # The guard prevents emitting the initial placeholder state.
        if coal_span:
            out_ops.append((coal_op, (coal_pos, coal_span), coal_vals, coal_cost))

        coal_op = op
        coal_pos = pos
        coal_span = 1
        coal_vals = val
        coal_cost = cost

    # Flush the last run (nothing to flush for an empty input).
    if coal_span:
        out_ops.append((coal_op, (coal_pos, coal_span), coal_vals, coal_cost))
    return out_ops
|
||||
|
||||
|
||||
class InlineLevenshtein(Levenshtein):
|
||||
""" Levenshtein distance for edition of a single line """
|
||||
|
||||
auto_coalesce = True
|
||||
|
||||
def insert_cost(self, to_insert, pre_idx, post_idx):
|
||||
return 1
|
||||
|
||||
|
|
Loading…
Reference in a new issue