Levenshtein: enhance output
This commit is contained in:
parent
fa19f99d27
commit
d71c1190d4
1 changed files with 56 additions and 3 deletions
|
@ -11,6 +11,8 @@ class Levenshtein:
|
||||||
"I": (0, -1),
|
"I": (0, -1),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto_coalesce = False
|
||||||
|
|
||||||
def __init__(self, pre, post):
|
def __init__(self, pre, post):
|
||||||
self.pre = pre
|
self.pre = pre
|
||||||
self.post = post
|
self.post = post
|
||||||
|
@ -118,9 +120,12 @@ class Levenshtein:
|
||||||
if cur_op == "S":
|
if cur_op == "S":
|
||||||
pre_val = self.pre[pre_pos]
|
pre_val = self.pre[pre_pos]
|
||||||
post_val = self.post[post_pos]
|
post_val = self.post[post_pos]
|
||||||
|
if pre_val == post_val:
|
||||||
|
cur_op = "L"
|
||||||
ops.append(
|
ops.append(
|
||||||
(
|
(
|
||||||
cur_op,
|
cur_op,
|
||||||
|
(pre_pos, post_pos),
|
||||||
(pre_val, post_val),
|
(pre_val, post_val),
|
||||||
self.subst_cost(pre_val, post_val, cost_pre, cost_post),
|
self.subst_cost(pre_val, post_val, cost_pre, cost_post),
|
||||||
)
|
)
|
||||||
|
@ -128,27 +133,75 @@ class Levenshtein:
|
||||||
if cur_op == "I":
|
if cur_op == "I":
|
||||||
post_val = self.post[post_pos]
|
post_val = self.post[post_pos]
|
||||||
ops.append(
|
ops.append(
|
||||||
(cur_op, post_val, self.insert_cost(post_val, cost_pre, cost_post))
|
(
|
||||||
|
cur_op,
|
||||||
|
post_pos,
|
||||||
|
post_val,
|
||||||
|
self.insert_cost(post_val, cost_pre, cost_post),
|
||||||
|
)
|
||||||
)
|
)
|
||||||
if cur_op == "D":
|
if cur_op == "D":
|
||||||
pre_val = self.pre[pre_pos]
|
pre_val = self.pre[pre_pos]
|
||||||
ops.append(
|
ops.append(
|
||||||
(cur_op, pre_val, self.del_cost(pre_val, cost_pre, cost_post))
|
(
|
||||||
|
cur_op,
|
||||||
|
pre_pos,
|
||||||
|
pre_val,
|
||||||
|
self.del_cost(pre_val, cost_pre, cost_post),
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
ops = ops[::-1]
|
||||||
|
if self.auto_coalesce:
|
||||||
|
ops = self.coalesce_ops(ops)
|
||||||
|
|
||||||
cached_result = {
|
cached_result = {
|
||||||
"count": self.work_matrix[-1][-1],
|
"count": self.work_matrix[-1][-1],
|
||||||
"ops": ops[::-1],
|
"ops": ops,
|
||||||
}
|
}
|
||||||
|
|
||||||
self.get_full_cache()[self.cache_key()] = cached_result
|
self.get_full_cache()[self.cache_key()] = cached_result
|
||||||
|
|
||||||
return cached_result
|
return cached_result
|
||||||
|
|
||||||
|
@staticmethod
def coalesce_ops(in_ops):
    """Merge runs of consecutive operations of the same kind.

    Args:
        in_ops: iterable of 4-tuples ``(op, pos, val, cost)``. For "S" and
            "L" ops ``val`` is a ``(pre_val, post_val)`` pair; for the
            other ops ("I", "D") it is a single value. Values must support
            ``+`` concatenation (e.g. strings).

    Returns:
        A list of 4-tuples ``(op, (pos, span), vals, cost)`` where ``pos``
        is the position of the first op of the run, ``span`` the number of
        ops merged, ``vals`` the concatenated values (pairwise for "S" and
        "L") and ``cost`` the summed cost. An empty input yields an empty
        list.
    """
    out_ops = []

    coal_op = None
    coal_pos = None
    coal_span = 0  # 0 means "no run opened yet"
    coal_vals = None
    coal_cost = 0

    for (op, pos, val, cost) in in_ops:
        if coal_span and op == coal_op:
            coal_span += 1
            if op in ("S", "L"):
                # Both ops carry a (pre, post) pair: concatenate each side
                # separately instead of concatenating the tuples themselves.
                pre, post = val
                coal_pre, coal_post = coal_vals
                coal_vals = (coal_pre + pre, coal_post + post)
            else:
                coal_vals += val
            coal_cost += cost
        else:
            # Only flush a run that actually exists; otherwise the output
            # would start with a placeholder (None, (None, 0), None, 0).
            if coal_span:
                out_ops.append(
                    (coal_op, (coal_pos, coal_span), coal_vals, coal_cost)
                )
            coal_op = op
            coal_pos = pos
            coal_span = 1
            coal_vals = val
            coal_cost = cost

    # Flush the last open run, if any (nothing to flush on empty input).
    if coal_span:
        out_ops.append((coal_op, (coal_pos, coal_span), coal_vals, coal_cost))
    return out_ops
|
||||||
|
|
||||||
|
|
||||||
class InlineLevenshtein(Levenshtein):
|
class InlineLevenshtein(Levenshtein):
|
||||||
""" Levenshtein distance for edition of a single line """
|
""" Levenshtein distance for edition of a single line """
|
||||||
|
|
||||||
|
auto_coalesce = True
|
||||||
|
|
||||||
def insert_cost(self, to_insert, pre_idx, post_idx):
|
def insert_cost(self, to_insert, pre_idx, post_idx):
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue