-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathedit.go
220 lines (195 loc) · 6.83 KB
/
edit.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
package slice
import (
"fmt"
"slices"
)
// LCS computes a longest common subsequence of as and bs.
//
// This implementation takes Θ(mn) time and O(P·min(m, n)) space for inputs of
// length m = len(as) and n = len(bs) and longest subsequence length P.
func LCS[T comparable, Slice ~[]T](as, bs Slice) Slice { return LCSFunc(as, bs, equal) }
// LCSFunc computes a longest common subsequence of as and bs, using
// eq to compare elements.
//
// This implementation takes Θ(mn) time and O(P·min(m, n)) space for inputs of
// length m = len(as) and n = len(bs) and longest subsequence length P.
func LCSFunc[T any, Slice ~[]T](as, bs Slice, eq func(a, b T) bool) Slice {
if len(as) == 0 || len(bs) == 0 {
return nil
}
// We maintain two rows of the optimization matrix, (p)revious and
// (c)urrent. Rows are positions in bs and columns are positions in as.
// The rows are extended by one position to get rid of the special case at
// the beginning of the sequence (we use 1..n instead of 0..n-1).
// Size the buffers based on the smaller input, since order does not matter.
// This lets us use less storage with no time penalty.
if len(bs) < len(as) {
as, bs = bs, as
}
type seq struct {
i int // offset into as
n int // length of path
prev *seq // previous element
}
p := make([]*seq, len(as)+1)
c := make([]*seq, len(as)+1)
var zero seq // sentinel; not written
for i := range p {
p[i] = &zero
c[i] = &zero
}
// Fill the rows top to bottom, left to right, since the optimization
// recurrence needs the previous element in the same row, and the same and
// previous elements in the previous row.
for j := 1; j <= len(bs); j++ {
p, c = c, p // swap the double buffer
// Fill the current row.
for i := 1; i <= len(as); i++ {
if eq(as[i-1], bs[j-1]) {
c[i] = &seq{i - 1, p[i-1].n + 1, p[i-1]}
} else if c[i-1].n >= p[i].n {
c[i] = c[i-1]
} else {
c[i] = p[i]
}
}
}
out := make(Slice, 0, c[len(as)].n)
for p := c[len(as)]; p.n > 0; p = p.prev {
out = append(out, as[p.i])
}
slices.Reverse(out)
return out
}
// EditOp is the opcode of an edit sequence instruction.
type EditOp byte
const (
OpDrop EditOp = '-' // Drop items from lhs
OpEmit EditOp = '=' // Emit elements from lhs
OpCopy EditOp = '+' // Copy items from rhs
OpReplace EditOp = '!' // Replace with items from rhs (== Drop+Copy)
)
// Edit is an edit operation transforming specified as part of a diff.
// Each edit refers to a specific span of one of the inputs.
type Edit[T any] struct {
Op EditOp // the diff operation to apply at the current offset
// X specifies the elements of lhs affected by the edit.
// For OpDrop and OpReplace it is the elements to be dropped.
// For OpEmit its the elements to be emitted.
// For OpCopy it is empty.
X []T
// Y specifies the elements of rhs affected by the edit.
// For OpDrop and OpEmit it is empty.
// For OpCopy and OpReplace it is the elements to be copied.
Y []T
}
func (e Edit[T]) String() string {
switch e.Op {
case OpCopy:
return fmt.Sprintf("%c%v", e.Op, e.Y)
case OpReplace:
x, y := fmt.Sprint(e.X), fmt.Sprint(e.Y)
return fmt.Sprintf("%c[%s:%s]", e.Op, x[1:len(x)-1], y[1:len(y)-1])
case OpDrop, OpEmit:
return fmt.Sprintf("%c%v", e.Op, e.X)
}
return fmt.Sprintf("!%c[INVALID]", e.Op)
}
// EditScript computes a minimal-length sequence of Edit operations that will
// transform lhs into rhs. The result is empty if lhs == rhs. The slices stored
// in returned edit operations share storage with the inputs lhs and rhs.
//
// This implementation takes Θ(mn) time and O(P·min(m, n)) space to compute a
// longest common subsequence, plus overhead of O(m+n) time and space to
// construct the edit sequence from the LCS.
//
// An edit sequence is processed in order. Items are sent to the output
// according to the following rules.
//
// For each element e of the edit script, if e.Op is:
//
// - OpDrop: No output; e.X records the items discarded.
//
// - OpEmit: Emit the elements in e.X from lhs.
//
// - OpCopy: Emit the elements in e.Y from rhs.
//
// - OpReplace: Emit the elements in e.Y from rhs. The items in e.X are the
// elements from lhs that were replaced. (== Drop + Copy)
//
// If the edit script is empty, the output is equal to the input.
func EditScript[T comparable, Slice ~[]T](lhs, rhs Slice) []Edit[T] {
return editScriptFunc(equal, lhs, rhs)
}
// editScriptFunc computes an edit script using eq as an equality comparison.
func editScriptFunc[T any, Slice ~[]T](eq func(a, b T) bool, lhs, rhs Slice) []Edit[T] {
lcs := LCSFunc(lhs, rhs, eq)
// To construct the edit sequence, i scans forward through lcs.
// For each i, we find the unclaimed elements of lhs and rhs prior to the
// occurrence of lcs[i].
//
// Elements of lhs before lcs[i] must be removed from the result.
// Elements of rhs before lcs[i] must be added to the result.
// Elements equal to lcs members are preserved as-written.
//
// However, whenever we have deletes followed immediately by inserts, the
// net effect is to "replace" some or all of the deleted items with the
// inserted ones. We represent this case explicitly with a replace edit.
lpos, rpos, i := 0, 0, 0
var out []Edit[T]
for i < len(lcs) {
// Count the numbers of elements of lhs and rhs prior to the next match.
lend := lpos
for !eq(lhs[lend], lcs[i]) {
lend++
}
rend := rpos
for !eq(rhs[rend], lcs[i]) {
rend++
}
// If we have both deletions and copies, combine them in a single replace
// instruction.
if lend > lpos && rend > rpos {
out = append(out, Edit[T]{Op: OpReplace, X: lhs[lpos:lend], Y: rhs[rpos:rend]})
rpos = rend
} else if lend > lpos {
// Record drops (there may be none).
out = append(out, Edit[T]{Op: OpDrop, X: lhs[lpos:lend]})
}
// Record copies (there may be none).
if rend > rpos {
out = append(out, Edit[T]{Op: OpCopy, Y: rhs[rpos:rend]})
}
lpos, rpos = lend, rend
// Reaching here, lhs[lpos] == rhs[rpos] == lcs[i].
// Count how many elements are equal and copy them.
m := 1
for i+m < len(lcs) && eq(lhs[lpos+m], rhs[rpos+m]) {
m++
}
out = append(out, Edit[T]{Op: OpEmit, X: lhs[lpos : lpos+m]})
i += m
lpos += m
rpos += m
}
// If we have both deletions and copies, combine them in a single replace
// instruction.
if len(lhs) > lpos && len(rhs) > rpos {
out = append(out, Edit[T]{Op: OpReplace, X: lhs[lpos:], Y: rhs[rpos:]})
rpos = len(rhs)
} else if len(lhs) > lpos {
// Drop any leftover elements of lhs.
out = append(out, Edit[T]{Op: OpDrop, X: lhs[lpos:]})
}
// Copy any leftover elements of rhs.
if len(rhs) > rpos {
out = append(out, Edit[T]{Op: OpCopy, Y: rhs[rpos:]})
}
// As a special case, if the whole edit is a single emit, drop it so that
// equal elements have an empty script.
if len(out) == 1 && out[0].Op == OpEmit {
return nil
}
return out
}
func equal[T comparable](a, b T) bool { return a == b }