From c72bdd8e0f37e448f23199b1275ef11f61102b7f Mon Sep 17 00:00:00 2001 From: Kento Okura Date: Mon, 21 Oct 2024 20:02:21 +0200 Subject: [PATCH 1/2] feat(Trie): completion API --- src/Trie.ml | 40 ++++++++++++++++++++++++++++++++++++++++ src/Trie.mli | 7 +++++++ test/Example.expected | 13 +++++++++++++ test/Example.ml | 20 +++++++++++++++++++- test/ListAsTrie.ml | 20 ++++++++++++++++++++ test/TestTrie.ml | 5 +++++ 6 files changed, 104 insertions(+), 1 deletion(-) diff --git a/src/Trie.ml b/src/Trie.ml index b0d93479..11d143f5 100644 --- a/src/Trie.ml +++ b/src/Trie.ml @@ -344,3 +344,43 @@ let set_of_tags (type tag) (cmp : tag -> tag -> int) (v : ('data, tag) t) : tag let set = ref TagSet.empty in Option.iter (fun (_, n) -> iter_tag_node (fun t -> set := TagSet.add t !set) n) v; TagSet.to_seq !set + +let edit_distance ~cutoff x y = + let len_x, len_y = String.length x, String.length y in + let grid = Array.make_matrix (len_x + 1) (len_y + 1) 0 in + for i = 1 to len_x do + grid.(i).(0) <- i; + done; + for j = 1 to len_y do + grid.(0).(j) <- j; + done; + for j = 1 to len_y do + for i = 1 to len_x do + let cost = if x.[i-1] = y.[j-1] then 0 else 1 in + let k = Int.min (grid.(i-1).(j) + 1) (grid.(i).(j-1) + 1)in + grid.(i).(j) <- Int.min k (grid.(i-1).(j-1) + cost) + done; + done; + let result = grid.(len_x).(len_y) in + if result > cutoff + then None + else + Some result + +let complete ?prefix ~(cutoff : int) (p : bwd_path) (t : ('data, 'tag) t) : (bwd_path * int) list = + let compare p d = + edit_distance ~cutoff (String.concat "" (Bwd.to_list p)) (String.concat "" (Bwd.to_list d)) + in + filter_map ?prefix (fun q _ -> + match compare p q with + | Some i -> + if i > cutoff then + None + else + (Some (q, i)) + | None -> None + ) t + |> to_seq + |> Seq.map snd + |> List.of_seq + |> List.sort (fun a b -> Int.compare (snd a) (snd b)) diff --git a/src/Trie.mli b/src/Trie.mli index cab306b0..e6bcd5a0 100644 --- a/src/Trie.mli +++ b/src/Trie.mli @@ -78,6 +78,13 @@ val filter : ?prefix:bwd_path -> (bwd_path -> 'data * 'tag -> bool) -> ('data, ' *) val filter_map : ?prefix:bwd_path -> (bwd_path -> 'data1 * 'tag1 -> ('data2 * 'tag2) option) -> ('data1, 'tag1) t -> ('data2, 'tag2) t +(** [complete ~cutoff:i p trie] returns the list of paths in [trie] with edit distance to p less than i, sorted by edit distance in ascending order. This can be used to implement autocomplete functionality and diagnostics that suggest alternative names if a path failed to resolve. +*) +val complete : ?prefix:bwd_path -> cutoff:int -> bwd_path -> ('data, 'tag) t -> (bwd_path * int) list + +(** A simple implementation of the Levenshtein edit distance algorithm, used in {!val:complete} complete*) +val edit_distance : cutoff:int -> string -> string -> int option + (** {1 Updating} *) (** [update_subtree p f t] replaces the subtree [t'] rooted at [p] in [t] with [f t']. *) diff --git a/test/Example.expected b/test/Example.expected index 1112ae11..70465cad 100644 --- a/test/Example.expected +++ b/test/Example.expected @@ -1,27 +1,40 @@ [Info] Got the following bindings at (root): + foo => 1 (local) x => 1 (local) [Warning] Data 1 (local) assigned at x was shadowed by data 2 (local) in the export namespace. [Warning] Data 1 (local) assigned at x was shadowed by data 2 (local) in the visible namespace. [Info] Got the following bindings at (root): + foo => 1 (local) x => 2 (local) [Info] Got the following bindings at (root): + foo => 1 (local) x => 10 (local) [Info] Got the following bindings at (root): + foo => 1 (local) x => 10 (local) z.y => 20 (imported) [Info] Got the following bindings at (root): a => 100 (local) + foo => 1 (local) x => 10 (local) z.y => 20 (imported) [Warning] Data 10 (local) assigned at w.x was shadowed by data 10 (local). [Info] Got the following bindings at (root): + foo => 1 (local) w.a => 100 (local) w.x => 10 (local) x => 10 (local) z.y => 20 (imported) +[Info] Got the following completion items for path zoo: + foo, distance: 1 + goo, distance: 1 + fooo, distance: 2 + ogoo, distance: 2 + z.y, distance: 2 + diff --git a/test/Example.ml b/test/Example.ml index 8d58c4cc..d0d4b50f 100644 --- a/test/Example.ml +++ b/test/Example.ml @@ -2,7 +2,7 @@ open Yuujinchou open Bwd (* A tiny language demonstrating some power of the Scope module. *) -type modifier_cmd = Print +type modifier_cmd = Print | Complete type decl = (* declaration *) | Decl of Trie.path * int @@ -12,6 +12,8 @@ type decl = | Import of int Trie.Untagged.t * modifier_cmd Language.t (* printing out all visible bindings *) | PrintVisible + (* Get completion suggestions for a path *) + | CompleteVisible (* exporting a binding *) | Export of Trie.path (* section *) @@ -74,6 +76,15 @@ struct input; Format.printf "@]@."; input + | Complete -> + let typo = Bwd.of_list ["zoo"] in + Format.printf "@[[Info] Got the following completion items for path %a%a:@;" + pp_path typo pp_context context; + List.iter + (fun (path, dist) -> Format.printf "%a, distance: %i@;" pp_path path dist) + (Trie.complete ~cutoff:2 typo input); + Format.printf "@]@."; + input end (* The interpreter *) @@ -89,6 +100,8 @@ let rec interpret_decl : decl -> unit = S.import_subtree ~modifier:m ([], t) | PrintVisible -> S.modify_visible (Language.hook Print) + | CompleteVisible -> + S.modify_visible (Language.hook Complete) | Export p -> S.export_visible (Language.only p) | Section (p, sec) -> @@ -105,6 +118,7 @@ let interpret (prog : program) = (* Some code in action *) let () = interpret [ Decl (["x"], 1); + Decl (["foo"], 1); PrintVisible; Decl (["x"], 2); PrintVisible; @@ -120,4 +134,8 @@ let () = interpret [ Export ["x"]; ]); PrintVisible; + Decl (["fooo"], 1); + Decl (["goo"], 1); + Decl (["ogoo"], 1); + CompleteVisible; ] diff --git a/test/ListAsTrie.ml b/test/ListAsTrie.ml index 4614bc12..fac86d9d 100644 --- a/test/ListAsTrie.ml +++ b/test/ListAsTrie.ml @@ -106,3 +106,23 @@ let retag_subtree pre t l = List.map (fun ((p, (d, _)) as b) -> if Option.is_some (split_path pre p) then p, (d, t) else b) l let untag l = retag () l let set_of_tags cmp l = List.to_seq @@ List.sort_uniq cmp @@ List.map (fun (_, (_, t)) -> t) l + +let edit_distance = Yuujinchou.Trie.edit_distance + +let complete ?prefix ~(cutoff : int) (p : bwd_path) (t : ('data, 'tag) t) : (bwd_path * int) list = + let compare p d = + edit_distance ~cutoff (String.concat "" (Bwd.to_list p)) (String.concat "" (Bwd.to_list d)) + in + filter_map ?prefix (fun q _ -> + match compare p q with + | Some i -> + if i > cutoff then + None + else + (Some (q, i)) + | None -> None + ) t + |> to_seq + |> Seq.map snd + |> List.of_seq + |> List.sort (fun a b -> Int.compare (snd a) (snd b)) diff --git a/test/TestTrie.ml b/test/TestTrie.ml index 33882f93..4b623f2b 100644 --- a/test/TestTrie.ml +++ b/test/TestTrie.ml @@ -248,6 +248,10 @@ let test_untag = let test_set_of_tags = Q.Test.make ~count ~name:"set_of_tags" gen_list ~print:print_list (fun l -> List.of_seq (Trie.set_of_tags Int.compare (of_list l)) = List.of_seq (ListAsTrie.set_of_tags Int.compare l)) +let test_complete = + Q.Test.make ~count ~name:"complete" Q.Gen.(pair gen_bwd_path gen_list) ~print:Q.Print.(pair print_bwd_path print_list) + (fun (p, l) -> + (Trie.complete ~cutoff:2 p (of_list l)) = ListAsTrie.complete ~cutoff:2 p l) let () = exit @@ @@ -285,4 +289,5 @@ let () = ; test_retag_subtree ; test_untag ; test_set_of_tags + ; test_complete ] From b742bd90665a7778cd6dc0bd470821ed89d6d199 Mon Sep 17 00:00:00 2001 From: Kento Okura Date: Mon, 21 Oct 2024 20:53:30 +0200 Subject: [PATCH 2/2] return trie tagged by the edit distance. --- src/Trie.ml | 12 ++++-------- src/Trie.mli | 5 ++--- test/Example.expected | 2 +- test/Example.ml | 6 +++++- test/ListAsTrie.ml | 12 ++++-------- test/TestTrie.ml | 2 +- 6 files changed, 17 insertions(+), 22 deletions(-) diff --git a/src/Trie.ml b/src/Trie.ml index 11d143f5..fe80be63 100644 --- a/src/Trie.ml +++ b/src/Trie.ml @@ -367,20 +367,16 @@ let edit_distance ~cutoff x y = else Some result -let complete ?prefix ~(cutoff : int) (p : bwd_path) (t : ('data, 'tag) t) : (bwd_path * int) list = +let complete ?prefix ~(cutoff : int) (p : bwd_path) : ('data, 'tag) t -> ('data, int) t = let compare p d = edit_distance ~cutoff (String.concat "" (Bwd.to_list p)) (String.concat "" (Bwd.to_list d)) in - filter_map ?prefix (fun q _ -> + filter_map ?prefix (fun q (data, _) -> match compare p q with | Some i -> if i > cutoff then None else - (Some (q, i)) + (Some (data, i)) | None -> None - ) t - |> to_seq - |> Seq.map snd - |> List.of_seq - |> List.sort (fun a b -> Int.compare (snd a) (snd b)) + ) diff --git a/src/Trie.mli b/src/Trie.mli index e6bcd5a0..f8dec6dd 100644 --- a/src/Trie.mli +++ b/src/Trie.mli @@ -78,9 +78,8 @@ val filter : ?prefix:bwd_path -> (bwd_path -> 'data * 'tag -> bool) -> ('data, ' *) val filter_map : ?prefix:bwd_path -> (bwd_path -> 'data1 * 'tag1 -> ('data2 * 'tag2) option) -> ('data1, 'tag1) t -> ('data2, 'tag2) t -(** [complete ~cutoff:i p trie] returns the list of paths in [trie] with edit distance to p less than i, sorted by edit distance in ascending order. This can be used to implement autocomplete functionality and diagnostics that suggest alternative names if a path failed to resolve. -*) -val complete : ?prefix:bwd_path -> cutoff:int -> bwd_path -> ('data, 'tag) t -> (bwd_path * int) list +(** [complete ~cutoff:i p trie] retags each entry [e] in [trie] with the edit distance of p to the path of [e]. It can be used to implement autocomplete and "Did you mean..." style diagnostics.*) +val complete : ?prefix:bwd_path -> cutoff:int -> bwd_path -> ('data, 'tag) t -> ('data, int) t (** A simple implementation of the Levenshtein edit distance algorithm, used in {!val:complete} complete*) val edit_distance : cutoff:int -> string -> string -> int option diff --git a/test/Example.expected b/test/Example.expected index 70465cad..d947da9a 100644 --- a/test/Example.expected +++ b/test/Example.expected @@ -33,8 +33,8 @@ [Info] Got the following completion items for path zoo: foo, distance: 1 - goo, distance: 1 fooo, distance: 2 + goo, distance: 1 ogoo, distance: 2 z.y, distance: 2 diff --git a/test/Example.ml b/test/Example.ml index d0d4b50f..02d84a20 100644 --- a/test/Example.ml +++ b/test/Example.ml @@ -82,7 +82,11 @@ struct pp_path typo pp_context context; List.iter (fun (path, dist) -> Format.printf "%a, distance: %i@;" pp_path path dist) - (Trie.complete ~cutoff:2 typo input); + (Trie.complete ~cutoff:2 typo input + |> Trie.to_seq + |> List.of_seq + |> List.map (fun (path, (_, dist)) -> (Bwd.of_list path), dist) + ); Format.printf "@]@."; input end diff --git a/test/ListAsTrie.ml b/test/ListAsTrie.ml index fac86d9d..3cad0e26 100644 --- a/test/ListAsTrie.ml +++ b/test/ListAsTrie.ml @@ -109,20 +109,16 @@ let set_of_tags cmp l = List.to_seq @@ List.sort_uniq cmp @@ List.map (fun (_, ( let edit_distance = Yuujinchou.Trie.edit_distance -let complete ?prefix ~(cutoff : int) (p : bwd_path) (t : ('data, 'tag) t) : (bwd_path * int) list = +let complete ?prefix ~(cutoff : int) (p : bwd_path) : ('data, 'tag) t -> ('data, int) t = let compare p d = edit_distance ~cutoff (String.concat "" (Bwd.to_list p)) (String.concat "" (Bwd.to_list d)) in - filter_map ?prefix (fun q _ -> + filter_map ?prefix (fun q (data, _) -> match compare p q with | Some i -> if i > cutoff then None else - (Some (q, i)) + (Some (data, i)) | None -> None - ) t - |> to_seq - |> Seq.map snd - |> List.of_seq - |> List.sort (fun a b -> Int.compare (snd a) (snd b)) + ) diff --git a/test/TestTrie.ml b/test/TestTrie.ml index 4b623f2b..b3bb5ada 100644 --- a/test/TestTrie.ml +++ b/test/TestTrie.ml @@ -251,7 +251,7 @@ let test_set_of_tags = let test_complete = Q.Test.make ~count ~name:"complete" Q.Gen.(pair gen_bwd_path gen_list) ~print:Q.Print.(pair print_bwd_path print_list) (fun (p, l) -> - (Trie.complete ~cutoff:2 p (of_list l)) = ListAsTrie.complete ~cutoff:2 p l) + (to_list @@ Trie.complete ~cutoff:2 p (of_list l)) = ListAsTrie.complete ~cutoff:2 p l) let () = exit @@