1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
type terminals =
| Empty
| Terminals of Entry.t array
| Summary of Entry.t array
type node =
{ start : int
; len : int
; size : int
; terminals : terminals
; children : node array option
}
type t =
{ str : string
; t : node
}
let empty = { start = 0; len = 0; size = 0; children = None; terminals = Empty }
let empty () =
Obj.obj @@ Obj.dup @@ Obj.repr empty
let size t = t.t.size
let minimum { t; _ } =
match t.terminals with
| Empty -> assert false
| Terminals arr | Summary arr -> arr.(0)
let array_find ~str chr arr =
let rec go i =
if i >= Array.length arr
then None
else begin
let node = arr.(i) in
if chr = str.[node.start - 1] then Some node else go (i + 1)
end
in
go 0
let array_find ~str chr = function
| None -> None
| Some arr -> array_find ~str chr arr
let lcp i_str i j_str j j_len =
let j_stop = j + j_len in
let rec go_lcp i j =
if i >= String.length i_str || j >= j_stop
then i
else begin
let i_chr, j_chr = i_str.[i], j_str.[j] in
if i_chr <> j_chr then i else go_lcp (i + 1) (j + 1)
end
in
let i' = go_lcp i j in
i' - i
let rec find ~str node pattern i =
if i >= String.length pattern
then Some node
else begin
match array_find ~str pattern.[i] node.children with
| None -> None
| Some child -> find_lcp ~str child pattern (i + 1)
end
and find_lcp ~str child pattern i =
let n = lcp pattern i str child.start child.len in
if i + n = String.length pattern
then Some { child with start = child.start + n; len = child.len - n }
else if n = child.len
then find ~str child pattern (i + n)
else None
let find t pattern =
match find_lcp ~str:t.str t.t pattern 0 with
| None -> None
| Some child -> Some { str = t.str; t = child }
let advance node =
assert (node.len >= 1) ;
{ node with start = node.start + 1; len = node.len - 1 }
let stepback node =
assert (node.len >= 0) ;
{ node with start = node.start - 1; len = node.len + 1 }
let rec find_skip ~spaces t pattern yield =
let skip () =
let node = t.t in
if node.len >= 1
then begin
let spaces = spaces + if t.str.[node.start] = ' ' then 1 else 0 in
if spaces > 1
then ()
else find_skip ~spaces { t with t = advance t.t } pattern yield
end
else begin
match node.children with
| None -> ()
| Some children ->
Array.iter
(fun child -> find_skip ~spaces { t with t = stepback child } pattern yield)
children
end
in
if spaces = 0
then skip ()
else if spaces = 1 && pattern = Type_polarity.poly
then begin
match find t pattern with
| None -> ()
| Some here -> yield here
end
else begin
skip () ;
match find t pattern with
| None -> ()
| Some here -> yield here
end
let find_star t pattern yield =
let rec go t = function
| [] -> yield t
| p :: ps -> find_skip ~spaces:0 t p @@ fun t -> go t ps
in
match String.split_on_char ' ' pattern with
| [] -> ()
| p :: ps -> begin
match find t p with
| None -> ()
| Some t -> go t ps
end
let find_star t pattern =
let found = ref [] in
find_star t pattern (fun t -> found := t :: !found) ;
!found