1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
external format_int : string -> int -> string = "caml_format_int"
let err_no_pred = "U+0000 has no predecessor"
let err_no_succ = "U+10FFFF has no successor"
let err_not_sv i = format_int "%X" i ^ " is not a Unicode scalar value"
let err_not_latin1 u = "U+" ^ format_int "%04X" u ^ " is not a latin1 character"
type t = int
let min = 0x0000
let max = 0x10FFFF
let lo_bound = 0xD7FF
let hi_bound = 0xE000
let bom = 0xFEFF
let rep = 0xFFFD
let succ u =
if u = lo_bound then hi_bound else
if u = max then invalid_arg err_no_succ else
u + 1
let pred u =
if u = hi_bound then lo_bound else
if u = min then invalid_arg err_no_pred else
u - 1
let is_valid i = (min <= i && i <= lo_bound) || (hi_bound <= i && i <= max)
let of_int i = if is_valid i then i else invalid_arg (err_not_sv i)
external unsafe_of_int : int -> t = "%identity"
external to_int : t -> int = "%identity"
let is_char u = u < 256
let of_char c = Char.code c
let to_char u =
if u > 255 then invalid_arg (err_not_latin1 u) else
Char.unsafe_chr u
let unsafe_to_char = Char.unsafe_chr
let equal : int -> int -> bool = ( = )
let compare : int -> int -> int = Stdlib.compare
external seeded_hash_param :
int -> int -> int -> 'a -> int = "caml_hash" [@@noalloc]
let seeded_hash seed x = seeded_hash_param 10 100 seed x
let hash x = seeded_hash_param 10 100 0 x
type utf_decode = int
let valid_bit = 27
let decode_bits = 24
let[@inline] utf_decode_is_valid d = (d lsr valid_bit) = 1
let[@inline] utf_decode_length d = (d lsr decode_bits) land 0b111
let[@inline] utf_decode_uchar d = unsafe_of_int (d land 0xFFFFFF)
let[@inline] utf_decode n u = ((8 lor n) lsl decode_bits) lor (to_int u)
let[@inline] utf_decode_invalid n = (n lsl decode_bits) lor rep
let utf_8_byte_length u = match to_int u with
| u when u < 0 -> assert false
| u when u <= 0x007F -> 1
| u when u <= 0x07FF -> 2
| u when u <= 0xFFFF -> 3
| u when u <= 0x10FFFF -> 4
| _ -> assert false
let utf_16_byte_length u = match to_int u with
| u when u < 0 -> assert false
| u when u <= 0xFFFF -> 2
| u when u <= 0x10FFFF -> 4
| _ -> assert false