1 //! Utility functions to traverse the unicode graphemes of a `Rope`'s text contents.
2 //!
3 //! Based on https://github.com/cessen/led/blob/c4fa72405f510b7fd16052f90a598c429b3104a6/src/graphemes.rs
4 use ropey::{iter::Chunks, str_utils::byte_to_char_idx, RopeSlice};
5 use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
6 use unicode_width::UnicodeWidthStr;
7
8 use std::fmt;
9
10 #[must_use]
grapheme_width(g: &str) -> usize11 pub fn grapheme_width(g: &str) -> usize {
12 if g.as_bytes()[0] <= 127 {
13 // Fast-path ascii.
14 // Point 1: theoretically, ascii control characters should have zero
15 // width, but in our case we actually want them to have width: if they
16 // show up in text, we want to treat them as textual elements that can
17 // be editied. So we can get away with making all ascii single width
18 // here.
19 // Point 2: we're only examining the first codepoint here, which means
20 // we're ignoring graphemes formed with combining characters. However,
21 // if it starts with ascii, it's going to be a single-width grapeheme
22 // regardless, so, again, we can get away with that here.
23 // Point 3: we're only examining the first _byte_. But for utf8, when
24 // checking for ascii range values only, that works.
25 1
26 } else {
27 // We use max(1) here because all grapeheme clusters--even illformed
28 // ones--should have at least some width so they can be edited
29 // properly.
30 UnicodeWidthStr::width(g).max(1)
31 }
32 }
33
34 #[must_use]
nth_prev_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) -> usize35 pub fn nth_prev_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) -> usize {
36 // Bounds check
37 debug_assert!(char_idx <= slice.len_chars());
38
39 // We work with bytes for this, so convert.
40 let mut byte_idx = slice.char_to_byte(char_idx);
41
42 // Get the chunk with our byte index in it.
43 let (mut chunk, mut chunk_byte_idx, mut chunk_char_idx, _) = slice.chunk_at_byte(byte_idx);
44
45 // Set up the grapheme cursor.
46 let mut gc = GraphemeCursor::new(byte_idx, slice.len_bytes(), true);
47
48 // Find the previous grapheme cluster boundary.
49 for _ in 0..n {
50 loop {
51 match gc.prev_boundary(chunk, chunk_byte_idx) {
52 Ok(None) => return 0,
53 Ok(Some(n)) => {
54 byte_idx = n;
55 break;
56 }
57 Err(GraphemeIncomplete::PrevChunk) => {
58 let (a, b, c, _) = slice.chunk_at_byte(chunk_byte_idx - 1);
59 chunk = a;
60 chunk_byte_idx = b;
61 chunk_char_idx = c;
62 }
63 Err(GraphemeIncomplete::PreContext(n)) => {
64 let ctx_chunk = slice.chunk_at_byte(n - 1).0;
65 gc.provide_context(ctx_chunk, n - ctx_chunk.len());
66 }
67 _ => unreachable!(),
68 }
69 }
70 }
71 let tmp = byte_to_char_idx(chunk, byte_idx - chunk_byte_idx);
72 chunk_char_idx + tmp
73 }
74
75 /// Finds the previous grapheme boundary before the given char position.
76 #[must_use]
77 #[inline(always)]
prev_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize78 pub fn prev_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
79 nth_prev_grapheme_boundary(slice, char_idx, 1)
80 }
81
82 #[must_use]
nth_next_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) -> usize83 pub fn nth_next_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) -> usize {
84 // Bounds check
85 debug_assert!(char_idx <= slice.len_chars());
86
87 // We work with bytes for this, so convert.
88 let mut byte_idx = slice.char_to_byte(char_idx);
89
90 // Get the chunk with our byte index in it.
91 let (mut chunk, mut chunk_byte_idx, mut chunk_char_idx, _) = slice.chunk_at_byte(byte_idx);
92
93 // Set up the grapheme cursor.
94 let mut gc = GraphemeCursor::new(byte_idx, slice.len_bytes(), true);
95
96 // Find the nth next grapheme cluster boundary.
97 for _ in 0..n {
98 loop {
99 match gc.next_boundary(chunk, chunk_byte_idx) {
100 Ok(None) => return slice.len_chars(),
101 Ok(Some(n)) => {
102 byte_idx = n;
103 break;
104 }
105 Err(GraphemeIncomplete::NextChunk) => {
106 chunk_byte_idx += chunk.len();
107 let (a, _, c, _) = slice.chunk_at_byte(chunk_byte_idx);
108 chunk = a;
109 chunk_char_idx = c;
110 }
111 Err(GraphemeIncomplete::PreContext(n)) => {
112 let ctx_chunk = slice.chunk_at_byte(n - 1).0;
113 gc.provide_context(ctx_chunk, n - ctx_chunk.len());
114 }
115 _ => unreachable!(),
116 }
117 }
118 }
119 let tmp = byte_to_char_idx(chunk, byte_idx - chunk_byte_idx);
120 chunk_char_idx + tmp
121 }
122
123 /// Finds the next grapheme boundary after the given char position.
124 #[must_use]
125 #[inline(always)]
next_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize126 pub fn next_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
127 nth_next_grapheme_boundary(slice, char_idx, 1)
128 }
129
130 /// Returns the passed char index if it's already a grapheme boundary,
131 /// or the next grapheme boundary char index if not.
132 #[must_use]
133 #[inline]
ensure_grapheme_boundary_next(slice: RopeSlice, char_idx: usize) -> usize134 pub fn ensure_grapheme_boundary_next(slice: RopeSlice, char_idx: usize) -> usize {
135 if char_idx == 0 {
136 char_idx
137 } else {
138 next_grapheme_boundary(slice, char_idx - 1)
139 }
140 }
141
142 /// Returns the passed char index if it's already a grapheme boundary,
143 /// or the prev grapheme boundary char index if not.
144 #[must_use]
145 #[inline]
ensure_grapheme_boundary_prev(slice: RopeSlice, char_idx: usize) -> usize146 pub fn ensure_grapheme_boundary_prev(slice: RopeSlice, char_idx: usize) -> usize {
147 if char_idx == slice.len_chars() {
148 char_idx
149 } else {
150 prev_grapheme_boundary(slice, char_idx + 1)
151 }
152 }
153
154 /// Returns whether the given char position is a grapheme boundary.
155 #[must_use]
is_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> bool156 pub fn is_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> bool {
157 // Bounds check
158 debug_assert!(char_idx <= slice.len_chars());
159
160 // We work with bytes for this, so convert.
161 let byte_idx = slice.char_to_byte(char_idx);
162
163 // Get the chunk with our byte index in it.
164 let (chunk, chunk_byte_idx, _, _) = slice.chunk_at_byte(byte_idx);
165
166 // Set up the grapheme cursor.
167 let mut gc = GraphemeCursor::new(byte_idx, slice.len_bytes(), true);
168
169 // Determine if the given position is a grapheme cluster boundary.
170 loop {
171 match gc.is_boundary(chunk, chunk_byte_idx) {
172 Ok(n) => return n,
173 Err(GraphemeIncomplete::PreContext(n)) => {
174 let (ctx_chunk, ctx_byte_start, _, _) = slice.chunk_at_byte(n - 1);
175 gc.provide_context(ctx_chunk, ctx_byte_start);
176 }
177 Err(_) => unreachable!(),
178 }
179 }
180 }
181
182 /// An iterator over the graphemes of a `RopeSlice`.
183 #[derive(Clone)]
184 pub struct RopeGraphemes<'a> {
185 text: RopeSlice<'a>,
186 chunks: Chunks<'a>,
187 cur_chunk: &'a str,
188 cur_chunk_start: usize,
189 cursor: GraphemeCursor,
190 }
191
192 impl<'a> fmt::Debug for RopeGraphemes<'a> {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result193 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
194 f.debug_struct("RopeGraphemes")
195 .field("text", &self.text)
196 .field("chunks", &self.chunks)
197 .field("cur_chunk", &self.cur_chunk)
198 .field("cur_chunk_start", &self.cur_chunk_start)
199 // .field("cursor", &self.cursor)
200 .finish()
201 }
202 }
203
204 impl<'a> RopeGraphemes<'a> {
205 #[must_use]
new(slice: RopeSlice) -> RopeGraphemes206 pub fn new(slice: RopeSlice) -> RopeGraphemes {
207 let mut chunks = slice.chunks();
208 let first_chunk = chunks.next().unwrap_or("");
209 RopeGraphemes {
210 text: slice,
211 chunks,
212 cur_chunk: first_chunk,
213 cur_chunk_start: 0,
214 cursor: GraphemeCursor::new(0, slice.len_bytes(), true),
215 }
216 }
217 }
218
219 impl<'a> Iterator for RopeGraphemes<'a> {
220 type Item = RopeSlice<'a>;
221
next(&mut self) -> Option<RopeSlice<'a>>222 fn next(&mut self) -> Option<RopeSlice<'a>> {
223 let a = self.cursor.cur_cursor();
224 let b;
225 loop {
226 match self
227 .cursor
228 .next_boundary(self.cur_chunk, self.cur_chunk_start)
229 {
230 Ok(None) => {
231 return None;
232 }
233 Ok(Some(n)) => {
234 b = n;
235 break;
236 }
237 Err(GraphemeIncomplete::NextChunk) => {
238 self.cur_chunk_start += self.cur_chunk.len();
239 self.cur_chunk = self.chunks.next().unwrap_or("");
240 }
241 Err(GraphemeIncomplete::PreContext(idx)) => {
242 let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
243 self.cursor.provide_context(chunk, byte_idx);
244 }
245 _ => unreachable!(),
246 }
247 }
248
249 if a < self.cur_chunk_start {
250 let a_char = self.text.byte_to_char(a);
251 let b_char = self.text.byte_to_char(b);
252
253 Some(self.text.slice(a_char..b_char))
254 } else {
255 let a2 = a - self.cur_chunk_start;
256 let b2 = b - self.cur_chunk_start;
257 Some((&self.cur_chunk[a2..b2]).into())
258 }
259 }
260 }
261