Open
Description
The grapheme cluster boundaries of "🇷🇸🇮🇴" should be at byte offsets 8 and 16, but when I feed
`GraphemeCursor` the individual regional indicator symbol (RIS) codepoints one chunk at a time, I get 8 and 12. Am I using the API incorrectly, or is this a bug?
use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
// Reproduction: walk the grapheme boundaries of "🇷🇸🇮🇴" while handing the
// cursor one codepoint per chunk.
//
// The chunks are passed at byte offsets 0, 4, 8 and 12, i.e. each codepoint
// occupies 4 bytes. Every `match` pins the single result the reporter expects
// at that step; any other result falls through to `unreachable!()` and panics.
// The reporter expects boundaries at 8 and 16 but observes 8 and 12, which is
// the case the final `match` singles out with an explicit `panic!`.
fn main() {
let s = "🇷🇸🇮🇴";
// Cursor starts at offset 0 over a text of `s.len()` bytes.
// NOTE(review): the third argument (`true`) presumably selects extended
// grapheme clusters — confirm against the `GraphemeCursor::new` docs.
let mut cursor = GraphemeCursor::new(0, s.len(), true);
// 🇷🇸
// First boundary search, starting from offset 0.
// With only the first chunk available, the cursor is expected to ask for the
// following chunk.
match cursor.next_boundary("🇷", 0) {
Err(GraphemeIncomplete::NextChunk) => {}
_ => unreachable!(),
}
// Feeding the second chunk (offset 4) is expected to trigger a request for the
// text that ends at offset 4; answer it with the first chunk.
match cursor.next_boundary("🇸", 4) {
Err(GraphemeIncomplete::PreContext(4)) => {
cursor.provide_context("🇷", 0);
}
_ => unreachable!(),
}
// Retry with the same chunk now that the context was supplied; the cursor is
// expected to ask for the next chunk again.
match cursor.next_boundary("🇸", 4) {
Err(GraphemeIncomplete::NextChunk) => {}
_ => unreachable!(),
}
// Third chunk (offset 8): the cursor is expected to request context ending at
// offset 8, which is the second chunk.
match cursor.next_boundary("🇮", 8) {
Err(GraphemeIncomplete::PreContext(8)) => {
cursor.provide_context("🇸", 4);
}
_ => unreachable!(),
}
// It is then expected to look one chunk further back (context ending at
// offset 4), which is the first chunk.
match cursor.next_boundary("🇮", 8) {
Err(GraphemeIncomplete::PreContext(4)) => {
cursor.provide_context("🇷", 0);
}
_ => unreachable!(),
}
// With all preceding text supplied, the first boundary is reported at offset 8.
match cursor.next_boundary("🇮", 8) {
Ok(Some(8)) => {}
_ => unreachable!(),
}
// 🇮🇴
// Second boundary search, continuing with the same cursor and re-feeding the
// chunk at offset 8.
match cursor.next_boundary("🇮", 8) {
Err(GraphemeIncomplete::NextChunk) => {}
_ => unreachable!(),
}
// Fourth chunk (offset 12): the cursor is expected to walk backwards through
// the preceding text one chunk at a time, requesting context ending at
// offsets 12, 8 and 4 in turn.
match cursor.next_boundary("🇴", 12) {
Err(GraphemeIncomplete::PreContext(12)) => {
cursor.provide_context("🇮", 8);
}
_ => unreachable!(),
}
match cursor.next_boundary("🇴", 12) {
Err(GraphemeIncomplete::PreContext(8)) => {
cursor.provide_context("🇸", 4);
}
_ => unreachable!(),
}
match cursor.next_boundary("🇴", 12) {
Err(GraphemeIncomplete::PreContext(4)) => {
cursor.provide_context("🇷", 0);
}
_ => unreachable!(),
}
// All preceding text has been supplied. The expected result is a boundary at
// offset 16 (which equals `s.len()` if each codepoint is 4 bytes, as the chunk
// offsets imply); a boundary at 12 is the
// behaviour being reported, so it gets its own descriptive panic instead of
// the generic `unreachable!()`.
match cursor.next_boundary("🇴", 12) {
Ok(Some(16)) => {}
Ok(Some(12)) => panic!("this should be 16"),
_ => unreachable!(),
}
}
Metadata
Metadata
Assignees
Labels
No labels