builtin: add a non-allocating rune.length_in_bytes() method and tests for it
parent
b3aedff3f8
commit
d7bc2a88f7
|
@ -53,3 +53,21 @@ pub fn (c rune) repeat(count int) string {
|
|||
res := unsafe { utf32_to_str_no_malloc(u32(c), &buffer[0]) }
|
||||
return res.repeat(count)
|
||||
}
|
||||
|
||||
// length_in_bytes returns how many bytes are needed to encode the rune `c` in UTF-8 (1 to 4).
// It returns -1 when `c` cannot be encoded: either it lies in the surrogate
// range 0xD800..0xDFFF, or it is above 0x10FFFF, the maximum valid unicode code point.
// Only integer comparisons are performed here; no string or buffer is created.
pub fn (c rune) length_in_bytes() int {
	cp := u32(c)
	// reject everything past the last valid unicode code point first
	if cp > 0x10FFFF {
		return -1
	}
	// surrogates (between their min and max) are not encodable on their own
	if cp >= 0xD800 && cp <= 0xDFFF {
		return -1
	}
	if cp < 0x80 {
		return 1
	}
	if cp < 0x800 {
		return 2
	}
	if cp < 0x10000 {
		return 3
	}
	return 4
}
|
||||
|
|
|
@ -11,3 +11,27 @@ fn test_repeat() {
|
|||
assert r1.repeat(0) == ''
|
||||
assert r2.repeat(0) == ''
|
||||
}
|
||||
|
||||
// test_length_in_bytes checks rune.length_in_bytes() at the lower bound, at a
// sample value, and at the upper bound of every encoded length, as well as for
// the values that are expected to yield -1.
fn test_length_in_bytes() {
	// 1 byte: 0x0 .. 0x7F (`A` is a latin letter)
	for r in [rune(0x0), `A`, rune(0x7F)] {
		assert r.length_in_bytes() == 1
	}
	// 2 bytes: 0x80 .. 0x7FF (`Д` is a cyrillic letter)
	for r in [rune(0x80), `Д`, rune(0x7FF)] {
		assert r.length_in_bytes() == 2
	}
	// 3 bytes: 0x800 .. 0xFFFF (`喂` means "hey")
	for r in [rune(0x800), `喂`, rune(0xFFFF)] {
		assert r.length_in_bytes() == 3
	}
	// surrogates are invalid: min (0xD800), a value in between, max (0xDFFF)
	for r in [rune(0xD800), rune(0xD866), rune(0xDFFF)] {
		assert r.length_in_bytes() == -1
	}
	// 4 bytes: up to 0x10FFFF (0x10FFD7 is in "Supplementary Private Use Area-B" ¯\_(ツ)_/¯)
	for r in [rune(0x100000), rune(0x10FFD7), rune(0x10FFFF)] {
		assert r.length_in_bytes() == 4
	}
	// one past the maximum valid code point
	assert rune(0x110000).length_in_bytes() == -1
}
|
||||
|
|
Loading…
Reference in New Issue