encoding.utf8: add `pub fn is_letter(r rune) bool` (#11547)

pull/11558/head
ChAoS_UnItY 2021-09-21 02:16:50 +08:00 committed by GitHub
parent 5cf0ee46b3
commit 077c55d0c8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 1277 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@ -141,6 +141,25 @@ pub fn is_punct(s string, index int) bool {
return is_uchar_punct(get_uchar(s, index))
}
// is_control reports whether the rune `r` is a control code.
pub fn is_control(r rune) bool {
	// Runes above max_latin_1 are never control codes, so they are rejected
	// before the table lookup; the `&&` short-circuit keeps byte(r) from being
	// evaluated for them. A props entry equal to 1 marks a control code.
	return r <= max_latin_1 && props[byte(r)] == 1
}
// is_letter reports whether the rune `r` is a unicode letter (unicode category L).
pub fn is_letter(r rune) bool {
	// Fast path: plain ASCII letters need no table access.
	if (r >= `a` && r <= `z`) || (r >= `A` && r <= `Z`) {
		return true
	}
	// Runes beyond max_latin_1 are resolved against letter_table.
	if r > max_latin_1 {
		return is_excluding_latin(letter_table, r)
	}
	// Remaining runes up to max_latin_1: test the letter bits in the props table.
	return (props[byte(r)] & p_l_mask) != 0
}
// is_uchar_punct returns true if the input unicode is a western unicode punctuation
pub fn is_uchar_punct(uchar int) bool {
return find_punct_in_table(uchar, utf8.unicode_punct_western) != 0

View File

@ -64,3 +64,30 @@ fn test_reversed() {
assert utf8.reverse(a) == '!gnaL V'
assert utf8.reverse(b) == 'dlrow olleh'
}
// test_is_control checks that no ASCII letter is reported as a control code,
// that U+0001 is reported as one, and that U+0100 is not.
fn test_is_control() {
	for ra in `a` .. `z` {
		assert utf8.is_control(ra) == false
	}
	// `..` excludes its upper bound, so the last letter is asserted separately
	assert utf8.is_control(`z`) == false
	for ra in `A` .. `Z` {
		assert utf8.is_control(ra) == false
	}
	assert utf8.is_control(`Z`) == false
	assert utf8.is_control('\x01'.runes()[0]) == true
	assert utf8.is_control('\u0100'.runes()[0]) == false
}
// test_is_letter checks that every ASCII letter and a few letters outside
// ASCII are reported as letters, and that a digit is not.
fn test_is_letter() {
	for ra in `a` .. `z` {
		assert utf8.is_letter(ra) == true
	}
	// `..` excludes its upper bound, so the last letter is asserted separately
	assert utf8.is_letter(`z`) == true
	for ra in `A` .. `Z` {
		assert utf8.is_letter(ra) == true
	}
	assert utf8.is_letter(`Z`) == true
	assert utf8.is_letter(`ɀ`) == true
	assert utf8.is_letter(`ȶ`) == true
	assert utf8.is_letter(`ȹ`) == true
	// negative case: without it an is_letter that always returns true would pass
	assert utf8.is_letter(`0`) == false
}