encoding.utf8: remove const unicode_con_table_up_to_low table (#9672)

pull/9676/head
penguindark 2021-04-11 09:28:19 +02:00 committed by GitHub
parent acb58d4923
commit cca06fce90
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 957 additions and 1542 deletions

View File

@ -43,6 +43,7 @@ const (
'vlib/builtin/', 'vlib/builtin/',
'vlib/cli/', 'vlib/cli/',
'vlib/dl/', 'vlib/dl/',
'vlib/encoding/utf8/',
'vlib/flag/', 'vlib/flag/',
'vlib/gg/', 'vlib/gg/',
'vlib/math/bits/bits.v', 'vlib/math/bits/bits.v',

View File

@ -37,7 +37,7 @@ pub fn east_asian_width_property_at(s string, index int) EastAsianWidthProperty
mut left, mut right := 0, east_asian.east_asian_width_data.len - 1 mut left, mut right := 0, east_asian.east_asian_width_data.len - 1
for left <= right { for left <= right {
middle := left + ((right - left) / 2) middle := left + ((right - left) / 2)
entry := east_asian_width_data[middle] entry := east_asian.east_asian_width_data[middle]
if codepoint < entry.point { if codepoint < entry.point {
right = middle - 1 right = middle - 1
continue continue

View File

@ -11,10 +11,10 @@ pub fn validate_str(str string) bool {
return validate(str.str, str.len) return validate(str.str, str.len)
} }
pub fn validate(data byteptr, len int) bool { pub fn validate(data &byte, len int) bool {
mut state := Utf8State{} mut state := Utf8State{}
for i := 0; i < len; i++ { for i := 0; i < len; i++ {
s := unsafe {data[i]} s := unsafe { data[i] }
if s == 0 { if s == 0 {
break break
} }

File diff suppressed because it is too large Load Diff

View File

@ -1,58 +1,57 @@
import encoding.utf8 import encoding.utf8
fn test_utf8_util() { fn test_utf8_util() {
// string test // string test
src:="ăĂ ôÔ testo æ"//_\u1E5A\u1E5B<=>\u1F49\u1F41<=>\u0128\u012a\u012c" // len 29 runes, raw 49 bytes src := 'ăĂ ôÔ testo æ' //_\u1E5A\u1E5B<=>\u1F49\u1F41<=>\u0128\u012a\u012c" // len 29 runes, raw 49 bytes
src_upper:="ĂĂ ÔÔ TESTO Æ"//_\u1E5A\u1E5A<=>\u1F49\u1F49<=>\u0128\u012A\u012C" src_upper := 'ĂĂ ÔÔ TESTO Æ' //_\u1E5A\u1E5A<=>\u1F49\u1F49<=>\u0128\u012A\u012C"
src_lower:="ăă ôô testo æ"//_\u1E5B\u1E5B<=>\u1F41\u1F41<=>\u0129\u012B\u012D" src_lower := 'ăă ôô testo æ' //_\u1E5B\u1E5B<=>\u1F41\u1F41<=>\u0129\u012B\u012D"
upper:=utf8.to_upper(src) upper := utf8.to_upper(src)
lower:=utf8.to_lower(src) lower := utf8.to_lower(src)
assert upper==src_upper assert upper == src_upper
assert lower==src_lower assert lower == src_lower
assert utf8.to_upper('абвёabc12') == 'АБВЁABC12' assert utf8.to_upper('абвёabc12') == 'АБВЁABC12'
assert utf8.to_lower('АБВЁABC12') == 'абвёabc12' assert utf8.to_lower('АБВЁABC12') == 'абвёabc12'
// ustring test // ustring test
src1:=src.ustring() src1 := src.ustring()
upper1:=utf8.u_to_upper(src1) upper1 := utf8.u_to_upper(src1)
lower1:=utf8.u_to_lower(src1) lower1 := utf8.u_to_lower(src1)
assert upper1==( src_upper.ustring() ) assert upper1 == (src_upper.ustring())
assert lower1==( src_lower.ustring() ) assert lower1 == (src_lower.ustring())
// test len function // test len function
assert utf8.len("pippo")==5 assert utf8.len('pippo') == 5
assert utf8.len(src)==15 //29 assert utf8.len(src) == 15 // 29
assert src.len==24 //49 assert src.len == 24 // 49
// test u_len function // test u_len function
assert utf8.u_len(src1)==15 //29 assert utf8.u_len(src1) == 15 // 29
assert utf8.u_len("pippo".ustring())==5 assert utf8.u_len('pippo'.ustring()) == 5
// western punctuation // western punctuation
a := '.abc?abcòàè.' a := '.abc?abcòàè.'
assert utf8.is_punct(a,0)==true assert utf8.is_punct(a, 0) == true
assert utf8.is_punct('b',0)==false assert utf8.is_punct('b', 0) == false
assert utf8.is_uchar_punct(0x002E)==true assert utf8.is_uchar_punct(0x002E) == true
assert utf8.is_punct(a,4)==true // ? assert utf8.is_punct(a, 4) == true // ?
assert utf8.is_punct(a,14)==true // last . assert utf8.is_punct(a, 14) == true // last .
assert utf8.is_punct(a,12)==false // è assert utf8.is_punct(a, 12) == false // è
println("OK western") println('OK western')
// global punctuation // global punctuation
b := '.ĂĂa. ÔÔ TESTO Æ' b := '.ĂĂa. ÔÔ TESTO Æ'
assert utf8.is_global_punct(b,0)==true assert utf8.is_global_punct(b, 0) == true
assert utf8.is_global_punct('.',0)==true assert utf8.is_global_punct('.', 0) == true
assert utf8.is_uchar_punct(0x002E)==true assert utf8.is_uchar_punct(0x002E) == true
assert utf8.is_global_punct(b,6)==true // . assert utf8.is_global_punct(b, 6) == true // .
assert utf8.is_global_punct(b,1)==false // a assert utf8.is_global_punct(b, 1) == false // a
// test utility functions // test utility functions
assert utf8.get_uchar(b,0)==0x002E assert utf8.get_uchar(b, 0) == 0x002E
} }
fn test_raw_indexing() { fn test_raw_indexing() {
a := "V Lang!" a := 'V Lang!'
// test non ascii characters // test non ascii characters
assert utf8.raw_index(a, 0) == '' assert utf8.raw_index(a, 0) == ''