encoding.utf8: remove const unicode_con_table_up_to_low table (#9672)
parent
acb58d4923
commit
cca06fce90
|
@ -43,6 +43,7 @@ const (
|
||||||
'vlib/builtin/',
|
'vlib/builtin/',
|
||||||
'vlib/cli/',
|
'vlib/cli/',
|
||||||
'vlib/dl/',
|
'vlib/dl/',
|
||||||
|
'vlib/encoding/utf8/',
|
||||||
'vlib/flag/',
|
'vlib/flag/',
|
||||||
'vlib/gg/',
|
'vlib/gg/',
|
||||||
'vlib/math/bits/bits.v',
|
'vlib/math/bits/bits.v',
|
||||||
|
|
|
@ -37,7 +37,7 @@ pub fn east_asian_width_property_at(s string, index int) EastAsianWidthProperty
|
||||||
mut left, mut right := 0, east_asian.east_asian_width_data.len - 1
|
mut left, mut right := 0, east_asian.east_asian_width_data.len - 1
|
||||||
for left <= right {
|
for left <= right {
|
||||||
middle := left + ((right - left) / 2)
|
middle := left + ((right - left) / 2)
|
||||||
entry := east_asian_width_data[middle]
|
entry := east_asian.east_asian_width_data[middle]
|
||||||
if codepoint < entry.point {
|
if codepoint < entry.point {
|
||||||
right = middle - 1
|
right = middle - 1
|
||||||
continue
|
continue
|
||||||
|
|
|
@ -11,10 +11,10 @@ pub fn validate_str(str string) bool {
|
||||||
return validate(str.str, str.len)
|
return validate(str.str, str.len)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn validate(data byteptr, len int) bool {
|
pub fn validate(data &byte, len int) bool {
|
||||||
mut state := Utf8State{}
|
mut state := Utf8State{}
|
||||||
for i := 0; i < len; i++ {
|
for i := 0; i < len; i++ {
|
||||||
s := unsafe {data[i]}
|
s := unsafe { data[i] }
|
||||||
if s == 0 {
|
if s == 0 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,63 +1,62 @@
|
||||||
import encoding.utf8
|
import encoding.utf8
|
||||||
|
|
||||||
fn test_utf8_util() {
|
fn test_utf8_util() {
|
||||||
|
|
||||||
// string test
|
// string test
|
||||||
src:="ăĂ ôÔ testo 怔"//_\u1E5A\u1E5B<=>\u1F49\u1F41<=>\u0128\u012a\u012c" // len 29 runes, raw 49 bytes
|
src := 'ăĂ ôÔ testo 怔' //_\u1E5A\u1E5B<=>\u1F49\u1F41<=>\u0128\u012a\u012c" // len 29 runes, raw 49 bytes
|
||||||
src_upper:="ĂĂ ÔÔ TESTO Æ€”"//_\u1E5A\u1E5A<=>\u1F49\u1F49<=>\u0128\u012A\u012C"
|
src_upper := 'ĂĂ ÔÔ TESTO Æ€”' //_\u1E5A\u1E5A<=>\u1F49\u1F49<=>\u0128\u012A\u012C"
|
||||||
src_lower:="ăă ôô testo 怔"//_\u1E5B\u1E5B<=>\u1F41\u1F41<=>\u0129\u012B\u012D"
|
src_lower := 'ăă ôô testo 怔' //_\u1E5B\u1E5B<=>\u1F41\u1F41<=>\u0129\u012B\u012D"
|
||||||
upper:=utf8.to_upper(src)
|
upper := utf8.to_upper(src)
|
||||||
lower:=utf8.to_lower(src)
|
lower := utf8.to_lower(src)
|
||||||
assert upper==src_upper
|
assert upper == src_upper
|
||||||
assert lower==src_lower
|
assert lower == src_lower
|
||||||
|
|
||||||
assert utf8.to_upper('абвёabc12{') == 'АБВЁABC12{'
|
assert utf8.to_upper('абвёabc12{') == 'АБВЁABC12{'
|
||||||
assert utf8.to_lower('АБВЁABC12{') == 'абвёabc12{'
|
assert utf8.to_lower('АБВЁABC12{') == 'абвёabc12{'
|
||||||
|
|
||||||
// ustring test
|
// ustring test
|
||||||
src1:=src.ustring()
|
src1 := src.ustring()
|
||||||
upper1:=utf8.u_to_upper(src1)
|
upper1 := utf8.u_to_upper(src1)
|
||||||
lower1:=utf8.u_to_lower(src1)
|
lower1 := utf8.u_to_lower(src1)
|
||||||
assert upper1==( src_upper.ustring() )
|
assert upper1 == (src_upper.ustring())
|
||||||
assert lower1==( src_lower.ustring() )
|
assert lower1 == (src_lower.ustring())
|
||||||
|
|
||||||
// test len function
|
// test len function
|
||||||
assert utf8.len("pippo")==5
|
assert utf8.len('pippo') == 5
|
||||||
assert utf8.len(src)==15 //29
|
assert utf8.len(src) == 15 // 29
|
||||||
assert src.len==24 //49
|
assert src.len == 24 // 49
|
||||||
// test u_len function
|
// test u_len function
|
||||||
assert utf8.u_len(src1)==15 //29
|
assert utf8.u_len(src1) == 15 // 29
|
||||||
assert utf8.u_len("pippo".ustring())==5
|
assert utf8.u_len('pippo'.ustring()) == 5
|
||||||
|
|
||||||
// western punctuation
|
// western punctuation
|
||||||
a := '.abc?abcòàè.'
|
a := '.abc?abcòàè.'
|
||||||
assert utf8.is_punct(a,0)==true
|
assert utf8.is_punct(a, 0) == true
|
||||||
assert utf8.is_punct('b',0)==false
|
assert utf8.is_punct('b', 0) == false
|
||||||
assert utf8.is_uchar_punct(0x002E)==true
|
assert utf8.is_uchar_punct(0x002E) == true
|
||||||
assert utf8.is_punct(a,4)==true // ?
|
assert utf8.is_punct(a, 4) == true // ?
|
||||||
assert utf8.is_punct(a,14)==true // last .
|
assert utf8.is_punct(a, 14) == true // last .
|
||||||
assert utf8.is_punct(a,12)==false // è
|
assert utf8.is_punct(a, 12) == false // è
|
||||||
println("OK western")
|
println('OK western')
|
||||||
|
|
||||||
// global punctuation
|
|
||||||
b := '.ĂĂa. ÔÔ TESTO Æ€'
|
|
||||||
assert utf8.is_global_punct(b,0)==true
|
|
||||||
assert utf8.is_global_punct('.',0)==true
|
|
||||||
assert utf8.is_uchar_punct(0x002E)==true
|
|
||||||
assert utf8.is_global_punct(b,6)==true // .
|
|
||||||
assert utf8.is_global_punct(b,1)==false // a
|
|
||||||
|
|
||||||
// test utility functions
|
// global punctuation
|
||||||
assert utf8.get_uchar(b,0)==0x002E
|
b := '.ĂĂa. ÔÔ TESTO Æ€'
|
||||||
|
assert utf8.is_global_punct(b, 0) == true
|
||||||
|
assert utf8.is_global_punct('.', 0) == true
|
||||||
|
assert utf8.is_uchar_punct(0x002E) == true
|
||||||
|
assert utf8.is_global_punct(b, 6) == true // .
|
||||||
|
assert utf8.is_global_punct(b, 1) == false // a
|
||||||
|
|
||||||
|
// test utility functions
|
||||||
|
assert utf8.get_uchar(b, 0) == 0x002E
|
||||||
}
|
}
|
||||||
|
|
||||||
fn test_raw_indexing() {
|
fn test_raw_indexing() {
|
||||||
a := "我是V Lang!"
|
a := '我是V Lang!'
|
||||||
|
|
||||||
// test non ascii characters
|
// test non ascii characters
|
||||||
assert utf8.raw_index(a, 0) == '我'
|
assert utf8.raw_index(a, 0) == '我'
|
||||||
assert utf8.raw_index(a, 1) == '是'
|
assert utf8.raw_index(a, 1) == '是'
|
||||||
|
|
||||||
// test ascii characters
|
// test ascii characters
|
||||||
assert utf8.raw_index(a, 2) == 'V'
|
assert utf8.raw_index(a, 2) == 'V'
|
||||||
assert utf8.raw_index(a, 3) == ' '
|
assert utf8.raw_index(a, 3) == ' '
|
||||||
|
|
Loading…
Reference in New Issue