encoding.utf8: fix a bug in up_low (#9610)

pull/9611/head
penguindark 2021-04-05 20:28:21 +02:00 committed by GitHub
parent d11fb8497a
commit 9aabf222fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 30 additions and 5 deletions

View File

@ -216,7 +216,7 @@ fn up_low(s string, upper_flag bool) string {
//C.printf("len: %d code: %04x ",ch_len,res)
ch_index := find_char_in_table(u16(res), upper_flag)
//C.printf(" utf8 index: %d ",ch_index)
//C.printf(" utf8 index: %d \n",ch_index)
// char not in table, no conversion needed
if ch_index == 0 {
@ -307,11 +307,11 @@ fn find_char_in_table( in_code u16, upper_flag bool) int {
mut index := 0
mut x := u16(0)
mut offset:=0 // up to low
mut offset:=0 // up to low
mut i_step:=1 // up to low
if upper_flag==true {
offset=1 // low to up
i_step=0 // low to up
offset=1 // low to up
i_step=0 // low to up
}
//C.printf("looking for [%04x] in (%d..%d).\n",in_code,first_index,last_index)
@ -335,7 +335,29 @@ fn find_char_in_table( in_code u16, upper_flag bool) int {
break
}
}
//C.printf("not found.\n")
//C.printf("not found.\n %d %04x",index, unicode_con_table_up_to_low[ (index<<1)+offset ] )
// the low-to-up mapping is not fully sorted, for various reasons,
// so we must try a linear search in the surroundings
if upper_flag {
search_radius := 30 * 2
max_index := unicode_con_table_up_to_low.len >> 1
mut index1 := index + search_radius
if index1 > max_index {
index1 = max_index
}
index = index - search_radius
if index < 0 {
index = 0
}
for index < index1 {
if unicode_con_table_up_to_low[ (index << 1) + 1 ] == in_code {
return (index << 1)
}
index++
}
}
//eprintln("NOT FOUND!!")
return 0
}

View File

@ -11,6 +11,9 @@ fn test_utf8_util() {
assert upper==src_upper
assert lower==src_lower
assert utf8.to_upper('абвёabc12') == 'АБВЁABC12'
assert utf8.to_lower('АБВЁABC12') == 'абвёabc12'
// ustring test
src1:=src.ustring()
upper1:=utf8.u_to_upper(src1)