encoding.utf8: fix a bug in up_low (#9610)

pull/9611/head
penguindark 2021-04-05 20:28:21 +02:00 committed by GitHub
parent d11fb8497a
commit 9aabf222fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 30 additions and 5 deletions

View File

@ -216,7 +216,7 @@ fn up_low(s string, upper_flag bool) string {
//C.printf("len: %d code: %04x ",ch_len,res) //C.printf("len: %d code: %04x ",ch_len,res)
ch_index := find_char_in_table(u16(res), upper_flag) ch_index := find_char_in_table(u16(res), upper_flag)
//C.printf(" utf8 index: %d ",ch_index) //C.printf(" utf8 index: %d \n",ch_index)
// char not in table, no need of conversion // char not in table, no need of conversion
if ch_index == 0 { if ch_index == 0 {
@ -335,7 +335,29 @@ fn find_char_in_table( in_code u16, upper_flag bool) int {
break break
} }
} }
//C.printf("not found.\n") //C.printf("not found.\n %d %04x",index, unicode_con_table_up_to_low[ (index<<1)+offset ] )
// the lower-to-upper table is not fully sorted, for various reasons,
// so we must fall back to a linear search among the surrounding entries
if upper_flag {
search_radius := 30 * 2
max_index := unicode_con_table_up_to_low.len >> 1
mut index1 := index + search_radius
if index1 > max_index {
index1 = max_index
}
index = index - search_radius
if index < 0 {
index = 0
}
for index < index1 {
if unicode_con_table_up_to_low[ (index << 1) + 1 ] == in_code {
return (index << 1)
}
index++
}
}
//eprintln("NOT FOUND!!")
return 0 return 0
} }

View File

@ -11,6 +11,9 @@ fn test_utf8_util() {
assert upper==src_upper assert upper==src_upper
assert lower==src_lower assert lower==src_lower
assert utf8.to_upper('абвёabc12') == 'АБВЁABC12'
assert utf8.to_lower('АБВЁABC12') == 'абвёabc12'
// ustring test // ustring test
src1:=src.ustring() src1:=src.ustring()
upper1:=utf8.u_to_upper(src1) upper1:=utf8.u_to_upper(src1)