move vlib/builtin/utf8_util.v to vlib/encoding/utf8/utf8_util.v

pull/2940/head
Delyan Angelov 2019-11-30 07:52:37 +02:00 committed by Alexander Medvednikov
parent 661ddf3d1e
commit c7f8f2175b
2 changed files with 16 additions and 14 deletions

View File

@ -5,7 +5,7 @@
* This file contains utilities for utf8 strings * This file contains utilities for utf8 strings
* *
**********************************************************************/ **********************************************************************/
module builtin module utf8
// //
// utf8_to_upper // utf8_to_upper
@ -16,8 +16,8 @@ module builtin
// //
// Convert a utf8 string to uppercase // Convert a utf8 string to uppercase
// //
pub fn (s string) utf8_to_upper() string { pub fn to_upper(s string) string {
return s.utf8_up_low(true) return up_low(s, true)
} }
// //
@ -29,8 +29,8 @@ pub fn (s string) utf8_to_upper() string {
// //
// Convert a utf8 string to lowercase // Convert a utf8 string to lowercase
// //
pub fn (s string) utf8_to_lower() string { pub fn to_lower(s string) string {
return s.utf8_up_low(false) return up_low(s, false)
} }
// Private function, calculate the length in bytes of a utf8 rune // Private function, calculate the length in bytes of a utf8 rune
@ -39,7 +39,7 @@ fn utf8util_char_len(b byte) int {
} }
// Private function, does the dirty work // Private function, does the dirty work
fn (s string) utf8_up_low(uppper_flag bool) string { fn up_low(s string, uppper_flag bool) string {
mut _index := 0 mut _index := 0
mut old_index := 0 mut old_index := 0
mut str_res := malloc(s.len + 1) mut str_res := malloc(s.len + 1)
@ -110,8 +110,8 @@ fn (s string) utf8_up_low(uppper_flag bool) string {
//C.printf("Old char: %04x, New char: %04x, index: %d, offset: %d\n",unicode_con_table[ch_index],tab_char,ch_index,offset) //C.printf("Old char: %04x, New char: %04x, index: %d, offset: %d\n",unicode_con_table[ch_index],tab_char,ch_index,offset)
if ch_len == 2 { if ch_len == 2 {
ch0:=( (tab_char >> 6) & 0x1f ) | 0xc0 /*110x xxxx*/ ch0:=byte( (tab_char >> 6) & 0x1f ) | 0xc0 /*110x xxxx*/
ch1:=( (tab_char >> 0) & 0x3f ) | 0x80 /*10xx xxxx*/ ch1:=byte( (tab_char >> 0) & 0x3f ) | 0x80 /*10xx xxxx*/
//C.printf("[%02x%02x]",ch0,ch1) //C.printf("[%02x%02x]",ch0,ch1)
str_res[ _index + 0 ] = ch0 str_res[ _index + 0 ] = ch0
@ -124,9 +124,9 @@ fn (s string) utf8_up_low(uppper_flag bool) string {
} }
else if ch_len == 3 { else if ch_len == 3 {
ch0:=( (tab_char >> 12) & 0x0f ) | 0xe0 /*1110 xxxx*/ ch0:=byte( (tab_char >> 12) & 0x0f ) | 0xe0 /*1110 xxxx*/
ch1:=( (tab_char >> 6) & 0x3f ) | 0x80 /*10xx xxxx*/ ch1:=byte( (tab_char >> 6) & 0x3f ) | 0x80 /*10xx xxxx*/
ch2:=( (tab_char >> 0) & 0x3f ) | 0x80 /*10xx xxxx*/ ch2:=byte( (tab_char >> 0) & 0x3f ) | 0x80 /*10xx xxxx*/
str_res[_index + 0 ] = ch0 str_res[_index + 0 ] = ch0
str_res[_index + 1 ] = ch1 str_res[_index + 1 ] = ch1
@ -852,4 +852,4 @@ u16(0x0061), 0x0041, // LATIN SMALL LETTER A LATIN CAPITAL LETTER A,
0xFF59, 0xFF39, // FULLWIDTH LATIN SMALL LETTER Y FULLWIDTH LATIN CAPITAL LETTER Y 0xFF59, 0xFF39, // FULLWIDTH LATIN SMALL LETTER Y FULLWIDTH LATIN CAPITAL LETTER Y
0xFF5A, 0xFF3A, // FULLWIDTH LATIN SMALL LETTER Z FULLWIDTH LATIN CAPITAL LETTER Z 0xFF5A, 0xFF3A, // FULLWIDTH LATIN SMALL LETTER Z FULLWIDTH LATIN CAPITAL LETTER Z
] ]
) )

View File

@ -1,7 +1,9 @@
import encoding.utf8
fn test_utf8_util() { fn test_utf8_util() {
src:="ăĂ ôÔ testo æ" src:="ăĂ ôÔ testo æ"
upper:=src.utf8_to_upper() upper:=utf8.to_upper(src)
lower:=src.utf8_to_lower() lower:=utf8.to_lower(src)
assert upper=="ĂĂ ÔÔ TESTO Æ" assert upper=="ĂĂ ÔÔ TESTO Æ"
assert lower=="ăă ôô testo æ" assert lower=="ăă ôô testo æ"
} }