encoding.utf8: fix len and ulen and optimize raw_index (#9682)
							parent
							
								
									67d8639917
								
							
						
					
					
						commit
						d7a64bbc8a
					
				|  | @ -15,6 +15,10 @@ Utility functions | ||||||
| 
 | 
 | ||||||
| // len returns the length of a string as the number of unicode chars
 | // len returns the length of a string as the number of unicode chars
 | ||||||
| pub fn len(s string) int { | pub fn len(s string) int { | ||||||
|  | 	if s.len == 0 { | ||||||
|  | 		return 0 | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	mut count := 0 | 	mut count := 0 | ||||||
| 	mut index := 0 | 	mut index := 0 | ||||||
| 
 | 
 | ||||||
|  | @ -82,10 +86,16 @@ pub fn get_uchar(s string, index int) int { | ||||||
| // raw_index - get the raw character from the string by the given index value.
 | // raw_index - get the raw character from the string by the given index value.
 | ||||||
| // example: '我是V Lang'.raw_index(1) => '是'
 | // example: '我是V Lang'.raw_index(1) => '是'
 | ||||||
| 
 | 
 | ||||||
|  | // raw_index - get the raw character from the string by the given index value.
 | ||||||
|  | // example: utf8.raw_index('我是V Lang', 1) => '是'
 | ||||||
| pub fn raw_index(s string, index int) string { | pub fn raw_index(s string, index int) string { | ||||||
| 	mut r := []rune{} | 	mut r := []rune{} | ||||||
| 
 | 
 | ||||||
| 	for i := 0; i < s.len; i++ { | 	for i := 0; i < s.len; i++ { | ||||||
|  | 		if r.len - 1 == index { | ||||||
|  | 			break | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
| 		b := s[i] | 		b := s[i] | ||||||
| 		ch_len := ((0xe5000000 >> ((b >> 3) & 0x1e)) & 3) | 		ch_len := ((0xe5000000 >> ((b >> 3) & 0x1e)) & 3) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -21,10 +21,12 @@ fn test_utf8_util() { | ||||||
| 	assert lower1 == (src_lower.ustring()) | 	assert lower1 == (src_lower.ustring()) | ||||||
| 
 | 
 | ||||||
| 	// test len function
 | 	// test len function
 | ||||||
|  | 	assert utf8.len('') == 0 | ||||||
| 	assert utf8.len('pippo') == 5 | 	assert utf8.len('pippo') == 5 | ||||||
| 	assert utf8.len(src) == 15 // 29
 | 	assert utf8.len(src) == 15 // 29
 | ||||||
| 	assert src.len == 24 // 49
 | 	assert src.len == 24 // 49
 | ||||||
| 	// test u_len function
 | 	// test u_len function
 | ||||||
|  | 	assert utf8.u_len(''.ustring()) == 0 | ||||||
| 	assert utf8.u_len(src1) == 15 // 29
 | 	assert utf8.u_len(src1) == 15 // 29
 | ||||||
| 	assert utf8.u_len('pippo'.ustring()) == 5 | 	assert utf8.u_len('pippo'.ustring()) == 5 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue