builtin: add `string.len_utf8()` method (#14208)
							parent
							
								
									3c8b67521b
								
							
						
					
					
						commit
						f8ed4ab9d0
					
				| 
						 | 
				
			
			@ -295,6 +295,17 @@ pub fn (cp &char) vstring_literal_with_len(len int) string {
 | 
			
		|||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// len_utf8 returns the number of runes contained in the string `s`.
// It counts one rune per leading byte and then skips the rest of that
// byte sequence; the bytes are not checked for being valid UTF-8.
 | 
			
		||||
pub fn (s string) len_utf8() int {
 | 
			
		||||
	mut l := 0
 | 
			
		||||
	mut i := 0
 | 
			
		||||
	for i < s.len {
 | 
			
		||||
		l++
 | 
			
		||||
		// The top 5 bits of the leading byte (with the lowest one cleared) pick a
		// 2-bit field out of 0xe5000000 that holds the number of continuation
		// bytes (0..3); adding 1 gives the byte length of the whole sequence.
		i += ((0xe5000000 >> ((unsafe { s.str[i] } >> 3) & 0x1e)) & 3) + 1
 | 
			
		||||
	}
 | 
			
		||||
	return l
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// clone_static returns an independent copy of a given string.
 | 
			
		||||
// It should be used only in -autofree generated code.
 | 
			
		||||
fn (a string) clone_static() string {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -24,6 +24,14 @@ fn test_add() {
 | 
			
		|||
	assert a.ends_with('3')
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// test_len_utf8 checks that `len_utf8` counts runes rather than bytes,
// using ASCII, accented Latin, CJK, Cyrillic and Greek sample strings.
fn test_len_utf8() {
 | 
			
		||||
	assert 'Vlang'.len_utf8() == 5
 | 
			
		||||
	assert 'María'.len_utf8() == 5
 | 
			
		||||
	assert '姓名'.len_utf8() == 2
 | 
			
		||||
	assert 'Слово'.len_utf8() == 5
 | 
			
		||||
	assert 'Λέξη'.len_utf8() == 4
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn test_ends_with() {
 | 
			
		||||
	a := 'browser.v'
 | 
			
		||||
	assert a.ends_with('.v')
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -66,6 +66,19 @@ pub fn utf32_decode_to_buffer(code u32, buf &u8) int {
 | 
			
		|||
	return 0
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// utf8_str_len returns the number of runes contained in the string.
// It counts one rune per leading byte and then skips the rest of that
// byte sequence; the bytes are not checked for being valid UTF-8.
 | 
			
		||||
[deprecated: 'use `string.len_utf8()` instead']
 | 
			
		||||
[deprecated_after: '2022-05-28']
 | 
			
		||||
pub fn utf8_str_len(s string) int {
 | 
			
		||||
	mut l := 0
 | 
			
		||||
	mut i := 0
 | 
			
		||||
	for i < s.len {
 | 
			
		||||
		l++
 | 
			
		||||
		// The top 5 bits of the leading byte (with the lowest one cleared) pick a
		// 2-bit field out of 0xe5000000 that holds the number of continuation
		// bytes (0..3); adding 1 gives the byte length of the whole sequence.
		i += ((0xe5000000 >> ((unsafe { s.str[i] } >> 3) & 0x1e)) & 3) + 1
 | 
			
		||||
	}
 | 
			
		||||
	return l
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Convert utf8 to utf32
 | 
			
		||||
// the original implementation did not check for
 | 
			
		||||
// valid utf8 in the string, and could result in
 | 
			
		||||
| 
						 | 
				
			
			@ -134,17 +147,6 @@ fn utf8_len(c u8) int {
 | 
			
		|||
	return b
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Calculate the string length in number of codepoints.
// One codepoint is counted per leading byte; the bytes are not validated.
 | 
			
		||||
pub fn utf8_str_len(s string) int {
 | 
			
		||||
	mut l := 0
 | 
			
		||||
	mut i := 0
 | 
			
		||||
	for i < s.len {
 | 
			
		||||
		l++
 | 
			
		||||
		// The top 5 bits of the leading byte (with the lowest one cleared) pick a
		// 2-bit field out of 0xe5000000 that holds the number of continuation
		// bytes (0..3); adding 1 gives the byte length of the whole sequence.
		i += ((0xe5000000 >> ((unsafe { s.str[i] } >> 3) & 0x1e)) & 3) + 1
 | 
			
		||||
	}
 | 
			
		||||
	return l
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Calculate string length for formatting, i.e. number of "characters"
 | 
			
		||||
// This is a simplified implementation. If you need a specification-compliant width,
 | 
			
		||||
// use utf8.east_asian.display_width.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3143,7 +3143,7 @@ fn (mut g Gen) char_literal(node ast.CharLiteral) {
 | 
			
		|||
		return
 | 
			
		||||
	}
 | 
			
		||||
	// TODO: optimize by using an L-char instead of u32 when possible
 | 
			
		||||
	if utf8_str_len(node.val) < node.val.len {
 | 
			
		||||
	if node.val.len_utf8() < node.val.len {
 | 
			
		||||
		g.write('((rune)0x$node.val.utf32_code().hex() /* `$node.val` */)')
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue