encoding.utf8: remove const unicode_con_table_up_to_low table (#9672)
							parent
							
								
									acb58d4923
								
							
						
					
					
						commit
						cca06fce90
					
				| 
						 | 
					@ -43,6 +43,7 @@ const (
 | 
				
			||||||
		'vlib/builtin/',
 | 
							'vlib/builtin/',
 | 
				
			||||||
		'vlib/cli/',
 | 
							'vlib/cli/',
 | 
				
			||||||
		'vlib/dl/',
 | 
							'vlib/dl/',
 | 
				
			||||||
 | 
							'vlib/encoding/utf8/',
 | 
				
			||||||
		'vlib/flag/',
 | 
							'vlib/flag/',
 | 
				
			||||||
		'vlib/gg/',
 | 
							'vlib/gg/',
 | 
				
			||||||
		'vlib/math/bits/bits.v',
 | 
							'vlib/math/bits/bits.v',
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -37,7 +37,7 @@ pub fn east_asian_width_property_at(s string, index int) EastAsianWidthProperty
 | 
				
			||||||
	mut left, mut right := 0, east_asian.east_asian_width_data.len - 1
 | 
						mut left, mut right := 0, east_asian.east_asian_width_data.len - 1
 | 
				
			||||||
	for left <= right {
 | 
						for left <= right {
 | 
				
			||||||
		middle := left + ((right - left) / 2)
 | 
							middle := left + ((right - left) / 2)
 | 
				
			||||||
		entry := east_asian_width_data[middle]
 | 
							entry := east_asian.east_asian_width_data[middle]
 | 
				
			||||||
		if codepoint < entry.point {
 | 
							if codepoint < entry.point {
 | 
				
			||||||
			right = middle - 1
 | 
								right = middle - 1
 | 
				
			||||||
			continue
 | 
								continue
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -11,10 +11,10 @@ pub fn validate_str(str string) bool {
 | 
				
			||||||
	return validate(str.str, str.len)
 | 
						return validate(str.str, str.len)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub fn validate(data byteptr, len int) bool {
 | 
					pub fn validate(data &byte, len int) bool {
 | 
				
			||||||
	mut state := Utf8State{}
 | 
						mut state := Utf8State{}
 | 
				
			||||||
	for i := 0; i < len; i++ {
 | 
						for i := 0; i < len; i++ {
 | 
				
			||||||
		s := unsafe {data[i]}
 | 
							s := unsafe { data[i] }
 | 
				
			||||||
		if s == 0 {
 | 
							if s == 0 {
 | 
				
			||||||
			break
 | 
								break
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| 
						 | 
					@ -1,63 +1,62 @@
 | 
				
			||||||
import encoding.utf8
 | 
					import encoding.utf8
 | 
				
			||||||
 | 
					
 | 
				
			||||||
fn test_utf8_util() {
 | 
					fn test_utf8_util() {
 | 
				
			||||||
 | 
					 | 
				
			||||||
	// string test
 | 
						// string test
 | 
				
			||||||
	src:="ăĂ ôÔ testo 怔"//_\u1E5A\u1E5B<=>\u1F49\u1F41<=>\u0128\u012a\u012c" // len 29 runes, raw 49 bytes
 | 
						src := 'ăĂ ôÔ testo 怔' //_\u1E5A\u1E5B<=>\u1F49\u1F41<=>\u0128\u012a\u012c" // len 29 runes, raw 49 bytes
 | 
				
			||||||
	src_upper:="ĂĂ ÔÔ TESTO Æ€”"//_\u1E5A\u1E5A<=>\u1F49\u1F49<=>\u0128\u012A\u012C"
 | 
						src_upper := 'ĂĂ ÔÔ TESTO Æ€”' //_\u1E5A\u1E5A<=>\u1F49\u1F49<=>\u0128\u012A\u012C"
 | 
				
			||||||
	src_lower:="ăă ôô testo 怔"//_\u1E5B\u1E5B<=>\u1F41\u1F41<=>\u0129\u012B\u012D"
 | 
						src_lower := 'ăă ôô testo 怔' //_\u1E5B\u1E5B<=>\u1F41\u1F41<=>\u0129\u012B\u012D"
 | 
				
			||||||
	upper:=utf8.to_upper(src)
 | 
						upper := utf8.to_upper(src)
 | 
				
			||||||
	lower:=utf8.to_lower(src)
 | 
						lower := utf8.to_lower(src)
 | 
				
			||||||
	assert upper==src_upper
 | 
						assert upper == src_upper
 | 
				
			||||||
	assert lower==src_lower
 | 
						assert lower == src_lower
 | 
				
			||||||
	
 | 
					
 | 
				
			||||||
	assert utf8.to_upper('абвёabc12{') == 'АБВЁABC12{'
 | 
						assert utf8.to_upper('абвёabc12{') == 'АБВЁABC12{'
 | 
				
			||||||
	assert utf8.to_lower('АБВЁABC12{') == 'абвёabc12{'
 | 
						assert utf8.to_lower('АБВЁABC12{') == 'абвёabc12{'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// ustring test
 | 
						// ustring test
 | 
				
			||||||
	src1:=src.ustring()
 | 
						src1 := src.ustring()
 | 
				
			||||||
	upper1:=utf8.u_to_upper(src1)
 | 
						upper1 := utf8.u_to_upper(src1)
 | 
				
			||||||
	lower1:=utf8.u_to_lower(src1)
 | 
						lower1 := utf8.u_to_lower(src1)
 | 
				
			||||||
	assert upper1==( src_upper.ustring() )
 | 
						assert upper1 == (src_upper.ustring())
 | 
				
			||||||
	assert lower1==( src_lower.ustring() )
 | 
						assert lower1 == (src_lower.ustring())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// test len function
 | 
						// test len function
 | 
				
			||||||
	assert utf8.len("pippo")==5
 | 
						assert utf8.len('pippo') == 5
 | 
				
			||||||
	assert utf8.len(src)==15 //29
 | 
						assert utf8.len(src) == 15 // 29
 | 
				
			||||||
	assert src.len==24 //49
 | 
						assert src.len == 24 // 49
 | 
				
			||||||
	// test u_len function
 | 
						// test u_len function
 | 
				
			||||||
	assert utf8.u_len(src1)==15 //29
 | 
						assert utf8.u_len(src1) == 15 // 29
 | 
				
			||||||
	assert utf8.u_len("pippo".ustring())==5
 | 
						assert utf8.u_len('pippo'.ustring()) == 5
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// western punctuation
 | 
						// western punctuation
 | 
				
			||||||
	a := '.abc?abcòàè.'  	
 | 
						a := '.abc?abcòàè.'
 | 
				
			||||||
  	assert utf8.is_punct(a,0)==true
 | 
						assert utf8.is_punct(a, 0) == true
 | 
				
			||||||
  	assert utf8.is_punct('b',0)==false
 | 
						assert utf8.is_punct('b', 0) == false
 | 
				
			||||||
  	assert utf8.is_uchar_punct(0x002E)==true 
 | 
						assert utf8.is_uchar_punct(0x002E) == true
 | 
				
			||||||
  	assert utf8.is_punct(a,4)==true		// ?
 | 
						assert utf8.is_punct(a, 4) == true // ?
 | 
				
			||||||
  	assert utf8.is_punct(a,14)==true		// last .
 | 
						assert utf8.is_punct(a, 14) == true // last .
 | 
				
			||||||
  	assert utf8.is_punct(a,12)==false	// è
 | 
						assert utf8.is_punct(a, 12) == false // è
 | 
				
			||||||
  	println("OK western")
 | 
						println('OK western')
 | 
				
			||||||
  	
 | 
					 | 
				
			||||||
  	// global punctuation
 | 
					 | 
				
			||||||
  	b := '.ĂĂa. ÔÔ TESTO Æ€'
 | 
					 | 
				
			||||||
  	assert utf8.is_global_punct(b,0)==true
 | 
					 | 
				
			||||||
  	assert utf8.is_global_punct('.',0)==true
 | 
					 | 
				
			||||||
  	assert utf8.is_uchar_punct(0x002E)==true
 | 
					 | 
				
			||||||
  	assert utf8.is_global_punct(b,6)==true	// .
 | 
					 | 
				
			||||||
  	assert utf8.is_global_punct(b,1)==false	// a
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  	// test utility functions
 | 
						// global punctuation
 | 
				
			||||||
  	assert utf8.get_uchar(b,0)==0x002E
 | 
						b := '.ĂĂa. ÔÔ TESTO Æ€'
 | 
				
			||||||
 | 
						assert utf8.is_global_punct(b, 0) == true
 | 
				
			||||||
 | 
						assert utf8.is_global_punct('.', 0) == true
 | 
				
			||||||
 | 
						assert utf8.is_uchar_punct(0x002E) == true
 | 
				
			||||||
 | 
						assert utf8.is_global_punct(b, 6) == true // .
 | 
				
			||||||
 | 
						assert utf8.is_global_punct(b, 1) == false // a
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// test utility functions
 | 
				
			||||||
 | 
						assert utf8.get_uchar(b, 0) == 0x002E
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
fn test_raw_indexing() {
 | 
					fn test_raw_indexing() {
 | 
				
			||||||
	a := "我是V Lang!"
 | 
						a := '我是V Lang!'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// test non ascii characters
 | 
						// test non ascii characters
 | 
				
			||||||
	assert utf8.raw_index(a, 0) == '我'
 | 
						assert utf8.raw_index(a, 0) == '我'
 | 
				
			||||||
	assert utf8.raw_index(a, 1) == '是'
 | 
						assert utf8.raw_index(a, 1) == '是'
 | 
				
			||||||
	
 | 
					
 | 
				
			||||||
	// test ascii characters
 | 
						// test ascii characters
 | 
				
			||||||
	assert utf8.raw_index(a, 2) == 'V'
 | 
						assert utf8.raw_index(a, 2) == 'V'
 | 
				
			||||||
	assert utf8.raw_index(a, 3) == ' '
 | 
						assert utf8.raw_index(a, 3) == ' '
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue