scanner: print multibyte char for invalid char error (#8804)
							parent
							
								
									0142d58aa6
								
							
						
					
					
						commit
						1891ebf22d
					
				|  | @ -23,7 +23,7 @@ pub fn len(s string) int { | |||
| 	mut index := 0 | ||||
| 
 | ||||
| 	for { | ||||
| 		ch_len := utf8util_char_len(s[index]) | ||||
| 		ch_len := char_len(s[index]) | ||||
| 		index += ch_len | ||||
| 		count++ | ||||
| 		if index >= s.len { | ||||
|  | @ -38,12 +38,17 @@ pub fn u_len(s ustring) int { | |||
| 	return len(s.s) | ||||
| } | ||||
| 
 | ||||
| // char_len calculate the length in bytes of a utf8 char
 | ||||
| pub fn char_len(b byte) int { | ||||
| 	return ((0xe5000000 >> ((b >> 3) & 0x1e)) & 3) + 1 | ||||
| } | ||||
| 
 | ||||
| // get_uchar convert a unicode glyph in string[index] into a int unicode char
 | ||||
| pub fn get_uchar(s string, index int) int { | ||||
| 	mut res := 0 | ||||
| 	mut ch_len := 0 | ||||
| 	if s.len > 0  { | ||||
| 		ch_len = utf8util_char_len(s[index]) | ||||
| 	if s.len > 0 { | ||||
| 		ch_len = char_len(s[index]) | ||||
| 
 | ||||
| 		if ch_len == 1 { | ||||
| 			return u16(s[index]) | ||||
|  | @ -153,10 +158,6 @@ pub fn is_uchar_global_punct( uchar int ) bool { | |||
| Private functions | ||||
| 
 | ||||
| */ | ||||
| // utf8util_char_len calculate the length in bytes of a utf8 char
 | ||||
| fn utf8util_char_len(b byte) int { | ||||
| 	return (( 0xe5000000 >> (( b >> 3 ) & 0x1e )) & 3 ) + 1 | ||||
| } | ||||
| 
 | ||||
| //
 | ||||
| // if upper_flag == true  then make low ==> upper conversion
 | ||||
|  | @ -168,7 +169,7 @@ fn up_low(s string, upper_flag bool) string { | |||
| 	mut str_res := unsafe {malloc(s.len + 1)} | ||||
| 
 | ||||
| 	for { | ||||
| 		ch_len := utf8util_char_len(s[index]) | ||||
| 		ch_len := char_len(s[index]) | ||||
| 
 | ||||
| 		if ch_len == 1 { | ||||
| 			if upper_flag==true { | ||||
|  |  | |||
|  | @ -0,0 +1,3 @@ | |||
| vlib/v/checker/tests/invalid_char_err.vv:1:1: error: invalid character `🐈` | ||||
|     1 | 🐈println('') | ||||
|       | ^ | ||||
|  | @ -0,0 +1 @@ | |||
| 🐈println('') | ||||
|  | @ -4,6 +4,7 @@ | |||
| module scanner | ||||
| 
 | ||||
| import os | ||||
| import encoding.utf8 | ||||
| import v.token | ||||
| import v.pref | ||||
| import v.util | ||||
|  | @ -1002,12 +1003,19 @@ fn (mut s Scanner) text_scan() token.Token { | |||
| 				return s.end_of_file() | ||||
| 			} | ||||
| 		} | ||||
| 		s.error('invalid character `$c.ascii_str()`') | ||||
| 		s.invalid_character() | ||||
| 		break | ||||
| 	} | ||||
| 	return s.end_of_file() | ||||
| } | ||||
| 
 | ||||
| fn (mut s Scanner) invalid_character() { | ||||
| 	len := utf8.char_len(s.text[s.pos]) | ||||
| 	end := util.imin(s.pos + len, s.text.len) | ||||
| 	c := s.text[s.pos..end] | ||||
| 	s.error('invalid character `$c`') | ||||
| } | ||||
| 
 | ||||
| fn (s &Scanner) current_column() int { | ||||
| 	return s.pos - s.last_nl_pos | ||||
| } | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue