scanner: print multibyte char for invalid char error (#8804)
parent
0142d58aa6
commit
1891ebf22d
|
@ -23,7 +23,7 @@ pub fn len(s string) int {
|
|||
mut index := 0
|
||||
|
||||
for {
|
||||
ch_len := utf8util_char_len(s[index])
|
||||
ch_len := char_len(s[index])
|
||||
index += ch_len
|
||||
count++
|
||||
if index >= s.len {
|
||||
|
@ -38,12 +38,17 @@ pub fn u_len(s ustring) int {
|
|||
return len(s.s)
|
||||
}
|
||||
|
||||
// char_len returns the length in bytes of the UTF-8 sequence introduced by the lead byte `b`.
// The constant 0xe5000000 acts as a packed table of 2-bit entries: `(b >> 3) & 0x1e`
// is twice the high nibble of `b`, so the shift selects the entry for that nibble.
// Entries: nibble 0xF -> 3, 0xE -> 2, 0xC and 0xD -> 1, every other nibble -> 0.
// Adding 1 gives a result in the range 1..4; bytes 0x80..0xBF therefore also yield 1.
|
||||
pub fn char_len(b byte) int {
|
||||
return ((0xe5000000 >> ((b >> 3) & 0x1e)) & 3) + 1
|
||||
}
|
||||
|
||||
// get_uchar converts a unicode glyph in string[index] into an int unicode char
|
||||
pub fn get_uchar(s string, index int) int {
|
||||
mut res := 0
|
||||
mut ch_len := 0
|
||||
if s.len > 0 {
|
||||
ch_len = utf8util_char_len(s[index])
|
||||
ch_len = char_len(s[index])
|
||||
|
||||
if ch_len == 1 {
|
||||
return u16(s[index])
|
||||
|
@ -153,10 +158,6 @@ pub fn is_uchar_global_punct( uchar int ) bool {
|
|||
Private functions
|
||||
|
||||
*/
|
||||
// utf8util_char_len returns the length in bytes of the UTF-8 sequence introduced by the lead byte `b`.
// The constant 0xe5000000 acts as a packed table of 2-bit entries: `(b >> 3) & 0x1e`
// is twice the high nibble of `b`, so the shift selects the entry for that nibble.
// Entries: nibble 0xF -> 3, 0xE -> 2, 0xC and 0xD -> 1, every other nibble -> 0.
// Adding 1 gives a result in the range 1..4; bytes 0x80..0xBF therefore also yield 1.
|
||||
fn utf8util_char_len(b byte) int {
|
||||
return (( 0xe5000000 >> (( b >> 3 ) & 0x1e )) & 3 ) + 1
|
||||
}
|
||||
|
||||
//
|
||||
// if upper_flag == true then make low ==> upper conversion
|
||||
|
@ -168,7 +169,7 @@ fn up_low(s string, upper_flag bool) string {
|
|||
mut str_res := unsafe {malloc(s.len + 1)}
|
||||
|
||||
for {
|
||||
ch_len := utf8util_char_len(s[index])
|
||||
ch_len := char_len(s[index])
|
||||
|
||||
if ch_len == 1 {
|
||||
if upper_flag==true {
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
vlib/v/checker/tests/invalid_char_err.vv:1:1: error: invalid character `🐈`
|
||||
1 | 🐈println('')
|
||||
| ^
|
|
@ -0,0 +1 @@
|
|||
🐈println('')
|
|
@ -4,6 +4,7 @@
|
|||
module scanner
|
||||
|
||||
import os
|
||||
import encoding.utf8
|
||||
import v.token
|
||||
import v.pref
|
||||
import v.util
|
||||
|
@ -1002,12 +1003,19 @@ fn (mut s Scanner) text_scan() token.Token {
|
|||
return s.end_of_file()
|
||||
}
|
||||
}
|
||||
s.error('invalid character `$c.ascii_str()`')
|
||||
s.invalid_character()
|
||||
break
|
||||
}
|
||||
return s.end_of_file()
|
||||
}
|
||||
|
||||
// invalid_character reports an `invalid character` error for the text starting at `s.pos`.
// The message contains the slice `s.text[s.pos..end]`, i.e. up to `len` bytes starting at
// the current position, instead of only the single byte found there.
fn (mut s Scanner) invalid_character() {
|
||||
// number of bytes to report, as computed by utf8.char_len from the byte at `s.pos`
len := utf8.char_len(s.text[s.pos])
|
||||
// NOTE(review): util.imin presumably returns the smaller of its two arguments, which
// would keep `end` within `s.text` when fewer than `len` bytes remain - confirm.
end := util.imin(s.pos + len, s.text.len)
|
||||
c := s.text[s.pos..end]
|
||||
s.error('invalid character `$c`')
|
||||
}
|
||||
|
||||
// current_column returns the offset of the current position `s.pos` from `s.last_nl_pos`.
// NOTE(review): `last_nl_pos` is presumably the index of the most recent newline, which
// would make the result the column within the current line - confirm against the scanner.
fn (s &Scanner) current_column() int {
|
||||
return s.pos - s.last_nl_pos
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue