scanner: print multibyte char for invalid char error (#8804)
parent
0142d58aa6
commit
1891ebf22d
|
@ -23,7 +23,7 @@ pub fn len(s string) int {
|
||||||
mut index := 0
|
mut index := 0
|
||||||
|
|
||||||
for {
|
for {
|
||||||
ch_len := utf8util_char_len(s[index])
|
ch_len := char_len(s[index])
|
||||||
index += ch_len
|
index += ch_len
|
||||||
count++
|
count++
|
||||||
if index >= s.len {
|
if index >= s.len {
|
||||||
|
@ -38,12 +38,17 @@ pub fn u_len(s ustring) int {
|
||||||
return len(s.s)
|
return len(s.s)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// char_len calculates the length in bytes of a utf8 char from the byte `b`
// (callers in this file pass the first byte of the char).
// Returns a value in the range 1..4.
|
||||||

pub fn char_len(b byte) int {
|
||||||

// 0xe5000000 is a packed table of 2-bit entries indexed by the high nibble of `b`:
// `(b >> 3) & 0x1e` is twice the high nibble, so the shift selects that entry.
// High nibble 0x0-0xB -> 1, 0xC-0xD -> 2, 0xE -> 3, 0xF -> 4 (entry value + 1).
return ((0xe5000000 >> ((b >> 3) & 0x1e)) & 3) + 1
|
||||||

}
|
||||||
|
|
||||||
// get_uchar converts a unicode glyph in string[index] into an int unicode char
|
// get_uchar converts a unicode glyph in string[index] into an int unicode char
|
||||||
pub fn get_uchar(s string, index int) int {
|
pub fn get_uchar(s string, index int) int {
|
||||||
mut res := 0
|
mut res := 0
|
||||||
mut ch_len := 0
|
mut ch_len := 0
|
||||||
if s.len > 0 {
|
if s.len > 0 {
|
||||||
ch_len = utf8util_char_len(s[index])
|
ch_len = char_len(s[index])
|
||||||
|
|
||||||
if ch_len == 1 {
|
if ch_len == 1 {
|
||||||
return u16(s[index])
|
return u16(s[index])
|
||||||
|
@ -153,10 +158,6 @@ pub fn is_uchar_global_punct( uchar int ) bool {
|
||||||
Private functions
|
Private functions
|
||||||
|
|
||||||
*/
|
*/
|
||||||
// utf8util_char_len calculates the length in bytes of a utf8 char
|
|
||||||
fn utf8util_char_len(b byte) int {
|
|
||||||
return (( 0xe5000000 >> (( b >> 3 ) & 0x1e )) & 3 ) + 1
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// if upper_flag == true then make low ==> upper conversion
|
// if upper_flag == true then make low ==> upper conversion
|
||||||
|
@ -168,7 +169,7 @@ fn up_low(s string, upper_flag bool) string {
|
||||||
mut str_res := unsafe {malloc(s.len + 1)}
|
mut str_res := unsafe {malloc(s.len + 1)}
|
||||||
|
|
||||||
for {
|
for {
|
||||||
ch_len := utf8util_char_len(s[index])
|
ch_len := char_len(s[index])
|
||||||
|
|
||||||
if ch_len == 1 {
|
if ch_len == 1 {
|
||||||
if upper_flag==true {
|
if upper_flag==true {
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
vlib/v/checker/tests/invalid_char_err.vv:1:1: error: invalid character `🐈`
|
||||||
|
1 | 🐈println('')
|
||||||
|
| ^
|
|
@ -0,0 +1 @@
|
||||||
|
🐈println('')
|
|
@ -4,6 +4,7 @@
|
||||||
module scanner
|
module scanner
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import encoding.utf8
|
||||||
import v.token
|
import v.token
|
||||||
import v.pref
|
import v.pref
|
||||||
import v.util
|
import v.util
|
||||||
|
@ -1002,12 +1003,19 @@ fn (mut s Scanner) text_scan() token.Token {
|
||||||
return s.end_of_file()
|
return s.end_of_file()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
s.error('invalid character `$c.ascii_str()`')
|
s.invalid_character()
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
return s.end_of_file()
|
return s.end_of_file()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// invalid_character reports an `invalid character` error for the character
// that starts at s.pos in s.text. The error message contains every byte of
// the utf8 sequence at that position, so a multibyte char is shown whole.
fn (mut s Scanner) invalid_character() {
|
||||||

// byte length of the utf8 char, computed from the byte at s.pos
len := utf8.char_len(s.text[s.pos])
|
||||||

// keep the slice end within s.text in case the sequence is cut short at the
// end of the text (assumes util.imin returns the smaller of its two ints)
end := util.imin(s.pos + len, s.text.len)
|
||||||

c := s.text[s.pos..end]
|
||||||

s.error('invalid character `$c`')
|
||||||

}
|
||||||
|
|
||||||
fn (s &Scanner) current_column() int {
|
fn (s &Scanner) current_column() int {
|
||||||
return s.pos - s.last_nl_pos
|
return s.pos - s.last_nl_pos
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue