scanner, cgen: improve support for escape codes in backticks/runes (#13127)
parent
ea660315e0
commit
ab642cac43
|
@ -617,8 +617,10 @@ pub fn (mut c Checker) string_inter_lit(mut node ast.StringInterLiteral) ast.Typ
|
||||||
return ast.string_type
|
return ast.string_type
|
||||||
}
|
}
|
||||||
|
|
||||||
const hex_lit_overflow_message = 'hex character literal overflows string'
|
const unicode_lit_overflow_message = 'unicode character exceeds max allowed value of 0x10ffff, consider using a unicode literal (\\u####)'
|
||||||
|
|
||||||
|
// unicode character literals are limited to a maximum value of 0x10ffff
|
||||||
|
// https://stackoverflow.com/questions/52203351/why-unicode-is-restricted-to-0x10ffff
|
||||||
pub fn (mut c Checker) string_lit(mut node ast.StringLiteral) ast.Type {
|
pub fn (mut c Checker) string_lit(mut node ast.StringLiteral) ast.Type {
|
||||||
mut idx := 0
|
mut idx := 0
|
||||||
for idx < node.val.len {
|
for idx < node.val.len {
|
||||||
|
@ -631,7 +633,7 @@ pub fn (mut c Checker) string_lit(mut node ast.StringLiteral) ast.Type {
|
||||||
start_idx := idx
|
start_idx := idx
|
||||||
idx++
|
idx++
|
||||||
next_ch := node.val[idx] or { return ast.string_type }
|
next_ch := node.val[idx] or { return ast.string_type }
|
||||||
if next_ch == `x` {
|
if next_ch == `u` {
|
||||||
idx++
|
idx++
|
||||||
mut ch := node.val[idx] or { return ast.string_type }
|
mut ch := node.val[idx] or { return ast.string_type }
|
||||||
mut hex_char_count := 0
|
mut hex_char_count := 0
|
||||||
|
@ -647,13 +649,13 @@ pub fn (mut c Checker) string_lit(mut node ast.StringLiteral) ast.Type {
|
||||||
first_digit := node.val[idx - 5] - 48
|
first_digit := node.val[idx - 5] - 48
|
||||||
second_digit := node.val[idx - 4] - 48
|
second_digit := node.val[idx - 4] - 48
|
||||||
if first_digit > 1 {
|
if first_digit > 1 {
|
||||||
c.error(checker.hex_lit_overflow_message, end_pos)
|
c.error(checker.unicode_lit_overflow_message, end_pos)
|
||||||
} else if first_digit == 1 && second_digit > 0 {
|
} else if first_digit == 1 && second_digit > 0 {
|
||||||
c.error(checker.hex_lit_overflow_message, end_pos)
|
c.error(checker.unicode_lit_overflow_message, end_pos)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
c.error(checker.hex_lit_overflow_message, end_pos)
|
c.error(checker.unicode_lit_overflow_message, end_pos)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
idx++
|
idx++
|
||||||
|
|
|
@ -1,18 +0,0 @@
|
||||||
vlib/v/checker/tests/hex_literal_overflow.vv:1:7: error: hex character literal overflows string
|
|
||||||
1 | a := '\x11ffff'
|
|
||||||
| ~~~~~~~~
|
|
||||||
2 | b := '\x20ffff'
|
|
||||||
3 | c := '\x10fffff'
|
|
||||||
vlib/v/checker/tests/hex_literal_overflow.vv:2:7: error: hex character literal overflows string
|
|
||||||
1 | a := '\x11ffff'
|
|
||||||
2 | b := '\x20ffff'
|
|
||||||
| ~~~~~~~~
|
|
||||||
3 | c := '\x10fffff'
|
|
||||||
4 | println(a)
|
|
||||||
vlib/v/checker/tests/hex_literal_overflow.vv:3:7: error: hex character literal overflows string
|
|
||||||
1 | a := '\x11ffff'
|
|
||||||
2 | b := '\x20ffff'
|
|
||||||
3 | c := '\x10fffff'
|
|
||||||
| ~~~~~~~~~
|
|
||||||
4 | println(a)
|
|
||||||
5 | println(b)
|
|
|
@ -1,5 +1,5 @@
|
||||||
vlib/v/checker/tests/import_mod_sub_as_sub_err.vv:1:25: error: import alias `encoding.utf8 as utf8` is redundant
|
vlib/v/checker/tests/import_mod_sub_as_sub_err.vv:1:25: error: import alias `encoding.utf8 as utf8` is redundant
|
||||||
1 | import encoding.utf8 as utf8
|
1 | import encoding.utf8 as utf8
|
||||||
| ~~~~
|
| ~~~~
|
||||||
2 |
|
2 |
|
||||||
3 | fn main() {
|
3 | fn main() {
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
vlib/v/checker/tests/minus_op_wrong_type_err.vv:10:10: error: mismatched types `Aaa` and `int literal`
|
vlib/v/checker/tests/minus_op_wrong_type_err.vv:10:10: error: mismatched types `Aaa` and `int literal`
|
||||||
8 |
|
8 |
|
||||||
9 | fn main() {
|
9 | fn main() {
|
||||||
10 | println(Aaa{} - 10)
|
10 | println(Aaa{} - 10)
|
||||||
| ~~~~~~~~~~
|
| ~~~~~~~~~~
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
vlib/v/checker/tests/mut_array_get_element_address_err.vv:3:20: error: cannot take the address of mutable array elements outside unsafe blocks
|
vlib/v/checker/tests/mut_array_get_element_address_err.vv:3:20: error: cannot take the address of mutable array elements outside unsafe blocks
|
||||||
1 | fn main() {
|
1 | fn main() {
|
||||||
2 | mut arr_int := [int(23), 45, 7, 8]
|
2 | mut arr_int := [int(23), 45, 7, 8]
|
||||||
3 | ele := &arr_int[1]
|
3 | ele := &arr_int[1]
|
||||||
| ~~~
|
| ~~~
|
||||||
4 | println(ele)
|
4 | println(ele)
|
||||||
5 | }
|
5 | }
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
vlib/v/checker/tests/string_escape_x_err_a.vv:2:15: error: `\x` used with no following hex digits
|
vlib/v/checker/tests/string_escape_x_err_a.vv:2:15: error: `\x` used without two following hex digits
|
||||||
1 | fn main() {
|
1 | fn main() {
|
||||||
2 | println('\x')
|
2 | println('\x')
|
||||||
| ^
|
| ^
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
vlib/v/checker/tests/string_escape_x_err_b.vv:2:15: error: `\x` used with no following hex digits
|
vlib/v/checker/tests/string_escape_x_err_b.vv:2:15: error: `\x` used without two following hex digits
|
||||||
1 | fn main() {
|
1 | fn main() {
|
||||||
2 | println('\xhh')
|
2 | println('\xhh')
|
||||||
| ^
|
| ^
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
vlib/v/checker/tests/unknown_function.vv:4:15: error: unknown function: math.max_i64
|
vlib/v/checker/tests/unknown_function.vv:4:15: error: unknown function: math.max_i64
|
||||||
2 |
|
2 |
|
||||||
3 | fn main() {
|
3 | fn main() {
|
||||||
4 | println(math.max_i64())
|
4 | println(math.max_i64())
|
||||||
| ~~~~~~~~~
|
| ~~~~~~~~~
|
||||||
|
|
|
@ -2510,6 +2510,19 @@ fn (mut g Gen) expr_with_cast(expr ast.Expr, got_type_raw ast.Type, expected_typ
|
||||||
g.expr(expr)
|
g.expr(expr)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cescape_nonascii returns a copy of `original` in which every byte outside the
// range 32..126 is replaced by a backslash followed by the byte's value as a
// zero-padded, 3-digit octal number (for example the byte 0xc3 becomes \303).
// Bytes inside that range are copied through unchanged.
fn cescape_nonascii(original string) string {
	mut sb := strings.new_builder(original.len)
	for ch in original {
		if ch >= 32 && ch <= 126 {
			// printable ASCII: emit as-is
			sb.write_b(ch)
		} else {
			// control or non-ASCII byte: emit an octal escape
			sb.write_string('\\${ch:03o}')
		}
	}
	return sb.str()
}
|
||||||
|
|
||||||
// cestring returns a V string, properly escaped for embedding in a C string literal.
|
// cestring returns a V string, properly escaped for embedding in a C string literal.
|
||||||
fn cestring(s string) string {
|
fn cestring(s string) string {
|
||||||
return s.replace('\\', '\\\\').replace('"', "'")
|
return s.replace('\\', '\\\\').replace('"', "'")
|
||||||
|
@ -2517,7 +2530,7 @@ fn cestring(s string) string {
|
||||||
|
|
||||||
// ctoslit returns a '_SLIT("$s")' call, where s is properly escaped.
|
// ctoslit returns a '_SLIT("$s")' call, where s is properly escaped.
|
||||||
fn ctoslit(s string) string {
|
fn ctoslit(s string) string {
|
||||||
return '_SLIT("' + cestring(s) + '")'
|
return '_SLIT("' + cescape_nonascii(cestring(s)) + '")'
|
||||||
}
|
}
|
||||||
|
|
||||||
fn (mut g Gen) gen_attrs(attrs []ast.Attr) {
|
fn (mut g Gen) gen_attrs(attrs []ast.Attr) {
|
||||||
|
|
|
@ -6,7 +6,7 @@ import v.ast
|
||||||
import v.util
|
import v.util
|
||||||
|
|
||||||
fn (mut g Gen) string_literal(node ast.StringLiteral) {
|
fn (mut g Gen) string_literal(node ast.StringLiteral) {
|
||||||
escaped_val := util.smart_quote(node.val, node.is_raw)
|
escaped_val := cescape_nonascii(util.smart_quote(node.val, node.is_raw))
|
||||||
if node.language == .c {
|
if node.language == .c {
|
||||||
g.write('"$escaped_val"')
|
g.write('"$escaped_val"')
|
||||||
} else {
|
} else {
|
||||||
|
@ -25,7 +25,7 @@ fn (mut g Gen) string_inter_literal_sb_optimized(call_expr ast.CallExpr) {
|
||||||
is_nl := call_expr.name == 'writeln'
|
is_nl := call_expr.name == 'writeln'
|
||||||
// println('optimize sb $call_expr.name')
|
// println('optimize sb $call_expr.name')
|
||||||
for i, val in node.vals {
|
for i, val in node.vals {
|
||||||
escaped_val := util.smart_quote(val, false)
|
escaped_val := cescape_nonascii(util.smart_quote(val, false))
|
||||||
// if val == '' {
|
// if val == '' {
|
||||||
// break
|
// break
|
||||||
// continue
|
// continue
|
||||||
|
|
|
@ -1174,6 +1174,7 @@ fn (mut s Scanner) ident_string() string {
|
||||||
}
|
}
|
||||||
s.is_inside_string = false
|
s.is_inside_string = false
|
||||||
mut u_escapes_pos := []int{} // pos list of \uXXXX
|
mut u_escapes_pos := []int{} // pos list of \uXXXX
|
||||||
|
mut h_escapes_pos := []int{} // pos list of \xXX
|
||||||
mut backslash_count := if start_char == scanner.backslash { 1 } else { 0 }
|
mut backslash_count := if start_char == scanner.backslash { 1 } else { 0 }
|
||||||
for {
|
for {
|
||||||
s.pos++
|
s.pos++
|
||||||
|
@ -1221,8 +1222,12 @@ fn (mut s Scanner) ident_string() string {
|
||||||
// Escape `\x` `\u`
|
// Escape `\x` `\u`
|
||||||
if backslash_count % 2 == 1 && !is_raw && !is_cstr {
|
if backslash_count % 2 == 1 && !is_raw && !is_cstr {
|
||||||
// Escape `\x`
|
// Escape `\x`
|
||||||
if c == `x` && (s.text[s.pos + 1] == s.quote || !s.text[s.pos + 1].is_hex_digit()) {
|
if c == `x` {
|
||||||
s.error(r'`\x` used with no following hex digits')
|
if s.text[s.pos + 1] == s.quote || !(s.text[s.pos + 1].is_hex_digit()
|
||||||
|
&& s.text[s.pos + 2].is_hex_digit()) {
|
||||||
|
s.error(r'`\x` used without two following hex digits')
|
||||||
|
}
|
||||||
|
h_escapes_pos << s.pos - 1
|
||||||
}
|
}
|
||||||
// Escape `\u`
|
// Escape `\u`
|
||||||
if c == `u` {
|
if c == `u` {
|
||||||
|
@ -1266,6 +1271,9 @@ fn (mut s Scanner) ident_string() string {
|
||||||
if !s.is_fmt && u_escapes_pos.len > 0 {
|
if !s.is_fmt && u_escapes_pos.len > 0 {
|
||||||
string_so_far = decode_u_escapes(string_so_far, start, u_escapes_pos)
|
string_so_far = decode_u_escapes(string_so_far, start, u_escapes_pos)
|
||||||
}
|
}
|
||||||
|
if !s.is_fmt && h_escapes_pos.len > 0 {
|
||||||
|
string_so_far = decode_h_escapes(string_so_far, start, h_escapes_pos)
|
||||||
|
}
|
||||||
if n_cr_chars > 0 {
|
if n_cr_chars > 0 {
|
||||||
string_so_far = string_so_far.replace('\r', '')
|
string_so_far = string_so_far.replace('\r', '')
|
||||||
}
|
}
|
||||||
|
@ -1278,6 +1286,27 @@ fn (mut s Scanner) ident_string() string {
|
||||||
return lit
|
return lit
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// decode_h_escapes replaces each `\xXX` escape in `s` with the single raw byte it names.
// Only single-byte inline escapes like '\xc0' are handled; no UTF-8 decoding is done, the
// 4 characters of the escape are simply swapped for one byte.
//
// `escapes_pos` lists the positions of the backslash of every escape, and `start` is
// subtracted from each position to turn it into an index into `s`.
// If the two characters after `\x` do not parse as a hex number, a 0 byte is emitted.
//
// An escape that would run past the end of `s`, or that overlaps a previously decoded
// escape, is left in the output exactly as written instead of being sliced out of bounds.
// NOTE(review): the indices are only meaningful if `s` still has the layout it had when
// the positions were recorded - confirm callers do not rewrite `s` before calling this.
fn decode_h_escapes(s string, start int, escapes_pos []int) string {
	if escapes_pos.len == 0 {
		return s
	}
	mut ss := []string{cap: escapes_pos.len * 2 + 1}
	mut cursor := 0 // index in `s` of the first byte that was not copied to `ss` yet
	for pos in escapes_pos {
		idx := pos - start
		end_idx := idx + 4 // "\xXX".len == 4
		if idx < cursor || end_idx > s.len {
			// truncated or overlapping escape: keep the text as it is
			continue
		}
		// plain text between the previous escape (or the string start) and this one
		ss << s[cursor..idx]
		// notice this function doesn't do any decoding... it just replaces '\xc0' with the byte 0xc0
		ss << [byte(strconv.parse_uint(s[idx + 2..end_idx], 16, 8) or { 0 })].bytestr()
		cursor = end_idx
	}
	// whatever follows the last decoded escape
	ss << s[cursor..]
	return ss.join('')
}
|
||||||
|
|
||||||
fn decode_u_escapes(s string, start int, escapes_pos []int) string {
|
fn decode_u_escapes(s string, start int, escapes_pos []int) string {
|
||||||
if escapes_pos.len == 0 {
|
if escapes_pos.len == 0 {
|
||||||
return s
|
return s
|
||||||
|
@ -1312,10 +1341,32 @@ fn trim_slash_line_break(s string) string {
|
||||||
return ret_str
|
return ret_str
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// ident_char is called when a backtick "single-char" is parsed from the code
|
||||||
|
/// it is needed because some runes (chars) are written with escape sequences
|
||||||
|
/// the string it returns should be a standardized, simplified version of the character
|
||||||
|
/// as it would appear in source code
|
||||||
|
/// possibilities:
|
||||||
|
/// single chars like `a`, `b` => 'a', 'b'
|
||||||
|
/// escaped single chars like `\\`, `\``, `\n` => '\\', '`', '\n'
|
||||||
|
/// escaped hex bytes like `\x01`, `\x61` => '\x01', 'a'
|
||||||
|
/// escaped multibyte runes like `\xe29885` => (★)
|
||||||
|
/// escaped unicode literals like `\u2605`
|
||||||
fn (mut s Scanner) ident_char() string {
|
fn (mut s Scanner) ident_char() string {
|
||||||
start := s.pos
|
lspos := token.Position{
|
||||||
|
line_nr: s.line_nr
|
||||||
|
pos: s.pos
|
||||||
|
col: s.pos - s.last_nl_pos - 1
|
||||||
|
}
|
||||||
|
|
||||||
|
start := s.pos // the string position of the first backtick char
|
||||||
slash := `\\`
|
slash := `\\`
|
||||||
mut len := 0
|
mut len := 0
|
||||||
|
|
||||||
|
// set flags for advanced escapes first
|
||||||
|
escaped_hex := s.expect('\\x', start + 1)
|
||||||
|
escaped_unicode := s.expect('\\u', start + 1)
|
||||||
|
|
||||||
|
// walk the string to get characters up to the next backtick
|
||||||
for {
|
for {
|
||||||
s.pos++
|
s.pos++
|
||||||
if s.pos >= s.text.len {
|
if s.pos >= s.text.len {
|
||||||
|
@ -1334,12 +1385,68 @@ fn (mut s Scanner) ident_char() string {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
len--
|
len--
|
||||||
c := s.text[start + 1..s.pos]
|
mut c := s.text[start + 1..s.pos]
|
||||||
if len != 1 {
|
if len != 1 {
|
||||||
|
// if the content expresses an escape code, it will have an even number of characters
|
||||||
|
// e.g. \x61 or \u2605
|
||||||
|
if (c.len % 2 == 0) && (escaped_hex || escaped_unicode) {
|
||||||
|
if escaped_unicode {
|
||||||
|
c = decode_u_escapes(c, 0, [0])
|
||||||
|
} else {
|
||||||
|
// we have to handle hex ourselves
|
||||||
|
ascii_0 := byte(0x30)
|
||||||
|
ascii_a := byte(0x61)
|
||||||
|
mut accumulated := []byte{}
|
||||||
|
val := c[2..c.len].to_lower() // 0A -> 0a
|
||||||
|
mut offset := 0
|
||||||
|
// take two characters at a time, parse as hex and add to bytes
|
||||||
|
for {
|
||||||
|
if offset >= val.len - 1 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
mut byteval := byte(0)
|
||||||
|
big := val[offset]
|
||||||
|
little := val[offset + 1]
|
||||||
|
if !big.is_hex_digit() {
|
||||||
|
accumulated.clear()
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if !little.is_hex_digit() {
|
||||||
|
accumulated.clear()
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
if big.is_digit() {
|
||||||
|
byteval |= (big - ascii_0) << 4
|
||||||
|
} else {
|
||||||
|
byteval |= (big - ascii_a + 10) << 4
|
||||||
|
}
|
||||||
|
if little.is_digit() {
|
||||||
|
byteval |= (little - ascii_0)
|
||||||
|
} else {
|
||||||
|
byteval |= (little - ascii_a + 10)
|
||||||
|
}
|
||||||
|
|
||||||
|
accumulated << byteval
|
||||||
|
offset += 2
|
||||||
|
}
|
||||||
|
if accumulated.len > 0 {
|
||||||
|
c = accumulated.bytestr()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// the string inside the backticks is longer than one character
|
||||||
|
// but we might only have one rune, say in the case
|
||||||
u := c.runes()
|
u := c.runes()
|
||||||
if u.len != 1 {
|
if u.len != 1 {
|
||||||
s.error('invalid character literal (more than one character)\n' +
|
if escaped_hex || escaped_unicode {
|
||||||
'use quotes for strings, backticks for characters')
|
s.error('invalid character literal (escape sequence did not refer to a singular rune)')
|
||||||
|
} else {
|
||||||
|
s.add_error_detail_with_pos('use quotes for strings, backticks for characters',
|
||||||
|
lspos)
|
||||||
|
s.error('invalid character literal (more than one character)')
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Escapes a `'` character
|
// Escapes a `'` character
|
||||||
|
|
|
@ -19,6 +19,19 @@ fn scan_kinds(text string) []token.Kind {
|
||||||
return token_kinds
|
return token_kinds
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// scan_tokens creates a new scanner over `text` (in `.parse_comments` mode, with
// default preferences) and returns all tokens it yields, in order, up to but not
// including the `.eof` token.
fn scan_tokens(text string) []token.Token {
	mut sc := new_scanner(text, .parse_comments, &pref.Preferences{})
	mut collected := []token.Token{}
	mut current := sc.scan()
	for current.kind != .eof {
		collected << current
		current = sc.scan()
	}
	return collected
}
|
||||||
|
|
||||||
fn test_scan() {
|
fn test_scan() {
|
||||||
token_kinds := scan_kinds('println(2 + 3)')
|
token_kinds := scan_kinds('println(2 + 3)')
|
||||||
assert token_kinds.len == 6
|
assert token_kinds.len == 6
|
||||||
|
@ -138,6 +151,90 @@ fn test_ref_ref_array_ref_ref_foo() {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn test_escape_string() {
|
fn test_escape_string() {
|
||||||
|
// these assertions aren't helpful...
|
||||||
|
// they test the vlib built-in to the compiler,
|
||||||
|
// but we want to test this module before compilation
|
||||||
assert '\x61' == 'a'
|
assert '\x61' == 'a'
|
||||||
assert '\x62' == 'b'
|
assert '\x62' == 'b'
|
||||||
|
// assert `\x61` == `a` // will work after pull request goes through
|
||||||
|
|
||||||
|
// SINGLE CHAR ESCAPES
|
||||||
|
// SINGLE CHAR APOSTROPHE
|
||||||
|
mut result := scan_tokens(r"`'`")
|
||||||
|
assert result[0].kind == .chartoken
|
||||||
|
assert result[0].lit == r"\'"
|
||||||
|
|
||||||
|
// SINGLE CHAR BACKTICK
|
||||||
|
result = scan_tokens(r'`\``')
|
||||||
|
assert result[0].kind == .chartoken
|
||||||
|
assert result[0].lit == r'\`'
|
||||||
|
|
||||||
|
// SINGLE CHAR SLASH
|
||||||
|
result = scan_tokens(r'`\\`')
|
||||||
|
assert result[0].kind == .chartoken
|
||||||
|
assert result[0].lit == r'\\'
|
||||||
|
|
||||||
|
// SINGLE CHAR UNICODE ESCAPE
|
||||||
|
result = scan_tokens(r'`\u2605`')
|
||||||
|
assert result[0].kind == .chartoken
|
||||||
|
assert result[0].lit == r'★'
|
||||||
|
|
||||||
|
// SINGLE CHAR ESCAPED ASCII
|
||||||
|
result = scan_tokens(r'`\x61`')
|
||||||
|
assert result[0].kind == .chartoken
|
||||||
|
assert result[0].lit == r'a'
|
||||||
|
|
||||||
|
// SINGLE CHAR INCORRECT ESCAPE
|
||||||
|
// result = scan_tokens(r'`\x61\x61`') // should always result in an error
|
||||||
|
|
||||||
|
// SINGLE CHAR MULTI-BYTE UTF-8
|
||||||
|
// Compilation blocked by vlib/v/checker/check_types.v, but works in the repl
|
||||||
|
result = scan_tokens(r'`\xe29885`')
|
||||||
|
assert result[0].lit == r'★'
|
||||||
|
|
||||||
|
// STRING ESCAPES =================
|
||||||
|
// STRING APOSTROPHE
|
||||||
|
result = scan_tokens(r"'\''")
|
||||||
|
assert result[0].kind == .string
|
||||||
|
assert result[0].lit == r"\'"
|
||||||
|
|
||||||
|
// STRING BACKTICK
|
||||||
|
result = scan_tokens(r"'\`'")
|
||||||
|
assert result[0].kind == .string
|
||||||
|
assert result[0].lit == r'\`'
|
||||||
|
|
||||||
|
// STRING SLASH
|
||||||
|
result = scan_tokens(r"'\\'")
|
||||||
|
assert result[0].kind == .string
|
||||||
|
assert result[0].lit == r'\\'
|
||||||
|
|
||||||
|
// STRING UNICODE ESCAPE
|
||||||
|
result = scan_tokens(r"'\u2605'")
|
||||||
|
assert result[0].kind == .string
|
||||||
|
assert result[0].lit == r'★'
|
||||||
|
|
||||||
|
// STRING ESCAPED ASCII
|
||||||
|
result = scan_tokens(r"'\x61'")
|
||||||
|
assert result[0].kind == .string
|
||||||
|
assert result[0].lit == r'a'
|
||||||
|
|
||||||
|
// STRING ESCAPED EXTENDED ASCII
|
||||||
|
// (should not be converted to unicode)
|
||||||
|
result = scan_tokens(r"'\xe29885'")
|
||||||
|
assert result[0].kind == .string
|
||||||
|
assert result[0].lit.bytes() == [byte(0xe2), `9`, `8`, `8`, `5`]
|
||||||
|
|
||||||
|
// SHOULD RESULT IN ERRORS
|
||||||
|
// result = scan_tokens(r'`\x61\x61`') // should always result in an error
|
||||||
|
// result = scan_tokens(r"'\x'") // should always result in an error
|
||||||
|
// result = scan_tokens(r'`hello`') // should always result in an error
|
||||||
|
}
|
||||||
|
|
||||||
|
fn test_comment_string() {
|
||||||
|
mut result := scan_tokens('// single line comment will get an \\x01 prepended')
|
||||||
|
assert result[0].kind == .comment
|
||||||
|
assert result[0].lit[0] == byte(1) // \x01
|
||||||
|
// result = scan_tokens('/// doc comment will keep third / at beginning')
|
||||||
|
// result = scan_tokens('/* block comment will be stripped of whitespace */')
|
||||||
|
// result = scan_tokens('a := 0 // line end comment also gets \\x01 prepended')
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue