scanner: replace ascii unicode(\u0020) with hex(\x20) (#9259)

pull/9275/head^2
zakuro 2021-03-13 15:02:42 +09:00 committed by GitHub
parent 62458e3553
commit 00dedaf6c1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 31 additions and 7 deletions

View File

@ -1101,6 +1101,7 @@ fn (mut s Scanner) ident_string() string {
start++
}
s.is_inside_string = false
mut u_to_x_pos := []int{} // pos list to replace \u0020 -> \x20
slash := `\\`
for {
s.pos++
@ -1146,13 +1147,17 @@ fn (mut s Scanner) ident_string() string {
s.error(r'`\x` used with no following hex digits')
}
// Escape `\u`
if c == `u` && (s.text[s.pos + 1] == s.quote
|| s.text[s.pos + 2] == s.quote || s.text[s.pos + 3] == s.quote
|| s.text[s.pos + 4] == s.quote || !s.text[s.pos + 1].is_hex_digit()
|| !s.text[s.pos + 2].is_hex_digit()
|| !s.text[s.pos + 3].is_hex_digit()
|| !s.text[s.pos + 4].is_hex_digit()) {
if c == `u` {
if s.text[s.pos + 1] == s.quote || s.text[s.pos + 2] == s.quote
|| s.text[s.pos + 3] == s.quote || s.text[s.pos + 4] == s.quote
|| !s.text[s.pos + 1].is_hex_digit() || !s.text[s.pos + 2].is_hex_digit()
|| !s.text[s.pos + 3].is_hex_digit() || !s.text[s.pos + 4].is_hex_digit() {
s.error(r'`\u` incomplete unicode character value')
} else if s.text[s.pos + 1] == `0` && s.text[s.pos + 2] == `0`
&& (`0` <= s.text[s.pos + 3] && s.text[s.pos + 3] < `8`) {
// ascii
u_to_x_pos << s.pos - 1
}
}
}
// ${var} (ignore in vfmt mode) (skip \$)
@ -1179,6 +1184,15 @@ fn (mut s Scanner) ident_string() string {
}
if start <= s.pos {
mut string_so_far := s.text[start..end]
if u_to_x_pos.len > 0 {
mut ss := []string{cap: u_to_x_pos.len + 1}
ss << string_so_far[..u_to_x_pos[0] - start]
for i in 0 .. u_to_x_pos.len - 1 {
ss << r'\x' + string_so_far[u_to_x_pos[i] - start + 4..u_to_x_pos[i + 1] - start]
}
ss << r'\x' + string_so_far[u_to_x_pos.last() + 4 - start..]
string_so_far = ss.join('')
}
if n_cr_chars > 0 {
string_so_far = string_so_far.replace('\r', '')
}

View File

@ -0,0 +1,10 @@
// test_raw_string asserts that a raw string literal (r'...') equals the
// regular string literal in which every backslash is written doubled, i.e.
// `\n` and `\u00c0` inside the raw literal are kept as literal text.
fn test_raw_string() {
assert r'\n\u00c0' == '\\n\\u00c0'
}
// test_escape asserts the values produced by escape sequences in regular
// (non-raw) string literals.
fn test_escape() {
// the hex escape and the 4-digit unicode escape for 0x20 both equal a space
assert '\x20' == ' '
assert '\u0020' == ' '
// NOTE(review): the non-ASCII case below is disabled; the reason is not
// visible from this file — confirm before re-enabling.
// assert '\u00c4' == 'Ä'
// `\r\n` is expected to be exactly the two bytes 0x0d, 0x0a
assert '\r\n'.bytes() == [byte(0x0d), 0x0a]
}