From 4d1f721558ef935a02274f04b90501e1b0837c79 Mon Sep 17 00:00:00 2001 From: Alexander Medvednikov Date: Sun, 8 Dec 2019 20:06:00 +0300 Subject: [PATCH] fix "" string interpolation bug --- vlib/builtin/string_test.v | 8 ++ vlib/compiler/parser.v | 148 ---------------------------- vlib/compiler/scanner.v | 19 ++-- vlib/compiler/string_expression.v | 154 ++++++++++++++++++++++++++++++ 4 files changed, 173 insertions(+), 156 deletions(-) create mode 100644 vlib/compiler/string_expression.v diff --git a/vlib/builtin/string_test.v b/vlib/builtin/string_test.v index a42aad0d81..ecc1dd72f7 100644 --- a/vlib/builtin/string_test.v +++ b/vlib/builtin/string_test.v @@ -565,3 +565,11 @@ fn test_inter_before_comp_if() { } } +fn test_double_quote_inter() { + a := 1 + b := 2 + println("${a} ${b}") + assert "${a} ${b}" == "1 2" + assert '${a} ${b}' == "1 2" +} + diff --git a/vlib/compiler/parser.v b/vlib/compiler/parser.v index f102fb85b8..1c5c74e866 100644 --- a/vlib/compiler/parser.v +++ b/vlib/compiler/parser.v @@ -2280,154 +2280,6 @@ fn format_str(_str string) string { return str } -fn (p mut Parser) string_expr() { - is_raw := p.tok == .name && p.lit == 'r' - is_cstr := p.tok == .name && p.lit == 'c' - if is_raw || is_cstr { - p.next() - } - str := p.lit - // No ${}, just return a simple string - if p.peek() != .str_dollar || is_raw { - f := if is_raw { cescaped_path(str) } else { format_str(str) } - // `C.puts('hi')` => `puts("hi");` - /* - Calling a C function sometimes requires a call to a string method - C.fun('ssss'.to_wide()) => fun(string_to_wide(tos3("ssss"))) - */ - if (p.calling_c && p.peek() != .dot) || is_cstr || (p.pref.translated && p.mod == 'main') { - p.gen('"$f"') - } - else if p.is_sql { - p.gen("'$str'") - } - else if p.is_js { - p.gen('tos("$f")') - } - else { - p.gen('tos3("$f")') - } - p.next() - return - } - $if js { - p.error('js backend does not support string formatting yet') - } - p.is_alloc = true // $ interpolation means there's allocation - mut args := '"' - mut format := '"' - mut complex_inter := false // for vfmt - for p.tok == .str { - // Add the string between %d's - p.lit = p.lit.replace('%', '%%') - format += format_str(p.lit) - p.next()// skip $ - if p.tok != .str_dollar { - continue - } - // Handle .dollar - p.check(.str_dollar) - // If there's no string after current token, it means we are in - // a complex expression (`${...}`) - if p.peek() != .str { - p.fgen('{') - complex_inter = true - } - // Get bool expr inside a temp var - typ, val_ := p.tmp_expr() - val := val_.trim_space() - args += ', $val' - if typ == 'string' { - // args += '.str' - // printf("%.*s", a.len, a.str) syntax - args += '.len, ${val}.str' - } - if typ == 'ustring' { - args += '.len, ${val}.s.str' - } - if typ == 'bool' { - //args += '.len, ${val}.str' - } - // Custom format? ${t.hour:02d} - custom := p.tok == .colon - if custom { - mut cformat := '' - p.next() - if p.tok == .dot { - cformat += '.' - p.next() - } - if p.tok == .minus { // support for left aligned formatting - cformat += '-' - p.next() - } - cformat += p.lit// 02 - p.next() - fspec := p.lit // f - cformat += fspec - if fspec == 's' { - //println('custom str F=$cformat | format_specifier: "$fspec" | typ: $typ ') - if typ != 'string' { - p.error('only V strings can be formatted with a :${cformat} format, but you have given "${val}", which has type ${typ}') - } - args = args.all_before_last('${val}.len, ${val}.str') + '${val}.str' - } - format += '%$cformat' - p.next() - } - else { - f := p.typ_to_fmt(typ, 0) - if f == '' { - is_array := typ.starts_with('array_') - typ2 := p.table.find_type(typ) - has_str_method := p.table.type_has_method(typ2, 'str') - if is_array || has_str_method { - if is_array && !has_str_method { - p.gen_array_str(typ2) - } - tmp_var := p.get_tmp() - p.cgen.insert_before('string $tmp_var = ${typ}_str(${val});') - args = args.all_before_last(val) + '${tmp_var}.len, ${tmp_var}.str' - format += '%.*s ' - } - else { - p.error('unhandled sprintf format "$typ" ') - } - } - format += f - } - //println('interpolation format is: |${format}| args are: |${args}| ') - } - if complex_inter { - p.fgen('}') - } - //p.fgen('\'') - // println("hello %d", num) optimization. - if p.cgen.nogen { - return - } - // println: don't allocate a new string, just print it. - $if !windows { - cur_line := p.cgen.cur_line.trim_space() - if cur_line == 'println (' && p.tok != .plus { - p.cgen.resetln(cur_line.replace('println (', 'printf(')) - p.gen('$format\\n$args') - return - } - } - // '$age'! means the user wants this to be a tmp string (uses global buffer, no allocation, - // won't be used again) - // TODO remove this hack, do this automatically - if p.tok == .not { - p.check(.not) - p.gen('_STR_TMP($format$args)') - } - else { - // Otherwise do len counting + allocation + sprintf - p.gen('_STR($format$args)') - } -} - // m := map[string]int{} // m := { 'one': 1 } fn (p mut Parser) map_init() string { diff --git a/vlib/compiler/scanner.v b/vlib/compiler/scanner.v index a28e27fc58..c1cf0fc74d 100644 --- a/vlib/compiler/scanner.v +++ b/vlib/compiler/scanner.v @@ -298,14 +298,15 @@ fn (s mut Scanner) scan() ScanRes { s.inside_string = false } } + // end of `$expr` if s.inter_start && next_char != `.` { s.inter_end = true s.inter_start = false } if s.pos == 0 && next_char == ` ` { - s.pos++ //If a single letter name at the start of the file, increment //Otherwise the scanner would be stuck at s.pos = 0 + s.pos++ } return scan_res(.name, name) } @@ -390,8 +391,7 @@ fn (s mut Scanner) scan() ScanRes { return scan_res(.lcbr, '') } `$` { - // if s.inter_start { - if s.inside_string {// || s.inter_start { + if s.inside_string { return scan_res(.str_dollar, '') } else { return scan_res(.dollar, '') @@ -402,8 +402,7 @@ fn (s mut Scanner) scan() ScanRes { // s = `hello ${name} !` if s.inside_string { s.pos++ - // TODO UNNEEDED? - if s.text[s.pos] == single_quote { + if s.text[s.pos] == s.quote { s.inside_string = false return scan_res(.str, '') } @@ -704,15 +703,19 @@ fn (s mut Scanner) ident_string() string { if c == `0` && s.pos > 5 && s.expect('\\x0', s.pos - 3) { s.error('0 character in a string literal') } - // ${var} - if c == `{` && prevc == `$` && !is_raw && !s.is_fmt && s.count_symbol_before(s.pos-2, slash) % 2 == 0 { + // ${var} (ignore in vfmt mode) + if c == `{` && prevc == `$` && !is_raw && !s.is_fmt && + s.count_symbol_before(s.pos-2, slash) % 2 == 0 + { s.inside_string = true // so that s.pos points to $ at the next step s.pos -= 2 break } // $var - if (c.is_letter() || c == `_`) && prevc == `$` && !s.is_fmt && !is_raw && s.count_symbol_before(s.pos-2, slash) % 2 == 0 { + if (c.is_letter() || c == `_`) && prevc == `$` && !s.is_fmt && + !is_raw && s.count_symbol_before(s.pos-2, slash) % 2 == 0 + { s.inside_string = true s.inter_start = true s.pos -= 2 diff --git a/vlib/compiler/string_expression.v b/vlib/compiler/string_expression.v new file mode 100644 index 0000000000..26bf1141bb --- /dev/null +++ b/vlib/compiler/string_expression.v @@ -0,0 +1,154 @@ +// Copyright (c) 2019 Alexander Medvednikov. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. + +module compiler + +fn (p mut Parser) string_expr() { + is_raw := p.tok == .name && p.lit == 'r' + is_cstr := p.tok == .name && p.lit == 'c' + if is_raw || is_cstr { + p.next() + } + str := p.lit + // No ${}, just return a simple string + if p.peek() != .str_dollar || is_raw { + f := if is_raw { cescaped_path(str) } else { format_str(str) } + // `C.puts('hi')` => `puts("hi");` + /* + Calling a C function sometimes requires a call to a string method + C.fun('ssss'.to_wide()) => fun(string_to_wide(tos3("ssss"))) + */ + if (p.calling_c && p.peek() != .dot) || is_cstr || (p.pref.translated && p.mod == 'main') { + p.gen('"$f"') + } + else if p.is_sql { + p.gen("'$str'") + } + else if p.is_js { + p.gen('tos("$f")') + } + else { + p.gen('tos3("$f")') + } + p.next() + return + } + $if js { + p.error('js backend does not support string formatting yet') + } + p.is_alloc = true // $ interpolation means there's allocation + mut args := '"' + mut format := '"' + mut complex_inter := false // for vfmt + for p.tok == .str { + // Add the string between %d's + p.lit = p.lit.replace('%', '%%') + format += format_str(p.lit) + p.next()// skip $ + if p.tok != .str_dollar { + continue + } + // Handle .dollar + p.check(.str_dollar) + // If there's no string after current token, it means we are in + // a complex expression (`${...}`) + if p.peek() != .str { + p.fgen('{') + complex_inter = true + } + // Get bool expr inside a temp var + typ, val_ := p.tmp_expr() + val := val_.trim_space() + args += ', $val' + if typ == 'string' { + // args += '.str' + // printf("%.*s", a.len, a.str) syntax + args += '.len, ${val}.str' + } + if typ == 'ustring' { + args += '.len, ${val}.s.str' + } + if typ == 'bool' { + //args += '.len, ${val}.str' + } + // Custom format? ${t.hour:02d} + custom := p.tok == .colon + if custom { + mut cformat := '' + p.next() + if p.tok == .dot { + cformat += '.' + p.next() + } + if p.tok == .minus { // support for left aligned formatting + cformat += '-' + p.next() + } + cformat += p.lit// 02 + p.next() + fspec := p.lit // f + cformat += fspec + if fspec == 's' { + //println('custom str F=$cformat | format_specifier: "$fspec" | typ: $typ ') + if typ != 'string' { + p.error('only V strings can be formatted with a :${cformat} format, but you have given "${val}", which has type ${typ}') + } + args = args.all_before_last('${val}.len, ${val}.str') + '${val}.str' + } + format += '%$cformat' + p.next() + } + else { + f := p.typ_to_fmt(typ, 0) + if f == '' { + is_array := typ.starts_with('array_') + typ2 := p.table.find_type(typ) + has_str_method := p.table.type_has_method(typ2, 'str') + if is_array || has_str_method { + if is_array && !has_str_method { + p.gen_array_str(typ2) + } + tmp_var := p.get_tmp() + p.cgen.insert_before('string $tmp_var = ${typ}_str(${val});') + args = args.all_before_last(val) + '${tmp_var}.len, ${tmp_var}.str' + format += '%.*s ' + } + else { + p.error('unhandled sprintf format "$typ" ') + } + } + format += f + } + //println('interpolation format is: |${format}| args are: |${args}| ') + } + if complex_inter { + p.fgen('}') + } + //p.fgen('\'') + // println("hello %d", num) optimization. + if p.cgen.nogen { + return + } + // println: don't allocate a new string, just print it. + $if !windows { + cur_line := p.cgen.cur_line.trim_space() + if cur_line == 'println (' && p.tok != .plus { + p.cgen.resetln(cur_line.replace('println (', 'printf(')) + p.gen('$format\\n$args') + return + } + } + // '$age'! means the user wants this to be a tmp string (uses global buffer, no allocation, + // won't be used again) + // TODO remove this hack, do this automatically + if p.tok == .not { + p.check(.not) + p.gen('_STR_TMP($format$args)') + } + else { + // Otherwise do len counting + allocation + sprintf + p.gen('_STR($format$args)') + } +} +