cgen: string interpolation: optimize away some partial strings of 0 length

pull/4764/head
Uwe Krüger 2020-05-07 04:34:18 +02:00 committed by GitHub
parent 19390871e9
commit 59aa31cee5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 25 additions and 15 deletions

View File

@ -2425,16 +2425,18 @@ fn (mut g Gen) string_inter_literal(node ast.StringInterLiteral) {
// Build the string with % // Build the string with %
mut fieldwidths := []int{} mut fieldwidths := []int{}
mut specs := []byte{} mut specs := []byte{}
mut num_fmts := 1 mut end_string := false
for i, val in node.vals { for i, val in node.vals {
escaped_val := val.replace_each(['"', '\\"', '\r\n', '\\n', '\n', '\\n', '%', '%%']) escaped_val := val.replace_each(['"', '\\"', '\r\n', '\\n', '\n', '\\n', '%', '%%'])
g.write(escaped_val)
if i >= node.exprs.len { if i >= node.exprs.len {
fieldwidths << 0 if escaped_val.len > 0 {
specs << `_` end_string = true
g.write('\\000')
g.write(escaped_val)
}
continue continue
} }
num_fmts++ g.write(escaped_val)
sym := g.table.get_type_symbol(node.expr_types[i]) sym := g.table.get_type_symbol(node.expr_types[i])
sfmt := node.expr_fmts[i] sfmt := node.expr_fmts[i]
mut fspec := `_` // placeholder mut fspec := `_` // placeholder
@ -2507,7 +2509,7 @@ fn (mut g Gen) string_inter_literal(node ast.StringInterLiteral) {
if fspec == `p` { if fspec == `p` {
g.write('${fmt}p') g.write('${fmt}p')
} else { } else {
g.write('${fmt}l${fspec:c}') g.write('${fmt}"PRI${fspec:c}PTR"')
} }
} else if node.expr_types[i].is_int() { } else if node.expr_types[i].is_int() {
if fspec == `c` { if fspec == `c` {
@ -2533,9 +2535,16 @@ fn (mut g Gen) string_inter_literal(node ast.StringInterLiteral) {
// TODO: better check this case // TODO: better check this case
g.write('${fmt}"PRId32"') g.write('${fmt}"PRId32"')
} }
g.write('\\000') if i < node.exprs.len - 1 {
g.write('\\000')
}
} }
g.write('", $num_fmts, ') num_string_parts := if end_string {
node.exprs.len+1
} else {
node.exprs.len
}
g.write('", $num_string_parts, ')
// Build args // Build args
for i, expr in node.exprs { for i, expr in node.exprs {
if node.expr_types[i] == table.string_type { if node.expr_types[i] == table.string_type {

View File

@ -70,8 +70,7 @@ string _STR(const char *fmt, int nfmts, ...) {
//v_panic(tos3('Invalid format specifier')); //v_panic(tos3('Invalid format specifier'));
} }
} else { } else {
if (k) _STR_PRINT_ARG(fmt, &buf, &nbytes, &memsize, k);
_STR_PRINT_ARG(fmt, &buf, &nbytes, &memsize, k);
} }
fmt += k+1; fmt += k+1;
} }

View File

@ -98,8 +98,10 @@ fn test_inttypes_string_interpolation() {
assert '${s:X}:${us:x}:${u16(uc):04x}' == 'A460:d431:00d9' assert '${s:X}:${us:x}:${u16(uc):04x}' == 'A460:d431:00d9'
assert '${i:x}:${ui:X}:${int(s):x}' == '9f430000:CBF6EFC7:ffffa460' assert '${i:x}:${ui:X}:${int(s):x}' == '9f430000:CBF6EFC7:ffffa460'
assert '${l:x}:${ul:X}' == '9537727cad98876c:EF2B7D4001165BD2' assert '${l:x}:${ul:X}' == '9537727cad98876c:EF2B7D4001165BD2'
// TODO this does not work on Windows // default pointer format is platform dependent, so try a few
// assert '${vp:p}:$bp' == '0xcbf6efc7:0x39e53208c' assert '${vp:p}:$bp' == '0xcbf6efc7:0x39e53208c' ||
'${vp:p}:$bp' == 'CBF6EFC7:39E53208C' ||
'${vp:p}:$bp' == '00000000CBF6EFC7:000000039E53208C'
} }
fn test_utf8_string_interpolation() { fn test_utf8_string_interpolation() {
@ -108,9 +110,9 @@ fn test_utf8_string_interpolation() {
m := '10' m := '10'
assert '$a $st $m' == 'à-côté Sträßle 10' assert '$a $st $m' == 'à-côté Sträßle 10'
assert '>${a:10}< >${st:-8}< >${m:5}<-' == '> à-côté< >Sträßle < > 10<-' assert '>${a:10}< >${st:-8}< >${m:5}<-' == '> à-côté< >Sträßle < > 10<-'
e := '\u20AC' // Eurosign // e := '\u20AC' // Eurosign doesn't work with MSVC and tcc
// TODO: this fails with MSVC and tcc e := ''
// assert '100.00 $e' == '100.00 €' assert '100.00 $e' == '100.00 '
m2 := 'Москва́' // cyrillic а́: combination of U+0430 and U+0301, UTF-8: d0 b0 cc 81 m2 := 'Москва́' // cyrillic а́: combination of U+0430 and U+0301, UTF-8: d0 b0 cc 81
d := 'Antonín Dvořák' // latin á: U+00E1, UTF-8: c3 a1 d := 'Antonín Dvořák' // latin á: U+00E1, UTF-8: c3 a1
assert ':${m2:7}:${d:-15}:' == ': Москва́:Antonín Dvořák :' assert ':${m2:7}:${d:-15}:' == ': Москва́:Antonín Dvořák :'