From bfe0a7887fef67a91f46c35d41a6eccd38c3e9fe Mon Sep 17 00:00:00 2001 From: penguindark <57967770+penguindark@users.noreply.github.com> Date: Sun, 25 Apr 2021 16:57:21 +0200 Subject: [PATCH] builtin,strconv: fix and optimize utf8 and formatting functions (#9874) --- vlib/builtin/utf8.v | 23 +++++++++-------------- vlib/strconv/f32_str.v | 8 ++++++++ vlib/strconv/f64_str.v | 8 ++++++++ vlib/strconv/format.v | 14 ++++++++++++-- 4 files changed, 37 insertions(+), 16 deletions(-) diff --git a/vlib/builtin/utf8.v b/vlib/builtin/utf8.v index d2eca88cf2..f55d1b087a 100644 --- a/vlib/builtin/utf8.v +++ b/vlib/builtin/utf8.v @@ -103,16 +103,12 @@ fn utf8_len(c byte) int { } // Calculate string length for in number of codepoints -fn utf8_str_len(s string) int { +pub fn utf8_str_len(s string) int { mut l := 0 - for i := 0; i < s.len; i++ { + mut i := 0 + for i < s.len { l++ - c := unsafe { s.str[i] } - if (c & (1 << 7)) != 0 { - for t := byte(1 << 6); (c & t) != 0; t >>= 1 { - i++ - } - } + i += ((0xe5000000 >> ((unsafe { s.str[i] } >> 3) & 0x1e)) & 3) + 1 } return l } @@ -124,17 +120,16 @@ pub fn utf8_str_visible_length(s string) int { mut l := 0 mut ul := 1 for i := 0; i < s.len; i += ul { - ul = 1 c := unsafe { s.str[i] } - if (c & (1 << 7)) != 0 { - for t := byte(1 << 6); (c & t) != 0; t >>= 1 { - ul++ - } - } + ul = ((0xe5000000 >> ((unsafe { s.str[i] } >> 3) & 0x1e)) & 3) + 1 if i + ul > s.len { // incomplete UTF-8 sequence return l } l++ + // avoid the match if not needed + if ul == 1 { + continue + } // recognize combining characters and wide characters match ul { 2 { diff --git a/vlib/strconv/f32_str.v b/vlib/strconv/f32_str.v index 270c4c2458..a1e231c998 100644 --- a/vlib/strconv/f32_str.v +++ b/vlib/strconv/f32_str.v @@ -94,6 +94,14 @@ pub fn (d Dec32) get_string_32(neg bool, i_n_digit int, i_pad_digit int) string x++ } + // no decimal digits needed, end here + if i_n_digit == 0 { + unsafe { + buf[i]=0 + return tos(byteptr(&buf[0]), i) + } + } + if out_len >= 1 { buf[y - x] = `.` x++ diff --git a/vlib/strconv/f64_str.v b/vlib/strconv/f64_str.v index f33d54797a..99c73bd082 100644 --- a/vlib/strconv/f64_str.v +++ b/vlib/strconv/f64_str.v @@ -110,6 +110,14 @@ fn (d Dec64) get_string_64(neg bool, i_n_digit int, i_pad_digit int) string { x++ } + // no decimal digits needed, end here + if i_n_digit == 0 { + unsafe { + buf[i]=0 + return tos(byteptr(&buf[0]), i) + } + } + if out_len >= 1 { buf[y - x] = `.` x++ diff --git a/vlib/strconv/format.v b/vlib/strconv/format.v index bba96f2b36..f9c06efc32 100644 --- a/vlib/strconv/format.v +++ b/vlib/strconv/format.v @@ -29,7 +29,7 @@ enum Char_parse_state { reset_params } -enum Align_text { +pub enum Align_text { right = 0 left center @@ -176,6 +176,12 @@ pub fn f64_to_str_lnd(f f64, dec_digit int) string { i++ } } + + // no more digits needed, stop here + if dec_digit <= 0 { + return unsafe { tos(res.data, dot_res_sp) } + } + //println("r_i-d_pos: ${r_i - d_pos}") if dot_res_sp >= 0 { if (r_i - dot_res_sp) > dec_digit { @@ -204,6 +210,7 @@ pub fn f64_to_str_lnd(f f64, dec_digit int) string { */ pub struct BF_param { +pub mut: pad_ch byte = byte(` `) // padding char len0 int = -1 // default len for whole the number or string len1 int = 6 // number of decimal digits, if needed @@ -214,7 +221,10 @@ pub struct BF_param { } pub fn format_str(s string, p BF_param) string { - dif := p.len0 - s.len + if p.len0 <= 0 { + return s + } + dif := p.len0 - utf8_str_visible_length(s) if dif <= 0 { return s }