strconv,js: f64_to_str works on JS backend now; Fix BigInt usage in infix expressions (#12464)

2021-11-15 15:13:44 +03:00 · 2021-11-15 15:13:44 +03:00 · a8a1e9381f
parent 1d2b16dde2
commit a8a1e9381f
7 changed files with 399 additions and 66 deletions
--- a/vlib/strconv/f32_str.js.v
+++ b/vlib/strconv/f32_str.js.v
@ -0,0 +1 @@
+module strconv
--- a/vlib/strconv/f64_str.c.v
+++ b/vlib/strconv/f64_str.c.v
@ -20,42 +20,6 @@ https://github.com/cespare/ryu/tree/ba56a33f39e3bbbfa409095d0f9ae168a595feea

 =============================================================================*/

-// pow of ten table used by n_digit reduction
-const (
-	ten_pow_table_64 = [
-		u64(1),
-		u64(10),
-		u64(100),
-		u64(1000),
-		u64(10000),
-		u64(100000),
-		u64(1000000),
-		u64(10000000),
-		u64(100000000),
-		u64(1000000000),
-		u64(10000000000),
-		u64(100000000000),
-		u64(1000000000000),
-		u64(10000000000000),
-		u64(100000000000000),
-		u64(1000000000000000),
-		u64(10000000000000000),
-		u64(100000000000000000),
-		u64(1000000000000000000),
-		u64(10000000000000000000),
-	]
-)
-
-//=============================================================================
-// Conversion Functions
-//=============================================================================
-const (
-	mantbits64 = u32(52)
-	expbits64  = u32(11)
-	bias64     = 1023 // f64 exponent bias
-	maxexp64   = 2047
-)
-
 [direct_array_access]
 fn (d Dec64) get_string_64(neg bool, i_n_digit int, i_pad_digit int) string {
 	mut n_digit := i_n_digit + 1
@ -87,10 +51,10 @@ fn (d Dec64) get_string_64(neg bool, i_n_digit int, i_pad_digit int) string {
 	// rounding last used digit
 	if n_digit < out_len {
 		// println("out:[$out]")
-		out += strconv.ten_pow_table_64[out_len - n_digit - 1] * 5 // round to up
-		out /= strconv.ten_pow_table_64[out_len - n_digit]
+		out += ten_pow_table_64[out_len - n_digit - 1] * 5 // round to up
+		out /= ten_pow_table_64[out_len - n_digit]
 		// println("out1:[$out] ${d.m / ten_pow_table_64[out_len - n_digit ]}")
-		if d.m / strconv.ten_pow_table_64[out_len - n_digit] < out {
+		if d.m / ten_pow_table_64[out_len - n_digit] < out {
 			d_exp++
 			n_digit++
 		}
@ -170,11 +134,11 @@ fn (d Dec64) get_string_64(neg bool, i_n_digit int, i_pad_digit int) string {

 fn f64_to_decimal_exact_int(i_mant u64, exp u64) (Dec64, bool) {
 	mut d := Dec64{}
-	e := exp - strconv.bias64
-	if e > strconv.mantbits64 {
+	e := exp - bias64
+	if e > mantbits64 {
 		return d, false
 	}
-	shift := strconv.mantbits64 - e
+	shift := mantbits64 - e
 	mant := i_mant | u64(0x0010_0000_0000_0000) // implicit 1
 	// mant  := i_mant | (1 << mantbits64) // implicit 1
 	d.m = mant >> shift
@ -195,11 +159,11 @@ fn f64_to_decimal(mant u64, exp u64) Dec64 {
 	if exp == 0 {
 		// We subtract 2 so that the bounds computation has
 		// 2 additional bits.
-		e2 = 1 - strconv.bias64 - int(strconv.mantbits64) - 2
+		e2 = 1 - bias64 - int(mantbits64) - 2
 		m2 = mant
 	} else {
-		e2 = int(exp) - strconv.bias64 - int(strconv.mantbits64) - 2
-		m2 = (u64(1) << strconv.mantbits64) | mant
+		e2 = int(exp) - bias64 - int(mantbits64) - 2
+		m2 = (u64(1) << mantbits64) | mant
 	}
 	even := (m2 & 1) == 0
 	accept_bounds := even
@ -373,13 +337,13 @@ pub fn f64_to_str(f f64, n_digit int) string {
 	u1.f = f
 	u := unsafe { u1.u }

-	neg := (u >> (strconv.mantbits64 + strconv.expbits64)) != 0
-	mant := u & ((u64(1) << strconv.mantbits64) - u64(1))
-	exp := (u >> strconv.mantbits64) & ((u64(1) << strconv.expbits64) - u64(1))
+	neg := (u >> (mantbits64 + expbits64)) != 0
+	mant := u & ((u64(1) << mantbits64) - u64(1))
+	exp := (u >> mantbits64) & ((u64(1) << expbits64) - u64(1))
 	// println("s:${neg} mant:${mant} exp:${exp} float:${f} byte:${u1.u:016lx}")

 	// Exit early for easy cases.
-	if (exp == strconv.maxexp64) || (exp == 0 && mant == 0) {
+	if (exp == maxexp64) || (exp == 0 && mant == 0) {
 		return get_string_special(neg, exp == 0, mant == 0)
 	}

@ -398,13 +362,13 @@ pub fn f64_to_str_pad(f f64, n_digit int) string {
 	u1.f = f
 	u := unsafe { u1.u }

-	neg := (u >> (strconv.mantbits64 + strconv.expbits64)) != 0
-	mant := u & ((u64(1) << strconv.mantbits64) - u64(1))
-	exp := (u >> strconv.mantbits64) & ((u64(1) << strconv.expbits64) - u64(1))
+	neg := (u >> (mantbits64 + expbits64)) != 0
+	mant := u & ((u64(1) << mantbits64) - u64(1))
+	exp := (u >> mantbits64) & ((u64(1) << expbits64) - u64(1))
 	// println("s:${neg} mant:${mant} exp:${exp} float:${f} byte:${u1.u:016lx}")

 	// Exit early for easy cases.
-	if (exp == strconv.maxexp64) || (exp == 0 && mant == 0) {
+	if (exp == maxexp64) || (exp == 0 && mant == 0) {
 		return get_string_special(neg, exp == 0, mant == 0)
 	}

--- a/vlib/strconv/f64_str.js.v
+++ b/vlib/strconv/f64_str.js.v
@ -0,0 +1,339 @@
+module strconv
+
+import math
+
+fn (d Dec64) get_string_64(neg bool, i_n_digit int, i_pad_digit int) string {
+	mut n_digit := i_n_digit + 1
+	pad_digit := i_pad_digit + 1
+	mut out := d.m
+	mut d_exp := d.e
+	// mut out_len      := decimal_len_64(out)
+	mut out_len := dec_digits(out)
+	out_len_original := out_len
+
+	mut fw_zeros := 0
+	if pad_digit > out_len {
+		fw_zeros = pad_digit - out_len
+	}
+
+	mut buf := []byte{len: (out_len + 6 + 1 + 1 + fw_zeros)} // sign + mant_len + . +  e + e_sign + exp_len(2) + \0}
+	mut i := 0
+
+	if neg {
+		#buf.arr.arr[i.val] = '-'.charCodeAt()
+		i++
+	}
+
+	mut disp := 0
+	if out_len <= 1 {
+		disp = 1
+	}
+
+	// rounding last used digit
+	if n_digit < out_len {
+		// println("out:[$out]")
+		out += ten_pow_table_64[out_len - n_digit - 1] * 5 // round to up
+		out /= ten_pow_table_64[out_len - n_digit]
+		// println("out1:[$out] ${d.m / ten_pow_table_64[out_len - n_digit ]}")
+		if d.m / ten_pow_table_64[out_len - n_digit] < out {
+			d_exp++
+			n_digit++
+		}
+
+		// println("cmp: ${d.m/ten_pow_table_64[out_len - n_digit ]} ${out/ten_pow_table_64[out_len - n_digit ]}")
+
+		out_len = n_digit
+		// println("orig: ${out_len_original} new len: ${out_len} out:[$out]")
+	}
+
+	y := i + out_len
+	mut x := 0
+	for x < (out_len - disp - 1) {
+		#buf.arr.arr[y.val - x.val].val = '0'.charCodeAt() + Number(out.valueOf() % 10n)
+
+		out /= 10
+		i++
+		x++
+	}
+
+	// no decimal digits needed, end here
+	if i_n_digit == 0 {
+		res := ''
+		#buf.arr.arr.forEach((it) => it.val == 0 ? res.str : res.str += String.fromCharCode(it.val))
+
+		return res
+	}
+
+	if out_len >= 1 {
+		buf[y - x] = `.`
+		x++
+		i++
+	}
+
+	if y - x >= 0 {
+		#buf.arr.arr[y.val - x.val].val = '0'.charCodeAt() + Number(out.valueOf() % 10n)
+		i++
+	}
+
+	for fw_zeros > 0 {
+		#buf.arr.arr[i.val].val = '0'.charCodeAt()
+		i++
+		fw_zeros--
+	}
+
+	#buf.arr.arr[i.val].val = 'e'.charCodeAt()
+	i++
+
+	mut exp := d_exp + out_len_original - 1
+	if exp < 0 {
+		#buf.arr.arr[i.val].val = '-'.charCodeAt()
+		i++
+		exp = -exp
+	} else {
+		#buf.arr.arr[i.val].val = '+'.charCodeAt()
+		i++
+	}
+
+	// Always print at least two digits to match strconv's formatting.
+	d2 := exp % 10
+	exp /= 10
+	d1 := exp % 10
+	_ := d1
+	_ := d2
+	d0 := exp / 10
+	if d0 > 0 {
+		#buf.arr.arr[i].val = '0'.charCodeAt() + d0.val
+		i++
+	}
+	#buf.arr.arr[i].val = '0'.charCodeAt() + d1.val
+	i++
+	#buf.arr.arr[i].val = '0' + d2.val
+	i++
+	#buf.arr.arr[i].val = 0
+
+	res := ''
+	#buf.arr.arr.forEach((it) => it.val == 0 ? res.str : res.str += String.fromCharCode(it.val))
+
+	return res
+}
+
+fn f64_to_decimal_exact_int(i_mant u64, exp u64) (Dec64, bool) {
+	mut d := Dec64{}
+	e := exp - bias64
+	if e > mantbits64 {
+		return d, false
+	}
+	shift := mantbits64 - e
+	mant := i_mant | u64(0x0010_0000_0000_0000) // implicit 1
+	// mant  := i_mant | (1 << mantbits64) // implicit 1
+	d.m = mant >> shift
+	if (d.m << shift) != mant {
+		return d, false
+	}
+
+	for (d.m % 10) == 0 {
+		d.m /= 10
+		d.e++
+	}
+	return d, true
+}
+
+fn f64_to_decimal(mant u64, exp u64) Dec64 {
+	mut e2 := 0
+	mut m2 := u64(0)
+	if exp == 0 {
+		// We subtract 2 so that the bounds computation has
+		// 2 additional bits.
+		e2 = 1 - bias64 - int(mantbits64) - 2
+		m2 = mant
+	} else {
+		e2 = int(exp) - bias64 - int(mantbits64) - 2
+		m2 = (u64(1) << mantbits64) | mant
+	}
+	even := (m2 & 1) == 0
+	accept_bounds := even
+
+	// Step 2: Determine the interval of valid decimal representations.
+	mv := u64(4 * m2)
+	mm_shift := bool_to_u64(mant != 0 || exp <= 1)
+
+	// Step 3: Convert to a decimal power base uing 128-bit arithmetic.
+	mut vr := u64(0)
+	mut vp := u64(0)
+	mut vm := u64(0)
+	mut e10 := 0
+	mut vm_is_trailing_zeros := false
+	mut vr_is_trailing_zeros := false
+
+	if e2 >= 0 {
+		// This expression is slightly faster than max(0, log10Pow2(e2) - 1).
+		q := log10_pow2(e2) - bool_to_u32(e2 > 3)
+		e10 = int(q)
+		k := pow5_inv_num_bits_64 + pow5_bits(int(q)) - 1
+		i := -e2 + int(q) + k
+
+		mul := pow5_inv_split_64[q]
+		vr = mul_shift_64(u64(4) * m2, mul, i)
+		vp = mul_shift_64(u64(4) * m2 + u64(2), mul, i)
+		vm = mul_shift_64(u64(4) * m2 - u64(1) - mm_shift, mul, i)
+		if q <= 21 {
+			// This should use q <= 22, but I think 21 is also safe.
+			// Smaller values may still be safe, but it's more
+			// difficult to reason about them. Only one of mp, mv,
+			// and mm can be a multiple of 5, if any.
+			if mv % 5 == 0 {
+				vr_is_trailing_zeros = multiple_of_power_of_five_64(mv, q)
+			} else if accept_bounds {
+				// Same as min(e2 + (^mm & 1), pow5Factor64(mm)) >= q
+				// <=> e2 + (^mm & 1) >= q && pow5Factor64(mm) >= q
+				// <=> true && pow5Factor64(mm) >= q, since e2 >= q.
+				vm_is_trailing_zeros = multiple_of_power_of_five_64(mv - 1 - mm_shift,
+					q)
+			} else if multiple_of_power_of_five_64(mv + 2, q) {
+				vp--
+			}
+		}
+	} else {
+		// This expression is slightly faster than max(0, log10Pow5(-e2) - 1).
+		q := log10_pow5(-e2) - bool_to_u32(-e2 > 1)
+		e10 = int(q) + e2
+		i := -e2 - int(q)
+		k := pow5_bits(i) - pow5_num_bits_64
+		j := int(q) - k
+		mul := pow5_split_64[i]
+		vr = mul_shift_64(u64(4) * m2, mul, j)
+		vp = mul_shift_64(u64(4) * m2 + u64(2), mul, j)
+		vm = mul_shift_64(u64(4) * m2 - u64(1) - mm_shift, mul, j)
+		if q <= 1 {
+			// {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.
+			// mv = 4 * m2, so it always has at least two trailing 0 bits.
+			vr_is_trailing_zeros = true
+			if accept_bounds {
+				// mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1.
+				vm_is_trailing_zeros = (mm_shift == 1)
+			} else {
+				// mp = mv + 2, so it always has at least one trailing 0 bit.
+				vp--
+			}
+		} else if q < 63 { // TODO(ulfjack/cespare): Use a tighter bound here.
+			// We need to compute min(ntz(mv), pow5Factor64(mv) - e2) >= q - 1
+			// <=> ntz(mv) >= q - 1 && pow5Factor64(mv) - e2 >= q - 1
+			// <=> ntz(mv) >= q - 1 (e2 is negative and -e2 >= q)
+			// <=> (mv & ((1 << (q - 1)) - 1)) == 0
+			// We also need to make sure that the left shift does not overflow.
+			vr_is_trailing_zeros = multiple_of_power_of_two_64(mv, q - 1)
+		}
+	}
+
+	// Step 4: Find the shortest decimal representation
+	// in the interval of valid representations.
+	mut removed := 0
+	mut last_removed_digit := byte(0)
+	mut out := u64(0)
+	// On average, we remove ~2 digits.
+	if vm_is_trailing_zeros || vr_is_trailing_zeros {
+		// General case, which happens rarely (~0.7%).
+		for {
+			vp_div_10 := vp / 10
+			vm_div_10 := vm / 10
+			if vp_div_10 <= vm_div_10 {
+				break
+			}
+			vm_mod_10 := vm % 10
+			vr_div_10 := vr / 10
+			vr_mod_10 := vr % 10
+			vm_is_trailing_zeros = vm_is_trailing_zeros && vm_mod_10 == 0
+			vr_is_trailing_zeros = vr_is_trailing_zeros && (last_removed_digit == 0)
+			last_removed_digit = byte(vr_mod_10)
+			vr = vr_div_10
+			vp = vp_div_10
+			vm = vm_div_10
+			removed++
+		}
+		if vm_is_trailing_zeros {
+			for {
+				vm_div_10 := vm / 10
+				vm_mod_10 := vm % 10
+				if vm_mod_10 != 0 {
+					break
+				}
+				vp_div_10 := vp / 10
+				vr_div_10 := vr / 10
+				vr_mod_10 := vr % 10
+				vr_is_trailing_zeros = vr_is_trailing_zeros && (last_removed_digit == 0)
+				last_removed_digit = byte(vr_mod_10)
+				vr = vr_div_10
+				vp = vp_div_10
+				vm = vm_div_10
+				removed++
+			}
+		}
+		if vr_is_trailing_zeros && (last_removed_digit == 5) && (vr % 2) == 0 {
+			// Round even if the exact number is .....50..0.
+			last_removed_digit = 4
+		}
+		out = vr
+		// We need to take vr + 1 if vr is outside bounds
+		// or we need to round up.
+		if (vr == vm && (!accept_bounds || !vm_is_trailing_zeros)) || last_removed_digit >= 5 {
+			out++
+		}
+	} else {
+		// Specialized for the common case (~99.3%).
+		// Percentages below are relative to this.
+		mut round_up := false
+		for vp / 100 > vm / 100 {
+			// Optimization: remove two digits at a time (~86.2%).
+			round_up = (vr % 100) >= 50
+			vr /= 100
+			vp /= 100
+			vm /= 100
+			removed += 2
+		}
+		// Loop iterations below (approximately), without optimization above:
+		// 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02%
+		// Loop iterations below (approximately), with optimization above:
+		// 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
+		for vp / 10 > vm / 10 {
+			round_up = (vr % 10) >= 5
+			vr /= 10
+			vp /= 10
+			vm /= 10
+			removed++
+		}
+		// We need to take vr + 1 if vr is outside bounds
+		// or we need to round up.
+		out = vr + bool_to_u64(vr == vm || round_up)
+	}
+
+	return Dec64{
+		m: out
+		e: e10 + removed
+	}
+}
+
+//=============================================================================
+// String Functions
+//=============================================================================
+
+// f64_to_str return a string in scientific notation with max n_digit after the dot
+pub fn f64_to_str(f f64, n_digit int) string {
+	u := math.f64_bits(f)
+	neg := (u >> (mantbits64 + expbits64)) != 0
+	mant := u & ((u64(1) << mantbits64) - u64(1))
+	exp := (u >> mantbits64) & ((u64(1) << expbits64) - u64(1))
+	// println("s:${neg} mant:${mant} exp:${exp} float:${f} byte:${u1.u:016lx}")
+
+	// Exit early for easy cases.
+	if (exp == maxexp64) || (exp == 0 && mant == 0) {
+		return get_string_special(neg, exp == 0, mant == 0)
+	}
+
+	mut d, ok := f64_to_decimal_exact_int(mant, exp)
+	if !ok {
+		// println("to_decimal")
+		d = f64_to_decimal(mant, exp)
+	}
+	// println("${d.m} ${d.e}")
+	return d.get_string_64(neg, n_digit, 0)
+}
--- a/vlib/strconv/f64_str.v
+++ b/vlib/strconv/f64_str.v
@ -0,0 +1,37 @@
+module strconv
+
+// pow of ten table used by n_digit reduction
+const (
+	ten_pow_table_64 = [
+		u64(1),
+		u64(10),
+		u64(100),
+		u64(1000),
+		u64(10000),
+		u64(100000),
+		u64(1000000),
+		u64(10000000),
+		u64(100000000),
+		u64(1000000000),
+		u64(10000000000),
+		u64(100000000000),
+		u64(1000000000000),
+		u64(10000000000000),
+		u64(100000000000000),
+		u64(1000000000000000),
+		u64(10000000000000000),
+		u64(100000000000000000),
+		u64(1000000000000000000),
+		u64(10000000000000000000),
+	]
+)
+
+//=============================================================================
+// Conversion Functions
+//=============================================================================
+const (
+	mantbits64 = u32(52)
+	expbits64  = u32(11)
+	bias64     = 1023 // f64 exponent bias
+	maxexp64   = 2047
+)
--- a/vlib/strconv/format_mem.js.v
+++ b/vlib/strconv/format_mem.js.v
@ -86,7 +86,7 @@ pub fn format_dec_sb(d u64, p BF_param, mut res strings.Builder) {
 		i--
 	}

-	for j in 0 .. n_char {
+	for _ in 0 .. n_char {
 		i++
 		res.write_b(buf[i])
 	}
--- a/vlib/v/gen/js/infix.v
+++ b/vlib/v/gen/js/infix.v
@ -15,12 +15,12 @@ fn (mut g JsGen) gen_plain_infix_expr(node ast.InfixExpr) {
 		g.write('BigInt(')
 		g.expr(node.left)
 		g.gen_deref_ptr(node.left_type)
-		g.write('.val)')
+		g.write('.valueOf())')
 		g.write(' $node.op.str() ')
 		g.write('BigInt(')
 		g.expr(node.right)
 		g.gen_deref_ptr(node.left_type)
-		g.write('.val)')
+		g.write('.valueOf())')
 	} else {
 		g.expr(node.left)
 		g.gen_deref_ptr(node.left_type)
--- a/vlib/v/gen/js/js.v
+++ b/vlib/v/gen/js/js.v
@ -979,16 +979,8 @@ fn (mut g JsGen) expr(node ast.Expr) {
 				}
 			} else {
 				g.write(node.op.str())
-
-				if node.op in [.inc, .dec] {
-					g.expr(node.right)
-					g.write('.val ')
-				} else {
-					g.write('(')
-					g.expr(node.right)
-					g.write('.valueOf()')
-					g.write(')')
-				}
+				g.expr(node.right)
+				g.write('.val ')
 			}
 		}
 		ast.RangeExpr {