v/vlib/strconv/utilities.v

594 lines
11 KiB
V
Raw Normal View History

module strconv
/*
f32/f64 to string utilities
Copyright (c) 2019-2021 Dario Deledda. All rights reserved.
Use of this source code is governed by an MIT license
that can be found in the LICENSE file.
This file contains the f32/f64 to string utilities functions
These functions are based on the work of:
Publication:PLDI 2018: Proceedings of the 39th ACM SIGPLAN
Conference on Programming Language Design and ImplementationJune 2018
Pages 270282 https://doi.org/10.1145/3192366.3192369
inspired by the Go version here:
https://github.com/cespare/ryu/tree/ba56a33f39e3bbbfa409095d0f9ae168a595feea
*/
2020-02-25 11:12:37 +01:00
import math.bits
//import math
/*
General Utilities
*/
2020-02-25 11:12:37 +01:00
fn assert1(t bool, msg string) {
if !t {
panic(msg)
}
}
[inline]
fn bool_to_int(b bool) int {
if b {
return 1
}
return 0
}
[inline]
fn bool_to_u32(b bool) u32 {
if b {
return u32(1)
}
return u32(0)
}
[inline]
fn bool_to_u64(b bool) u64 {
if b {
return u64(1)
}
return u64(0)
}
fn get_string_special(neg bool, expZero bool, mantZero bool) string {
if !mantZero {
return "nan"
2020-02-25 11:12:37 +01:00
}
if !expZero {
if neg {
return "-inf"
} else {
return "+inf"
}
}
if neg {
return "-0e+00"
}
return "0e+00"
}
/*
32 bit functions
*/
// decimal_len_32 return the number of decimal digits of the input
[deprecated]
pub fn decimal_len_32(u u32) int {
2020-02-25 11:12:37 +01:00
// Function precondition: u is not a 10-digit number.
// (9 digits are sufficient for round-tripping.)
// This benchmarked faster than the log2 approach used for u64.
assert1(u < 1000000000, "too big")
2020-04-02 14:26:35 +02:00
2020-02-25 11:12:37 +01:00
if u >= 100000000 { return 9 }
else if u >= 10000000 { return 8 }
else if u >= 1000000 { return 7 }
else if u >= 100000 { return 6 }
else if u >= 10000 { return 5 }
else if u >= 1000 { return 4 }
else if u >= 100 { return 3 }
else if u >= 10 { return 2 }
return 1
}
fn mul_shift_32(m u32, mul u64, ishift int) u32 {
2020-04-02 14:26:35 +02:00
// QTODO
//assert ishift > 32
2020-02-25 11:12:37 +01:00
hi, lo := bits.mul_64(u64(m), mul)
shifted_sum := (lo >> u64(ishift)) + (hi << u64(64-ishift))
2020-04-04 15:07:42 +02:00
assert1(shifted_sum <= 2147483647, "shiftedSum <= math.max_u32")
2020-02-25 11:12:37 +01:00
return u32(shifted_sum)
}
fn mul_pow5_invdiv_pow2(m u32, q u32, j int) u32 {
return mul_shift_32(m, pow5_inv_split_32[q], j)
}
fn mul_pow5_div_pow2(m u32, i u32, j int) u32 {
return mul_shift_32(m, pow5_split_32[i], j)
}
fn pow5_factor_32(i_v u32) u32 {
mut v := i_v
for n := u32(0); ; n++ {
2020-04-02 14:26:35 +02:00
q := v/5
2020-02-25 11:12:37 +01:00
r := v%5
if r != 0 {
return n
}
v = q
}
return v
}
// multiple_of_power_of_five_32 reports whether v is divisible by 5^p.
fn multiple_of_power_of_five_32(v u32, p u32) bool {
return pow5_factor_32(v) >= p
}
// multiple_of_power_of_two_32 reports whether v is divisible by 2^p.
fn multiple_of_power_of_two_32(v u32, p u32) bool {
2020-03-01 13:26:09 +01:00
return u32(bits.trailing_zeros_32(v)) >= p
2020-02-25 11:12:37 +01:00
}
// log10_pow2 returns floor(log_10(2^e)).
fn log10_pow2(e int) u32 {
// The first value this approximation fails for is 2^1651
// which is just greater than 10^297.
assert1(e >= 0, "e >= 0")
assert1(e <= 1650, "e <= 1650")
return (u32(e) * 78913) >> 18
}
// log10_pow5 returns floor(log_10(5^e)).
fn log10_pow5(e int) u32 {
// The first value this approximation fails for is 5^2621
// which is just greater than 10^1832.
assert1(e >= 0, "e >= 0")
assert1(e <= 2620, "e <= 2620")
return (u32(e) * 732923) >> 20
}
// pow5_bits returns ceil(log_2(5^e)), or else 1 if e==0.
fn pow5_bits(e int) int {
// This approximation works up to the point that the multiplication
// overflows at e = 3529. If the multiplication were done in 64 bits,
// it would fail at 5^4004 which is just greater than 2^9297.
assert1(e >= 0, "e >= 0")
assert1(e <= 3528, "e <= 3528")
return int( ((u32(e)*1217359)>>19) + 1)
}
/*
64 bit functions
*/
[deprecated]
// decimal_len_64 return the number of decimal digits of the input
pub fn decimal_len_64(u u64) int {
2020-02-25 11:12:37 +01:00
// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
log2 := 64 - bits.leading_zeros_64(u) - 1
t := (log2 + 1) * 1233 >> 12
return t - bool_to_int(u < powers_of_10[t]) + 1
}
fn shift_right_128(v Uint128, shift int) u64 {
// The shift value is always modulo 64.
// In the current implementation of the 64-bit version
// of Ryu, the shift value is always < 64.
// (It is in the range [2, 59].)
// Check this here in case a future change requires larger shift
// values. In this case this function needs to be adjusted.
assert1(shift < 64, "shift < 64")
return (v.hi << u64(64 - shift)) | (v.lo >> u32(shift))
}
fn mul_shift_64(m u64, mul Uint128, shift int) u64 {
hihi, hilo := bits.mul_64(m, mul.hi)
lohi, _ := bits.mul_64(m, mul.lo)
2020-05-18 21:38:06 +02:00
mut sum := Uint128{lo: lohi + hilo,hi: hihi}
2020-02-25 11:12:37 +01:00
if sum.lo < lohi {
sum.hi++ // overflow
}
return shift_right_128(sum, shift-64)
}
fn pow5_factor_64(v_i u64) u32 {
mut v := v_i
for n := u32(0); ; n++ {
q := v/5
r := v%5
if r != 0 {
return n
}
v = q
}
return u32(0)
}
fn multiple_of_power_of_five_64(v u64, p u32) bool {
return pow5_factor_64(v) >= p
}
fn multiple_of_power_of_two_64(v u64, p u32) bool {
return u32(bits.trailing_zeros_64(v)) >= p
}
/*
f64 to string with string format
*/
2020-02-25 11:12:37 +01:00
// TODO: Investigate precision issues
// f32_to_str_l return a string with the f32 converted in a string in decimal notation
[manualfree]
pub fn f32_to_str_l(f f32) string {
s := f32_to_str(f,6)
res := fxx_to_str_l_parse(s)
unsafe{s.free()}
return res
2020-02-25 11:12:37 +01:00
}
[manualfree]
pub fn f32_to_str_l_no_dot(f f32) string {
s := f32_to_str(f,6)
res := fxx_to_str_l_parse_no_dot(s)
unsafe{s.free()}
return res
}
[manualfree]
2020-02-25 11:12:37 +01:00
pub fn f64_to_str_l(f f64) string {
s := f64_to_str(f,18)
res := fxx_to_str_l_parse(s)
unsafe{s.free()}
return res
}
[manualfree]
pub fn f64_to_str_l_no_dot(f f64) string {
s := f64_to_str(f,18)
res := fxx_to_str_l_parse_no_dot(s)
unsafe{s.free()}
return res
}
2020-02-25 11:12:37 +01:00
// f64_to_str_l return a string with the f64 converted in a string in decimal notation
[manualfree]
pub fn fxx_to_str_l_parse(s string) string {
2020-02-25 11:12:37 +01:00
// check for +inf -inf Nan
if s.len > 2 && (s[0] == `n` || s[1] == `i`) {
2020-02-25 11:12:37 +01:00
return s
2020-04-02 14:26:35 +02:00
}
2020-02-25 11:12:37 +01:00
m_sgn_flag := false
mut sgn := 1
mut b := [26]byte{}
2020-02-25 11:12:37 +01:00
mut d_pos := 1
mut i := 0
mut i1 := 0
mut exp := 0
mut exp_sgn := 1
// get sign and decimal parts
2020-02-25 11:12:37 +01:00
for c in s {
if c == `-` {
sgn = -1
i++
} else if c == `+` {
sgn = 1
i++
}
else if c >= `0` && c <= `9` {
b[i1] = c
i1++
2020-02-25 11:12:37 +01:00
i++
} else if c == `.` {
if sgn > 0 {
d_pos = i
} else {
d_pos = i-1
}
i++
} else if c == `e` {
i++
break
} else {
return "Float conversion error!!"
}
}
b[i1] = 0
// get exponent
if s[i] == `-` {
exp_sgn = -1
i++
} else if s[i] == `+` {
exp_sgn = 1
i++
}
mut c := i
for c < s.len {
exp = exp * 10 + int(s[c]-`0`)
c++
2020-02-25 11:12:37 +01:00
}
// allocate exp+32 chars for the return string
mut res := []byte{len: exp+32, init: 0}
2020-02-25 11:12:37 +01:00
mut r_i := 0 // result string buffer index
2020-04-02 14:26:35 +02:00
2020-02-25 11:12:37 +01:00
//println("s:${sgn} b:${b[0]} es:${exp_sgn} exp:${exp}")
if sgn == 1 {
if m_sgn_flag {
res[r_i] = `+`
r_i++
}
} else {
res[r_i] = `-`
r_i++
}
2020-02-25 11:12:37 +01:00
i = 0
if exp_sgn >= 0 {
for b[i] != 0 {
res[r_i] = b[i]
r_i++
i++
if i >= d_pos && exp >= 0 {
if exp == 0 {
res[r_i] = `.`
r_i++
}
exp--
}
}
for exp >= 0 {
res[r_i] = `0`
r_i++
exp--
}
} else {
mut dot_p := true
for exp > 0 {
res[r_i] = `0`
r_i++
exp--
if dot_p {
res[r_i] = `.`
r_i++
dot_p = false
}
}
for b[i] != 0 {
res[r_i] = b[i]
r_i++
i++
}
}
/*
// remove the dot form the numbers like 2.
if r_i > 1 && res[r_i-1] == `.` {
r_i--
}
*/
res[r_i] = 0
return unsafe { tos(res.data,r_i) }
}
// f64_to_str_l return a string with the f64 converted in a string in decimal notation
[manualfree]
pub fn fxx_to_str_l_parse_no_dot(s string) string {
// check for +inf -inf Nan
if s.len > 2 && (s[0] == `n` || s[1] == `i`) {
return s
}
m_sgn_flag := false
mut sgn := 1
mut b := [26]byte{}
mut d_pos := 1
mut i := 0
mut i1 := 0
mut exp := 0
mut exp_sgn := 1
// get sign and decimal parts
for c in s {
if c == `-` {
sgn = -1
i++
} else if c == `+` {
sgn = 1
i++
}
else if c >= `0` && c <= `9` {
b[i1] = c
i1++
i++
} else if c == `.` {
if sgn > 0 {
d_pos = i
} else {
d_pos = i-1
}
i++
} else if c == `e` {
i++
break
} else {
return "Float conversion error!!"
}
}
b[i1] = 0
// get exponent
if s[i] == `-` {
exp_sgn = -1
i++
} else if s[i] == `+` {
exp_sgn = 1
i++
}
mut c := i
for c < s.len {
exp = exp * 10 + int(s[c]-`0`)
c++
}
// allocate exp+32 chars for the return string
mut res := []byte{len: exp+32, init: 0}
mut r_i := 0 // result string buffer index
//println("s:${sgn} b:${b[0]} es:${exp_sgn} exp:${exp}")
2020-02-25 11:12:37 +01:00
if sgn == 1 {
if m_sgn_flag {
res[r_i] = `+`
r_i++
2020-02-25 11:12:37 +01:00
}
} else {
res[r_i] = `-`
r_i++
2020-02-25 11:12:37 +01:00
}
i = 0
if exp_sgn >= 0 {
for b[i] != 0 {
res[r_i] = b[i]
r_i++
2020-02-25 11:12:37 +01:00
i++
if i >= d_pos && exp >= 0 {
if exp == 0 {
res[r_i] = `.`
r_i++
2020-02-25 11:12:37 +01:00
}
exp--
}
}
for exp >= 0 {
res[r_i] = `0`
r_i++
2020-02-25 11:12:37 +01:00
exp--
}
} else {
mut dot_p := true
for exp > 0 {
res[r_i] = `0`
r_i++
2020-02-25 11:12:37 +01:00
exp--
if dot_p {
res[r_i] = `.`
r_i++
2020-02-25 11:12:37 +01:00
dot_p = false
}
}
for b[i] != 0 {
res[r_i] = b[i]
r_i++
2020-04-02 14:26:35 +02:00
i++
2020-02-25 11:12:37 +01:00
}
}
// remove the dot form the numbers like 2.
if r_i > 1 && res[r_i-1] == `.` {
r_i--
}
2020-02-25 11:12:37 +01:00
res[r_i] = 0
return unsafe { tos(res.data,r_i) }
2020-02-25 11:12:37 +01:00
}
// dec_digits return the number of decimal digit of an u64
pub fn dec_digits(n u64) int {
if n <= 9_999_999_999 { // 1-10
if n <= 99_999 { // 5
if n <= 99 { // 2
if n <= 9 { // 1
return 1
} else {
return 2
}
} else {
if n <= 999 { // 3
return 3
} else {
if n <= 9999 { // 4
return 4
} else {
return 5
}
}
}
} else {
if n <= 9_999_999 { // 7
if n <= 999_999 { // 6
return 6
} else {
return 7
}
} else {
if n <= 99_999_999 { //8
return 8
} else {
if n <= 999_999_999 { // 9
return 9
}
return 10
}
}
}
} else {
if n <= 999_999_999_999_999 { // 5
if n <= 999_999_999_999 { // 2
if n <= 99_999_999_999 { // 1
return 11
} else {
return 12
}
} else {
if n <= 9_999_999_999_999 { // 3
return 13
} else {
if n <= 99_999_999_999_999 { // 4
return 14
} else {
return 15
}
}
}
} else {
if n <= 99_999_999_999_999_999 { // 7
if n <= 9_999_999_999_999_999 { // 6
return 16
} else {
return 17
}
} else {
if n <= 999_999_999_999_999_999 { //8
return 18
} else {
if n <= 9_999_999_999_999_999_999 { // 9
return 19
}
return 20
}
}
}
}
}