strconv: new faster atof function (#9903)

pull/9892/head^2
penguindark 2021-04-28 06:42:22 +02:00 committed by GitHub
parent faf2656335
commit a2014f86b7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 174 additions and 296 deletions

View File

@ -1,9 +1,9 @@
module strconv module strconv
/*
/*
atof util atof util
Copyright (c) 2019 Dario Deledda. All rights reserved. Copyright (c) 2019-2021 Dario Deledda. All rights reserved.
Use of this source code is governed by an MIT license Use of this source code is governed by an MIT license
that can be found in the LICENSE file. that can be found in the LICENSE file.
@ -20,33 +20,32 @@ Original license: MIT
96 bit operation utilities 96 bit operation utilities
Note: once u128 is available these functions can be refactored Note: once u128 is available these functions can be refactored
*/ */
// lsr96 shifts the 96 bit value s2:s1:s0 one bit to the right (logical shift).
// s2 is the most significant word: its lowest bit moves into the top bit of s1,
// and the lowest bit of s1 moves into the top bit of s0.
// The three result words are returned in the same high, middle, low order.
fn lsr96(s2 u32, s1 u32, s0 u32) (u32, u32, u32) {
	// bit 0 of each upper word becomes bit 31 of the word below it
	carry_into_mid := (s2 & u32(1)) << 31
	carry_into_low := (s1 & u32(1)) << 31
	return s2 >> 1, (s1 >> 1) | carry_into_mid, (s0 >> 1) | carry_into_low
}
// lsl96 shifts the 96 bit value s2:s1:s0 one bit to the left (logical shift).
// s2 is the most significant word: the top bit of s0 moves into bit 0 of s1,
// the top bit of s1 moves into bit 0 of s2, and the top bit of s2 is dropped.
// The three result words are returned in the same high, middle, low order.
fn lsl96(s2 u32, s1 u32, s0 u32) (u32, u32, u32) {
	// bit 31 of each lower word becomes bit 0 of the word above it
	carry_from_mid := s1 >> 31
	carry_from_low := s0 >> 31
	return (s2 << 1) | carry_from_mid, (s1 << 1) | carry_from_low, s0 << 1
}
// sum on 96 bit // sum on 96 bit
fn add96(s2 u32, s1 u32, s0 u32, d2 u32, d1 u32, d0 u32) (u32,u32,u32) { fn add96(s2 u32, s1 u32, s0 u32, d2 u32, d1 u32, d0 u32) (u32, u32, u32) {
mut w := u64(0) mut w := u64(0)
mut r0 := u32(0) mut r0 := u32(0)
mut r1 := u32(0) mut r1 := u32(0)
@ -59,11 +58,11 @@ fn add96(s2 u32, s1 u32, s0 u32, d2 u32, d1 u32, d0 u32) (u32,u32,u32) {
w >>= 32 w >>= 32
w += u64(s2) + u64(d2) w += u64(s2) + u64(d2)
r2 = u32(w) r2 = u32(w)
return r2,r1,r0 return r2, r1, r0
} }
// subtraction on 96 bit // subtraction on 96 bit
fn sub96(s2 u32, s1 u32, s0 u32, d2 u32, d1 u32, d0 u32) (u32,u32,u32) { fn sub96(s2 u32, s1 u32, s0 u32, d2 u32, d1 u32, d0 u32) (u32, u32, u32) {
mut w := u64(0) mut w := u64(0)
mut r0 := u32(0) mut r0 := u32(0)
mut r1 := u32(0) mut r1 := u32(0)
@ -76,66 +75,49 @@ fn sub96(s2 u32, s1 u32, s0 u32, d2 u32, d1 u32, d0 u32) (u32,u32,u32) {
w >>= 32 w >>= 32
w += u64(s2) - u64(d2) w += u64(s2) - u64(d2)
r2 = u32(w) r2 = u32(w)
return r2,r1,r0 return r2, r1, r0
} }
/* /*
Constants Constants
*/ */
// Constants shared by the parser and the converter:
// - digits: how many decimal digits the parser accumulates into the mantissa
//   before it only adjusts the decimal exponent,
// - double_*: u64 bit patterns of signed zero and signed infinity,
// - fsm_*: numeric ids of the parser state machine states,
// - parser_*: status codes returned by the parser,
// - c_*: the characters the parser recognises, plus the constant ten as u32.
const ( const (
// //
// f64 constants // f64 constants
// //
digits = 18 digits = 18
double_plus_zero = u64(0x0000000000000000) double_plus_zero = u64(0x0000000000000000)
double_minus_zero = u64(0x8000000000000000) double_minus_zero = u64(0x8000000000000000)
double_plus_infinity = u64(0x7FF0000000000000) double_plus_infinity = u64(0x7FF0000000000000)
double_minus_infinity = u64(0xFFF0000000000000) double_minus_infinity = u64(0xFFF0000000000000)
// //
// parser state machine states
//
fsm_a = 0
fsm_b = 1
fsm_c = 2
fsm_d = 3
fsm_e = 4
fsm_f = 5
fsm_g = 6
fsm_h = 7
fsm_i = 8
fsm_stop = 9
//
// Possible parser return values. // Possible parser return values.
// //
parser_ok = 0 // parser finished OK parser_ok = 0 // parser finished OK
parser_pzero = 1 // no digits or number is smaller than +-2^-1022 parser_pzero = 1 // no digits or number is smaller than +-2^-1022
parser_mzero = 2 // number is negative, module smaller parser_mzero = 2 // number is negative, module smaller
parser_pinf = 3 // number is higher than +HUGE_VAL parser_pinf = 3 // number is higher than +HUGE_VAL
parser_minf = 4 // number is lower than -HUGE_VAL parser_minf = 4 // number is lower than -HUGE_VAL
// //
// char constants // char constants
// Note: Modify these if working with non-ASCII encoding // Note: Modify these if working with non-ASCII encoding
// //
c_dpoint = `.` c_dpoint = `.`
c_plus = `+` c_plus = `+`
c_minus = `-` c_minus = `-`
c_zero = `0` c_zero = `0`
c_nine = `9` c_nine = `9`
c_ten = u32(10) c_ten = u32(10)
) )
/* /*
Utility Utility
*/ */
// is_digit reports whether x lies in the inclusive range c_zero..c_nine.
// NOTE: change c_zero / c_nine if working with a non-ASCII encoding.
fn is_digit(x byte) bool {
	return x >= strconv.c_zero && x <= strconv.c_nine
}
fn is_space(x byte) bool { fn is_space(x byte) bool {
@ -147,212 +129,115 @@ fn is_exp(x byte) bool {
} }
/* /*
Support struct Support struct
*/ */
/*
String parser
*/
// parser scans the decimal literal in `s` and returns a status code together
// with the PrepNumber holding the sign, the decimal mantissa and the decimal
// exponent that were read.
//
// Accepted shape: optional leading spaces, at most one `+` or `-`, integer
// digits, an optional `.` followed by fraction digits, and an optional `e`/`E`
// exponent with its own optional sign. Scanning stops at the first character
// that does not fit. Every read of `s` is bounds checked, so the input needs no
// padding character after the number and an empty string is handled.
//
// The status is one of the parser_* constants: parser_ok when the converter
// still has to build the value, otherwise the signed zero or signed infinity
// the caller should use directly.
fn parser(s string) (int, PrepNumber) {
	mut digx := 0
	mut result := strconv.parser_ok
	mut expneg := false
	mut expexp := 0
	mut i := 0
	mut pn := PrepNumber{}

	// skip leading spaces
	for i < s.len && s[i].is_space() {
		i++
	}

	// optional sign: at most one, and only if a character is left to look at
	if i < s.len {
		if s[i] == strconv.c_minus {
			pn.negative = true
			i++
		} else if s[i] == strconv.c_plus {
			i++
		}
	}

	// integer part of the mantissa
	for i < s.len && s[i].is_digit() {
		if digx < strconv.digits {
			pn.mantissa *= 10
			pn.mantissa += u64(s[i] - strconv.c_zero)
			// leading zeros leave the mantissa at 0 and must not use up the
			// budget of significant digits
			if pn.mantissa != 0 {
				digx++
			}
		} else if pn.exponent < 2147483647 {
			// digit budget exhausted: drop the digit but keep the magnitude
			pn.exponent++
		}
		i++
	}

	// fractional part of the mantissa
	if i < s.len && s[i] == strconv.c_dpoint {
		i++
		for i < s.len && s[i].is_digit() {
			if digx < strconv.digits {
				pn.mantissa *= 10
				pn.mantissa += u64(s[i] - strconv.c_zero)
				// every stored fraction digit scales the value down by ten
				pn.exponent--
				// zeros right after the point (0.000x) are not significant
				if pn.mantissa != 0 {
					digx++
				}
			}
			i++
		}
	}

	// exponent
	if i < s.len && (s[i] == `e` || s[i] == `E`) {
		i++
		if i < s.len {
			// exponent sign
			if s[i] == strconv.c_plus {
				i++
			} else if s[i] == strconv.c_minus {
				expneg = true
				i++
			}
			for i < s.len && s[i].is_digit() {
				// stop accumulating before expexp * 10 could overflow an int
				if expexp < 214748364 {
					expexp *= 10
					expexp += int(s[i] - strconv.c_zero)
				}
				i++
			}
		}
	}
	if expneg {
		expexp = -expexp
	}

	// Combine both exponent contributions in 64 bit and saturate, so that two
	// large values of the same sign cannot wrap around and flip the result
	// between zero and infinity.
	// NOTE(review): assumes PrepNumber.exponent is an int; its declaration is
	// outside this view.
	mut total_exp := i64(pn.exponent) + i64(expexp)
	if total_exp > i64(2147483647) {
		total_exp = i64(2147483647)
	} else if total_exp < i64(-2147483647) {
		total_exp = i64(-2147483647)
	}
	pn.exponent = int(total_exp)

	if pn.mantissa == 0 {
		// no significant digit was read: signed zero
		if pn.negative {
			result = strconv.parser_mzero
		} else {
			result = strconv.parser_pzero
		}
	} else if pn.exponent > 309 {
		if pn.negative {
			result = strconv.parser_minf
		} else {
			result = strconv.parser_pinf
		}
	} else if pn.exponent < -328 {
		if pn.negative {
			result = strconv.parser_mzero
		} else {
			result = strconv.parser_pzero
		}
	}
	return result, pn
}
/* /*
Converter to the bit form of the f64 number Converter to the bit form of the f64 number
*/ */
// converter return a u64 with the bit image of the f64 number // converter return a u64 with the bit image of the f64 number
@ -367,21 +252,21 @@ fn converter(mut pn PrepNumber) u64 {
mut r2 := u32(0) // 96-bit precision integer mut r2 := u32(0) // 96-bit precision integer
mut r1 := u32(0) mut r1 := u32(0)
mut r0 := u32(0) mut r0 := u32(0)
mask28 := u32(u64(0xF)<<28) mask28 := u32(u64(0xF) << 28)
mut result := u64(0) mut result := u64(0)
// working on 3 u32 to have 96 bit precision // working on 3 u32 to have 96 bit precision
s0 = u32(pn.mantissa & u64(0x00000000FFFFFFFF)) s0 = u32(pn.mantissa & u64(0x00000000FFFFFFFF))
s1 = u32(pn.mantissa>>32) s1 = u32(pn.mantissa >> 32)
s2 = u32(0) s2 = u32(0)
// so we take the decimal exponent off // so we take the decimal exponent off
for pn.exponent > 0 { for pn.exponent > 0 {
q2,q1,q0 = lsl96(s2, s1, s0) // q = s * 2 q2, q1, q0 = lsl96(s2, s1, s0) // q = s * 2
r2,r1,r0 = lsl96(q2, q1, q0) // r = s * 4 <=> q * 2 r2, r1, r0 = lsl96(q2, q1, q0) // r = s * 4 <=> q * 2
s2,s1,s0 = lsl96(r2, r1, r0) // s = s * 8 <=> r * 2 s2, s1, s0 = lsl96(r2, r1, r0) // s = s * 8 <=> r * 2
s2,s1,s0 = add96(s2, s1, s0, q2, q1, q0) // s = (s * 8) + (s * 2) <=> s*10 s2, s1, s0 = add96(s2, s1, s0, q2, q1, q0) // s = (s * 8) + (s * 2) <=> s*10
pn.exponent-- pn.exponent--
for (s2 & mask28) != 0 { for (s2 & mask28) != 0 {
q2,q1,q0 = lsr96(s2, s1, s0) q2, q1, q0 = lsr96(s2, s1, s0)
binexp++ binexp++
s2 = q2 s2 = q2
s1 = q1 s1 = q1
@ -389,25 +274,25 @@ fn converter(mut pn PrepNumber) u64 {
} }
} }
for pn.exponent < 0 { for pn.exponent < 0 {
for !((s2 & (u32(1)<<31)) != 0) { for !((s2 & (u32(1) << 31)) != 0) {
q2,q1,q0 = lsl96(s2, s1, s0) q2, q1, q0 = lsl96(s2, s1, s0)
binexp-- binexp--
s2 = q2 s2 = q2
s1 = q1 s1 = q1
s0 = q0 s0 = q0
} }
q2 = s2 / c_ten q2 = s2 / strconv.c_ten
r1 = s2 % c_ten r1 = s2 % strconv.c_ten
r2 = (s1>>8) | (r1<<24) r2 = (s1 >> 8) | (r1 << 24)
q1 = r2 / c_ten q1 = r2 / strconv.c_ten
r1 = r2 % c_ten r1 = r2 % strconv.c_ten
r2 = ((s1 & u32(0xFF))<<16) | (s0>>16) | (r1<<24) r2 = ((s1 & u32(0xFF)) << 16) | (s0 >> 16) | (r1 << 24)
r0 = r2 / c_ten r0 = r2 / strconv.c_ten
r1 = r2 % c_ten r1 = r2 % strconv.c_ten
q1 = (q1<<8) | ((r0 & u32(0x00FF0000))>>16) q1 = (q1 << 8) | ((r0 & u32(0x00FF0000)) >> 16)
q0 = r0<<16 q0 = r0 << 16
r2 = (s0 & u32(0xFFFF)) | (r1<<16) r2 = (s0 & u32(0xFFFF)) | (r1 << 16)
q0 |= r2 / c_ten q0 |= r2 / strconv.c_ten
s2 = q2 s2 = q2
s1 = q1 s1 = q1
s0 = q0 s0 = q0
@ -417,7 +302,7 @@ fn converter(mut pn PrepNumber) u64 {
// normalization, the 28 bit in s2 must the leftest one in the variable // normalization, the 28 bit in s2 must the leftest one in the variable
if s2 != 0 || s1 != 0 || s0 != 0 { if s2 != 0 || s1 != 0 || s0 != 0 {
for (s2 & mask28) == 0 { for (s2 & mask28) == 0 {
q2,q1,q0 = lsl96(s2, s1, s0) q2, q1, q0 = lsl96(s2, s1, s0)
binexp-- binexp--
s2 = q2 s2 = q2
s1 = q1 s1 = q1
@ -436,19 +321,22 @@ fn converter(mut pn PrepNumber) u64 {
* If bit 53 is 0, round down * If bit 53 is 0, round down
* If bit 53 is 1, round up * If bit 53 is 1, round up
*/ */
/* test case 1 complete /*
test case 1 complete
s2=0x1FFFFFFF s2=0x1FFFFFFF
s1=0xFFFFFF80 s1=0xFFFFFF80
s0=0x0 s0=0x0
*/ */
/* test case 1 check_round_bit /*
test case 1 check_round_bit
s2=0x18888888 s2=0x18888888
s1=0x88888880 s1=0x88888880
s0=0x0 s0=0x0
*/ */
/* test case check_round_bit + normalization /*
test case check_round_bit + normalization
s2=0x18888888 s2=0x18888888
s1=0x88888F80 s1=0x88888F80
s0=0x0 s0=0x0
@ -457,27 +345,26 @@ fn converter(mut pn PrepNumber) u64 {
// C.printf("mantissa before rounding: %08x%08x%08x binexp: %d \n", s2,s1,s0,binexp) // C.printf("mantissa before rounding: %08x%08x%08x binexp: %d \n", s2,s1,s0,binexp)
// s1 => 0xFFFFFFxx only F are rapresented // s1 => 0xFFFFFFxx only F are rapresented
nbit := 7 nbit := 7
check_round_bit := u32(1)<<u32(nbit) check_round_bit := u32(1) << u32(nbit)
check_round_mask := u32(0xFFFFFFFF)<<u32(nbit) check_round_mask := u32(0xFFFFFFFF) << u32(nbit)
if (s1 & check_round_bit) != 0 { if (s1 & check_round_bit) != 0 {
// C.printf("need round!! cehck mask: %08x\n", s1 & ~check_round_mask ) // C.printf("need round!! cehck mask: %08x\n", s1 & ~check_round_mask )
if (s1 & ~check_round_mask) != 0 { if (s1 & ~check_round_mask) != 0 {
// C.printf("Add 1!\n") // C.printf("Add 1!\n")
s2,s1,s0 = add96(s2, s1, s0, 0, check_round_bit, 0) s2, s1, s0 = add96(s2, s1, s0, 0, check_round_bit, 0)
} } else {
else {
// C.printf("All 0!\n") // C.printf("All 0!\n")
if (s1 & (check_round_bit<<u32(1))) != 0 { if (s1 & (check_round_bit << u32(1))) != 0 {
// C.printf("Add 1 form -1 bit control!\n") // C.printf("Add 1 form -1 bit control!\n")
s2,s1,s0 = add96(s2, s1, s0, 0, check_round_bit, 0) s2, s1, s0 = add96(s2, s1, s0, 0, check_round_bit, 0)
} }
} }
s1 = s1 & check_round_mask s1 = s1 & check_round_mask
s0 = u32(0) s0 = u32(0)
// recheck normalization // recheck normalization
if s2 & (mask28<<u32(1)) != 0 { if s2 & (mask28 << u32(1)) != 0 {
// C.printf("Renormalize!!") // C.printf("Renormalize!!")
q2,q1,q0 = lsr96(s2, s1, s0) q2, q1, q0 = lsr96(s2, s1, s0)
binexp-- binexp--
s2 = q2 s2 = q2
s1 = q1 s1 = q1
@ -492,26 +379,22 @@ fn converter(mut pn PrepNumber) u64 {
binexp += 1023 binexp += 1023
if binexp > 2046 { if binexp > 2046 {
if pn.negative { if pn.negative {
result = double_minus_infinity result = strconv.double_minus_infinity
} else {
result = strconv.double_plus_infinity
} }
else { } else if binexp < 1 {
result = double_plus_infinity
}
}
else if binexp < 1 {
if pn.negative { if pn.negative {
result = double_minus_zero result = strconv.double_minus_zero
} else {
result = strconv.double_plus_zero
} }
else { } else if s2 != 0 {
result = double_plus_zero
}
}
else if s2 != 0 {
mut q := u64(0) mut q := u64(0)
binexs2 := u64(binexp)<<52 binexs2 := u64(binexp) << 52
q = (u64(s2 & ~mask28)<<24) | ((u64(s1) + u64(128))>>8) | binexs2 q = (u64(s2 & ~mask28) << 24) | ((u64(s1) + u64(128)) >> 8) | binexs2
if pn.negative { if pn.negative {
q |= (u64(1)<<63) q |= (u64(1) << 63)
} }
result = q result = q
} }
@ -519,38 +402,33 @@ fn converter(mut pn PrepNumber) u64 {
} }
/* /*
Public functions Public functions
*/ */
// atof64 parses `s` with parser and returns the resulting f64.
// When the parser reports parser_ok the bit image is built by converter;
// for the other parser_* codes the matching signed zero or signed infinity
// bit pattern is used directly. Any other status yields the all-zero pattern.
pub fn atof64(s string) f64 {
	status, parsed := parser(s)
	// converter takes its argument as `mut`, so work on a mutable copy
	mut prep := parsed
	bits := match status {
		strconv.parser_ok { converter(mut prep) }
		strconv.parser_pzero { strconv.double_plus_zero }
		strconv.parser_mzero { strconv.double_minus_zero }
		strconv.parser_pinf { strconv.double_plus_infinity }
		strconv.parser_minf { strconv.double_minus_infinity }
		else { u64(0) }
	}
	// reinterpret the u64 bit image as f64 through the Float64u union
	mut image := Float64u{}
	image.u = bits
	return unsafe { image.f }
}