strconv: new faster atof function (#9903)

pull/9892/head^2
penguindark 2021-04-28 06:42:22 +02:00 committed by GitHub
parent faf2656335
commit a2014f86b7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 174 additions and 296 deletions

View File

@ -1,9 +1,9 @@
module strconv
/*
/*
atof util
Copyright (c) 2019 Dario Deledda. All rights reserved.
Copyright (c) 2019-2021 Dario Deledda. All rights reserved.
Use of this source code is governed by an MIT license
that can be found in the LICENSE file.
@ -20,33 +20,32 @@ Original license: MIT
96 bit operation utilities
Note: once u128 is available these functions can be refactored
*/
// lsr96 performs a one-bit logical right shift on a 96-bit value that is split
// across three u32 words, where s2 is the highest word and s0 the lowest.
// The low bit of each higher word is moved into bit 31 of the word below it;
// the bit shifted out of s0 is discarded.
// Returns the shifted words in (high, middle, low) order.
// NOTE: the signature and the shift/return statements are listed twice in this
// view; the two copies differ only in whitespace.
fn lsr96(s2 u32, s1 u32, s0 u32) (u32,u32,u32) {
fn lsr96(s2 u32, s1 u32, s0 u32) (u32, u32, u32) {
mut r0 := u32(0)
mut r1 := u32(0)
mut r2 := u32(0)
// low word: shift down and pull in bit 0 of the middle word as the new bit 31
r0 = (s0>>1) | ((s1 & u32(1))<<31)
// middle word: shift down and pull in bit 0 of the high word as the new bit 31
r1 = (s1>>1) | ((s2 & u32(1))<<31)
// high word: plain shift, the vacated top bit becomes 0
r2 = s2>>1
return r2,r1,r0
r0 = (s0 >> 1) | ((s1 & u32(1)) << 31)
r1 = (s1 >> 1) | ((s2 & u32(1)) << 31)
r2 = s2 >> 1
return r2, r1, r0
}
// lsl96 performs a one-bit logical left shift on a 96-bit value that is split
// across three u32 words, where s2 is the highest word and s0 the lowest.
// Bit 31 of each lower word is moved into bit 0 of the word above it;
// the bit shifted out of s2 is discarded.
// Returns the shifted words in (high, middle, low) order.
// NOTE: the signature and the shift/return statements are listed twice in this
// view; the two copies differ only in whitespace.
fn lsl96(s2 u32, s1 u32, s0 u32) (u32,u32,u32) {
fn lsl96(s2 u32, s1 u32, s0 u32) (u32, u32, u32) {
mut r0 := u32(0)
mut r1 := u32(0)
mut r2 := u32(0)
// high word: shift up and pull in bit 31 of the middle word as the new bit 0
r2 = (s2<<1) | ((s1 & (u32(1)<<31))>>31)
// middle word: shift up and pull in bit 31 of the low word as the new bit 0
r1 = (s1<<1) | ((s0 & (u32(1)<<31))>>31)
// low word: plain shift, the vacated bit 0 becomes 0
r0 = s0<<1
return r2,r1,r0
r2 = (s2 << 1) | ((s1 & (u32(1) << 31)) >> 31)
r1 = (s1 << 1) | ((s0 & (u32(1) << 31)) >> 31)
r0 = s0 << 1
return r2, r1, r0
}
// sum on 96 bit
fn add96(s2 u32, s1 u32, s0 u32, d2 u32, d1 u32, d0 u32) (u32,u32,u32) {
fn add96(s2 u32, s1 u32, s0 u32, d2 u32, d1 u32, d0 u32) (u32, u32, u32) {
mut w := u64(0)
mut r0 := u32(0)
mut r1 := u32(0)
@ -59,11 +58,11 @@ fn add96(s2 u32, s1 u32, s0 u32, d2 u32, d1 u32, d0 u32) (u32,u32,u32) {
w >>= 32
w += u64(s2) + u64(d2)
r2 = u32(w)
return r2,r1,r0
return r2, r1, r0
}
// subtraction on 96 bit
fn sub96(s2 u32, s1 u32, s0 u32, d2 u32, d1 u32, d0 u32) (u32,u32,u32) {
fn sub96(s2 u32, s1 u32, s0 u32, d2 u32, d1 u32, d0 u32) (u32, u32, u32) {
mut w := u64(0)
mut r0 := u32(0)
mut r1 := u32(0)
@ -76,39 +75,23 @@ fn sub96(s2 u32, s1 u32, s0 u32, d2 u32, d1 u32, d0 u32) (u32,u32,u32) {
w >>= 32
w += u64(s2) - u64(d2)
r2 = u32(w)
return r2,r1,r0
return r2, r1, r0
}
/*
Constants
*/
const (
//
// f64 constants
//
//
// f64 constants
//
digits = 18
double_plus_zero = u64(0x0000000000000000)
double_minus_zero = u64(0x8000000000000000)
double_plus_infinity = u64(0x7FF0000000000000)
double_minus_infinity = u64(0xFFF0000000000000)
//
// parser state machine states
//
fsm_a = 0
fsm_b = 1
fsm_c = 2
fsm_d = 3
fsm_e = 4
fsm_f = 5
fsm_g = 6
fsm_h = 7
fsm_i = 8
fsm_stop = 9
//
// Possible parser return values.
//
parser_ok = 0 // parser finished OK
@ -127,15 +110,14 @@ const (
c_nine = `9`
c_ten = u32(10)
)
/*
Utility
*/
// NOTE: Modify these if working with non-ASCII encoding
// is_digit reports whether x lies in the inclusive range c_zero..c_nine.
// c_nine is `9`; c_zero is defined outside this view (presumably `0` - confirm).
// NOTE: the return statement is listed twice in this view; the second copy only
// adds the `strconv.` module prefix to the constant names.
fn is_digit(x byte) bool {
return (x >= c_zero && x <= c_nine) == true
return (x >= strconv.c_zero && x <= strconv.c_nine) == true
}
fn is_space(x byte) bool {
@ -147,212 +129,115 @@ fn is_exp(x byte) bool {
}
/*
Support struct
*/
/*
String parser
NOTE: #TOFIX need one char after the last char of the number
*/
// parser scans the decimal text in s and returns a status code together with a
// PrepNumber carrying the sign, the decimal mantissa and the decimal exponent
// that converter consumes.
//
// NOTE: this listing interleaves two versions of the body: lines that use
// `state`, `c` and the fsm_* states belong to a character-driven state machine,
// while lines that index s[i] under `i < s.len` checks belong to a linear scan.
//
// Linear scan order: leading spaces, optional `-`, optional `+`, integer digits,
// optional `.` with fraction digits, optional `e`/`E` with a signed exponent.
// At most `digits` mantissa digits are accumulated: further integer digits only
// raise pn.exponent, and further fraction digits are dropped.
//
// Status: parser_pzero/parser_mzero when the mantissa is 0 or the exponent is
// below -328, parser_pinf/parser_minf when the exponent is above 309, and
// parser_ok otherwise.
fn parser(s string) (int,PrepNumber) {
mut state := fsm_a
fn parser(s string) (int, PrepNumber) {
// digx counts the mantissa digits accumulated so far
mut digx := 0
mut c := byte(` `) // initial value for kicking off the state machine
mut result := parser_ok
mut result := strconv.parser_ok
// expneg / expexp hold the sign and magnitude of the explicit exponent
mut expneg := false
mut expexp := 0
// i is the read position inside s
mut i := 0
mut pn := PrepNumber{
}
for state != fsm_stop {
match state {
// skip starting spaces
fsm_a {
if is_space(c) == true {
c = s[i]
mut pn := PrepNumber{}
// skip spaces
for i < s.len && s[i].is_space() {
i++
}
else {
state = fsm_b
}
}
// check for the sign or point
fsm_b {
state = fsm_c
if c == c_plus {
c = s[i]
i++
}
else if c == c_minus {
// check negatives
// NOTE(review): s[i] is read here without the `i < s.len` guard used by the
// loops around it; for an empty or all-space s, i equals s.len at this point
// and the index is past the end - confirm and guard.
if s[i] == `-` {
pn.negative = true
c = s[i]
i++
}
else if is_digit(c) {
}
else if c == c_dpoint {
}
else {
state = fsm_stop
}
}
// skip the initial zeros
fsm_c {
if c == c_zero {
c = s[i]
// positive sign: ignore it
// NOTE(review): same unguarded s[i] read as the `-` check; a lone "-" also
// leaves i equal to s.len here - confirm and guard.
if s[i] == `+` {
i++
}
else if c == c_dpoint {
c = s[i]
i++
state = fsm_d
}
else {
state = fsm_e
}
}
// reading leading zeros in the fractional part of mantissa
fsm_d {
if c == c_zero {
c = s[i]
i++
if pn.exponent > -2147483647 {
pn.exponent--
}
}
else {
state = fsm_f
}
}
// reading integer part of mantissa
fsm_e {
if is_digit(c) {
if digx < digits {
// read mantissa
for i < s.len && s[i].is_digit() {
// println("$i => ${s[i]}")
if digx < strconv.digits {
pn.mantissa *= 10
pn.mantissa += u64(c - c_zero)
pn.mantissa += u64(s[i] - strconv.c_zero)
digx++
}
// digit limit reached: keep the magnitude by raising the decimal exponent
else if pn.exponent < 2147483647 {
} else if pn.exponent < 2147483647 {
pn.exponent++
}
c = s[i]
i++
}
else if c == c_dpoint {
c = s[i]
// read mantissa decimals
if (i < s.len) && (s[i] == `.`) {
i++
state = fsm_f
}
else {
state = fsm_f
}
}
// reading fractional part of mantissa
fsm_f {
if is_digit(c) {
if digx < digits {
for i < s.len && s[i].is_digit() {
if digx < strconv.digits {
pn.mantissa *= 10
pn.mantissa += u64(c - c_zero)
pn.mantissa += u64(s[i] - strconv.c_zero)
// each accepted fraction digit lowers the decimal exponent by one
pn.exponent--
digx++
}
c = s[i]
i++
}
else if is_exp(c) {
c = s[i]
}
// read exponent
if (i < s.len) && ((s[i] == `e`) || (s[i] == `E`)) {
i++
state = fsm_g
}
else {
state = fsm_g
}
}
// reading sign of exponent
fsm_g {
if c == c_plus {
c = s[i]
if i < s.len {
// exponent sign
if s[i] == strconv.c_plus {
i++
}
else if c == c_minus {
} else if s[i] == strconv.c_minus {
expneg = true
c = s[i]
i++
}
state = fsm_h
}
// skipping leading zeros of exponent
fsm_h {
if c == c_zero {
c = s[i]
i++
}
else {
state = fsm_i
}
}
// reading exponent digits
fsm_i {
if is_digit(c) {
for i < s.len && s[i].is_digit() {
// stop accumulating once expexp reaches 214748364 (presumably so the
// multiply by 10 stays inside the int range - confirm)
if expexp < 214748364 {
expexp *= 10
expexp += int(c - c_zero)
expexp += int(s[i] - strconv.c_zero)
}
c = s[i]
i++
}
else {
state = fsm_stop
}
}
else {
}}
// C.printf("len: %d i: %d str: %s \n",s.len,i,s[..i])
if i >= s.len {
state = fsm_stop
}
}
// fold the explicit exponent into the one derived from the digit positions
if expneg {
expexp = -expexp
}
pn.exponent += expexp
// classify the result: signed zero, signed infinity, or parser_ok (the initial value)
if pn.mantissa == 0 {
if pn.negative {
result = parser_mzero
result = strconv.parser_mzero
} else {
result = strconv.parser_pzero
}
else {
result = parser_pzero
}
}
else if pn.exponent > 309 {
} else if pn.exponent > 309 {
if pn.negative {
result = parser_minf
result = strconv.parser_minf
} else {
result = strconv.parser_pinf
}
else {
result = parser_pinf
}
}
else if pn.exponent < -328 {
} else if pn.exponent < -328 {
if pn.negative {
result = parser_mzero
}
else {
result = parser_pzero
result = strconv.parser_mzero
} else {
result = strconv.parser_pzero
}
}
return result,pn
return result, pn
}
/*
Converter to the bit form of the f64 number
*/
// converter returns a u64 holding the bit image of the f64 number
@ -367,21 +252,21 @@ fn converter(mut pn PrepNumber) u64 {
mut r2 := u32(0) // 96-bit precision integer
mut r1 := u32(0)
mut r0 := u32(0)
mask28 := u32(u64(0xF)<<28)
mask28 := u32(u64(0xF) << 28)
mut result := u64(0)
// working on 3 u32 to have 96 bit precision
s0 = u32(pn.mantissa & u64(0x00000000FFFFFFFF))
s1 = u32(pn.mantissa>>32)
s1 = u32(pn.mantissa >> 32)
s2 = u32(0)
// so we take the decimal exponent off
for pn.exponent > 0 {
q2,q1,q0 = lsl96(s2, s1, s0) // q = s * 2
r2,r1,r0 = lsl96(q2, q1, q0) // r = s * 4 <=> q * 2
s2,s1,s0 = lsl96(r2, r1, r0) // s = s * 8 <=> r * 2
s2,s1,s0 = add96(s2, s1, s0, q2, q1, q0) // s = (s * 8) + (s * 2) <=> s*10
q2, q1, q0 = lsl96(s2, s1, s0) // q = s * 2
r2, r1, r0 = lsl96(q2, q1, q0) // r = s * 4 <=> q * 2
s2, s1, s0 = lsl96(r2, r1, r0) // s = s * 8 <=> r * 2
s2, s1, s0 = add96(s2, s1, s0, q2, q1, q0) // s = (s * 8) + (s * 2) <=> s*10
pn.exponent--
for (s2 & mask28) != 0 {
q2,q1,q0 = lsr96(s2, s1, s0)
q2, q1, q0 = lsr96(s2, s1, s0)
binexp++
s2 = q2
s1 = q1
@ -389,25 +274,25 @@ fn converter(mut pn PrepNumber) u64 {
}
}
for pn.exponent < 0 {
for !((s2 & (u32(1)<<31)) != 0) {
q2,q1,q0 = lsl96(s2, s1, s0)
for !((s2 & (u32(1) << 31)) != 0) {
q2, q1, q0 = lsl96(s2, s1, s0)
binexp--
s2 = q2
s1 = q1
s0 = q0
}
q2 = s2 / c_ten
r1 = s2 % c_ten
r2 = (s1>>8) | (r1<<24)
q1 = r2 / c_ten
r1 = r2 % c_ten
r2 = ((s1 & u32(0xFF))<<16) | (s0>>16) | (r1<<24)
r0 = r2 / c_ten
r1 = r2 % c_ten
q1 = (q1<<8) | ((r0 & u32(0x00FF0000))>>16)
q0 = r0<<16
r2 = (s0 & u32(0xFFFF)) | (r1<<16)
q0 |= r2 / c_ten
q2 = s2 / strconv.c_ten
r1 = s2 % strconv.c_ten
r2 = (s1 >> 8) | (r1 << 24)
q1 = r2 / strconv.c_ten
r1 = r2 % strconv.c_ten
r2 = ((s1 & u32(0xFF)) << 16) | (s0 >> 16) | (r1 << 24)
r0 = r2 / strconv.c_ten
r1 = r2 % strconv.c_ten
q1 = (q1 << 8) | ((r0 & u32(0x00FF0000)) >> 16)
q0 = r0 << 16
r2 = (s0 & u32(0xFFFF)) | (r1 << 16)
q0 |= r2 / strconv.c_ten
s2 = q2
s1 = q1
s0 = q0
@ -417,7 +302,7 @@ fn converter(mut pn PrepNumber) u64 {
// normalization: bit 28 in s2 must be the leftmost one in the variable
if s2 != 0 || s1 != 0 || s0 != 0 {
for (s2 & mask28) == 0 {
q2,q1,q0 = lsl96(s2, s1, s0)
q2, q1, q0 = lsl96(s2, s1, s0)
binexp--
s2 = q2
s1 = q1
@ -436,19 +321,22 @@ fn converter(mut pn PrepNumber) u64 {
* If bit 53 is 0, round down
* If bit 53 is 1, round up
*/
/* test case 1 complete
/*
test case 1 complete
s2=0x1FFFFFFF
s1=0xFFFFFF80
s0=0x0
*/
/* test case 1 check_round_bit
/*
test case 1 check_round_bit
s2=0x18888888
s1=0x88888880
s0=0x0
*/
/* test case check_round_bit + normalization
/*
test case check_round_bit + normalization
s2=0x18888888
s1=0x88888F80
s0=0x0
@ -457,27 +345,26 @@ fn converter(mut pn PrepNumber) u64 {
// C.printf("mantissa before rounding: %08x%08x%08x binexp: %d \n", s2,s1,s0,binexp)
// s1 => 0xFFFFFFxx: only the F digits are represented
nbit := 7
check_round_bit := u32(1)<<u32(nbit)
check_round_mask := u32(0xFFFFFFFF)<<u32(nbit)
check_round_bit := u32(1) << u32(nbit)
check_round_mask := u32(0xFFFFFFFF) << u32(nbit)
if (s1 & check_round_bit) != 0 {
// C.printf("need round!! cehck mask: %08x\n", s1 & ~check_round_mask )
if (s1 & ~check_round_mask) != 0 {
// C.printf("Add 1!\n")
s2,s1,s0 = add96(s2, s1, s0, 0, check_round_bit, 0)
}
else {
s2, s1, s0 = add96(s2, s1, s0, 0, check_round_bit, 0)
} else {
// C.printf("All 0!\n")
if (s1 & (check_round_bit<<u32(1))) != 0 {
if (s1 & (check_round_bit << u32(1))) != 0 {
// C.printf("Add 1 form -1 bit control!\n")
s2,s1,s0 = add96(s2, s1, s0, 0, check_round_bit, 0)
s2, s1, s0 = add96(s2, s1, s0, 0, check_round_bit, 0)
}
}
s1 = s1 & check_round_mask
s0 = u32(0)
// recheck normalization
if s2 & (mask28<<u32(1)) != 0 {
if s2 & (mask28 << u32(1)) != 0 {
// C.printf("Renormalize!!")
q2,q1,q0 = lsr96(s2, s1, s0)
q2, q1, q0 = lsr96(s2, s1, s0)
binexp--
s2 = q2
s1 = q1
@ -492,26 +379,22 @@ fn converter(mut pn PrepNumber) u64 {
binexp += 1023
if binexp > 2046 {
if pn.negative {
result = double_minus_infinity
result = strconv.double_minus_infinity
} else {
result = strconv.double_plus_infinity
}
else {
result = double_plus_infinity
}
}
else if binexp < 1 {
} else if binexp < 1 {
if pn.negative {
result = double_minus_zero
result = strconv.double_minus_zero
} else {
result = strconv.double_plus_zero
}
else {
result = double_plus_zero
}
}
else if s2 != 0 {
} else if s2 != 0 {
mut q := u64(0)
binexs2 := u64(binexp)<<52
q = (u64(s2 & ~mask28)<<24) | ((u64(s1) + u64(128))>>8) | binexs2
binexs2 := u64(binexp) << 52
q = (u64(s2 & ~mask28) << 24) | ((u64(s1) + u64(128)) >> 8) | binexs2
if pn.negative {
q |= (u64(1)<<63)
q |= (u64(1) << 63)
}
result = q
}
@ -519,38 +402,33 @@ fn converter(mut pn PrepNumber) u64 {
}
/*
Public functions
*/
// atof64 parses the string s and returns the corresponding f64.
// It runs parser on s, then either builds the bit pattern with converter
// (status parser_ok) or picks the fixed bit pattern for signed zero / signed
// infinity, and finally reads that bit pattern back as an f64 through Float64u.
// Float64u is declared outside this view; presumably its u64 field `u` and f64
// field `f` share storage - confirm.
// NOTE: this listing interleaves the old and new lines of the body; the old call
// passes `s + ' '` to parser, the new one passes s unchanged.
pub fn atof64(s string) f64 {
mut pn := PrepNumber{
}
mut pn := PrepNumber{}
mut res_parsing := 0
mut res := Float64u{}
res_parsing,pn = parser(s + ' ') // TODO: need an extra char for now
// println(pn)
res_parsing, pn = parser(s)
// dispatch on the parser status
match res_parsing {
parser_ok {
strconv.parser_ok {
// regular number: convert the prepared mantissa/exponent to f64 bits
res.u = converter(mut pn)
}
parser_pzero {
res.u = double_plus_zero
strconv.parser_pzero {
res.u = strconv.double_plus_zero
}
parser_mzero {
res.u = double_minus_zero
strconv.parser_mzero {
res.u = strconv.double_minus_zero
}
parser_pinf {
res.u = double_plus_infinity
strconv.parser_pinf {
res.u = strconv.double_plus_infinity
}
parser_minf {
res.u = double_minus_infinity
strconv.parser_minf {
res.u = strconv.double_minus_infinity
}
// any other status leaves res at its initial Float64u{} value
else {
else {}
}
}
return unsafe {res.f}
return unsafe { res.f }
}