v/vlib/builtin/string_interpolation.v

740 lines
17 KiB
V

module builtin
import strconv
import strings
/*=============================================================================
Copyright (c) 2019-2022 Dario Deledda. All rights reserved.
Use of this source code is governed by an MIT license
that can be found in the LICENSE file.
This file contains string interpolation V functions
=============================================================================*/
//=============================================================================
// Enum format types max 0x1F => 32 types
//=============================================================================
// StrIntpType selects how one interpolated value is read from StrIntpMem and formatted.
// The value is stored in the low 5 bits of the packed format word (mask 0x1F),
// which is why at most 32 members are possible.
// NOTE: the numeric values are significant. The constants si_s_code, si_g32_code and
// si_g64_code in this file hard-code 0x10, 0x0e and 0x0f in their low bits, so the
// members must not be reordered.
pub enum StrIntpType {
si_no_str = 0 // no parameter to print only fix string
si_c // rune, read from d_c
si_u8 // unsigned 8 bit integer, read from d_u8
si_i8 // signed 8 bit integer, read from d_i8
si_u16 // unsigned 16 bit integer, read from d_u16
si_i16 // signed 16 bit integer, read from d_i16
si_u32 // unsigned 32 bit integer, read from d_u32
si_i32 // signed 32 bit integer, read from d_i32
si_u64 // unsigned 64 bit integer, read from d_u64
si_i64 // signed 64 bit integer, read from d_i64
si_e32 // f32 in exponent notation (strconv.format_es)
si_e64 // f64 in exponent notation (strconv.format_es)
si_f32 // f32 in fixed notation (strconv.format_fl)
si_f64 // f64 in fixed notation (strconv.format_fl)
si_g32 // f32, fixed or exponent notation chosen by magnitude
si_g64 // f64, fixed or exponent notation chosen by magnitude
si_s // string, read from d_s
si_p // pointer, printed in base 16
si_vp // V pointer, read from d_vp and printed with hex()
}
// str returns the short name of the data type carried by the format type `x`.
// The `e`, `f` and `g` float variants only differ in presentation, so they map
// to the name of the underlying data type (`f32` or `f64`).
pub fn (x StrIntpType) str() string {
	return match x {
		.si_no_str { 'no_str' }
		.si_c { 'c' }
		.si_u8 { 'u8' }
		.si_i8 { 'i8' }
		.si_u16 { 'u16' }
		.si_i16 { 'i16' }
		.si_u32 { 'u32' }
		.si_i32 { 'i32' }
		.si_u64 { 'u64' }
		.si_i64 { 'i64' }
		// all the 32 bit float formats use f32 data
		.si_e32, .si_f32, .si_g32 { 'f32' }
		// all the 64 bit float formats use f64 data
		.si_e64, .si_f64, .si_g64 { 'f64' }
		.si_s { 's' }
		.si_p { 'p' }
		.si_vp { 'vp' }
	}
}
//=============================================================================
// Union data
//=============================================================================
// StrIntpMem holds the value of one interpolation item.
// Which field is valid is decided by the StrIntpType stored in the format word
// of the owning StrIntpData; process_str_intp_data reads the matching field.
pub union StrIntpMem {
pub mut:
d_c u32 // rune code point, converted with utf32_to_str
d_u8 byte
d_i8 i8
d_u16 u16
d_i16 i16
d_u32 u32
d_i32 int
d_u64 u64 // also the field read when formatting .si_p pointers
d_i64 i64
d_f32 f32
d_f64 f64
d_s string
d_p voidptr
d_vp voidptr // read for .si_vp and printed with hex()
}
// fabs32 returns the absolute value of the f32 `x`.
// Values that do not compare as less than zero are returned unchanged.
[inline]
fn fabs32(x f32) f32 {
	if x < 0 {
		return -x
	}
	return x
}
// fabs64 returns the absolute value of the f64 `x`.
// Values that do not compare as less than zero are returned unchanged.
[inline]
fn fabs64(x f64) f64 {
	if x < 0 {
		return -x
	}
	return x
}
// abs64 returns the magnitude of the signed integer `x` as an unsigned 64 bit value.
// The negation is done in the unsigned domain: `-x` on the minimum i64 value
// overflows the signed type, while `0 - u64(x)` wraps to the correct magnitude (2^63).
[inline]
fn abs64(x i64) u64 {
	if x < 0 {
		return u64(0) - u64(x)
	}
	return u64(x)
}
//=========================================
//
// u32/u64 bit compact format
//
//___    32      24      16       8
//___     |       |       |       |
//_3333333333222222222211111111110000000000
//_9876543210987654321098765432109876543210
//_nPPPPPPPPBBBBWWWWWWWWWWTDDDDDDDSUAA=====
// = data type   5 bit  max 32 data types
// A align       2 bit  Note: for now only 1 bit is used!
// U uppercase   1 bit  0 do nothing, 1 do to_upper()
// S sign        1 bit  show the sign if positive
// D decimals    7 bit  number of decimal digits to show
// T tail zeros  1 bit  1 remove tail zeros, 0 do nothing
// W Width      10 bit  number of chars for padding and indentation
// B num base    4 bit  start from 2, 0 for base 10
// P pad char  1/8 bit  padding char (in u32 format reduced to 1 bit as flag for `0` padding)
//     --------------
//     TOTAL:  39/32 bit
//=========================================
// get_str_intp_u64_format packs the formatting options of one interpolation item
// into the compact u64 format word.
// Bit layout (low to high): type 0-4, align 5, uppercase 7, sign 8, precision 9-15,
// tail zeros 16, width 17-26, base 27-30, pad char 31-38.
// `in_width`: its sign selects the alignment bit (set when > 0), its magnitude is the width.
// `in_precision`: the sentinel 987698 is stored as 0x7F, any other value is masked to 7 bits.
// `in_base`: only the low 4 bits are stored.
pub fn get_str_intp_u64_format(fmt_type StrIntpType, in_width int, in_precision int, in_tail_zeros bool, in_sign bool, in_pad_ch byte, in_base int, in_upper_case bool) u64 {
	mut packed := u64(fmt_type) & 0x1F
	if in_width > 0 {
		// two bit 0 .left 1 .right, for now we use only one
		packed |= u64(1) << 5
	}
	if in_upper_case {
		packed |= u64(1) << 7
	}
	if in_sign {
		packed |= u64(1) << 8
	}
	precision_bits := if in_precision == 987698 { u64(0x7F) } else { u64(in_precision & 0x7F) }
	packed |= precision_bits << 9
	if in_tail_zeros {
		packed |= u64(1) << 16
	}
	// abs64(0) is 0, so a zero width needs no special case
	packed |= (abs64(in_width) & 0x3FF) << 17
	packed |= u64(in_base & 0xf) << 27
	packed |= u64(in_pad_ch) << 31
	return packed
}
// get_str_intp_u32_format packs the formatting options of one interpolation item
// into the compact u32 format word.
// Bit layout (low to high): type 0-4, align 5, uppercase 7, sign 8, precision 9-15,
// tail zeros 16, width 17-26, base 27-30, pad flag 31.
// `in_width`: its sign selects the alignment bit (set when > 0), its magnitude is the width.
// `in_precision`: the sentinel 987698 is stored as 0x7F, any other value is masked to 7 bits.
// `in_pad_ch`: only its lowest bit is stored, as the flag for `0` padding.
// NOTE(review): the byte value of the character `0` (0x30) has a clear lowest bit,
// so callers presumably pass 1 to request zero padding - confirm against the caller.
pub fn get_str_intp_u32_format(fmt_type StrIntpType, in_width int, in_precision int, in_tail_zeros bool, in_sign bool, in_pad_ch byte, in_base int, in_upper_case bool) u32 {
	mut packed := u32(fmt_type) & 0x1F
	if in_width > 0 {
		// two bit 0 .left 1 .right, for now we use only one
		packed |= u32(1) << 5
	}
	if in_upper_case {
		packed |= u32(1) << 7
	}
	if in_sign {
		packed |= u32(1) << 8
	}
	precision_bits := if in_precision == 987698 { u32(0x7F) } else { u32(in_precision & 0x7F) }
	packed |= precision_bits << 9
	if in_tail_zeros {
		packed |= u32(1) << 16
	}
	// abs64(0) is 0, so a zero width needs no special case
	packed |= u32(abs64(in_width) & 0x3FF) << 17
	packed |= u32(in_base & 0xf) << 27
	packed |= u32(in_pad_ch & 1) << 31
	return packed
}
// process_str_intp_data formats the value stored in `data.d` according to the packed
// format word `data.fmt` and appends the resulting text to `sb`.
// Nothing is written when the decoded type is .si_no_str.
// The function is marked [manualfree]: every temporary string created here is freed explicitly.
[manualfree]
fn (data &StrIntpData) process_str_intp_data(mut sb strings.Builder) {
// unpack the bit fields of the format word
x := data.fmt
typ := StrIntpType(x & 0x1F)
allign := int((x >> 5) & 0x01)
upper_case := ((x >> 7) & 0x01) > 0
sign := int((x >> 8) & 0x01)
// 0x7F in the precision field means "no precision given" (see len1_set below)
precision := int((x >> 9) & 0x7F)
tail_zeros := ((x >> 16) & 0x01) > 0
width := int(i16((x >> 17) & 0x3FF))
mut base := int(x >> 27) & 0xF
fmt_pad_ch := byte((x >> 31) & 0xFF)
// no string interpolation is needed, return empty string
if typ == .si_no_str {
return
}
// if width > 0 { println("${x.hex()} Type: ${x & 0x7F} Width: ${width} Precision: ${precision} allign:${allign}") }
// manage base if any
if base > 0 {
base += 2 // we start from 2, 0 == base 10
}
// manage pad char, for now only 0 allowed
mut pad_ch := byte(` `)
if fmt_pad_ch > 0 {
// pad_ch = fmt_pad_ch
pad_ch = `0`
}
// -1 disables the total length / the number of decimals in the strconv parameters
len0_set := if width > 0 { width } else { -1 }
len1_set := if precision == 0x7F { -1 } else { precision }
sign_set := sign == 1
mut bf := strconv.BF_param{
pad_ch: pad_ch // padding char
len0: len0_set // default len for whole the number or string
len1: len1_set // number of decimal digits, if needed
positive: true // mandatory: the sign of the number passed
sign_flag: sign_set // flag for print sign as prefix in padding
allign: .left // alignment of the string
rm_tail_zero: tail_zeros // false // remove the tail zeros from floats
}
// alignment: the align bit is honoured only without a pad flag,
// a non zero pad flag always forces right alignment
if fmt_pad_ch == 0 {
match allign {
0 { bf.allign = .left }
1 { bf.allign = .right }
// 2 { bf.allign = .center }
else { bf.allign = .left }
}
} else {
bf.allign = .right
}
unsafe {
// strings
if typ == .si_s {
// work on a private copy (upper-cased or cloned) so that it can be freed below
mut s := ''
if upper_case {
s = data.d.d_s.to_upper()
} else {
s = data.d.d_s.clone()
}
if width == 0 {
sb.write_string(s)
} else {
strconv.format_str_sb(s, bf, mut sb)
}
s.free()
return
}
// signed int
if typ in [.si_i8, .si_i16, .si_i32, .si_i64] {
// widen the narrower types to i64, .si_i64 is read directly
mut d := data.d.d_i64
if typ == .si_i8 {
d = i64(data.d.d_i8)
} else if typ == .si_i16 {
d = i64(data.d.d_i16)
} else if typ == .si_i32 {
d = i64(data.d.d_i32)
}
if base == 0 {
// base 10: without a width the plain str() is enough
if width == 0 {
d_str := d.str()
sb.write_string(d_str)
d_str.free()
return
}
if d < 0 {
bf.positive = false
}
strconv.format_dec_sb(abs64(d), bf, mut sb)
} else {
// binary, we use 3 for binary
if base == 3 {
base = 2
}
// with a non space pad char the `-` is written before the padding
// and only the magnitude is formatted
mut absd, mut write_minus := d, false
if d < 0 && pad_ch != ` ` {
absd = -d
write_minus = true
}
mut hx := strconv.format_int(absd, base)
if upper_case {
tmp := hx
hx = hx.to_upper()
tmp.free()
}
if write_minus {
sb.write_b(`-`)
bf.len0-- // compensate for the `-` above
}
if width == 0 {
sb.write_string(hx)
} else {
strconv.format_str_sb(hx, bf, mut sb)
}
hx.free()
}
return
}
// unsigned int (pointers are handled by the next section)
if typ in [.si_u8, .si_u16, .si_u32, .si_u64] {
// widen the narrower types to u64, .si_u64 is read directly
mut d := data.d.d_u64
if typ == .si_u8 {
d = u64(data.d.d_u8)
} else if typ == .si_u16 {
d = u64(data.d.d_u16)
} else if typ == .si_u32 {
d = u64(data.d.d_u32)
}
if base == 0 {
if width == 0 {
d_str := d.str()
sb.write_string(d_str)
d_str.free()
return
}
strconv.format_dec_sb(d, bf, mut sb)
} else {
// binary, we use 3 for binary
if base == 3 {
base = 2
}
mut hx := strconv.format_uint(d, base)
if upper_case {
tmp := hx
hx = hx.to_upper()
tmp.free()
}
if width == 0 {
sb.write_string(hx)
} else {
strconv.format_str_sb(hx, bf, mut sb)
}
hx.free()
}
return
}
// pointers
if typ == .si_p {
// the pointer value is read through the d_u64 member of the union
mut d := data.d.d_u64
base = 16 // TODO: **** decide the behaviour of this flag! ****
// base was just forced to 16, so the `base == 0` branch below cannot be taken
if base == 0 {
if width == 0 {
d_str := d.str()
sb.write_string(d_str)
d_str.free()
return
}
strconv.format_dec_sb(d, bf, mut sb)
} else {
mut hx := strconv.format_uint(d, base)
if upper_case {
tmp := hx
hx = hx.to_upper()
tmp.free()
}
if width == 0 {
sb.write_string(hx)
} else {
strconv.format_str_sb(hx, bf, mut sb)
}
hx.free()
}
return
}
// default settings for floats:
// without width and precision the type's own str()/strg() is used,
// a missing precision falls back to 3 decimals
mut use_default_str := false
if width == 0 && precision == 0x7F {
bf.len1 = 3
use_default_str = true
}
if bf.len1 < 0 {
bf.len1 = 3
}
match typ {
// floating point
.si_f32 {
$if !nofloat ? {
// println("HERE: f32")
if use_default_str {
mut f := data.d.d_f32.str()
if upper_case {
tmp := f
f = f.to_upper()
tmp.free()
}
sb.write_string(f)
f.free()
} else {
// println("HERE: f32 format")
// println(data.d.d_f32)
if data.d.d_f32 < 0 {
bf.positive = false
}
mut f := strconv.format_fl(data.d.d_f32, bf)
if upper_case {
tmp := f
f = f.to_upper()
tmp.free()
}
sb.write_string(f)
f.free()
}
}
}
.si_f64 {
$if !nofloat ? {
// println("HERE: f64")
if use_default_str {
mut f := data.d.d_f64.str()
if upper_case {
tmp := f
f = f.to_upper()
tmp.free()
}
sb.write_string(f)
f.free()
} else {
if data.d.d_f64 < 0 {
bf.positive = false
}
// -0.0 does not compare as < 0, so its bit pattern is checked
// to report it as negative too
f_union := strconv.Float64u{
f: data.d.d_f64
}
if f_union.u == strconv.double_minus_zero {
bf.positive = false
}
mut f := strconv.format_fl(data.d.d_f64, bf)
if upper_case {
tmp := f
f = f.to_upper()
tmp.free()
}
sb.write_string(f)
f.free()
}
}
}
.si_g32 {
// println("HERE: g32")
if use_default_str {
$if !nofloat ? {
mut f := data.d.d_f32.strg()
if upper_case {
tmp := f
f = f.to_upper()
tmp.free()
}
sb.write_string(f)
f.free()
}
} else {
// NOTE: unlike the other float branches this one is not guarded by `$if !nofloat ?`
// NOTE(review): the checks below compare the float value itself with strconv
// constants, while the .si_f64 branch compares the bit pattern through
// strconv.Float64u. If these constants are raw bit patterns the comparisons
// may not test what is intended - confirm.
// Manage +/-0
if data.d.d_f32 == strconv.single_plus_zero {
tmp_str := '0'
strconv.format_str_sb(tmp_str, bf, mut sb)
tmp_str.free()
return
}
if data.d.d_f32 == strconv.single_minus_zero {
tmp_str := '-0'
strconv.format_str_sb(tmp_str, bf, mut sb)
tmp_str.free()
return
}
// Manage +/-INF
// NOTE(review): unlike the +/-0 cases there is no `return` after the two
// infinity cases, so execution continues into the formatting code below
// and may append a second representation - confirm whether this is intended.
if data.d.d_f32 == strconv.single_plus_infinity {
mut tmp_str := '+inf'
if upper_case {
tmp_str = '+INF'
}
strconv.format_str_sb(tmp_str, bf, mut sb)
tmp_str.free()
}
if data.d.d_f32 == strconv.single_minus_infinity {
mut tmp_str := '-inf'
if upper_case {
tmp_str = '-INF'
}
strconv.format_str_sb(tmp_str, bf, mut sb)
tmp_str.free()
}
if data.d.d_f32 < 0 {
bf.positive = false
}
// magnitudes in [0.00001, 999_999) use fixed notation, everything else exponent notation
d := fabs32(data.d.d_f32)
if d < 999_999.0 && d >= 0.00001 {
mut f := strconv.format_fl(data.d.d_f32, bf)
if upper_case {
tmp := f
f = f.to_upper()
tmp.free()
}
sb.write_string(f)
f.free()
return
}
mut f := strconv.format_es(data.d.d_f32, bf)
if upper_case {
tmp := f
f = f.to_upper()
tmp.free()
}
sb.write_string(f)
f.free()
}
}
.si_g64 {
// println("HERE: g64")
if use_default_str {
$if !nofloat ? {
mut f := data.d.d_f64.strg()
if upper_case {
tmp := f
f = f.to_upper()
tmp.free()
}
sb.write_string(f)
f.free()
}
} else {
// NOTE: unlike the other float branches this one is not guarded by `$if !nofloat ?`
// NOTE(review): same concern as in the .si_g32 branch: the float value is compared
// directly with strconv constants - confirm that this tests what is intended.
// Manage +/-0
if data.d.d_f64 == strconv.double_plus_zero {
tmp_str := '0'
strconv.format_str_sb(tmp_str, bf, mut sb)
tmp_str.free()
return
}
if data.d.d_f64 == strconv.double_minus_zero {
tmp_str := '-0'
strconv.format_str_sb(tmp_str, bf, mut sb)
tmp_str.free()
return
}
// Manage +/-INF
// NOTE(review): no `return` after the two infinity cases, execution continues
// into the formatting code below - confirm whether this is intended.
if data.d.d_f64 == strconv.double_plus_infinity {
mut tmp_str := '+inf'
if upper_case {
tmp_str = '+INF'
}
strconv.format_str_sb(tmp_str, bf, mut sb)
tmp_str.free()
}
if data.d.d_f64 == strconv.double_minus_infinity {
mut tmp_str := '-inf'
if upper_case {
tmp_str = '-INF'
}
strconv.format_str_sb(tmp_str, bf, mut sb)
tmp_str.free()
}
if data.d.d_f64 < 0 {
bf.positive = false
}
// magnitudes in [0.00001, 999_999) use fixed notation, everything else exponent notation
d := fabs64(data.d.d_f64)
if d < 999_999.0 && d >= 0.00001 {
mut f := strconv.format_fl(data.d.d_f64, bf)
if upper_case {
tmp := f
f = f.to_upper()
tmp.free()
}
sb.write_string(f)
f.free()
return
}
mut f := strconv.format_es(data.d.d_f64, bf)
if upper_case {
tmp := f
f = f.to_upper()
tmp.free()
}
sb.write_string(f)
f.free()
}
}
.si_e32 {
$if !nofloat ? {
// println("HERE: e32")
// NOTE(review): this overwrites the number of decimals unconditionally,
// so a precision decoded from the format word is not used here - confirm.
bf.len1 = 6
if use_default_str {
mut f := data.d.d_f32.str()
if upper_case {
tmp := f
f = f.to_upper()
tmp.free()
}
sb.write_string(f)
f.free()
} else {
if data.d.d_f32 < 0 {
bf.positive = false
}
mut f := strconv.format_es(data.d.d_f32, bf)
if upper_case {
tmp := f
f = f.to_upper()
tmp.free()
}
sb.write_string(f)
f.free()
}
}
}
.si_e64 {
$if !nofloat ? {
// println("HERE: e64")
// NOTE(review): this overwrites the number of decimals unconditionally,
// so a precision decoded from the format word is not used here - confirm.
bf.len1 = 6
if use_default_str {
mut f := data.d.d_f64.str()
if upper_case {
tmp := f
f = f.to_upper()
tmp.free()
}
sb.write_string(f)
f.free()
} else {
if data.d.d_f64 < 0 {
bf.positive = false
}
mut f := strconv.format_es(data.d.d_f64, bf)
if upper_case {
tmp := f
f = f.to_upper()
tmp.free()
}
sb.write_string(f)
f.free()
}
}
}
// runes: width, padding and upper case are not applied here
.si_c {
ss := utf32_to_str(data.d.d_c)
sb.write_string(ss)
ss.free()
}
// v pointers: written as plain hex(), width and padding are not applied here
.si_vp {
ss := u64(data.d.d_vp).hex()
sb.write_string(ss)
ss.free()
}
// every type without a handler above ends up here
else {
sb.write_string('***ERROR!***')
}
}
}
}
//====================================================================================
// StrIntpCgenData is the storing struct used by cgen.
// All three fields are plain strings; it is not referenced by the other code in this file.
// NOTE(review): presumably the fields hold the C source text for the matching
// StrIntpData members (str, fmt, d) - confirm against cgen.
pub struct StrIntpCgenData {
pub:
str string
fmt string
d string
}
// NOTE: LOW LEVEL struct
// StrIntpData is the storing struct passed to V in the C code.
// One element describes one piece of an interpolated string: str_intp first writes
// `str`, then, when `fmt` is not 0, the value in `d` formatted as described by `fmt`.
pub struct StrIntpData {
pub:
str string // literal text written before the value
// fmt u64 // expanded version for future use, 64 bit
fmt u32 // packed format word, see get_str_intp_u32_format
d StrIntpMem // the value, the valid member is selected by the type bits of fmt
}
// str_intp is the interpolation function: it builds the final string from
// `data_len` StrIntpData elements starting at the address `in_data`.
// For every element the literal `str` part is appended first (when not empty),
// followed by the formatted value (when `fmt` is not 0).
[direct_array_access; manualfree]
pub fn str_intp(data_len int, in_data voidptr) string {
	mut builder := strings.new_builder(256)
	items := &StrIntpData(in_data)
	for idx in 0 .. data_len {
		item := unsafe { &(items[idx]) }
		// avoid empty strings
		if item.str.len != 0 {
			builder.write_string(item.str)
		}
		// skip empty data
		if item.fmt != 0 {
			item.process_str_intp_data(mut builder)
		}
	}
	result := builder.str()
	unsafe { builder.free() }
	return result
}
//====================================================================================
// Utility for the compiler "auto_str_methods.v"
//====================================================================================
// substitute old _STR calls
// Pre-computed format words, as hex text, for the generated C code.
// Each value is 0xfe00 (precision field, bits 9-15, set to 0x7F = no precision given)
// combined with the numeric value of the StrIntpType in the low 5 bits:
// 0x10 = .si_s, 0x0e = .si_g32, 0x0f = .si_g64.
// They must be kept in sync with the member order of StrIntpType.
pub const (
// BUG: this const is not released from the memory! use a const for now
// si_s_code = "0x" + int(StrIntpType.si_s).hex() // code for a simple string
si_s_code = '0xfe10'
si_g32_code = '0xfe0e'
si_g64_code = '0xfe0f'
)
// str_intp_sq returns the C source text of a str_intp call with two elements:
// a `'` followed by the string expression `in_str` (format si_s_code),
// then a closing `'` with no value (format 0).
// `in_str` is inserted verbatim, so it must already be a C expression.
[inline]
pub fn str_intp_sq(in_str string) string {
return 'str_intp(2, _MOV((StrIntpData[]){{_SLIT("\'"), $si_s_code, {.d_s = $in_str}},{_SLIT("\'"), 0, {.d_c = 0 }}}))'
}
// str_intp_rune returns the C source text of a str_intp call with two elements:
// a backtick followed by the string expression `in_str` (format si_s_code),
// then a closing backtick with no value (format 0).
// `in_str` is inserted verbatim, so it must already be a C expression.
[inline]
pub fn str_intp_rune(in_str string) string {
return 'str_intp(2, _MOV((StrIntpData[]){{_SLIT("\`"), $si_s_code, {.d_s = $in_str}},{_SLIT("\`"), 0, {.d_c = 0 }}}))'
}
// str_intp_g32 returns the C source text of a str_intp call with a single element
// that formats the f32 expression `in_str` with the format si_g32_code.
// `in_str` is inserted verbatim, so it must already be a C expression.
// NOTE(review): _SLIT0 is presumably the C macro for an empty string literal - confirm.
[inline]
pub fn str_intp_g32(in_str string) string {
return 'str_intp(1, _MOV((StrIntpData[]){{_SLIT0, $si_g32_code, {.d_f32 = $in_str }}}))'
}
// str_intp_g64 returns the C source text of a str_intp call with a single element
// that formats the f64 expression `in_str` with the format si_g64_code.
// `in_str` is inserted verbatim, so it must already be a C expression.
// NOTE(review): _SLIT0 is presumably the C macro for an empty string literal - confirm.
[inline]
pub fn str_intp_g64(in_str string) string {
return 'str_intp(1, _MOV((StrIntpData[]){{_SLIT0, $si_g64_code, {.d_f64 = $in_str }}}))'
}
// str_intp_sub returns the C source text of a str_intp call that replaces the
// first `%%` marker of `base_str` with the string expression `in_str`.
// The text before the marker becomes the literal part of the first element;
// when text follows the marker a second, value-less element is added for it.
// The program prints an error and exits with code 1 when `base_str` has no `%%`.
[manualfree]
pub fn str_intp_sub(base_str string, in_str string) string {
	marker_pos := base_str.index('%%') or {
		eprintln('No strin interpolation %% parameteres')
		exit(1)
	}
	// equivalent to: base_str[..marker_pos] + in_str + base_str[marker_pos+2..]
	unsafe {
		head := base_str[..marker_pos]
		tail_start := marker_pos + 2
		if tail_start >= base_str.len {
			// the marker ends the string: a single element is enough
			single := 'str_intp(1, _MOV((StrIntpData[]){{_SLIT("$head"), $si_s_code, {.d_s = $in_str }}}))'
			head.free()
			return single
		}
		tail := base_str[tail_start..]
		pair := 'str_intp(2, _MOV((StrIntpData[]){{_SLIT("$head"), $si_s_code, {.d_s = $in_str }},{_SLIT("$tail"), 0, {.d_c = 0}}}))'
		head.free()
		tail.free()
		return pair
	}
}