transformer: provide direct_memory_access to arrays when safe (#12724)

pull/12802/head
Thomas Mangin 2021-12-11 19:55:46 +00:00 committed by GitHub
parent fe14e2fceb
commit 0d0d7323bb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 623 additions and 27 deletions

View File

@ -1341,6 +1341,7 @@ fn (t Tree) index_expr(node ast.IndexExpr) &Node {
obj.add_terse('left_type', t.type_node(node.left_type))
obj.add_terse('index', t.expr(node.index))
obj.add_terse('is_setter', t.bool_node(node.is_setter))
obj.add_terse('is_direct', t.bool_node(node.is_direct))
obj.add_terse('or_expr', t.or_expr(node.or_expr))
obj.add('pos', t.position(node.pos))
return obj

View File

@ -816,6 +816,7 @@ pub mut:
is_array bool
is_farray bool
is_option bool // IfGuard
is_direct bool // Set if the underlying memory can be safely accessed
}
pub struct IfExpr {

View File

@ -18,7 +18,7 @@ fn (mut g Gen) index_expr(node ast.IndexExpr) {
} else if sym.kind == .map {
g.index_of_map(node, sym)
} else if sym.kind == .string && !node.left_type.is_ptr() {
is_direct_array_access := g.fn_decl != 0 && g.fn_decl.is_direct_arr
is_direct_array_access := (g.fn_decl != 0 && g.fn_decl.is_direct_arr) || node.is_direct
if is_direct_array_access {
g.expr(node.left)
g.write('.str[ ')
@ -128,7 +128,7 @@ fn (mut g Gen) index_of_array(node ast.IndexExpr, sym ast.TypeSymbol) {
// `(*(Val*)array_get(vals, i)).field = x;`
is_selector := node.left is ast.SelectorExpr
if g.is_assign_lhs && !is_selector && node.is_setter {
is_direct_array_access := g.fn_decl != 0 && g.fn_decl.is_direct_arr
is_direct_array_access := (g.fn_decl != 0 && g.fn_decl.is_direct_arr) || node.is_direct
is_op_assign := g.assign_op != .assign && info.elem_type != ast.string_type
array_ptr_type_str := match elem_typ.kind {
.function { 'voidptr*' }
@ -195,7 +195,7 @@ fn (mut g Gen) index_of_array(node ast.IndexExpr, sym ast.TypeSymbol) {
}
}
} else {
is_direct_array_access := g.fn_decl != 0 && g.fn_decl.is_direct_arr
is_direct_array_access := (g.fn_decl != 0 && g.fn_decl.is_direct_arr) || node.is_direct
array_ptr_type_str := match elem_typ.kind {
.function { 'voidptr*' }
else { '$elem_type_str*' }

View File

@ -28,7 +28,7 @@ pub fn (mut p Parser) parse_array_type() ast.Type {
fixed_size = const_field.expr.val.int()
} else {
if const_field.expr is ast.InfixExpr {
t := transformer.new_transformer(p.pref)
mut t := transformer.new_transformer(p.pref)
folded_expr := t.infix_expr(const_field.expr)
if folded_expr is ast.IntegerLiteral {

View File

@ -0,0 +1,48 @@
import os
const (
test = @VROOT + '/vlib/v/tests/testdata/test_array_bound.v'
)
fn direct(line string) {
if !line.contains('\tmain__direct(') {
return
}
trimmed := line.trim_space()
if trimmed.contains('array_get') {
assert trimmed == 'this should have been a direct access in $test line $line'
}
}
fn access(line string) {
if !line.contains('\tmain__access(') {
return
}
trimmed := line.trim_space()
if !trimmed.contains('array_get') {
assert trimmed == 'this should have been an array access in $test line $line'
}
}
fn test_array_optimisation() {
mut args := []string{cap: 3}
args << test
args << '-o'
args << '-'
mut p := os.new_process(@VEXE)
p.set_args(args)
p.set_redirect_stdio()
p.run()
stdout := p.stdout_slurp()
p.wait()
p.close()
assert stdout.contains('// THE END.')
for line in stdout.split('\n') {
direct(line)
access(line)
}
println('ok, could not detect any wrong memory access optimisation')
}

View File

@ -0,0 +1,147 @@
enum Index {
one
two
}
fn direct(b byte) bool {
return true
}
fn access(b byte) bool {
return false
}
fn check_underscore(a []byte) {
_ = a[1]
direct(a[0])
}
fn check_fn(a []byte) {
access(a[2])
direct(a[2])
direct(a[1])
}
fn check_if_access(a []byte) {
access(a[3])
if a[3] == `0` {
}
}
fn check_if_fn_access(a []byte) {
access(a[4])
if direct(a[4]) {
direct(a[4])
}
}
fn check_if_test_in_branch(a []byte) {
if a[6] == `0` {
direct(a[5])
access(a[7])
}
}
fn check_if_fn_branch(a []byte) {
if access(a[8]) {
direct(a[7])
}
}
fn check_for_branch(a []byte) {
access(a[9])
for _ in 0 .. 1 {
access(a[10])
}
direct(a[9])
access(a[10])
}
fn check_assert(a []byte) {
assert a.len >= 19
direct(a[18])
assert 21 <= a.len
_ = a[20]
}
fn check_range_access(a []byte) {
access(a[23])
range := a[20..25]
direct(range[3])
access(range[4])
direct(range[4])
}
fn check_for(a []byte) {
for access(a[30]) == false {
direct(a[30])
access(a[31])
return
}
}
fn check_for_c_init_0(a []byte) {
for a[32] == 0 {
direct(a[32])
access(a[33])
return
}
}
fn check_for_c_init_1(a []byte) {
for access_it := a[34]; a[34] == 0; {
direct(a[34])
access(a[35])
return
}
}
// fn check_for_c_init_2(a []byte) {
// mut run := true
// for x := a[36]; a[36] == 0 && run; x = a[38] {
// direct(a[36])
// access(a[37])
// run = false
// }
// direct(a[38])
// }
// fn skip_clone(a []byte) {
// access(a[30])
// fn_local := a.clone()
// direct(fn_local[30])
// }
// fn skip_mut_self_assign(a []byte) {
// mut fn_local := a.clone()
// fn_local[31] = fn_local[32]
// direct(fn_local[31])
// }
// fn skip_enum (a []byte) {
// access(a[33])
// direct(a[Index.two])
// }
fn main() {
a := []byte{len: 50}
direct(a[49])
check_underscore(a)
check_fn(a)
check_if_access(a)
check_if_fn_access(a)
check_if_test_in_branch(a)
check_if_fn_branch(a)
check_for_branch(a)
check_assert(a)
check_range_access(a)
check_for(a)
check_for_c_init_0(a)
check_for_c_init_1(a)
// check_for_c_init_2(a)
// skip_clone(a)
// skip_mut_self_assign(a)
// skip_enum (a)
}

View File

@ -4,46 +4,352 @@ import v.pref
import v.ast
import v.util
struct KeyVal {
key string
value int
}
[if debug_bounds_checking ?]
fn debug_bounds_checking(str string) {
println(str)
}
// IndexState is used to track the index analysis performed when parsing the code
// `IndexExpr` nodes are annotated with `is_direct`, indicating that the array index can be safely directly accessed.
// The c_gen code check will handle this annotation and perform this direct memory access. The following cases are considered valid for this optimisation:
// 1. the array size is known and has a `len` larger than the index requested
// 2. the array was previously accessed with a higher value which would have reported the issue already
// 3. the array was created from a range expression a := range[10..13] and the offset'ed indexes are safe
// Current limitations:
// * any function using break/continue or goto/label stopped from being optimised as soon as the relevant AST nodes are found as the code can not be ensured to be sequential
// * `enum` and `const` indexes are not optimised (they could probably be looked up)
// * for loops with multiple var in their init and/or inc are not analysed
// * mut array are not analysed as their size can be reduced, but self-assignment in a single line
pub struct IndexState {
mut:
// max_index has the biggest array index accessed for then named array
// so if a[2] was set or read, it will be 2
// A new array with no .len will recorded as -1 (accessing a[0] would be invalid)
// the value -2 is used to indicate that the array should not be analysed
// this is used for a mut array
max_index map[string]int
// We need to snapshot when entering `if` and `for` blocks and restore on exit
// as the statements may not be run. This is managed by indent() & unindent().
saved_disabled []bool
saved_key_vals [][]KeyVal
pub mut:
// on encountering goto/break/continue statements we stop any analysis
// for the current function (as the code is not linear anymore)
disabled bool
level int
}
// we are remembering the last array accessed and checking if the value is safe
// the node is updated with this information which can then be used by the code generators
fn (mut i IndexState) safe_access(key string, new int) bool {
$if no_bounds_checking {
return false
}
if i.disabled {
return false
}
old := i.max_index[key] or {
debug_bounds_checking('$i.level ${key}.len = $new')
i.max_index[key] = new
return false
}
if new > old {
if old < -1 {
debug_bounds_checking('$i.level $key[$new] unsafe (mut array)')
return false
}
debug_bounds_checking('$i.level $key[$new] unsafe (index was $old)')
i.max_index[key] = new
return false
}
debug_bounds_checking('$i.level $key[$new] safe (index is $old)')
return true
}
// safe_offset returns for a previvous array what was the highest
// offset we ever accessed for that identifier
fn (mut i IndexState) safe_offset(key string) int {
$if no_bounds_checking {
return -2
}
if i.disabled {
return -2
}
return i.max_index[key] or { -1 }
}
// indent is used for when encountering new code blocks (if, for and functions)
// The code analysis needs to take into consideration blocks of code which
// may not run at runtime (if/for) and therefore even if a new maximum for an
// index access is found on an if branch it can not be used within the parent
// code. The same is true with for blocks. indent() snapshot the current state,
// to allow restoration with unindent()
// Also within a function, analysis must be `disabled` when goto or break are
// encountered as the code flow is then not lineear, and only restart when a
// new function analysis is started.
[if !no_bounds_checking]
fn (mut i IndexState) indent(is_function bool) {
mut kvs := []KeyVal{cap: i.max_index.len}
for k, v in i.max_index {
kvs << KeyVal{k, v}
}
i.saved_disabled << i.disabled
i.saved_key_vals << kvs
if is_function {
i.disabled = false
}
i.level += 1
}
// restoring the data as it was before the if/for/unsafe block
[if !no_bounds_checking]
fn (mut i IndexState) unindent() {
i.level -= 1
mut keys := []string{cap: i.max_index.len}
for k, _ in i.max_index {
keys << k
}
for k in keys {
i.max_index.delete(k)
}
for saved in i.saved_key_vals.pop() {
i.max_index[saved.key] = saved.value
}
i.disabled = i.saved_disabled.pop()
}
pub struct Transformer {
pref &pref.Preferences
pub mut:
index &IndexState
}
pub fn new_transformer(pref &pref.Preferences) &Transformer {
return &Transformer{
pref: pref
index: &IndexState{
saved_key_vals: [][]KeyVal{cap: 1000}
saved_disabled: []bool{cap: 1000}
}
}
}
pub fn (t Transformer) transform_files(ast_files []&ast.File) {
pub fn (mut t Transformer) transform_files(ast_files []&ast.File) {
for i in 0 .. ast_files.len {
mut file := unsafe { ast_files[i] }
t.transform(mut file)
}
}
pub fn (t Transformer) transform(mut ast_file ast.File) {
pub fn (mut t Transformer) transform(mut ast_file ast.File) {
for mut stmt in ast_file.stmts {
t.stmt(mut stmt)
}
}
pub fn (t Transformer) stmt(mut node ast.Stmt) {
pub fn (mut t Transformer) find_new_array_len(node ast.AssignStmt) {
// looking for, array := []type{len:int}
mut right := node.right[0]
if mut right is ast.ArrayInit {
mut left := node.left[0]
if mut left is ast.Ident {
// we can not analyse mut array
if left.is_mut {
t.index.safe_access(left.name, -2)
return
}
// as we do not need to check any value under the setup len
if !right.has_len {
t.index.safe_access(left.name, -1)
return
}
mut len := int(0)
value := right.len_expr
if value is ast.IntegerLiteral {
len = value.val.int() + 1
}
t.index.safe_access(left.name, len)
}
}
}
pub fn (mut t Transformer) find_new_range(node ast.AssignStmt) {
// looking for, array := []type{len:int}
mut right := node.right[0]
if mut right is ast.IndexExpr {
mut left := node.left[0]
if mut left is ast.Ident {
// we can not analyse mut array
if left.is_mut {
t.index.safe_access(left.name, -2)
return
}
index := right.index
if index is ast.RangeExpr {
range_low := index.low
if range_low is ast.IntegerLiteral {
sub_left := right.left
if sub_left is ast.Ident {
safe := t.index.safe_offset(sub_left.name)
low := range_low.val.int()
if safe >= low {
t.index.safe_access(left.name, safe - low)
}
}
}
}
}
}
}
pub fn (mut t Transformer) find_mut_self_assign(node ast.AssignStmt) {
// even if mutable we can be sure than `a[1] = a[2] is safe
}
pub fn (mut t Transformer) find_assert_len(node ast.InfixExpr) {
right := node.right
match right {
ast.IntegerLiteral {
left := node.left
if left is ast.SelectorExpr {
len := right.val.int()
if left.field_name == 'len' {
match node.op {
.eq { // ==
t.index.safe_access(left.expr.str(), len - 1)
}
.ge { // >=
t.index.safe_access(left.expr.str(), len - 1)
}
.gt { // >
t.index.safe_access(left.expr.str(), len)
}
else {}
}
}
}
}
ast.SelectorExpr {
left := node.left
if left is ast.IntegerLiteral {
len := left.val.int()
if right.field_name == 'len' {
match node.op {
.eq { // ==
t.index.safe_access(right.expr.str(), len - 1)
}
.le { // <=
t.index.safe_access(right.expr.str(), len - 1)
}
.lt { // <
t.index.safe_access(right.expr.str(), len)
}
else {}
}
}
}
}
else {}
}
}
pub fn (mut t Transformer) check_safe_array(mut node ast.IndexExpr) {
if !node.is_array {
return
}
index := node.index
name := node.left
match index {
ast.IntegerLiteral {
node.is_direct = t.index.safe_access(name.str(), index.val.int())
}
ast.RangeExpr {
if index.has_high {
high := index.high
if high is ast.IntegerLiteral {
t.index.safe_access(name.str(), high.val.int())
return
}
}
if index.has_low {
low := index.low
if low is ast.IntegerLiteral {
t.index.safe_access(name.str(), low.val.int())
return
}
}
}
ast.CastExpr {
// do not deal with weird casting
if index.typname != 'int' {
return
}
index_expr := index.expr
if index_expr is ast.IntegerLiteral {
val := index_expr.val
node.is_direct = t.index.safe_access(name.str(), val.int())
}
}
ast.EnumVal {
debug_bounds_checking('? $name[.$index.val] safe?: no-idea (yet)!')
}
ast.Ident {
// we may be able to track const value in simple cases
}
else {}
}
}
pub fn (mut t Transformer) stmt(mut node ast.Stmt) {
match mut node {
ast.EmptyStmt {}
ast.NodeError {}
ast.AsmStmt {}
ast.AssertStmt {}
ast.AssertStmt {
expr := node.expr
match expr {
ast.InfixExpr {
t.find_assert_len(expr)
}
else {
t.expr(expr)
}
}
}
ast.AssignStmt {
t.find_new_array_len(node)
t.find_new_range(node)
t.find_mut_self_assign(node)
for mut right in node.right {
right = t.expr(right)
}
for left in node.left {
t.expr(left)
}
}
ast.Block {
t.index.indent(false)
for mut stmt in node.stmts {
t.stmt(mut stmt)
}
t.index.unindent()
}
ast.BranchStmt {
// break or continue:
// we can not rely on sequential scanning and need to cancel all index optimisation
t.index.disabled = true
}
ast.BranchStmt {}
ast.ComptimeFor {}
ast.ConstDecl {
for mut field in node.fields {
@ -74,24 +380,76 @@ pub fn (t Transformer) stmt(mut node ast.Stmt) {
}
}
ast.FnDecl {
t.index.indent(true)
for mut stmt in node.stmts {
t.stmt(mut stmt)
}
t.index.unindent()
}
ast.ForCStmt {
// TODO we do not optimise array access for multi init
// for a,b := 0,1; a < 10; a,b = a+b, a {...}
// https://github.com/vlang/v/issues/12782
// if node.has_init && !node.is_multi {
// mut init := node.init
// t.stmt(mut init)
// }
if node.has_cond {
t.expr(node.cond)
}
t.index.indent(false)
for mut stmt in node.stmts {
t.stmt(mut stmt)
}
t.index.unindent()
// https://github.com/vlang/v/issues/12782
// if node.has_inc && !node.is_multi {
// mut inc := node.inc
// t.stmt(mut inc)
// }
}
ast.ForInStmt {
// indexes access within the for itself are not optimised (yet)
t.index.indent(false)
for mut stmt in node.stmts {
t.stmt(mut stmt)
}
t.index.unindent()
}
ast.ForCStmt {}
ast.ForInStmt {}
ast.ForStmt {
cond_expr := t.expr(node.cond)
node = &ast.ForStmt{
...node
cond: t.expr(node.cond)
cond: cond_expr
}
if node.cond is ast.BoolLiteral && !(node.cond as ast.BoolLiteral).val { // for false { ... } should be eleminated
node = &ast.EmptyStmt{}
match node.cond {
ast.BoolLiteral {
if !(node.cond as ast.BoolLiteral).val { // for false { ... } should be eleminated
node = &ast.EmptyStmt{}
}
}
else {
if !node.is_inf {
t.index.indent(false)
for mut stmt in node.stmts {
t.stmt(mut stmt)
}
t.index.unindent()
}
}
}
}
ast.GlobalDecl {}
ast.GotoLabel {}
ast.GotoStmt {}
ast.GotoStmt {
// we can not rely on sequential scanning and need to cancel all index optimisation
t.index.disabled = true
}
ast.HashStmt {}
ast.Import {}
ast.InterfaceDecl {}
@ -107,16 +465,30 @@ pub fn (t Transformer) stmt(mut node ast.Stmt) {
}
}
pub fn (t Transformer) expr(node ast.Expr) ast.Expr {
pub fn (mut t Transformer) expr(node ast.Expr) ast.Expr {
match mut node {
ast.CallExpr {
for arg in node.args {
t.expr(arg.expr)
}
return node
}
ast.InfixExpr {
return t.infix_expr(node)
}
ast.OrExpr {
for mut stmt in node.stmts {
t.stmt(mut stmt)
}
return node
}
ast.IndexExpr {
return ast.IndexExpr{
t.check_safe_array(mut node)
mut index := ast.IndexExpr{
...node
index: t.expr(node.index)
}
return index
}
ast.IfExpr {
for mut branch in node.branches {
@ -124,6 +496,7 @@ pub fn (t Transformer) expr(node ast.Expr) ast.Expr {
...(*branch)
cond: t.expr(branch.cond)
}
t.index.indent(false)
for i, mut stmt in branch.stmts {
t.stmt(mut stmt)
@ -151,7 +524,10 @@ pub fn (t Transformer) expr(node ast.Expr) ast.Expr {
}
}
}
t.index.unindent()
}
// where we place the result of the if when a := if ...
t.expr(node.left)
return node
}
ast.MatchExpr {
@ -163,6 +539,7 @@ pub fn (t Transformer) expr(node ast.Expr) ast.Expr {
for mut expr in branch.exprs {
expr = t.expr(expr)
}
t.index.indent(false)
for i, mut stmt in branch.stmts {
t.stmt(mut stmt)
@ -190,24 +567,34 @@ pub fn (t Transformer) expr(node ast.Expr) ast.Expr {
}
}
}
t.index.unindent()
}
return node
}
ast.AnonFn {
return node
}
ast.PostfixExpr {
return node
}
ast.PrefixExpr {
return node
}
ast.Likely {
return node
}
else {
return node
}
}
}
pub fn (t Transformer) if_expr(mut original ast.IfExpr) ast.Expr {
pub fn (mut t Transformer) if_expr(mut original ast.IfExpr) ast.Expr {
mut stop_index, mut unreachable_branches := -1, []int{cap: original.branches.len}
if original.is_comptime {
return *original
}
for i, mut branch in original.branches {
for mut stmt in branch.stmts {
t.stmt(mut stmt)
}
cond := t.expr(branch.cond)
branch = ast.IfBranch{
...(*branch)
@ -221,6 +608,11 @@ pub fn (t Transformer) if_expr(mut original ast.IfExpr) ast.Expr {
unreachable_branches << i
}
}
t.index.indent(false)
for mut stmt in branch.stmts {
t.stmt(mut stmt)
}
t.index.unindent()
}
if stop_index != -1 {
unreachable_branches = unreachable_branches.filter(it < stop_index)
@ -243,7 +635,7 @@ pub fn (t Transformer) if_expr(mut original ast.IfExpr) ast.Expr {
return *original
}
pub fn (t Transformer) match_expr(mut original ast.MatchExpr) ast.Expr {
pub fn (mut t Transformer) match_expr(mut original ast.MatchExpr) ast.Expr {
cond, mut terminate := t.expr(original.cond), false
original = ast.MatchExpr{
...(*original)
@ -251,13 +643,14 @@ pub fn (t Transformer) match_expr(mut original ast.MatchExpr) ast.Expr {
}
for mut branch in original.branches {
if branch.is_else {
t.index.indent(false)
for mut stmt in branch.stmts {
t.stmt(mut stmt)
}
t.index.unindent()
continue
}
for mut stmt in branch.stmts {
t.stmt(mut stmt)
}
for mut expr in branch.exprs {
expr = t.expr(expr)
@ -314,6 +707,12 @@ pub fn (t Transformer) match_expr(mut original ast.MatchExpr) ast.Expr {
}
}
t.index.indent(false)
for mut stmt in branch.stmts {
t.stmt(mut stmt)
}
t.index.unindent()
if terminate {
break
}
@ -321,7 +720,7 @@ pub fn (t Transformer) match_expr(mut original ast.MatchExpr) ast.Expr {
return *original
}
pub fn (t Transformer) infix_expr(original ast.InfixExpr) ast.Expr {
pub fn (mut t Transformer) infix_expr(original ast.InfixExpr) ast.Expr {
mut node := original
node.left = t.expr(node.left)
node.right = t.expr(node.right)