From 597a774d365aa8f5d5d718cea52f6ba1e5848ec5 Mon Sep 17 00:00:00 2001 From: Delyan Angelov Date: Mon, 3 May 2021 14:14:32 +0300 Subject: [PATCH] scanner: speed up Scanner.skip_whitespace (~2-3% speed up of `-o x.c cmd/v`) --- vlib/builtin/string.v | 11 ++++++----- vlib/math/mathutil/mathutil.v | 18 +++--------------- vlib/v/scanner/scanner.v | 34 ++++++++++++++++++++-------------- vlib/v/util/scanning.v | 5 ----- 4 files changed, 29 insertions(+), 39 deletions(-) diff --git a/vlib/builtin/string.v b/vlib/builtin/string.v index da0fcc0ec6..b4d78229b5 100644 --- a/vlib/builtin/string.v +++ b/vlib/builtin/string.v @@ -952,7 +952,7 @@ pub fn (s string) ends_with(p string) bool { return false } for i in 0 .. p.len { - if p[i] != s[s.len - p.len + i] { + if unsafe { p.str[i] != s.str[s.len - p.len + i] } { return false } } @@ -1087,12 +1087,13 @@ pub fn (s string) find_between(start string, end string) string { } // is_space returns `true` if the byte is a white space character. -// The following list is considered white space characters: ` `, `\n`, `\t`, `\v`, `\f`, `\r`, 0x85, 0xa0 +// The following list is considered white space characters: ` `, `\t`, `\n`, `\v`, `\f`, `\r`, 0x85, 0xa0 // Example: assert byte(` `).is_space() == true +[inline] pub fn (c byte) is_space() bool { - // 0x0085 is NEXT LINE (NEL) - // 0x00a0 is NO-BREAK SPACE - return c in [` `, `\n`, `\t`, `\v`, `\f`, `\r`, 0x85, 0xa0] + // 0x85 is NEXT LINE (NEL) + // 0xa0 is NO-BREAK SPACE + return c == 32 || (c > 8 && c < 14) || (c == 0x85) || (c == 0xa0) } // trim_space strips any of ` `, `\n`, `\t`, `\v`, `\f`, `\r` from the start and end of the string. diff --git a/vlib/math/mathutil/mathutil.v b/vlib/math/mathutil/mathutil.v index 9ebb991983..0930e2618e 100644 --- a/vlib/math/mathutil/mathutil.v +++ b/vlib/math/mathutil/mathutil.v @@ -5,27 +5,15 @@ module mathutil [inline] pub fn min(a T, b T) T { - if a < b { - return a - } else { - return b - } + return if a < b { a } else { b } } [inline] pub fn max(a T, b T) T { - if a > b { - return a - } else { - return b - } + return if a > b { a } else { b } } [inline] pub fn abs(a T) T { - if a > 0 { - return a - } else { - return -a - } + return if a > 0 { a } else { -a } } diff --git a/vlib/v/scanner/scanner.v b/vlib/v/scanner/scanner.v index 3affa24830..8826cbf923 100644 --- a/vlib/v/scanner/scanner.v +++ b/vlib/v/scanner/scanner.v @@ -17,6 +17,8 @@ const ( double_quote = `"` // char used as number separator num_sep = `_` + b_lf = 10 + b_cr = 13 ) pub struct Scanner { @@ -492,18 +494,22 @@ fn (mut s Scanner) ident_number() string { } } -[inline] +[direct_array_access; inline] fn (mut s Scanner) skip_whitespace() { - // if s.is_vh { println('vh') return } - for s.pos < s.text.len && s.text[s.pos].is_space() { - if util.is_nl(s.text[s.pos]) && s.is_vh { + for s.pos < s.text.len { + c := s.text[s.pos] + if !c.is_space() { return } - if s.pos + 1 < s.text.len && s.text[s.pos] == `\r` && s.text[s.pos + 1] == `\n` { + c_is_nl := c == scanner.b_cr || c == scanner.b_lf + if c_is_nl && s.is_vh { + return + } + if s.pos + 1 < s.text.len && c == scanner.b_cr && s.text[s.pos + 1] == scanner.b_lf { s.is_crlf = true } // Count \r\n as one line - if util.is_nl(s.text[s.pos]) && !s.expect('\r\n', s.pos - 1) { + if c_is_nl && !(s.pos > 0 && s.text[s.pos - 1] == scanner.b_cr && c == scanner.b_lf) { s.inc_line_number() } s.pos++ @@ -975,7 +981,7 @@ fn (mut s Scanner) text_scan() token.Token { start := s.pos + 1 s.ignore_line() mut comment_line_end := s.pos - if s.text[s.pos - 1] == `\r` { + if s.text[s.pos - 1] == scanner.b_cr { comment_line_end-- } else { // fix line_nr, \n was read; the comment is marked on the next line @@ -987,7 +993,7 @@ fn (mut s Scanner) text_scan() token.Token { mut comment := s.line_comment // Find out if this comment is on its own line (for vfmt) mut is_separate_line_comment := true - for j := start - 2; j >= 0 && s.text[j] != `\n`; j-- { + for j := start - 2; j >= 0 && s.text[j] != scanner.b_lf; j-- { if s.text[j] !in [`\t`, ` `] { is_separate_line_comment = false } @@ -1015,7 +1021,7 @@ fn (mut s Scanner) text_scan() token.Token { s.line_nr-- s.error('comment not terminated') } - if s.text[s.pos] == `\n` { + if s.text[s.pos] == scanner.b_lf { s.inc_line_number() continue } @@ -1098,7 +1104,7 @@ fn (mut s Scanner) ident_string() string { if start_char == s.quote || (start_char == s.inter_quote && (s.is_inter_start || s.is_enclosed_inter)) { start++ - } else if start_char == `\n` { + } else if start_char == scanner.b_lf { s.inc_line_number() } s.is_inside_string = false @@ -1120,10 +1126,10 @@ fn (mut s Scanner) ident_string() string { if c == s.inter_quote && (s.is_inter_start || s.is_enclosed_inter) { break } - if c == `\r` { + if c == scanner.b_cr { n_cr_chars++ } - if c == `\n` { + if c == scanner.b_lf { s.inc_line_number() } // Don't allow \0 @@ -1288,9 +1294,9 @@ fn (mut s Scanner) ignore_line() { s.inc_line_number() } -[inline] +[direct_array_access; inline] fn (mut s Scanner) eat_to_end_of_line() { - for s.pos < s.text.len && s.text[s.pos] != `\n` { + for s.pos < s.text.len && s.text[s.pos] != scanner.b_lf { s.pos++ } } diff --git a/vlib/v/util/scanning.v b/vlib/v/util/scanning.v index e17a39e804..997ffa666d 100644 --- a/vlib/v/util/scanning.v +++ b/vlib/v/util/scanning.v @@ -10,11 +10,6 @@ pub fn is_func_char(c byte) bool { return (c >= `a` && c <= `z`) || (c >= `A` && c <= `Z`) || c == `_` || c.is_digit() } -[inline] -pub fn is_nl(c byte) bool { - return c == `\r` || c == `\n` -} - pub fn contains_capital(s string) bool { for c in s { if c >= `A` && c <= `Z` {