scanner.v: refactoring

pull/1289/head
d2verb 2019-07-24 07:06:48 +09:00 committed by Alexander Medvednikov
parent 8462e99bc5
commit 88758082d2
5 changed files with 176 additions and 153 deletions

View File

@ -25,6 +25,7 @@ test: v
clean: clean:
-rm -f v.c .v.c v vprod thirdparty/**/*.o -rm -f v.c .v.c v vprod thirdparty/**/*.o
find . -name '.*.c' -print0 | xargs -0 -n1 rm -f
SOURCES = $(wildcard thirdparty/**/*.c) SOURCES = $(wildcard thirdparty/**/*.c)
OBJECTS := ${SOURCES:.c=.o} OBJECTS := ${SOURCES:.c=.o}

View File

@ -823,7 +823,7 @@ fn (p mut Parser) get_type() string {
if p.tok == .lsbr { if p.tok == .lsbr {
p.check(.lsbr) p.check(.lsbr)
// [10]int // [10]int
if p.tok == .integer { if p.tok == .number {
typ = '[$p.lit]' typ = '[$p.lit]'
p.next() p.next()
} }
@ -834,9 +834,9 @@ fn (p mut Parser) get_type() string {
// [10][3]int // [10][3]int
if p.tok == .lsbr { if p.tok == .lsbr {
p.next() p.next()
if p.tok == .integer { if p.tok == .number {
typ += '[$p.lit]' typ += '[$p.lit]'
p.check(.integer) p.check(.number)
} }
else { else {
is_arr2 = true is_arr2 = true
@ -2095,7 +2095,7 @@ fn (p mut Parser) term() string {
p.next() p.next()
p.gen(tok.str())// + ' /*op2*/ ') p.gen(tok.str())// + ' /*op2*/ ')
p.fgen(' ' + tok.str() + ' ') p.fgen(' ' + tok.str() + ' ')
if is_div && p.tok == .integer && p.lit == '0' { if is_div && p.tok == .number && p.lit == '0' {
p.error('division by zero') p.error('division by zero')
} }
if is_mod && (is_float_type(typ) || !is_number_type(typ)) { if is_mod && (is_float_type(typ) || !is_number_type(typ)) {
@ -2129,7 +2129,7 @@ fn (p mut Parser) factor() string {
mut typ := '' mut typ := ''
tok := p.tok tok := p.tok
switch tok { switch tok {
case .integer: case .number:
typ = 'int' typ = 'int'
// Check if float (`1.0`, `1e+3`) but not if is hexa // Check if float (`1.0`, `1e+3`) but not if is hexa
if (p.lit.contains('.') || (p.lit.contains('e') || p.lit.contains('E'))) && if (p.lit.contains('.') || (p.lit.contains('e') || p.lit.contains('E'))) &&
@ -2452,7 +2452,7 @@ fn (p mut Parser) map_init() string {
fn (p mut Parser) array_init() string { fn (p mut Parser) array_init() string {
p.is_alloc = true p.is_alloc = true
p.check(.lsbr) p.check(.lsbr)
is_integer := p.tok == .integer is_integer := p.tok == .number
lit := p.lit lit := p.lit
mut typ := '' mut typ := ''
new_arr_ph := p.cgen.add_placeholder() new_arr_ph := p.cgen.add_placeholder()
@ -3334,7 +3334,7 @@ fn (p mut Parser) return_st() {
} }
else { else {
// Don't allow `return val` in functions that don't return anything // Don't allow `return val` in functions that don't return anything
if false && p.tok == .name || p.tok == .integer { if false && p.tok == .name || p.tok == .number {
p.error('function `$p.cur_fn.name` does not return a value') p.error('function `$p.cur_fn.name` does not return a value')
} }
@ -3529,3 +3529,16 @@ fn (p mut Parser) fspace() {
fn (p mut Parser) fgenln(s string) { fn (p mut Parser) fgenln(s string) {
p.scanner.fgenln(s) p.scanner.fgenln(s)
} }
// peek asks the scanner for the upcoming token without consuming it and
// returns the first one that is not `.nl`.
// NOTE(review): Scanner.peek restores the scanner state after scanning, so if
// it ever reports `.nl` this loop would peek the same token again - confirm
// that the scanner never emits `.nl`.
fn (p mut Parser) peek() Token {
	for {
		next_tok := p.scanner.peek()
		if next_tok == .nl {
			continue
		}
		return next_tok
	}
}
// create_type_string is a parser-side convenience wrapper: it forwards `T`
// and `name` unchanged to the scanner's create_type_string.
fn (p mut Parser) create_type_string(T Type, name string) {
p.scanner.create_type_string(T, name)
}

View File

@ -26,16 +26,11 @@ mut:
prev_tok Token prev_tok Token
} }
const (
SingleQuote = `\'`
//QUOTE = `"`
)
fn new_scanner(file_path string) *Scanner { fn new_scanner(file_path string) *Scanner {
if !os.file_exists(file_path) { if !os.file_exists(file_path) {
panic('"$file_path" doesn\'t exist') panic('"$file_path" doesn\'t exist')
} }
//text := os.read_file(file_path)
mut raw_text := os.read_file(file_path) or { mut raw_text := os.read_file(file_path) or {
panic('scanner: failed to open "$file_path"') panic('scanner: failed to open "$file_path"')
return &Scanner{} return &Scanner{}
@ -60,7 +55,6 @@ fn new_scanner(file_path string) *Scanner {
fmt_out: strings.new_builder(1000) fmt_out: strings.new_builder(1000)
} }
// println('new scanner "$file_path" txt.len=$scanner.text.len')
return scanner return scanner
} }
@ -74,14 +68,6 @@ fn scan_res(tok Token, lit string) ScanRes {
return ScanRes{tok, lit} return ScanRes{tok, lit}
} }
fn is_white(c byte) bool {
return c.is_white()
}
fn is_nl(c byte) bool {
return c == `\r` || c == `\n`
}
fn (s mut Scanner) ident_name() string { fn (s mut Scanner) ident_name() string {
start := s.pos start := s.pos
for { for {
@ -99,51 +85,119 @@ fn (s mut Scanner) ident_name() string {
return name return name
} }
fn (s mut Scanner) ident_number() string { fn (s mut Scanner) ident_hex_number() string {
start := s.pos start_pos := s.pos
is_hex := s.pos + 1 < s.text.len && s.text[s.pos] == `0` && s.text[s.pos + 1] == `x` s.pos += 2 // skip '0x'
is_oct := !is_hex && s.text[s.pos] == `0`
mut is_float := false
for { for {
s.pos++
if s.pos >= s.text.len { if s.pos >= s.text.len {
break break
} }
c := s.text[s.pos] c := s.text[s.pos]
if c == `.` { if !c.is_hex_digit() {
is_float = true break
} }
is_good_hex := is_hex && (c == `x` || (c >= `a` && c <= `f`) || (c >= `A` && c <= `F`))
// 1e+3, 1e-3, 1e3
if !is_hex && c == `e` && s.pos + 1 < s.text.len {
next := s.text[s.pos + 1]
if next == `+` || next == `-` || next.is_digit() {
s.pos++ s.pos++
continue
} }
} number := s.text.substr(start_pos, s.pos)
if !c.is_digit() && c != `.` && !is_good_hex {
break
}
// 1..9
if c == `.` && s.pos + 1 < s.text.len && s.text[s.pos + 1] == `.` {
break
}
if is_oct && c >= `8` && !is_float {
s.error('malformed octal constant')
}
}
number := s.text.substr(start, s.pos)
s.pos-- s.pos--
return number return number
} }
// ident_oct_number consumes a run of digits starting at s.pos and returns it
// as the literal text. Any digit outside 0-7 aborts with
// 'malformed octal constant'. On return s.pos is left on the last consumed
// character.
fn (s mut Scanner) ident_oct_number() string {
	first := s.pos
	for s.pos < s.text.len {
		ch := s.text[s.pos]
		// the literal ends at the first non-digit
		if !ch.is_digit() {
			break
		}
		// 8 and 9 are digits but not valid in an octal constant
		if !ch.is_oct_digit() {
			s.error('malformed octal constant')
		}
		s.pos++
	}
	// step back onto the last consumed character before slicing
	s.pos--
	return s.text.substr(first, s.pos + 1)
}
// ident_dec_number scans a decimal literal starting at s.pos and returns its
// text. The literal consists of an optional integer part, an optional
// fractional part (`.` followed by digits) and an optional exponent
// (`e+N`, `e-N` or `eN`).
//
// A `..` directly after the integer part (e.g. `1..9`) ends the literal so
// that the range operator is left for the next scan.
//
// Errors (via s.error):
//   - 'exponent has no digits' when `e+`/`e-` is not followed by a digit
//   - 'exponential part should be integer' for input like `123.e+3.4`
//   - 'too many decimal points in number' for input like `1.23.4`
//
// On return s.pos is left on the last consumed character.
// Every read of s.text is bounds-checked so a literal at the very end of the
// input does not index past the text.
fn (s mut Scanner) ident_dec_number() string {
	start_pos := s.pos
	// scan integer part
	for s.pos < s.text.len && s.text[s.pos].is_digit() {
		s.pos++
	}
	// e.g. 1..9
	// we just return '1' and don't scan '..9'
	if s.expect('..', s.pos) {
		number := s.text.substr(start_pos, s.pos)
		s.pos--
		return number
	}
	// scan fractional part
	if s.pos < s.text.len && s.text[s.pos] == `.` {
		s.pos++
		for s.pos < s.text.len && s.text[s.pos].is_digit() {
			s.pos++
		}
	}
	// scan exponential part: `e+3`, `e-3`, or a bare `e3`
	mut has_exponential_part := false
	has_signed_exp := s.expect('e+', s.pos) || s.expect('e-', s.pos)
	has_bare_exp := s.pos + 1 < s.text.len && s.text[s.pos] == `e` && s.text[s.pos + 1].is_digit()
	if has_signed_exp || has_bare_exp {
		// skip the `e` and, when present, the sign
		if has_signed_exp {
			s.pos += 2
		}
		else {
			s.pos++
		}
		exp_start_pos := s.pos
		for s.pos < s.text.len && s.text[s.pos].is_digit() {
			s.pos++
		}
		if exp_start_pos == s.pos {
			s.error('exponent has no digits')
		}
		has_exponential_part = true
	}
	// error check: 1.23.4, 123.e+3.4
	if s.pos < s.text.len && s.text[s.pos] == `.` {
		if has_exponential_part {
			s.error('exponential part should be integer')
		}
		else {
			s.error('too many decimal points in number')
		}
	}
	number := s.text.substr(start_pos, s.pos)
	s.pos--
	return number
}
// ident_number picks the scanner for the numeric literal at s.pos based on
// its prefix:
//   `0x`            -> ident_hex_number
//   `0.` or `0e`    -> ident_dec_number (a decimal that starts with zero)
//   any other `0…`  -> ident_oct_number
//   everything else -> ident_dec_number
fn (s mut Scanner) ident_number() string {
	if s.expect('0x', s.pos) {
		return s.ident_hex_number()
	}
	// `0.` / `0e` start a decimal literal, not an octal one
	zero_led_decimal := s.expect('0.', s.pos) || s.expect('0e', s.pos)
	if !zero_led_decimal && s.text[s.pos] == `0` {
		return s.ident_oct_number()
	}
	return s.ident_dec_number()
}
fn (s Scanner) has_gone_over_line_end() bool { fn (s Scanner) has_gone_over_line_end() bool {
mut i := s.pos-1 mut i := s.pos-1
for i >= 0 && !is_white(s.text[i]) { for i >= 0 && !s.text[i].is_white() {
i-- i--
} }
for i >= 0 && is_white(s.text[i]) { for i >= 0 && s.text[i].is_white() {
if is_nl(s.text[i]) { if is_nl(s.text[i]) {
return true return true
} }
@ -153,45 +207,21 @@ fn (s Scanner) has_gone_over_line_end() bool {
} }
fn (s mut Scanner) skip_whitespace() { fn (s mut Scanner) skip_whitespace() {
for s.pos < s.text.len && is_white(s.text[s.pos]) { for s.pos < s.text.len && s.text[s.pos].is_white() {
if is_nl(s.text[s.pos]) {
// Count \r\n as one line // Count \r\n as one line
if !(s.text[s.pos] == `\n` && s.pos > 0 && s.text[s.pos-1] == `\r`) { if is_nl(s.text[s.pos]) && !s.expect('\r\n', s.pos-1) {
s.line_nr++ s.line_nr++
} }
}
s.pos++ s.pos++
} }
// if s.pos == s.text.len {
// return scan_res(.eof, '')
// }
} }
fn (s mut Scanner) get_var_name(pos int) string {
mut pos_start := pos
for ; pos_start >= 0 && s.text[pos_start] != `\n` && s.text[pos_start] != `;`; pos_start-- {}
pos_start++
return s.text.substr(pos_start, pos)
}
// CAO stands for Compound Assignment Operators (e.g '+=' )
/*
fn (s mut Scanner) cao_change(operator string) {
s.text = s.text.substr(0, s.pos - operator.len) + ' = ' + s.get_var_name(s.pos - operator.len) + ' ' + operator + ' ' + s.text.substr(s.pos + 1, s.text.len)
}
*/
fn (s mut Scanner) scan() ScanRes { fn (s mut Scanner) scan() ScanRes {
if s.line_comment != '' { if s.line_comment != '' {
//s.fgenln('// LOL "$s.line_comment"') //s.fgenln('// LOL "$s.line_comment"')
//s.line_comment = '' //s.line_comment = ''
} }
// if s.file_path == 'd.v' {
// println('\nscan()')
// }
if s.started { if s.started {
// || (s.pos == 0 && s.text.len > 0 && s.text[s.pos] == `\n`) {
s.pos++ s.pos++
} }
s.started = true s.started = true
@ -204,8 +234,7 @@ if s.line_comment != '' {
} }
// End of $var, start next string // End of $var, start next string
if s.dollar_end { if s.dollar_end {
// fmt.Println("end of $var, get string", s.pos, string(s.text[s.pos])) if s.text[s.pos] == `\'` {
if s.text[s.pos] == SingleQuote {
s.dollar_end = false s.dollar_end = false
return scan_res(.str, '') return scan_res(.str, '')
} }
@ -215,7 +244,6 @@ if s.line_comment != '' {
s.skip_whitespace() s.skip_whitespace()
// end of file // end of file
if s.pos >= s.text.len { if s.pos >= s.text.len {
// println('scan(): returning .eof (pos >= len)')
return scan_res(.eof, '') return scan_res(.eof, '')
} }
// handle each char // handle each char
@ -230,26 +258,19 @@ if s.line_comment != '' {
// tmp hack to detect . in ${} // tmp hack to detect . in ${}
// Check if not .eof to prevent panic // Check if not .eof to prevent panic
next_char := if s.pos + 1 < s.text.len { s.text[s.pos + 1] } else { `\0` } next_char := if s.pos + 1 < s.text.len { s.text[s.pos + 1] } else { `\0` }
// println('!!! got name=$name next_char=$next_char')
if is_key(name) { if is_key(name) {
// println('IS KEY')
// tok := (key_to_token(name))
// println(tok.str())
return scan_res(key_to_token(name), '') return scan_res(key_to_token(name), '')
} }
// 'asdf $b' => "b" is the last name in the string, dont start parsing string // 'asdf $b' => "b" is the last name in the string, dont start parsing string
// at the next ', skip it // at the next ', skip it
if s.inside_string { if s.inside_string {
// println('is_letter inside string! nextc=${nextc.str()}') if next_char == `\'` {
if next_char == SingleQuote {
// println('var is last before QUOTE')
s.pos++ s.pos++
s.dollar_start = false s.dollar_start = false
s.inside_string = false s.inside_string = false
} }
} }
if s.dollar_start && next_char != `.` {//&& next_char != `(` { if s.dollar_start && next_char != `.` {
// println('INSIDE .str .dollar var=$name')
s.dollar_end = true s.dollar_end = true
s.dollar_start = false s.dollar_start = false
} }
@ -263,7 +284,7 @@ if s.line_comment != '' {
// `123`, `.123` // `123`, `.123`
else if c.is_digit() || c == `.` && nextc.is_digit() { else if c.is_digit() || c == `.` && nextc.is_digit() {
num := s.ident_number() num := s.ident_number()
return scan_res(.integer, num) return scan_res(.number, num)
} }
// all other tokens // all other tokens
switch c { switch c {
@ -307,7 +328,7 @@ if s.line_comment != '' {
return scan_res(.mod, '') return scan_res(.mod, '')
case `?`: case `?`:
return scan_res(.question, '') return scan_res(.question, '')
case SingleQuote: case `\'`:
return scan_res(.str, s.ident_string()) return scan_res(.str, s.ident_string())
// TODO allow double quotes // TODO allow double quotes
// case QUOTE: // case QUOTE:
@ -336,7 +357,7 @@ if s.line_comment != '' {
if s.inside_string { if s.inside_string {
s.pos++ s.pos++
// TODO UN.neEDED? // TODO UN.neEDED?
if s.text[s.pos] == SingleQuote { if s.text[s.pos] == `\'` {
s.inside_string = false s.inside_string = false
return scan_res(.str, '') return scan_res(.str, '')
} }
@ -458,7 +479,6 @@ if s.line_comment != '' {
return scan_res(.div_assign, '') return scan_res(.div_assign, '')
} }
if nextc == `/` { if nextc == `/` {
// debug("!!!!!!.key_goT LI.ne COM")
start := s.pos + 1 start := s.pos + 1
for s.pos < s.text.len && s.text[s.pos] != `\n`{ for s.pos < s.text.len && s.text[s.pos] != `\n`{
s.pos++ s.pos++
@ -485,11 +505,11 @@ if s.line_comment != '' {
s.line_nr++ s.line_nr++
continue continue
} }
if s.text[s.pos] == `/` && s.text[s.pos + 1] == `*` { if s.expect('/*', s.pos) {
nest_count++ nest_count++
continue continue
} }
if s.text[s.pos] == `*` && s.text[s.pos + 1] == `/` { if s.expect('*/', s.pos) {
nest_count-- nest_count--
} }
} }
@ -507,7 +527,6 @@ if s.line_comment != '' {
return scan_res(.eof, '') return scan_res(.eof, '')
} }
} }
println('(char code=$c) pos=$s.pos len=$s.text.len')
mut msg := 'invalid character `${c.str()}`' mut msg := 'invalid character `${c.str()}`'
if c == `"` { if c == `"` {
msg += ', use \' to denote strings' msg += ', use \' to denote strings'
@ -520,9 +539,6 @@ fn (s &Scanner) error(msg string) {
file := s.file_path.all_after('/') file := s.file_path.all_after('/')
println('panic: $file:${s.line_nr + 1}') println('panic: $file:${s.line_nr + 1}')
println(msg) println(msg)
// os.print_backtrace()
// println(file)
// println(s.file_path)
exit(1) exit(1)
} }
@ -531,10 +547,6 @@ fn (s &Scanner) error(msg string) {
fn (s mut Scanner) ident_string() string { fn (s mut Scanner) ident_string() string {
// println("\nidentString() at char=", string(s.text[s.pos]), // println("\nidentString() at char=", string(s.text[s.pos]),
// "chard=", s.text[s.pos], " pos=", s.pos, "txt=", s.text[s.pos:s.pos+7]) // "chard=", s.text[s.pos], " pos=", s.pos, "txt=", s.text[s.pos:s.pos+7])
debug := s.file_path.contains('test_test')
if debug {
println('identStr() $s.file_path line=$s.line_nr pos=$s.pos')
}
mut start := s.pos mut start := s.pos
s.inside_string = false s.inside_string = false
slash := `\\` slash := `\\`
@ -544,12 +556,9 @@ fn (s mut Scanner) ident_string() string {
break break
} }
c := s.text[s.pos] c := s.text[s.pos]
if debug {
println(c.str())
}
prevc := s.text[s.pos - 1] prevc := s.text[s.pos - 1]
// end of string // end of string
if c == SingleQuote && (prevc != slash || (prevc == slash && s.text[s.pos - 2] == slash)) { if c == `\'` && (prevc != slash || (prevc == slash && s.text[s.pos - 2] == slash)) {
// handle '123\\' slash at the end // handle '123\\' slash at the end
break break
} }
@ -561,8 +570,7 @@ fn (s mut Scanner) ident_string() string {
s.error('0 character in a string literal') s.error('0 character in a string literal')
} }
// Don't allow \x00 // Don't allow \x00
if c == `0` && s.pos > 5 && s.text[s.pos - 1] == `0` && s.text[s.pos - 2] == `x` && if c == `0` && s.pos > 5 && s.expect('\\x0', s.pos - 3) {
s.text[s.pos - 3] == `\\` {
s.error('0 character in a string literal') s.error('0 character in a string literal')
} }
// ${var} // ${var}
@ -573,17 +581,15 @@ fn (s mut Scanner) ident_string() string {
break break
} }
// $var // $var
// if !s.is_fmt && c != `{` && c != ` ` && ! (c >= `0` && c <= `9`) && prevc == `$` {
if (c.is_letter() || c == `_`) && prevc == `$` { if (c.is_letter() || c == `_`) && prevc == `$` {
s.inside_string = true s.inside_string = true
s.dollar_start = true s.dollar_start = true
// println('setting s.dollar=true pos=$s.pos')
s.pos -= 2 s.pos -= 2
break break
} }
} }
mut lit := '' mut lit := ''
if s.text[start] == SingleQuote { if s.text[start] == `\'` {
start++ start++
} }
mut end := s.pos mut end := s.pos
@ -594,17 +600,6 @@ fn (s mut Scanner) ident_string() string {
else { else {
lit = s.text.substr(start, end) lit = s.text.substr(start, end)
} }
// if lit.contains('\n') {
// println('\nstring lit="$lit" pos=$s.pos line=$s.line_nr')
// }
/*
for c in lit {
if s.file_path.contains('range_test') {
println('!')
println(c)
}
}
*/
return lit return lit
} }
@ -620,7 +615,7 @@ fn (s mut Scanner) ident_char() string {
if s.text[s.pos] != slash { if s.text[s.pos] != slash {
len++ len++
} }
double_slash := s.text[s.pos - 1] == slash && s.text[s.pos - 2] == slash double_slash := s.expect('\\\\', s.pos - 2)
if s.text[s.pos] == `\`` && (s.text[s.pos - 1] != slash || double_slash) { if s.text[s.pos] == `\`` && (s.text[s.pos - 1] != slash || double_slash) {
if double_slash { if double_slash {
len++ len++
@ -639,24 +634,18 @@ fn (s mut Scanner) ident_char() string {
return c return c
} }
fn (p mut Parser) peek() Token {
for {
tok := p.scanner.peek()
if tok != .nl {
return tok
}
}
}
fn (s mut Scanner) peek() Token { fn (s mut Scanner) peek() Token {
// save scanner state
pos := s.pos pos := s.pos
line := s.line_nr line := s.line_nr
inside_string := s.inside_string inside_string := s.inside_string
dollar_start := s.dollar_start dollar_start := s.dollar_start
dollar_end := s.dollar_end dollar_end := s.dollar_end
// /////
res := s.scan() res := s.scan()
tok := res.tok tok := res.tok
// restore scanner state
s.pos = pos s.pos = pos
s.line_nr = line s.line_nr = line
s.inside_string = inside_string s.inside_string = inside_string
@ -665,30 +654,42 @@ fn (s mut Scanner) peek() Token {
return tok return tok
} }
// expect reports whether the bytes of `want` occur in s.text beginning at
// index `start_pos`. It returns false instead of indexing out of range when
// `start_pos` is negative or not inside the text, or when the compared span
// would run past the end of the text.
fn (s mut Scanner) expect(want string, start_pos int) bool {
	end_pos := start_pos + want.len
	starts_inside := start_pos >= 0 && start_pos < s.text.len
	ends_inside := end_pos >= 0 && end_pos <= s.text.len
	if !starts_inside || !ends_inside {
		return false
	}
	// compare byte by byte, bailing out on the first mismatch
	for i := 0; i < want.len; i++ {
		if want[i] != s.text[start_pos + i] {
			return false
		}
	}
	return true
}
fn (s mut Scanner) debug_tokens() { fn (s mut Scanner) debug_tokens() {
s.pos = 0 s.pos = 0
s.debug = true
fname := s.file_path.all_after('/') fname := s.file_path.all_after('/')
println('\n===DEBUG TOKENS $fname===') println('\n===DEBUG TOKENS $fname===')
// allToks := ''
s.debug = true
for { for {
res := s.scan() res := s.scan()
tok := res.tok tok := res.tok
lit := res.lit lit := res.lit
// printiln(tok)
print(tok.str()) print(tok.str())
// allToks += tok.String()
if lit != '' { if lit != '' {
println(' `$lit`') println(' `$lit`')
// allToks += " `" + lit + "`"
} }
else { else {
println('') println('')
} }
// allToks += "\n"
if tok == .eof { if tok == .eof {
println('============ END OF DEBUG TOKENS ==================') println('============ END OF DEBUG TOKENS ==================')
// fmt.Println("========"+s.file+"========\n", allToks)
break break
} }
} }
@ -698,6 +699,10 @@ fn is_name_char(c byte) bool {
return c.is_letter() || c == `_` return c.is_letter() || c == `_`
} }
// is_nl reports whether `c` is a line-break byte: carriage return or line feed.
fn is_nl(c byte) bool {
	if c == `\r` {
		return true
	}
	return c == `\n`
}
fn (s mut Scanner) get_opening_bracket() int { fn (s mut Scanner) get_opening_bracket() int {
mut pos := s.pos mut pos := s.pos
mut parentheses := 0 mut parentheses := 0
@ -740,7 +745,3 @@ fn (s mut Scanner) create_type_string(T Type, name string) {
s.line_nr = line s.line_nr = line
s.inside_string = inside_string s.inside_string = inside_string
} }
fn (p mut Parser) create_type_string(T Type, name string) {
p.scanner.create_type_string(T, name)
}

View File

@ -7,7 +7,7 @@ module main
enum Token { enum Token {
eof eof
name // user name // user
integer // 123 number // 123
str // 'foo' str // 'foo'
str_inter // 'name=$user.name' str_inter // 'name=$user.name'
chartoken // `A` chartoken // `A`
@ -127,7 +127,7 @@ fn build_token_str() []string {
s[Token.keyword_end] = '' s[Token.keyword_end] = ''
s[Token.eof] = '.eof' s[Token.eof] = '.eof'
s[Token.name] = '.name' s[Token.name] = '.name'
s[Token.integer] = '.integer' s[Token.number] = '.number'
s[Token.str] = 'STR' s[Token.str] = 'STR'
s[Token.chartoken] = '.chartoken' s[Token.chartoken] = '.chartoken'
s[Token.plus] = '+' s[Token.plus] = '+'

View File

@ -714,6 +714,14 @@ pub fn (c byte) is_digit() bool {
return c >= `0` && c <= `9` return c >= `0` && c <= `9`
} }
// is_hex_digit reports whether `c` is an ASCII hexadecimal digit:
// `0`-`9`, `a`-`f` or `A`-`F`.
pub fn (c byte) is_hex_digit() bool {
	if c.is_digit() {
		return true
	}
	is_lower_hex := c >= `a` && c <= `f`
	is_upper_hex := c >= `A` && c <= `F`
	return is_lower_hex || is_upper_hex
}
// is_oct_digit reports whether `c` is an ASCII octal digit, `0` through `7`.
pub fn (c byte) is_oct_digit() bool {
	if c < `0` {
		return false
	}
	return c <= `7`
}
pub fn (c byte) is_letter() bool { pub fn (c byte) is_letter() bool {
return (c >= `a` && c <= `z`) || (c >= `A` && c <= `Z`) return (c >= `a` && c <= `z`) || (c >= `A` && c <= `Z`)
} }