v/vlib/compiler/token.v

309 lines
6.4 KiB
V
Raw Normal View History

2019-06-23 04:21:30 +02:00
// Copyright (c) 2019 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module compiler
2019-06-22 20:20:28 +02:00
2019-10-23 12:03:14 +02:00
// Token describes one token of the source text.
struct Token {
	tok      TokenKind // kind of the token (an enum value, so comparisons are quick)
	lit      string    // literal representation of the token
	line_nr  int       // line number in the source where the token occurred
	name_idx int       // index into the name table, for O(1) lookup
	pos      int       // position of the token in the scanner text
}
// TokenKind enumerates every kind of token.
// The declaration order is significant: the numeric value of each member is
// used as an index into TokenStr, Token.str() relies on the literal-like
// kinds being declared before `plus`, and build_keys() walks the members
// strictly between `keyword_beg` and `keyword_end`.
enum TokenKind {
	eof
	// literal-like kinds (all declared before `plus`)
	name // user
	number // 123
	str // 'foo'
	str_inter // 'name=$user.name'
	chartoken // `A`
	// operators and punctuation
	plus // +
	minus // -
	mul // *
	div // /
	mod // %
	xor // ^
	pipe // |
	inc // ++
	dec // --
	and // &&
	logical_or // ||
	not // !
	bit_not // ~
	question // ?
	comma // ,
	semicolon // ;
	colon // :
	arrow // =>
	left_arrow // <-
	amp // &
	hash // #
	dollar // $
	str_dollar
	left_shift // <<
	righ_shift // >>
	// at // @
	// assignments
	assign // =
	decl_assign // :=
	plus_assign // +=
	minus_assign // -=
	div_assign // /=
	mult_assign // *=
	xor_assign // ^=
	mod_assign // %=
	or_assign // |=
	and_assign // &=
	righ_shift_assign // >>=
	left_shift_assign // <<=
	// { } ( ) [ ]
	lcbr
	rcbr
	lpar
	rpar
	lsbr
	rsbr
	// == != > < >= <=
	eq
	ne
	gt
	lt
	ge
	le
	// comments
	line_comment
	mline_comment
	nl
	dot // .
	dotdot // ..
	ellipsis // ...
	// keywords: every member between the two markers below is a keyword
	keyword_beg
	key_as
	key_asm
	key_assert
	key_atomic
	key_break
	key_const
	key_continue
	key_defer
	key_else
	key_embed
	key_enum
	key_false
	key_for
	key_fn
	key_global
	key_go
	key_goto
	key_if
	key_import
	key_import_const
	key_in
	key_interface
	// key_it
	key_match
	key_module
	key_mut
	key_none
	key_return
	key_select
	key_sizeof
	key_offsetof
	key_struct
	key_switch
	key_true
	key_type
	// typeof
	key_orelse
	key_union
	key_pub
	key_static
	key_unsafe
	keyword_end
}
// build_keys generates a map from each keyword's string value to the
// integer value of its TokenKind:
// Keywords['return'] == .key_return
// Only the kinds strictly between keyword_beg and keyword_end are added;
// their spelling is read from TokenStr.
fn build_keys() map[string]int {
	first := int(TokenKind.keyword_beg) + 1
	last := int(TokenKind.keyword_end)
	mut keys := map[string]int
	for idx := first; idx < last; idx++ {
		word := TokenStr[idx]
		keys[word] = idx
	}
	return keys
}
// build_token_str builds the table that maps the integer value of a
// TokenKind to its printable string. The table has NrTokens entries, all
// initially '', and every kind listed below overwrites its own slot.
// The entries of the key_* kinds double as the keyword spellings that
// build_keys() reads back, so they must match the source spelling exactly.
// TODO remove once we have `enum TokenKind { name('name') if('if') ... }`
fn build_token_str() []string {
	mut s := [''].repeat(NrTokens)
	// the keyword range markers have no textual form
	s[TokenKind.keyword_beg] = ''
	s[TokenKind.keyword_end] = ''
	s[TokenKind.eof] = 'eof'
	s[TokenKind.name] = 'name'
	s[TokenKind.number] = 'number'
	s[TokenKind.str] = 'STR'
	// str_inter previously had no entry, so its str() was an empty string
	s[TokenKind.str_inter] = 'str_inter'
	s[TokenKind.chartoken] = 'char'
	s[TokenKind.plus] = '+'
	s[TokenKind.minus] = '-'
	s[TokenKind.mul] = '*'
	s[TokenKind.div] = '/'
	s[TokenKind.mod] = '%'
	s[TokenKind.xor] = '^'
	s[TokenKind.bit_not] = '~'
	s[TokenKind.pipe] = '|'
	s[TokenKind.hash] = '#'
	s[TokenKind.amp] = '&'
	s[TokenKind.inc] = '++'
	s[TokenKind.dec] = '--'
	s[TokenKind.and] = '&&'
	s[TokenKind.logical_or] = '||'
	s[TokenKind.not] = '!'
	s[TokenKind.dot] = '.'
	s[TokenKind.dotdot] = '..'
	s[TokenKind.ellipsis] = '...'
	s[TokenKind.comma] = ','
	// s[TokenKind.at] = '@'
	s[TokenKind.semicolon] = ';'
	s[TokenKind.colon] = ':'
	s[TokenKind.arrow] = '=>'
	// left_arrow previously had no entry, so its str() was an empty string
	s[TokenKind.left_arrow] = '<-'
	s[TokenKind.assign] = '='
	s[TokenKind.decl_assign] = ':='
	s[TokenKind.plus_assign] = '+='
	s[TokenKind.minus_assign] = '-='
	s[TokenKind.mult_assign] = '*='
	s[TokenKind.div_assign] = '/='
	s[TokenKind.xor_assign] = '^='
	s[TokenKind.mod_assign] = '%='
	s[TokenKind.or_assign] = '|='
	s[TokenKind.and_assign] = '&='
	s[TokenKind.righ_shift_assign] = '>>='
	s[TokenKind.left_shift_assign] = '<<='
	s[TokenKind.lcbr] = '{'
	s[TokenKind.rcbr] = '}'
	s[TokenKind.lpar] = '('
	s[TokenKind.rpar] = ')'
	s[TokenKind.lsbr] = '['
	s[TokenKind.rsbr] = ']'
	s[TokenKind.eq] = '=='
	s[TokenKind.ne] = '!='
	s[TokenKind.gt] = '>'
	s[TokenKind.lt] = '<'
	s[TokenKind.ge] = '>='
	s[TokenKind.le] = '<='
	s[TokenKind.question] = '?'
	s[TokenKind.left_shift] = '<<'
	s[TokenKind.righ_shift] = '>>'
	s[TokenKind.line_comment] = '// line comment'
	s[TokenKind.mline_comment] = '/* mline comment */'
	s[TokenKind.nl] = 'NLL'
	s[TokenKind.dollar] = '$'
	s[TokenKind.str_dollar] = '$2'
	// keywords
	s[TokenKind.key_assert] = 'assert'
	s[TokenKind.key_struct] = 'struct'
	s[TokenKind.key_if] = 'if'
	// s[TokenKind.key_it] = 'it'
	s[TokenKind.key_else] = 'else'
	s[TokenKind.key_asm] = 'asm'
	s[TokenKind.key_return] = 'return'
	s[TokenKind.key_module] = 'module'
	s[TokenKind.key_sizeof] = 'sizeof'
	s[TokenKind.key_go] = 'go'
	s[TokenKind.key_goto] = 'goto'
	s[TokenKind.key_const] = 'const'
	s[TokenKind.key_mut] = 'mut'
	s[TokenKind.key_type] = 'type'
	s[TokenKind.key_for] = 'for'
	s[TokenKind.key_switch] = 'switch'
	s[TokenKind.key_fn] = 'fn'
	s[TokenKind.key_true] = 'true'
	s[TokenKind.key_false] = 'false'
	s[TokenKind.key_continue] = 'continue'
	s[TokenKind.key_break] = 'break'
	s[TokenKind.key_import] = 'import'
	s[TokenKind.key_embed] = 'embed'
	s[TokenKind.key_unsafe] = 'unsafe'
	// Tokens[key_typeof] = 'typeof'
	s[TokenKind.key_enum] = 'enum'
	s[TokenKind.key_interface] = 'interface'
	s[TokenKind.key_pub] = 'pub'
	s[TokenKind.key_import_const] = 'import_const'
	s[TokenKind.key_in] = 'in'
	s[TokenKind.key_atomic] = 'atomic'
	s[TokenKind.key_orelse] = 'or'
	s[TokenKind.key_global] = '__global'
	s[TokenKind.key_union] = 'union'
	s[TokenKind.key_static] = 'static'
	s[TokenKind.key_as] = 'as'
	s[TokenKind.key_defer] = 'defer'
	s[TokenKind.key_match] = 'match'
	s[TokenKind.key_select] = 'select'
	s[TokenKind.key_none] = 'none'
	s[TokenKind.key_offsetof] = '__offsetof'
	return s
}
const (
	// Length of the TokenStr table. build_token_str() indexes the table by
	// TokenKind value, so this must stay larger than the last enum member.
	NrTokens = 141
	// Printable string for each TokenKind, indexed by its integer value.
	TokenStr = build_token_str()
	// Keyword spelling -> integer TokenKind value. build_keys() reads
	// TokenStr, so it is declared after it.
	KEYWORDS = build_keys()
)
// key_to_token looks `key` up in KEYWORDS and returns the stored value as a
// TokenKind.
// NOTE(review): a key that is not in the map presumably yields 0 (.eof);
// is_key() depends on that - confirm against the map implementation.
fn key_to_token(key string) TokenKind {
	idx := KEYWORDS[key]
	return TokenKind(idx)
}
// is_key reports whether key_to_token() maps `key` to a kind whose integer
// value is greater than zero.
fn is_key(key string) bool {
	kind := key_to_token(key)
	return int(kind) > 0
}
// str returns the TokenStr entry stored for this kind.
pub fn (t TokenKind) str() string {
	idx := int(t)
	return TokenStr[idx]
}
// is_decl reports whether `t` is one of the kinds listed below
// (declaration keywords, `pub`, and end of file).
fn (t TokenKind) is_decl() bool {
	decl_kinds := [TokenKind.key_enum, .key_interface, .key_fn, .key_struct, .key_type, .key_const, .key_import_const, .key_pub, .eof]
	return t in decl_kinds
}
const (
	// Every assignment operator kind: `=` and the compound forms
	// (`+=`, `-=`, `*=`, `/=`, `^=`, `%=`, `|=`, `&=`, `>>=`, `<<=`).
	// `:=` (decl_assign) is not part of this list.
	AssignTokens = [TokenKind.assign, .plus_assign, .minus_assign, .mult_assign, .div_assign, .xor_assign, .mod_assign, .or_assign, .and_assign, .righ_shift_assign, .left_shift_assign]
)
// is_assign reports whether `t` is one of the kinds in AssignTokens.
fn (t TokenKind) is_assign() bool {
	return AssignTokens.contains(t)
}
// contains reports whether `val` occurs in the array `t`, using a linear
// scan that stops at the first match.
fn (t []TokenKind) contains(val TokenKind) bool {
	for i := 0; i < t.len; i++ {
		if t[i] == val {
			return true
		}
	}
	return false
}
// str returns a printable form of the token:
// - end of file is shown as '.EOF'
// - char literals are wrapped in backticks, strings in single quotes
// - every other kind declared before `plus` (name, number, ...) is shown
//   as its literal text
// - all remaining kinds use the string of their TokenKind
pub fn (t Token) str() string {
	if t.tok == .eof {
		return '.EOF'
	}
	if t.tok == .chartoken {
		return '`$t.lit`'
	}
	if t.tok == .str {
		return "'$t.lit'"
	}
	if t.tok < .plus {
		// name, number, etc.
		return t.lit
	}
	return t.tok.str()
}
// detailed_str returns a description of the token containing its line
// number, position and kind as padded numbers, followed by the token's
// own string form.
pub fn (t Token) detailed_str() string {
	details := 'Token{ .line:${t.line_nr:4d}, .pos:${t.pos:5d}, .tok: ${t.tok:3d} } = $t '
	return details
}