diff --git a/cmd/tools/vscan.v b/cmd/tools/vscan.v
new file mode 100644
index 0000000000..46d08f717d
--- /dev/null
+++ b/cmd/tools/vscan.v
@@ -0,0 +1,33 @@
+module main
+
+import os
+import v.scanner
+import v.pref
+import v.token
+import flag
+
+// main implements `v scan`: every PATH argument is scanned in turn, and one
+// line is printed per token (up to and including the first .eof token), with
+// its path:line:column location (`line_nr + 1`, `col + 1`), `pos.pos` and kind/literal.
+fn main() {
+	// `#[2..]` keeps the args from index 2 on (presumably skipping the executable and `scan`).
+	mut fp := flag.new_flag_parser(os.args#[2..])
+	fp.application('v scan')
+	fp.version('0.0.1')
+	fp.description('\nScan .v source files, and print the V tokens contained in them.')
+	fp.arguments_description('PATH [PATH]...')
+	fp.limit_free_args_to_at_least(1) ?
+	pref := pref.new_preferences()
+	all_paths := fp.remaining_parameters()
+	for path in all_paths {
+		mut scanner := scanner.new_scanner_file(path, .parse_comments, pref) ?
+		mut tok := token.Token{}
+		// the condition is checked before each scan, so the .eof token itself is printed too
+		for tok.kind != .eof {
+			tok = scanner.scan()
+			pos := tok.pos()
+			location := '$path:${pos.line_nr + 1}:${pos.col + 1}:'
+			println('${location:-32} | pos: ${pos.pos:-5} | $tok.debug()')
+		}
+	}
+}
diff --git a/cmd/v/v.v b/cmd/v/v.v
index 998d9dddd9..a85d9a78b4 100644
--- a/cmd/v/v.v
+++ b/cmd/v/v.v
@@ -33,6 +33,7 @@ const (
 		'setup-freetype',
 		'shader',
 		'symlink',
+		'scan',
 		'test',
 		'test-all', // runs most of the tests and other checking tools, that will be run by the CI
 		'test-cleancode',
diff --git a/vlib/v/parser/parser.v b/vlib/v/parser/parser.v
index e68b84390f..268c7188ec 100644
--- a/vlib/v/parser/parser.v
+++ b/vlib/v/parser/parser.v
@@ -213,7 +213,7 @@ pub fn parse_file(path string, table &ast.Table, comments_mode scanner.CommentsM
 		eprintln('> ${@MOD}.${@FN} comments_mode: ${comments_mode:-20} | path: $path')
 	}
 	mut p := Parser{
-		scanner: scanner.new_scanner_file(path, comments_mode, pref)
+		scanner: scanner.new_scanner_file(path, comments_mode, pref) or { panic(err) }
 		comments_mode: comments_mode
 		table: table
 		pref: pref
@@ -238,7 +238,7 @@ pub fn parse_vet_file(path string, table_ &ast.Table, pref &pref.Preferences) (&
 		parent: 0
 	}
 	mut p := Parser{
-		scanner: scanner.new_scanner_file(path, .parse_comments, pref)
+		scanner: scanner.new_scanner_file(path, .parse_comments, pref) or { panic(err) }
 		comments_mode: .parse_comments
 		table: table_
 		pref: pref
diff --git a/vlib/v/scanner/scanner.v b/vlib/v/scanner/scanner.v
index e5bcb8f3d7..0ebaa11121 100644
--- a/vlib/v/scanner/scanner.v
+++ b/vlib/v/scanner/scanner.v
@@ -105,14 +105,11 @@ pub enum CommentsMode {
 }
 
 // new scanner from file.
-pub fn new_scanner_file(file_path string, comments_mode CommentsMode, pref &pref.Preferences) &Scanner {
+pub fn new_scanner_file(file_path string, comments_mode CommentsMode, pref &pref.Preferences) ?&Scanner {
 	if !os.is_file(file_path) {
-		verror('$file_path is not a file')
-	}
-	raw_text := util.read_file(file_path) or {
-		verror(err.msg())
-		return voidptr(0)
+		return error('$file_path is not a file')
 	}
+	raw_text := util.read_file(file_path) or { return err }
 	mut s := &Scanner{
 		pref: pref
 		text: raw_text
@@ -519,7 +516,7 @@ fn (mut s Scanner) ident_number() string {
 fn (mut s Scanner) skip_whitespace() {
 	for s.pos < s.text.len {
 		c := s.text[s.pos]
-		if c == 8 {
+		if c == 9 {
 			// tabs are most common
 			s.pos++
 			continue
@@ -1595,11 +1592,6 @@ fn (mut s Scanner) vet_error(msg string, fix vet.FixKind) {
 	s.vet_errors << ve
 }
 
-[noreturn]
-pub fn verror(s string) {
-	util.verror('scanner error', s)
-}
-
 fn (mut s Scanner) trace(fbase string, message string) {
 	if s.file_base == fbase {
 		println('> s.trace | ${fbase:-10s} | $message')
diff --git a/vlib/v/token/token.v b/vlib/v/token/token.v
index 4e545e1334..82a93a11c5 100644
--- a/vlib/v/token/token.v
+++ b/vlib/v/token/token.v
@@ -308,10 +308,22 @@ fn build_token_str() []string {
 	s[Kind.key_none] = 'none'
 	s[Kind.key_offsetof] = '__offsetof'
 	s[Kind.key_is] = 'is'
+	// The following kinds are not for tokens returned by the V scanner.
+	// They are used just for organisation/ease of checking:
+	s[Kind.keyword_beg] = 'keyword_beg'
+	s[Kind.keyword_end] = 'keyword_end'
+	s[Kind.str_inter] = 'str_inter'
+	$if debug_build_token_str ? {
+		for k, v in s {
+			if v == '' {
+				eprintln('>>> ${@MOD}.${@METHOD} missing k: $k | .${kind_to_string(Kind(k))}')
+			}
+		}
+	}
 	return s
 }
 
-const token_str = build_token_str()
+pub const token_str = build_token_str()
 
 pub const keywords = build_keys()
 
@@ -357,6 +369,14 @@ pub fn (t Token) str() string {
 	return s
 }
 
+// debug returns a string with the token's kind name (from kind_to_string),
+// and its literal; when the literal is empty, `t.kind.str()` is shown instead.
+pub fn (t Token) debug() string {
+	ks := kind_to_string(t.kind)
+	s := if t.lit == '' { t.kind.str() } else { t.lit }
+	return 'tok: .${ks:-12} | lit: `$s`'
+}
+
 // Representation of highest and lowest precedence
 /*
 pub const lowest_prec = 0
@@ -475,3 +495,248 @@ pub fn (kind Kind) is_infix() bool {
 		.le, .logical_or, .xor, .not_in, .key_is, .not_is, .and, .dot, .pipe, .amp, .left_shift,
 		.right_shift, .unsigned_right_shift, .arrow]
 }
+
+// kind_to_string returns the name of the enum field `k`, without the
+// leading dot, for example `.key_fn` => 'key_fn'.
+pub fn kind_to_string(k Kind) string {
+	return match k {
+		.unknown { 'unknown' }
+		.eof { 'eof' }
+		.name { 'name' }
+		.number { 'number' }
+		.string { 'string' }
+		.str_inter { 'str_inter' }
+		.chartoken { 'chartoken' }
+		.plus { 'plus' }
+		.minus { 'minus' }
+		.mul { 'mul' }
+		.div { 'div' }
+		.mod { 'mod' }
+		.xor { 'xor' }
+		.pipe { 'pipe' }
+		.inc { 'inc' }
+		.dec { 'dec' }
+		.and { 'and' }
+		.logical_or { 'logical_or' }
+		.not { 'not' }
+		.bit_not { 'bit_not' }
+		.question { 'question' }
+		.comma { 'comma' }
+		.semicolon { 'semicolon' }
+		.colon { 'colon' }
+		.arrow { 'arrow' }
+		.amp { 'amp' }
+		.hash { 'hash' }
+		.dollar { 'dollar' }
+		.at { 'at' }
+		.str_dollar { 'str_dollar' }
+		.left_shift { 'left_shift' }
+		.right_shift { 'right_shift' }
+		.unsigned_right_shift { 'unsigned_right_shift' }
+		.not_in { 'not_in' }
+		.not_is { 'not_is' }
+		.assign { 'assign' }
+		.decl_assign { 'decl_assign' }
+		.plus_assign { 'plus_assign' }
+		.minus_assign { 'minus_assign' }
+		.div_assign { 'div_assign' }
+		.mult_assign { 'mult_assign' }
+		.xor_assign { 'xor_assign' }
+		.mod_assign { 'mod_assign' }
+		.or_assign { 'or_assign' }
+		.and_assign { 'and_assign' }
+		.right_shift_assign { 'right_shift_assign' }
+		.left_shift_assign { 'left_shift_assign' }
+		.unsigned_right_shift_assign { 'unsigned_right_shift_assign' }
+		.lcbr { 'lcbr' }
+		.rcbr { 'rcbr' }
+		.lpar { 'lpar' }
+		.rpar { 'rpar' }
+		.lsbr { 'lsbr' }
+		.nilsbr { 'nilsbr' }
+		.rsbr { 'rsbr' }
+		.eq { 'eq' }
+		.ne { 'ne' }
+		.gt { 'gt' }
+		.lt { 'lt' }
+		.ge { 'ge' }
+		.le { 'le' }
+		.comment { 'comment' }
+		.nl { 'nl' }
+		.dot { 'dot' }
+		.dotdot { 'dotdot' }
+		.ellipsis { 'ellipsis' }
+		.keyword_beg { 'keyword_beg' }
+		.key_as { 'key_as' }
+		.key_asm { 'key_asm' }
+		.key_assert { 'key_assert' }
+		.key_atomic { 'key_atomic' }
+		.key_break { 'key_break' }
+		.key_const { 'key_const' }
+		.key_continue { 'key_continue' }
+		.key_defer { 'key_defer' }
+		.key_else { 'key_else' }
+		.key_enum { 'key_enum' }
+		.key_false { 'key_false' }
+		.key_for { 'key_for' }
+		.key_fn { 'key_fn' }
+		.key_global { 'key_global' }
+		.key_go { 'key_go' }
+		.key_goto { 'key_goto' }
+		.key_if { 'key_if' }
+		.key_import { 'key_import' }
+		.key_in { 'key_in' }
+		.key_interface { 'key_interface' }
+		.key_is { 'key_is' }
+		.key_match { 'key_match' }
+		.key_module { 'key_module' }
+		.key_mut { 'key_mut' }
+		.key_shared { 'key_shared' }
+		.key_lock { 'key_lock' }
+		.key_rlock { 'key_rlock' }
+		.key_none { 'key_none' }
+		.key_return { 'key_return' }
+		.key_select { 'key_select' }
+		.key_sizeof { 'key_sizeof' }
+		.key_isreftype { 'key_isreftype' }
+		.key_likely { 'key_likely' }
+		.key_unlikely { 'key_unlikely' }
+		.key_offsetof { 'key_offsetof' }
+		.key_struct { 'key_struct' }
+		.key_true { 'key_true' }
+		.key_type { 'key_type' }
+		.key_typeof { 'key_typeof' }
+		.key_dump { 'key_dump' }
+		.key_orelse { 'key_orelse' }
+		.key_union { 'key_union' }
+		.key_pub { 'key_pub' }
+		.key_static { 'key_static' }
+		.key_volatile { 'key_volatile' }
+		.key_unsafe { 'key_unsafe' }
+		.keyword_end { 'keyword_end' }
+		._end_ { '_end_' }
+	}
+}
+
+// kind_from_string is the inverse of kind_to_string: it returns the Kind
+// whose field name is `s`, or an error when `s` does not name a Kind.
+pub fn kind_from_string(s string) ?Kind {
+	match s {
+		'unknown' { return .unknown }
+		'eof' { return .eof }
+		'name' { return .name }
+		'number' { return .number }
+		'string' { return .string }
+		'str_inter' { return .str_inter }
+		'chartoken' { return .chartoken }
+		'plus' { return .plus }
+		'minus' { return .minus }
+		'mul' { return .mul }
+		'div' { return .div }
+		'mod' { return .mod }
+		'xor' { return .xor }
+		'pipe' { return .pipe }
+		'inc' { return .inc }
+		'dec' { return .dec }
+		'and' { return .and }
+		'logical_or' { return .logical_or }
+		'not' { return .not }
+		'bit_not' { return .bit_not }
+		'question' { return .question }
+		'comma' { return .comma }
+		'semicolon' { return .semicolon }
+		'colon' { return .colon }
+		'arrow' { return .arrow }
+		'amp' { return .amp }
+		'hash' { return .hash }
+		'dollar' { return .dollar }
+		'at' { return .at }
+		'str_dollar' { return .str_dollar }
+		'left_shift' { return .left_shift }
+		'right_shift' { return .right_shift }
+		'unsigned_right_shift' { return .unsigned_right_shift }
+		'not_in' { return .not_in }
+		'not_is' { return .not_is }
+		'assign' { return .assign }
+		'decl_assign' { return .decl_assign }
+		'plus_assign' { return .plus_assign }
+		'minus_assign' { return .minus_assign }
+		'div_assign' { return .div_assign }
+		'mult_assign' { return .mult_assign }
+		'xor_assign' { return .xor_assign }
+		'mod_assign' { return .mod_assign }
+		'or_assign' { return .or_assign }
+		'and_assign' { return .and_assign }
+		'right_shift_assign' { return .right_shift_assign }
+		'left_shift_assign' { return .left_shift_assign }
+		'unsigned_right_shift_assign' { return .unsigned_right_shift_assign }
+		'lcbr' { return .lcbr }
+		'rcbr' { return .rcbr }
+		'lpar' { return .lpar }
+		'rpar' { return .rpar }
+		'lsbr' { return .lsbr }
+		'nilsbr' { return .nilsbr }
+		'rsbr' { return .rsbr }
+		'eq' { return .eq }
+		'ne' { return .ne }
+		'gt' { return .gt }
+		'lt' { return .lt }
+		'ge' { return .ge }
+		'le' { return .le }
+		'comment' { return .comment }
+		'nl' { return .nl }
+		'dot' { return .dot }
+		'dotdot' { return .dotdot }
+		'ellipsis' { return .ellipsis }
+		'keyword_beg' { return .keyword_beg }
+		'key_as' { return .key_as }
+		'key_asm' { return .key_asm }
+		'key_assert' { return .key_assert }
+		'key_atomic' { return .key_atomic }
+		'key_break' { return .key_break }
+		'key_const' { return .key_const }
+		'key_continue' { return .key_continue }
+		'key_defer' { return .key_defer }
+		'key_else' { return .key_else }
+		'key_enum' { return .key_enum }
+		'key_false' { return .key_false }
+		'key_for' { return .key_for }
+		'key_fn' { return .key_fn }
+		'key_global' { return .key_global }
+		'key_go' { return .key_go }
+		'key_goto' { return .key_goto }
+		'key_if' { return .key_if }
+		'key_import' { return .key_import }
+		'key_in' { return .key_in }
+		'key_interface' { return .key_interface }
+		'key_is' { return .key_is }
+		'key_match' { return .key_match }
+		'key_module' { return .key_module }
+		'key_mut' { return .key_mut }
+		'key_shared' { return .key_shared }
+		'key_lock' { return .key_lock }
+		'key_rlock' { return .key_rlock }
+		'key_none' { return .key_none }
+		'key_return' { return .key_return }
+		'key_select' { return .key_select }
+		'key_sizeof' { return .key_sizeof }
+		'key_isreftype' { return .key_isreftype }
+		'key_likely' { return .key_likely }
+		'key_unlikely' { return .key_unlikely }
+		'key_offsetof' { return .key_offsetof }
+		'key_struct' { return .key_struct }
+		'key_true' { return .key_true }
+		'key_type' { return .key_type }
+		'key_typeof' { return .key_typeof }
+		'key_dump' { return .key_dump }
+		'key_orelse' { return .key_orelse }
+		'key_union' { return .key_union }
+		'key_pub' { return .key_pub }
+		'key_static' { return .key_static }
+		'key_volatile' { return .key_volatile }
+		'key_unsafe' { return .key_unsafe }
+		'keyword_end' { return .keyword_end }
+		'_end_' { return ._end_ }
+		else { return error('unknown token kind: `$s`') }
+	}
+}