diff --git a/cmd/tools/vdoc.v b/cmd/tools/vdoc.v index 78a7064931..3cd8a47a13 100644 --- a/cmd/tools/vdoc.v +++ b/cmd/tools/vdoc.v @@ -399,7 +399,7 @@ fn (cfg DocConfig) gen_plaintext(idx int) string { for cn in dcs.contents { pw.writeln(cn.content) if cn.comment.len > 0 { - pw.writeln('\n' + cn.comment) + pw.writeln('\n' + '\/\/ ' + cn.comment.trim_space()) } if cfg.show_loc { pw.writeln('Location: ${cn.file_path}:${cn.pos.line}:${cn.pos.col}\n\n') @@ -509,7 +509,7 @@ fn (mut cfg DocConfig) generate_docs_from_file() { dirs := if cfg.is_multi { get_modules_list(cfg.input_path) } else { [cfg.input_path] } for dirpath in dirs { cfg.vprintln('Generating docs for ${dirpath}...') - mut dcs := doc.generate(dirpath, cfg.pub_only, !is_vlib) or { + mut dcs := doc.generate(dirpath, cfg.pub_only, true) or { panic(err) } if dcs.contents.len == 0 { continue } diff --git a/vlib/v/doc/doc.v b/vlib/v/doc/doc.v index 25f51220ab..c61dcdf150 100644 --- a/vlib/v/doc/doc.v +++ b/vlib/v/doc/doc.v @@ -40,24 +40,32 @@ pub mut: parent_type string = '' } -pub fn write_comment_bw(stmts []ast.Stmt, start_idx int) string { +pub fn get_comment_block_right_before(stmts []ast.Stmt) string { + if stmts.len == 0 { + return '' + } mut comment := '' - for i := start_idx; i >= 0; i-- { + mut last_comment_line_nr := 0 + for i := stmts.len-1; i >= 0; i-- { stmt := stmts[i] - if stmt is ast.Comment { - cmt := stmt as ast.Comment - cmt_content := cmt.text.trim_left('|') - comment = cmt_content + if cmt_content.starts_with('```') { - '\n' - } else { - ' ' - } + comment - } else { + if stmt !is ast.Comment { panic('Not a comment') } - if i - 1 >= 0 && !(stmts[i - 1] is ast.Comment) { - break + cmt := stmt as ast.Comment + if last_comment_line_nr != 0 && cmt.pos.line_nr < last_comment_line_nr - 1 { + // skip comments that are not part of a continuous block, + // located right above the top level statement. + // break } + cmt_content := cmt.text.trim_left('|') + if cmt_content.len == cmt.text.len { + // ignore /* */ style comments for now + continue + } + //eprintln('cmt: $cmt') + cseparator := if cmt_content.starts_with('```') {'\n'} else {' '} + comment = cmt_content + cseparator + comment + last_comment_line_nr = cmt.pos.line_nr } return comment } @@ -145,7 +153,7 @@ pub fn (nodes []DocNode) find_children_of(parent_type string) []DocNode { fn get_parent_mod(dir string) ?string { $if windows { - // windows root path is C: or D: + // windows root path is C: or D: if dir.len <= 2 { return error('root folder reached') } } $else { if dir.len == 0 { return error('root folder reached') } @@ -168,9 +176,9 @@ fn get_parent_mod(dir string) ?string { } return error('No V files found.') } - file_ast := parser.parse_file(v_files[0], table.new_table(), .skip_comments, prefs, &ast.Scope{ - parent: 0 - }) + tbl := table.new_table() + scope := &ast.Scope{ parent: 0 } + file_ast := parser.parse_file(v_files[0], tbl, .skip_comments, prefs, scope) if file_ast.mod.name == 'main' { return '' } @@ -196,7 +204,7 @@ pub fn (mut d Doc) generate() ?bool { // parse files mut file_asts := []ast.File{} // TODO: remove later for vlib - comments_mode := if d.with_comments { scanner.CommentsMode.parse_comments } else { scanner.CommentsMode.skip_comments } + comments_mode := if d.with_comments { scanner.CommentsMode.toplevel_comments } else { scanner.CommentsMode.skip_comments } for file in v_files { file_ast := parser.parse_file(file, d.table, comments_mode, d.prefs, &ast.Scope{ parent: 0 @@ -224,51 +232,69 @@ pub fn (mut d Doc) generate() ?bool { } else if file_ast.mod.name != orig_mod_name { continue } + mut prev_comments := []ast.Stmt{} stmts := file_ast.stmts - for si, stmt in stmts { + for _, stmt in stmts { + //eprintln('stmt typeof: ' + typeof(stmt)) if stmt is ast.Comment { + prev_comments << stmt continue } - if stmt !is ast.Module { - // todo: accumulate consts - mut name := d.get_name(stmt) - signature := d.get_signature(stmt) - pos := d.get_pos(stmt) - if !signature.starts_with('pub') && d.pub_only { + if stmt is ast.Module { + // the previous comments were probably a copyright/license one + module_comment := get_comment_block_right_before(prev_comments) + prev_comments = [] + if module_comment == '' { continue } - if name.starts_with(orig_mod_name + '.') { - name = name.all_after(orig_mod_name + '.') + if module_comment == d.head.comment { + continue } - mut node := DocNode{ - name: name - content: signature - comment: '' - pos: convert_pos(v_files[i], pos) - file_path: v_files[i] + if d.head.comment != '' { + d.head.comment += '\n' } - if stmt is ast.FnDecl { - fnd := stmt as ast.FnDecl - if fnd.receiver.typ != 0 { - mut parent_type := d.table.get_type_name(fnd.receiver.typ) - if parent_type.starts_with(module_name + '.') { - parent_type = parent_type.all_after(module_name + '.') - } - node.parent_type = parent_type + d.head.comment += module_comment + continue + } + // todo: accumulate consts + mut name := d.get_name(stmt) + signature := d.get_signature(stmt) + pos := d.get_pos(stmt) + if !signature.starts_with('pub') && d.pub_only { + prev_comments = [] + continue + } + if name.starts_with(orig_mod_name + '.') { + name = name.all_after(orig_mod_name + '.') + } + mut node := DocNode{ + name: name + content: signature + comment: '' + pos: convert_pos(v_files[i], pos) + file_path: v_files[i] + } + if stmt is ast.FnDecl { + fnd := stmt as ast.FnDecl + if fnd.receiver.typ != 0 { + mut parent_type := d.table.get_type_name(fnd.receiver.typ) + if parent_type.starts_with(module_name + '.') { + parent_type = parent_type.all_after(module_name + '.') } + node.parent_type = parent_type } - if node.name.len == 0 && node.comment.len == 0 && node.content.len == 0 { continue } - d.contents << node + } - if d.with_comments && (si - 1 >= 0 && stmts[si - 1] is ast.Comment) { - if stmt is ast.Module { - d.head.comment = write_comment_bw(stmts, si - 1) - } else { - last_comment := d.contents[d.contents.len - 1].comment - d.contents[d.contents.len - 1].comment = last_comment + '\n' + write_comment_bw(stmts, - si - 1) - } + if node.name.len == 0 && node.comment.len == 0 && node.content.len == 0 { + continue } + d.contents << node + if d.with_comments && (prev_comments.len > 0) { + last_comment := d.contents[d.contents.len - 1].comment + cmt := last_comment + '\n' + get_comment_block_right_before(prev_comments) + d.contents[d.contents.len - 1].comment = cmt + } + prev_comments = [] } } d.time_generated = time.now() diff --git a/vlib/v/parser/fn.v b/vlib/v/parser/fn.v index 8fe86d7b73..fb366bfc11 100644 --- a/vlib/v/parser/fn.v +++ b/vlib/v/parser/fn.v @@ -71,7 +71,7 @@ pub fn (mut p Parser) call_expr(language table.Language, mod string) ast.CallExp is_used: true }) or_kind = .block - or_stmts = p.parse_block_no_scope() + or_stmts = p.parse_block_no_scope(false) p.close_scope() p.inside_or_expr = was_inside_or_expr } @@ -117,6 +117,7 @@ pub fn (mut p Parser) call_args() []ast.CallArg { } fn (mut p Parser) fn_decl() ast.FnDecl { + p.top_level_statement_start() start_pos := p.tok.position() is_deprecated := p.attr == 'deprecated' is_pub := p.tok.kind == .key_pub @@ -268,7 +269,7 @@ fn (mut p Parser) fn_decl() ast.FnDecl { no_body := p.tok.kind != .lcbr body_start_pos := p.peek_tok.position() if p.tok.kind == .lcbr { - stmts = p.parse_block_no_scope() + stmts = p.parse_block_no_scope(true) } p.close_scope() p.attr = '' @@ -321,7 +322,7 @@ fn (mut p Parser) anon_fn() ast.AnonFn { mut stmts := []ast.Stmt{} no_body := p.tok.kind != .lcbr if p.tok.kind == .lcbr { - stmts = p.parse_block_no_scope() + stmts = p.parse_block_no_scope(false) } p.close_scope() mut func := table.Fn{ diff --git a/vlib/v/parser/parser.v b/vlib/v/parser/parser.v index c31b384d20..a289ef46c0 100644 --- a/vlib/v/parser/parser.v +++ b/vlib/v/parser/parser.v @@ -14,12 +14,12 @@ import os import runtime import time -// import sync pub struct Parser { file_name string // "/home/user/hello.v" file_name_dir string // "/home/user" mut: scanner &scanner.Scanner + comments_mode scanner.CommentsMode = .skip_comments // see comment in parse_file tok token.Token prev_tok token.Token peek_tok token.Token @@ -75,6 +75,11 @@ pub fn parse_stmt(text string, table &table.Table, scope &ast.Scope) ast.Stmt { } pub fn parse_file(path string, b_table &table.Table, comments_mode scanner.CommentsMode, pref &pref.Preferences, global_scope &ast.Scope) ast.File { + // NB: when comments_mode == .toplevel_comments, + // the parser gives feedback to the scanner about toplevel statements, so that the scanner can skip + // all the tricky inner comments. This is needed because we do not have a good general solution + // for handling them, and should be removed when we do (the general solution is also needed for vfmt) + // println('parse_file("$path")') // text := os.read_file(path) or { // panic(err) @@ -82,6 +87,7 @@ pub fn parse_file(path string, b_table &table.Table, comments_mode scanner.Comme mut stmts := []ast.Stmt{} mut p := Parser{ scanner: scanner.new_scanner_file(path, comments_mode) + comments_mode: comments_mode table: b_table file_name: path file_name_dir: os.dir(path) @@ -213,7 +219,10 @@ pub fn parse_files(paths []string, table &table.Table, pref &pref.Preferences, g return files } -pub fn (p &Parser) init_parse_fns() { +pub fn (mut p Parser) init_parse_fns() { + if p.comments_mode == .toplevel_comments { + p.scanner.scan_all_tokens_in_buffer() + } // p.prefix_parse_fns = make(100, 100, sizeof(PrefixParseFn)) // p.prefix_parse_fns[token.Kind.name] = parse_name } @@ -265,13 +274,13 @@ pub fn (mut p Parser) close_scope() { pub fn (mut p Parser) parse_block() []ast.Stmt { p.open_scope() // println('parse block') - stmts := p.parse_block_no_scope() + stmts := p.parse_block_no_scope(false) p.close_scope() // println('nr exprs in block = $exprs.len') return stmts } -pub fn (mut p Parser) parse_block_no_scope() []ast.Stmt { +pub fn (mut p Parser) parse_block_no_scope(is_top_level bool) []ast.Stmt { p.check(.lcbr) mut stmts := []ast.Stmt{} if p.tok.kind != .rcbr { @@ -283,6 +292,9 @@ pub fn (mut p Parser) parse_block_no_scope() []ast.Stmt { } } } + if is_top_level { + p.top_level_statement_end() + } p.check(.rcbr) return stmts } @@ -1031,7 +1043,7 @@ fn (mut p Parser) dot_expr(left ast.Expr) ast.Expr { is_used: true }) or_kind = .block - or_stmts = p.parse_block_no_scope() + or_stmts = p.parse_block_no_scope(false) p.close_scope() } // `foo()?` @@ -1254,6 +1266,7 @@ fn (mut p Parser) import_stmt() ast.Import { } fn (mut p Parser) const_decl() ast.ConstDecl { + p.top_level_statement_start() start_pos := p.tok.position() is_pub := p.tok.kind == .key_pub if is_pub { @@ -1291,6 +1304,7 @@ fn (mut p Parser) const_decl() ast.ConstDecl { fields << field p.global_scope.register(field.name, field) } + p.top_level_statement_end() p.check(.rpar) return ast.ConstDecl{ pos: start_pos.extend(end_pos) @@ -1370,6 +1384,7 @@ fn (mut p Parser) global_decl() ast.GlobalDecl { } fn (mut p Parser) enum_decl() ast.EnumDecl { + p.top_level_statement_start() is_pub := p.tok.kind == .key_pub start_pos := p.tok.position() if is_pub { @@ -1402,6 +1417,7 @@ fn (mut p Parser) enum_decl() ast.EnumDecl { has_expr: has_expr } } + p.top_level_statement_end() p.check(.rcbr) attr := p.attr is_flag := attr == 'flag' @@ -1563,3 +1579,46 @@ fn (p &Parser) new_true_expr() ast.Expr { fn verror(s string) { util.verror('parser error', s) } + +fn (mut p Parser) top_level_statement_start() { + if p.comments_mode == .toplevel_comments { + p.scanner.set_is_inside_toplevel_statement(true) + p.rewind_scanner_to_current_token_in_new_mode() + $if debugscanner ? { + eprintln('>> p.top_level_statement_start | tidx:${p.tok.tidx:-5} | p.tok.kind: ${p.tok.kind:-10} | p.tok.lit: $p.tok.lit $p.peek_tok.lit $p.peek_tok2.lit $p.peek_tok3.lit ...') + } + } +} + +fn (mut p Parser) top_level_statement_end() { + if p.comments_mode == .toplevel_comments { + p.scanner.set_is_inside_toplevel_statement(false) + p.rewind_scanner_to_current_token_in_new_mode() + $if debugscanner ? { + eprintln('>> p.top_level_statement_end | tidx:${p.tok.tidx:-5} | p.tok.kind: ${p.tok.kind:-10} | p.tok.lit: $p.tok.lit $p.peek_tok.lit $p.peek_tok2.lit $p.peek_tok3.lit ...') + } + } +} + +fn (mut p Parser) rewind_scanner_to_current_token_in_new_mode() { + // Go back and rescan some tokens, ensuring that the parser's + // lookahead buffer p.peek_tok .. p.peek_tok3, will now contain + // the correct tokens (possible comments), for the new mode + // This refilling of the lookahead buffer is needed for the + // .toplevel_comments parsing mode. + tidx := p.tok.tidx + p.scanner.set_current_tidx(tidx - 5) + no_token := token.Token{} + p.prev_tok = no_token + p.tok = no_token + p.peek_tok = no_token + p.peek_tok2 = no_token + p.peek_tok3 = no_token + for { + p.next() + //eprintln('rewinding to ${p.tok.tidx:5} | goal: ${tidx:5}') + if tidx == p.tok.tidx { + break + } + } +} diff --git a/vlib/v/parser/struct.v b/vlib/v/parser/struct.v index e2376efb31..0b9510a47d 100644 --- a/vlib/v/parser/struct.v +++ b/vlib/v/parser/struct.v @@ -9,6 +9,7 @@ import v.token import v.util fn (mut p Parser) struct_decl() ast.StructDecl { + p.top_level_statement_start() start_pos := p.tok.position() is_pub := p.tok.kind == .key_pub if is_pub { @@ -162,6 +163,7 @@ fn (mut p Parser) struct_decl() ast.StructDecl { } // println('struct field $ti.name $field_name') } + p.top_level_statement_end() p.check(.rcbr) } if language == .c { @@ -277,6 +279,7 @@ fn (mut p Parser) struct_init(short_syntax bool) ast.StructInit { } fn (mut p Parser) interface_decl() ast.InterfaceDecl { + p.top_level_statement_start() start_pos := p.tok.position() is_pub := p.tok.kind == .key_pub if is_pub { @@ -334,6 +337,7 @@ fn (mut p Parser) interface_decl() ast.InterfaceDecl { is_pub: true }) } + p.top_level_statement_end() p.check(.rcbr) return ast.InterfaceDecl{ name: interface_name diff --git a/vlib/v/scanner/scanner.v b/vlib/v/scanner/scanner.v index b99cd8530a..afc5d00d5f 100644 --- a/vlib/v/scanner/scanner.v +++ b/vlib/v/scanner/scanner.v @@ -44,12 +44,53 @@ pub mut: is_fmt bool // Used only for skipping ${} in strings, since we need literal // string values when generating formatted code. comments_mode CommentsMode + is_inside_toplvl_statement bool = false // *only* used in comments_mode: .toplevel_comments, toggled by parser + all_tokens []token.Token // *only* used in comments_mode: .toplevel_comments, contains all tokens + tidx int eofs int } +/* +How the .toplevel_comments mode works: +In this mode, the scanner scans *everything* at once, before parsing starts, +including all the comments, and stores the results in an buffer s.all_tokens. + +Then .scan() just returns s.all_tokens[ s.tidx++ ] *ignoring* the +comment tokens. In other words, by default in this mode, the parser +*will not see any comments* inside top level statements, so it has +no reason to complain about them. + +When the parser determines, that it is outside of a top level statement, +it tells the scanner to backtrack s.tidx to the current p.tok index, +then it changes .is_inside_toplvl_statement to false , and refills its +lookahead buffer (i.e. p.peek_tok, p.peek_tok2, p.peek_tok3) from the +scanner. + +In effect, from the parser's point of view, the next tokens, that it will +receive with p.next(), will be the same, as if comments are not ignored +anymore, *between* top level statements. + +When the parser determines, that it is going again inside a top level +statement, it does the same, this time setting .is_inside_toplvl_statement +to true, again refilling the lookahead buffer => calling .next() in this +mode, will again ignore all the comment tokens, till the top level statement +is finished. +*/ + +// The different kinds of scanner modes: +// +// .skip_comments - simplest/fastest, just ignores all comments early. +// This mode is used by the compiler itself. +// +// .parse_comments is used by vfmt. Ideally it should handle inline /* */ +// comments too, i.e. it returns every kind of comment as a new token. +// +// .toplevel_comments is used by vdoc, parses *only* top level ones +// that are *outside* structs/enums/fns. pub enum CommentsMode { skip_comments parse_comments + toplevel_comments } // new scanner from file. @@ -80,13 +121,32 @@ pub fn new_scanner(text string, comments_mode CommentsMode) &Scanner { return s } -fn (s &Scanner) new_token(tok_kind token.Kind, lit string, len int) token.Token { + + +[inline] +fn (s &Scanner) should_parse_comment() bool { + res := (s.comments_mode == .parse_comments) || (s.comments_mode == .toplevel_comments && !s.is_inside_toplvl_statement) + return res +} +// NB: this is called by v's parser +pub fn (mut s Scanner) set_is_inside_toplevel_statement(newstate bool) { + s.is_inside_toplvl_statement = newstate +} +pub fn (mut s Scanner) set_current_tidx(cidx int) { + mut tidx := if cidx < 0 { 0 } else { cidx } + tidx = if tidx > s.all_tokens.len { s.all_tokens.len } else { tidx } + s.tidx = tidx +} +fn (mut s Scanner) new_token(tok_kind token.Kind, lit string, len int) token.Token { + cidx := s.tidx + s.tidx++ return token.Token{ kind: tok_kind lit: lit line_nr: s.line_nr + 1 pos: s.pos - len + 1 len: len + tidx: cidx } } @@ -537,7 +597,51 @@ fn (mut s Scanner) end_of_file() token.Token { return s.new_token(.eof, '', 1) } +pub fn (mut s Scanner) scan_all_tokens_in_buffer(){ + // s.scan_all_tokens_in_buffer is used mainly by vdoc, + // in order to implement the .toplevel_comments mode. + cmode := s.comments_mode + s.comments_mode = .parse_comments + for { + mut t := s.text_scan() + s.all_tokens << t + if t.kind == .eof { + break + } + } + s.comments_mode = cmode + s.tidx = 0 + $if debugscanner ? { + for t in s.all_tokens { + eprintln('> tidx:${t.tidx:-5} | kind: ${t.kind:-10} | lit: ${t.lit}') + } + } +} + pub fn (mut s Scanner) scan() token.Token { + if s.comments_mode == .toplevel_comments { + return s.buffer_scan() + } + return s.text_scan() +} + +pub fn (mut s Scanner) buffer_scan() token.Token { + for { + cidx := s.tidx + s.tidx++ + if cidx >= s.all_tokens.len { + return s.end_of_file() + } + if s.all_tokens[cidx].kind == .comment { + if !s.should_parse_comment() { + continue + } + } + return s.all_tokens[cidx] + } +} + +fn (mut s Scanner) text_scan() token.Token { // if s.comments_mode == .parse_comments { // println('\nscan()') // } @@ -972,7 +1076,7 @@ pub fn (mut s Scanner) scan() token.Token { // fix line_nr, \n was read, and the comment is marked // on the next line s.line_nr-- - if s.comments_mode == .parse_comments { + if s.should_parse_comment() { // Find out if this comment is on its own line (for vfmt) mut is_separate_line_comment := true for j := start-2; j >= 0 && s.text[j] != `\n`; j-- { @@ -1013,7 +1117,7 @@ pub fn (mut s Scanner) scan() token.Token { } } s.pos++ - if s.comments_mode == .parse_comments { + if s.should_parse_comment() { comment := s.text[start..(s.pos - 1)].trim_space() return s.new_token(.comment, comment, comment.len + 4) } diff --git a/vlib/v/token/token.v b/vlib/v/token/token.v index a881b5ec93..502ded7353 100644 --- a/vlib/v/token/token.v +++ b/vlib/v/token/token.v @@ -11,6 +11,7 @@ pub: // name_idx int // name table index for O(1) lookup pos int // the position of the token in scanner text len int // length of the literal + tidx int // the index of the token } pub enum Kind {