vdoc: implement .toplevel_comments parsing mode

2020-06-06 18:47:16 +03:00 · 2020-06-06 18:47:16 +03:00 · 8d3f680d07
parent 3aecdeab63
commit 8d3f680d07
7 changed files with 259 additions and 64 deletions
--- a/cmd/tools/vdoc.v
+++ b/cmd/tools/vdoc.v
@ -399,7 +399,7 @@ fn (cfg DocConfig) gen_plaintext(idx int) string {
 	for cn in dcs.contents {
 		pw.writeln(cn.content)
 		if cn.comment.len > 0 {
-			pw.writeln('\n' + cn.comment)
+			pw.writeln('\n' + '\/\/ ' + cn.comment.trim_space())
 		}
 		if cfg.show_loc {
 			pw.writeln('Location: ${cn.file_path}:${cn.pos.line}:${cn.pos.col}\n\n')
@ -509,7 +509,7 @@ fn (mut cfg DocConfig) generate_docs_from_file() {
 	dirs := if cfg.is_multi { get_modules_list(cfg.input_path) } else { [cfg.input_path] } 
 	for dirpath in dirs {
 		cfg.vprintln('Generating docs for ${dirpath}...')
-		mut dcs := doc.generate(dirpath, cfg.pub_only, !is_vlib) or {
+		mut dcs := doc.generate(dirpath, cfg.pub_only, true) or {
 			panic(err)
 		}
 		if dcs.contents.len == 0 { continue }
--- a/vlib/v/doc/doc.v
+++ b/vlib/v/doc/doc.v
@ -40,24 +40,32 @@ pub mut:
 	parent_type string = ''
 }
-pub fn write_comment_bw(stmts []ast.Stmt, start_idx int) string {
+pub fn get_comment_block_right_before(stmts []ast.Stmt) string {
 	if stmts.len == 0 {
 		return ''
 	}
 	mut comment := ''
-	for i := start_idx; i >= 0; i-- {
+	mut last_comment_line_nr := 0
 	for i := stmts.len-1; i >= 0; i-- {
 		stmt := stmts[i]
-		if stmt is ast.Comment {
+		if stmt !is ast.Comment {
 			cmt := stmt as ast.Comment
 			cmt_content := cmt.text.trim_left('|')
 			comment = cmt_content + if cmt_content.starts_with('```') {
 				'\n'
 			} else {
 				' '
 			} + comment
 		} else {
 			panic('Not a comment')
 		}
-		if i - 1 >= 0 && !(stmts[i - 1] is ast.Comment) {
+		cmt := stmt as ast.Comment
-			break
+		if last_comment_line_nr != 0 && cmt.pos.line_nr < last_comment_line_nr - 1 {
 			// skip comments that are not part of a continuous block,
 			// located right above the top level statement.
 			//			break
 		}
 		cmt_content := cmt.text.trim_left('|')
 		if cmt_content.len == cmt.text.len {
 			// ignore /* */ style comments for now
 			continue
 		}
 		//eprintln('cmt: $cmt')
 		cseparator := if cmt_content.starts_with('```') {'\n'} else {' '}
 		comment = cmt_content + cseparator + comment
 		last_comment_line_nr = cmt.pos.line_nr
 	}
 	return comment
 }
@ -168,9 +176,9 @@ fn get_parent_mod(dir string) ?string {
 		}
 		return error('No V files found.')
 	}
-	file_ast := parser.parse_file(v_files[0], table.new_table(), .skip_comments, prefs, &ast.Scope{
+	tbl := table.new_table()
-		parent: 0
+	scope := &ast.Scope{ parent: 0 }
-	})
+	file_ast := parser.parse_file(v_files[0], tbl, .skip_comments, prefs, scope)
 	if file_ast.mod.name == 'main' {
 		return ''
 	}
@ -196,7 +204,7 @@ pub fn (mut d Doc) generate() ?bool {
 	// parse files
 	mut file_asts := []ast.File{}
 	// TODO: remove later for vlib
-	comments_mode := if d.with_comments { scanner.CommentsMode.parse_comments } else { scanner.CommentsMode.skip_comments }
+	comments_mode := if d.with_comments { scanner.CommentsMode.toplevel_comments } else { scanner.CommentsMode.skip_comments }
 	for file in v_files {
 		file_ast := parser.parse_file(file, d.table, comments_mode, d.prefs, &ast.Scope{
 			parent: 0
@ -224,17 +232,36 @@ pub fn (mut d Doc) generate() ?bool {
 		} else if file_ast.mod.name != orig_mod_name {
 			continue
 		}
 		mut prev_comments := []ast.Stmt{}
 		stmts := file_ast.stmts
-		for si, stmt in stmts {
+		for _, stmt in stmts {
 			//eprintln('stmt typeof: ' + typeof(stmt))
 			if stmt is ast.Comment {
 				prev_comments << stmt
 				continue
 			}
 			if stmt is ast.Module {
 				// the previous comments were probably a copyright/license one
 				module_comment := get_comment_block_right_before(prev_comments)
 				prev_comments = []
 				if module_comment == '' {
 					continue
 				}
 				if module_comment == d.head.comment {
 					continue
 				}
 				if d.head.comment != '' {
 					d.head.comment += '\n'
 				}
 				d.head.comment += module_comment
 				continue
 			}
 			if stmt !is ast.Module {
 			// todo: accumulate consts
 			mut name := d.get_name(stmt)
 			signature := d.get_signature(stmt)
 			pos := d.get_pos(stmt)
 			if !signature.starts_with('pub') && d.pub_only {
 				prev_comments = []
 				continue
 			}
 			if name.starts_with(orig_mod_name + '.') {
@ -256,19 +283,18 @@ pub fn (mut d Doc) generate() ?bool {
 					}
 					node.parent_type = parent_type
 				}
 			}
 			if node.name.len == 0 && node.comment.len == 0 && node.content.len == 0 {
 				continue
 			}
 				if node.name.len == 0 && node.comment.len == 0 && node.content.len == 0 { continue }
 			d.contents << node
-			}
+			if d.with_comments && (prev_comments.len > 0) {
 			if d.with_comments && (si - 1 >= 0 && stmts[si - 1] is ast.Comment) {
 				if stmt is ast.Module {
 					d.head.comment = write_comment_bw(stmts, si - 1)
 				} else {
 				last_comment := d.contents[d.contents.len - 1].comment
-					d.contents[d.contents.len - 1].comment = last_comment + '\n' + write_comment_bw(stmts,
+				cmt := last_comment + '\n' + get_comment_block_right_before(prev_comments)
-						si - 1)
+				d.contents[d.contents.len - 1].comment = cmt
 				}
 			}
 			prev_comments = []
 		}
 	}
 	d.time_generated = time.now()
--- a/vlib/v/parser/fn.v
+++ b/vlib/v/parser/fn.v
@ -71,7 +71,7 @@ pub fn (mut p Parser) call_expr(language table.Language, mod string) ast.CallExp
 			is_used: true
 		})
 		or_kind = .block
-		or_stmts = p.parse_block_no_scope()
+		or_stmts = p.parse_block_no_scope(false)
 		p.close_scope()
 		p.inside_or_expr = was_inside_or_expr
 	}
@ -117,6 +117,7 @@ pub fn (mut p Parser) call_args() []ast.CallArg {
 }
 fn (mut p Parser) fn_decl() ast.FnDecl {
 	p.top_level_statement_start()
 	start_pos := p.tok.position()
 	is_deprecated := p.attr == 'deprecated'
 	is_pub := p.tok.kind == .key_pub
@ -268,7 +269,7 @@ fn (mut p Parser) fn_decl() ast.FnDecl {
 	no_body := p.tok.kind != .lcbr
 	body_start_pos := p.peek_tok.position()
 	if p.tok.kind == .lcbr {
-		stmts = p.parse_block_no_scope()
+		stmts = p.parse_block_no_scope(true)
 	}
 	p.close_scope()
 	p.attr = ''
@ -321,7 +322,7 @@ fn (mut p Parser) anon_fn() ast.AnonFn {
 	mut stmts := []ast.Stmt{}
 	no_body := p.tok.kind != .lcbr
 	if p.tok.kind == .lcbr {
-		stmts = p.parse_block_no_scope()
+		stmts = p.parse_block_no_scope(false)
 	}
 	p.close_scope()
 	mut func := table.Fn{
--- a/vlib/v/parser/parser.v
+++ b/vlib/v/parser/parser.v
@ -14,12 +14,12 @@ import os
 import runtime
 import time
 // import sync
 pub struct Parser {
 	file_name         string // "/home/user/hello.v"
 	file_name_dir     string // "/home/user"
 mut:
 	scanner           &scanner.Scanner
 	comments_mode     scanner.CommentsMode = .skip_comments // see comment in parse_file
 	tok               token.Token
 	prev_tok          token.Token
 	peek_tok          token.Token
@ -75,6 +75,11 @@ pub fn parse_stmt(text string, table &table.Table, scope &ast.Scope) ast.Stmt {
 }
 pub fn parse_file(path string, b_table &table.Table, comments_mode scanner.CommentsMode, pref &pref.Preferences, global_scope &ast.Scope) ast.File {
 	// NB: when comments_mode == .toplevel_comments,
 	// the parser gives feedback to the scanner about toplevel statements, so that the scanner can skip
 	// all the tricky inner comments. This is needed because we do not have a good general solution
 	// for handling them, and should be removed when we do (the general solution is also needed for vfmt)
 	// println('parse_file("$path")')
 	// text := os.read_file(path) or {
 	// panic(err)
@ -82,6 +87,7 @@ pub fn parse_file(path string, b_table &table.Table, comments_mode scanner.Comme
 	mut stmts := []ast.Stmt{}
 	mut p := Parser{
 		scanner: scanner.new_scanner_file(path, comments_mode)
 		comments_mode: comments_mode
 		table: b_table
 		file_name: path
 		file_name_dir: os.dir(path)
@ -213,7 +219,10 @@ pub fn parse_files(paths []string, table &table.Table, pref &pref.Preferences, g
 	return files
 }
-pub fn (p &Parser) init_parse_fns() {
+pub fn (mut p Parser) init_parse_fns() {
 	if p.comments_mode == .toplevel_comments {
 		p.scanner.scan_all_tokens_in_buffer()
 	}
 	// p.prefix_parse_fns = make(100, 100, sizeof(PrefixParseFn))
 	// p.prefix_parse_fns[token.Kind.name] = parse_name
 }
@ -265,13 +274,13 @@ pub fn (mut p Parser) close_scope() {
 pub fn (mut p Parser) parse_block() []ast.Stmt {
 	p.open_scope()
 	// println('parse block')
-	stmts := p.parse_block_no_scope()
+	stmts := p.parse_block_no_scope(false)
 	p.close_scope()
 	// println('nr exprs in block = $exprs.len')
 	return stmts
 }
-pub fn (mut p Parser) parse_block_no_scope() []ast.Stmt {
+pub fn (mut p Parser) parse_block_no_scope(is_top_level bool) []ast.Stmt {
 	p.check(.lcbr)
 	mut stmts := []ast.Stmt{}
 	if p.tok.kind != .rcbr {
@ -283,6 +292,9 @@ pub fn (mut p Parser) parse_block_no_scope() []ast.Stmt {
 			}
 		}
 	}
 	if is_top_level {
 		p.top_level_statement_end()
 	}
 	p.check(.rcbr)
 	return stmts
 }
@ -1031,7 +1043,7 @@ fn (mut p Parser) dot_expr(left ast.Expr) ast.Expr {
 				is_used: true
 			})
 			or_kind = .block
-			or_stmts = p.parse_block_no_scope()
+			or_stmts = p.parse_block_no_scope(false)
 			p.close_scope()
 		}
 		// `foo()?`
@ -1254,6 +1266,7 @@ fn (mut p Parser) import_stmt() ast.Import {
 }
 fn (mut p Parser) const_decl() ast.ConstDecl {
 	p.top_level_statement_start()
 	start_pos := p.tok.position()
 	is_pub := p.tok.kind == .key_pub
 	if is_pub {
@ -1291,6 +1304,7 @@ fn (mut p Parser) const_decl() ast.ConstDecl {
 		fields << field
 		p.global_scope.register(field.name, field)
 	}
 	p.top_level_statement_end()
 	p.check(.rpar)
 	return ast.ConstDecl{
 		pos: start_pos.extend(end_pos)
@ -1370,6 +1384,7 @@ fn (mut p Parser) global_decl() ast.GlobalDecl {
 }
 fn (mut p Parser) enum_decl() ast.EnumDecl {
 	p.top_level_statement_start()
 	is_pub := p.tok.kind == .key_pub
 	start_pos := p.tok.position()
 	if is_pub {
@ -1402,6 +1417,7 @@ fn (mut p Parser) enum_decl() ast.EnumDecl {
 			has_expr: has_expr
 		}
 	}
 	p.top_level_statement_end()
 	p.check(.rcbr)
 	attr := p.attr
 	is_flag := attr == 'flag'
@ -1563,3 +1579,46 @@ fn (p &Parser) new_true_expr() ast.Expr {
 fn verror(s string) {
 	util.verror('parser error', s)
 }
 // top_level_statement_start is called at the beginning of the parsers for
 // top level declarations (fn_decl, const_decl, enum_decl, struct_decl and
 // interface_decl call it first). It has an effect only when the parser is in
 // the .toplevel_comments mode: it tells the scanner that a top level
 // statement has begun, and then refills the parser's token lookahead
 // under that new scanner state.
 fn (mut p Parser) top_level_statement_start() {
 	if p.comments_mode == .toplevel_comments {
 		// switch the scanner state first, so that the rescan below already uses it
 		p.scanner.set_is_inside_toplevel_statement(true)
 		p.rewind_scanner_to_current_token_in_new_mode()
 		// optional tracing, compiled in only when `debugscanner` is defined
 		$if debugscanner ? {
 			eprintln('>> p.top_level_statement_start | tidx:${p.tok.tidx:-5} | p.tok.kind: ${p.tok.kind:-10} | p.tok.lit: $p.tok.lit $p.peek_tok.lit $p.peek_tok2.lit $p.peek_tok3.lit ...')
 		}
 	}
 }
 // top_level_statement_end is the counterpart of top_level_statement_start.
 // The declaration parsers call it right before they check the closing
 // token of the declaration. It has an effect only when the parser is in
 // the .toplevel_comments mode: it tells the scanner that the top level
 // statement is over, and then refills the parser's token lookahead
 // under that new scanner state.
 fn (mut p Parser) top_level_statement_end() {
 	if p.comments_mode == .toplevel_comments {
 		// switch the scanner state first, so that the rescan below already uses it
 		p.scanner.set_is_inside_toplevel_statement(false)
 		p.rewind_scanner_to_current_token_in_new_mode()
 		// optional tracing, compiled in only when `debugscanner` is defined
 		$if debugscanner ? {
 			eprintln('>> p.top_level_statement_end   | tidx:${p.tok.tidx:-5} | p.tok.kind: ${p.tok.kind:-10} | p.tok.lit: $p.tok.lit $p.peek_tok.lit $p.peek_tok2.lit $p.peek_tok3.lit ...')
 		}
 	}
 }
 // rewind_scanner_to_current_token_in_new_mode moves the scanner's token
 // index back, clears the parser's current/previous/lookahead tokens, and
 // then calls p.next() repeatedly, until p.tok is again the token that was
 // current when the function was called (matched by its tidx).
 fn (mut p Parser) rewind_scanner_to_current_token_in_new_mode() {
 	// Go back and rescan some tokens, ensuring that the parser's
 	// lookahead buffer p.peek_tok .. p.peek_tok3, will now contain
 	// the correct tokens (possible comments), for the new mode
 	// This refilling of the lookahead buffer is needed for the
 	// .toplevel_comments parsing mode.
 	// remember which token is current, so that the loop below can stop on it
 	tidx := p.tok.tidx
 	// NOTE(review): the rewind distance of 5 tokens presumably covers prev_tok,
 	// tok and the three peek tokens - confirm. set_current_tidx clamps the
 	// resulting index, so a negative value here becomes 0.
 	p.scanner.set_current_tidx(tidx - 5)
 	no_token := token.Token{}
 	p.prev_tok = no_token
 	p.tok = no_token
 	p.peek_tok = no_token
 	p.peek_tok2 = no_token
 	p.peek_tok3 = no_token
 	// NOTE(review): this loop ends only when a token with the saved tidx
 	// becomes current again - confirm that the token can never be filtered
 	// out by the new mode.
 	for {
 		p.next()
 		//eprintln('rewinding to ${p.tok.tidx:5} | goal: ${tidx:5}')
 		if tidx == p.tok.tidx {
 			break
 		}
 	}
 }
--- a/vlib/v/parser/struct.v
+++ b/vlib/v/parser/struct.v
@ -9,6 +9,7 @@ import v.token
 import v.util
 fn (mut p Parser) struct_decl() ast.StructDecl {
 	p.top_level_statement_start()
 	start_pos := p.tok.position()
 	is_pub := p.tok.kind == .key_pub
 	if is_pub {
@ -162,6 +163,7 @@ fn (mut p Parser) struct_decl() ast.StructDecl {
 			}
 			// println('struct field $ti.name $field_name')
 		}
 		p.top_level_statement_end()
 		p.check(.rcbr)
 	}
 	if language == .c {
@ -277,6 +279,7 @@ fn (mut p Parser) struct_init(short_syntax bool) ast.StructInit {
 }
 fn (mut p Parser) interface_decl() ast.InterfaceDecl {
 	p.top_level_statement_start()
 	start_pos := p.tok.position()
 	is_pub := p.tok.kind == .key_pub
 	if is_pub {
@ -334,6 +337,7 @@ fn (mut p Parser) interface_decl() ast.InterfaceDecl {
 			is_pub: true
 		})
 	}
 	p.top_level_statement_end()
 	p.check(.rcbr)
 	return ast.InterfaceDecl{
 		name: interface_name
--- a/vlib/v/scanner/scanner.v
+++ b/vlib/v/scanner/scanner.v
@ -44,12 +44,53 @@ pub mut:
 	is_fmt                      bool // Used only for skipping ${} in strings, since we need literal
 	// string values when generating formatted code.
 	comments_mode               CommentsMode
 	is_inside_toplvl_statement  bool = false // *only* used in comments_mode: .toplevel_comments, toggled by parser
 	all_tokens                  []token.Token // *only* used in comments_mode: .toplevel_comments, contains all tokens
 	tidx                        int
 	eofs                        int
 }
 /*
 How the .toplevel_comments mode works:
 In this mode, the scanner scans *everything* at once, before parsing starts,
 including all the comments, and stores the results in a buffer, s.all_tokens.
 Then .scan() just returns s.all_tokens[ s.tidx++ ] *ignoring* the
 comment tokens. In other words, by default in this mode, the parser
 *will not see any comments* inside top level statements, so it has
 no reason to complain about them.
 When the parser determines, that it is outside of a top level statement,
 it tells the scanner to backtrack s.tidx to the current p.tok index,
 then it changes .is_inside_toplvl_statement to false, and refills its
 lookahead buffer (i.e. p.peek_tok, p.peek_tok2, p.peek_tok3) from the
 scanner.
 In effect, from the parser's point of view, the next tokens, that it will
 receive with p.next(), will be the same, as if comments are not ignored
 anymore, *between* top level statements.
 When the parser determines, that it is going again inside a top level
 statement, it does the same, this time setting .is_inside_toplvl_statement
 to true, again refilling the lookahead buffer => calling .next() in this
 mode, will again ignore all the comment tokens, till the top level statement
 is finished.
 */
 // CommentsMode selects how the scanner treats comments.
 // The different kinds of scanner modes:
 //
 // .skip_comments - simplest/fastest, just ignores all comments early.
 // This mode is used by the compiler itself.
 //
 // .parse_comments is used by vfmt. Ideally it should handle inline /* */
 // comments too, i.e. it returns every kind of comment as a new token.
 //
 // .toplevel_comments is used by vdoc, parses *only* top level ones
 // that are *outside* structs/enums/fns. In this mode the parser tells
 // the scanner when a top level statement starts and ends, by calling
 // Scanner.set_is_inside_toplevel_statement.
 pub enum CommentsMode {
 	skip_comments
 	parse_comments
 	toplevel_comments
 }
 // new scanner from file.
@ -80,13 +121,32 @@ pub fn new_scanner(text string, comments_mode CommentsMode) &Scanner {
 	return s
 }
-fn (s &Scanner) new_token(tok_kind token.Kind, lit string, len int) token.Token {
+
 // should_parse_comment returns true when comments should be returned as
 // tokens: always in the .parse_comments mode, and in the .toplevel_comments
 // mode only while the scanner is *not* inside a top level statement.
 // In every other case it returns false.
 [inline]
 fn (s &Scanner) should_parse_comment() bool {
 	res := (s.comments_mode == .parse_comments) || (s.comments_mode == .toplevel_comments && !s.is_inside_toplvl_statement)
 	return res
 }
 // set_is_inside_toplevel_statement stores `newstate` in
 // s.is_inside_toplvl_statement. That flag is consulted by
 // should_parse_comment in the .toplevel_comments mode.
 // NB: this is called by v's parser
 pub fn (mut s Scanner) set_is_inside_toplevel_statement(newstate bool) {
 	s.is_inside_toplvl_statement = newstate
 }
 // set_current_tidx sets the scanner's token index s.tidx to `cidx`,
 // clamped to the range 0 .. s.all_tokens.len (both ends inclusive).
 pub fn (mut s Scanner) set_current_tidx(cidx int) {
 	// negative indexes are raised to 0
 	mut tidx := if cidx < 0 { 0 } else { cidx }
 	// indexes past the end of the token buffer are lowered to its length
 	tidx = if tidx > s.all_tokens.len { s.all_tokens.len } else { tidx }
 	s.tidx = tidx
 }
 // new_token creates a token of kind `tok_kind` with literal `lit` and
 // length `len`. The token's line_nr is s.line_nr + 1, its pos is
 // s.pos - len + 1, and its tidx is the value of s.tidx at the time of
 // the call. s.tidx is incremented as a side effect, which is why the
 // receiver is mutable.
 fn (mut s Scanner) new_token(tok_kind token.Kind, lit string, len int) token.Token {
 	// remember the index for this token, before advancing the counter
 	cidx := s.tidx
 	s.tidx++
 	return token.Token{
 		kind: tok_kind
 		lit: lit
 		line_nr: s.line_nr + 1
 		pos: s.pos - len + 1
 		len: len
 		tidx: cidx
 	}
 }
@ -537,7 +597,51 @@ fn (mut s Scanner) end_of_file() token.Token {
 	return s.new_token(.eof, '', 1)
 }
 // scan_all_tokens_in_buffer scans the text with text_scan(), until an
 // .eof token is produced, and appends every token (including the .eof one)
 // to s.all_tokens. While scanning, s.comments_mode is temporarily set to
 // .parse_comments; the previous mode is restored afterwards.
 // Finally, s.tidx is reset to 0.
 pub fn (mut s Scanner) scan_all_tokens_in_buffer(){
 	// s.scan_all_tokens_in_buffer is used mainly by vdoc,
 	// in order to implement the .toplevel_comments mode.
 	// save the current mode, so that it can be restored after the scan
 	cmode := s.comments_mode
 	s.comments_mode = .parse_comments
 	for {
 		mut t := s.text_scan()
 		s.all_tokens << t
 		if t.kind == .eof {
 			break
 		}
 	}
 	s.comments_mode = cmode
 	// start reading the buffered tokens from the beginning
 	s.tidx = 0
 	// optional dump of all the tokens, compiled in only when `debugscanner` is defined
 	$if debugscanner ? {
 		for t in s.all_tokens {
 			eprintln('> tidx:${t.tidx:-5} | kind: ${t.kind:-10} | lit: ${t.lit}')
 		}
 	}
 }
 // scan returns the next token. In the .toplevel_comments mode, the token
 // comes from buffer_scan(); in all the other modes, it comes from text_scan().
 pub fn (mut s Scanner) scan() token.Token {
 	if s.comments_mode == .toplevel_comments {
 		return s.buffer_scan()
 	}
 	return s.text_scan()
 }
 // buffer_scan returns the token at index s.tidx from s.all_tokens, and
 // advances s.tidx. Comment tokens are skipped, while should_parse_comment()
 // returns false. When the index is past the end of the buffer, the result
 // of s.end_of_file() is returned instead.
 pub fn (mut s Scanner) buffer_scan() token.Token {
 	for {
 		// the index is advanced before the bounds check, so s.tidx is
 		// incremented on this path too
 		cidx := s.tidx
 		s.tidx++
 		if cidx >= s.all_tokens.len {
 			return s.end_of_file()
 		}
 		if s.all_tokens[cidx].kind == .comment {
 			if !s.should_parse_comment() {
 				// comments are not wanted in the current state => try the next token
 				continue
 			}
 		}
 		return s.all_tokens[cidx]
 	}
 }
 fn (mut s Scanner) text_scan() token.Token {
 	// if s.comments_mode == .parse_comments {
 	// println('\nscan()')
 	// }
@ -972,7 +1076,7 @@ pub fn (mut s Scanner) scan() token.Token {
 				// fix line_nr, \n was read, and the comment is marked
 				// on the next line
 				s.line_nr--
-				if s.comments_mode == .parse_comments {
+				if s.should_parse_comment() {
 					// Find out if this comment is on its own line (for vfmt)
 					mut is_separate_line_comment := true
 					for j := start-2; j >= 0 && s.text[j] != `\n`; j-- {
@ -1013,7 +1117,7 @@ pub fn (mut s Scanner) scan() token.Token {
 					}
 				}
 				s.pos++
-				if s.comments_mode == .parse_comments {
+				if s.should_parse_comment() {
 					comment := s.text[start..(s.pos - 1)].trim_space()
 					return s.new_token(.comment, comment, comment.len + 4)
 				}
--- a/vlib/v/token/token.v
+++ b/vlib/v/token/token.v
@ -11,6 +11,7 @@ pub:
 	// name_idx int // name table index for O(1) lookup
 	pos     int // the position of the token in scanner text
 	len     int // length of the literal
 	tidx    int // the index of the token
 }
 pub enum Kind {