vdoc: implement .toplevel_comments parsing mode

pull/5250/head
Delyan Angelov 2020-06-06 18:47:16 +03:00
parent 3aecdeab63
commit 8d3f680d07
7 changed files with 259 additions and 64 deletions

View File

@ -399,7 +399,7 @@ fn (cfg DocConfig) gen_plaintext(idx int) string {
for cn in dcs.contents {
pw.writeln(cn.content)
if cn.comment.len > 0 {
pw.writeln('\n' + cn.comment)
pw.writeln('\n' + '\/\/ ' + cn.comment.trim_space())
}
if cfg.show_loc {
pw.writeln('Location: ${cn.file_path}:${cn.pos.line}:${cn.pos.col}\n\n')
@ -509,7 +509,7 @@ fn (mut cfg DocConfig) generate_docs_from_file() {
dirs := if cfg.is_multi { get_modules_list(cfg.input_path) } else { [cfg.input_path] }
for dirpath in dirs {
cfg.vprintln('Generating docs for ${dirpath}...')
mut dcs := doc.generate(dirpath, cfg.pub_only, !is_vlib) or {
mut dcs := doc.generate(dirpath, cfg.pub_only, true) or {
panic(err)
}
if dcs.contents.len == 0 { continue }

View File

@ -40,24 +40,32 @@ pub mut:
parent_type string = ''
}
pub fn write_comment_bw(stmts []ast.Stmt, start_idx int) string {
pub fn get_comment_block_right_before(stmts []ast.Stmt) string {
if stmts.len == 0 {
return ''
}
mut comment := ''
for i := start_idx; i >= 0; i-- {
mut last_comment_line_nr := 0
for i := stmts.len-1; i >= 0; i-- {
stmt := stmts[i]
if stmt is ast.Comment {
cmt := stmt as ast.Comment
cmt_content := cmt.text.trim_left('|')
comment = cmt_content + if cmt_content.starts_with('```') {
'\n'
} else {
' '
} + comment
} else {
if stmt !is ast.Comment {
panic('Not a comment')
}
if i - 1 >= 0 && !(stmts[i - 1] is ast.Comment) {
break
cmt := stmt as ast.Comment
if last_comment_line_nr != 0 && cmt.pos.line_nr < last_comment_line_nr - 1 {
// skip comments that are not part of a continuous block,
// located right above the top level statement.
// break
}
cmt_content := cmt.text.trim_left('|')
if cmt_content.len == cmt.text.len {
// ignore /* */ style comments for now
continue
}
//eprintln('cmt: $cmt')
cseparator := if cmt_content.starts_with('```') {'\n'} else {' '}
comment = cmt_content + cseparator + comment
last_comment_line_nr = cmt.pos.line_nr
}
return comment
}
@ -168,9 +176,9 @@ fn get_parent_mod(dir string) ?string {
}
return error('No V files found.')
}
file_ast := parser.parse_file(v_files[0], table.new_table(), .skip_comments, prefs, &ast.Scope{
parent: 0
})
tbl := table.new_table()
scope := &ast.Scope{ parent: 0 }
file_ast := parser.parse_file(v_files[0], tbl, .skip_comments, prefs, scope)
if file_ast.mod.name == 'main' {
return ''
}
@ -196,7 +204,7 @@ pub fn (mut d Doc) generate() ?bool {
// parse files
mut file_asts := []ast.File{}
// TODO: remove later for vlib
comments_mode := if d.with_comments { scanner.CommentsMode.parse_comments } else { scanner.CommentsMode.skip_comments }
comments_mode := if d.with_comments { scanner.CommentsMode.toplevel_comments } else { scanner.CommentsMode.skip_comments }
for file in v_files {
file_ast := parser.parse_file(file, d.table, comments_mode, d.prefs, &ast.Scope{
parent: 0
@ -224,17 +232,36 @@ pub fn (mut d Doc) generate() ?bool {
} else if file_ast.mod.name != orig_mod_name {
continue
}
mut prev_comments := []ast.Stmt{}
stmts := file_ast.stmts
for si, stmt in stmts {
for _, stmt in stmts {
//eprintln('stmt typeof: ' + typeof(stmt))
if stmt is ast.Comment {
prev_comments << stmt
continue
}
if stmt is ast.Module {
// the previous comments were probably a copyright/license one
module_comment := get_comment_block_right_before(prev_comments)
prev_comments = []
if module_comment == '' {
continue
}
if module_comment == d.head.comment {
continue
}
if d.head.comment != '' {
d.head.comment += '\n'
}
d.head.comment += module_comment
continue
}
if stmt !is ast.Module {
// todo: accumulate consts
mut name := d.get_name(stmt)
signature := d.get_signature(stmt)
pos := d.get_pos(stmt)
if !signature.starts_with('pub') && d.pub_only {
prev_comments = []
continue
}
if name.starts_with(orig_mod_name + '.') {
@ -256,19 +283,18 @@ pub fn (mut d Doc) generate() ?bool {
}
node.parent_type = parent_type
}
}
if node.name.len == 0 && node.comment.len == 0 && node.content.len == 0 {
continue
}
if node.name.len == 0 && node.comment.len == 0 && node.content.len == 0 { continue }
d.contents << node
}
if d.with_comments && (si - 1 >= 0 && stmts[si - 1] is ast.Comment) {
if stmt is ast.Module {
d.head.comment = write_comment_bw(stmts, si - 1)
} else {
if d.with_comments && (prev_comments.len > 0) {
last_comment := d.contents[d.contents.len - 1].comment
d.contents[d.contents.len - 1].comment = last_comment + '\n' + write_comment_bw(stmts,
si - 1)
}
cmt := last_comment + '\n' + get_comment_block_right_before(prev_comments)
d.contents[d.contents.len - 1].comment = cmt
}
prev_comments = []
}
}
d.time_generated = time.now()

View File

@ -71,7 +71,7 @@ pub fn (mut p Parser) call_expr(language table.Language, mod string) ast.CallExp
is_used: true
})
or_kind = .block
or_stmts = p.parse_block_no_scope()
or_stmts = p.parse_block_no_scope(false)
p.close_scope()
p.inside_or_expr = was_inside_or_expr
}
@ -117,6 +117,7 @@ pub fn (mut p Parser) call_args() []ast.CallArg {
}
fn (mut p Parser) fn_decl() ast.FnDecl {
p.top_level_statement_start()
start_pos := p.tok.position()
is_deprecated := p.attr == 'deprecated'
is_pub := p.tok.kind == .key_pub
@ -268,7 +269,7 @@ fn (mut p Parser) fn_decl() ast.FnDecl {
no_body := p.tok.kind != .lcbr
body_start_pos := p.peek_tok.position()
if p.tok.kind == .lcbr {
stmts = p.parse_block_no_scope()
stmts = p.parse_block_no_scope(true)
}
p.close_scope()
p.attr = ''
@ -321,7 +322,7 @@ fn (mut p Parser) anon_fn() ast.AnonFn {
mut stmts := []ast.Stmt{}
no_body := p.tok.kind != .lcbr
if p.tok.kind == .lcbr {
stmts = p.parse_block_no_scope()
stmts = p.parse_block_no_scope(false)
}
p.close_scope()
mut func := table.Fn{

View File

@ -14,12 +14,12 @@ import os
import runtime
import time
// import sync
pub struct Parser {
file_name string // "/home/user/hello.v"
file_name_dir string // "/home/user"
mut:
scanner &scanner.Scanner
comments_mode scanner.CommentsMode = .skip_comments // see comment in parse_file
tok token.Token
prev_tok token.Token
peek_tok token.Token
@ -75,6 +75,11 @@ pub fn parse_stmt(text string, table &table.Table, scope &ast.Scope) ast.Stmt {
}
pub fn parse_file(path string, b_table &table.Table, comments_mode scanner.CommentsMode, pref &pref.Preferences, global_scope &ast.Scope) ast.File {
// NB: when comments_mode == .toplevel_comments,
// the parser gives feedback to the scanner about toplevel statements, so that the scanner can skip
// all the tricky inner comments. This is needed because we do not have a good general solution
// for handling them, and should be removed when we do (the general solution is also needed for vfmt)
// println('parse_file("$path")')
// text := os.read_file(path) or {
// panic(err)
@ -82,6 +87,7 @@ pub fn parse_file(path string, b_table &table.Table, comments_mode scanner.Comme
mut stmts := []ast.Stmt{}
mut p := Parser{
scanner: scanner.new_scanner_file(path, comments_mode)
comments_mode: comments_mode
table: b_table
file_name: path
file_name_dir: os.dir(path)
@ -213,7 +219,10 @@ pub fn parse_files(paths []string, table &table.Table, pref &pref.Preferences, g
return files
}
pub fn (p &Parser) init_parse_fns() {
// init_parse_fns prepares the parser before parsing starts.
// In .toplevel_comments mode it asks the scanner to tokenize the whole
// input up front (see Scanner.scan_all_tokens_in_buffer); in every other
// mode it currently does nothing.
pub fn (mut p Parser) init_parse_fns() {
if p.comments_mode == .toplevel_comments {
// the buffered token list must exist before the first scan() call in this mode
p.scanner.scan_all_tokens_in_buffer()
}
// p.prefix_parse_fns = make(100, 100, sizeof(PrefixParseFn))
// p.prefix_parse_fns[token.Kind.name] = parse_name
}
@ -265,13 +274,13 @@ pub fn (mut p Parser) close_scope() {
pub fn (mut p Parser) parse_block() []ast.Stmt {
p.open_scope()
// println('parse block')
stmts := p.parse_block_no_scope()
stmts := p.parse_block_no_scope(false)
p.close_scope()
// println('nr exprs in block = $exprs.len')
return stmts
}
pub fn (mut p Parser) parse_block_no_scope() []ast.Stmt {
pub fn (mut p Parser) parse_block_no_scope(is_top_level bool) []ast.Stmt {
p.check(.lcbr)
mut stmts := []ast.Stmt{}
if p.tok.kind != .rcbr {
@ -283,6 +292,9 @@ pub fn (mut p Parser) parse_block_no_scope() []ast.Stmt {
}
}
}
if is_top_level {
p.top_level_statement_end()
}
p.check(.rcbr)
return stmts
}
@ -1031,7 +1043,7 @@ fn (mut p Parser) dot_expr(left ast.Expr) ast.Expr {
is_used: true
})
or_kind = .block
or_stmts = p.parse_block_no_scope()
or_stmts = p.parse_block_no_scope(false)
p.close_scope()
}
// `foo()?`
@ -1254,6 +1266,7 @@ fn (mut p Parser) import_stmt() ast.Import {
}
fn (mut p Parser) const_decl() ast.ConstDecl {
p.top_level_statement_start()
start_pos := p.tok.position()
is_pub := p.tok.kind == .key_pub
if is_pub {
@ -1291,6 +1304,7 @@ fn (mut p Parser) const_decl() ast.ConstDecl {
fields << field
p.global_scope.register(field.name, field)
}
p.top_level_statement_end()
p.check(.rpar)
return ast.ConstDecl{
pos: start_pos.extend(end_pos)
@ -1370,6 +1384,7 @@ fn (mut p Parser) global_decl() ast.GlobalDecl {
}
fn (mut p Parser) enum_decl() ast.EnumDecl {
p.top_level_statement_start()
is_pub := p.tok.kind == .key_pub
start_pos := p.tok.position()
if is_pub {
@ -1402,6 +1417,7 @@ fn (mut p Parser) enum_decl() ast.EnumDecl {
has_expr: has_expr
}
}
p.top_level_statement_end()
p.check(.rcbr)
attr := p.attr
is_flag := attr == 'flag'
@ -1563,3 +1579,46 @@ fn (p &Parser) new_true_expr() ast.Expr {
fn verror(s string) {
util.verror('parser error', s)
}
// top_level_statement_start tells the scanner that the parser is entering
// a top level statement. It only has an effect in .toplevel_comments mode:
// the scanner flag is set to true, and the parser's token window is refilled
// so that it reflects the new mode.
fn (mut p Parser) top_level_statement_start() {
	if p.comments_mode != .toplevel_comments {
		return
	}
	p.scanner.set_is_inside_toplevel_statement(true)
	p.rewind_scanner_to_current_token_in_new_mode()
	$if debugscanner ? {
		eprintln('>> p.top_level_statement_start | tidx:${p.tok.tidx:-5} | p.tok.kind: ${p.tok.kind:-10} | p.tok.lit: $p.tok.lit $p.peek_tok.lit $p.peek_tok2.lit $p.peek_tok3.lit ...')
	}
}
// top_level_statement_end tells the scanner that the parser has left
// a top level statement. It only has an effect in .toplevel_comments mode:
// the scanner flag is set to false, and the parser's token window is refilled
// so that it reflects the new mode.
fn (mut p Parser) top_level_statement_end() {
	if p.comments_mode != .toplevel_comments {
		return
	}
	p.scanner.set_is_inside_toplevel_statement(false)
	p.rewind_scanner_to_current_token_in_new_mode()
	$if debugscanner ? {
		eprintln('>> p.top_level_statement_end | tidx:${p.tok.tidx:-5} | p.tok.kind: ${p.tok.kind:-10} | p.tok.lit: $p.tok.lit $p.peek_tok.lit $p.peek_tok2.lit $p.peek_tok3.lit ...')
	}
}
// rewind_scanner_to_current_token_in_new_mode moves the scanner a few
// tokens back, clears the parser's token window (prev_tok, tok, peek_tok,
// peek_tok2, peek_tok3), and then advances with p.next() until p.tok is
// again the token that was current on entry. The effect is that the
// lookahead tokens are re-read under the scanner's current comment
// handling state, which is what the .toplevel_comments parsing mode needs.
fn (mut p Parser) rewind_scanner_to_current_token_in_new_mode() {
	goal_tidx := p.tok.tidx
	// Step back 5 tokens; set_current_tidx clamps the index to the valid range.
	// NOTE(review): 5 presumably matches the size of the token window - confirm against p.next().
	p.scanner.set_current_tidx(goal_tidx - 5)
	blank := token.Token{}
	p.prev_tok = blank
	p.tok = blank
	p.peek_tok = blank
	p.peek_tok2 = blank
	p.peek_tok3 = blank
	// Always advance at least once, then keep going until the goal token is current.
	// NOTE(review): a blank Token presumably carries tidx 0, so a goal index of 0
	// may stop on a blank token - verify against p.next().
	p.next()
	for p.tok.tidx != goal_tidx {
		// eprintln('rewinding to ${p.tok.tidx:5} | goal: ${goal_tidx:5}')
		p.next()
	}
}

View File

@ -9,6 +9,7 @@ import v.token
import v.util
fn (mut p Parser) struct_decl() ast.StructDecl {
p.top_level_statement_start()
start_pos := p.tok.position()
is_pub := p.tok.kind == .key_pub
if is_pub {
@ -162,6 +163,7 @@ fn (mut p Parser) struct_decl() ast.StructDecl {
}
// println('struct field $ti.name $field_name')
}
p.top_level_statement_end()
p.check(.rcbr)
}
if language == .c {
@ -277,6 +279,7 @@ fn (mut p Parser) struct_init(short_syntax bool) ast.StructInit {
}
fn (mut p Parser) interface_decl() ast.InterfaceDecl {
p.top_level_statement_start()
start_pos := p.tok.position()
is_pub := p.tok.kind == .key_pub
if is_pub {
@ -334,6 +337,7 @@ fn (mut p Parser) interface_decl() ast.InterfaceDecl {
is_pub: true
})
}
p.top_level_statement_end()
p.check(.rcbr)
return ast.InterfaceDecl{
name: interface_name

View File

@ -44,12 +44,53 @@ pub mut:
is_fmt bool // Used only for skipping ${} in strings, since we need literal
// string values when generating formatted code.
comments_mode CommentsMode
is_inside_toplvl_statement bool = false // *only* used in comments_mode: .toplevel_comments, toggled by parser
all_tokens []token.Token // *only* used in comments_mode: .toplevel_comments, contains all tokens
tidx int
eofs int
}
/*
How the .toplevel_comments mode works:
In this mode, the scanner scans *everything* at once, before parsing starts,
including all the comments, and stores the results in a buffer, s.all_tokens.
Then .scan() just returns s.all_tokens[ s.tidx++ ] *ignoring* the
comment tokens. In other words, by default in this mode, the parser
*will not see any comments* inside top level statements, so it has
no reason to complain about them.
When the parser determines, that it is outside of a top level statement,
it tells the scanner to backtrack s.tidx to the current p.tok index,
then it changes .is_inside_toplvl_statement to false, and refills its
lookahead buffer (i.e. p.peek_tok, p.peek_tok2, p.peek_tok3) from the
scanner.
In effect, from the parser's point of view, the next tokens, that it will
receive with p.next(), will be the same, as if comments are not ignored
anymore, *between* top level statements.
When the parser determines, that it is going again inside a top level
statement, it does the same, this time setting .is_inside_toplvl_statement
to true, again refilling the lookahead buffer => calling .next() in this
mode, will again ignore all the comment tokens, till the top level statement
is finished.
*/
// The different kinds of scanner modes:
//
// .skip_comments - simplest/fastest, just ignores all comments early.
// This mode is used by the compiler itself.
//
// .parse_comments is used by vfmt. Ideally it should handle inline /* */
// comments too, i.e. it returns every kind of comment as a new token.
//
// .toplevel_comments is used by vdoc, parses *only* top level ones
// that are *outside* structs/enums/fns.
// CommentsMode selects how the scanner treats comments.
pub enum CommentsMode {
skip_comments // comments are never returned as tokens
parse_comments // comments are returned as .comment tokens
toplevel_comments // comments are returned only while the scanner is not inside a top level statement
}
// new scanner from file.
@ -80,13 +121,32 @@ pub fn new_scanner(text string, comments_mode CommentsMode) &Scanner {
return s
}
fn (s &Scanner) new_token(tok_kind token.Kind, lit string, len int) token.Token {
// should_parse_comment reports whether a comment should be returned as a token:
// always in .parse_comments mode, and in .toplevel_comments mode only while
// the scanner is not inside a top level statement.
[inline]
fn (s &Scanner) should_parse_comment() bool {
	if s.comments_mode == .parse_comments {
		return true
	}
	return s.comments_mode == .toplevel_comments && !s.is_inside_toplvl_statement
}
// set_is_inside_toplevel_statement records whether the parser is currently
// inside a top level statement. The flag is consulted by should_parse_comment
// in .toplevel_comments mode.
// NB: this is called by v's parser
pub fn (mut s Scanner) set_is_inside_toplevel_statement(newstate bool) {
s.is_inside_toplvl_statement = newstate
}
// set_current_tidx moves the scanner's token index to `cidx`,
// clamped to the range 0 .. s.all_tokens.len (inclusive).
pub fn (mut s Scanner) set_current_tidx(cidx int) {
	mut clamped := cidx
	if clamped < 0 {
		clamped = 0
	}
	if clamped > s.all_tokens.len {
		clamped = s.all_tokens.len
	}
	s.tidx = clamped
}
// new_token builds a token of the given kind, literal and length, positioned
// relative to the scanner's current line and offset. The token is stamped with
// the scanner's current token index, which is then advanced by one.
fn (mut s Scanner) new_token(tok_kind token.Kind, lit string, len int) token.Token {
	tok := token.Token{
		kind: tok_kind
		lit: lit
		line_nr: s.line_nr + 1
		pos: s.pos - len + 1
		len: len
		tidx: s.tidx
	}
	s.tidx++
	return tok
}
@ -537,7 +597,51 @@ fn (mut s Scanner) end_of_file() token.Token {
return s.new_token(.eof, '', 1)
}
// scan_all_tokens_in_buffer scans the whole input at once and appends every
// token, comments included, to s.all_tokens, up to and including the .eof token.
// It is used mainly by vdoc, in order to implement the .toplevel_comments mode.
pub fn (mut s Scanner) scan_all_tokens_in_buffer() {
	// Switch to .parse_comments for the duration of the scan, so that
	// comment tokens end up in the buffer too; the mode is restored afterwards.
	saved_mode := s.comments_mode
	s.comments_mode = .parse_comments
	for {
		tok := s.text_scan()
		s.all_tokens << tok
		if tok.kind == .eof {
			break
		}
	}
	s.comments_mode = saved_mode
	// start reading the buffer from its first token
	s.tidx = 0
	$if debugscanner ? {
		for t in s.all_tokens {
			eprintln('> tidx:${t.tidx:-5} | kind: ${t.kind:-10} | lit: ${t.lit}')
		}
	}
}
// scan returns the next token. In .toplevel_comments mode the token comes
// from the buffered token list; in every other mode it is scanned directly
// from the text.
pub fn (mut s Scanner) scan() token.Token {
	if s.comments_mode != .toplevel_comments {
		return s.text_scan()
	}
	return s.buffer_scan()
}
// buffer_scan returns the next token from s.all_tokens and advances s.tidx.
// Comment tokens are skipped whenever should_parse_comment() is false.
// Once the buffer is exhausted, the result of end_of_file() is returned.
pub fn (mut s Scanner) buffer_scan() token.Token {
	for {
		idx := s.tidx
		s.tidx++
		if idx >= s.all_tokens.len {
			return s.end_of_file()
		}
		tok := s.all_tokens[idx]
		if tok.kind == .comment && !s.should_parse_comment() {
			continue
		}
		return tok
	}
}
fn (mut s Scanner) text_scan() token.Token {
// if s.comments_mode == .parse_comments {
// println('\nscan()')
// }
@ -972,7 +1076,7 @@ pub fn (mut s Scanner) scan() token.Token {
// fix line_nr, \n was read, and the comment is marked
// on the next line
s.line_nr--
if s.comments_mode == .parse_comments {
if s.should_parse_comment() {
// Find out if this comment is on its own line (for vfmt)
mut is_separate_line_comment := true
for j := start-2; j >= 0 && s.text[j] != `\n`; j-- {
@ -1013,7 +1117,7 @@ pub fn (mut s Scanner) scan() token.Token {
}
}
s.pos++
if s.comments_mode == .parse_comments {
if s.should_parse_comment() {
comment := s.text[start..(s.pos - 1)].trim_space()
return s.new_token(.comment, comment, comment.len + 4)
}

View File

@ -11,6 +11,7 @@ pub:
// name_idx int // name table index for O(1) lookup
pos int // the position of the token in scanner text
len int // length of the literal
tidx int // the index of the token
}
pub enum Kind {