fix(parser): allow blank lines within a table block

Blank (whitespace-only) lines inside a table no longer split it into
separate blocks. They are buffered and discarded if more table rows
follow, so the rows before and after them stay in one block; any other
non-table line still ends the block. This enables patterns like
pre-filling a recurring meeting entry with a blank line separating it
from the rest of the day's entries.
This commit is contained in:
Jef Roosens 2026-05-22 12:01:06 +02:00
parent ac1e9f959a
commit d5dbe8791b
Signed by: Jef Roosens
GPG key ID: 119385BCAA005C21
2 changed files with 54 additions and 5 deletions

View file

@ -38,21 +38,31 @@ def extract_table_blocks(lines: list[str]) -> list[list[str]]:
"""
Split a markdown document into contiguous table blocks.
A block is a maximal run of lines that are either table rows or table
separators. Non-table lines (headings, prose, bullet points, blank lines)
break a block. Each returned block contains at least a header and a
separator line; shorter runs are discarded.
A block is a maximal run of table rows/separators, with blank (whitespace-
only) lines tolerated in the middle as long as more table rows follow.
Non-table, non-blank lines always break a block.
Each returned block contains at least a header and a separator line;
shorter runs are discarded.
"""
blocks: list[list[str]] = []
current: list[str] = []
pending_blanks: list[str] = [] # blank lines seen after last table row
for line in lines:
if _is_table_line(line):
# Resume block, discarding the pending blanks (they were internal)
pending_blanks.clear()
current.append(line)
elif not line.strip():
# Blank line — buffer it; don't break the block yet
if current:
pending_blanks.append(line)
else:
if len(current) >= 2: # at minimum: header + separator
# Non-table, non-blank line — close any open block
if len(current) >= 2:
blocks.append(current)
current = []
pending_blanks.clear()
if len(current) >= 2:
blocks.append(current)