fix(parser): allow blank lines within a table block

Blank (whitespace-only) lines inside a table no longer split it into
separate blocks. They are buffered and discarded if more table rows
follow, enabling patterns like pre-filling a recurring meeting entry
with a blank line separating it from the rest of the day's entries.
This commit is contained in:
Jef Roosens 2026-05-22 12:01:06 +02:00
parent ac1e9f959a
commit d5dbe8791b
Signed by: Jef Roosens
GPG key ID: 119385BCAA005C21
2 changed files with 54 additions and 5 deletions

View file

@ -38,21 +38,31 @@ def extract_table_blocks(lines: list[str]) -> list[list[str]]:
""" """
Split a markdown document into contiguous table blocks. Split a markdown document into contiguous table blocks.
A block is a maximal run of lines that are either table rows or table A block is a maximal run of table rows/separators, with blank (whitespace-
separators. Non-table lines (headings, prose, bullet points, blank lines) only) lines tolerated in the middle as long as more table rows follow.
break a block. Each returned block contains at least a header and a Non-table, non-blank lines always break a block.
separator line; shorter runs are discarded. Each returned block contains at least a header and a separator line;
shorter runs are discarded.
""" """
blocks: list[list[str]] = [] blocks: list[list[str]] = []
current: list[str] = [] current: list[str] = []
pending_blanks: list[str] = [] # blank lines seen after last table row
for line in lines: for line in lines:
if _is_table_line(line): if _is_table_line(line):
# Resume block, discarding the pending blanks (they were internal)
pending_blanks.clear()
current.append(line) current.append(line)
elif not line.strip():
# Blank line — buffer it; don't break the block yet
if current:
pending_blanks.append(line)
else: else:
if len(current) >= 2: # at minimum: header + separator # Non-table, non-blank line — close any open block
if len(current) >= 2:
blocks.append(current) blocks.append(current)
current = [] current = []
pending_blanks.clear()
if len(current) >= 2: if len(current) >= 2:
blocks.append(current) blocks.append(current)

View file

@ -91,6 +91,45 @@ class TestExtractTableBlocks:
blocks = extract_table_blocks(lines) blocks = extract_table_blocks(lines)
assert len(blocks) == 1 assert len(blocks) == 1
def test_blank_line_within_table_kept_as_one_block(self):
    """A single blank line between two rows must not split the table."""
    lines = [
        "| Start | End | Project | Story | Note |",
        # Separator carries exactly one cell per header column (no stray "|").
        "|-------|-------|---------|-------|------|",
        "| 08:00 | 08:30 | bugs | | |",
        "",
        "| 09:00 | 09:30 | scrum | | dsu |",
    ]
    blocks = extract_table_blocks(lines)
    assert len(blocks) == 1
    assert len(blocks[0]) == 4  # header + sep + row + row (blank dropped)
    # Pin the contents too: every non-blank line, in its original order.
    assert blocks[0] == [line for line in lines if line.strip()]
def test_multiple_blank_lines_within_table(self):
    """Several consecutive blank lines between rows must not split the table."""
    lines = [
        "| Start | End | Project | Story | Note |",
        # Separator carries exactly one cell per header column (no stray "|").
        "|-------|-------|---------|-------|------|",
        "| 08:00 | 08:30 | bugs | | |",
        "",
        "",
        "| 09:00 | 09:30 | scrum | | dsu |",
    ]
    blocks = extract_table_blocks(lines)
    assert len(blocks) == 1
    assert len(blocks[0]) == 4  # header + sep + row + row (both blanks dropped)
    # Pin the contents too: every non-blank line, in its original order.
    assert blocks[0] == [line for line in lines if line.strip()]
def test_blank_line_at_end_of_table_does_not_include_blank(self):
    """A blank line trailing the last row must not end up inside the block."""
    document = [*WITH_DURATION, "", "# Next section"]
    found = extract_table_blocks(document)
    # Exactly one table is present in the document.
    assert len(found) == 1
    # The trailing blank was never appended to the block.
    assert "" not in found[0]
def test_blank_between_tables_still_splits(self):
    """Blank lines around a prose line still separate two tables."""
    # Layout: first table, blank, heading (non-table, non-blank), blank,
    # second table. The heading is what closes the first block.
    document = [*WITH_DURATION, "", "# Next day", "", *WITHOUT_DURATION]
    found = extract_table_blocks(document)
    assert len(found) == 2
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# parse_table # parse_table