fix(parser): allow blank lines within a table block
Blank (whitespace-only) lines inside a table no longer split it into separate blocks. They are buffered and discarded when more table rows follow, which enables patterns such as pre-filling a recurring meeting entry and separating it with a blank line from the rest of the day's entries.
This commit is contained in:
parent
ac1e9f959a
commit
d5dbe8791b
2 changed files with 54 additions and 5 deletions
|
|
@@ -38,21 +38,31 @@ def extract_table_blocks(lines: list[str]) -> list[list[str]]:
|
|||
"""
|
||||
Split a markdown document into contiguous table blocks.
|
||||
|
||||
A block is a maximal run of lines that are either table rows or table
|
||||
separators. Non-table lines (headings, prose, bullet points, blank lines)
|
||||
break a block. Each returned block contains at least a header and a
|
||||
separator line; shorter runs are discarded.
|
||||
A block is a maximal run of table rows/separators, with blank (whitespace-
|
||||
only) lines tolerated in the middle as long as more table rows follow.
|
||||
Non-table, non-blank lines always break a block.
|
||||
Each returned block contains at least a header and a separator line;
|
||||
shorter runs are discarded.
|
||||
"""
|
||||
blocks: list[list[str]] = []
|
||||
current: list[str] = []
|
||||
pending_blanks: list[str] = [] # blank lines seen after last table row
|
||||
|
||||
for line in lines:
|
||||
if _is_table_line(line):
|
||||
# Resume block, discarding the pending blanks (they were internal)
|
||||
pending_blanks.clear()
|
||||
current.append(line)
|
||||
elif not line.strip():
|
||||
# Blank line — buffer it; don't break the block yet
|
||||
if current:
|
||||
pending_blanks.append(line)
|
||||
else:
|
||||
if len(current) >= 2: # at minimum: header + separator
|
||||
# Non-table, non-blank line — close any open block
|
||||
if len(current) >= 2:
|
||||
blocks.append(current)
|
||||
current = []
|
||||
pending_blanks.clear()
|
||||
|
||||
if len(current) >= 2:
|
||||
blocks.append(current)
|
||||
|
|
|
|||
|
|
@@ -91,6 +91,45 @@ class TestExtractTableBlocks:
|
|||
blocks = extract_table_blocks(lines)
|
||||
assert len(blocks) == 1
|
||||
|
||||
def test_blank_line_within_table_kept_as_one_block(self):
|
||||
# A blank line in the middle of a table should not split it
|
||||
lines = [
|
||||
"| Start | End | Project | Story | Note |",
|
||||
"|-------|-------|---------|-------|------||",
|
||||
"| 08:00 | 08:30 | bugs | | |",
|
||||
"",
|
||||
"| 09:00 | 09:30 | scrum | | dsu |",
|
||||
]
|
||||
blocks = extract_table_blocks(lines)
|
||||
assert len(blocks) == 1
|
||||
assert len(blocks[0]) == 4 # header + sep + row + row (blank dropped)
|
||||
|
||||
def test_multiple_blank_lines_within_table(self):
|
||||
lines = [
|
||||
"| Start | End | Project | Story | Note |",
|
||||
"|-------|-------|---------|-------|------||",
|
||||
"| 08:00 | 08:30 | bugs | | |",
|
||||
"",
|
||||
"",
|
||||
"| 09:00 | 09:30 | scrum | | dsu |",
|
||||
]
|
||||
blocks = extract_table_blocks(lines)
|
||||
assert len(blocks) == 1
|
||||
assert len(blocks[0]) == 4
|
||||
|
||||
def test_blank_line_at_end_of_table_does_not_include_blank(self):
|
||||
# Blank after the last row should not be included in the block
|
||||
lines = WITH_DURATION + ["", "# Next section"]
|
||||
blocks = extract_table_blocks(lines)
|
||||
assert len(blocks) == 1
|
||||
assert "" not in blocks[0]
|
||||
|
||||
def test_blank_between_tables_still_splits(self):
|
||||
# A blank followed by prose should still end the first block
|
||||
lines = WITH_DURATION + ["", "# Next day", ""] + WITHOUT_DURATION
|
||||
blocks = extract_table_blocks(lines)
|
||||
assert len(blocks) == 2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# parse_table
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue