diff --git a/src/timesheets/parser.py b/src/timesheets/parser.py
index f146b8c..b95dbdc 100644
--- a/src/timesheets/parser.py
+++ b/src/timesheets/parser.py
@@ -38,21 +38,26 @@ def extract_table_blocks(lines: list[str]) -> list[list[str]]:
     """
     Split a markdown document into contiguous table blocks.
 
-    A block is a maximal run of lines that are either table rows or table
-    separators. Non-table lines (headings, prose, bullet points, blank lines)
-    break a block. Each returned block contains at least a header and a
-    separator line; shorter runs are discarded.
+    A block is a maximal run of table rows/separators. Blank (whitespace-
+    only) lines neither end a block nor are stored in it, so a table that
+    is interrupted by blank lines comes back as one block (and so do two
+    tables separated only by blank lines). Any other non-table line closes
+    the open block.
+    Each returned block contains at least a header and a separator line;
+    shorter runs are discarded.
     """
     blocks: list[list[str]] = []
     current: list[str] = []
 
     for line in lines:
         if _is_table_line(line):
             current.append(line)
-        else:
+        elif line.strip():
+            # Non-table, non-blank line: close any open block.
             if len(current) >= 2:  # at minimum: header + separator
                 blocks.append(current)
             current = []
+        # Blank lines fall through: skipped without closing the block.
 
     if len(current) >= 2:
         blocks.append(current)
diff --git a/tests/test_parser.py b/tests/test_parser.py
index a043573..fff5e6e 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -91,6 +91,45 @@ class TestExtractTableBlocks:
         blocks = extract_table_blocks(lines)
         assert len(blocks) == 1
 
+    def test_blank_line_within_table_kept_as_one_block(self):
+        # A blank line in the middle of a table should not split it
+        lines = [
+            "| Start | End | Project | Story | Note |",
+            "|-------|-------|---------|-------|------|",
+            "| 08:00 | 08:30 | bugs | | |",
+            "",
+            "| 09:00 | 09:30 | scrum | | dsu |",
+        ]
+        blocks = extract_table_blocks(lines)
+        assert len(blocks) == 1
+        assert len(blocks[0]) == 4  # header + sep + row + row (blank dropped)
+
+    def test_multiple_blank_lines_within_table(self):
+        lines = [
+            "| Start | End | Project | Story | Note |",
+            "|-------|-------|---------|-------|------|",
+            "| 08:00 | 08:30 | bugs | | |",
+            "",
+            "",
+            "| 09:00 | 09:30 | scrum | | dsu |",
+        ]
+        blocks = extract_table_blocks(lines)
+        assert len(blocks) == 1
+        assert len(blocks[0]) == 4
+
+    def test_blank_line_at_end_of_table_does_not_include_blank(self):
+        # Blank after the last row should not be included in the block
+        lines = WITH_DURATION + ["", "# Next section"]
+        blocks = extract_table_blocks(lines)
+        assert len(blocks) == 1
+        assert "" not in blocks[0]
+
+    def test_blank_between_tables_still_splits(self):
+        # A blank followed by prose should still end the first block
+        lines = WITH_DURATION + ["", "# Next day", ""] + WITHOUT_DURATION
+        blocks = extract_table_blocks(lines)
+        assert len(blocks) == 2
+
 
 # ---------------------------------------------------------------------------
 # parse_table