feat(parser): support multiple tables in a single markdown document
- Add extract_table_blocks() to split a document into contiguous table blocks, ignoring prose, headings, and blank lines between them
- Add parse_document() as the new top-level entry point that runs extract_table_blocks + detect_has_duration_column + parse_table per block and returns a combined flat list of rows
- Guard against empty End cells (e.g. in-progress rows) by validating the end field before calculating duration
- Update cli.py to use parse_document() instead of the manual detect + parse combo
- Add tests for extract_table_blocks and parse_document, including two smoke tests against the real 2026-W21 weekly timesheet file
This commit is contained in:
parent
7bea08ddac
commit
d6689a6c83
4 changed files with 295 additions and 9 deletions
|
|
@ -4,7 +4,7 @@ import sys
|
||||||
from datetime import date
|
from datetime import date
|
||||||
|
|
||||||
from .output import print_summary, write_csv
|
from .output import print_summary, write_csv
|
||||||
from .parser import aggregate_rows, detect_has_duration_column, parse_table
|
from .parser import aggregate_rows, parse_document
|
||||||
from .projects import load_project_map
|
from .projects import load_project_map
|
||||||
from .utils import format_date
|
from .utils import format_date
|
||||||
|
|
||||||
|
|
@ -18,7 +18,8 @@ def build_parser() -> argparse.ArgumentParser:
|
||||||
help="Path to the markdown file containing the timesheet table, or '-' to read from stdin.",
|
help="Path to the markdown file containing the timesheet table, or '-' to read from stdin.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-o", "--output",
|
"-o",
|
||||||
|
"--output",
|
||||||
help="Path to the output CSV file. Defaults to stdout.",
|
help="Path to the output CSV file. Defaults to stdout.",
|
||||||
default=None,
|
default=None,
|
||||||
)
|
)
|
||||||
|
|
@ -59,7 +60,7 @@ def main() -> None:
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
lines = content.splitlines()
|
lines = content.splitlines()
|
||||||
rows = parse_table(lines, has_duration_col=detect_has_duration_column(lines))
|
rows = parse_document(lines)
|
||||||
|
|
||||||
if not rows:
|
if not rows:
|
||||||
print("Warning: no timesheet rows found in input.", file=sys.stderr)
|
print("Warning: no timesheet rows found in input.", file=sys.stderr)
|
||||||
|
|
|
||||||
|
|
@ -4,10 +4,21 @@ from collections import defaultdict
|
||||||
from .utils import duration_from_start_end, parse_duration, strip_markdown_link
|
from .utils import duration_from_start_end, parse_duration, strip_markdown_link
|
||||||
|
|
||||||
|
|
||||||
|
def _is_table_line(line: str) -> bool:
|
||||||
|
"""Return True if the line looks like part of a markdown table."""
|
||||||
|
s = line.strip()
|
||||||
|
return s.startswith("|") and s.endswith("|")
|
||||||
|
|
||||||
|
|
||||||
|
def _is_separator_line(line: str) -> bool:
|
||||||
|
"""Return True if the line is a markdown table separator (|---|---|)."""
|
||||||
|
return bool(re.match(r"^\|[-| :]+\|$", line.strip()))
|
||||||
|
|
||||||
|
|
||||||
def detect_has_duration_column(lines: list[str]) -> bool:
|
def detect_has_duration_column(lines: list[str]) -> bool:
|
||||||
"""
|
"""
|
||||||
Inspect the header row to determine whether a Duration column is present.
|
Inspect the header row of a table block to determine whether a Duration
|
||||||
Falls back to True if no header row is found.
|
column is present. Falls back to True if no header row is found.
|
||||||
"""
|
"""
|
||||||
for line in lines:
|
for line in lines:
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
|
|
@ -19,9 +30,35 @@ def detect_has_duration_column(lines: list[str]) -> bool:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def extract_table_blocks(lines: list[str]) -> list[list[str]]:
    """
    Group the table lines of a markdown document into contiguous blocks.

    Lines are scanned in order. Consecutive lines accepted by
    ``_is_table_line`` are collected into one run; any other line (heading,
    prose, bullet point, blank line) ends the run. Runs shorter than two
    lines are dropped, because a table needs at least a header and a
    separator. Only the run length is checked here, not that the second
    line actually is a separator.

    Returns the kept runs in document order, each holding the original,
    unmodified lines.
    """
    min_block_len = 2  # header + separator
    found: list[list[str]] = []
    run: list[str] = []

    # The trailing empty string is a non-table sentinel: it flushes a table
    # that runs up to the very last line of the document.
    for text in [*lines, ""]:
        if _is_table_line(text):
            run.append(text)
            continue
        if len(run) >= min_block_len:
            found.append(run)
        run = []

    return found
|
||||||
|
|
||||||
|
|
||||||
def parse_table(lines: list[str], has_duration_col: bool = True) -> list[dict]:
|
def parse_table(lines: list[str], has_duration_col: bool = True) -> list[dict]:
|
||||||
"""
|
"""
|
||||||
Parse markdown table lines into a list of row dicts.
|
Parse a single markdown table block into a list of row dicts.
|
||||||
|
|
||||||
With duration: Start | End | Duration | Project | Story | Note (6 cols)
|
With duration: Start | End | Duration | Project | Story | Note (6 cols)
|
||||||
Without duration: Start | End | Project | Story | Note (5 cols)
|
Without duration: Start | End | Project | Story | Note (5 cols)
|
||||||
|
|
@ -31,7 +68,7 @@ def parse_table(lines: list[str], has_duration_col: bool = True) -> list[dict]:
|
||||||
|
|
||||||
for line in lines:
|
for line in lines:
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
if not line or re.match(r"^\|[-| :]+\|$", line):
|
if not line or _is_separator_line(line):
|
||||||
continue
|
continue
|
||||||
if not (line.startswith("|") and line.endswith("|")):
|
if not (line.startswith("|") and line.endswith("|")):
|
||||||
continue
|
continue
|
||||||
|
|
@ -42,13 +79,18 @@ def parse_table(lines: list[str], has_duration_col: bool = True) -> list[dict]:
|
||||||
|
|
||||||
if has_duration_col:
|
if has_duration_col:
|
||||||
start, end, duration, project, story, note = (
|
start, end, duration, project, story, note = (
|
||||||
cells[0], cells[1], cells[2], cells[3],
|
cells[0],
|
||||||
|
cells[1],
|
||||||
|
cells[2],
|
||||||
|
cells[3],
|
||||||
strip_markdown_link(cells[4]),
|
strip_markdown_link(cells[4]),
|
||||||
strip_markdown_link(cells[5]),
|
strip_markdown_link(cells[5]),
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
start, end, project, story, note = (
|
start, end, project, story, note = (
|
||||||
cells[0], cells[1], cells[2],
|
cells[0],
|
||||||
|
cells[1],
|
||||||
|
cells[2],
|
||||||
strip_markdown_link(cells[3]),
|
strip_markdown_link(cells[3]),
|
||||||
strip_markdown_link(cells[4]),
|
strip_markdown_link(cells[4]),
|
||||||
)
|
)
|
||||||
|
|
@ -58,6 +100,8 @@ def parse_table(lines: list[str], has_duration_col: bool = True) -> list[dict]:
|
||||||
continue
|
continue
|
||||||
if not re.match(r"^\d+:\d{2}$", start):
|
if not re.match(r"^\d+:\d{2}$", start):
|
||||||
continue
|
continue
|
||||||
|
if not re.match(r"^\d+:\d{2}$", end):
|
||||||
|
continue
|
||||||
|
|
||||||
if duration is not None:
|
if duration is not None:
|
||||||
if not re.match(r"^\d+:\d{2}$", duration):
|
if not re.match(r"^\d+:\d{2}$", duration):
|
||||||
|
|
@ -83,6 +127,20 @@ def parse_table(lines: list[str], has_duration_col: bool = True) -> list[dict]:
|
||||||
return rows
|
return rows
|
||||||
|
|
||||||
|
|
||||||
|
def parse_document(lines: list[str]) -> list[dict]:
    """
    Collect the timesheet rows of every table in a markdown document.

    The document is split with ``extract_table_blocks``. Each block is then
    inspected on its own with ``detect_has_duration_column`` and parsed by
    ``parse_table`` using that layout, so tables with and without a Duration
    column can be mixed in one document.

    Returns one flat list with the rows of all blocks, in block order.
    """
    return [
        row
        for table in extract_table_blocks(lines)
        for row in parse_table(
            table, has_duration_col=detect_has_duration_column(table)
        )
    ]
|
||||||
|
|
||||||
|
|
||||||
def build_description(story: str, note: str) -> str:
|
def build_description(story: str, note: str) -> str:
|
||||||
"""Combine story and note into a single description string."""
|
"""Combine story and note into a single description string."""
|
||||||
parts = [p.strip() for p in [story, note] if p.strip()]
|
parts = [p.strip() for p in [story, note] if p.strip()]
|
||||||
|
|
|
||||||
113
tests/2026 - W21.md
Normal file
113
tests/2026 - W21.md
Normal file
|
|
@ -0,0 +1,113 @@
|
||||||
|
# Week of 2026-05-18 - Review
|
||||||
|
|
||||||
|
# Vrijdag - 2026-05-22
|
||||||
|
|
||||||
|
| Start | End | Project | Story | Note |
|
||||||
|
|-------|-------|----------|------------------------------------------------------------------------------------------------------------------------------------------------|-----------------|
|
||||||
|
| 08:15 | 09:30 | rate | [PBI 34976: refactor(rest): migrate manual entry, manual entry form and prototype handlers to OpenAPI 3.0](:/76f097bbb7854fd086e41ebc9132898f) | rebase |
|
||||||
|
| 09:30 | 09:45 | bugs | [Bug 35232: Biiiig asset tree import is slow](:/9e12b149a19b4c789b02479d7e3412bc) | review |
|
||||||
|
| 09:45 | 09:55 | scrum | | daily standup |
|
||||||
|
| 09:55 | | bugs | [Bug 35232: Biiiig asset tree import is slow](:/9e12b149a19b4c789b02479d7e3412bc) | review |
|
||||||
|
| 16:00 | 16:30 | internal | | Growth Path 2.0 |
|
||||||
|
| 16:30 | 17:30 | internal | | Factry Flow |
|
||||||
|
|
||||||
|
- [ ] Triage [Inbox](:/3a994fed3dc746a59d71c9f7ab1f60bc)
|
||||||
|
- [x] Process `distill` tags
|
||||||
|
- [ ] Process `refine` tags
|
||||||
|
- [ ] Refine [2026-05-26 - Sprint Retro](:/49951990900947438b80007b2d21b228)
|
||||||
|
- [ ] re-review [Bug 35232: Biiiig asset tree import is slow](:/9e12b149a19b4c789b02479d7e3412bc)
|
||||||
|
|
||||||
|
ge moogtr de big REST API MR rebasen op develop, good luck, `git rebase --onto origin/develop -i 027b7cc3b`, er staan paar fixup commits klaar al
|
||||||
|
|
||||||
|
# Donderdag - 2026-05-21
|
||||||
|
|
||||||
|
| Start | End | Project | Story | Note |
|
||||||
|
|-------|-------|---------|------------------------------------------------------------------------------------------------------------------------------------------------|---------------------|
|
||||||
|
| 08:15 | 08:20 | office | | koffie |
|
||||||
|
| 08:20 | 09:35 | bugs | Bug 35326: OpenAPI: query parameters should be case-insensitive (canonical = lowercase) | review |
|
||||||
|
| 09:35 | 09:45 | hatch | [PBI 35098: SPIKE: Gitlab MR previews apps](:/eb90c72a90e746d8b535dda26e8e7275) | |
|
||||||
|
| 09:45 | 10:00 | scrum | | daily standup |
|
||||||
|
| 10:00 | 10:20 | hatch | [PBI 35098: SPIKE: Gitlab MR previews apps](:/eb90c72a90e746d8b535dda26e8e7275) | |
|
||||||
|
| 10:20 | 10:50 | refine | PBI 35330: Calculation script errors should not be in Sentry? Or add a tag we can filter by? | |
|
||||||
|
| 10:50 | 11:15 | product | | Claude code CLI |
|
||||||
|
| 11:15 | 12:05 | refine | PBI 35330: Calculation script errors should not be in Sentry? Or add a tag we can filter by? | |
|
||||||
|
| 12:45 | 13:40 | bugs | | review race bugs MR |
|
||||||
|
| 13:40 | 14:30 | bugs | [Bug 35232: Biiiig asset tree import is slow](:/9e12b149a19b4c789b02479d7e3412bc) | review |
|
||||||
|
| 14:30 | 15:00 | bugs | Bug 35331: Clients hitting sql max open connections | |
|
||||||
|
| 15:00 | 15:25 | refine | PBI 35330: Calculation script errors should not be in Sentry? Or add a tag we can filter by? | respond to comments |
|
||||||
|
| 15:25 | 15:30 | bugs | Bug 35331: Clients hitting sql max open connections | |
|
||||||
|
| 15:30 | 16:30 | rate | [PBI 34976: refactor(rest): migrate manual entry, manual entry form and prototype handlers to OpenAPI 3.0](:/76f097bbb7854fd086e41ebc9132898f) | respond to comments |
|
||||||
|
|
||||||
|
- [ ] Uitzoeken hoeveel audit log info er verloren is gegaan door de REST api migratie (`GetDatabase` -> `GetDatabaseByOrganizationUUID`)
|
||||||
|
- [ ] kijken of GetEvents property value filter nu broken is met datasource door remodelling in openapi spec
|
||||||
|
|
||||||
|
# Woensdag - 2026-05-20
|
||||||
|
|
||||||
|
| Start | End | Project | Story | Note |
|
||||||
|
|-------|-------|---------|------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------|
|
||||||
|
| 08:15 | 08:25 | office | | koffie, sleutels zoeken |
|
||||||
|
| 08:25 | 09:00 | bugs | Bug 34948: Bulk measurement update returns 500 on failed validation | |
|
||||||
|
| 09:00 | 09:15 | bugs | Bug 35238: DestroySink does not listen to the 'force' query-parameter | |
|
||||||
|
| 09:15 | 09:50 | bugs | | CPU usage spikes in statistics collection |
|
||||||
|
| 09:50 | 10:00 | bugs | Bug 35241: OpenAPI: timeseries query result schema breaks JSON round-trip for scalar values, arrays, and string tags | review |
|
||||||
|
| 10:00 | 10:30 | bugs | Bug 35238: DestroySink does not listen to the 'force' query-parameter | |
|
||||||
|
| 10:30 | 12:00 | rate | [PBI 34976: refactor(rest): migrate manual entry, manual entry form and prototype handlers to OpenAPI 3.0](:/76f097bbb7854fd086e41ebc9132898f) | |
|
||||||
|
| 12:40 | 13:30 | rate | [PBI 34976: refactor(rest): migrate manual entry, manual entry form and prototype handlers to OpenAPI 3.0](:/76f097bbb7854fd086e41ebc9132898f) | trying to remove the wrapper |
|
||||||
|
| 13:30 | 13:45 | bugs | Bug 34948: Bulk measurement update returns 500 on failed validation | respond to comments |
|
||||||
|
| 13:45 | 14:45 | rate | [PBI 34976: refactor(rest): migrate manual entry, manual entry form and prototype handlers to OpenAPI 3.0](:/76f097bbb7854fd086e41ebc9132898f) | trying to remove the wrapper |
|
||||||
|
| 14:45 | 15:00 | bugs | Bug 34948: Bulk measurement update returns 500 on failed validation | respond to comments |
|
||||||
|
| 15:00 | 15:15 | bugs | Bug 35238: DestroySink does not listen to the 'force' query-parameter | respond to comments |
|
||||||
|
| 15:15 | 15:25 | hatch | [PBI 35098: SPIKE: Gitlab MR previews apps](:/eb90c72a90e746d8b535dda26e8e7275) | |
|
||||||
|
| 15:25 | 15:45 | bugs | Bug 35326: OpenAPI: query parameters should be case-insensitive (canonical = lowercase) | review |
|
||||||
|
| 15:45 | 16:00 | product | | CI Evelien fixen |
|
||||||
|
| 16:00 | 16:20 | bugs | Bug 35326: OpenAPI: query parameters should be case-insensitive (canonical = lowercase) | review |
|
||||||
|
| 16:20 | 17:00 | hatch | [PBI 35098: SPIKE: Gitlab MR previews apps](:/eb90c72a90e746d8b535dda26e8e7275) | |
|
||||||
|
|
||||||
|
|
||||||
|
- [x] fix CPU usage spikes in statistics collection ([thread](https://factrylabs.slack.com/archives/C01TD6M694G/p1779112687422719))
|
||||||
|
* idee: maak een global statistics variable aan, guarded met een mutex en getters/setters
|
||||||
|
* hou bij hoe oud die is, if too old -> recollect, anders geef gwn de cached versie mee
|
||||||
|
* dan gebeurt collection 1 keer en gebruiken alle loops gwn the same data
|
||||||
|
|
||||||
|
# Dinsdag - 2026-05-19
|
||||||
|
|
||||||
|
| Start | End | Project | Story | Note |
|
||||||
|
|-------|-------|---------|---------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------|
|
||||||
|
| 08:10 | 08:15 | office | | coffee |
|
||||||
|
| 08:15 | 09:45 | rate | [PBI 34974: refactor(rest): migrate collectors, sinks, forwarders, tasks and related handlers to OpenAPI 3.0](:/9b4d4a0384be4ec3b1bbbcab68640721) | respond to comments |
|
||||||
|
| 09:45 | 09:55 | scrum | | daily standup |
|
||||||
|
| 09:55 | 10:30 | rate | [PBI 34974: refactor(rest): migrate collectors, sinks, forwarders, tasks and related handlers to OpenAPI 3.0](:/9b4d4a0384be4ec3b1bbbcab68640721) | rebase |
|
||||||
|
| 10:30 | 12:05 | rate | [PBI 34976: refactor(rest): migrate manual entry, manual entry form and prototype handlers to OpenAPI 3.0](:/76f097bbb7854fd086e41ebc9132898f) | fix tests |
|
||||||
|
| 12:55 | 13:05 | rate | [PBI 34976: refactor(rest): migrate manual entry, manual entry form and prototype handlers to OpenAPI 3.0](:/76f097bbb7854fd086e41ebc9132898f) | fix tests |
|
||||||
|
| 13:05 | 14:10 | bugs | [Bug 35232: Biiiig asset tree import is slow](:/9e12b149a19b4c789b02479d7e3412bc) | review |
|
||||||
|
| 14:10 | 14:45 | rate | [PBI 34974: refactor(rest): migrate collectors, sinks, forwarders, tasks and related handlers to OpenAPI 3.0](:/9b4d4a0384be4ec3b1bbbcab68640721) | final comments |
|
||||||
|
| 14:45 | 15:15 | rate | [PBI 34976: refactor(rest): migrate manual entry, manual entry form and prototype handlers to OpenAPI 3.0](:/76f097bbb7854fd086e41ebc9132898f) | fix tests |
|
||||||
|
| 15:15 | 15:45 | bugs | [Bug 35232: Biiiig asset tree import is slow](:/9e12b149a19b4c789b02479d7e3412bc) | review |
|
||||||
|
| 15:45 | 16:10 | rate | [PBI 34974: refactor(rest): migrate collectors, sinks, forwarders, tasks and related handlers to OpenAPI 3.0](:/9b4d4a0384be4ec3b1bbbcab68640721) | rebase |
|
||||||
|
| 16:10 | 16:30 | rate | [PBI 34976: refactor(rest): migrate manual entry, manual entry form and prototype handlers to OpenAPI 3.0](:/76f097bbb7854fd086e41ebc9132898f) | respond to duo review |
|
||||||
|
| 16:30 | 17:10 | bugs | Bug 34948: Bulk measurement update returns 500 on failed validation | |
|
||||||
|
|
||||||
|
|
||||||
|
# Maandag - 2026-05-18
|
||||||
|
|
||||||
|
| Start | End | Project | Story | Note |
|
||||||
|
|-------|-------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------|
|
||||||
|
| 08:10 | 08:15 | office | | coffee |
|
||||||
|
| 08:15 | 09:45 | rate | PBI 34972: refactor(rest): migrate events and event configuration handlers to OpenAPI 3.0 | respond to comments |
|
||||||
|
| 09:45 | 10:05 | scrum | | daily standup |
|
||||||
|
| 10:05 | 10:25 | rate | PBI 34972: refactor(rest): migrate events and event configuration handlers to OpenAPI 3.0 | respond to comments |
|
||||||
|
| 10:25 | 11:45 | rate | PBI 34974: refactor(rest): migrate collectors, sinks, forwarders, tasks and related handlers to OpenAPI 3.0 | fix tests |
|
||||||
|
| 11:45 | 12:05 | bugs | | review race bugs MR |
|
||||||
|
| 12:40 | 13:05 | bugs | | review race bugs MR |
|
||||||
|
| 13:05 | 13:40 | internal | | claude code proberen instellen |
|
||||||
|
| 13:40 | 13:55 | rate | PBI 34974: refactor(rest): migrate collectors, sinks, forwarders, tasks and related handlers to OpenAPI 3.0 | CI debugging |
|
||||||
|
| 13:55 | 14:50 | rate | PBI 34976: refactor(rest): migrate manual entry, manual entry form and prototype handlers to OpenAPI 3.0 | fix tests |
|
||||||
|
| 14:50 | 15:15 | refine | PBI 34321: Update asset from prototype: add 'Keep all' / 'Update all' bulk action buttons | |
|
||||||
|
| 15:30 | 15:35 | bugs | Bug 34916: GetAssets endpoint fails with "invalid escape \ sequence" when path filter contains backslashes | |
|
||||||
|
| 15:35 | 15:45 | refine | PBI 34321: Update asset from prototype: add 'Keep all' / 'Update all' bulk action buttons | |
|
||||||
|
| 15:45 | 17:00 | rate | [PBI 34974: refactor(rest): migrate collectors, sinks, forwarders, tasks and related handlers to OpenAPI 3.0](:/9b4d4a0384be4ec3b1bbbcab68640721) | respond to comments |
|
||||||
|
|
||||||
|
* laatste rest refactor MR nog afwerken
|
||||||
|
* gij hebt de 3 laatste op uw naam, dus gij kunt de wrapper uiteindelijk wegdoen
|
||||||
|
* moet nog verder opkuisen wa claude gedaan heeft
|
||||||
|
* nog wa testen da falen ook
|
||||||
|
|
@ -1,9 +1,13 @@
|
||||||
|
import os
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from timesheets.parser import (
|
from timesheets.parser import (
|
||||||
aggregate_rows,
|
aggregate_rows,
|
||||||
build_description,
|
build_description,
|
||||||
detect_has_duration_column,
|
detect_has_duration_column,
|
||||||
|
extract_table_blocks,
|
||||||
|
parse_document,
|
||||||
parse_table,
|
parse_table,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -26,6 +30,8 @@ WITHOUT_DURATION = [
|
||||||
"| 08:30 | 09:15 | scrum | | dsu |",
|
"| 08:30 | 09:15 | scrum | | dsu |",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
WEEK_FILE = os.path.join(os.path.dirname(__file__), "2026 - W21.md")
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# detect_has_duration_column
|
# detect_has_duration_column
|
||||||
|
|
@ -47,6 +53,44 @@ class TestDetectHasDurationColumn:
|
||||||
assert detect_has_duration_column(lines) is True
|
assert detect_has_duration_column(lines) is True
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# extract_table_blocks
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractTableBlocks:
    """Checks how extract_table_blocks splits input into table blocks."""

    def test_single_table(self):
        """A lone table comes back as exactly one, unchanged, block."""
        found = extract_table_blocks(WITH_DURATION)
        assert len(found) == 1
        assert found[0] == WITH_DURATION

    def test_two_tables_separated_by_prose(self):
        """Blank lines, a heading and prose split two tables apart."""
        document = [
            *WITH_DURATION,
            "",
            "# Next day",
            "some prose",
            "",
            *WITHOUT_DURATION,
        ]
        assert len(extract_table_blocks(document)) == 2

    def test_prose_between_tables_not_included(self):
        """The separating prose line does not end up in either block."""
        document = [*WITH_DURATION, "some note", *WITHOUT_DURATION]
        found = extract_table_blocks(document)
        assert len(found) == 2
        for block in found:
            assert "some note" not in block

    def test_single_line_table_discarded(self):
        """A run of just one table line is too short to be kept."""
        header_only = ["| Start | End |"]
        assert extract_table_blocks(header_only) == []

    def test_empty_input(self):
        """No lines in, no blocks out."""
        assert extract_table_blocks([]) == []

    def test_no_tables(self):
        """A document without any table line yields no blocks."""
        prose_only = ["# heading", "", "prose"]
        assert extract_table_blocks(prose_only) == []

    def test_table_at_end_of_file_captured(self):
        """A table that ends the document is still returned."""
        document = ["# heading", "", *WITH_DURATION]  # no trailing newline
        assert len(extract_table_blocks(document)) == 1
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# parse_table
|
# parse_table
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -92,6 +136,14 @@ class TestParseTable:
|
||||||
]
|
]
|
||||||
assert parse_table(lines) == []
|
assert parse_table(lines) == []
|
||||||
|
|
||||||
|
def test_empty_end_time_row_skipped(self):
|
||||||
|
lines = [
|
||||||
|
"| Start | End | Project | Story | Note |",
|
||||||
|
"|-------|-------|---------|-------|------|",
|
||||||
|
"| 09:55 | | bugs | | |",
|
||||||
|
]
|
||||||
|
assert parse_table(lines, has_duration_col=False) == []
|
||||||
|
|
||||||
def test_empty_input(self):
|
def test_empty_input(self):
|
||||||
assert parse_table([]) == []
|
assert parse_table([]) == []
|
||||||
|
|
||||||
|
|
@ -101,6 +153,68 @@ class TestParseTable:
|
||||||
assert len(rows) == 3
|
assert len(rows) == 3
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# parse_document
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestParseDocument:
    """Tests for parse_document, the multi-table entry point."""

    @staticmethod
    def _read_week_file() -> list[str]:
        """Return the lines of the W21 weekly timesheet fixture."""
        with open(WEEK_FILE, encoding="utf-8") as f:
            return f.read().splitlines()

    def test_single_table(self):
        rows = parse_document(WITHOUT_DURATION)
        assert len(rows) == 2

    def test_multiple_tables_combined(self):
        lines = WITHOUT_DURATION + ["", "# Next day", ""] + WITHOUT_DURATION
        rows = parse_document(lines)
        assert len(rows) == 4

    def test_prose_between_tables_ignored(self):
        lines = (
            WITHOUT_DURATION + ["some notes", "- a bullet point", ""] + WITHOUT_DURATION
        )
        rows = parse_document(lines)
        assert len(rows) == 4

    def test_mixed_duration_formats(self):
        lines = WITH_DURATION + ["", "## Next day", ""] + WITHOUT_DURATION
        rows = parse_document(lines)
        assert len(rows) == 5  # 3 from WITH_DURATION + 2 from WITHOUT_DURATION

    def test_empty_input(self):
        assert parse_document([]) == []

    def test_week_file(self):
        """Smoke test against the real W21 weekly timesheet file."""
        rows = parse_document(self._read_week_file())
        # File has 5 daily tables; expect a healthy number of rows
        assert len(rows) > 20
        # All rows must have expected keys
        for row in rows:
            assert "project" in row
            assert "duration_hours" in row
            assert row["duration_hours"] > 0
        # The incomplete row (09:55 | empty end) must have been skipped.
        # It is the only row in the file that starts at 09:55 on project
        # "bugs", so the filter must come back empty. Asserting emptiness
        # (rather than all(...) over the matches, which passes on an empty
        # list) makes the test fail if the row ever slips through.
        incomplete = [
            r for r in rows if r["start"] == "09:55" and r["project"] == "bugs"
        ]
        assert incomplete == [], f"In-progress row was not skipped: {incomplete!r}"

    def test_week_file_no_markdown_links_in_stories(self):
        """Markdown link syntax must be stripped from story/note fields."""
        rows = parse_document(self._read_week_file())
        for row in rows:
            assert "](:" not in row["story"], (
                f"Link not stripped in story: {row['story']!r}"
            )
            assert "](:" not in row["note"], (
                f"Link not stripped in note: {row['note']!r}"
            )
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# build_description
|
# build_description
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue