odoo-timesheets/tests/test_parser.py
Jef Roosens d6689a6c83
feat(parser): support multiple tables in a single markdown document
- Add extract_table_blocks() to split a document into contiguous table
  blocks, ignoring prose, headings, and blank lines between them
- Add parse_document() as the new top-level entry point that runs
  extract_table_blocks + detect_has_duration_column + parse_table per
  block and returns a combined flat list of rows
- Guard against empty End cells (e.g. in-progress rows) by validating
  the end field before calculating duration
- Update cli.py to use parse_document() instead of the manual
  detect + parse combo
- Add tests for extract_table_blocks and parse_document, including two
  smoke tests against the real 2026-W21 weekly timesheet file
2026-06-02 09:31:02 +02:00

264 lines
9.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import pytest
from timesheets.parser import (
aggregate_rows,
build_description,
detect_has_duration_column,
extract_table_blocks,
parse_document,
parse_table,
)
# ---------------------------------------------------------------------------
# Fixtures / shared data
# ---------------------------------------------------------------------------
WITH_DURATION = [
"| Start | End | Duration | Project | Story | Note |",
"|-------|-------|----------|---------|-------------|---------|",
"| 08:00 | 08:30 | 00:30 | bugs | story one | |",
"| 08:30 | 09:00 | 00:30 | bugs | story one | |",
"| 09:00 | 09:15 | 00:15 | scrum | | dsu |",
]
WITHOUT_DURATION = [
"| Start | End | Project | Story | Note |",
"|-------|-------|---------|-------------|---------|",
"| 08:00 | 08:30 | bugs | story one | |",
"| 08:30 | 09:15 | scrum | | dsu |",
]
WEEK_FILE = os.path.join(os.path.dirname(__file__), "2026 - W21.md")
# ---------------------------------------------------------------------------
# detect_has_duration_column
# ---------------------------------------------------------------------------
class TestDetectHasDurationColumn:
def test_with_duration(self):
assert detect_has_duration_column(WITH_DURATION) is True
def test_without_duration(self):
assert detect_has_duration_column(WITHOUT_DURATION) is False
def test_no_header_defaults_to_true(self):
assert detect_has_duration_column(["no table here"]) is True
def test_case_insensitive(self):
lines = ["| Start | End | DURATION | Project | Story | Note |"]
assert detect_has_duration_column(lines) is True
# ---------------------------------------------------------------------------
# extract_table_blocks
# ---------------------------------------------------------------------------
class TestExtractTableBlocks:
def test_single_table(self):
blocks = extract_table_blocks(WITH_DURATION)
assert len(blocks) == 1
assert blocks[0] == WITH_DURATION
def test_two_tables_separated_by_prose(self):
lines = WITH_DURATION + ["", "# Next day", "some prose", ""] + WITHOUT_DURATION
blocks = extract_table_blocks(lines)
assert len(blocks) == 2
def test_prose_between_tables_not_included(self):
lines = WITH_DURATION + ["some note"] + WITHOUT_DURATION
blocks = extract_table_blocks(lines)
assert len(blocks) == 2
assert all("some note" not in b for b in blocks)
def test_single_line_table_discarded(self):
lines = ["| Start | End |"]
assert extract_table_blocks(lines) == []
def test_empty_input(self):
assert extract_table_blocks([]) == []
def test_no_tables(self):
assert extract_table_blocks(["# heading", "", "prose"]) == []
def test_table_at_end_of_file_captured(self):
lines = ["# heading", ""] + WITH_DURATION # no trailing newline
blocks = extract_table_blocks(lines)
assert len(blocks) == 1
# ---------------------------------------------------------------------------
# parse_table
# ---------------------------------------------------------------------------
class TestParseTable:
def test_with_duration_column(self):
rows = parse_table(WITH_DURATION, has_duration_col=True)
assert len(rows) == 3
assert rows[0]["project"] == "bugs"
assert rows[0]["duration_hours"] == 0.5
assert rows[2]["project"] == "scrum"
assert rows[2]["note"] == "dsu"
def test_without_duration_column(self):
rows = parse_table(WITHOUT_DURATION, has_duration_col=False)
assert len(rows) == 2
assert rows[0]["duration_hours"] == 0.5 # 08:0008:30
assert rows[1]["duration_hours"] == 0.75 # 08:3009:15
def test_header_row_skipped(self):
rows = parse_table(WITH_DURATION)
assert all(r["start"] != "Start" for r in rows)
def test_separator_row_skipped(self):
rows = parse_table(WITH_DURATION)
assert all(r["start"] != "---" for r in rows)
def test_markdown_link_stripped_in_story(self):
lines = [
"| Start | End | Duration | Project | Story | Note |",
"|-------|-------|----------|---------|----------------------------|------|",
"| 08:00 | 08:30 | 00:30 | bugs | [ticket 1](:/abc123) | |",
]
rows = parse_table(lines)
assert rows[0]["story"] == "ticket 1"
def test_invalid_duration_row_skipped(self):
lines = [
"| Start | End | Duration | Project | Story | Note |",
"|-------|-------|----------|---------|-------|------|",
"| 08:00 | 08:30 | bad | bugs | | |",
]
assert parse_table(lines) == []
def test_empty_end_time_row_skipped(self):
lines = [
"| Start | End | Project | Story | Note |",
"|-------|-------|---------|-------|------|",
"| 09:55 | | bugs | | |",
]
assert parse_table(lines, has_duration_col=False) == []
def test_empty_input(self):
assert parse_table([]) == []
def test_non_table_lines_ignored(self):
lines = ["# My Timesheet", "", "Some prose."] + WITH_DURATION
rows = parse_table(lines)
assert len(rows) == 3
# ---------------------------------------------------------------------------
# parse_document
# ---------------------------------------------------------------------------
class TestParseDocument:
def test_single_table(self):
rows = parse_document(WITHOUT_DURATION)
assert len(rows) == 2
def test_multiple_tables_combined(self):
lines = WITHOUT_DURATION + ["", "# Next day", ""] + WITHOUT_DURATION
rows = parse_document(lines)
assert len(rows) == 4
def test_prose_between_tables_ignored(self):
lines = (
WITHOUT_DURATION + ["some notes", "- a bullet point", ""] + WITHOUT_DURATION
)
rows = parse_document(lines)
assert len(rows) == 4
def test_mixed_duration_formats(self):
lines = WITH_DURATION + ["", "## Next day", ""] + WITHOUT_DURATION
rows = parse_document(lines)
assert len(rows) == 5 # 3 from WITH_DURATION + 2 from WITHOUT_DURATION
def test_empty_input(self):
assert parse_document([]) == []
def test_week_file(self):
"""Smoke test against the real W21 weekly timesheet file."""
with open(WEEK_FILE, encoding="utf-8") as f:
lines = f.read().splitlines()
rows = parse_document(lines)
# File has 5 daily tables; expect a healthy number of rows
assert len(rows) > 20
# All rows must have expected keys
for row in rows:
assert "project" in row
assert "duration_hours" in row
assert row["duration_hours"] > 0
# The incomplete row (09:55 | empty end) must have been skipped
incomplete = [
r for r in rows if r["start"] == "09:55" and r["project"] == "bugs"
]
assert all(r["duration_hours"] > 0 for r in incomplete)
def test_week_file_no_markdown_links_in_stories(self):
"""Markdown link syntax must be stripped from story/note fields."""
with open(WEEK_FILE, encoding="utf-8") as f:
lines = f.read().splitlines()
rows = parse_document(lines)
for row in rows:
assert "](:" not in row["story"], (
f"Link not stripped in story: {row['story']!r}"
)
assert "](:" not in row["note"], (
f"Link not stripped in note: {row['note']!r}"
)
# ---------------------------------------------------------------------------
# build_description
# ---------------------------------------------------------------------------
class TestBuildDescription:
def test_story_and_note(self):
assert build_description("story", "note") == "story - note"
def test_story_only(self):
assert build_description("story", "") == "story"
def test_note_only(self):
assert build_description("", "note") == "note"
def test_both_empty(self):
assert build_description("", "") == "/"
def test_strips_whitespace(self):
assert build_description(" story ", " note ") == "story - note"
# ---------------------------------------------------------------------------
# aggregate_rows
# ---------------------------------------------------------------------------
class TestAggregateRows:
def test_same_project_story_summed(self):
rows = parse_table(WITH_DURATION)
aggregated = aggregate_rows(rows)
bugs = next(e for e in aggregated if e["project"] == "bugs")
assert bugs["quantity"] == 1.0 # 00:30 + 00:30
def test_distinct_entries_preserved(self):
rows = parse_table(WITH_DURATION)
aggregated = aggregate_rows(rows)
assert len(aggregated) == 2 # bugs/story-one and scrum/dsu
def test_insertion_order_preserved(self):
rows = parse_table(WITH_DURATION)
aggregated = aggregate_rows(rows)
assert aggregated[0]["project"] == "bugs"
assert aggregated[1]["project"] == "scrum"
def test_empty_input(self):
assert aggregate_rows([]) == []