"""Tests for the markdown timesheet parser in ``timesheets.parser``."""

import os
from datetime import date

import pytest

from timesheets.parser import (
    aggregate_rows,
    build_description,
    detect_has_duration_column,
    extract_table_blocks,
    filter_rows_by_date,
    parse_document,
    parse_table,
    resolve_overlaps,
)

# ---------------------------------------------------------------------------
# Fixtures / shared data
# ---------------------------------------------------------------------------

# Table whose header includes an explicit "Duration" column.
WITH_DURATION = [
    "| Start | End | Duration | Project | Story | Note |",
    "|-------|-------|----------|---------|-------------|---------|",
    "| 08:00 | 08:30 | 00:30 | bugs | story one | |",
    "| 08:30 | 09:00 | 00:30 | bugs | story one | |",
    "| 09:00 | 09:15 | 00:15 | scrum | | dsu |",
]

# Same layout without the "Duration" column.
WITHOUT_DURATION = [
    "| Start | End | Project | Story | Note |",
    "|-------|-------|---------|-------------|---------|",
    "| 08:00 | 08:30 | bugs | story one | |",
    "| 08:30 | 09:15 | scrum | | dsu |",
]

# Sample weekly timesheet expected to sit next to this test module.
WEEK_FILE = os.path.join(os.path.dirname(__file__), "2026 - W21.md")


@pytest.fixture
def week_lines():
    """Return the lines of the sample W21 weekly timesheet file.

    The file is read lazily, per test, so a missing sample file only fails
    the tests that need it instead of breaking collection of the whole
    module at import time. Each test also gets its own fresh list.
    """
    with open(WEEK_FILE, encoding="utf-8") as f:
        return f.read().splitlines()


# ---------------------------------------------------------------------------
# detect_has_duration_column
# ---------------------------------------------------------------------------


class TestDetectHasDurationColumn:
    """Tests for detecting a "Duration" column in the table header."""

    def test_with_duration(self):
        assert detect_has_duration_column(WITH_DURATION) is True

    def test_without_duration(self):
        assert detect_has_duration_column(WITHOUT_DURATION) is False

    def test_no_header_defaults_to_true(self):
        assert detect_has_duration_column(["no table here"]) is True

    def test_case_insensitive(self):
        lines = ["| Start | End | DURATION | Project | Story | Note |"]
        assert detect_has_duration_column(lines) is True


# ---------------------------------------------------------------------------
# extract_table_blocks
# ---------------------------------------------------------------------------


class TestExtractTableBlocks:
    """Tests for splitting a document into contiguous table blocks."""

    def test_single_table(self):
        blocks = extract_table_blocks(WITH_DURATION)
        assert len(blocks) == 1
        assert blocks[0] == WITH_DURATION

    def test_two_tables_separated_by_prose(self):
        lines = (
            WITH_DURATION
            + ["", "# Next day", "some prose", ""]
            + WITHOUT_DURATION
        )
        blocks = extract_table_blocks(lines)
        assert len(blocks) == 2

    def test_prose_between_tables_not_included(self):
        lines = WITH_DURATION + ["some note"] + WITHOUT_DURATION
        blocks = extract_table_blocks(lines)
        assert len(blocks) == 2
        assert all("some note" not in b for b in blocks)

    def test_single_line_table_discarded(self):
        lines = ["| Start | End |"]
        assert extract_table_blocks(lines) == []

    def test_empty_input(self):
        assert extract_table_blocks([]) == []

    def test_no_tables(self):
        assert extract_table_blocks(["# heading", "", "prose"]) == []

    def test_table_at_end_of_file_captured(self):
        lines = ["# heading", ""] + WITH_DURATION  # no trailing newline
        blocks = extract_table_blocks(lines)
        assert len(blocks) == 1

    def test_blank_line_within_table_kept_as_one_block(self):
        # A blank line in the middle of a table should not split it
        lines = [
            "| Start | End | Project | Story | Note |",
            "|-------|-------|---------|-------|------||",
            "| 08:00 | 08:30 | bugs | | |",
            "",
            "| 09:00 | 09:30 | scrum | | dsu |",
        ]
        blocks = extract_table_blocks(lines)
        assert len(blocks) == 1
        # header + sep + row + row (blank dropped)
        assert len(blocks[0]) == 4

    def test_multiple_blank_lines_within_table(self):
        lines = [
            "| Start | End | Project | Story | Note |",
            "|-------|-------|---------|-------|------||",
            "| 08:00 | 08:30 | bugs | | |",
            "",
            "",
            "| 09:00 | 09:30 | scrum | | dsu |",
        ]
        blocks = extract_table_blocks(lines)
        assert len(blocks) == 1
        assert len(blocks[0]) == 4

    def test_blank_line_at_end_of_table_does_not_include_blank(self):
        # Blank after the last row should not be included in the block
        lines = WITH_DURATION + ["", "# Next section"]
        blocks = extract_table_blocks(lines)
        assert len(blocks) == 1
        assert "" not in blocks[0]

    def test_blank_between_tables_still_splits(self):
        # A blank followed by prose should still end the first block
        lines = WITH_DURATION + ["", "# Next day", ""] + WITHOUT_DURATION
        blocks = extract_table_blocks(lines)
        assert len(blocks) == 2


# ---------------------------------------------------------------------------
# parse_table
# ---------------------------------------------------------------------------


class TestParseTable:
    """Tests for parsing a single table into row dicts."""

    def test_with_duration_column(self):
        rows = parse_table(WITH_DURATION, has_duration_col=True)
        assert len(rows) == 3
        assert rows[0]["project"] == "bugs"
        assert rows[0]["duration_hours"] == 0.5
        assert rows[2]["project"] == "scrum"
        assert rows[2]["note"] == "dsu"

    def test_without_duration_column(self):
        rows = parse_table(WITHOUT_DURATION, has_duration_col=False)
        assert len(rows) == 2
        assert rows[0]["duration_hours"] == 0.5  # 08:00–08:30
        assert rows[1]["duration_hours"] == 0.75  # 08:30–09:15

    def test_header_row_skipped(self):
        rows = parse_table(WITH_DURATION)
        assert all(r["start"] != "Start" for r in rows)

    def test_separator_row_skipped(self):
        rows = parse_table(WITH_DURATION)
        assert all(r["start"] != "---" for r in rows)

    def test_markdown_link_stripped_in_story(self):
        lines = [
            "| Start | End | Duration | Project | Story | Note |",
            "|-------|-------|----------|---------|----------------------------|------|",
            "| 08:00 | 08:30 | 00:30 | bugs | [ticket 1](:/abc123) | |",
        ]
        rows = parse_table(lines)
        assert rows[0]["story"] == "ticket 1"

    def test_invalid_duration_row_skipped(self):
        lines = [
            "| Start | End | Duration | Project | Story | Note |",
            "|-------|-------|----------|---------|-------|------|",
            "| 08:00 | 08:30 | bad | bugs | | |",
        ]
        assert parse_table(lines) == []

    def test_empty_end_time_row_skipped(self):
        """An open entry (empty End) is preserved, not skipped.

        The test name predates the current behaviour and is kept so the
        pytest node ID stays stable; the assertions below are the contract.
        """
        # Open entry (no end, no duration) is now preserved with
        # duration_hours=None
        lines = [
            "| Start | End | Project | Story | Note |",
            "|-------|-------|---------|-------|------||",
            "| 09:55 | | bugs | | |",
        ]
        rows = parse_table(lines, has_duration_col=False)
        assert len(rows) == 1
        assert rows[0]["duration_hours"] is None
        assert rows[0]["end"] is None

    def test_empty_input(self):
        assert parse_table([]) == []

    def test_non_table_lines_ignored(self):
        lines = ["# My Timesheet", "", "Some prose."] + WITH_DURATION
        rows = parse_table(lines)
        assert len(rows) == 3


# ---------------------------------------------------------------------------
# parse_document
# ---------------------------------------------------------------------------


class TestParseDocument:
    """Tests for parsing every table of a document into one row list."""

    def test_single_table(self):
        rows = parse_document(WITHOUT_DURATION)
        assert len(rows) == 2

    def test_multiple_tables_combined(self):
        lines = WITHOUT_DURATION + ["", "# Next day", ""] + WITHOUT_DURATION
        rows = parse_document(lines)
        assert len(rows) == 4

    def test_prose_between_tables_ignored(self):
        lines = (
            WITHOUT_DURATION
            + ["some notes", "- a bullet point", ""]
            + WITHOUT_DURATION
        )
        rows = parse_document(lines)
        assert len(rows) == 4

    def test_mixed_duration_formats(self):
        lines = WITH_DURATION + ["", "## Next day", ""] + WITHOUT_DURATION
        rows = parse_document(lines)
        # 3 from WITH_DURATION + 2 from WITHOUT_DURATION
        assert len(rows) == 5

    def test_empty_input(self):
        assert parse_document([]) == []

    def test_week_file(self, week_lines):
        """Smoke test against the real W21 weekly timesheet file."""
        rows = parse_document(week_lines)
        # File has 5 daily tables; expect a healthy number of rows
        assert len(rows) > 20
        # All rows must have expected keys
        # All closed rows must have positive duration; open entries have None
        for row in rows:
            assert "project" in row
            assert "duration_hours" in row
            if row["duration_hours"] is not None:
                assert row["duration_hours"] > 0
        # The open entry (09:55, no end) must be preserved with
        # duration_hours=None
        incomplete = [
            r for r in rows if r["start"] == "09:55" and r["project"] == "bugs"
        ]
        assert len(incomplete) == 1
        assert incomplete[0]["duration_hours"] is None

    def test_week_file_no_markdown_links_in_stories(self, week_lines):
        """Markdown link syntax must be stripped from story/note fields."""
        rows = parse_document(week_lines)
        for row in rows:
            assert "](:" not in row["story"], (
                f"Link not stripped in story: {row['story']!r}"
            )
            assert "](:" not in row["note"], (
                f"Link not stripped in note: {row['note']!r}"
            )


# ---------------------------------------------------------------------------
# filter_rows_by_date
# ---------------------------------------------------------------------------


class TestFilterRowsByDate:
    """Tests for selecting the rows that belong to a single day."""

    # The week-file tests reuse the W21 file, which has one table per
    # day-heading.

    def test_returns_only_matching_day(self, week_lines):
        rows = filter_rows_by_date(week_lines, date(2026, 5, 22))
        assert len(rows) > 0
        # Friday has these projects in the sample file
        projects = {r["project"] for r in rows}
        assert "scrum" in projects

    def test_different_day_returns_different_rows(self, week_lines):
        rows_fri = filter_rows_by_date(week_lines, date(2026, 5, 22))
        rows_mon = filter_rows_by_date(week_lines, date(2026, 5, 18))
        assert rows_fri != rows_mon
        assert len(rows_mon) > 0

    def test_no_match_returns_empty(self, week_lines):
        rows = filter_rows_by_date(week_lines, date(2026, 1, 1))
        assert rows == []

    def test_inline_document(self):
        lines = [
            "# Maandag - 2026-05-18",
            "| Start | End | Project | Story | Note |",
            "|-------|-------|---------|-------|------||",
            "| 08:00 | 08:30 | bugs | | fix |",
            "",
            "# Dinsdag - 2026-05-19",
            "| Start | End | Project | Story | Note |",
            "|-------|-------|---------|-------|------||",
            "| 09:00 | 09:30 | scrum | | dsu |",
        ]
        rows = filter_rows_by_date(lines, date(2026, 5, 18))
        assert len(rows) == 1
        assert rows[0]["project"] == "bugs"

        rows = filter_rows_by_date(lines, date(2026, 5, 19))
        assert len(rows) == 1
        assert rows[0]["project"] == "scrum"

    def test_empty_input(self):
        assert filter_rows_by_date([], date(2026, 5, 22)) == []


# ---------------------------------------------------------------------------
# build_description
# ---------------------------------------------------------------------------


class TestBuildDescription:
    """Tests for combining story and note into one description string."""

    def test_story_and_note(self):
        assert build_description("story", "note") == "story - note"

    def test_story_only(self):
        assert build_description("story", "") == "story"

    def test_note_only(self):
        assert build_description("", "note") == "note"

    def test_both_empty(self):
        assert build_description("", "") == "/"

    def test_strips_whitespace(self):
        assert build_description(" story ", " note ") == "story - note"


# ---------------------------------------------------------------------------
# aggregate_rows
# ---------------------------------------------------------------------------


class TestAggregateRows:
    """Tests for summing rows that share the same project and description."""

    def test_same_project_story_summed(self):
        rows = parse_table(WITH_DURATION)
        aggregated = aggregate_rows(rows)
        bugs = next(e for e in aggregated if e["project"] == "bugs")
        assert bugs["quantity"] == 1.0  # 00:30 + 00:30

    def test_distinct_entries_preserved(self):
        rows = parse_table(WITH_DURATION)
        aggregated = aggregate_rows(rows)
        assert len(aggregated) == 2  # bugs/story-one and scrum/dsu

    def test_insertion_order_preserved(self):
        rows = parse_table(WITH_DURATION)
        aggregated = aggregate_rows(rows)
        assert aggregated[0]["project"] == "bugs"
        assert aggregated[1]["project"] == "scrum"

    def test_empty_input(self):
        assert aggregate_rows([]) == []


# ---------------------------------------------------------------------------
# resolve_overlaps
# ---------------------------------------------------------------------------


class TestResolveOverlaps:
    """Tests for overlap resolution between timesheet entries."""

    def _row(self, start, end, project="proj", story="s", note=""):
        """Build a minimal row dict for ``resolve_overlaps``.

        ``start`` and ``end`` are ``"HH:MM"`` strings. Passing ``end=None``
        builds an open entry whose ``end`` and ``duration_hours`` are both
        ``None``; otherwise the duration is derived from the two times.
        """
        if end is None:
            duration = None
        else:
            h1, m1 = map(int, start.split(":"))
            h2, m2 = map(int, end.split(":"))
            duration = (h2 * 60 + m2 - h1 * 60 - m1) / 60.0
        return {
            "start": start,
            "end": end,
            "duration_hours": duration,
            "project": project,
            "story": story,
            "story_raw": story,
            "note": note,
        }

    def _sorted(self, rows):
        """Return the closed rows of *rows*, ordered by start time.

        Sorting compares the ``"HH:MM"`` strings directly, which matches
        chronological order because the helpers here always zero-pad.
        """
        return sorted(
            [r for r in rows if r["end"] is not None],
            key=lambda r: r["start"],
        )

    # --- no-op cases ---

    def test_empty_input(self):
        assert resolve_overlaps([]) == []

    def test_single_entry_unchanged(self):
        rows = [self._row("09:00", "10:00")]
        assert resolve_overlaps(rows) == rows

    def test_no_overlap_unchanged(self):
        rows = [self._row("09:00", "10:00"), self._row("10:00", "11:00")]
        result = resolve_overlaps(rows)
        assert len(result) == 2
        s = self._sorted(result)
        assert (s[0]["start"], s[0]["end"]) == ("09:00", "10:00")
        assert (s[1]["start"], s[1]["end"]) == ("10:00", "11:00")

    def test_only_open_entries_unchanged(self):
        rows = [self._row("09:00", None), self._row("10:00", None)]
        result = resolve_overlaps(rows)
        assert result == rows

    # --- partial overlap ---

    def test_partial_overlap_spec_example(self):
        """Spec example: 9:00-10:00 vs 9:30-10:30 → boundary at 9:45."""
        rows = [
            self._row("09:00", "10:00", "a"),
            self._row("09:30", "10:30", "b"),
        ]
        result = self._sorted(resolve_overlaps(rows))
        assert len(result) == 2
        assert (result[0]["start"], result[0]["end"]) == ("09:00", "09:45")
        assert result[0]["project"] == "a"
        assert (result[1]["start"], result[1]["end"]) == ("09:45", "10:30")
        assert result[1]["project"] == "b"

    def test_partial_overlap_duration_recalculated(self):
        rows = [self._row("09:00", "10:00"), self._row("09:30", "10:30")]
        result = self._sorted(resolve_overlaps(rows))
        assert result[0]["duration_hours"] == pytest.approx(0.75)  # 45 min
        assert result[1]["duration_hours"] == pytest.approx(0.75)  # 45 min

    def test_partial_overlap_total_hours(self):
        """Total logged time after resolution equals the spanned wall-clock time."""
        rows = [self._row("09:00", "10:00"), self._row("09:30", "10:30")]
        result = resolve_overlaps(rows)
        total = sum(r["duration_hours"] for r in result)
        assert total == pytest.approx(1.5)  # 9:00–10:30 = 90 min

    def test_partial_overlap_input_order_independent(self):
        """Result should be the same regardless of input order."""
        rows_forward = [
            self._row("09:00", "10:00", "a"),
            self._row("09:30", "10:30", "b"),
        ]
        rows_reverse = [
            self._row("09:30", "10:30", "b"),
            self._row("09:00", "10:00", "a"),
        ]
        r1 = self._sorted(resolve_overlaps(rows_forward))
        r2 = self._sorted(resolve_overlaps(rows_reverse))
        assert [(r["start"], r["end"]) for r in r1] == [
            (r["start"], r["end"]) for r in r2
        ]

    # --- full containment ---

    def test_containment_spec_example(self):
        """Spec example: 9:00-10:00 contains 9:15-9:45.

        Expected split: A1: 9:00-9:30, B: 9:30-9:45, A2: 9:45-10:00.
        """
        rows = [
            self._row("09:00", "10:00", "a"),
            self._row("09:15", "09:45", "b"),
        ]
        result = self._sorted(resolve_overlaps(rows))
        assert len(result) == 3
        assert (result[0]["start"], result[0]["end"]) == ("09:00", "09:30")
        assert result[0]["project"] == "a"
        assert (result[1]["start"], result[1]["end"]) == ("09:30", "09:45")
        assert result[1]["project"] == "b"
        assert (result[2]["start"], result[2]["end"]) == ("09:45", "10:00")
        assert result[2]["project"] == "a"

    def test_containment_total_hours(self):
        """Total after containment resolution equals the outer entry's original duration."""
        rows = [self._row("09:00", "10:00"), self._row("09:15", "09:45")]
        result = resolve_overlaps(rows)
        total = sum(r["duration_hours"] for r in result)
        assert total == pytest.approx(1.0)  # 9:00–10:00 = 60 min

    def test_containment_same_start(self):
        """Smaller entry starts at the same time as the larger one."""
        rows = [
            self._row("09:00", "10:00", "a"),
            self._row("09:00", "09:30", "b"),
        ]
        result = self._sorted(resolve_overlaps(rows))
        assert len(result) == 3
        assert (result[0]["start"], result[0]["end"]) == ("09:00", "09:15")
        assert result[0]["project"] == "a"
        assert (result[1]["start"], result[1]["end"]) == ("09:15", "09:30")
        assert result[1]["project"] == "b"
        assert (result[2]["start"], result[2]["end"]) == ("09:30", "10:00")
        assert result[2]["project"] == "a"

    def test_containment_same_end(self):
        """Smaller entry ends at the same time as the larger one."""
        rows = [
            self._row("09:00", "10:00", "a"),
            self._row("09:30", "10:00", "b"),
        ]
        result = self._sorted(resolve_overlaps(rows))
        assert len(result) == 2
        assert (result[0]["start"], result[0]["end"]) == ("09:00", "09:45")
        assert result[0]["project"] == "a"
        assert (result[1]["start"], result[1]["end"]) == ("09:45", "10:00")
        assert result[1]["project"] == "b"

    # --- open entries ---

    def test_open_entry_passed_through(self):
        open_row = self._row("09:00", None, "open")
        closed_row = self._row("09:30", "10:30", "closed")
        result = resolve_overlaps([open_row, closed_row])
        assert any(r["end"] is None for r in result)
        assert any(r["end"] == "10:30" for r in result)

    # --- metadata preservation ---

    def test_project_and_story_preserved(self):
        rows = [
            self._row("09:00", "10:00", project="p1", story="s1", note="n1"),
            self._row("09:30", "10:30", project="p2", story="s2", note="n2"),
        ]
        result = self._sorted(resolve_overlaps(rows))
        assert result[0]["project"] == "p1"
        assert result[0]["story"] == "s1"
        assert result[0]["note"] == "n1"
        assert result[1]["project"] == "p2"
        assert result[1]["story"] == "s2"
        assert result[1]["note"] == "n2"

    # --- no remaining overlaps ---

    def test_result_has_no_overlaps(self):
        """After resolution, no two entries in the result should overlap."""
        rows = [
            self._row("09:00", "11:00", "a"),
            self._row("09:30", "10:30", "b"),
            self._row("10:00", "12:00", "c"),
        ]
        result = self._sorted(resolve_overlaps(rows))
        # Compare each entry with its successor; zero-padded "HH:MM"
        # strings order the same way as the times they denote.
        for i, (current, following) in enumerate(zip(result, result[1:])):
            assert current["end"] <= following["start"], (
                f"Overlap between entry {i} ({current}) and {i + 1} ({following})"
            )