odoo-timesheets/tests/test_parser.py
Jef Roosens 9f0a6e2027
feat(parser): resolve overlapping timesheet entries
Parallel work is logged as overlapping entries. resolve_overlaps()
splits the shared time equally using the midpoint of the overlap region:

- Partial overlap: the midpoint becomes the boundary between the two
  entries (earlier entry trimmed, later entry delayed).
- Full containment: the containing entry is split into two pieces
  surrounding the contained one, with the midpoint rule applied to
  the overlap region.

Open entries (no end time) are passed through unchanged.

resolve_overlaps() is called automatically in filter_rows_by_date,
filter_week_sections, and the --input single-day path in cli.py, so
all subcommands benefit without further changes.
2026-06-02 09:31:11 +02:00

558 lines
21 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
from datetime import date

import pytest

from timesheets.parser import (
    aggregate_rows,
    build_description,
    detect_has_duration_column,
    extract_table_blocks,
    filter_rows_by_date,
    parse_document,
    parse_table,
    resolve_overlaps,
)
# ---------------------------------------------------------------------------
# Fixtures / shared data
# ---------------------------------------------------------------------------
# Sample Markdown table that carries an explicit Duration column.
WITH_DURATION = [
    "| Start | End | Duration | Project | Story | Note |",
    "|-------|-------|----------|---------|-------------|---------|",
    "| 08:00 | 08:30 | 00:30 | bugs | story one | |",
    "| 08:30 | 09:00 | 00:30 | bugs | story one | |",
    "| 09:00 | 09:15 | 00:15 | scrum | | dsu |",
]

# Sample Markdown table that has no Duration column.
WITHOUT_DURATION = [
    "| Start | End | Project | Story | Note |",
    "|-------|-------|---------|-------------|---------|",
    "| 08:00 | 08:30 | bugs | story one | |",
    "| 08:30 | 09:15 | scrum | | dsu |",
]

# Weekly sample timesheet stored in the same directory as this test module.
_TESTS_DIR = os.path.dirname(__file__)
WEEK_FILE = os.path.join(_TESTS_DIR, "2026 - W21.md")
# ---------------------------------------------------------------------------
# detect_has_duration_column
# ---------------------------------------------------------------------------
class TestDetectHasDurationColumn:
    """Checks for detect_has_duration_column() on table header lines."""

    def test_with_duration(self):
        detected = detect_has_duration_column(WITH_DURATION)
        assert detected is True

    def test_without_duration(self):
        detected = detect_has_duration_column(WITHOUT_DURATION)
        assert detected is False

    def test_no_header_defaults_to_true(self):
        # Input that contains no table header at all is expected to yield True.
        headerless = ["no table here"]
        assert detect_has_duration_column(headerless) is True

    def test_case_insensitive(self):
        # The column name is written in upper case on purpose.
        upper_header = "| Start | End | DURATION | Project | Story | Note |"
        assert detect_has_duration_column([upper_header]) is True
# ---------------------------------------------------------------------------
# extract_table_blocks
# ---------------------------------------------------------------------------
class TestExtractTableBlocks:
    """Checks for extract_table_blocks(): splitting a document into table blocks."""

    def test_single_table(self):
        found = extract_table_blocks(WITH_DURATION)
        assert len(found) == 1
        assert found[0] == WITH_DURATION

    def test_two_tables_separated_by_prose(self):
        document = [
            *WITH_DURATION,
            "",
            "# Next day",
            "some prose",
            "",
            *WITHOUT_DURATION,
        ]
        found = extract_table_blocks(document)
        assert len(found) == 2

    def test_prose_between_tables_not_included(self):
        document = [*WITH_DURATION, "some note", *WITHOUT_DURATION]
        found = extract_table_blocks(document)
        assert len(found) == 2
        # No block may contain the prose line as one of its elements.
        assert not any("some note" in block for block in found)

    def test_single_line_table_discarded(self):
        lonely_header = ["| Start | End |"]
        assert extract_table_blocks(lonely_header) == []

    def test_empty_input(self):
        assert extract_table_blocks([]) == []

    def test_no_tables(self):
        prose_only = ["# heading", "", "prose"]
        assert extract_table_blocks(prose_only) == []

    def test_table_at_end_of_file_captured(self):
        # The table is the last thing in the document: no trailing newline.
        document = ["# heading", "", *WITH_DURATION]
        found = extract_table_blocks(document)
        assert len(found) == 1

    def test_blank_line_within_table_kept_as_one_block(self):
        # A single blank line between two rows must not split the table.
        document = [
            "| Start | End | Project | Story | Note |",
            "|-------|-------|---------|-------|------||",
            "| 08:00 | 08:30 | bugs | | |",
            "",
            "| 09:00 | 09:30 | scrum | | dsu |",
        ]
        found = extract_table_blocks(document)
        assert len(found) == 1
        # header + separator + two rows; the blank line is dropped
        assert len(found[0]) == 4

    def test_multiple_blank_lines_within_table(self):
        # Same as above, but with two consecutive blank lines.
        document = [
            "| Start | End | Project | Story | Note |",
            "|-------|-------|---------|-------|------||",
            "| 08:00 | 08:30 | bugs | | |",
            "",
            "",
            "| 09:00 | 09:30 | scrum | | dsu |",
        ]
        found = extract_table_blocks(document)
        assert len(found) == 1
        assert len(found[0]) == 4

    def test_blank_line_at_end_of_table_does_not_include_blank(self):
        # A blank line after the final row must not end up inside the block.
        document = [*WITH_DURATION, "", "# Next section"]
        found = extract_table_blocks(document)
        assert len(found) == 1
        assert "" not in found[0]

    def test_blank_between_tables_still_splits(self):
        # A blank line followed by prose still terminates the first block.
        document = [*WITH_DURATION, "", "# Next day", "", *WITHOUT_DURATION]
        found = extract_table_blocks(document)
        assert len(found) == 2
# ---------------------------------------------------------------------------
# parse_table
# ---------------------------------------------------------------------------
class TestParseTable:
    """Tests for parse_table(): turning the lines of one table into row dicts."""

    def test_with_duration_column(self):
        rows = parse_table(WITH_DURATION, has_duration_col=True)
        assert len(rows) == 3
        assert rows[0]["project"] == "bugs"
        assert rows[0]["duration_hours"] == 0.5
        assert rows[2]["project"] == "scrum"
        assert rows[2]["note"] == "dsu"

    def test_without_duration_column(self):
        rows = parse_table(WITHOUT_DURATION, has_duration_col=False)
        assert len(rows) == 2
        assert rows[0]["duration_hours"] == 0.5  # 08:00-08:30
        assert rows[1]["duration_hours"] == 0.75  # 08:30-09:15

    def test_header_row_skipped(self):
        rows = parse_table(WITH_DURATION)
        # Guard: all() over an empty list would pass without checking anything.
        assert rows
        assert all(r["start"] != "Start" for r in rows)

    def test_separator_row_skipped(self):
        rows = parse_table(WITH_DURATION)
        # Guard: all() over an empty list would pass without checking anything.
        assert rows
        # The separator cell in the fixture is "-------", so comparing against
        # the literal "---" could never fail; reject any dash-led start value.
        assert all(not r["start"].startswith("-") for r in rows)

    def test_markdown_link_stripped_in_story(self):
        lines = [
            "| Start | End | Duration | Project | Story | Note |",
            "|-------|-------|----------|---------|----------------------------|------|",
            "| 08:00 | 08:30 | 00:30 | bugs | [ticket 1](:/abc123) | |",
        ]
        rows = parse_table(lines)
        assert rows[0]["story"] == "ticket 1"

    def test_invalid_duration_row_skipped(self):
        lines = [
            "| Start | End | Duration | Project | Story | Note |",
            "|-------|-------|----------|---------|-------|------|",
            "| 08:00 | 08:30 | bad | bugs | | |",
        ]
        assert parse_table(lines) == []

    def test_empty_end_time_row_preserved(self):
        """An open entry (no end, no duration) is kept, with None placeholders.

        Renamed from ``test_empty_end_time_row_skipped``: the assertions check
        that the row is preserved, not skipped.
        """
        lines = [
            "| Start | End | Project | Story | Note |",
            "|-------|-------|---------|-------|------||",
            "| 09:55 | | bugs | | |",
        ]
        rows = parse_table(lines, has_duration_col=False)
        assert len(rows) == 1
        assert rows[0]["duration_hours"] is None
        assert rows[0]["end"] is None

    def test_empty_input(self):
        assert parse_table([]) == []

    def test_non_table_lines_ignored(self):
        lines = ["# My Timesheet", "", "Some prose."] + WITH_DURATION
        rows = parse_table(lines)
        assert len(rows) == 3
# ---------------------------------------------------------------------------
# parse_document
# ---------------------------------------------------------------------------
class TestParseDocument:
    """Checks for parse_document(): parsing every table found in a document."""

    @staticmethod
    def _read_week_lines():
        """Return the W21 sample file as a list of lines."""
        with open(WEEK_FILE, encoding="utf-8") as handle:
            return handle.read().splitlines()

    def test_single_table(self):
        parsed = parse_document(WITHOUT_DURATION)
        assert len(parsed) == 2

    def test_multiple_tables_combined(self):
        document = [*WITHOUT_DURATION, "", "# Next day", "", *WITHOUT_DURATION]
        parsed = parse_document(document)
        assert len(parsed) == 4

    def test_prose_between_tables_ignored(self):
        document = [
            *WITHOUT_DURATION,
            "some notes",
            "- a bullet point",
            "",
            *WITHOUT_DURATION,
        ]
        parsed = parse_document(document)
        assert len(parsed) == 4

    def test_mixed_duration_formats(self):
        document = [*WITH_DURATION, "", "## Next day", "", *WITHOUT_DURATION]
        parsed = parse_document(document)
        # 3 rows from WITH_DURATION plus 2 rows from WITHOUT_DURATION
        assert len(parsed) == 5

    def test_empty_input(self):
        assert parse_document([]) == []

    def test_week_file(self):
        """Smoke test against the real W21 weekly timesheet file."""
        parsed = parse_document(self._read_week_lines())

        # The file has 5 daily tables; expect a healthy number of rows.
        assert len(parsed) > 20

        # Every row carries the expected keys. Closed rows have a positive
        # duration; open entries have None instead.
        for entry in parsed:
            assert "project" in entry
            assert "duration_hours" in entry
            hours = entry["duration_hours"]
            if hours is None:
                continue
            assert hours > 0

        # The open entry (09:55, no end) is preserved with duration_hours=None.
        open_entries = [
            entry
            for entry in parsed
            if entry["start"] == "09:55" and entry["project"] == "bugs"
        ]
        assert len(open_entries) == 1
        assert open_entries[0]["duration_hours"] is None

    def test_week_file_no_markdown_links_in_stories(self):
        """Markdown link syntax must be stripped from story/note fields."""
        for row in parse_document(self._read_week_lines()):
            assert "](:" not in row["story"], (
                f"Link not stripped in story: {row['story']!r}"
            )
            assert "](:" not in row["note"], (
                f"Link not stripped in note: {row['note']!r}"
            )
# ---------------------------------------------------------------------------
# filter_rows_by_date
# ---------------------------------------------------------------------------
class TestFilterRowsByDate:
    """Tests for filter_rows_by_date(): selecting the rows of one day's table."""

    @staticmethod
    def _week_lines():
        """Return the lines of the W21 sample file (one table per day heading).

        The file is read when a test runs rather than in the class body, so a
        missing sample file fails only the tests that need it instead of
        breaking collection of the whole module.
        """
        with open(WEEK_FILE, encoding="utf-8") as f:
            return f.read().splitlines()

    def test_returns_only_matching_day(self):
        rows = filter_rows_by_date(self._week_lines(), date(2026, 5, 22))
        assert len(rows) > 0
        # Friday has these projects in the sample file
        projects = {r["project"] for r in rows}
        assert "scrum" in projects

    def test_different_day_returns_different_rows(self):
        week_lines = self._week_lines()
        rows_fri = filter_rows_by_date(week_lines, date(2026, 5, 22))
        rows_mon = filter_rows_by_date(week_lines, date(2026, 5, 18))
        assert rows_fri != rows_mon
        assert len(rows_mon) > 0

    def test_no_match_returns_empty(self):
        rows = filter_rows_by_date(self._week_lines(), date(2026, 1, 1))
        assert rows == []

    def test_inline_document(self):
        lines = [
            "# Maandag - 2026-05-18",
            "| Start | End | Project | Story | Note |",
            "|-------|-------|---------|-------|------||",
            "| 08:00 | 08:30 | bugs | | fix |",
            "",
            "# Dinsdag - 2026-05-19",
            "| Start | End | Project | Story | Note |",
            "|-------|-------|---------|-------|------||",
            "| 09:00 | 09:30 | scrum | | dsu |",
        ]
        rows = filter_rows_by_date(lines, date(2026, 5, 18))
        assert len(rows) == 1
        assert rows[0]["project"] == "bugs"

        rows = filter_rows_by_date(lines, date(2026, 5, 19))
        assert len(rows) == 1
        assert rows[0]["project"] == "scrum"

    def test_empty_input(self):
        assert filter_rows_by_date([], date(2026, 5, 22)) == []
# ---------------------------------------------------------------------------
# build_description
# ---------------------------------------------------------------------------
class TestBuildDescription:
    """Checks for build_description(): combining story and note into one text."""

    def test_story_and_note(self):
        description = build_description("story", "note")
        assert description == "story - note"

    def test_story_only(self):
        description = build_description("story", "")
        assert description == "story"

    def test_note_only(self):
        description = build_description("", "note")
        assert description == "note"

    def test_both_empty(self):
        # With neither story nor note, a lone slash is expected.
        description = build_description("", "")
        assert description == "/"

    def test_strips_whitespace(self):
        description = build_description(" story ", " note ")
        assert description == "story - note"
# ---------------------------------------------------------------------------
# aggregate_rows
# ---------------------------------------------------------------------------
class TestAggregateRows:
    """Checks for aggregate_rows(): merging parsed rows into summed entries."""

    def test_same_project_story_summed(self):
        entries = aggregate_rows(parse_table(WITH_DURATION))
        bugs_entries = (entry for entry in entries if entry["project"] == "bugs")
        first_bugs = next(bugs_entries)
        # 00:30 + 00:30
        assert first_bugs["quantity"] == 1.0

    def test_distinct_entries_preserved(self):
        entries = aggregate_rows(parse_table(WITH_DURATION))
        # bugs/story-one and scrum/dsu
        assert len(entries) == 2

    def test_insertion_order_preserved(self):
        entries = aggregate_rows(parse_table(WITH_DURATION))
        first = entries[0]
        assert first["project"] == "bugs"
        second = entries[1]
        assert second["project"] == "scrum"

    def test_empty_input(self):
        nothing = []
        assert aggregate_rows(nothing) == []
# ---------------------------------------------------------------------------
# resolve_overlaps
# ---------------------------------------------------------------------------
class TestResolveOverlaps:
    """Tests for overlap resolution between timesheet entries."""

    def _row(self, start, end, project="proj", story="s", note=""):
        """Build a minimal row dict, closed or open.

        Args:
            start: Start time as an "HH:MM" string.
            end: End time as an "HH:MM" string, or None for an open entry.
            project: Value stored under "project".
            story: Value stored under both "story" and "story_raw".
            note: Value stored under "note".

        Returns:
            A dict with the keys start, end, duration_hours, project, story,
            story_raw and note. For an open entry both "end" and
            "duration_hours" are None; otherwise "duration_hours" is the
            end-minus-start difference expressed in hours.
        """
        duration = None
        if end is not None:
            h1, m1 = map(int, start.split(":"))
            h2, m2 = map(int, end.split(":"))
            duration = (h2 * 60 + m2 - h1 * 60 - m1) / 60.0
        return {
            "start": start,
            "end": end,
            "duration_hours": duration,
            "project": project,
            "story": story,
            "story_raw": story,
            "note": note,
        }

    def _sorted(self, rows):
        """Return only the closed rows, ordered by their "start" string.

        The tests use zero-padded "HH:MM" values, for which string order and
        chronological order coincide.
        """
        return sorted(
            [r for r in rows if r["end"] is not None],
            key=lambda r: r["start"],
        )

    # --- no-op cases ---

    def test_empty_input(self):
        assert resolve_overlaps([]) == []

    def test_single_entry_unchanged(self):
        rows = [self._row("09:00", "10:00")]
        assert resolve_overlaps(rows) == rows

    def test_no_overlap_unchanged(self):
        rows = [self._row("09:00", "10:00"), self._row("10:00", "11:00")]
        result = resolve_overlaps(rows)
        assert len(result) == 2
        s = self._sorted(result)
        assert (s[0]["start"], s[0]["end"]) == ("09:00", "10:00")
        assert (s[1]["start"], s[1]["end"]) == ("10:00", "11:00")

    def test_only_open_entries_unchanged(self):
        rows = [self._row("09:00", None), self._row("10:00", None)]
        result = resolve_overlaps(rows)
        assert result == rows

    # --- partial overlap ---

    def test_partial_overlap_spec_example(self):
        """Spec example: 9:00-10:00 vs 9:30-10:30 -> boundary at 9:45."""
        rows = [self._row("09:00", "10:00", "a"), self._row("09:30", "10:30", "b")]
        result = self._sorted(resolve_overlaps(rows))
        assert len(result) == 2
        assert (result[0]["start"], result[0]["end"]) == ("09:00", "09:45")
        assert result[0]["project"] == "a"
        assert (result[1]["start"], result[1]["end"]) == ("09:45", "10:30")
        assert result[1]["project"] == "b"

    def test_partial_overlap_duration_recalculated(self):
        rows = [self._row("09:00", "10:00"), self._row("09:30", "10:30")]
        result = self._sorted(resolve_overlaps(rows))
        assert result[0]["duration_hours"] == pytest.approx(0.75)  # 45 min
        assert result[1]["duration_hours"] == pytest.approx(0.75)  # 45 min

    def test_partial_overlap_total_hours(self):
        """Total logged time after resolution equals the spanned wall-clock time."""
        rows = [self._row("09:00", "10:00"), self._row("09:30", "10:30")]
        result = resolve_overlaps(rows)
        total = sum(r["duration_hours"] for r in result)
        assert total == pytest.approx(1.5)  # 9:00-10:30 = 90 min

    def test_partial_overlap_input_order_independent(self):
        """Result should be the same regardless of input order."""
        rows_forward = [
            self._row("09:00", "10:00", "a"),
            self._row("09:30", "10:30", "b"),
        ]
        rows_reverse = [
            self._row("09:30", "10:30", "b"),
            self._row("09:00", "10:00", "a"),
        ]
        r1 = self._sorted(resolve_overlaps(rows_forward))
        r2 = self._sorted(resolve_overlaps(rows_reverse))
        assert [(r["start"], r["end"]) for r in r1] == [
            (r["start"], r["end"]) for r in r2
        ]

    # --- full containment ---

    def test_containment_spec_example(self):
        """Spec example: 9:00-10:00 contains 9:15-9:45.

        Expected pieces: A1 9:00-9:30, B 9:30-9:45, A2 9:45-10:00.
        """
        rows = [self._row("09:00", "10:00", "a"), self._row("09:15", "09:45", "b")]
        result = self._sorted(resolve_overlaps(rows))
        assert len(result) == 3
        assert (result[0]["start"], result[0]["end"]) == ("09:00", "09:30")
        assert result[0]["project"] == "a"
        assert (result[1]["start"], result[1]["end"]) == ("09:30", "09:45")
        assert result[1]["project"] == "b"
        assert (result[2]["start"], result[2]["end"]) == ("09:45", "10:00")
        assert result[2]["project"] == "a"

    def test_containment_total_hours(self):
        """Total after containment resolution equals the outer entry's original duration."""
        rows = [self._row("09:00", "10:00"), self._row("09:15", "09:45")]
        result = resolve_overlaps(rows)
        total = sum(r["duration_hours"] for r in result)
        assert total == pytest.approx(1.0)  # 9:00-10:00 = 60 min

    def test_containment_same_start(self):
        """Smaller entry starts at the same time as the larger one."""
        rows = [self._row("09:00", "10:00", "a"), self._row("09:00", "09:30", "b")]
        result = self._sorted(resolve_overlaps(rows))
        assert len(result) == 3
        assert (result[0]["start"], result[0]["end"]) == ("09:00", "09:15")
        assert result[0]["project"] == "a"
        assert (result[1]["start"], result[1]["end"]) == ("09:15", "09:30")
        assert result[1]["project"] == "b"
        assert (result[2]["start"], result[2]["end"]) == ("09:30", "10:00")
        assert result[2]["project"] == "a"

    def test_containment_same_end(self):
        """Smaller entry ends at the same time as the larger one."""
        rows = [self._row("09:00", "10:00", "a"), self._row("09:30", "10:00", "b")]
        result = self._sorted(resolve_overlaps(rows))
        assert len(result) == 2
        assert (result[0]["start"], result[0]["end"]) == ("09:00", "09:45")
        assert result[0]["project"] == "a"
        assert (result[1]["start"], result[1]["end"]) == ("09:45", "10:00")
        assert result[1]["project"] == "b"

    # --- open entries ---

    def test_open_entry_passed_through(self):
        open_row = self._row("09:00", None, "open")
        closed_row = self._row("09:30", "10:30", "closed")
        result = resolve_overlaps([open_row, closed_row])
        assert any(r["end"] is None for r in result)
        assert any(r["end"] == "10:30" for r in result)

    # --- metadata preservation ---

    def test_project_and_story_preserved(self):
        rows = [
            self._row("09:00", "10:00", project="p1", story="s1", note="n1"),
            self._row("09:30", "10:30", project="p2", story="s2", note="n2"),
        ]
        result = self._sorted(resolve_overlaps(rows))
        assert result[0]["project"] == "p1"
        assert result[0]["story"] == "s1"
        assert result[0]["note"] == "n1"
        assert result[1]["project"] == "p2"
        assert result[1]["story"] == "s2"
        assert result[1]["note"] == "n2"

    # --- no remaining overlaps ---

    def test_result_has_no_overlaps(self):
        """After resolution, no two entries in the result should overlap."""
        rows = [
            self._row("09:00", "11:00", "a"),
            self._row("09:30", "10:30", "b"),
            self._row("10:00", "12:00", "c"),
        ]
        result = self._sorted(resolve_overlaps(rows))
        # Guard: the pairwise loop below checks nothing on an empty result.
        assert result
        # Compare each entry with its successor in start order.
        for i, (earlier, later) in enumerate(zip(result, result[1:])):
            assert earlier["end"] <= later["start"], (
                f"Overlap between entry {i} ({earlier}) and {i + 1} ({later})"
            )