odoo-timesheets/tests/test_parser.py
Jef Roosens de46399010
Add ~ marker to exclude entries from CSV export
Prefix the project name or note column with ~ to mark an entry as
count-but-don't-export. Marked entries are included in summary and
status totals but omitted from all csv output (both --raw and
aggregated, single-day and weekly).

  | 09:00 | 17:00 | 8:00 | ~Leave |  | Day off  |
  | 09:00 | 17:00 | 8:00 |  Leave |  | ~Day off |

The ~ is stripped from whichever field carries it before any
downstream processing, so project map resolution is unaffected.

Implementation:
- parse_table sets skip_csv=True on marked rows and strips the ~
- new filter_skip_csv() helper in parser.py
- to_csv_entries() skips skip_csv rows
- _cmd_csv calls filter_skip_csv() before aggregate_rows()
2026-06-02 09:31:16 +02:00

680 lines
25 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
from datetime import date

import pytest

from timesheets.parser import (
    aggregate_rows,
    build_description,
    detect_has_duration_column,
    extract_table_blocks,
    filter_rows_by_date,
    filter_skip_csv,
    parse_document,
    parse_table,
    resolve_overlaps,
)
# ---------------------------------------------------------------------------
# Fixtures / shared data
# ---------------------------------------------------------------------------
# Markdown table lines (header, separator, three data rows) whose header
# includes a Duration column. Shared by several test classes below.
WITH_DURATION = [
"| Start | End | Duration | Project | Story | Note |",
"|-------|-------|----------|---------|-------------|---------|",
"| 08:00 | 08:30 | 00:30 | bugs | story one | |",
"| 08:30 | 09:00 | 00:30 | bugs | story one | |",
"| 09:00 | 09:15 | 00:15 | scrum | | dsu |",
]
# Same kind of table without a Duration column (header, separator, two data
# rows).
WITHOUT_DURATION = [
"| Start | End | Project | Story | Note |",
"|-------|-------|---------|-------------|---------|",
"| 08:00 | 08:30 | bugs | story one | |",
"| 08:30 | 09:15 | scrum | | dsu |",
]
# Path to the "2026 - W21.md" sample file that sits next to this test module.
WEEK_FILE = os.path.join(os.path.dirname(__file__), "2026 - W21.md")
# ---------------------------------------------------------------------------
# detect_has_duration_column
# ---------------------------------------------------------------------------
class TestDetectHasDurationColumn:
    """Checks for ``detect_has_duration_column`` on table header lines."""

    def test_with_duration(self):
        """A header naming a Duration column is reported as True."""
        detected = detect_has_duration_column(WITH_DURATION)
        assert detected is True

    def test_without_duration(self):
        """A header lacking a Duration column is reported as False."""
        detected = detect_has_duration_column(WITHOUT_DURATION)
        assert detected is False

    def test_no_header_defaults_to_true(self):
        """Input without any table header falls back to True."""
        prose_only = ["no table here"]
        assert detect_has_duration_column(prose_only) is True

    def test_case_insensitive(self):
        """The column name is matched regardless of letter case."""
        header = "| Start | End | DURATION | Project | Story | Note |"
        assert detect_has_duration_column([header]) is True
# ---------------------------------------------------------------------------
# extract_table_blocks
# ---------------------------------------------------------------------------
class TestExtractTableBlocks:
    """Tests for splitting document lines into Markdown table blocks."""

    def test_single_table(self):
        """A lone table comes back as exactly one block, unchanged."""
        found = extract_table_blocks(WITH_DURATION)
        assert len(found) == 1
        assert found[0] == WITH_DURATION

    def test_two_tables_separated_by_prose(self):
        """Prose and blank lines between two tables yield two blocks."""
        document = [
            *WITH_DURATION,
            "",
            "# Next day",
            "some prose",
            "",
            *WITHOUT_DURATION,
        ]
        assert len(extract_table_blocks(document)) == 2

    def test_prose_between_tables_not_included(self):
        """The separating prose line must not end up inside any block."""
        document = [*WITH_DURATION, "some note", *WITHOUT_DURATION]
        found = extract_table_blocks(document)
        assert len(found) == 2
        assert not any("some note" in block for block in found)

    def test_single_line_table_discarded(self):
        """A single pipe-delimited line is not considered a table."""
        assert extract_table_blocks(["| Start | End |"]) == []

    def test_empty_input(self):
        """No lines in, no blocks out."""
        assert extract_table_blocks([]) == []

    def test_no_tables(self):
        """Headings and prose alone produce no blocks."""
        document = ["# heading", "", "prose"]
        assert extract_table_blocks(document) == []

    def test_table_at_end_of_file_captured(self):
        """A table that runs to the last line (no trailing newline) is kept."""
        document = ["# heading", "", *WITH_DURATION]
        assert len(extract_table_blocks(document)) == 1

    def test_blank_line_within_table_kept_as_one_block(self):
        """One blank line between table rows does not split the table."""
        document = [
            "| Start | End | Project | Story | Note |",
            "|-------|-------|---------|-------|------||",
            "| 08:00 | 08:30 | bugs | | |",
            "",
            "| 09:00 | 09:30 | scrum | | dsu |",
        ]
        found = extract_table_blocks(document)
        assert len(found) == 1
        # header + separator + two rows; the blank line itself is dropped
        assert len(found[0]) == 4

    def test_multiple_blank_lines_within_table(self):
        """Several consecutive blank lines between rows are tolerated too."""
        document = [
            "| Start | End | Project | Story | Note |",
            "|-------|-------|---------|-------|------||",
            "| 08:00 | 08:30 | bugs | | |",
            "",
            "",
            "| 09:00 | 09:30 | scrum | | dsu |",
        ]
        found = extract_table_blocks(document)
        assert len(found) == 1
        assert len(found[0]) == 4

    def test_blank_line_at_end_of_table_does_not_include_blank(self):
        """A blank line after the final row is not part of the block."""
        document = [*WITH_DURATION, "", "# Next section"]
        found = extract_table_blocks(document)
        assert len(found) == 1
        assert "" not in found[0]

    def test_blank_between_tables_still_splits(self):
        """A blank line followed by prose still terminates the first block."""
        document = [*WITH_DURATION, "", "# Next day", "", *WITHOUT_DURATION]
        assert len(extract_table_blocks(document)) == 2
# ---------------------------------------------------------------------------
# parse_table
# ---------------------------------------------------------------------------
class TestParseTable:
    """Tests for ``parse_table`` on single Markdown tables."""

    def test_with_duration_column(self):
        """Rows of a table with a Duration column are parsed field by field."""
        rows = parse_table(WITH_DURATION, has_duration_col=True)
        assert len(rows) == 3
        assert rows[0]["project"] == "bugs"
        assert rows[0]["duration_hours"] == 0.5
        assert rows[2]["project"] == "scrum"
        assert rows[2]["note"] == "dsu"

    def test_without_duration_column(self):
        """Without a Duration column the duration comes from start/end."""
        rows = parse_table(WITHOUT_DURATION, has_duration_col=False)
        assert len(rows) == 2
        assert rows[0]["duration_hours"] == 0.5  # 08:00-08:30
        assert rows[1]["duration_hours"] == 0.75  # 08:30-09:15

    def test_header_row_skipped(self):
        """The header line must not be returned as a data row."""
        rows = parse_table(WITH_DURATION)
        assert all(r["start"] != "Start" for r in rows)

    def test_separator_row_skipped(self):
        """The ``|---|---|`` separator line must not be returned as a row."""
        rows = parse_table(WITH_DURATION)
        # The separator cells are runs of dashes ("-------"), so test the
        # prefix; comparing against the literal "---" could never fail.
        assert not any(r["start"].startswith("-") for r in rows)

    def test_markdown_link_stripped_in_story(self):
        """A ``[text](target)`` link in the story column is reduced to text."""
        lines = [
            "| Start | End | Duration | Project | Story | Note |",
            "|-------|-------|----------|---------|----------------------------|------|",
            "| 08:00 | 08:30 | 00:30 | bugs | [ticket 1](:/abc123) | |",
        ]
        rows = parse_table(lines)
        assert rows[0]["story"] == "ticket 1"

    def test_invalid_duration_row_skipped(self):
        """A row whose Duration cell cannot be parsed is dropped."""
        lines = [
            "| Start | End | Duration | Project | Story | Note |",
            "|-------|-------|----------|---------|-------|------|",
            "| 08:00 | 08:30 | bad | bugs | | |",
        ]
        assert parse_table(lines) == []

    def test_empty_end_time_row_skipped(self):
        """An open entry (no end, no duration) is preserved, not skipped.

        The method name is historical: the assertions below require the row
        to be kept, with ``end`` and ``duration_hours`` both set to ``None``.
        """
        lines = [
            "| Start | End | Project | Story | Note |",
            "|-------|-------|---------|-------|------||",
            "| 09:55 | | bugs | | |",
        ]
        rows = parse_table(lines, has_duration_col=False)
        assert len(rows) == 1
        assert rows[0]["duration_hours"] is None
        assert rows[0]["end"] is None

    def test_empty_input(self):
        """No lines in, no rows out."""
        assert parse_table([]) == []

    def test_non_table_lines_ignored(self):
        """Headings and prose ahead of the table do not produce rows."""
        lines = ["# My Timesheet", "", "Some prose."] + WITH_DURATION
        rows = parse_table(lines)
        assert len(rows) == 3
# ---------------------------------------------------------------------------
# parse_document
# ---------------------------------------------------------------------------
class TestParseDocument:
    """Tests for ``parse_document`` across one or more tables."""

    @staticmethod
    def _week_file_lines():
        """Return the lines of the W21 sample file."""
        with open(WEEK_FILE, encoding="utf-8") as handle:
            return handle.read().splitlines()

    def test_single_table(self):
        """A document holding one table yields that table's rows."""
        assert len(parse_document(WITHOUT_DURATION)) == 2

    def test_multiple_tables_combined(self):
        """Rows from every table in the document are concatenated."""
        document = [*WITHOUT_DURATION, "", "# Next day", "", *WITHOUT_DURATION]
        assert len(parse_document(document)) == 4

    def test_prose_between_tables_ignored(self):
        """Notes and bullet points between tables contribute no rows."""
        filler = ["some notes", "- a bullet point", ""]
        document = WITHOUT_DURATION + filler + WITHOUT_DURATION
        assert len(parse_document(document)) == 4

    def test_mixed_duration_formats(self):
        """Tables with and without a Duration column can be mixed."""
        document = [*WITH_DURATION, "", "## Next day", "", *WITHOUT_DURATION]
        parsed = parse_document(document)
        # three rows from WITH_DURATION plus two from WITHOUT_DURATION
        assert len(parsed) == 5

    def test_empty_input(self):
        """No lines in, no rows out."""
        assert parse_document([]) == []

    def test_week_file(self):
        """Smoke test against the real W21 weekly timesheet file."""
        parsed = parse_document(self._week_file_lines())
        # The file has five daily tables, so expect a healthy number of rows.
        assert len(parsed) > 20
        # Every row carries the expected keys; closed rows have a positive
        # duration while open entries have None.
        for entry in parsed:
            assert "project" in entry
            assert "duration_hours" in entry
            hours = entry["duration_hours"]
            if hours is not None:
                assert hours > 0
        # The open entry (09:55, no end) must survive with duration None.
        open_bugs = [
            entry
            for entry in parsed
            if entry["start"] == "09:55" and entry["project"] == "bugs"
        ]
        assert len(open_bugs) == 1
        assert open_bugs[0]["duration_hours"] is None

    def test_week_file_no_markdown_links_in_stories(self):
        """Markdown link syntax must be stripped from story/note fields."""
        for row in parse_document(self._week_file_lines()):
            assert "](:" not in row["story"], (
                f"Link not stripped in story: {row['story']!r}"
            )
            assert "](:" not in row["note"], (
                f"Link not stripped in note: {row['note']!r}"
            )
# ---------------------------------------------------------------------------
# filter_rows_by_date
# ---------------------------------------------------------------------------
class TestFilterRowsByDate:
    """Tests for ``filter_rows_by_date`` (rows of a single day's table)."""

    @pytest.fixture(scope="class")
    def week_lines(self):
        """Lines of the W21 sample file, which has one table per day heading.

        Loaded lazily and once per class, so a missing sample file only
        affects the tests that request it instead of breaking collection of
        the whole module at import time.
        """
        with open(WEEK_FILE, encoding="utf-8") as handle:
            return handle.read().splitlines()

    def test_returns_only_matching_day(self, week_lines):
        """Filtering on the Friday date returns that day's rows."""
        rows = filter_rows_by_date(week_lines, date(2026, 5, 22))
        assert len(rows) > 0
        # Friday has these projects in the sample file
        projects = {r["project"] for r in rows}
        assert "scrum" in projects

    def test_different_day_returns_different_rows(self, week_lines):
        """Two different dates must not resolve to the same rows."""
        rows_fri = filter_rows_by_date(week_lines, date(2026, 5, 22))
        rows_mon = filter_rows_by_date(week_lines, date(2026, 5, 18))
        assert rows_fri != rows_mon
        assert len(rows_mon) > 0

    def test_no_match_returns_empty(self, week_lines):
        """A date absent from the document yields an empty list."""
        rows = filter_rows_by_date(week_lines, date(2026, 1, 1))
        assert rows == []

    def test_inline_document(self):
        """Each dated heading selects only the table that follows it."""
        lines = [
            "# Maandag - 2026-05-18",
            "| Start | End | Project | Story | Note |",
            "|-------|-------|---------|-------|------||",
            "| 08:00 | 08:30 | bugs | | fix |",
            "",
            "# Dinsdag - 2026-05-19",
            "| Start | End | Project | Story | Note |",
            "|-------|-------|---------|-------|------||",
            "| 09:00 | 09:30 | scrum | | dsu |",
        ]
        rows = filter_rows_by_date(lines, date(2026, 5, 18))
        assert len(rows) == 1
        assert rows[0]["project"] == "bugs"
        rows = filter_rows_by_date(lines, date(2026, 5, 19))
        assert len(rows) == 1
        assert rows[0]["project"] == "scrum"

    def test_empty_input(self):
        """No lines in, no rows out."""
        assert filter_rows_by_date([], date(2026, 5, 22)) == []
# ---------------------------------------------------------------------------
# build_description
# ---------------------------------------------------------------------------
class TestBuildDescription:
    """Tests for combining story and note into one description string."""

    def test_story_and_note(self):
        """Both parts present: joined with " - "."""
        described = build_description("story", "note")
        assert described == "story - note"

    def test_story_only(self):
        """An empty note leaves just the story."""
        described = build_description("story", "")
        assert described == "story"

    def test_note_only(self):
        """An empty story leaves just the note."""
        described = build_description("", "note")
        assert described == "note"

    def test_both_empty(self):
        """With nothing to describe, the placeholder "/" is returned."""
        described = build_description("", "")
        assert described == "/"

    def test_strips_whitespace(self):
        """Surrounding whitespace on either part is removed before joining."""
        described = build_description(" story ", " note ")
        assert described == "story - note"
# ---------------------------------------------------------------------------
# aggregate_rows (tests are in TestAggregateRows, after the skip_csv tests)
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
# skip_csv marker (~ prefix on the project or note column)
# ---------------------------------------------------------------------------
class TestParseTableSkipCsv:
    """Tests for the ``~`` marker that flags a row with ``skip_csv``."""

    # Table with the marker on the project column plus one ordinary row.
    _LINES = [
        "| Start | End | Duration | Project | Story | Note |",
        "|-------|-------|----------|---------|-------|----------|",
        "| 09:00 | 17:00 | 8:00 | ~Leave | | Day off |",
        "| 09:00 | 10:00 | 1:00 | work | st | |",
    ]

    # Table with the marker on the note column instead.
    _NOTE_LINES = [
        "| Start | End | Duration | Project | Story | Note |",
        "|-------|-------|----------|---------|-------|----------|",
        "| 09:00 | 17:00 | 8:00 | Leave | | ~Day off |",
    ]

    def test_tilde_sets_skip_csv(self):
        """A ``~`` on the project marks the row."""
        parsed = parse_table(self._LINES)
        leave_row = next(row for row in parsed if "Leave" in row["project"])
        assert leave_row["skip_csv"] is True

    def test_tilde_stripped_from_project(self):
        """The marker itself is removed from the project name."""
        parsed = parse_table(self._LINES)
        marked = next(row for row in parsed if row.get("skip_csv"))
        assert marked["project"] == "Leave"

    def test_tilde_with_space_stripped(self):
        """Whitespace between ``~`` and the project name is removed too."""
        table = [
            "| Start | End | Duration | Project | Story | Note |",
            "|-------|-------|----------|----------|-------|------|",
            "| 09:00 | 17:00 | 8:00 | ~ Leave | | |",
        ]
        first = parse_table(table)[0]
        assert first["skip_csv"] is True
        assert first["project"] == "Leave"

    def test_normal_row_has_no_skip_csv(self):
        """Unmarked rows do not carry the key at all."""
        parsed = parse_table(WITH_DURATION)
        assert not any("skip_csv" in row for row in parsed)

    def test_skip_csv_on_open_entry(self):
        """The marker also works on an entry without an end time."""
        table = [
            "| Start | End | Project | Story | Note |",
            "|-------|-----|---------|-------|------|",
            "| 09:00 | | ~Leave | | |",
        ]
        parsed = parse_table(table, has_duration_col=False)
        assert len(parsed) == 1
        only = parsed[0]
        assert only["skip_csv"] is True
        assert only["project"] == "Leave"

    def test_note_tilde_sets_skip_csv(self):
        """A ``~`` on the note marks the row as well."""
        first = parse_table(self._NOTE_LINES)[0]
        assert first["skip_csv"] is True

    def test_note_tilde_stripped_from_note(self):
        """The marker is removed from the note text."""
        first = parse_table(self._NOTE_LINES)[0]
        assert first["note"] == "Day off"

    def test_note_tilde_with_space_stripped(self):
        """Whitespace between ``~`` and the note text is removed too."""
        table = [
            "| Start | End | Duration | Project | Story | Note |",
            "|-------|-------|----------|---------|-------|-------|",
            "| 09:00 | 17:00 | 8:00 | Leave | | ~ Day off |",
        ]
        first = parse_table(table)[0]
        assert first["skip_csv"] is True
        assert first["note"] == "Day off"

    def test_note_tilde_without_duration_col(self):
        """The note marker works for tables lacking a Duration column."""
        table = [
            "| Start | End | Project | Story | Note |",
            "|-------|-------|---------|-------|----------|",
            "| 09:00 | 17:00 | Leave | | ~Day off |",
        ]
        first = parse_table(table, has_duration_col=False)[0]
        assert first["skip_csv"] is True
        assert first["note"] == "Day off"

    def test_note_tilde_does_not_affect_normal_row(self):
        """Rows with an ordinary note stay unmarked."""
        parsed = parse_table(WITH_DURATION)
        assert not any("skip_csv" in row for row in parsed)
class TestFilterSkipCsv:
    """Tests for ``filter_skip_csv``, which drops rows flagged ``skip_csv``."""

    def test_removes_skip_csv_rows(self):
        """Only the unflagged row survives."""
        flagged = {"project": "Leave", "skip_csv": True, "duration_hours": 8.0}
        plain = {"project": "work", "duration_hours": 1.0}
        kept = filter_skip_csv([flagged, plain])
        assert len(kept) == 1
        assert kept[0]["project"] == "work"

    def test_keeps_all_normal_rows(self):
        """With no flagged rows the output equals the input."""
        rows = [
            {"project": "work", "duration_hours": 1.0},
            {"project": "scrum", "duration_hours": 0.5},
        ]
        kept = filter_skip_csv(rows)
        assert kept == rows

    def test_empty_input(self):
        """No rows in, no rows out."""
        assert filter_skip_csv([]) == []

    def test_all_skip_csv_returns_empty(self):
        """If every row is flagged, nothing is left."""
        rows = [{"project": "Leave", "skip_csv": True, "duration_hours": 8.0}]
        assert filter_skip_csv(rows) == []

    def test_rows_without_key_treated_as_normal(self):
        """Rows that never had the key at all should pass through."""
        rows = [{"project": "work", "duration_hours": 1.0}]
        kept = filter_skip_csv(rows)
        assert kept == rows
class TestAggregateRows:
    """Tests for ``aggregate_rows`` on rows parsed from ``WITH_DURATION``."""

    def test_same_project_story_summed(self):
        """The two bugs/story-one rows collapse into one summed entry."""
        entries = aggregate_rows(parse_table(WITH_DURATION))
        bugs_entry = next(
            entry for entry in entries if entry["project"] == "bugs"
        )
        # 00:30 + 00:30
        assert bugs_entry["quantity"] == 1.0

    def test_distinct_entries_preserved(self):
        """Different project/description pairs stay separate."""
        entries = aggregate_rows(parse_table(WITH_DURATION))
        # bugs/story-one and scrum/dsu
        assert len(entries) == 2

    def test_insertion_order_preserved(self):
        """Entries come back in order of first appearance."""
        entries = aggregate_rows(parse_table(WITH_DURATION))
        first, second = entries[0], entries[1]
        assert first["project"] == "bugs"
        assert second["project"] == "scrum"

    def test_empty_input(self):
        """No rows in, no entries out."""
        assert aggregate_rows([]) == []
# ---------------------------------------------------------------------------
# resolve_overlaps
# ---------------------------------------------------------------------------
class TestResolveOverlaps:
    """Exercise ``resolve_overlaps`` on hand-built timesheet rows."""

    def _row(self, start, end, project="proj", story="s", note=""):
        """Build a minimal row dict; ``end=None`` produces an open entry.

        Closed rows get ``duration_hours`` computed from the two ``HH:MM``
        strings. Open rows carry ``None`` for ``end`` and ``duration_hours``.
        """
        hours = None
        if end is not None:
            start_h, start_m = (int(part) for part in start.split(":"))
            end_h, end_m = (int(part) for part in end.split(":"))
            hours = (end_h * 60 + end_m - start_h * 60 - start_m) / 60.0
        return {
            "start": start,
            "end": end,
            "duration_hours": hours,
            "project": project,
            "story": story,
            "story_raw": story,
            "note": note,
        }

    def _sorted(self, rows):
        """Return only the closed rows, ordered by their start string."""
        closed = [row for row in rows if row["end"] is not None]
        closed.sort(key=lambda row: row["start"])
        return closed

    # --- no-op cases ---

    def test_empty_input(self):
        """No rows in, no rows out."""
        assert resolve_overlaps([]) == []

    def test_single_entry_unchanged(self):
        """A lone entry has nothing to overlap with."""
        rows = [self._row("09:00", "10:00")]
        assert resolve_overlaps(rows) == rows

    def test_no_overlap_unchanged(self):
        """Back-to-back entries keep their original boundaries."""
        resolved = resolve_overlaps(
            [self._row("09:00", "10:00"), self._row("10:00", "11:00")]
        )
        assert len(resolved) == 2
        ordered = self._sorted(resolved)
        first, second = ordered[0], ordered[1]
        assert (first["start"], first["end"]) == ("09:00", "10:00")
        assert (second["start"], second["end"]) == ("10:00", "11:00")

    def test_only_open_entries_unchanged(self):
        """Open entries alone are returned as given."""
        rows = [self._row("09:00", None), self._row("10:00", None)]
        assert resolve_overlaps(rows) == rows

    # --- partial overlap ---

    def test_partial_overlap_spec_example(self):
        """Spec example: 9:00-10:00 vs 9:30-10:30 -> boundary at 9:45."""
        rows = [self._row("09:00", "10:00", "a"), self._row("09:30", "10:30", "b")]
        ordered = self._sorted(resolve_overlaps(rows))
        assert len(ordered) == 2
        earlier, later = ordered
        assert (earlier["start"], earlier["end"]) == ("09:00", "09:45")
        assert earlier["project"] == "a"
        assert (later["start"], later["end"]) == ("09:45", "10:30")
        assert later["project"] == "b"

    def test_partial_overlap_duration_recalculated(self):
        """Both trimmed entries end up 45 minutes long."""
        rows = [self._row("09:00", "10:00"), self._row("09:30", "10:30")]
        ordered = self._sorted(resolve_overlaps(rows))
        assert ordered[0]["duration_hours"] == pytest.approx(0.75)
        assert ordered[1]["duration_hours"] == pytest.approx(0.75)

    def test_partial_overlap_total_hours(self):
        """Total logged time after resolution equals the spanned wall-clock time."""
        rows = [self._row("09:00", "10:00"), self._row("09:30", "10:30")]
        resolved = resolve_overlaps(rows)
        logged = sum(row["duration_hours"] for row in resolved)
        # 9:00-10:30 = 90 min
        assert logged == pytest.approx(1.5)

    def test_partial_overlap_input_order_independent(self):
        """Result should be the same regardless of input order."""
        entry_a = ("09:00", "10:00", "a")
        entry_b = ("09:30", "10:30", "b")
        forward = [self._row(*entry_a), self._row(*entry_b)]
        backward = [self._row(*entry_b), self._row(*entry_a)]
        spans_forward = [
            (row["start"], row["end"])
            for row in self._sorted(resolve_overlaps(forward))
        ]
        spans_backward = [
            (row["start"], row["end"])
            for row in self._sorted(resolve_overlaps(backward))
        ]
        assert spans_forward == spans_backward

    # --- full containment ---

    def test_containment_spec_example(self):
        """Spec example: 9:00-10:00 contains 9:15-9:45.

        Expected split: A1 9:00-9:30, B 9:30-9:45, A2 9:45-10:00.
        """
        rows = [self._row("09:00", "10:00", "a"), self._row("09:15", "09:45", "b")]
        ordered = self._sorted(resolve_overlaps(rows))
        assert len(ordered) == 3
        head, middle, tail = ordered
        assert (head["start"], head["end"]) == ("09:00", "09:30")
        assert head["project"] == "a"
        assert (middle["start"], middle["end"]) == ("09:30", "09:45")
        assert middle["project"] == "b"
        assert (tail["start"], tail["end"]) == ("09:45", "10:00")
        assert tail["project"] == "a"

    def test_containment_total_hours(self):
        """Total after containment resolution equals the outer entry's original duration."""
        rows = [self._row("09:00", "10:00"), self._row("09:15", "09:45")]
        resolved = resolve_overlaps(rows)
        logged = sum(row["duration_hours"] for row in resolved)
        # 9:00-10:00 = 60 min
        assert logged == pytest.approx(1.0)

    def test_containment_same_start(self):
        """Smaller entry starts at the same time as the larger one."""
        rows = [self._row("09:00", "10:00", "a"), self._row("09:00", "09:30", "b")]
        ordered = self._sorted(resolve_overlaps(rows))
        assert len(ordered) == 3
        head, middle, tail = ordered
        assert (head["start"], head["end"]) == ("09:00", "09:15")
        assert head["project"] == "a"
        assert (middle["start"], middle["end"]) == ("09:15", "09:30")
        assert middle["project"] == "b"
        assert (tail["start"], tail["end"]) == ("09:30", "10:00")
        assert tail["project"] == "a"

    def test_containment_same_end(self):
        """Smaller entry ends at the same time as the larger one."""
        rows = [self._row("09:00", "10:00", "a"), self._row("09:30", "10:00", "b")]
        ordered = self._sorted(resolve_overlaps(rows))
        assert len(ordered) == 2
        earlier, later = ordered
        assert (earlier["start"], earlier["end"]) == ("09:00", "09:45")
        assert earlier["project"] == "a"
        assert (later["start"], later["end"]) == ("09:45", "10:00")
        assert later["project"] == "b"

    # --- open entries ---

    def test_open_entry_passed_through(self):
        """An open entry survives next to a closed one."""
        still_open = self._row("09:00", None, "open")
        finished = self._row("09:30", "10:30", "closed")
        resolved = resolve_overlaps([still_open, finished])
        assert any(row["end"] is None for row in resolved)
        assert any(row["end"] == "10:30" for row in resolved)

    # --- metadata preservation ---

    def test_project_and_story_preserved(self):
        """Project, story and note stay attached to their own entry."""
        rows = [
            self._row("09:00", "10:00", project="p1", story="s1", note="n1"),
            self._row("09:30", "10:30", project="p2", story="s2", note="n2"),
        ]
        ordered = self._sorted(resolve_overlaps(rows))
        earlier, later = ordered[0], ordered[1]
        assert earlier["project"] == "p1"
        assert earlier["story"] == "s1"
        assert earlier["note"] == "n1"
        assert later["project"] == "p2"
        assert later["story"] == "s2"
        assert later["note"] == "n2"

    # --- no remaining overlaps ---

    def test_result_has_no_overlaps(self):
        """After resolution, no two entries in the result should overlap."""
        rows = [
            self._row("09:00", "11:00", "a"),
            self._row("09:30", "10:30", "b"),
            self._row("10:00", "12:00", "c"),
        ]
        result = self._sorted(resolve_overlaps(rows))
        # Compare each entry with its successor; zero-padded HH:MM strings
        # order the same way as the times they denote.
        for i, _ in enumerate(result[:-1]):
            assert result[i]["end"] <= result[i + 1]["start"], (
                f"Overlap between entry {i} ({result[i]}) and {i + 1} ({result[i + 1]})"
            )