# odoo-timesheets/tests/test_parser.py

import os
from datetime import date

import pytest

from timesheets.parser import (
    aggregate_rows,
    build_description,
    detect_has_duration_column,
    extract_table_blocks,
    filter_rows_by_date,
    filter_skip_csv,
    parse_document,
    parse_table,
    resolve_overlaps,
)

# ---------------------------------------------------------------------------
# Fixtures / shared data
# ---------------------------------------------------------------------------

# Sample Markdown table that includes a Duration column: a header row, a
# separator row, and three data rows (two "bugs" rows and one "scrum" row).
WITH_DURATION = [
    "| Start | End   | Duration | Project | Story       | Note    |",
    "|-------|-------|----------|---------|-------------|---------|",
    "| 08:00 | 08:30 | 00:30    | bugs    | story one   |         |",
    "| 08:30 | 09:00 | 00:30    | bugs    | story one   |         |",
    "| 09:00 | 09:15 | 00:15    | scrum   |             | dsu     |",
]

# Sample Markdown table without a Duration column: a header row, a separator
# row, and two data rows.
WITHOUT_DURATION = [
    "| Start | End   | Project | Story       | Note    |",
    "|-------|-------|---------|-------------|---------|",
    "| 08:00 | 08:30 | bugs    | story one   |         |",
    "| 08:30 | 09:15 | scrum   |             | dsu     |",
]

# Path to the sample weekly timesheet stored next to this test module.
WEEK_FILE = os.path.join(os.path.dirname(__file__), "2026 - W21.md")


# ---------------------------------------------------------------------------
# detect_has_duration_column
# ---------------------------------------------------------------------------


class TestDetectHasDurationColumn:
    """Checks for detecting a Duration column from table lines."""

    def test_with_duration(self):
        """The fixture with a Duration header is reported as True."""
        detected = detect_has_duration_column(WITH_DURATION)
        assert detected is True

    def test_without_duration(self):
        """The fixture without a Duration header is reported as False."""
        detected = detect_has_duration_column(WITHOUT_DURATION)
        assert detected is False

    def test_no_header_defaults_to_true(self):
        """Input that contains no table header yields True."""
        prose_only = ["no table here"]
        assert detect_has_duration_column(prose_only) is True

    def test_case_insensitive(self):
        """An upper-case DURATION header is still recognised."""
        upper_case_header = "| Start | End | DURATION | Project | Story | Note |"
        assert detect_has_duration_column([upper_case_header]) is True


# ---------------------------------------------------------------------------
# extract_table_blocks
# ---------------------------------------------------------------------------


class TestExtractTableBlocks:
    """Checks for splitting document lines into separate table blocks."""

    def test_single_table(self):
        """A lone table comes back as exactly one block, unchanged."""
        found = extract_table_blocks(WITH_DURATION)
        assert len(found) == 1
        assert found[0] == WITH_DURATION

    def test_two_tables_separated_by_prose(self):
        """A heading and prose between two tables yield two blocks."""
        separator = ["", "# Next day", "some prose", ""]
        document = WITH_DURATION + separator + WITHOUT_DURATION
        found = extract_table_blocks(document)
        assert len(found) == 2

    def test_prose_between_tables_not_included(self):
        """A prose line directly between two tables belongs to neither block."""
        document = WITH_DURATION + ["some note"] + WITHOUT_DURATION
        found = extract_table_blocks(document)
        assert len(found) == 2
        assert all("some note" not in block for block in found)

    def test_single_line_table_discarded(self):
        """A table consisting of a single line is dropped."""
        lonely_header = ["| Start | End |"]
        assert extract_table_blocks(lonely_header) == []

    def test_empty_input(self):
        """No lines in, no blocks out."""
        assert extract_table_blocks([]) == []

    def test_no_tables(self):
        """A document without table lines produces no blocks."""
        prose_document = ["# heading", "", "prose"]
        assert extract_table_blocks(prose_document) == []

    def test_table_at_end_of_file_captured(self):
        """A table that runs to the last line is still returned."""
        document = ["# heading", ""] + WITH_DURATION  # no trailing newline
        found = extract_table_blocks(document)
        assert len(found) == 1

    def test_blank_line_within_table_kept_as_one_block(self):
        """One blank line between table rows must not split the table."""
        table_with_gap = [
            "| Start | End   | Project | Story | Note |",
            "|-------|-------|---------|-------|------||",
            "| 08:00 | 08:30 | bugs    |       |      |",
            "",
            "| 09:00 | 09:30 | scrum   |       | dsu  |",
        ]
        found = extract_table_blocks(table_with_gap)
        assert len(found) == 1
        # header + separator + two rows; the blank line is dropped
        assert len(found[0]) == 4

    def test_multiple_blank_lines_within_table(self):
        """Several consecutive blank lines between rows do not split either."""
        table_with_gap = [
            "| Start | End   | Project | Story | Note |",
            "|-------|-------|---------|-------|------||",
            "| 08:00 | 08:30 | bugs    |       |      |",
            "",
            "",
            "| 09:00 | 09:30 | scrum   |       | dsu  |",
        ]
        found = extract_table_blocks(table_with_gap)
        assert len(found) == 1
        assert len(found[0]) == 4

    def test_blank_line_at_end_of_table_does_not_include_blank(self):
        """A blank line after the final row is not part of the block."""
        document = WITH_DURATION + ["", "# Next section"]
        found = extract_table_blocks(document)
        assert len(found) == 1
        assert "" not in found[0]

    def test_blank_between_tables_still_splits(self):
        """A blank line followed by a heading ends the first block."""
        document = WITH_DURATION + ["", "# Next day", ""] + WITHOUT_DURATION
        found = extract_table_blocks(document)
        assert len(found) == 2


# ---------------------------------------------------------------------------
# parse_table
# ---------------------------------------------------------------------------


class TestParseTable:
    """Checks for parsing Markdown table lines into row dicts."""

    def test_with_duration_column(self):
        """Durations are taken from the Duration column when it is present."""
        rows = parse_table(WITH_DURATION, has_duration_col=True)
        assert len(rows) == 3
        assert rows[0]["project"] == "bugs"
        assert rows[0]["duration_hours"] == 0.5
        assert rows[2]["project"] == "scrum"
        assert rows[2]["note"] == "dsu"

    def test_without_duration_column(self):
        """Without a Duration column the hours match the start/end span."""
        rows = parse_table(WITHOUT_DURATION, has_duration_col=False)
        assert len(rows) == 2
        assert rows[0]["duration_hours"] == 0.5  # 08:00–08:30
        assert rows[1]["duration_hours"] == 0.75  # 08:30–09:15

    def test_header_row_skipped(self):
        """The header row never shows up as a data row."""
        rows = parse_table(WITH_DURATION)
        assert all(r["start"] != "Start" for r in rows)

    def test_separator_row_skipped(self):
        """The dashed separator row never shows up as a data row."""
        rows = parse_table(WITH_DURATION)
        assert all(r["start"] != "---" for r in rows)

    def test_markdown_link_stripped_in_story(self):
        """A Markdown link in the Story cell is reduced to its link text."""
        lines = [
            "| Start | End   | Duration | Project | Story                      | Note |",
            "|-------|-------|----------|---------|----------------------------|------|",
            "| 08:00 | 08:30 | 00:30    | bugs    | [ticket 1](:/abc123)       |      |",
        ]
        rows = parse_table(lines)
        assert rows[0]["story"] == "ticket 1"

    def test_invalid_duration_row_skipped(self):
        """A row whose Duration cell cannot be parsed is dropped."""
        lines = [
            "| Start | End   | Duration | Project | Story | Note |",
            "|-------|-------|----------|---------|-------|------|",
            "| 08:00 | 08:30 | bad      | bugs    |       |      |",
        ]
        assert parse_table(lines) == []

    def test_empty_end_time_row_preserved_as_open_entry(self):
        """An open entry (no end, no duration) is kept with None end and duration.

        Renamed from ``test_empty_end_time_row_skipped``: the assertions
        require the row to be preserved, not skipped.
        """
        lines = [
            "| Start | End   | Project | Story | Note |",
            "|-------|-------|---------|-------|------||",
            "| 09:55 |       | bugs    |       |      |",
        ]
        rows = parse_table(lines, has_duration_col=False)
        assert len(rows) == 1
        assert rows[0]["duration_hours"] is None
        assert rows[0]["end"] is None

    def test_empty_input(self):
        """No lines in, no rows out."""
        assert parse_table([]) == []

    def test_non_table_lines_ignored(self):
        """Headings, blanks and prose before the table do not produce rows."""
        lines = ["# My Timesheet", "", "Some prose."] + WITH_DURATION
        rows = parse_table(lines)
        assert len(rows) == 3


# ---------------------------------------------------------------------------
# parse_document
# ---------------------------------------------------------------------------


class TestParseDocument:
    """Checks for parsing every table found in a whole document."""

    @staticmethod
    def _parse_week_file():
        """Read the sample weekly file and return its parsed rows."""
        with open(WEEK_FILE, encoding="utf-8") as handle:
            content = handle.read().splitlines()
        return parse_document(content)

    def test_single_table(self):
        """A document that is just one table yields that table's rows."""
        parsed = parse_document(WITHOUT_DURATION)
        assert len(parsed) == 2

    def test_multiple_tables_combined(self):
        """Rows of two tables separated by a heading are returned together."""
        document = WITHOUT_DURATION + ["", "# Next day", ""] + WITHOUT_DURATION
        parsed = parse_document(document)
        assert len(parsed) == 4

    def test_prose_between_tables_ignored(self):
        """Prose and bullet lines between tables add no rows."""
        filler = ["some notes", "- a bullet point", ""]
        document = WITHOUT_DURATION + filler + WITHOUT_DURATION
        parsed = parse_document(document)
        assert len(parsed) == 4

    def test_mixed_duration_formats(self):
        """Tables with and without a Duration column can share a document."""
        document = WITH_DURATION + ["", "## Next day", ""] + WITHOUT_DURATION
        parsed = parse_document(document)
        # 3 rows from WITH_DURATION plus 2 rows from WITHOUT_DURATION
        assert len(parsed) == 5

    def test_empty_input(self):
        """No lines in, no rows out."""
        assert parse_document([]) == []

    def test_week_file(self):
        """Smoke test against the real W21 weekly timesheet file."""
        parsed = self._parse_week_file()
        # Five daily tables in the file, so a healthy number of rows is expected
        assert len(parsed) > 20
        # Every row carries the expected keys; closed rows have a positive
        # duration while open entries carry None
        for entry in parsed:
            assert "project" in entry
            assert "duration_hours" in entry
            hours = entry["duration_hours"]
            assert hours is None or hours > 0
        # The open entry (09:55, no end) must survive with duration_hours=None
        open_bugs = [
            entry
            for entry in parsed
            if entry["start"] == "09:55" and entry["project"] == "bugs"
        ]
        assert len(open_bugs) == 1
        assert open_bugs[0]["duration_hours"] is None

    def test_week_file_no_markdown_links_in_stories(self):
        """Markdown link syntax must be stripped from story/note fields."""
        link_marker = "](:"
        for row in self._parse_week_file():
            assert link_marker not in row["story"], (
                f"Link not stripped in story: {row['story']!r}"
            )
            assert link_marker not in row["note"], (
                f"Link not stripped in note: {row['note']!r}"
            )


# ---------------------------------------------------------------------------
# filter_rows_by_date
# ---------------------------------------------------------------------------


class TestFilterRowsByDate:
    """Checks for selecting only the rows under a given day's heading.

    The W21 sample file (one table per day heading) is read lazily by the
    tests that need it, not in the class body. A missing file then fails
    only those tests instead of breaking collection of the whole module.
    """

    @staticmethod
    def _week_lines():
        """Return the lines of the W21 sample file."""
        with open(WEEK_FILE, encoding="utf-8") as f:
            return f.read().splitlines()

    def test_returns_only_matching_day(self):
        """Rows for the Friday of the sample week include the scrum project."""
        rows = filter_rows_by_date(self._week_lines(), date(2026, 5, 22))
        assert len(rows) > 0
        # Friday has these projects in the sample file
        projects = {r["project"] for r in rows}
        assert "scrum" in projects

    def test_different_day_returns_different_rows(self):
        """Two different days of the sample week give different row lists."""
        week_lines = self._week_lines()
        rows_fri = filter_rows_by_date(week_lines, date(2026, 5, 22))
        rows_mon = filter_rows_by_date(week_lines, date(2026, 5, 18))
        assert rows_fri != rows_mon
        assert len(rows_mon) > 0

    def test_no_match_returns_empty(self):
        """A date that the sample file does not contain yields no rows."""
        rows = filter_rows_by_date(self._week_lines(), date(2026, 1, 1))
        assert rows == []

    def test_inline_document(self):
        """Each dated heading selects only the table directly beneath it."""
        lines = [
            "# Maandag - 2026-05-18",
            "| Start | End   | Project | Story | Note |",
            "|-------|-------|---------|-------|------||",
            "| 08:00 | 08:30 | bugs    |       | fix  |",
            "",
            "# Dinsdag - 2026-05-19",
            "| Start | End   | Project | Story | Note |",
            "|-------|-------|---------|-------|------||",
            "| 09:00 | 09:30 | scrum   |       | dsu  |",
        ]
        rows = filter_rows_by_date(lines, date(2026, 5, 18))
        assert len(rows) == 1
        assert rows[0]["project"] == "bugs"

        rows = filter_rows_by_date(lines, date(2026, 5, 19))
        assert len(rows) == 1
        assert rows[0]["project"] == "scrum"

    def test_empty_input(self):
        """No lines in, no rows out."""
        assert filter_rows_by_date([], date(2026, 5, 22)) == []


# ---------------------------------------------------------------------------
# build_description
# ---------------------------------------------------------------------------


class TestBuildDescription:
    """Checks for combining a story and a note into one description."""

    def test_story_and_note(self):
        """Story and note are joined with " - "."""
        combined = build_description("story", "note")
        assert combined == "story - note"

    def test_story_only(self):
        """An empty note leaves just the story."""
        combined = build_description("story", "")
        assert combined == "story"

    def test_note_only(self):
        """An empty story leaves just the note."""
        combined = build_description("", "note")
        assert combined == "note"

    def test_both_empty(self):
        """Two empty inputs produce the "/" placeholder."""
        combined = build_description("", "")
        assert combined == "/"

    def test_strips_whitespace(self):
        """Surrounding whitespace on both parts is removed before joining."""
        combined = build_description("  story  ", "  note  ")
        assert combined == "story - note"


# ---------------------------------------------------------------------------
# skip_csv marker (~project or ~note)
# ---------------------------------------------------------------------------


class TestParseTableSkipCsv:
    """Checks for the leading-tilde marker that flags a row as skip_csv."""

    # One row marked with "~" on the project, followed by one ordinary row.
    _LINES = [
        "| Start | End   | Duration | Project | Story | Note     |",
        "|-------|-------|----------|---------|-------|----------|",
        "| 09:00 | 17:00 | 8:00     | ~Leave  |       | Day off  |",
        "| 09:00 | 10:00 | 1:00     | work    | st    |          |",
    ]

    def test_tilde_sets_skip_csv(self):
        """A tilde in front of the project sets skip_csv to True."""
        parsed = parse_table(self._LINES)
        leave_row = next(row for row in parsed if "Leave" in row["project"])
        assert leave_row["skip_csv"] is True

    def test_tilde_stripped_from_project(self):
        """The tilde itself is removed from the stored project name."""
        parsed = parse_table(self._LINES)
        leave_row = next(row for row in parsed if row.get("skip_csv"))
        assert leave_row["project"] == "Leave"

    def test_tilde_with_space_stripped(self):
        """A space between the tilde and the project name is removed too."""
        table = [
            "| Start | End   | Duration | Project  | Story | Note |",
            "|-------|-------|----------|----------|-------|------|",
            "| 09:00 | 17:00 | 8:00     | ~ Leave  |       |      |",
        ]
        first = parse_table(table)[0]
        assert first["skip_csv"] is True
        assert first["project"] == "Leave"

    def test_normal_row_has_no_skip_csv(self):
        """Rows without a marker do not carry the skip_csv key at all."""
        parsed = parse_table(WITH_DURATION)
        assert all("skip_csv" not in row for row in parsed)

    def test_skip_csv_on_open_entry(self):
        """The marker also works on a row that has no end time."""
        table = [
            "| Start | End | Project | Story | Note |",
            "|-------|-----|---------|-------|------|",
            "| 09:00 |     | ~Leave  |       |      |",
        ]
        parsed = parse_table(table, has_duration_col=False)
        assert len(parsed) == 1
        assert parsed[0]["skip_csv"] is True
        assert parsed[0]["project"] == "Leave"

    def test_note_tilde_sets_skip_csv(self):
        """A tilde in front of the note sets skip_csv to True."""
        table = [
            "| Start | End   | Duration | Project | Story | Note     |",
            "|-------|-------|----------|---------|-------|----------|",
            "| 09:00 | 17:00 | 8:00     | Leave   |       | ~Day off |",
        ]
        first = parse_table(table)[0]
        assert first["skip_csv"] is True

    def test_note_tilde_stripped_from_note(self):
        """The tilde itself is removed from the stored note."""
        table = [
            "| Start | End   | Duration | Project | Story | Note     |",
            "|-------|-------|----------|---------|-------|----------|",
            "| 09:00 | 17:00 | 8:00     | Leave   |       | ~Day off |",
        ]
        first = parse_table(table)[0]
        assert first["note"] == "Day off"

    def test_note_tilde_with_space_stripped(self):
        """A space between the tilde and the note text is removed too."""
        table = [
            "| Start | End   | Duration | Project | Story | Note      |",
            "|-------|-------|----------|---------|-------|-------|",
            "| 09:00 | 17:00 | 8:00     | Leave   |       | ~ Day off |",
        ]
        first = parse_table(table)[0]
        assert first["skip_csv"] is True
        assert first["note"] == "Day off"

    def test_note_tilde_without_duration_col(self):
        """The note marker works for tables without a Duration column."""
        table = [
            "| Start | End   | Project | Story | Note     |",
            "|-------|-------|---------|-------|----------|",
            "| 09:00 | 17:00 | Leave   |       | ~Day off |",
        ]
        first = parse_table(table, has_duration_col=False)[0]
        assert first["skip_csv"] is True
        assert first["note"] == "Day off"

    def test_note_tilde_does_not_affect_normal_row(self):
        """Rows whose note has no marker do not carry the skip_csv key."""
        parsed = parse_table(WITH_DURATION)
        assert all("skip_csv" not in row for row in parsed)


class TestFilterSkipCsv:
    """Checks for dropping rows that are flagged with skip_csv."""

    def test_removes_skip_csv_rows(self):
        """A flagged row is removed while the unflagged one remains."""
        flagged = {"project": "Leave", "skip_csv": True, "duration_hours": 8.0}
        regular = {"project": "work", "duration_hours": 1.0}
        kept = filter_skip_csv([flagged, regular])
        assert len(kept) == 1
        assert kept[0]["project"] == "work"

    def test_keeps_all_normal_rows(self):
        """Unflagged rows are all returned."""
        regular_rows = [
            {"project": "work", "duration_hours": 1.0},
            {"project": "scrum", "duration_hours": 0.5},
        ]
        assert filter_skip_csv(regular_rows) == regular_rows

    def test_empty_input(self):
        """No rows in, no rows out."""
        assert filter_skip_csv([]) == []

    def test_all_skip_csv_returns_empty(self):
        """When every row is flagged, nothing is left."""
        only_flagged = [{"project": "Leave", "skip_csv": True, "duration_hours": 8.0}]
        assert filter_skip_csv(only_flagged) == []

    def test_rows_without_key_treated_as_normal(self):
        """Rows that never had the key at all should pass through."""
        keyless = [{"project": "work", "duration_hours": 1.0}]
        assert filter_skip_csv(keyless) == keyless


class TestAggregateRows:
    """Checks for summing parsed rows into aggregated entries."""

    def test_same_project_story_summed(self):
        """Two 00:30 rows with the same project and story add up to 1.0."""
        entries = aggregate_rows(parse_table(WITH_DURATION))
        bugs_entry = next(entry for entry in entries if entry["project"] == "bugs")
        assert bugs_entry["quantity"] == 1.0  # 00:30 + 00:30

    def test_distinct_entries_preserved(self):
        """Rows that differ stay separate entries."""
        entries = aggregate_rows(parse_table(WITH_DURATION))
        # bugs/story-one and scrum/dsu
        assert len(entries) == 2

    def test_insertion_order_preserved(self):
        """Entries come back in the order of first appearance."""
        entries = aggregate_rows(parse_table(WITH_DURATION))
        assert entries[0]["project"] == "bugs"
        assert entries[1]["project"] == "scrum"

    def test_empty_input(self):
        """No rows in, no entries out."""
        assert aggregate_rows([]) == []


# ---------------------------------------------------------------------------
# resolve_overlaps
# ---------------------------------------------------------------------------


class TestResolveOverlaps:
    """Tests for overlap resolution between timesheet entries."""

    def _row(self, start, end, project="proj", story="s", note=""):
        """Build a minimal row dict for the given "HH:MM" start and end.

        When ``end`` is None the row is an open entry: both ``end`` and
        ``duration_hours`` are None. Otherwise ``duration_hours`` is the
        start-to-end span in hours. ``story_raw`` mirrors ``story``.
        """
        if end is None:
            duration = None
        else:
            h1, m1 = map(int, start.split(":"))
            h2, m2 = map(int, end.split(":"))
            duration = (h2 * 60 + m2 - h1 * 60 - m1) / 60.0
        return {
            "start": start,
            "end": end,
            "duration_hours": duration,
            "project": project,
            "story": story,
            "story_raw": story,
            "note": note,
        }

    def _sorted(self, rows):
        """Return only the closed rows (end is not None), ordered by start."""
        return sorted(
            [r for r in rows if r["end"] is not None],
            key=lambda r: r["start"],
        )

    # --- no-op cases ---

    def test_empty_input(self):
        assert resolve_overlaps([]) == []

    def test_single_entry_unchanged(self):
        rows = [self._row("09:00", "10:00")]
        assert resolve_overlaps(rows) == rows

    def test_no_overlap_unchanged(self):
        rows = [self._row("09:00", "10:00"), self._row("10:00", "11:00")]
        result = resolve_overlaps(rows)
        assert len(result) == 2
        s = self._sorted(result)
        assert (s[0]["start"], s[0]["end"]) == ("09:00", "10:00")
        assert (s[1]["start"], s[1]["end"]) == ("10:00", "11:00")

    def test_only_open_entries_unchanged(self):
        rows = [self._row("09:00", None), self._row("10:00", None)]
        result = resolve_overlaps(rows)
        assert result == rows

    # --- partial overlap ---

    def test_partial_overlap_spec_example(self):
        """Spec example: 9:00-10:00 vs 9:30-10:30 → boundary at 9:45."""
        rows = [self._row("09:00", "10:00", "a"), self._row("09:30", "10:30", "b")]
        result = self._sorted(resolve_overlaps(rows))
        assert len(result) == 2
        assert (result[0]["start"], result[0]["end"]) == ("09:00", "09:45")
        assert result[0]["project"] == "a"
        assert (result[1]["start"], result[1]["end"]) == ("09:45", "10:30")
        assert result[1]["project"] == "b"

    def test_partial_overlap_duration_recalculated(self):
        rows = [self._row("09:00", "10:00"), self._row("09:30", "10:30")]
        result = self._sorted(resolve_overlaps(rows))
        assert result[0]["duration_hours"] == pytest.approx(0.75)  # 45 min
        assert result[1]["duration_hours"] == pytest.approx(0.75)  # 45 min

    def test_partial_overlap_total_hours(self):
        """Total logged time after resolution equals the spanned wall-clock time."""
        rows = [self._row("09:00", "10:00"), self._row("09:30", "10:30")]
        result = resolve_overlaps(rows)
        total = sum(r["duration_hours"] for r in result)
        assert total == pytest.approx(1.5)  # 9:00–10:30 = 90 min

    def test_partial_overlap_input_order_independent(self):
        """Result should be the same regardless of input order."""
        rows_forward = [
            self._row("09:00", "10:00", "a"),
            self._row("09:30", "10:30", "b"),
        ]
        rows_reverse = [
            self._row("09:30", "10:30", "b"),
            self._row("09:00", "10:00", "a"),
        ]
        r1 = self._sorted(resolve_overlaps(rows_forward))
        r2 = self._sorted(resolve_overlaps(rows_reverse))
        assert [(r["start"], r["end"]) for r in r1] == [
            (r["start"], r["end"]) for r in r2
        ]

    # --- full containment ---

    def test_containment_spec_example(self):
        """Spec example: 9:00-10:00 contains 9:15-9:45 → A1: 9:00-9:30, B: 9:30-9:45, A2: 9:45-10:00."""
        rows = [self._row("09:00", "10:00", "a"), self._row("09:15", "09:45", "b")]
        result = self._sorted(resolve_overlaps(rows))
        assert len(result) == 3
        assert (result[0]["start"], result[0]["end"]) == ("09:00", "09:30")
        assert result[0]["project"] == "a"
        assert (result[1]["start"], result[1]["end"]) == ("09:30", "09:45")
        assert result[1]["project"] == "b"
        assert (result[2]["start"], result[2]["end"]) == ("09:45", "10:00")
        assert result[2]["project"] == "a"

    def test_containment_total_hours(self):
        """Total after containment resolution equals the outer entry's original duration."""
        rows = [self._row("09:00", "10:00"), self._row("09:15", "09:45")]
        result = resolve_overlaps(rows)
        total = sum(r["duration_hours"] for r in result)
        assert total == pytest.approx(1.0)  # 9:00–10:00 = 60 min

    def test_containment_same_start(self):
        """Smaller entry starts at the same time as the larger one."""
        rows = [self._row("09:00", "10:00", "a"), self._row("09:00", "09:30", "b")]
        result = self._sorted(resolve_overlaps(rows))
        assert len(result) == 3
        assert (result[0]["start"], result[0]["end"]) == ("09:00", "09:15")
        assert result[0]["project"] == "a"
        assert (result[1]["start"], result[1]["end"]) == ("09:15", "09:30")
        assert result[1]["project"] == "b"
        assert (result[2]["start"], result[2]["end"]) == ("09:30", "10:00")
        assert result[2]["project"] == "a"

    def test_containment_same_end(self):
        """Smaller entry ends at the same time as the larger one."""
        rows = [self._row("09:00", "10:00", "a"), self._row("09:30", "10:00", "b")]
        result = self._sorted(resolve_overlaps(rows))
        assert len(result) == 2
        assert (result[0]["start"], result[0]["end"]) == ("09:00", "09:45")
        assert result[0]["project"] == "a"
        assert (result[1]["start"], result[1]["end"]) == ("09:45", "10:00")
        assert result[1]["project"] == "b"

    # --- open entries ---

    def test_open_entry_passed_through(self):
        """An open entry and a closed entry both appear in the result."""
        open_row = self._row("09:00", None, "open")
        closed_row = self._row("09:30", "10:30", "closed")
        result = resolve_overlaps([open_row, closed_row])
        assert any(r["end"] is None for r in result)
        assert any(r["end"] == "10:30" for r in result)

    # --- metadata preservation ---

    def test_project_and_story_preserved(self):
        """Project, story and note stay with their entry after resolution."""
        rows = [
            self._row("09:00", "10:00", project="p1", story="s1", note="n1"),
            self._row("09:30", "10:30", project="p2", story="s2", note="n2"),
        ]
        result = self._sorted(resolve_overlaps(rows))
        assert result[0]["project"] == "p1"
        assert result[0]["story"] == "s1"
        assert result[0]["note"] == "n1"
        assert result[1]["project"] == "p2"
        assert result[1]["story"] == "s2"
        assert result[1]["note"] == "n2"

    # --- no remaining overlaps ---

    def test_result_has_no_overlaps(self):
        """After resolution, no two entries in the result should overlap."""
        rows = [
            self._row("09:00", "11:00", "a"),
            self._row("09:30", "10:30", "b"),
            self._row("10:00", "12:00", "c"),
        ]
        result = self._sorted(resolve_overlaps(rows))
        # Zero-padded "HH:MM" strings compare in chronological order, so the
        # neighbouring pairs can be checked with plain string comparison.
        for i, (earlier, later) in enumerate(zip(result, result[1:])):
            assert earlier["end"] <= later["start"], (
                f"Overlap between entry {i} ({earlier}) and {i + 1} ({later})"
            )