# odoo-timesheets/tests/test_parser.py

import os
from datetime import date

import pytest

from timesheets.parser import (
    aggregate_rows,
    build_description,
    detect_has_duration_column,
    extract_table_blocks,
    filter_rows_by_date,
    parse_document,
    parse_table,
)

# ---------------------------------------------------------------------------
# Fixtures / shared data
# ---------------------------------------------------------------------------

# Sample markdown table that carries an explicit "Duration" column.
# Three data rows: two 00:30 rows for bugs / "story one" and one 00:15 row
# for scrum with the note "dsu".
WITH_DURATION = [
    "| Start | End   | Duration | Project | Story       | Note    |",
    "|-------|-------|----------|---------|-------------|---------|",
    "| 08:00 | 08:30 | 00:30    | bugs    | story one   |         |",
    "| 08:30 | 09:00 | 00:30    | bugs    | story one   |         |",
    "| 09:00 | 09:15 | 00:15    | scrum   |             | dsu     |",
]

# Sample markdown table without a "Duration" column; only Start and End are
# given. Two data rows: 08:00-08:30 (bugs) and 08:30-09:15 (scrum / "dsu").
WITHOUT_DURATION = [
    "| Start | End   | Project | Story       | Note    |",
    "|-------|-------|---------|-------------|---------|",
    "| 08:00 | 08:30 | bugs    | story one   |         |",
    "| 08:30 | 09:15 | scrum   |             | dsu     |",
]

# Path to the "2026 - W21.md" sample file located in the same directory as
# this test module.
WEEK_FILE = os.path.join(os.path.dirname(__file__), "2026 - W21.md")


# ---------------------------------------------------------------------------
# detect_has_duration_column
# ---------------------------------------------------------------------------


class TestDetectHasDurationColumn:
    """Checks for ``detect_has_duration_column``."""

    def test_with_duration(self):
        """A header containing a Duration column yields True."""
        detected = detect_has_duration_column(WITH_DURATION)
        assert detected is True

    def test_without_duration(self):
        """A header lacking a Duration column yields False."""
        detected = detect_has_duration_column(WITHOUT_DURATION)
        assert detected is False

    def test_no_header_defaults_to_true(self):
        """Input with no table header at all is expected to yield True."""
        prose_only = ["no table here"]
        assert detect_has_duration_column(prose_only) is True

    def test_case_insensitive(self):
        """An upper-case ``DURATION`` header is still recognised."""
        header = "| Start | End | DURATION | Project | Story | Note |"
        assert detect_has_duration_column([header]) is True


# ---------------------------------------------------------------------------
# extract_table_blocks
# ---------------------------------------------------------------------------


class TestExtractTableBlocks:
    """Checks for ``extract_table_blocks``."""

    def test_single_table(self):
        """A lone table comes back as exactly one block, unchanged."""
        found = extract_table_blocks(WITH_DURATION)
        assert len(found) == 1
        assert found[0] == WITH_DURATION

    def test_two_tables_separated_by_prose(self):
        """Two tables with a heading and prose in between give two blocks."""
        filler = ["", "# Next day", "some prose", ""]
        document = [*WITH_DURATION, *filler, *WITHOUT_DURATION]
        assert len(extract_table_blocks(document)) == 2

    def test_prose_between_tables_not_included(self):
        """A prose line directly between two tables ends up in neither block."""
        document = [*WITH_DURATION, "some note", *WITHOUT_DURATION]
        found = extract_table_blocks(document)
        assert len(found) == 2
        for block in found:
            assert "some note" not in block

    def test_single_line_table_discarded(self):
        """A single table-looking line on its own is not returned as a block."""
        assert extract_table_blocks(["| Start | End |"]) == []

    def test_empty_input(self):
        """No lines in, no blocks out."""
        no_lines = []
        assert extract_table_blocks(no_lines) == []

    def test_no_tables(self):
        """A document consisting only of a heading and prose has no blocks."""
        document = ["# heading", "", "prose"]
        assert extract_table_blocks(document) == []

    def test_table_at_end_of_file_captured(self):
        """A table that is the very last thing in the input is still found."""
        # The table rows are the final lines: nothing (not even an empty
        # line) follows them.
        document = ["# heading", "", *WITH_DURATION]
        assert len(extract_table_blocks(document)) == 1


# ---------------------------------------------------------------------------
# parse_table
# ---------------------------------------------------------------------------


class TestParseTable:
    """Checks for ``parse_table``."""

    def test_with_duration_column(self):
        """Rows are read using the explicit Duration column."""
        parsed = parse_table(WITH_DURATION, has_duration_col=True)
        assert len(parsed) == 3
        first, last = parsed[0], parsed[2]
        assert first["project"] == "bugs"
        assert first["duration_hours"] == 0.5
        assert last["project"] == "scrum"
        assert last["note"] == "dsu"

    def test_without_duration_column(self):
        """Without a Duration column the hours come from Start and End."""
        parsed = parse_table(WITHOUT_DURATION, has_duration_col=False)
        assert len(parsed) == 2
        half_hour, three_quarters = parsed[0], parsed[1]
        assert half_hour["duration_hours"] == 0.5  # 08:00–08:30
        assert three_quarters["duration_hours"] == 0.75  # 08:30–09:15

    def test_header_row_skipped(self):
        """The header line must not show up as a data row."""
        parsed = parse_table(WITH_DURATION)
        assert not any(row["start"] == "Start" for row in parsed)

    def test_separator_row_skipped(self):
        """The ``|---|`` separator line must not show up as a data row."""
        parsed = parse_table(WITH_DURATION)
        assert not any(row["start"] == "---" for row in parsed)

    def test_markdown_link_stripped_in_story(self):
        """A markdown link in the Story cell is reduced to its link text."""
        table = [
            "| Start | End   | Duration | Project | Story                      | Note |",
            "|-------|-------|----------|---------|----------------------------|------|",
            "| 08:00 | 08:30 | 00:30    | bugs    | [ticket 1](:/abc123)       |      |",
        ]
        parsed = parse_table(table)
        assert parsed[0]["story"] == "ticket 1"

    def test_invalid_duration_row_skipped(self):
        """A row whose Duration cell is not a time value is dropped."""
        table = [
            "| Start | End   | Duration | Project | Story | Note |",
            "|-------|-------|----------|---------|-------|------|",
            "| 08:00 | 08:30 | bad      | bugs    |       |      |",
        ]
        parsed = parse_table(table)
        assert parsed == []

    def test_empty_end_time_row_skipped(self):
        """A row with an empty End cell is dropped when there is no Duration column."""
        table = [
            "| Start | End   | Project | Story | Note |",
            "|-------|-------|---------|-------|------|",
            "| 09:55 |       | bugs    |       |      |",
        ]
        parsed = parse_table(table, has_duration_col=False)
        assert parsed == []

    def test_empty_input(self):
        """No lines in, no rows out."""
        no_lines = []
        assert parse_table(no_lines) == []

    def test_non_table_lines_ignored(self):
        """Heading, blank and prose lines ahead of the table are ignored."""
        preamble = ["# My Timesheet", "", "Some prose."]
        parsed = parse_table([*preamble, *WITH_DURATION])
        assert len(parsed) == 3


# ---------------------------------------------------------------------------
# parse_document
# ---------------------------------------------------------------------------


class TestParseDocument:
    """Checks for ``parse_document``."""

    @staticmethod
    def _read_week_lines():
        """Return the lines of the W21 sample file (line endings removed)."""
        with open(WEEK_FILE, encoding="utf-8") as handle:
            return handle.read().splitlines()

    def test_single_table(self):
        """A document holding one table yields that table's rows."""
        parsed = parse_document(WITHOUT_DURATION)
        assert len(parsed) == 2

    def test_multiple_tables_combined(self):
        """Rows of several tables are returned together."""
        document = [*WITHOUT_DURATION, "", "# Next day", "", *WITHOUT_DURATION]
        assert len(parse_document(document)) == 4

    def test_prose_between_tables_ignored(self):
        """Prose and bullet lines between tables contribute no rows."""
        in_between = ["some notes", "- a bullet point", ""]
        document = [*WITHOUT_DURATION, *in_between, *WITHOUT_DURATION]
        assert len(parse_document(document)) == 4

    def test_mixed_duration_formats(self):
        """Tables with and without a Duration column can share one document."""
        document = [*WITH_DURATION, "", "## Next day", "", *WITHOUT_DURATION]
        parsed = parse_document(document)
        # 3 rows from WITH_DURATION plus 2 rows from WITHOUT_DURATION
        assert len(parsed) == 5

    def test_empty_input(self):
        """No lines in, no rows out."""
        no_lines = []
        assert parse_document(no_lines) == []

    def test_week_file(self):
        """Smoke test against the real W21 weekly timesheet file."""
        parsed = parse_document(self._read_week_lines())

        # The file has 5 daily tables, so a healthy number of rows is expected.
        assert len(parsed) > 20

        # Every row carries the expected keys and a positive duration.
        for row in parsed:
            assert "project" in row
            assert "duration_hours" in row
            assert row["duration_hours"] > 0

        # The incomplete row (09:55 | empty end) must have been skipped.
        # NOTE(review): this only re-checks the duration of any surviving
        # 09:55/bugs rows, which the loop above already covers; it does not
        # by itself prove that the incomplete row was dropped.
        for row in parsed:
            if row["start"] == "09:55" and row["project"] == "bugs":
                assert row["duration_hours"] > 0

    def test_week_file_no_markdown_links_in_stories(self):
        """Markdown link syntax must be stripped from story/note fields."""
        parsed = parse_document(self._read_week_lines())
        link_marker = "](:"
        for row in parsed:
            assert link_marker not in row["story"], (
                f"Link not stripped in story: {row['story']!r}"
            )
            assert link_marker not in row["note"], (
                f"Link not stripped in note: {row['note']!r}"
            )


# ---------------------------------------------------------------------------
# filter_rows_by_date
# ---------------------------------------------------------------------------


class TestFilterRowsByDate:
    """Checks for ``filter_rows_by_date``.

    Most tests reuse the W21 sample file, which has one table per
    day-heading.
    """

    @pytest.fixture(scope="class")
    def week_lines(self):
        """Return the lines of the W21 sample file, read once per class.

        The file is loaded through a fixture rather than in the class body so
        that a missing or unreadable file only fails the tests that need it,
        instead of breaking collection of the whole module at import time.
        """
        with open(WEEK_FILE, encoding="utf-8") as f:
            return f.read().splitlines()

    def test_returns_only_matching_day(self, week_lines):
        """Filtering on a date present in the file yields that day's rows."""
        rows = filter_rows_by_date(week_lines, date(2026, 5, 22))
        assert len(rows) > 0
        # Friday has these projects in the sample file
        projects = {r["project"] for r in rows}
        assert "scrum" in projects

    def test_different_day_returns_different_rows(self, week_lines):
        """Two different dates must not produce the same row list."""
        rows_fri = filter_rows_by_date(week_lines, date(2026, 5, 22))
        rows_mon = filter_rows_by_date(week_lines, date(2026, 5, 18))
        assert rows_fri != rows_mon
        assert len(rows_mon) > 0

    def test_no_match_returns_empty(self, week_lines):
        """A date that does not occur in the file yields no rows."""
        rows = filter_rows_by_date(week_lines, date(2026, 1, 1))
        assert rows == []

    def test_inline_document(self):
        """Each day-heading selects only the table that follows it."""
        lines = [
            "# Maandag - 2026-05-18",
            "| Start | End   | Project | Story | Note |",
            "|-------|-------|---------|-------|------|",
            "| 08:00 | 08:30 | bugs    |       | fix  |",
            "",
            "# Dinsdag - 2026-05-19",
            "| Start | End   | Project | Story | Note |",
            "|-------|-------|---------|-------|------|",
            "| 09:00 | 09:30 | scrum   |       | dsu  |",
        ]
        rows = filter_rows_by_date(lines, date(2026, 5, 18))
        assert len(rows) == 1
        assert rows[0]["project"] == "bugs"

        rows = filter_rows_by_date(lines, date(2026, 5, 19))
        assert len(rows) == 1
        assert rows[0]["project"] == "scrum"

    def test_empty_input(self):
        """No lines in, no rows out."""
        assert filter_rows_by_date([], date(2026, 5, 22)) == []


# ---------------------------------------------------------------------------
# build_description
# ---------------------------------------------------------------------------


class TestBuildDescription:
    """Checks for ``build_description``."""

    def test_story_and_note(self):
        """Story and note are joined with " - "."""
        description = build_description("story", "note")
        assert description == "story - note"

    def test_story_only(self):
        """With an empty note only the story is returned."""
        description = build_description("story", "")
        assert description == "story"

    def test_note_only(self):
        """With an empty story only the note is returned."""
        description = build_description("", "note")
        assert description == "note"

    def test_both_empty(self):
        """Two empty inputs produce the placeholder "/"."""
        description = build_description("", "")
        assert description == "/"

    def test_strips_whitespace(self):
        """Surrounding whitespace on both parts is removed before joining."""
        padded_story, padded_note = "  story  ", "  note  "
        description = build_description(padded_story, padded_note)
        assert description == "story - note"


# ---------------------------------------------------------------------------
# aggregate_rows
# ---------------------------------------------------------------------------


class TestAggregateRows:
    """Checks for ``aggregate_rows``."""

    def test_same_project_story_summed(self):
        """Rows sharing project and story are merged and their hours added."""
        entries = aggregate_rows(parse_table(WITH_DURATION))
        bugs_entry = next(
            entry for entry in entries if entry["project"] == "bugs"
        )
        # 00:30 + 00:30
        assert bugs_entry["quantity"] == 1.0

    def test_distinct_entries_preserved(self):
        """Rows that differ stay separate entries."""
        entries = aggregate_rows(parse_table(WITH_DURATION))
        # bugs/story-one and scrum/dsu
        assert len(entries) == 2

    def test_insertion_order_preserved(self):
        """Entries come back in the order their first row appeared."""
        entries = aggregate_rows(parse_table(WITH_DURATION))
        first, second = entries[0], entries[1]
        assert first["project"] == "bugs"
        assert second["project"] == "scrum"

    def test_empty_input(self):
        """No rows in, no entries out."""
        no_rows = []
        assert aggregate_rows(no_rows) == []