Miab-376-javhd.today02-01-29 Min -
Error handling
API
Unit tests – 100 % branch coverage for the parser and API.
Documentation – Updated README, Swagger spec, and in‑code docstrings.
| # | As a … | I want … | So that … |
|---|--------|----------|-----------|
| 1 | Scraper | The scraper to recognise strings like 02‑01‑29 Min on a video page | I can store an accurate duration for every video |
| 2 | API consumer | An endpoint `/v1/videos/{id}` to return `duration_seconds` and `duration_iso8601` | I don’t need to parse the raw string myself |
| 3 | Analytics | A utility `format_duration(seconds)` that returns a human‑readable hh:mm:ss string | Reports are consistent across services |
"""
javhd_today.duration
~~~~~~~~~~~~~~~~~~~~
Utility for parsing the `hh-mm-ss Min` duration format used on
javhd.today video pages.
Public API
----------
- `parse_duration(raw: str) -> Duration`
- `format_duration(seconds: int) -> str`
Both functions are pure and have no external dependencies.
"""
import re
from dataclasses import dataclass
from typing import Final
# --------------------------------------------------------------------------- #
# Exceptions
# --------------------------------------------------------------------------- #
class DurationParseError(ValueError):
    """Raised when a duration string cannot be parsed.

    Subclasses ``ValueError`` so callers that already handle ``ValueError``
    keep working.
    """
# --------------------------------------------------------------------------- #
# Dataclass representing a normalised duration
# --------------------------------------------------------------------------- #
@dataclass(frozen=True, slots=True)
class Duration:
"""A normalised video duration.
Attributes
----------
total_seconds: int
Total number of seconds.
iso8601: str
ISO‑8601 representation, e.g. ``PT2H1M29S``.
"""
total_seconds: int
iso8601: str
# --------------------------------------------------------------------------- #
# Regex – compiled once (performance!)
# --------------------------------------------------------------------------- #
# Matches e.g. "02-01-29 Min": three two-digit fields separated by hyphens,
# then whitespace and the literal word "Min" (case-insensitive).
_DURATION_RE: Final[re.Pattern] = re.compile(
    r"""^\s*            # optional leading whitespace
    (?P<h>\d{2})-       # two‑digit hours
    (?P<m>\d{2})-       # two‑digit minutes
    (?P<s>\d{2})        # two‑digit seconds
    \s+Min\s*$          # literal "Min" with optional surrounding spaces
    """,
    re.VERBOSE | re.IGNORECASE,
)


# --------------------------------------------------------------------------- #
# Core parsing logic
# --------------------------------------------------------------------------- #
def _to_int(value: str) -> int:
    """Convert a zero-padded numeric string to int.

    Raises
    ------
    DurationParseError
        If *value* is not a valid integer literal; the original
        ``ValueError`` is chained as the cause.
    """
    try:
        return int(value)
    except ValueError as exc:
        raise DurationParseError(f"Invalid numeric component '{value}'.") from exc


def parse_duration(raw: str) -> Duration:
    """Parse a raw duration string from javhd.today.

    Parameters
    ----------
    raw: str
        The raw text, e.g. ``"02-01-29 Min"``.

    Returns
    -------
    Duration
        Normalised duration holding the total number of seconds and an
        ISO-8601 string such as ``PT2H1M29S``.

    Raises
    ------
    DurationParseError
        If *raw* is ``None`` or does not match the expected
        ``HH-MM-SS Min`` pattern.
    """
    if raw is None:
        raise DurationParseError("Duration string is None.")

    match = _DURATION_RE.match(raw)
    if not match:
        raise DurationParseError(
            f"Unable to parse duration '{raw}'. Expected format HH-MM-SS Min."
        )

    hours = _to_int(match.group("h"))
    minutes = _to_int(match.group("m"))
    seconds = _to_int(match.group("s"))
    total_seconds = hours * 3600 + minutes * 60 + seconds

    # Build the ISO-8601 duration, omitting zero-valued components.
    parts = []
    if hours:
        parts.append(f"{hours}H")
    if minutes:
        parts.append(f"{minutes}M")
    if seconds or not parts:  # always include seconds if everything else is zero
        parts.append(f"{seconds}S")
    iso8601 = "PT" + "".join(parts)

    return Duration(total_seconds=total_seconds, iso8601=iso8601)
# --------------------------------------------------------------------------- #
# Helper for UI / reporting: seconds → hh:mm:ss
# --------------------------------------------------------------------------- #
def format_duration(seconds: int) -> str:
    """Return a human-readable ``HH:MM:SS`` string for *seconds*.

    Parameters
    ----------
    seconds: int
        Number of seconds (non-negative).

    Returns
    -------
    str
        ``HH:MM:SS`` with each field zero-padded to at least two digits.
        Hours are not wrapped, so values of 100 hours or more produce a
        wider hours field.

    Raises
    ------
    ValueError
        If *seconds* is negative.
    """
    if seconds < 0:
        raise ValueError("seconds must be non‑negative")
    h, remainder = divmod(seconds, 3600)
    m, s = divmod(remainder, 60)
    return f"{h:02d}:{m:02d}:{s:02d}"
At a retail price of $799, the MIAB‑376‑JAVHD sits above most mainstream wearables. Early adoption is likely to be concentrated among tech‑savvy professionals and affluent consumers, potentially widening the gap between “cognitive‑augmented” and “non‑augmented” workers.
# scraper/javhd_today_worker.py (excerpt)
from javhd_today.duration import parse_duration, DurationParseError
def extract_video_data(page_html: str) -> dict:
    """Return a dict with the raw and normalised duration fields of a page.

    The duration text is read from the first element matching the CSS
    selector ``.video-duration`` and handed to ``parse_duration``. When
    parsing fails, a warning is logged and both normalised fields are
    ``None`` so one bad page does not abort the scrape.

    Parameters
    ----------
    page_html: str
        HTML of a video page.

    Returns
    -------
    dict
        Keys ``raw_duration``, ``duration_seconds`` and
        ``duration_iso8601`` (plus the fields elided from this excerpt).
    """
    soup = BeautifulSoup(page_html, "html.parser")

    # select_one() yields None when nothing matches; keep None so that
    # parse_duration reports it as a DurationParseError instead of this
    # function crashing with AttributeError.
    node = soup.select_one(".video-duration")
    raw_duration = node.get_text(strip=True) if node is not None else None

    try:
        dur = parse_duration(raw_duration)
    except DurationParseError as exc:
        # NOTE(review): video_id is not defined in this excerpt — confirm it
        # is in scope in the full worker module.
        logger.warning("Failed to parse duration for video %s: %s", video_id, exc)
        duration_seconds = None
        duration_iso8601 = None
    else:
        duration_seconds = dur.total_seconds
        duration_iso8601 = dur.iso8601

    return {
        "raw_duration": raw_duration,
        "duration_seconds": duration_seconds,
        "duration_iso8601": duration_iso8601,
        # … other fields …
    }