Alter Celva Acel Ngewe Gaya 69 Full Extra Quality Durasi Terbaru Indo18 ❲DELUXE | 2026❳

The keywords suggest a few things:
# --------------------------------------------------------------
# file: video_title_parser.py
# --------------------------------------------------------------
import re
import json
from dataclasses import dataclass, asdict
from typing import List, Optional
# ------------------------------------------------------------------
# Helper tables – you can extend them without touching the core logic
# ------------------------------------------------------------------
# Maps a lower-case quality alias (one or more space-separated words) to the
# canonical label reported by the parser.
QUALITY_MAP = {
    # slang → canonical
    "full extra quality": "1080p",
    "full hd": "1080p",
    # NOTE(review): "ultra hd" normally denotes 2160p/4K – confirm that
    # "1080p" is really the intended canonical value.
    "ultra hd": "1080p",
    "hd": "720p",
    "720p": "720p",
    "1080p": "1080p",
    "4k": "4K",
    "sd": "SD",
}

# Tokens that flag a title as a new release (membership test only).
NEW_RELEASE_TOKENS = ("baru", "terbaru", "new_release", "new", "latest")

# Maps a lower-case region token to the region's display name.
REGION_MAP = {
    "indo": "Indonesia",
    "indonesia": "Indonesia",
    "id": "Indonesia",
    "malay": "Malaysia",
    "my": "Malaysia",
    "sg": "Singapore",
}

# Lower-case genre names recognised as single tokens.
GENRE_SET = {
    "lifestyle",
    "entertainment",
    "music",
    "comedy",
    "drama",
    "news",
    "sports",
    "gaming",
    "tech",
    "travel",
    "food",
    "fashion",
    "beauty",
}
def _normalise_token(tok: str) -> str:
    """Return ``tok`` lower-cased with every character outside a-z / 0-9 removed."""
    lowered = tok.lower()
    stripped = re.sub(r"[^a-z0-9]", "", lowered)
    return stripped
def _detect_year(tokens: List[str]) -> Optional[int]:
    """Return the first token that is a four-digit year in 1900..2099.

    Args:
        tokens: Normalised title tokens.

    Returns:
        The year as an ``int``, or ``None`` when no token qualifies.
    """
    for tok in tokens:
        # Exactly four digits; the previous pattern ``\d4`` only matched a
        # single digit followed by a literal "4", so no year was ever found.
        if re.fullmatch(r"\d{4}", tok):
            yr = int(tok)
            if 1900 <= yr <= 2099:
                return yr
    return None
def _detect_quality(tokens: List[str]) -> Optional[str]:
    """Return the canonical quality label found in ``tokens``, if any.

    Every run of consecutive tokens is joined with single spaces and looked
    up in ``QUALITY_MAP`` as an exact key. Longer aliases are tried before
    shorter ones, so "full hd" wins over the bare "hd" it contains. Within
    one alias length the left-most match wins.

    Matching is exact rather than substring-based, so an alias embedded in an
    unrelated word (e.g. "sd" inside "tuesday") is not reported, and an alias
    is found wherever it sits in the title, not only in the first three
    tokens.

    Args:
        tokens: Normalised title tokens.

    Returns:
        The mapped label, or ``None`` when no alias occurs.
    """
    # Derive the widest alias from the table so new entries need no code change.
    longest = max((len(alias.split()) for alias in QUALITY_MAP), default=0)
    for width in range(longest, 0, -1):
        for start in range(len(tokens) - width + 1):
            phrase = " ".join(tokens[start:start + width])
            if phrase in QUALITY_MAP:
                return QUALITY_MAP[phrase]
    return None
def _detect_new_release(tokens: List[str]) -> bool:
    """Report whether at least one token is listed in ``NEW_RELEASE_TOKENS``."""
    for candidate in tokens:
        if candidate in NEW_RELEASE_TOKENS:
            return True
    return False
def _detect_region(tokens: List[str]) -> Optional[str]:
    """Return the ``REGION_MAP`` value for the first token that is a key in it.

    Yields ``None`` when no token is a known region key.
    """
    matches = (REGION_MAP[candidate] for candidate in tokens if candidate in REGION_MAP)
    return next(matches, None)
def _detect_genres(tokens: List[str]) -> List[str]:
    """Return the title-cased genres present in ``tokens``.

    Args:
        tokens: Normalised title tokens.

    Returns:
        A sorted list with one entry per distinct token found in
        ``GENRE_SET``; empty when none is found.
    """
    # A set comprehension removes duplicates when a genre is repeated.
    found = {tok.title() for tok in tokens if tok in GENRE_SET}
    return sorted(found)
def _extract_title(tokens: List[str], meta_indices: set) -> str:
    """Join the tokens whose positions are absent from ``meta_indices``.

    Purely alphabetic tokens are passed through ``str.capitalize``; every
    other token (digits, mixed, punctuation) is emitted unchanged. Tokens are
    separated by a single space.
    """
    words = []
    for position, token in enumerate(tokens):
        if position in meta_indices:
            continue
        if token.isalpha():
            words.append(token.capitalize())
        else:
            words.append(token)
    return " ".join(words)
# ------------------------------------------------------------------
# Public dataclass – the consumer‑friendly result object
# ------------------------------------------------------------------
@dataclass
class ParsedTitle:
    """Structured result of parsing one raw video title."""

    original: str  # the title string the result was built from
    clean_title: str  # title text intended for display
    year: Optional[int] = None
    quality: Optional[str] = None
    is_new_release: bool = False
    region: Optional[str] = None
    # ``None`` (the default) and an empty list both mean "no genres".
    genres: Optional[List[str]] = None

    def as_dict(self) -> dict:
        """Return the fields as a plain dictionary."""
        return asdict(self)

    def to_json(self, **kwargs) -> str:
        """Serialise the fields to JSON.

        Args:
            **kwargs: Forwarded unchanged to ``json.dumps``.
        """
        return json.dumps(self.as_dict(), **kwargs)

    def display_title(self) -> str:
        """Return a human-readable title followed by year, quality and genres.

        Parts that are unset are skipped; the remaining parts are joined with
        single spaces.
        """
        parts = [self.clean_title]
        if self.year is not None:
            parts.append(f"({self.year})")
        if self.quality:
            parts.append(f"– {self.quality}")
        if self.genres:
            parts.append("– " + " / ".join(self.genres))
        return " ".join(parts)
# ------------------------------------------------------------------
# Core parser – the only public entry point
# ------------------------------------------------------------------
class VideoTitleParser:
    """Namespace for the title parser; ``parse`` is the single entry point."""

    @staticmethod
    def parse(raw_title: str) -> ParsedTitle:
        """Split ``raw_title`` into a cleaned title and its metadata.

        The title is split on whitespace. Each token is normalised for
        matching, and the positions of tokens recognised as year, quality,
        new-release marker, region or genre are recorded. The cleaned title
        is rebuilt from the raw tokens at all other positions.

        Args:
            raw_title: The title string to analyse.

        Returns:
            A ``ParsedTitle`` holding the original string, the cleaned title
            and whatever metadata was detected.
        """
        # 1. Normalise & tokenise. ``tokens`` and ``tokens_raw`` stay
        #    index-aligned, so positions found in one apply to the other.
        tokens_raw = raw_title.split()
        tokens = [_normalise_token(tok) for tok in tokens_raw]

        # 2. Detect metadata, remembering the index positions we consume.
        meta_indices = set()

        year = _detect_year(tokens)
        if year:
            meta_indices.update(i for i, t in enumerate(tokens) if t == str(year))

        quality = _detect_quality(tokens)
        if quality:
            # Consume every word of each alias (longest alias first) so that
            # a multi-word alias such as "full extra quality" leaves the
            # title completely, not just its first word.
            widest = max((len(alias.split()) for alias in QUALITY_MAP), default=1)
            for start in range(len(tokens)):
                for width in range(widest, 0, -1):
                    if start + width > len(tokens):
                        continue
                    if " ".join(tokens[start:start + width]) in QUALITY_MAP:
                        meta_indices.update(range(start, start + width))
                        break

        is_new = _detect_new_release(tokens)
        if is_new:
            meta_indices.update(i for i, t in enumerate(tokens) if t in NEW_RELEASE_TOKENS)

        region = _detect_region(tokens)
        if region:
            meta_indices.update(i for i, t in enumerate(tokens) if t in REGION_MAP)

        genres = _detect_genres(tokens)
        if genres:
            meta_indices.update(i for i, t in enumerate(tokens) if t in GENRE_SET)

        # 3. Build the cleaned title from the raw (un-normalised) tokens.
        clean_title = _extract_title(tokens_raw, meta_indices)

        return ParsedTitle(
            original=raw_title,
            clean_title=clean_title,
            year=year,
            quality=quality,
            is_new_release=is_new,
            region=region,
            genres=genres,
        )
Pattern Matching
Genre Extraction
Title Reconstruction
Result Object
Helper methods: .as_dict(), .display_title(), .to_json().
If your goal is to create content around lifestyle and entertainment, here are some ideas: Pattern Matching
Alter Celva Acel Ngewe Gaya 69 Full Extra Quality Durasi Terbaru Indo18 ❲DELUXE | 2026❳

Trending