Alter Celva Acel Ngewe Gaya 69 Full Extra Quality Durasi Terbaru Indo18 ❲DELUXE | 2026❳
The keywords suggest a few things:
# --------------------------------------------------------------
# file: video_title_parser.py
# --------------------------------------------------------------
import re
import json
from dataclasses import dataclass, asdict
from typing import List, Optional
# ------------------------------------------------------------------
# Helper tables – you can extend them without touching the core logic
# ------------------------------------------------------------------
# Maps a normalised quality phrase to the canonical label reported to callers.
# Keys are lower-case words separated by single spaces so that they can be
# compared against normalised title tokens joined with " ".
# NOTE(review): "ultra hd" is mapped to 1080p here; Ultra HD usually denotes
# 4K - confirm this is intentional.
QUALITY_MAP = {
    # slang → canonical
    "full extra quality": "1080p",
    "full hd": "1080p",
    "ultra hd": "1080p",
    "hd": "720p",
    "720p": "720p",
    "1080p": "1080p",
    "4k": "4K",
    "sd": "SD",
}
# Normalised tokens that flag a title as a new release.
# Token normalisation strips underscores, so "new_release" can only ever
# arrive as "newrelease"; the underscore spelling is kept for any caller that
# checks un-normalised text.
NEW_RELEASE_TOKENS = {
    "baru",
    "terbaru",
    "new_release",
    "newrelease",
    "new",
    "latest",
}
# Maps a normalised region token to a display name.
# NOTE(review): the short codes "id" and "my" are also ordinary words, so a
# title containing them will be tagged with a region - confirm that is wanted.
REGION_MAP = {
    "indo": "Indonesia",
    "indonesia": "Indonesia",
    "id": "Indonesia",
    "malay": "Malaysia",
    "my": "Malaysia",
    "sg": "Singapore",
}
# Lower-case genre names recognised in a title.
GENRE_SET = {
    "lifestyle",
    "entertainment",
    "music",
    "comedy",
    "drama",
    "news",
    "sports",
    "gaming",
    "tech",
    "travel",
    "food",
    "fashion",
    "beauty",
}
def _normalise_token(tok: str) -> str:
    """Return *tok* lower-cased with every character outside ``a-z0-9`` removed.

    Punctuation, underscores and non-ASCII characters are all dropped, so a
    token made only of such characters normalises to the empty string.
    """
    return re.sub(r"[^a-z0-9]", "", tok.lower())
def _detect_year(tokens: List[str]) -> Optional[int]:
    """Return the first token that is a four-digit year between 1900 and 2099.

    Args:
        tokens: Normalised title tokens.

    Returns:
        The year as an ``int``, or ``None`` when no token qualifies.
    """
    for tok in tokens:
        # The quantifier must be ``{4}``: a bare ``\d4`` only matches one
        # digit followed by a literal "4" and so never sees a real year.
        if re.fullmatch(r"\d{4}", tok):
            yr = int(tok)
            if 1900 <= yr <= 2099:
                return yr
    return None
def _detect_quality(tokens: List[str]) -> Optional[str]:
    """Return the canonical quality label named in *tokens*, if any.

    Every run of consecutive tokens is compared, as a whole phrase, against
    the keys of ``QUALITY_MAP``. Longer phrases are tried first so that
    "full hd" wins over the bare "hd" it contains. Matching is exact per
    phrase, so a key that merely appears inside another word does not count.

    Args:
        tokens: Normalised title tokens.

    Returns:
        The mapped label, or ``None`` when no phrase matches.
    """
    longest = max((len(key.split()) for key in QUALITY_MAP), default=0)
    for width in range(longest, 0, -1):
        # Slide a window of ``width`` tokens across the whole title, not just
        # its first few words.
        for start in range(len(tokens) - width + 1):
            phrase = " ".join(tokens[start:start + width])
            canon = QUALITY_MAP.get(phrase)
            if canon is not None:
                return canon
    return None
def _detect_new_release(tokens: List[str]) -> bool:
    """Return ``True`` when any token is listed in ``NEW_RELEASE_TOKENS``."""
    return any(tok in NEW_RELEASE_TOKENS for tok in tokens)
def _detect_region(tokens: List[str]) -> Optional[str]:
    """Return the region name for the first token found in ``REGION_MAP``.

    Returns ``None`` when no token is a known region key.
    """
    for tok in tokens:
        if tok in REGION_MAP:
            return REGION_MAP[tok]
    return None
def _detect_genres(tokens: List[str]) -> List[str]:
    """Return the title-cased genres present in *tokens*, sorted and unique.

    A token counts as a genre when it is a member of ``GENRE_SET``.
    """
    # A set comprehension collapses a genre that is mentioned more than once.
    found = {tok.title() for tok in tokens if tok in GENRE_SET}
    return sorted(found)
def _extract_title(tokens: List[str], meta_indices: set) -> str:
    """Re-assemble the tokens that are *not* part of the meta-data.

    Args:
        tokens: Title tokens in their original order.
        meta_indices: Positions in *tokens* to leave out of the result.

    Returns:
        The remaining tokens joined by single spaces. Purely alphabetic
        tokens are capitalised (first letter upper, rest lower); any token
        containing a digit or punctuation is kept exactly as given.
    """
    title_parts = [tok for i, tok in enumerate(tokens) if i not in meta_indices]
    return " ".join(
        (part.capitalize() if part.isalpha() else part) for part in title_parts
    )
# ------------------------------------------------------------------
# Public dataclass – the consumer‑friendly result object
# ------------------------------------------------------------------
@dataclass
class ParsedTitle:
    """Structured result of parsing one raw video title.

    Only ``original`` and ``clean_title`` are required; every meta-data field
    defaults to "not detected".
    """

    original: str
    clean_title: str
    year: Optional[int] = None
    quality: Optional[str] = None
    is_new_release: bool = False
    region: Optional[str] = None
    # ``None`` (the default) and an empty list both mean "no genres".
    genres: Optional[List[str]] = None

    def as_dict(self) -> dict:
        """Return the fields as a plain ``dict`` keyed by field name."""
        return asdict(self)

    def to_json(self, **kwargs) -> str:
        """Serialise the fields to JSON; *kwargs* are passed to ``json.dumps``."""
        return json.dumps(self.as_dict(), **kwargs)

    def display_title(self) -> str:
        """Human-readable, SEO-friendly string.

        Starts with the clean title and appends, when set, the year in
        parentheses, the quality label, and the genres joined by " / ".
        """
        parts = [self.clean_title]
        if self.year:
            # The replacement fields need braces; without them the literal
            # text "(self.year)" is emitted instead of the value.
            parts.append(f"({self.year})")
        if self.quality:
            parts.append(f"– {self.quality}")
        if self.genres:
            parts.append("– " + " / ".join(self.genres))
        return " ".join(parts)
# ------------------------------------------------------------------
# Core parser – the only public entry point
# ------------------------------------------------------------------
class VideoTitleParser:
    """Turns a raw video title into a :class:`ParsedTitle`.

    ``parse`` is the only public entry point; the class holds no state.
    """

    @staticmethod
    def _quality_indices(tokens: List[str]) -> set:
        """Return the index of every token that is part of a quality phrase.

        A phrase is a run of consecutive tokens that, joined by single
        spaces, equals a key of ``QUALITY_MAP``. Every word of the phrase is
        reported, and matching is exact, so a key occurring inside a longer
        word is not reported.
        """
        hits = set()
        widths = {len(key.split()) for key in QUALITY_MAP}
        for width in widths:
            for start in range(len(tokens) - width + 1):
                if " ".join(tokens[start:start + width]) in QUALITY_MAP:
                    hits.update(range(start, start + width))
        return hits

    @staticmethod
    def parse(raw_title: str) -> ParsedTitle:
        """Split *raw_title* into meta-data and a cleaned display title.

        Args:
            raw_title: The title exactly as received.

        Returns:
            A ``ParsedTitle`` carrying the detected year, quality,
            new-release flag, region and genres, plus ``clean_title`` built
            from the raw tokens that were not recognised as meta-data.
        """
        # 1️⃣ Normalise & tokenise. The raw tokens are kept alongside the
        # normalised ones so the clean title preserves the original spelling.
        tokens_raw = re.split(r"\s+", raw_title.strip())
        tokens = [_normalise_token(tok) for tok in tokens_raw]

        # 2️⃣ Detect meta-data, remembering the index positions we consume
        meta_indices = set()

        year = _detect_year(tokens)
        if year:
            year_text = str(year)
            meta_indices.update(i for i, t in enumerate(tokens) if t == year_text)

        quality = _detect_quality(tokens)
        if quality:
            # Consume every word of each quality phrase, not just its first.
            meta_indices.update(VideoTitleParser._quality_indices(tokens))

        is_new = _detect_new_release(tokens)
        if is_new:
            meta_indices.update(
                i for i, t in enumerate(tokens) if t in NEW_RELEASE_TOKENS
            )

        region = _detect_region(tokens)
        if region:
            meta_indices.update(i for i, t in enumerate(tokens) if t in REGION_MAP)

        genres = _detect_genres(tokens)
        if genres:
            meta_indices.update(i for i, t in enumerate(tokens) if t in GENRE_SET)

        # 3️⃣ Build the cleaned title
        clean_title = _extract_title(tokens_raw, meta_indices)

        return ParsedTitle(
            original=raw_title,
            clean_title=clean_title,
            year=year,
            quality=quality,
            is_new_release=is_new,
            region=region,
            genres=genres,
        )
Pattern Matching
Genre Extraction
Title Reconstruction
Result Object
.as_dict(), .display_title(), .to_json(). If your goal is to create content around lifestyle and entertainment, here are some ideas: Pattern Matching