diff --git a/custom_components/bahmcloud_store/providers.py b/custom_components/bahmcloud_store/providers.py index a491ab8..494f61d 100644 --- a/custom_components/bahmcloud_store/providers.py +++ b/custom_components/bahmcloud_store/providers.py @@ -82,9 +82,6 @@ async def _safe_text(session, url: str, *, headers: dict | None = None, timeout: def _extract_tag_from_github_url(url: str) -> str | None: - # Examples: - # https://github.com/owner/repo/releases/tag/v1.2.3 - # https://github.com/owner/repo/tag/v1.2.3 (rare) m = re.search(r"/releases/tag/([^/?#]+)", url) if m: return m.group(1) @@ -94,11 +91,83 @@ def _extract_tag_from_github_url(url: str) -> str | None: return None +def _strip_html(s: str) -> str: + # minimal HTML entity cleanup for meta descriptions + out = ( + s.replace("&", "&") + .replace(""", '"') + .replace("'", "'") + .replace("<", "<") + .replace(">", ">") + ) + return re.sub(r"\s+", " ", out).strip() + + +def _extract_meta(html: str, *, prop: str | None = None, name: str | None = None) -> str | None: + # Extract + # or + if prop: + # property="..." content="..." + m = re.search( + r']+property=["\']' + re.escape(prop) + r'["\'][^>]+content=["\']([^"\']+)["\']', + html, + flags=re.IGNORECASE, + ) + if m: + return _strip_html(m.group(1)) + m = re.search( + r']+content=["\']([^"\']+)["\'][^>]+property=["\']' + re.escape(prop) + r'["\']', + html, + flags=re.IGNORECASE, + ) + if m: + return _strip_html(m.group(1)) + + if name: + m = re.search( + r']+name=["\']' + re.escape(name) + r'["\'][^>]+content=["\']([^"\']+)["\']', + html, + flags=re.IGNORECASE, + ) + if m: + return _strip_html(m.group(1)) + m = re.search( + r']+content=["\']([^"\']+)["\'][^>]+name=["\']' + re.escape(name) + r'["\']', + html, + flags=re.IGNORECASE, + ) + if m: + return _strip_html(m.group(1)) + + return None + + +async def _github_description_html(hass: HomeAssistant, owner: str, repo: str) -> str | None: + """ + GitHub API may be rate-limited; fetch public HTML and read meta description. + """ + session = async_get_clientsession(hass) + headers = { + "User-Agent": UA, + "Accept": "text/html,application/xhtml+xml", + } + + html, status = await _safe_text(session, f"https://github.com/{owner}/{repo}", headers=headers) + if not html or status != 200: + return None + + desc = _extract_meta(html, prop="og:description") + if desc: + return desc + + desc = _extract_meta(html, name="description") + if desc: + return desc + + return None + + async def _github_latest_version_atom(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]: - """ - Uses GitHub public Atom feed (no api.github.com). - This avoids API rate limits and works for most public repos. - """ session = async_get_clientsession(hass) headers = {"User-Agent": UA, "Accept": "application/atom+xml,text/xml;q=0.9,*/*;q=0.8"} @@ -111,8 +180,6 @@ async def _github_latest_version_atom(hass: HomeAssistant, owner: str, repo: str except Exception: return None, None - # Atom namespace can vary; search entries robustly - # Find first then a that points to a release tag. for entry in root.findall(".//{*}entry"): for link in entry.findall(".//{*}link"): href = link.attrib.get("href") @@ -126,9 +193,6 @@ async def _github_latest_version_atom(hass: HomeAssistant, owner: str, repo: str async def _github_latest_version_redirect(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]: - """ - Fallback: HEAD /releases/latest and parse Location header. - """ session = async_get_clientsession(hass) headers = {"User-Agent": UA} url = f"https://github.com/{owner}/{repo}/releases/latest" @@ -146,9 +210,6 @@ async def _github_latest_version_redirect(hass: HomeAssistant, owner: str, repo: async def _github_latest_version_api(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]: - """ - Optional API path (may be rate-limited). Keep as last resort. - """ session = async_get_clientsession(hass) headers = {"Accept": "application/vnd.github+json", "User-Agent": UA} @@ -168,12 +229,6 @@ async def _github_latest_version_api(hass: HomeAssistant, owner: str, repo: str) async def _github_latest_version(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]: - """ - Durable strategy: - 1) Atom feed (no API) - 2) Redirect parse (no API) - 3) API fallback - """ tag, src = await _github_latest_version_atom(hass, owner, repo) if tag: return tag, src @@ -252,9 +307,10 @@ async def fetch_repo_info(hass: HomeAssistant, repo_url: str) -> RepoInfo: try: if provider == "github": - # Repo details: try API first, but don't fail if blocked + # Try API repo details (may be rate-limited) headers = {"Accept": "application/vnd.github+json", "User-Agent": UA} data, status = await _safe_json(session, f"https://api.github.com/repos/{owner}/{repo}", headers=headers) + if isinstance(data, dict): info.description = data.get("description") info.repo_name = _normalize_repo_name(data.get("name")) or repo @@ -262,9 +318,16 @@ async def fetch_repo_info(hass: HomeAssistant, repo_url: str) -> RepoInfo: if isinstance(data.get("owner"), dict) and data["owner"].get("login"): info.owner = data["owner"]["login"] else: - # If API blocked, at least keep defaults, provider remains github + # If API blocked, still set reasonable defaults if status == 403: _LOGGER.debug("GitHub API blocked/rate-limited for repo info %s/%s", owner, repo) + info.default_branch = "main" + + # If description missing, fetch from GitHub HTML + if not info.description: + desc = await _github_description_html(hass, owner, repo) + if desc: + info.description = desc ver, src = await _github_latest_version(hass, owner, repo) info.latest_version = ver