diff --git a/custom_components/bahmcloud_store/providers.py b/custom_components/bahmcloud_store/providers.py index 0898d2f..a7432b7 100644 --- a/custom_components/bahmcloud_store/providers.py +++ b/custom_components/bahmcloud_store/providers.py @@ -76,6 +76,35 @@ async def _safe_text(session, url: str, *, headers: dict | None = None, timeout: return None, None +def _extract_tag_from_github_url(url: str) -> str | None: + # Example: https://github.com/owner/repo/releases/tag/v1.2.3 + m = re.search(r"/releases/tag/([^/?#]+)", url or "") + if not m: + return None + return m.group(1).strip() or None + + +def _strip_html(s: str) -> str: + if not s: + return "" + return re.sub(r"<[^>]+>", "", s).strip() + + +def _extract_meta(html: str, *, prop: str | None = None, name: str | None = None) -> str | None: + # Try common meta tags + if not html: + return None + if prop: + m = re.search(rf' str | None: session = async_get_clientsession(hass) url = f"https://github.com/{owner}/{repo}" @@ -83,29 +112,72 @@ async def _github_description_html(hass: HomeAssistant, owner: str, repo: str) - if status != 200 or not html: return None - # Try to locate the repository description meta tags - m = re.search(r' tuple[str | None, str | None]: +async def _github_latest_version_atom(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]: + session = async_get_clientsession(hass) + url = f"https://github.com/{owner}/{repo}/releases.atom" + atom, status = await _safe_text(session, url, headers={"User-Agent": UA}) + if status != 200 or not atom: + return None, None + + try: + root = ET.fromstring(atom) + ns = {"a": "http://www.w3.org/2005/Atom"} + entry = root.find("a:entry", ns) + if entry is None: + return None, None + link = entry.find("a:link", ns) + if link is not None and link.attrib.get("href"): + tag = _extract_tag_from_github_url(link.attrib["href"]) + if tag: + return tag, "atom" + title = entry.find("a:title", ns) + if title is not None and title.text: + t = title.text.strip() + if t: + return t, "atom" + except Exception: + return None, None + + return None, None + + +async def _github_latest_version_redirect(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]: + # Use the /latest redirect (no API token needed), then parse tag out of final URL + session = async_get_clientsession(hass) + url = f"https://github.com/{owner}/{repo}/releases/latest" + try: + async with session.get(url, timeout=20, headers={"User-Agent": UA}, allow_redirects=True) as resp: + if resp.status != 200: + return None, None + final = str(resp.url) + tag = _extract_tag_from_github_url(final) + if tag: + return tag, "release" + except Exception: + return None, None + + return None, None + + +async def _github_latest_version_api(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]: session = async_get_clientsession(hass) headers = {"Accept": "application/vnd.github+json", "User-Agent": UA} - # Prefer releases data, status = await _safe_json(session, f"https://api.github.com/repos/{owner}/{repo}/releases/latest", headers=headers) if isinstance(data, dict) and data.get("tag_name"): return str(data["tag_name"]), "release" + # No releases -> tags if status == 404: - # No releases -> try tags data, _ = await _safe_json(session, f"https://api.github.com/repos/{owner}/{repo}/tags?per_page=1", headers=headers) if isinstance(data, list) and data: t = data[0] @@ -115,27 +187,58 @@ async def _github_latest_version(hass: HomeAssistant, owner: str, repo: str) -> return None, None -async def _gitlab_latest_version(hass: HomeAssistant, base: str, owner: str, repo: str) -> tuple[str | None, str | None]: +async def _github_latest_version(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]: + # Order: + # 1) redirect /latest + # 2) API (may rate-limit) + # 3) Atom feed fallback + tag, src = await _github_latest_version_redirect(hass, owner, repo) + if tag: + return tag, src + + tag, src = await _github_latest_version_api(hass, owner, repo) + if tag: + return tag, src + + return await _github_latest_version_atom(hass, owner, repo) + + +async def _gitea_latest_version(hass: HomeAssistant, base: str, owner: str, repo: str) -> tuple[str | None, str | None]: + session = async_get_clientsession(hass) + + data, _ = await _safe_json(session, f"{base}/api/v1/repos/{owner}/{repo}/releases?limit=1") + if isinstance(data, list) and data: + r = data[0] + if isinstance(r, dict) and r.get("tag_name"): + return str(r["tag_name"]), "release" + + data, _ = await _safe_json(session, f"{base}/api/v1/repos/{owner}/{repo}/tags?limit=1") + if isinstance(data, list) and data: + t = data[0] + if isinstance(t, dict) and t.get("name"): + return str(t["name"]), "tag" + + return None, None + + +async def _gitlab_latest_version(hass: HomeAssistant, base: ...str, owner: str, repo: str) -> tuple[str | None, str | None]: session = async_get_clientsession(hass) headers = {"User-Agent": UA} project = quote_plus(f"{owner}/{repo}") - # Releases data, _ = await _safe_json(session, f"{base}/api/v4/projects/{project}/releases?per_page=1", headers=headers) if isinstance(data, list) and data: r = data[0] if isinstance(r, dict) and r.get("tag_name"): return str(r["tag_name"]), "release" - # Tags data, _ = await _safe_json(session, f"{base}/api/v4/projects/{project}/repository/tags?per_page=1", headers=headers) if isinstance(data, list) and data: t = data[0] if isinstance(t, dict) and t.get("name"): return str(t["name"]), "tag" - # Atom feed fallback (public instances) atom, status = await _safe_text(session, f"{base}/{owner}/{repo}/-/tags?format=atom", headers=headers) if status == 200 and atom: try: @@ -173,7 +276,6 @@ async def fetch_repo_info(hass: HomeAssistant, repo_url: str) -> RepoInfo: try: if provider == "github": - # Try API repo details (may be rate-limited) headers = {"Accept": "application/vnd.github+json", "User-Agent": UA} data, status = await _safe_json(session, f"https://api.github.com/repos/{owner}/{repo}", headers=headers) @@ -184,12 +286,10 @@ async def fetch_repo_info(hass: HomeAssistant, repo_url: str) -> RepoInfo: if isinstance(data.get("owner"), dict) and data["owner"].get("login"): info.owner = data["owner"]["login"] else: - # If API blocked, still set reasonable defaults if status == 403: _LOGGER.debug("GitHub API blocked/rate-limited for repo info %s/%s", owner, repo) info.default_branch = "main" - # If description missing, fetch from GitHub HTML if not info.description: desc = await _github_description_html(hass, owner, repo) if desc: @@ -229,4 +329,118 @@ async def fetch_repo_info(hass: HomeAssistant, repo_url: str) -> RepoInfo: info.description = data.get("description") info.repo_name = _normalize_repo_name(data.get("name")) or repo info.default_branch = data.get("default_branch") or "main" - if isinstance(data.get("owner"), dict) and data["owner"].get("lo + if isinstance(data.get("owner"), dict) and data["owner"].get("login"): + info.owner = data["owner"]["login"] + + ver, src = await _gitea_latest_version(hass, base, owner, repo) + info.latest_version = ver + info.latest_version_source = src + return info + + except Exception as e: + _LOGGER.debug("fetch_repo_info failed for %s: %s", repo_url, e) + + return info + + +async def fetch_readme_markdown( + hass: HomeAssistant, + repo_url: str, + *, + provider: str | None = None, + default_branch: str | None = None, +) -> str | None: + """Fetch README Markdown for public repositories (GitHub/GitLab/Gitea). + + Defensive behavior: + - tries multiple common README filenames + - tries multiple branches (default, main, master) + - uses public raw endpoints (no tokens required for public repositories) + """ + repo_url = (repo_url or "").strip() + if not repo_url: + return None + + prov = (provider or "").strip().lower() if provider else "" + if not prov: + prov = detect_provider(repo_url) + + branch_candidates: list[str] = [] + if default_branch and str(default_branch).strip(): + branch_candidates.append(str(default_branch).strip()) + for b in ("main", "master"): + if b not in branch_candidates: + branch_candidates.append(b) + + filenames = ["README.md", "readme.md", "README.MD", "README.rst", "README"] + + session = async_get_clientsession(hass) + headers = {"User-Agent": UA} + + def _normalize_gitlab_path(path: str) -> str | None: + p = (path or "").strip().strip("/") + if not p: + return None + parts = [x for x in p.split("/") if x] + if len(parts) < 2: + return None + if parts[-1].endswith(".git"): + parts[-1] = parts[-1][:-4] + return "/".join(parts) + + candidates: list[str] = [] + + if prov == "github": + owner, repo = _split_owner_repo(repo_url) + if not owner or not repo: + return None + for branch in branch_candidates: + base = f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}" + for fn in filenames: + candidates.append(f"{base}/{fn}") + + elif prov == "gitea": + owner, repo = _split_owner_repo(repo_url) + if not owner or not repo: + return None + u = urlparse(repo_url.rstrip("/")) + root = f"{u.scheme}://{u.netloc}/{owner}/{repo}" + for branch in branch_candidates: + bases = [ + f"{root}/raw/branch/{branch}", + f"{root}/raw/{branch}", + ] + for b in bases: + for fn in filenames: + candidates.append(f"{b}/{fn}") + + elif prov == "gitlab": + u = urlparse(repo_url.rstrip("/")) + path_repo = _normalize_gitlab_path(u.path) + if not path_repo: + return None + root = f"{u.scheme}://{u.netloc}/{path_repo}" + for branch in branch_candidates: + bases = [ + f"{root}/-/raw/{branch}", + f"{root}/raw/{branch}", + ] + for b in bases: + for fn in filenames: + candidates.append(f"{b}/{fn}") + + else: + return None + + for url in candidates: + try: + async with session.get(url, timeout=20, headers=headers) as resp: + if resp.status != 200: + continue + txt = await resp.text() + if txt and txt.strip(): + return txt + except Exception: + continue + + return None