From 1caad401f6eada2c42d6575258c5cd766cab2ebd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Bachmann?= Date: Thu, 15 Jan 2026 14:38:11 +0000 Subject: [PATCH] revert c4f0f94a6feef06158582b1b80efee529bd2c582 revert custom_components/bahmcloud_store/providers.py aktualisiert --- .../bahmcloud_store/providers.py | 355 +++++++++++------- 1 file changed, 217 insertions(+), 138 deletions(-) diff --git a/custom_components/bahmcloud_store/providers.py b/custom_components/bahmcloud_store/providers.py index e0561c6..494f61d 100644 --- a/custom_components/bahmcloud_store/providers.py +++ b/custom_components/bahmcloud_store/providers.py @@ -10,10 +10,8 @@ from homeassistant.core import HomeAssistant from homeassistant.helpers.aiohttp_client import async_get_clientsession _LOGGER = logging.getLogger(__name__) -UA = "BahmcloudStore (Home Assistant)" -_RE_GITHUB = re.compile(r"(?:^|/)github\.com/([^/]+)/([^/#?]+)", re.IGNORECASE) -_RE_GITLAB = re.compile(r"(?:^|/)gitlab\.com/([^/]+)/([^/#?]+)", re.IGNORECASE) +UA = "BahmcloudStore (Home Assistant)" @dataclass @@ -23,6 +21,7 @@ class RepoInfo: description: str | None = None provider: str | None = None default_branch: str | None = None + latest_version: str | None = None latest_version_source: str | None = None # "release" | "tag" | "atom" | None @@ -38,126 +37,251 @@ def _normalize_repo_name(name: str | None) -> str | None: def _split_owner_repo(repo_url: str) -> tuple[str | None, str | None]: u = urlparse(repo_url.rstrip("/")) - parts = [p for p in u.path.split("/") if p] + parts = [p for p in u.path.strip("/").split("/") if p] if len(parts) < 2: return None, None - owner = parts[0] - repo = parts[1] - repo = repo[:-4] if repo.endswith(".git") else repo + owner = parts[0].strip() or None + repo = _normalize_repo_name(parts[1]) return owner, repo def detect_provider(repo_url: str) -> str: - u = urlparse(repo_url.rstrip("/")) - host = (u.netloc or "").lower() + host = urlparse(repo_url).netloc.lower() if "github.com" in host: return "github" - if "gitlab.com" in host: + if "gitlab" in host: return "gitlab" - # gitea heuristic: /user/repo + typical endpoints handled elsewhere - return "gitea" if host else "other" + + owner, repo = _split_owner_repo(repo_url) + if owner and repo: + return "gitea" + + return "generic" -async def _safe_json(session, url: str, headers: dict) -> tuple[object | None, int]: +async def _safe_json(session, url: str, *, headers: dict | None = None, timeout: int = 20): try: - async with session.get(url, headers=headers, timeout=20) as r: - status = r.status + async with session.get(url, timeout=timeout, headers=headers) as resp: + status = resp.status if status != 200: return None, status - return await r.json(), status + return await resp.json(), status except Exception: - return None, 0 + return None, None -async def _safe_text(session, url: str, headers: dict) -> tuple[str | None, int]: +async def _safe_text(session, url: str, *, headers: dict | None = None, timeout: int = 20): try: - async with session.get(url, headers=headers, timeout=20) as r: - status = r.status + async with session.get(url, timeout=timeout, headers=headers) as resp: + status = resp.status if status != 200: return None, status - return await r.text(), status + return await resp.text(), status except Exception: - return None, 0 + return None, None + + +def _extract_tag_from_github_url(url: str) -> str | None: + m = re.search(r"/releases/tag/([^/?#]+)", url) + if m: + return m.group(1) + m = re.search(r"/tag/([^/?#]+)", url) + if m: + return m.group(1) + return None def _strip_html(s: str) -> str: - s = re.sub(r"<[^>]+>", " ", s or "") - s = re.sub(r"\s+", " ", s).strip() - return s + # minimal HTML entity cleanup for meta descriptions + out = ( + s.replace("&", "&") + .replace(""", '"') + .replace("'", "'") + .replace("<", "<") + .replace(">", ">") + ) + return re.sub(r"\s+", " ", out).strip() -async def _github_description_html(hass: HomeAssistant, owner: str, repo: str) -> str | None: - session = async_get_clientsession(hass) - url = f"https://github.com/{owner}/{repo}" - html, status = await _safe_text(session, url, {"User-Agent": UA}) - if not html or status != 200: - return None +def _extract_meta(html: str, *, prop: str | None = None, name: str | None = None) -> str | None: + # Extract + # or + if prop: + # property="..." content="..." + m = re.search( + r']+property=["\']' + re.escape(prop) + r'["\'][^>]+content=["\']([^"\']+)["\']', + html, + flags=re.IGNORECASE, + ) + if m: + return _strip_html(m.group(1)) + m = re.search( + r']+content=["\']([^"\']+)["\'][^>]+property=["\']' + re.escape(prop) + r'["\']', + html, + flags=re.IGNORECASE, + ) + if m: + return _strip_html(m.group(1)) - # prefer og:description / description meta - m = re.search(r']+property="og:description"[^>]+content="([^"]+)"', html, re.IGNORECASE) - if m: - return _strip_html(m.group(1)) - - m = re.search(r']+name="description"[^>]+content="([^"]+)"', html, re.IGNORECASE) - if m: - return _strip_html(m.group(1)) + if name: + m = re.search( + r']+name=["\']' + re.escape(name) + r'["\'][^>]+content=["\']([^"\']+)["\']', + html, + flags=re.IGNORECASE, + ) + if m: + return _strip_html(m.group(1)) + m = re.search( + r']+content=["\']([^"\']+)["\'][^>]+name=["\']' + re.escape(name) + r'["\']', + html, + flags=re.IGNORECASE, + ) + if m: + return _strip_html(m.group(1)) return None -async def _github_latest_version(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]: +async def _github_description_html(hass: HomeAssistant, owner: str, repo: str) -> str | None: + """ + GitHub API may be rate-limited; fetch public HTML and read meta description. + """ session = async_get_clientsession(hass) - headers = {"User-Agent": UA, "Accept": "application/vnd.github+json"} + headers = { + "User-Agent": UA, + "Accept": "text/html,application/xhtml+xml", + } - # 1) latest release - data, st = await _safe_json(session, f"https://api.github.com/repos/{owner}/{repo}/releases/latest", headers) - if isinstance(data, dict) and data.get("tag_name"): - return str(data["tag_name"]), "release" + html, status = await _safe_text(session, f"https://github.com/{owner}/{repo}", headers=headers) + if not html or status != 200: + return None - # 2) tags (first) - data, st = await _safe_json(session, f"https://api.github.com/repos/{owner}/{repo}/tags?per_page=1", headers) - if isinstance(data, list) and data: - t = data[0] - if isinstance(t, dict) and t.get("name"): - return str(t["name"]), "tag" + desc = _extract_meta(html, prop="og:description") + if desc: + return desc - # 3) atom releases feed fallback (HTML blocked cases) - atom, st = await _safe_text(session, f"https://github.com/{owner}/{repo}/releases.atom", {"User-Agent": UA}) - if atom and st == 200: - try: - root = ET.fromstring(atom) - ns = {"a": "http://www.w3.org/2005/Atom"} - entry = root.find("a:entry", ns) - if entry is not None: - title = entry.findtext("a:title", default="", namespaces=ns) or "" - title = title.strip() - # often: "v1.2.3" or "Release v1.2.3" - m = re.search(r"v?\d+\.\d+\.\d+([-\w\.]+)?", title) - if m: - return m.group(0), "atom" - except Exception: - pass + desc = _extract_meta(html, name="description") + if desc: + return desc + + return None + + +async def _github_latest_version_atom(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]: + session = async_get_clientsession(hass) + headers = {"User-Agent": UA, "Accept": "application/atom+xml,text/xml;q=0.9,*/*;q=0.8"} + + xml_text, _ = await _safe_text(session, f"https://github.com/{owner}/{repo}/releases.atom", headers=headers) + if not xml_text: + return None, None + + try: + root = ET.fromstring(xml_text) + except Exception: + return None, None + + for entry in root.findall(".//{*}entry"): + for link in entry.findall(".//{*}link"): + href = link.attrib.get("href") + if not href: + continue + tag = _extract_tag_from_github_url(href) + if tag: + return tag, "atom" return None, None +async def _github_latest_version_redirect(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]: + session = async_get_clientsession(hass) + headers = {"User-Agent": UA} + url = f"https://github.com/{owner}/{repo}/releases/latest" + try: + async with session.head(url, allow_redirects=False, timeout=15, headers=headers) as resp: + if resp.status in (301, 302, 303, 307, 308): + loc = resp.headers.get("Location") + if loc: + tag = _extract_tag_from_github_url(loc) + if tag: + return tag, "release" + except Exception: + pass + return None, None + + +async def _github_latest_version_api(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]: + session = async_get_clientsession(hass) + headers = {"Accept": "application/vnd.github+json", "User-Agent": UA} + + data, _ = await _safe_json(session, f"https://api.github.com/repos/{owner}/{repo}/releases/latest", headers=headers) + if isinstance(data, dict): + tag = data.get("tag_name") or data.get("name") + if isinstance(tag, str) and tag.strip(): + return tag.strip(), "release" + + data, _ = await _safe_json(session, f"https://api.github.com/repos/{owner}/{repo}/tags?per_page=1", headers=headers) + if isinstance(data, list) and data: + tag = data[0].get("name") + if isinstance(tag, str) and tag.strip(): + return tag.strip(), "tag" + + return None, None + + +async def _github_latest_version(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]: + tag, src = await _github_latest_version_atom(hass, owner, repo) + if tag: + return tag, src + + tag, src = await _github_latest_version_redirect(hass, owner, repo) + if tag: + return tag, src + + return await _github_latest_version_api(hass, owner, repo) + + async def _gitea_latest_version(hass: HomeAssistant, base: str, owner: str, repo: str) -> tuple[str | None, str | None]: session = async_get_clientsession(hass) - headers = {"User-Agent": UA, "Accept": "application/json"} - # releases latest - data, st = await _safe_json(session, f"{base}/api/v1/repos/{owner}/{repo}/releases?limit=1", headers) + data, _ = await _safe_json(session, f"{base}/api/v1/repos/{owner}/{repo}/releases?limit=1") if isinstance(data, list) and data: - d0 = data[0] - if isinstance(d0, dict) and d0.get("tag_name"): - return str(d0["tag_name"]), "release" + tag = data[0].get("tag_name") or data[0].get("name") + if isinstance(tag, str) and tag.strip(): + return tag.strip(), "release" - # tags latest - data, st = await _safe_json(session, f"{base}/api/v1/repos/{owner}/{repo}/tags?limit=1", headers) + data, _ = await _safe_json(session, f"{base}/api/v1/repos/{owner}/{repo}/tags?limit=1") if isinstance(data, list) and data: - d0 = data[0] - if isinstance(d0, dict) and d0.get("name"): - return str(d0["name"]), "tag" + tag = data[0].get("name") + if isinstance(tag, str) and tag.strip(): + return tag.strip(), "tag" + + return None, None + + +async def _gitlab_latest_version(hass: HomeAssistant, base: str, owner: str, repo: str) -> tuple[str | None, str | None]: + session = async_get_clientsession(hass) + headers = {"User-Agent": UA} + project = quote_plus(f"{owner}/{repo}") + + data, _ = await _safe_json( + session, + f"{base}/api/v4/projects/{project}/releases?per_page=1&order_by=released_at&sort=desc", + headers=headers, + ) + if isinstance(data, list) and data: + tag = data[0].get("tag_name") or data[0].get("name") + if isinstance(tag, str) and tag.strip(): + return tag.strip(), "release" + + data, _ = await _safe_json( + session, + f"{base}/api/v4/projects/{project}/repository/tags?per_page=1&order_by=updated&sort=desc", + headers=headers, + ) + if isinstance(data, list) and data: + tag = data[0].get("name") + if isinstance(tag, str) and tag.strip(): + return tag.strip(), "tag" return None, None @@ -183,8 +307,10 @@ async def fetch_repo_info(hass: HomeAssistant, repo_url: str) -> RepoInfo: try: if provider == "github": - headers = {"User-Agent": UA, "Accept": "application/vnd.github+json"} - data, st = await _safe_json(session, f"https://api.github.com/repos/{owner}/{repo}", headers) + # Try API repo details (may be rate-limited) + headers = {"Accept": "application/vnd.github+json", "User-Agent": UA} + data, status = await _safe_json(session, f"https://api.github.com/repos/{owner}/{repo}", headers=headers) + if isinstance(data, dict): info.description = data.get("description") info.repo_name = _normalize_repo_name(data.get("name")) or repo @@ -192,8 +318,12 @@ async def fetch_repo_info(hass: HomeAssistant, repo_url: str) -> RepoInfo: if isinstance(data.get("owner"), dict) and data["owner"].get("login"): info.owner = data["owner"]["login"] else: + # If API blocked, still set reasonable defaults + if status == 403: + _LOGGER.debug("GitHub API blocked/rate-limited for repo info %s/%s", owner, repo) info.default_branch = "main" + # If description missing, fetch from GitHub HTML if not info.description: desc = await _github_description_html(hass, owner, repo) if desc: @@ -207,10 +337,10 @@ async def fetch_repo_info(hass: HomeAssistant, repo_url: str) -> RepoInfo: if provider == "gitlab": u = urlparse(repo_url.rstrip("/")) base = f"{u.scheme}://{u.netloc}" - headers = {"User-Agent": UA, "Accept": "application/json"} + headers = {"User-Agent": UA} project = quote_plus(f"{owner}/{repo}") - data, st = await _safe_json(session, f"{base}/api/v4/projects/{project}", headers) + data, _ = await _safe_json(session, f"{base}/api/v4/projects/{project}", headers=headers) if isinstance(data, dict): info.description = data.get("description") info.repo_name = _normalize_repo_name(data.get("path")) or repo @@ -219,22 +349,16 @@ async def fetch_repo_info(hass: HomeAssistant, repo_url: str) -> RepoInfo: if isinstance(ns, dict) and ns.get("path"): info.owner = ns.get("path") - # latest tag (gitlab) - tags, st = await _safe_json(session, f"{base}/api/v4/projects/{project}/repository/tags?per_page=1", headers) - if isinstance(tags, list) and tags: - t0 = tags[0] - if isinstance(t0, dict) and t0.get("name"): - info.latest_version = str(t0["name"]) - info.latest_version_source = "tag" - + ver, src = await _gitlab_latest_version(hass, base, owner, repo) + info.latest_version = ver + info.latest_version_source = src return info if provider == "gitea": u = urlparse(repo_url.rstrip("/")) base = f"{u.scheme}://{u.netloc}" - headers = {"User-Agent": UA, "Accept": "application/json"} - data, st = await _safe_json(session, f"{base}/api/v1/repos/{owner}/{repo}", headers) + data, _ = await _safe_json(session, f"{base}/api/v1/repos/{owner}/{repo}") if isinstance(data, dict): info.description = data.get("description") info.repo_name = _normalize_repo_name(data.get("name")) or repo @@ -251,49 +375,4 @@ async def fetch_repo_info(hass: HomeAssistant, repo_url: str) -> RepoInfo: except Exception as e: _LOGGER.debug("Provider fetch failed for %s: %s", repo_url, e) - return info - - -async def fetch_readme(hass: HomeAssistant, repo_url: str, default_branch: str | None) -> object | None: - owner, repo = _split_owner_repo(repo_url) - if not owner or not repo: - return None - - provider = detect_provider(repo_url) - branch = default_branch or "main" - session = async_get_clientsession(hass) - - # GitHub: raw - if provider == "github": - for fn in ("README.md", "Readme.md", "readme.md"): - url = f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{fn}" - txt, st = await _safe_text(session, url, {"User-Agent": UA}) - if txt and st == 200: - return txt - return None - - # GitLab raw - if provider == "gitlab": - u = urlparse(repo_url.rstrip("/")) - root = f"{u.scheme}://{u.netloc}/{owner}/{repo}" - for fn in ("README.md", "Readme.md", "readme.md"): - url = f"{root}/-/raw/{branch}/{fn}" - txt, st = await _safe_text(session, url, {"User-Agent": UA}) - if txt and st == 200: - return txt - return None - - # Gitea raw (supports both raw formats) - if provider == "gitea": - u = urlparse(repo_url.rstrip("/")) - root = f"{u.scheme}://{u.netloc}/{owner}/{repo}" - bases = [f"{root}/raw/branch/{branch}", f"{root}/raw/{branch}"] - for fn in ("README.md", "Readme.md", "readme.md"): - for b in bases: - url = f"{b}/{fn}" - txt, st = await _safe_text(session, url, {"User-Agent": UA}) - if txt and st == 200: - return txt - return None - - return None + return info \ No newline at end of file