diff --git a/custom_components/bahmcloud_store/providers.py b/custom_components/bahmcloud_store/providers.py
index e0561c6..494f61d 100644
--- a/custom_components/bahmcloud_store/providers.py
+++ b/custom_components/bahmcloud_store/providers.py
@@ -10,10 +10,8 @@ from homeassistant.core import HomeAssistant
from homeassistant.helpers.aiohttp_client import async_get_clientsession
_LOGGER = logging.getLogger(__name__)
-UA = "BahmcloudStore (Home Assistant)"
-_RE_GITHUB = re.compile(r"(?:^|/)github\.com/([^/]+)/([^/#?]+)", re.IGNORECASE)
-_RE_GITLAB = re.compile(r"(?:^|/)gitlab\.com/([^/]+)/([^/#?]+)", re.IGNORECASE)
+UA = "BahmcloudStore (Home Assistant)"
@dataclass
@@ -23,6 +21,7 @@ class RepoInfo:
description: str | None = None
provider: str | None = None
default_branch: str | None = None
+
latest_version: str | None = None
latest_version_source: str | None = None # "release" | "tag" | "atom" | None
@@ -38,126 +37,251 @@ def _normalize_repo_name(name: str | None) -> str | None:
def _split_owner_repo(repo_url: str) -> tuple[str | None, str | None]:
u = urlparse(repo_url.rstrip("/"))
- parts = [p for p in u.path.split("/") if p]
+ parts = [p for p in u.path.strip("/").split("/") if p]
if len(parts) < 2:
return None, None
- owner = parts[0]
- repo = parts[1]
- repo = repo[:-4] if repo.endswith(".git") else repo
+ owner = parts[0].strip() or None
+ repo = _normalize_repo_name(parts[1])
return owner, repo
def detect_provider(repo_url: str) -> str:
- u = urlparse(repo_url.rstrip("/"))
- host = (u.netloc or "").lower()
+ host = urlparse(repo_url).netloc.lower()
if "github.com" in host:
return "github"
- if "gitlab.com" in host:
+ if "gitlab" in host:
return "gitlab"
- # gitea heuristic: /user/repo + typical endpoints handled elsewhere
- return "gitea" if host else "other"
+
+ owner, repo = _split_owner_repo(repo_url)
+ if owner and repo:
+ return "gitea"
+
+ return "generic"
-async def _safe_json(session, url: str, headers: dict) -> tuple[object | None, int]:
+async def _safe_json(session, url: str, *, headers: dict | None = None, timeout: int = 20):
try:
- async with session.get(url, headers=headers, timeout=20) as r:
- status = r.status
+ async with session.get(url, timeout=timeout, headers=headers) as resp:
+ status = resp.status
if status != 200:
return None, status
- return await r.json(), status
+ return await resp.json(), status
except Exception:
- return None, 0
+ return None, None
-async def _safe_text(session, url: str, headers: dict) -> tuple[str | None, int]:
+async def _safe_text(session, url: str, *, headers: dict | None = None, timeout: int = 20):
try:
- async with session.get(url, headers=headers, timeout=20) as r:
- status = r.status
+ async with session.get(url, timeout=timeout, headers=headers) as resp:
+ status = resp.status
if status != 200:
return None, status
- return await r.text(), status
+ return await resp.text(), status
except Exception:
- return None, 0
+ return None, None
+
+
+def _extract_tag_from_github_url(url: str) -> str | None:
+ m = re.search(r"/releases/tag/([^/?#]+)", url)
+ if m:
+ return m.group(1)
+ m = re.search(r"/tag/([^/?#]+)", url)
+ if m:
+ return m.group(1)
+ return None
def _strip_html(s: str) -> str:
- s = re.sub(r"<[^>]+>", " ", s or "")
- s = re.sub(r"\s+", " ", s).strip()
- return s
+ # minimal HTML entity cleanup for meta descriptions
+ out = (
+ s.replace("&amp;", "&")
+ .replace("&quot;", '"')
+ .replace("&#39;", "'")
+ .replace("&lt;", "<")
+ .replace("&gt;", ">")
+ )
+ return re.sub(r"\s+", " ", out).strip()
-async def _github_description_html(hass: HomeAssistant, owner: str, repo: str) -> str | None:
- session = async_get_clientsession(hass)
- url = f"https://github.com/{owner}/{repo}"
- html, status = await _safe_text(session, url, {"User-Agent": UA})
- if not html or status != 200:
- return None
+def _extract_meta(html: str, *, prop: str | None = None, name: str | None = None) -> str | None:
+ # Extract <meta property="og:description" content="...">
+ # or <meta name="description" content="...">
+ if prop:
+ # property="..." content="..."
+ m = re.search(
+ r'<meta[^>]+property=["\']' + re.escape(prop) + r'["\'][^>]+content=["\']([^"\']+)["\']',
+ html,
+ flags=re.IGNORECASE,
+ )
+ if m:
+ return _strip_html(m.group(1))
+ m = re.search(
+ r'<meta[^>]+content=["\']([^"\']+)["\'][^>]+property=["\']' + re.escape(prop) + r'["\']',
+ html,
+ flags=re.IGNORECASE,
+ )
+ if m:
+ return _strip_html(m.group(1))
- # prefer og:description / description meta
- m = re.search(r'<meta[^>]+property="og:description"[^>]+content="([^"]+)"', html, re.IGNORECASE)
- if m:
- return _strip_html(m.group(1))
-
- m = re.search(r'<meta[^>]+name="description"[^>]+content="([^"]+)"', html, re.IGNORECASE)
- if m:
- return _strip_html(m.group(1))
+ if name:
+ m = re.search(
+ r'<meta[^>]+name=["\']' + re.escape(name) + r'["\'][^>]+content=["\']([^"\']+)["\']',
+ html,
+ flags=re.IGNORECASE,
+ )
+ if m:
+ return _strip_html(m.group(1))
+ m = re.search(
+ r'<meta[^>]+content=["\']([^"\']+)["\'][^>]+name=["\']' + re.escape(name) + r'["\']',
+ html,
+ flags=re.IGNORECASE,
+ )
+ if m:
+ return _strip_html(m.group(1))
return None
-async def _github_latest_version(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]:
+async def _github_description_html(hass: HomeAssistant, owner: str, repo: str) -> str | None:
+ """
+ GitHub API may be rate-limited; fetch public HTML and read meta description.
+ """
session = async_get_clientsession(hass)
- headers = {"User-Agent": UA, "Accept": "application/vnd.github+json"}
+ headers = {
+ "User-Agent": UA,
+ "Accept": "text/html,application/xhtml+xml",
+ }
- # 1) latest release
- data, st = await _safe_json(session, f"https://api.github.com/repos/{owner}/{repo}/releases/latest", headers)
- if isinstance(data, dict) and data.get("tag_name"):
- return str(data["tag_name"]), "release"
+ html, status = await _safe_text(session, f"https://github.com/{owner}/{repo}", headers=headers)
+ if not html or status != 200:
+ return None
- # 2) tags (first)
- data, st = await _safe_json(session, f"https://api.github.com/repos/{owner}/{repo}/tags?per_page=1", headers)
- if isinstance(data, list) and data:
- t = data[0]
- if isinstance(t, dict) and t.get("name"):
- return str(t["name"]), "tag"
+ desc = _extract_meta(html, prop="og:description")
+ if desc:
+ return desc
- # 3) atom releases feed fallback (HTML blocked cases)
- atom, st = await _safe_text(session, f"https://github.com/{owner}/{repo}/releases.atom", {"User-Agent": UA})
- if atom and st == 200:
- try:
- root = ET.fromstring(atom)
- ns = {"a": "http://www.w3.org/2005/Atom"}
- entry = root.find("a:entry", ns)
- if entry is not None:
- title = entry.findtext("a:title", default="", namespaces=ns) or ""
- title = title.strip()
- # often: "v1.2.3" or "Release v1.2.3"
- m = re.search(r"v?\d+\.\d+\.\d+([-\w\.]+)?", title)
- if m:
- return m.group(0), "atom"
- except Exception:
- pass
+ desc = _extract_meta(html, name="description")
+ if desc:
+ return desc
+
+ return None
+
+
+async def _github_latest_version_atom(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]:
+ session = async_get_clientsession(hass)
+ headers = {"User-Agent": UA, "Accept": "application/atom+xml,text/xml;q=0.9,*/*;q=0.8"}
+
+ xml_text, _ = await _safe_text(session, f"https://github.com/{owner}/{repo}/releases.atom", headers=headers)
+ if not xml_text:
+ return None, None
+
+ try:
+ root = ET.fromstring(xml_text)
+ except Exception:
+ return None, None
+
+ for entry in root.findall(".//{*}entry"):
+ for link in entry.findall(".//{*}link"):
+ href = link.attrib.get("href")
+ if not href:
+ continue
+ tag = _extract_tag_from_github_url(href)
+ if tag:
+ return tag, "atom"
return None, None
+async def _github_latest_version_redirect(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]:
+ session = async_get_clientsession(hass)
+ headers = {"User-Agent": UA}
+ url = f"https://github.com/{owner}/{repo}/releases/latest"
+ try:
+ async with session.head(url, allow_redirects=False, timeout=15, headers=headers) as resp:
+ if resp.status in (301, 302, 303, 307, 308):
+ loc = resp.headers.get("Location")
+ if loc:
+ tag = _extract_tag_from_github_url(loc)
+ if tag:
+ return tag, "release"
+ except Exception:
+ pass
+ return None, None
+
+
+async def _github_latest_version_api(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]:
+ session = async_get_clientsession(hass)
+ headers = {"Accept": "application/vnd.github+json", "User-Agent": UA}
+
+ data, _ = await _safe_json(session, f"https://api.github.com/repos/{owner}/{repo}/releases/latest", headers=headers)
+ if isinstance(data, dict):
+ tag = data.get("tag_name") or data.get("name")
+ if isinstance(tag, str) and tag.strip():
+ return tag.strip(), "release"
+
+ data, _ = await _safe_json(session, f"https://api.github.com/repos/{owner}/{repo}/tags?per_page=1", headers=headers)
+ if isinstance(data, list) and data:
+ tag = data[0].get("name")
+ if isinstance(tag, str) and tag.strip():
+ return tag.strip(), "tag"
+
+ return None, None
+
+
+async def _github_latest_version(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]:
+ tag, src = await _github_latest_version_atom(hass, owner, repo)
+ if tag:
+ return tag, src
+
+ tag, src = await _github_latest_version_redirect(hass, owner, repo)
+ if tag:
+ return tag, src
+
+ return await _github_latest_version_api(hass, owner, repo)
+
+
async def _gitea_latest_version(hass: HomeAssistant, base: str, owner: str, repo: str) -> tuple[str | None, str | None]:
session = async_get_clientsession(hass)
- headers = {"User-Agent": UA, "Accept": "application/json"}
- # releases latest
- data, st = await _safe_json(session, f"{base}/api/v1/repos/{owner}/{repo}/releases?limit=1", headers)
+ data, _ = await _safe_json(session, f"{base}/api/v1/repos/{owner}/{repo}/releases?limit=1")
if isinstance(data, list) and data:
- d0 = data[0]
- if isinstance(d0, dict) and d0.get("tag_name"):
- return str(d0["tag_name"]), "release"
+ tag = data[0].get("tag_name") or data[0].get("name")
+ if isinstance(tag, str) and tag.strip():
+ return tag.strip(), "release"
- # tags latest
- data, st = await _safe_json(session, f"{base}/api/v1/repos/{owner}/{repo}/tags?limit=1", headers)
+ data, _ = await _safe_json(session, f"{base}/api/v1/repos/{owner}/{repo}/tags?limit=1")
if isinstance(data, list) and data:
- d0 = data[0]
- if isinstance(d0, dict) and d0.get("name"):
- return str(d0["name"]), "tag"
+ tag = data[0].get("name")
+ if isinstance(tag, str) and tag.strip():
+ return tag.strip(), "tag"
+
+ return None, None
+
+
+async def _gitlab_latest_version(hass: HomeAssistant, base: str, owner: str, repo: str) -> tuple[str | None, str | None]:
+ session = async_get_clientsession(hass)
+ headers = {"User-Agent": UA}
+ project = quote_plus(f"{owner}/{repo}")
+
+ data, _ = await _safe_json(
+ session,
+ f"{base}/api/v4/projects/{project}/releases?per_page=1&order_by=released_at&sort=desc",
+ headers=headers,
+ )
+ if isinstance(data, list) and data:
+ tag = data[0].get("tag_name") or data[0].get("name")
+ if isinstance(tag, str) and tag.strip():
+ return tag.strip(), "release"
+
+ data, _ = await _safe_json(
+ session,
+ f"{base}/api/v4/projects/{project}/repository/tags?per_page=1&order_by=updated&sort=desc",
+ headers=headers,
+ )
+ if isinstance(data, list) and data:
+ tag = data[0].get("name")
+ if isinstance(tag, str) and tag.strip():
+ return tag.strip(), "tag"
return None, None
@@ -183,8 +307,10 @@ async def fetch_repo_info(hass: HomeAssistant, repo_url: str) -> RepoInfo:
try:
if provider == "github":
- headers = {"User-Agent": UA, "Accept": "application/vnd.github+json"}
- data, st = await _safe_json(session, f"https://api.github.com/repos/{owner}/{repo}", headers)
+ # Try API repo details (may be rate-limited)
+ headers = {"Accept": "application/vnd.github+json", "User-Agent": UA}
+ data, status = await _safe_json(session, f"https://api.github.com/repos/{owner}/{repo}", headers=headers)
+
if isinstance(data, dict):
info.description = data.get("description")
info.repo_name = _normalize_repo_name(data.get("name")) or repo
@@ -192,8 +318,12 @@ async def fetch_repo_info(hass: HomeAssistant, repo_url: str) -> RepoInfo:
if isinstance(data.get("owner"), dict) and data["owner"].get("login"):
info.owner = data["owner"]["login"]
else:
+ # If API blocked, still set reasonable defaults
+ if status == 403:
+ _LOGGER.debug("GitHub API blocked/rate-limited for repo info %s/%s", owner, repo)
info.default_branch = "main"
+ # If description missing, fetch from GitHub HTML
if not info.description:
desc = await _github_description_html(hass, owner, repo)
if desc:
@@ -207,10 +337,10 @@ async def fetch_repo_info(hass: HomeAssistant, repo_url: str) -> RepoInfo:
if provider == "gitlab":
u = urlparse(repo_url.rstrip("/"))
base = f"{u.scheme}://{u.netloc}"
- headers = {"User-Agent": UA, "Accept": "application/json"}
+ headers = {"User-Agent": UA}
project = quote_plus(f"{owner}/{repo}")
- data, st = await _safe_json(session, f"{base}/api/v4/projects/{project}", headers)
+ data, _ = await _safe_json(session, f"{base}/api/v4/projects/{project}", headers=headers)
if isinstance(data, dict):
info.description = data.get("description")
info.repo_name = _normalize_repo_name(data.get("path")) or repo
@@ -219,22 +349,16 @@ async def fetch_repo_info(hass: HomeAssistant, repo_url: str) -> RepoInfo:
if isinstance(ns, dict) and ns.get("path"):
info.owner = ns.get("path")
- # latest tag (gitlab)
- tags, st = await _safe_json(session, f"{base}/api/v4/projects/{project}/repository/tags?per_page=1", headers)
- if isinstance(tags, list) and tags:
- t0 = tags[0]
- if isinstance(t0, dict) and t0.get("name"):
- info.latest_version = str(t0["name"])
- info.latest_version_source = "tag"
-
+ ver, src = await _gitlab_latest_version(hass, base, owner, repo)
+ info.latest_version = ver
+ info.latest_version_source = src
return info
if provider == "gitea":
u = urlparse(repo_url.rstrip("/"))
base = f"{u.scheme}://{u.netloc}"
- headers = {"User-Agent": UA, "Accept": "application/json"}
- data, st = await _safe_json(session, f"{base}/api/v1/repos/{owner}/{repo}", headers)
+ data, _ = await _safe_json(session, f"{base}/api/v1/repos/{owner}/{repo}")
if isinstance(data, dict):
info.description = data.get("description")
info.repo_name = _normalize_repo_name(data.get("name")) or repo
@@ -251,49 +375,4 @@ async def fetch_repo_info(hass: HomeAssistant, repo_url: str) -> RepoInfo:
except Exception as e:
_LOGGER.debug("Provider fetch failed for %s: %s", repo_url, e)
- return info
-
-
-async def fetch_readme(hass: HomeAssistant, repo_url: str, default_branch: str | None) -> object | None:
- owner, repo = _split_owner_repo(repo_url)
- if not owner or not repo:
- return None
-
- provider = detect_provider(repo_url)
- branch = default_branch or "main"
- session = async_get_clientsession(hass)
-
- # GitHub: raw
- if provider == "github":
- for fn in ("README.md", "Readme.md", "readme.md"):
- url = f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{fn}"
- txt, st = await _safe_text(session, url, {"User-Agent": UA})
- if txt and st == 200:
- return txt
- return None
-
- # GitLab raw
- if provider == "gitlab":
- u = urlparse(repo_url.rstrip("/"))
- root = f"{u.scheme}://{u.netloc}/{owner}/{repo}"
- for fn in ("README.md", "Readme.md", "readme.md"):
- url = f"{root}/-/raw/{branch}/{fn}"
- txt, st = await _safe_text(session, url, {"User-Agent": UA})
- if txt and st == 200:
- return txt
- return None
-
- # Gitea raw (supports both raw formats)
- if provider == "gitea":
- u = urlparse(repo_url.rstrip("/"))
- root = f"{u.scheme}://{u.netloc}/{owner}/{repo}"
- bases = [f"{root}/raw/branch/{branch}", f"{root}/raw/{branch}"]
- for fn in ("README.md", "Readme.md", "readme.md"):
- for b in bases:
- url = f"{b}/{fn}"
- txt, st = await _safe_text(session, url, {"User-Agent": UA})
- if txt and st == 200:
- return txt
- return None
-
- return None
+ return info
\ No newline at end of file