diff --git a/custom_components/bahmcloud_store/providers.py b/custom_components/bahmcloud_store/providers.py
index a491ab8..494f61d 100644
--- a/custom_components/bahmcloud_store/providers.py
+++ b/custom_components/bahmcloud_store/providers.py
@@ -82,9 +82,6 @@ async def _safe_text(session, url: str, *, headers: dict | None = None, timeout:
def _extract_tag_from_github_url(url: str) -> str | None:
- # Examples:
- # https://github.com/owner/repo/releases/tag/v1.2.3
- # https://github.com/owner/repo/tag/v1.2.3 (rare)
m = re.search(r"/releases/tag/([^/?#]+)", url)
if m:
return m.group(1)
@@ -94,11 +91,83 @@ def _extract_tag_from_github_url(url: str) -> str | None:
return None
+def _strip_html(s: str) -> str:
+    """Decode HTML character references in *s* and normalise whitespace.
+
+    Despite the name, no tags are removed: the input is the raw value of a
+    ``<meta ... content="...">`` attribute, which only needs its character
+    references (``&amp;``, ``&quot;``, ``&#39;``, ``&lt;``, ``&gt;``, ...)
+    decoded.
+
+    Returns the decoded text with every whitespace run collapsed to a single
+    space and leading/trailing whitespace removed.
+    """
+    # Imported locally because ``html`` is also used as a parameter name by
+    # the meta-tag helpers in this module; a module-level ``import html``
+    # would be shadowed there.
+    from html import unescape
+
+    # ``unescape`` decodes each reference exactly once, so ``&amp;lt;`` becomes
+    # the literal text ``&lt;`` instead of being double-decoded to ``<``.
+    return re.sub(r"\s+", " ", unescape(s)).strip()
+
+
+def _extract_meta(html: str, *, prop: str | None = None, name: str | None = None) -> str | None:
+    """Return the cleaned ``content`` value of a matching ``<meta>`` tag.
+
+    Looks for ``<meta property="PROP" content="...">`` when *prop* is given,
+    then for ``<meta name="NAME" content="...">`` when *name* is given. Both
+    attribute orders inside the tag are accepted and matching is
+    case-insensitive. The first hit is passed through ``_strip_html`` and
+    returned; ``None`` means no tag matched.
+    """
+    # The value may be wrapped in either quote style and may legitimately
+    # contain the other one (e.g. content="It's fast"), so each alternative
+    # stops only at its own closing quote.
+    content = r'content=(?:"(?P<dq>[^"]+)"|\'(?P<sq>[^\']+)\')'
+
+    for attr, wanted in (("property", prop), ("name", name)):
+        if not wanted:
+            continue
+        # Requiring whitespace before the attribute keeps ``name=`` from
+        # matching inside longer attribute names such as ``data-name=``.
+        key = attr + r'=["\']' + re.escape(wanted) + r'["\']'
+        for pattern in (
+            r"<meta\b[^>]*?\s" + key + r"[^>]*?\s" + content,
+            r"<meta\b[^>]*?\s" + content + r"[^>]*?\s" + key,
+        ):
+            m = re.search(pattern, html, flags=re.IGNORECASE)
+            if m:
+                return _strip_html(m.group("dq") or m.group("sq"))
+
+    return None
+
+
+async def _github_description_html(hass: HomeAssistant, owner: str, repo: str) -> str | None:
+    """Read the repository description from GitHub's public HTML page.
+
+    Intended as a fallback for when the GitHub API gives no description (it
+    may be rate-limited). The ``og:description`` meta property is preferred;
+    the plain ``description`` meta name is tried second.
+
+    Returns ``None`` when the page is empty, the status is not 200, or
+    neither meta tag yields a value.
+    """
+    page_url = f"https://github.com/{owner}/{repo}"
+    request_headers = {"User-Agent": UA, "Accept": "text/html,application/xhtml+xml"}
+
+    body, code = await _safe_text(async_get_clientsession(hass), page_url, headers=request_headers)
+    if code != 200 or not body:
+        return None
+
+    # The trailing ``or None`` normalises an empty-string result to ``None``.
+    return (
+        _extract_meta(body, prop="og:description")
+        or _extract_meta(body, name="description")
+        or None
+    )
+
+
async def _github_latest_version_atom(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]:
- """
- Uses GitHub public Atom feed (no api.github.com).
- This avoids API rate limits and works for most public repos.
- """
session = async_get_clientsession(hass)
headers = {"User-Agent": UA, "Accept": "application/atom+xml,text/xml;q=0.9,*/*;q=0.8"}
@@ -111,8 +180,6 @@ async def _github_latest_version_atom(hass: HomeAssistant, owner: str, repo: str
except Exception:
return None, None
- # Atom namespace can vary; search entries robustly
- # Find first <entry> then a <link> that points to a release tag.
for entry in root.findall(".//{*}entry"):
for link in entry.findall(".//{*}link"):
href = link.attrib.get("href")
@@ -126,9 +193,6 @@ async def _github_latest_version_atom(hass: HomeAssistant, owner: str, repo: str
async def _github_latest_version_redirect(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]:
- """
- Fallback: HEAD /releases/latest and parse Location header.
- """
session = async_get_clientsession(hass)
headers = {"User-Agent": UA}
url = f"https://github.com/{owner}/{repo}/releases/latest"
@@ -146,9 +210,6 @@ async def _github_latest_version_redirect(hass: HomeAssistant, owner: str, repo:
async def _github_latest_version_api(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]:
- """
- Optional API path (may be rate-limited). Keep as last resort.
- """
session = async_get_clientsession(hass)
headers = {"Accept": "application/vnd.github+json", "User-Agent": UA}
@@ -168,12 +229,6 @@ async def _github_latest_version_api(hass: HomeAssistant, owner: str, repo: str)
async def _github_latest_version(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]:
- """
- Durable strategy:
- 1) Atom feed (no API)
- 2) Redirect parse (no API)
- 3) API fallback
- """
tag, src = await _github_latest_version_atom(hass, owner, repo)
if tag:
return tag, src
@@ -252,9 +307,10 @@ async def fetch_repo_info(hass: HomeAssistant, repo_url: str) -> RepoInfo:
try:
if provider == "github":
- # Repo details: try API first, but don't fail if blocked
+ # Try API repo details (may be rate-limited)
headers = {"Accept": "application/vnd.github+json", "User-Agent": UA}
data, status = await _safe_json(session, f"https://api.github.com/repos/{owner}/{repo}", headers=headers)
+
if isinstance(data, dict):
info.description = data.get("description")
info.repo_name = _normalize_repo_name(data.get("name")) or repo
@@ -262,9 +318,16 @@ async def fetch_repo_info(hass: HomeAssistant, repo_url: str) -> RepoInfo:
if isinstance(data.get("owner"), dict) and data["owner"].get("login"):
info.owner = data["owner"]["login"]
else:
- # If API blocked, at least keep defaults, provider remains github
+ # If API blocked, still set reasonable defaults
if status == 403:
_LOGGER.debug("GitHub API blocked/rate-limited for repo info %s/%s", owner, repo)
+ info.default_branch = "main"
+
+ # If description missing, fetch from GitHub HTML
+ if not info.description:
+ desc = await _github_description_html(hass, owner, repo)
+ if desc:
+ info.description = desc
ver, src = await _github_latest_version(hass, owner, repo)
info.latest_version = ver