This commit is contained in:
2026-01-15 12:17:58 +00:00
parent 1305656d10
commit ec60211339

View File

@@ -82,9 +82,6 @@ async def _safe_text(session, url: str, *, headers: dict | None = None, timeout:
def _extract_tag_from_github_url(url: str) -> str | None:
# Examples:
# https://github.com/owner/repo/releases/tag/v1.2.3
# https://github.com/owner/repo/tag/v1.2.3 (rare)
m = re.search(r"/releases/tag/([^/?#]+)", url)
if m:
return m.group(1)
@@ -94,11 +91,83 @@ def _extract_tag_from_github_url(url: str) -> str | None:
return None
def _strip_html(s: str) -> str:
# minimal HTML entity cleanup for meta descriptions
out = (
s.replace("&", "&")
.replace(""", '"')
.replace("'", "'")
.replace("&lt;", "<")
.replace("&gt;", ">")
)
return re.sub(r"\s+", " ", out).strip()
def _extract_meta(html: str, *, prop: str | None = None, name: str | None = None) -> str | None:
    """Return the cleaned ``content`` of the first matching ``<meta>`` tag.

    Handles both attribute orders, e.g.
    ``<meta property="og:description" content="...">`` and
    ``<meta content="..." name="description">``. Matching is
    case-insensitive.

    Args:
        html: HTML document text to search.
        prop: Value of the ``property`` attribute to look for; tried first.
        name: Value of the ``name`` attribute to look for; tried only if
            *prop* is not given or did not match.

    Returns:
        The ``content`` value passed through ``_strip_html``, or ``None``
        when no tag matches (including tags whose content is empty).
    """
    # The value may contain the *other* quote character (content="It's ..."),
    # so consume up to the same quote that opened the value rather than
    # stopping at the first quote of either kind.
    content_attr = r'content=(?P<q>["\'])(?P<content>(?:(?!(?P=q)).)+)(?P=q)'

    for attr, wanted in (("property", prop), ("name", name)):
        if not wanted:
            continue
        key_attr = attr + r'=["\']' + re.escape(wanted) + r'["\']'
        # Try key-before-content first, then content-before-key.
        for pattern in (
            r"<meta[^>]+" + key_attr + r"[^>]+" + content_attr,
            r"<meta[^>]+" + content_attr + r"[^>]+" + key_attr,
        ):
            # DOTALL lets the content value span lines.
            m = re.search(pattern, html, flags=re.IGNORECASE | re.DOTALL)
            if m:
                return _strip_html(m.group("content"))

    return None
async def _github_description_html(hass: HomeAssistant, owner: str, repo: str) -> str | None:
    """Read a repository description from its public GitHub HTML page.

    The GitHub API may be rate-limited, so this fetches
    ``https://github.com/{owner}/{repo}`` and reads the description from
    the page's meta tags.

    Returns:
        The ``og:description`` meta content if present, otherwise the
        ``description`` meta content, otherwise ``None``. Also ``None``
        when the page body is empty or the status is not 200.
    """
    page, status = await _safe_text(
        async_get_clientsession(hass),
        f"https://github.com/{owner}/{repo}",
        headers={
            "User-Agent": UA,
            "Accept": "text/html,application/xhtml+xml",
        },
    )
    if status != 200 or not page:
        return None

    # Prefer the Open Graph description; an empty result becomes None.
    return (
        _extract_meta(page, prop="og:description")
        or _extract_meta(page, name="description")
        or None
    )
async def _github_latest_version_atom(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]:
"""
Uses GitHub public Atom feed (no api.github.com).
This avoids API rate limits and works for most public repos.
"""
session = async_get_clientsession(hass)
headers = {"User-Agent": UA, "Accept": "application/atom+xml,text/xml;q=0.9,*/*;q=0.8"}
@@ -111,8 +180,6 @@ async def _github_latest_version_atom(hass: HomeAssistant, owner: str, repo: str
except Exception:
return None, None
# Atom namespace can vary; search entries robustly
# Find first <entry> then a <link href="..."> that points to a release tag.
for entry in root.findall(".//{*}entry"):
for link in entry.findall(".//{*}link"):
href = link.attrib.get("href")
@@ -126,9 +193,6 @@ async def _github_latest_version_atom(hass: HomeAssistant, owner: str, repo: str
async def _github_latest_version_redirect(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]:
"""
Fallback: HEAD /releases/latest and parse Location header.
"""
session = async_get_clientsession(hass)
headers = {"User-Agent": UA}
url = f"https://github.com/{owner}/{repo}/releases/latest"
@@ -146,9 +210,6 @@ async def _github_latest_version_redirect(hass: HomeAssistant, owner: str, repo:
async def _github_latest_version_api(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]:
"""
Optional API path (may be rate-limited). Keep as last resort.
"""
session = async_get_clientsession(hass)
headers = {"Accept": "application/vnd.github+json", "User-Agent": UA}
@@ -168,12 +229,6 @@ async def _github_latest_version_api(hass: HomeAssistant, owner: str, repo: str)
async def _github_latest_version(hass: HomeAssistant, owner: str, repo: str) -> tuple[str | None, str | None]:
"""
Durable strategy:
1) Atom feed (no API)
2) Redirect parse (no API)
3) API fallback
"""
tag, src = await _github_latest_version_atom(hass, owner, repo)
if tag:
return tag, src
@@ -252,9 +307,10 @@ async def fetch_repo_info(hass: HomeAssistant, repo_url: str) -> RepoInfo:
try:
if provider == "github":
# Repo details: try API first, but don't fail if blocked
# Try API repo details (may be rate-limited)
headers = {"Accept": "application/vnd.github+json", "User-Agent": UA}
data, status = await _safe_json(session, f"https://api.github.com/repos/{owner}/{repo}", headers=headers)
if isinstance(data, dict):
info.description = data.get("description")
info.repo_name = _normalize_repo_name(data.get("name")) or repo
@@ -262,9 +318,16 @@ async def fetch_repo_info(hass: HomeAssistant, repo_url: str) -> RepoInfo:
if isinstance(data.get("owner"), dict) and data["owner"].get("login"):
info.owner = data["owner"]["login"]
else:
# If API blocked, at least keep defaults, provider remains github
# If API blocked, still set reasonable defaults
if status == 403:
_LOGGER.debug("GitHub API blocked/rate-limited for repo info %s/%s", owner, repo)
info.default_branch = "main"
# If description missing, fetch from GitHub HTML
if not info.description:
desc = await _github_description_html(hass, owner, repo)
if desc:
info.description = desc
ver, src = await _github_latest_version(hass, owner, repo)
info.latest_version = ver