diff --git a/custom_components/bahmcloud_store/views.py b/custom_components/bahmcloud_store/views.py index 4aca65e..fdb1b70 100644 --- a/custom_components/bahmcloud_store/views.py +++ b/custom_components/bahmcloud_store/views.py @@ -16,29 +16,20 @@ _LOGGER = logging.getLogger(__name__) def _render_markdown_server_side(md: str) -> str | None: - """ - Render Markdown -> sanitized HTML. - Server-side rendering is required because HA frontend context for custom panels - may not expose marked/DOMPurify/ha-markdown consistently. - """ + """Render Markdown -> sanitized HTML (server-side).""" text = (md or "").strip() if not text: return None html: str | None = None - # 1) Try python-markdown + # 1) python-markdown try: import markdown as mdlib # type: ignore html = mdlib.markdown( text, - extensions=[ - "fenced_code", - "tables", - "sane_lists", - "toc", - ], + extensions=["fenced_code", "tables", "sane_lists", "toc"], output_format="html5", ) except Exception as e: @@ -48,7 +39,7 @@ def _render_markdown_server_side(md: str) -> str | None: if not html: return None - # 2) Sanitize via bleach if available + # 2) Sanitize via bleach try: import bleach # type: ignore @@ -102,66 +93,109 @@ def _render_markdown_server_side(md: str) -> str | None: strip=True, ) - # Make links safe sanitized = sanitized.replace( ' str | None: - """ - Ensure README is always a plain markdown string. +_TEXT_KEYS = ("readme", "markdown", "text", "content", "data", "body") - Handles cases where upstream functions return: - - str (already fine) - - dict from GitHub/GitLab "contents" endpoints (base64 content) - - dict with a nested 'readme' field - """ - if maybe_md is None: + +def _maybe_decode_base64(content: str, encoding: Any) -> str | None: + if not isinstance(content, str): + return None + enc = "" + if isinstance(encoding, str): + enc = encoding.strip().lower() + if "base64" not in enc: + return None + try: + raw = base64.b64decode(content.encode("utf-8"), validate=False) + return raw.decode("utf-8", errors="replace") + except Exception: return None - if isinstance(maybe_md, str): - return maybe_md - if isinstance(maybe_md, dict): - # Common pattern: { "content": "", "encoding": "base64" } - content = maybe_md.get("content") - encoding = maybe_md.get("encoding") +def _extract_text_recursive(obj: Any, depth: int = 0) -> str | None: + """ + Robust extraction for README markdown. - if isinstance(content, str) and isinstance(encoding, str) and encoding.lower() == "base64": - try: - raw = base64.b64decode(content.encode("utf-8"), validate=False) - return raw.decode("utf-8", errors="replace") - except Exception: - pass + Handles: + - str / bytes + - dict with: + - {content: "...", encoding: "base64"} (possibly nested) + - {readme: "..."} etc. + - list of dicts (pick first matching) + """ + if obj is None: + return None - # Another common pattern: { "readme": "..." } - nested = maybe_md.get("readme") - if isinstance(nested, str): - return nested + if isinstance(obj, bytes): + try: + return obj.decode("utf-8", errors="replace") + except Exception: + return None - # Some APIs use { "content": "plain text" } without encoding - if isinstance(content, str) and not encoding: + if isinstance(obj, str): + return obj + + if depth > 8: + return None + + if isinstance(obj, dict): + # 1) If it looks like "file content" + content = obj.get("content") + encoding = obj.get("encoding") + + # Base64 decode if possible + decoded = _maybe_decode_base64(content, encoding) + if decoded: + return decoded + + # content may already be plain text + if isinstance(content, str) and (not isinstance(encoding, str) or not encoding.strip()): + # Heuristic: treat as markdown if it has typical markdown chars, otherwise still return return content - # Anything else (list/int/etc.) is unsupported + # 2) direct text keys (readme/markdown/text/body/data) + for k in _TEXT_KEYS: + v = obj.get(k) + if isinstance(v, str): + return v + if isinstance(v, bytes): + try: + return v.decode("utf-8", errors="replace") + except Exception: + pass + + # 3) Sometimes nested under "file" / "result" / "payload" etc. + for v in obj.values(): + out = _extract_text_recursive(v, depth + 1) + if out: + return out + + return None + + if isinstance(obj, list): + for item in obj: + out = _extract_text_recursive(item, depth + 1) + if out: + return out + return None + return None class StaticAssetsView(HomeAssistantView): url = "/api/bahmcloud_store_static/{path:.*}" name = "api:bahmcloud_store_static" - - # Keep your working behavior (static assets must load without auth for panel modules) requires_auth = False async def get(self, request: web.Request, path: str) -> web.Response: @@ -174,7 +208,6 @@ class StaticAssetsView(HomeAssistantView): target = (base / req_path).resolve() - # Prevent traversal if not str(target).startswith(str(base_resolved)): return web.Response(status=404) @@ -185,7 +218,6 @@ class StaticAssetsView(HomeAssistantView): _LOGGER.error("BCS static asset not found: %s", target) return web.Response(status=404) - # IMPORTANT: content_type must NOT include charset (aiohttp restriction) content_type = "text/plain" charset = None @@ -219,11 +251,7 @@ class BCSApiView(HomeAssistantView): async def get(self, request: web.Request) -> web.Response: return web.json_response( - { - "ok": True, - "version": self.core.version, - "repos": self.core.list_repos_public(), - } + {"ok": True, "version": self.core.version, "repos": self.core.list_repos_public()} ) async def post(self, request: web.Request) -> web.Response: @@ -273,15 +301,16 @@ class BCSReadmeView(HomeAssistantView): maybe_md = await self.core.fetch_readme_markdown(repo_id) - md = _coerce_readme_to_text(maybe_md) - if not md: - # Provide a useful debug hint without leaking internal objects + md = _extract_text_recursive(maybe_md) + if not md or not md.strip(): t = type(maybe_md).__name__ return web.json_response( - {"ok": False, "message": f"README not found or invalid format (got {t})."}, + {"ok": False, "message": f"README not found or unsupported format (got {t})."}, status=404, ) - html = _render_markdown_server_side(md) + # Ensure strict JSON string output (avoid accidental objects) + md_str = str(md) - return web.json_response({"ok": True, "readme": md, "html": html}) \ No newline at end of file + html = _render_markdown_server_side(md_str) + return web.json_response({"ok": True, "readme": md_str, "html": html})