Spaces:

OrganizedProgrammers
/

ApplyCRs

Running

App Files Files Community

heymenn committed 5 days ago

Commit

f8638ca

1 Parent(s): f646c65

modify UI, independent downloads away from docfinder, warnings, retry and manual upload

Browse files

Files changed (6) hide show

app.py +211 -18
scripts/etsi_client.py +495 -0
scripts/fetch_crs.py +48 -80
scripts/finalize_ts.py +25 -11
scripts/orchestrate_cr.py +273 -11
scripts/ts_applicator.py +131 -22

app.py CHANGED Viewed

@@ -23,6 +23,30 @@ from pathlib import Path
 import streamlit as st
 # ── Scripts dir (same folder as app.py / scripts/) ───────────────────────────
 SCRIPTS_DIR = Path(__file__).parent / "scripts"
 sys.path.insert(0, str(SCRIPTS_DIR))
@@ -74,7 +98,7 @@ def save_state(sid: str, state: dict) -> None:
 def new_state(sid: str) -> dict:
     return {
         "session_id": sid,
-        "status": "upload",
         "excel_filename": None,
         "person_name": "Ly Thanh PHAN",
         "cr_list": [],
@@ -126,21 +150,34 @@ def tail_log(log_path: str, n: int = 100) -> str:
 def parse_log_results(log_path: str) -> list[dict]:
-    """Extract per-TS result lines from the Final Report section."""
     p = Path(log_path)
     if not p.exists():
         return []
     lines = p.read_text(errors="replace").splitlines()
     results, in_report = [], False
     for line in lines:
-        if "Final Report" in line:
             in_report = True
-        if in_report:
-            for tag in ("OK", "WARN", "FAIL"):
-                if f"[{tag}]" in line:
-                    ts_name = line.split(f"[{tag}]", 1)[-1].strip()
-                    results.append({"Status": tag, "TS": ts_name})
-                    break
     return results
@@ -231,6 +268,11 @@ if "sid" not in st.session_state:
 sid: str = st.session_state.sid
 state: dict = st.session_state.state
 # ── Sidebar ───────────────────────────────────────────────────────────────────
 with st.sidebar:
     st.header("Session")
@@ -251,10 +293,37 @@ with st.sidebar:
 # ── State machine ─────────────────────────────────────────────────────────────
 status: str = state["status"]
 # ════════════════════════════════════════════════════════════════════════════
 # UPLOAD
 # ════════════════════════════════════════════════════════════════════════════
-if status == "upload":
     st.subheader("Step 1 — Upload contribution list")
     uploaded = st.file_uploader(
@@ -330,12 +399,16 @@ elif status == "preview":
                 "--output-dir", str(output_dir),
             ]
             log_file = open(str(log_path), "w")
             proc = subprocess.Popen(
                 cmd,
                 stdout=log_file,
                 stderr=subprocess.STDOUT,
-                env=os.environ.copy(),
             )
             log_file.close()
@@ -405,25 +478,46 @@ elif status in ("done", "error"):
     else:
         st.error(f"❌ Pipeline finished with errors (return code: {rc})")
-    # Per-TS results table
-    results = parse_log_results(log_path)
     if results:
         st.subheader("Results per TS")
         import pandas as pd
-        df = pd.DataFrame(results)
         def _color_status(val):
             return {
                 "OK":   "background-color: #d4edda; color: #155724",
                 "WARN": "background-color: #fff3cd; color: #856404",
                 "FAIL": "background-color: #f8d7da; color: #721c24",
             }.get(val, "")
-        st.dataframe(
-            df.style.map(_color_status, subset=["Status"]),
-            use_container_width=True,
-        )
     # Download ZIP
     if output_dir.exists() and any(output_dir.rglob("*")):
@@ -446,6 +540,105 @@ elif status in ("done", "error"):
         else:
             st.text("Log not found.")
     # Start new session
     st.divider()
     if st.button("Start new session"):

 import streamlit as st
+# ── EOL credential verification ───────────────────────────────────────────────
+def verify_eol_credentials(username: str, password: str) -> bool:
+    import json as _json
+    import urllib3
+    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+    import requests as _req
+    session = _req.Session()
+    session.get(
+        "https://portal.etsi.org/LoginRedirection.aspx",
+        verify=False,
+        timeout=10,
+    )
+    resp = session.post(
+        "https://portal.etsi.org/ETSIPages/LoginEOL.ashx",
+        data=_json.dumps({"username": username, "password": password}),
+        headers={"Content-Type": "application/json; charset=UTF-8"},
+        verify=False,
+        allow_redirects=False,
+        timeout=10,
+    )
+    return resp.text.strip() != "Failed"
 # ── Scripts dir (same folder as app.py / scripts/) ───────────────────────────
 SCRIPTS_DIR = Path(__file__).parent / "scripts"
 sys.path.insert(0, str(SCRIPTS_DIR))
 def new_state(sid: str) -> dict:
     return {
         "session_id": sid,
+        "status": "login",
         "excel_filename": None,
         "person_name": "Ly Thanh PHAN",
         "cr_list": [],
 def parse_log_results(log_path: str) -> list[dict]:
+    """Extract per-TS result lines and warning messages from the Final/Retry Report.
+
+    Only lines that come after a line containing "Final Report" or
+    "Retry Summary" are inspected.
+
+    Returns:
+        One dict per tagged line, in log order, with keys "Status" (one of
+        OK / WARN / FAIL / SKIP), "TS" (the text following the tag) and
+        "warnings" (the following lines that start with "! ", prefix removed).
+        An empty list when the log file does not exist.
+    """
     p = Path(log_path)
     if not p.exists():
         return []
     lines = p.read_text(errors="replace").splitlines()
     results, in_report = [], False
+    # Result entry currently being filled; flushed when the next tag is seen.
+    current = None
     for line in lines:
+        if "Final Report" in line or "Retry Summary" in line:
             in_report = True
+            continue
+        if not in_report:
+            continue
+        matched = False
+        for tag in ("OK", "WARN", "FAIL", "SKIP"):
+            if f"[{tag}]" in line:
+                if current is not None:
+                    results.append(current)
+                ts_name = line.split(f"[{tag}]", 1)[-1].strip()
+                current = {"Status": tag, "TS": ts_name, "warnings": []}
+                matched = True
+                break
+        # Untagged lines belong to the most recent entry; only "! " lines are kept.
+        if not matched and current is not None:
+            stripped = line.strip()
+            if stripped.startswith("! "):
+                current["warnings"].append(stripped[2:])
+    # Flush the last entry, which has no following tag to trigger the append.
+    if current is not None:
+        results.append(current)
     return results
 sid: str = st.session_state.sid
 state: dict = st.session_state.state
+# Credential guard: if credentials are not in memory (e.g. page refresh after login),
+# force re-login regardless of the persisted status.
+if state.get("status") not in ("login",) and "eol_user" not in st.session_state:
+    state["status"] = "login"
 # ── Sidebar ───────────────────────────────────────────────────────────────────
 with st.sidebar:
     st.header("Session")
 # ── State machine ─────────────────────────────────────────────────────────────
 status: str = state["status"]
+# ════════════════════════════════════════════════════════════════════════════
+# LOGIN
+# ════════════════════════════════════════════════════════════════════════════
+if status == "login":
+    st.subheader("Connect with your ETSI EOL account")
+    st.info(
+        "Your credentials are used only for this session and are never stored on disk.",
+        icon="🔒",
+    )
+    username = st.text_input("EOL Username")
+    password = st.text_input("EOL Password", type="password")
+    if st.button("Connect", type="primary"):
+        if not username or not password:
+            st.error("Please enter both username and password.")
+        else:
+            with st.spinner("Verifying credentials…"):
+                ok = verify_eol_credentials(username, password)
+            if ok:
+                st.session_state.eol_user = username
+                st.session_state.eol_password = password
+                state["status"] = "upload"
+                save_state(sid, state)
+                st.rerun()
+            else:
+                st.error("Login failed — check your EOL username and password.")
 # ════════════════════════════════════════════════════════════════════════════
 # UPLOAD
 # ════════════════════════════════════════════════════════════════════════════
+elif status == "upload":
     st.subheader("Step 1 — Upload contribution list")
     uploaded = st.file_uploader(
                 "--output-dir", str(output_dir),
             ]
+            env = os.environ.copy()
+            env["EOL_USER"] = st.session_state.eol_user
+            env["EOL_PASSWORD"] = st.session_state.eol_password
             log_file = open(str(log_path), "w")
             proc = subprocess.Popen(
                 cmd,
                 stdout=log_file,
                 stderr=subprocess.STDOUT,
+                env=env,
             )
             log_file.close()
     else:
         st.error(f"❌ Pipeline finished with errors (return code: {rc})")
+    # Per-TS results table — merge all pipeline logs so retry results don't
+    # replace original ones; later logs (pipeline_retry.log) supersede earlier
+    # ones (pipeline.log) for the same TS key.
+    _merged: dict[str, dict] = {}
+    for _lf in sorted(session_dir(sid).glob("pipeline*.log")):
+        for _r in parse_log_results(str(_lf)):
+            _merged[_r["TS"]] = _r
+    results = list(_merged.values())
     if results:
         st.subheader("Results per TS")
         import pandas as pd
+        n_warn = sum(1 for r in results if r["warnings"])
+        warn_label = f"Warnings ({n_warn})" if n_warn else "Warnings"
+        tab_summary, tab_warnings = st.tabs(["Summary", warn_label])
         def _color_status(val):
             return {
                 "OK":   "background-color: #d4edda; color: #155724",
                 "WARN": "background-color: #fff3cd; color: #856404",
                 "FAIL": "background-color: #f8d7da; color: #721c24",
+                "SKIP": "background-color: #e2e3e5; color: #383d41",
             }.get(val, "")
+        with tab_summary:
+            df = pd.DataFrame([{"Status": r["Status"], "TS": r["TS"]} for r in results])
+            st.dataframe(
+                df.style.map(_color_status, subset=["Status"]),
+                use_container_width=True,
+            )
+        with tab_warnings:
+            warned = [r for r in results if r["warnings"]]
+            if warned:
+                for r in warned:
+                    with st.expander(f"⚠️ {r['TS']} — {len(r['warnings'])} warning(s)"):
+                        for w in r["warnings"]:
+                            st.text(w)
+            else:
+                st.success("No warnings.")
     # Download ZIP
     if output_dir.exists() and any(output_dir.rglob("*")):
         else:
             st.text("Log not found.")
+    # ── TS Recovery ───────────────────────────────────────────────────────────
+    failed_ts_path = output_dir / "failed_ts.json"
+    if failed_ts_path.exists():
+        failed_ts_entries = json.loads(failed_ts_path.read_text())
+        if failed_ts_entries:
+            st.divider()
+            st.subheader("⚠️ Recover failed TS downloads")
+            st.info(
+                f"{len(failed_ts_entries)} TS(s) could not be downloaded. "
+                "Retry or upload each one manually, then apply the CRs."
+            )
+            for entry in failed_ts_entries:
+                spec_key  = f"{entry['spec_number']} v{entry['version']}"
+                dest_path = Path(entry["spec_dir"]) / entry["expected_filename"]
+                ready     = dest_path.exists()
+                label = f"{'✅' if ready else '❌'}  TS {spec_key} — CRs: {', '.join(entry['cr_uids'])}"
+                with st.expander(label, expanded=not ready):
+                    col1, col2 = st.columns(2)
+                    with col1:
+                        if st.button("🔄 Retry download",
+                                     key=f"retry_{entry['spec_compact']}_{entry['version']}"):
+                            from fetch_crs import download_ts as _dl_ts
+                            with st.spinner(f"Downloading TS {spec_key}…"):
+                                fn, note = _dl_ts(
+                                    entry["spec_number"], entry["version"],
+                                    Path(entry["spec_dir"]),
+                                    st.session_state.eol_user,
+                                    st.session_state.eol_password,
+                                )
+                            if fn:
+                                st.success(f"Downloaded: {fn}")
+                                st.rerun()
+                            else:
+                                st.error(f"Failed: {note}")
+                    with col2:
+                        uploaded_ts = st.file_uploader(
+                            f"Or upload `{entry['expected_filename']}`",
+                            type=["docx"],
+                            key=f"upload_{entry['spec_compact']}_{entry['version']}",
+                        )
+                        if uploaded_ts is not None:
+                            Path(entry["spec_dir"]).mkdir(parents=True, exist_ok=True)
+                            dest_path.write_bytes(uploaded_ts.read())
+                            st.success("Saved ✓")
+                            st.rerun()
+            # Global apply button — enabled when ≥1 TS is now on disk
+            ready_entries = [
+                e for e in failed_ts_entries
+                if (Path(e["spec_dir"]) / e["expected_filename"]).exists()
+            ]
+            remaining = len(failed_ts_entries) - len(ready_entries)
+            if ready_entries:
+                if remaining:
+                    st.warning(f"{len(ready_entries)} ready, {remaining} will be skipped.")
+                else:
+                    st.success(f"All {len(ready_entries)} TS(s) ready.")
+                if st.button("▶ Apply CRs to recovered TSs", type="primary"):
+                    retry_log = str(session_dir(sid) / "pipeline_retry.log")
+                    _rc_path(sid).unlink(missing_ok=True)   # clear old returncode
+                    cmd = [
+                        sys.executable,
+                        str(SCRIPTS_DIR / "orchestrate_cr.py"),
+                        "--output-dir", state["output_dir"],
+                        "--retry-mode",
+                    ]
+                    env = os.environ.copy()
+                    env["EOL_USER"]     = st.session_state.eol_user
+                    env["EOL_PASSWORD"] = st.session_state.eol_password
+                    log_file = open(retry_log, "w")
+                    proc = subprocess.Popen(
+                        cmd, stdout=log_file, stderr=subprocess.STDOUT, env=env
+                    )
+                    log_file.close()
+                    threading.Thread(
+                        target=_run_and_save_rc,
+                        args=(proc, _rc_path(sid)),
+                        daemon=True,
+                    ).start()
+                    st.session_state.proc = proc
+                    state["status"]     = "running"
+                    state["pid"]        = proc.pid
+                    state["log_path"]   = retry_log
+                    state["started_at"] = datetime.now().isoformat()
+                    save_state(sid, state)
+                    st.rerun()
+            else:
+                st.warning("No TSs available yet — retry download or upload DOCX files above.")
     # Start new session
     st.divider()
     if st.button("Start new session"):

scripts/etsi_client.py ADDED Viewed

	@@ -0,0 +1,495 @@

+"""
+etsi_client.py — ETSI document download helpers for ApplyCRs.
+Provides:
+  ETSIDocFinder  — CR TDoc downloads via docbox.etsi.org
+  ETSISpecFinder — TS DOCX downloads via portal.etsi.org WKI chain
+"""
+import json
+import os
+import re
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from urllib.parse import urljoin
+import requests
+import urllib3
+from bs4 import BeautifulSoup
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+def _get_proxies() -> dict:
+    """Return a requests-compatible proxies dict from $http_proxy / $HTTP_PROXY."""
+    proxy = os.environ.get("http_proxy") or os.environ.get("HTTP_PROXY") or ""
+    if not proxy:
+        return {}
+    return {"http": proxy, "https": proxy}
+class ETSIDocFinder:
+    HEADERS = {
+        "User-Agent": (
+            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+            "AppleWebKit/537.36 (KHTML, like Gecko) "
+            "Chrome/136.0.0.0 Safari/537.36"
+        )
+    }
+    def __init__(self, eol_user: str, eol_password: str):
+        self.eol_user = eol_user
+        self.eol_password = eol_password
+        self.main_ftp_url = "https://docbox.etsi.org/SET"
+        req_data = self.connect()
+        self.session = req_data["session"]
+    def connect(self):
+        session = requests.Session()
+        session.headers.update(self.HEADERS)
+        session.proxies.update(_get_proxies())
+        # Seed DNN session cookies — docbox requires the portal session to be
+        # initialised with domain=docbox.etsi.org so the .DOTNETNUKE cookie
+        # is scoped to .etsi.org and accepted by docbox.etsi.org as well.
+        login_redir_url = (
+            "https://portal.etsi.org/LoginRedirection.aspx"
+            "?domain=docbox.etsi.org&ReturnUrl=/"
+        )
+        session.get(login_redir_url, verify=False, timeout=15)
+        req = session.post(
+            "https://portal.etsi.org/ETSIPages/LoginEOL.ashx",
+            data=json.dumps({"username": self.eol_user, "password": self.eol_password}),
+            headers={
+                "Content-Type": "application/json; charset=UTF-8",
+                "Referer": login_redir_url,
+            },
+            verify=False,
+            allow_redirects=False,
+            timeout=15,
+        )
+        if req.text == "Failed":
+            return {
+                "error": True,
+                "session": session,
+                "message": "Login failed! Check your credentials",
+            }
+        self.session = session
+        return {"error": False, "session": session, "message": "Login successful"}
+    def download_document(self, url: str) -> bytes:
+        """Download a docbox file using the authenticated session.
+        If the session has expired the portal redirects to LoginRedirection —
+        we detect this and re-authenticate before retrying.
+        """
+        resp = self.session.get(url, verify=False, timeout=30, allow_redirects=True)
+        if resp.url and "LoginRedirection" in resp.url:
+            self.connect()
+            resp = self.session.get(url, verify=False, timeout=30, allow_redirects=True)
+        return resp.content
+    def get_workgroup(self, doc: str):
+        main_tsg = (
+            "SET-WG-R"
+            if any(doc.startswith(kw) for kw in ["SETREQ", "SCPREQ"])
+            else "SET-WG-T"
+            if any(doc.startswith(kw) for kw in ["SETTEC", "SCPTEC"])
+            else "SET"
+            if any(doc.startswith(kw) for kw in ["SET", "SCP"])
+            else None
+        )
+        if main_tsg is None:
+            return None, None, None
+        regex = re.search(r"\(([^)]+)\)", doc)
+        workgroup = "20" + regex.group(1)
+        return main_tsg, workgroup, doc
+    def find_workgroup_url(self, main_tsg, workgroup):
+        url = f"{self.main_ftp_url}/{main_tsg}/05-CONTRIBUTIONS"
+        response = self.session.get(url, verify=False, timeout=15)
+        if "LoginRedirection" in response.url:
+            self.connect()
+            response = self.session.get(url, verify=False, timeout=15)
+        soup = BeautifulSoup(response.text, "html.parser")
+        for item in soup.find_all("tr"):
+            link = item.find("a")
+            if link and workgroup in link.get_text():
+                return (
+                    f"{self.main_ftp_url}/{main_tsg}/05-CONTRIBUTIONS/{link.get_text()}"
+                )
+        return f"{self.main_ftp_url}/{main_tsg}/05-CONTRIBUTIONS/{workgroup}"
+    def get_docs_from_url(self, url):
+        try:
+            response = self.session.get(url, verify=False, timeout=15)
+            soup = BeautifulSoup(response.text, "html.parser")
+            return [item.get_text() for item in soup.select("tr td a")]
+        except Exception as e:
+            print(f"Error accessing {url}: {e}")
+            return []
+    def search_document(self, doc_id: str):
+        original = doc_id
+        main_tsg, workgroup, doc = self.get_workgroup(doc_id)
+        urls = []
+        if main_tsg:
+            wg_url = self.find_workgroup_url(main_tsg, workgroup)
+            if wg_url:
+                entries = self.get_docs_from_url(wg_url)
+                for entry in entries:
+                    if doc in entry.lower() or original in entry:
+                        doc_url = f"{wg_url}/{entry}"
+                        urls.append(doc_url)
+                    elif "." not in entry.rstrip("/"):
+                        sub_url = f"{wg_url}/{entry}"
+                        files = self.get_docs_from_url(sub_url)
+                        for f in files:
+                            if doc in f.lower() or original in f:
+                                urls.append(f"{sub_url}/{f}")
+        return (
+            urls[0]
+            if len(urls) == 1
+            else urls[-1]
+            if len(urls) > 1
+            else f"Document {doc_id} not found"
+        )
+class ETSISpecFinder:
+    def __init__(self, eol_user: str, eol_password: str):
+        self.eol_user = eol_user
+        self.eol_password = eol_password
+        self.main_url = "https://www.etsi.org/deliver/etsi_ts"
+        self.second_url = "https://www.etsi.org/deliver/etsi_tr"
+        self.headers = {
+            "User-Agent": (
+                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+                "AppleWebKit/537.36 (KHTML, like Gecko) "
+                "Chrome/136.0.0.0 Safari/537.36"
+            )
+        }
+    def get_spec_path(self, doc_id: str):
+        if "-" in doc_id:
+            position, part = doc_id.split("-")
+        else:
+            position, part = doc_id, None
+        position = position.replace(" ", "")
+        if part:
+            if len(part) == 1:
+                part = "0" + part
+        spec_folder = position + part if part is not None else position
+        return (
+            f"{int(position) - (int(position) % 100)}_"
+            f"{int(position) - (int(position) % 100) + 99}/{spec_folder}"
+        )
+    def get_docs_from_url(self, url):
+        try:
+            response = requests.get(
+                url, verify=False, timeout=15, proxies=_get_proxies()
+            )
+            soup = BeautifulSoup(response.text, "html.parser")
+            docs = [item.get_text() for item in soup.find_all("a")][1:]
+            return docs
+        except Exception as e:
+            print(f"Error accessing {url}: {e}")
+            return []
+    def _normalise_version(self, version: str) -> str:
+        """Normalise a user-supplied version string to ETSI zero-padded format.
+        '17.6.0' -> '17.06.00'  (the '_60' release suffix is ignored during matching)
+        Already-normalised strings like '17.06.00' are returned unchanged."""
+        parts = version.strip("/").split(".")
+        if len(parts) == 3:
+            try:
+                return f"{int(parts[0]):02d}.{int(parts[1]):02d}.{int(parts[2]):02d}"
+            except ValueError:
+                pass
+        return version.strip("/")
+    def _pick_release(self, releases: list, version: str = None) -> str:
+        """Return the release folder matching version, or the latest if not found/specified."""
+        if version:
+            target = self._normalise_version(version)
+            for r in releases:
+                folder = r.strip("/").split("_")[0]
+                if folder == target:
+                    return r
+        return releases[-1]
+    def search_document(self, doc_id: str, version: str = None):
+        original = doc_id
+        url = f"{self.main_url}/{self.get_spec_path(original)}/"
+        url2 = f"{self.second_url}/{self.get_spec_path(original)}/"
+        print(url)
+        print(url2)
+        releases = self.get_docs_from_url(url)
+        if releases:
+            release = self._pick_release(releases, version)
+            files = self.get_docs_from_url(url + release)
+            for f in files:
+                if f.endswith(".pdf"):
+                    return url + release + "/" + f
+        releases = self.get_docs_from_url(url2)
+        if releases:
+            release = self._pick_release(releases, version)
+            files = self.get_docs_from_url(url2 + release)
+            for f in files:
+                if f.endswith(".pdf"):
+                    return url2 + release + "/" + f
+        return f"Specification {doc_id} not found"
+    def _get_wki_id_candidates(self, doc_id: str, version: str = None) -> tuple:
+        """Return (candidates, version_str) for a spec version (best match first)."""
+        if version:
+            version_str = version
+        else:
+            pdf_url = self.search_document(doc_id)
+            if "not found" in pdf_url.lower():
+                return [], ""
+            parts = pdf_url.rstrip("/").split("/")
+            version_folder = parts[-2]          # e.g. "18.04.00_60"
+            v_parts = version_folder.split("_")[0].split(".")  # ["18", "04", "00"]
+            try:
+                version_str = f"{int(v_parts[0])}.{int(v_parts[1])}.{int(v_parts[2])}"
+            except (ValueError, IndexError):
+                return [], ""
+        def fetch_candidates():
+            spec_num = doc_id.split("-")[0].replace(" ", "")
+            import datetime
+            today = datetime.date.today().isoformat()
+            base_params = {
+                "option":     "com_standardssearch",
+                "view":       "data",
+                "format":     "json",
+                "page":       "1",
+                "title":      "1",
+                "etsiNumber": "1",
+                "content":    "1",
+                "version":    "0",
+                "onApproval": "1",
+                "published":  "1",
+                "withdrawn":  "1",
+                "historical": "1",
+                "isCurrent":  "1",
+                "superseded": "1",
+                "startDate":  "1988-01-15",
+                "endDate":    today,
+                "harmonized": "0",
+                "keyword":    "",
+                "TB":         "",
+                "stdType":    "",
+                "frequency":  "",
+                "mandate":    "",
+                "collection": "",
+                "sort":       "1",
+            }
+            # ETSI UI sends capital-V version; try both to be safe
+            queries = [
+                f"{doc_id} V{version_str}",   # e.g. "104 005 V1.2.1"  (UI format)
+                f"{doc_id} v{version_str}",   # e.g. "104 005 v1.2.1"
+                doc_id,                        # e.g. "104 005"  (wider net)
+            ]
+            seen = {}
+            for query in queries:
+                params = {**base_params, "search": query}
+                try:
+                    resp = requests.get(
+                        "https://www.etsi.org/",
+                        params=params,
+                        headers=self.headers,
+                        verify=False,
+                        timeout=15,
+                        proxies=_get_proxies(),
+                    )
+                    data = resp.json()
+                    if data and isinstance(data, list):
+                        hits = [
+                            str(item["wki_id"])
+                            for item in data
+                            if "wki_id" in item and spec_num in json.dumps(item)
+                        ]
+                        for h in hits:
+                            seen[h] = None
+                        if hits:
+                            print(f"  wki_id search query={query!r} → {len(hits)} hit(s)")
+                            break
+                except Exception as e:
+                    print(f"Error getting wki_id for {doc_id} (query={query!r}): {e}")
+            return list(seen.keys())
+        candidates = list(dict.fromkeys(fetch_candidates()))
+        return candidates, version_str
+    def _authenticate_eol(self) -> requests.Session:
+        """Create a requests.Session authenticated to the ETSI EOL portal."""
+        session = requests.Session()
+        session.headers.update({"User-Agent": self.headers["User-Agent"]})
+        session.proxies.update(_get_proxies())
+        login_redir_url = (
+            "https://portal.etsi.org/LoginRedirection.aspx"
+            "?domain=docbox.etsi.org&ReturnUrl=/"
+        )
+        session.get(login_redir_url, verify=False, timeout=15)
+        login_resp = session.post(
+            "https://portal.etsi.org/ETSIPages/LoginEOL.ashx",
+            data=json.dumps({"username": self.eol_user, "password": self.eol_password}),
+            headers={
+                "Content-Type": "application/json; charset=UTF-8",
+                "Referer": login_redir_url,
+            },
+            verify=False,
+            allow_redirects=False,
+            timeout=15,
+        )
+        if login_resp.text.strip() == "Failed":
+            raise RuntimeError(
+                "ETSI EOL login failed — check EOL_USER / EOL_PASSWORD"
+            )
+        return session
+    def search_document_docx(self, doc_id: str, version: str = None) -> str:
+        """Download an ETSI spec as DOCX and return the local file path."""
+        candidates, version_str = self._get_wki_id_candidates(doc_id, version)
+        if not candidates:
+            return f"Specification {doc_id} not found"
+        try:
+            version_tag = "".join(f"{int(p):02d}" for p in version_str.split("."))
+        except (ValueError, AttributeError):
+            version_tag = ""
+        auth_session = self._authenticate_eol()
+        def try_wki(wki_id):
+            print(f"Trying wki_id={wki_id} for {doc_id}")
+            session = requests.Session()
+            session.headers.update({"User-Agent": self.headers["User-Agent"]})
+            session.proxies.update(_get_proxies())
+            session.cookies.update(auth_session.cookies)
+            # Step 1: LogonRedirection.asp registers the download intent server-side,
+            # generates a one-time profile_id, then 302s to NTaccount.asp.
+            # allow_redirects=True means the final response IS the NTaccount.asp page.
+            # Do NOT call NTaccount.asp again — a second call invalidates profile_id A
+            # and the server rejects the new profile_id B with "Your identifier is wrong".
+            r_logon = session.get(
+                f"https://portal.etsi.org/webapp/workprogram/LogonRedirection.asp"
+                f"?wki_id={wki_id}",
+                verify=False,
+                timeout=15,
+                allow_redirects=True,
+            )
+            meta_match = re.search(r"URL=([^\"'\s>]+)", r_logon.text)
+            if not meta_match:
+                print(
+                    f"  wki_id={wki_id}: authentication failed "
+                    f"(no URL= in NTaccount.asp), trying next"
+                )
+                return None
+            meta_url = urljoin(r_logon.url, meta_match.group(1))
+            r2 = session.get(meta_url, allow_redirects=False, verify=False, timeout=15)
+            if r2.status_code != 302:
+                print(
+                    f"  wki_id={wki_id}: unexpected status {r2.status_code}, trying next"
+                )
+                return None
+            location2 = r2.headers.get("Location", "")
+            if "processerror" in location2.lower():
+                print(f"  wki_id={wki_id}: portal rejected ({location2}), trying next")
+                return None
+            copy_url = urljoin("https://portal.etsi.org/", location2)
+            r3 = session.get(copy_url, allow_redirects=False, verify=False, timeout=15)
+            if r3.status_code == 302:
+                location3 = r3.headers.get("Location", "")
+                final_url = urljoin("https://portal.etsi.org/webapp/ewp/", location3)
+                r4 = session.get(final_url, verify=False, timeout=15)
+            else:
+                r4 = r3
+            docx_urls = re.findall(
+                r'href=["\']([^"\']*\.docx)["\']', r4.text, re.IGNORECASE
+            )
+            if not docx_urls:
+                print(f"  wki_id={wki_id}: DOCX not found in page, trying next")
+                return None
+            spec_num = doc_id.split("-")[0].replace(" ", "")
+            matching_urls = [u for u in docx_urls if spec_num in u.split("/")[-1]]
+            if not matching_urls:
+                print(
+                    f"  wki_id={wki_id}: DOCX spec mismatch "
+                    f"(expected {spec_num}), trying next"
+                )
+                return None
+            if version_tag:
+                version_candidates = [
+                    version_tag,                      # "010201"
+                    f"v{version_tag}",                # "v010201"
+                    version_str.replace(".", ""),      # "121"
+                    version_str,                       # "1.2.1"
+                    version_str.replace(".", "_"),     # "1_2_1"
+                ]
+                versioned_urls = []
+                for tag in version_candidates:
+                    versioned_urls = [
+                        u for u in matching_urls if tag in u.split("/")[-1]
+                    ]
+                    if versioned_urls:
+                        break
+                if not versioned_urls:
+                    found_names = [u.split("/")[-1] for u in matching_urls]
+                    print(
+                        f"  wki_id={wki_id}: version tag not in filenames {found_names}, "
+                        f"using first spec-matching DOCX as fallback"
+                    )
+                    versioned_urls = matching_urls
+                matching_urls = versioned_urls
+            docx_url = matching_urls[0]
+            dl = session.get(
+                docx_url,
+                headers={"Referer": r4.url},
+                verify=False,
+                timeout=60,
+            )
+            filename = docx_url.split("/")[-1]
+            tmp_path = f"/tmp/{filename}"
+            with open(tmp_path, "wb") as f:
+                f.write(dl.content)
+            print(f"  wki_id={wki_id}: success")
+            return tmp_path
+        executor = ThreadPoolExecutor(max_workers=min(len(candidates), 4))
+        try:
+            futures = {executor.submit(try_wki, wki_id): wki_id for wki_id in candidates}
+            for future in as_completed(futures):
+                result = future.result()
+                if result is not None:
+                    for f in futures:
+                        f.cancel()
+                    return result
+        finally:
+            executor.shutdown(wait=False)
+        return f"Specification {doc_id}: all {len(candidates)} wki_id candidate(s) rejected"

scripts/fetch_crs.py CHANGED Viewed

@@ -7,9 +7,9 @@ Usage:
 Steps:
     1. Parse Excel, filter Accepted CRs by person name
-    2. Download CR DOCXs via docfinder /find/tdoc/download
     3. Parse CR cover pages to extract target TS spec + version
-    4. Download TS DOCXs via docfinder /find/docx
     5. Print summary report
 """
@@ -17,15 +17,10 @@ import argparse
 import os
 import re
 import sys
-import time
 import zipfile
 from pathlib import Path
-import requests
-BASE_URL = "https://organizedprogrammers-docfinder.hf.space"
-#_proxy = os.environ.get("http_proxy") or None
-#PROXIES = {"http": _proxy, "https": os.environ.get("https_proxy") or None}
 # ---------------------------------------------------------------------------
@@ -178,7 +173,7 @@ def _parse_xlsx(path: Path, person_name: str):
 # Step 2 — Download CR DOCXs
 # ---------------------------------------------------------------------------
-def download_cr(uid: str, cr_dir: Path):
     """
     Download CR DOCX for the given UID.
@@ -193,19 +188,14 @@ def download_cr(uid: str, cr_dir: Path):
         return dest, "already existed"
     try:
-        resp = requests.post(
-            f"{BASE_URL}/find/tdoc/download",
-            json={"doc_id": uid},
-            #proxies=PROXIES,
-            timeout=60,
-        )
-    except requests.RequestException as e:
-        return None, f"network error: {e}"
-    if not resp.ok:
-        return None, f"HTTP {resp.status_code}"
-    content = resp.content
     if not content:
         return None, "empty response"
@@ -296,22 +286,11 @@ def parse_cr_cover(docx_path: Path):
 # Step 4 — Download TS DOCXs
 # ---------------------------------------------------------------------------
-def _is_html(resp: requests.Response) -> bool:
-    """Return True if the response body is an HTML page (e.g. HF Space loading page)."""
-    ct = resp.headers.get("content-type", "")
-    if "text/html" in ct:
-        return True
-    return resp.content[:5].lower() in (b"<!doc", b"<html")
 def download_ts(spec_number: str, version: str, ts_dir: Path,
-                max_retries: int = 3, retry_delay: int = 10):
     """
     Download TS DOCX for spec_number (e.g. "102 221") and version (e.g. "18.3.0").
-    Retries up to max_retries times when the HF Space returns an HTML loading page
-    instead of the DOCX binary (happens on cold-start / brief restarts).
     Returns (filename, note) or (None, error_msg).
     """
     spec_no_space = spec_number.replace(" ", "")
@@ -321,56 +300,40 @@ def download_ts(spec_number: str, version: str, ts_dir: Path,
     if dest.exists():
         return filename, "already existed"
-    last_error = "no attempts made"
-    for attempt in range(1, max_retries + 1):
-        try:
-            resp = requests.post(
-                f"{BASE_URL}/find/docx",
-                json={"doc_id": spec_number, "version": version},
-                #proxies=PROXIES,
-                timeout=120,
-            )
-        except requests.RequestException as e:
-            return None, f"network error: {e}"
-        if not resp.ok:
-            return None, f"HTTP {resp.status_code} — {resp.text[:200]}"
-        content = resp.content
-        if not content:
-            return None, "empty response"
-        # Detect HTML splash page (HF Space cold-start) — retry after a delay
-        if _is_html(resp):
-            last_error = f"got HTML instead of DOCX (attempt {attempt}/{max_retries})"
-            if attempt < max_retries:
-                print(f"\n    [retry in {retry_delay}s — HF Space loading…]", flush=True)
-                time.sleep(retry_delay)
-                continue
-            return None, f"invalid file (not a ZIP/DOCX, starts with {content[:4]!r}) after {max_retries} attempts"
-        # Good binary response
-        dest.write_bytes(content)
-        if content[:2] != b"PK":
             dest.unlink()
-            return None, f"invalid file (not a ZIP/DOCX, starts with {content[:4]!r})"
-        # Verify the TS contains the expected spec number in its first paragraph
-        try:
-            import docx as _docx
-            _doc = _docx.Document(dest)
-            first_para = _doc.paragraphs[0].text if _doc.paragraphs else ''
-            if spec_no_space not in first_para.replace(' ', ''):
-                dest.unlink()
-                return None, f"wrong TS returned by API: got {first_para[:80]!r} (expected spec {spec_no_space})"
-        except Exception:
-            pass  # Trust the ZIP check above
-        note = "downloaded" if attempt == 1 else f"downloaded (after {attempt} attempts)"
-        return filename, note
-    return None, last_error
 # ---------------------------------------------------------------------------
@@ -394,6 +357,11 @@ def main():
     person_name = args.person_name
     output_dir = Path(wsl_path(args.output_dir)).expanduser()
     cr_dir = output_dir / "CRs"
     ts_dir = output_dir / "TS"
     cr_dir.mkdir(parents=True, exist_ok=True)
@@ -419,8 +387,8 @@ def main():
     cr_results = []  # list of (uid, docx_path_or_None, note)
     for uid, title in cr_list:
-        print(f"  [{uid}] ", end="", flush=True)
-        docx_path, note = download_cr(uid, cr_dir)
         cr_results.append((uid, docx_path, note))
         if docx_path:
             print(f"OK ({note}) — {docx_path.name}")
@@ -452,7 +420,7 @@ def main():
     for (spec_number, version), uids in ts_targets.items():
         print(f"  [TS {spec_number} v{version}] ", end="", flush=True)
-        filename, note = download_ts(spec_number, version, ts_dir)
         ts_results.append((spec_number, version, filename, note))
         if filename:
             print(f"OK ({note}) — {filename}")

 Steps:
     1. Parse Excel, filter Accepted CRs by person name
+    2. Download CR DOCXs via ETSI docbox
     3. Parse CR cover pages to extract target TS spec + version
+    4. Download TS DOCXs via ETSI portal WKI chain
     5. Print summary report
 """
 import os
 import re
 import sys
 import zipfile
 from pathlib import Path
+from etsi_client import ETSIDocFinder, ETSISpecFinder
 # ---------------------------------------------------------------------------
 # Step 2 — Download CR DOCXs
 # ---------------------------------------------------------------------------
+def download_cr(uid: str, cr_dir: Path, eol_user: str, eol_password: str):
     """
     Download CR DOCX for the given UID.
         return dest, "already existed"
     try:
+        finder = ETSIDocFinder(eol_user, eol_password)
+        url = finder.search_document(uid)
+        if isinstance(url, str) and "not found" in url.lower():
+            return None, f"document not found: {uid}"
+        content = finder.download_document(url)
+    except Exception as e:
+        return None, f"download error: {e}"
     if not content:
         return None, "empty response"
 # Step 4 — Download TS DOCXs
 # ---------------------------------------------------------------------------
 def download_ts(spec_number: str, version: str, ts_dir: Path,
+                eol_user: str = "", eol_password: str = ""):
     """
     Download TS DOCX for spec_number (e.g. "102 221") and version (e.g. "18.3.0").
     Returns (filename, note) or (None, error_msg).
     """
     spec_no_space = spec_number.replace(" ", "")
     if dest.exists():
         return filename, "already existed"
+    try:
+        finder = ETSISpecFinder(eol_user, eol_password)
+        tmp_path = finder.search_document_docx(spec_number, version)
+    except Exception as e:
+        return None, f"download error: {e}"
+    if "not found" in str(tmp_path).lower() or "rejected" in str(tmp_path).lower():
+        return None, tmp_path
+    content = Path(tmp_path).read_bytes()
+    if not content:
+        return None, "empty response"
+    dest.write_bytes(content)
+    if content[:2] != b"PK":
+        dest.unlink()
+        return None, f"invalid file (not a ZIP/DOCX, starts with {content[:4]!r})"
+    # Verify the TS contains the expected spec number in its first paragraph
+    try:
+        import docx as _docx
+        _doc = _docx.Document(dest)
+        first_para = _doc.paragraphs[0].text if _doc.paragraphs else ""
+        if spec_no_space not in first_para.replace(" ", ""):
             dest.unlink()
+            return None, (
+                f"wrong TS returned: got {first_para[:80]!r} "
+                f"(expected spec {spec_no_space})"
+            )
+    except Exception:
+        pass  # Trust the ZIP check above
+    return filename, "downloaded"
 # ---------------------------------------------------------------------------
     person_name = args.person_name
     output_dir = Path(wsl_path(args.output_dir)).expanduser()
+    eol_user = os.environ.get("EOL_USER", "")
+    eol_password = os.environ.get("EOL_PASSWORD", "")
+    if not eol_user or not eol_password:
+        sys.exit("ERROR: EOL_USER and EOL_PASSWORD must be set")
     cr_dir = output_dir / "CRs"
     ts_dir = output_dir / "TS"
     cr_dir.mkdir(parents=True, exist_ok=True)
     cr_results = []  # list of (uid, docx_path_or_None, note)
     for uid, title in cr_list:
+        #print(f"  [{uid}] ", end="", flush=True)
+        docx_path, note = download_cr(uid, cr_dir, eol_user, eol_password)
         cr_results.append((uid, docx_path, note))
         if docx_path:
             print(f"OK ({note}) — {docx_path.name}")
     for (spec_number, version), uids in ts_targets.items():
         print(f"  [TS {spec_number} v{version}] ", end="", flush=True)
+        filename, note = download_ts(spec_number, version, ts_dir, eol_user, eol_password)
         ts_results.append((spec_number, version, filename, note))
         if filename:
             print(f"OK ({note}) — {filename}")

scripts/finalize_ts.py CHANGED Viewed

@@ -178,18 +178,32 @@ def _detect_meeting_separator(tbl):
 # ── TS table locators ─────────────────────────────────────────────────────────
 def find_change_history_table(ts_doc):
-    """Return ts_doc.tables[-2] (Change History / Annex V). Accepts 8 or 9 columns."""
-    tables = ts_doc.tables
-    if len(tables) < 2:
-        raise ValueError('TS has fewer than 2 tables')
-    tbl = tables[-2]
-    ncols = len(tbl.rows[-1].cells)
-    if ncols not in (8, 9):
-        raise ValueError(
-            f'Change History table has {ncols} columns, expected 8 or 9'
-        )
-    return tbl
 def find_history_table(ts_doc):

 # ── TS table locators ─────────────────────────────────────────────────────────
+class NoChangeHistoryTable(Exception):
+    """Raised when the document contains no recognisable Change History table."""
+    pass
 def find_change_history_table(ts_doc):
     """
     Locate the Change History table by scanning all tables from last to first.

     A table matches when both conditions hold:
       - its last row has 8 or 9 cells
       - the joined, lower-cased text of its first row contains at least one
         of the substrings 'cr', 'date', 'meeting', 'rev'

     Tables that have no rows are skipped rather than raising IndexError.

     Args:
         ts_doc: document object exposing a ``tables`` sequence; each table
             exposes ``rows``, each row ``cells`` and each cell ``text``.

     Returns:
         The first matching table met while scanning backward from the end.

     Raises:
         NoChangeHistoryTable: when no table matches (not ValueError), so
             callers can distinguish a structural absence from an
             unexpected error.
     """
     for tbl in reversed(ts_doc.tables):
         rows = tbl.rows
         # Guard BEFORE indexing: rows[-1] on a row-less table raises IndexError.
         if not rows:
             continue
         if len(rows[-1].cells) not in (8, 9):
             continue
         header_text = ' '.join(c.text.strip() for c in rows[0].cells).lower()
         # Plain substring test: a keyword also matches inside a longer word.
         if any(kw in header_text for kw in ('cr', 'date', 'meeting', 'rev')):
             return tbl
     raise NoChangeHistoryTable(
         'No Change History table found in this document '
         '(no table with 8 or 9 columns and CR/Date/Meeting/Rev headers)'
     )
 def find_history_table(ts_doc):

scripts/orchestrate_cr.py CHANGED Viewed

@@ -22,8 +22,11 @@ import argparse
 import contextlib
 import datetime
 import io
 import re
 import sys
 from pathlib import Path
 import docx as docx_lib
@@ -44,6 +47,7 @@ from finalize_ts import (
     update_change_history_table,
     update_history_table,
     update_title_para,
 )
 from docx_helpers import RevCounter, AUTHOR as DEFAULT_AUTHOR, DATE as DEFAULT_DATE
@@ -78,7 +82,12 @@ def main():
         description='Fully automated CR application pipeline.',
         formatter_class=argparse.RawDescriptionHelpFormatter,
     )
-    ap.add_argument('excel_path', help='Path to .xls or .xlsx contribution list')
     ap.add_argument(
         'person_name',
         nargs='?',
@@ -95,9 +104,21 @@ def main():
         default=DEFAULT_AUTHOR,
         help=f'Tracked change author name (default: "{DEFAULT_AUTHOR}")',
     )
     args = ap.parse_args()
-    excel_path = wsl_path(args.excel_path)
     output_dir = Path(wsl_path(args.output_dir)).expanduser()
     cr_dir = output_dir / 'CRs'
     ts_dir = output_dir / 'TS'   # spec subfolders created per-TS below
@@ -107,6 +128,212 @@ def main():
     author = args.author
     tc_date = DEFAULT_DATE
     # ── Step 1: Parse Excel ───────────────────────────────────────────────────
     _section('Step 1 — Parsing Excel')
     print(f'Excel:   {excel_path}')
@@ -117,9 +344,7 @@ def main():
     except Exception as e:
         sys.exit(f'ERROR parsing Excel: {e}')
-    print(f'Found {len(cr_list)} Accepted CR(s):')
-    for uid, title in cr_list:
-        print(f'  {uid}: {title[:80]}')
     if not cr_list:
         print('Nothing to process.')
@@ -130,13 +355,16 @@ def main():
     cr_paths = {}  # uid -> Path
     for uid, _ in cr_list:
-        print(f'  [{uid}] ', end='', flush=True)
-        docx_path, note = download_cr(uid, cr_dir)
         if docx_path:
             cr_paths[uid] = docx_path
-            print(f'OK ({note}) — {docx_path.name}')
-        else:
-            print(f'FAILED — {note}')
     # ── Step 3: Parse cover pages → group by target TS ───────────────────────
     _section('Step 3 — Parsing CR cover pages')
@@ -169,13 +397,41 @@ def main():
         spec_dirs[(spec_number, version)] = spec_dir
         print(f'  [TS {spec_number} v{version}] ', end='', flush=True)
-        filename, note = download_ts(spec_number, version, spec_dir)
         if filename:
             ts_paths[(spec_number, version)] = spec_dir / filename
             print(f'OK ({note}) — {spec_compact}/{filename}')
         else:
             print(f'FAILED — {note}')
     # ── Steps 5 & 6: Apply CRs + Finalise each TS ────────────────────────────
     _section('Steps 5 & 6 — Applying CRs and Finalising Metadata')
     report = []  # (ts_key, n_ok, n_skip, n_crs, out_path, log_path, errors)
@@ -258,6 +514,10 @@ def main():
             for line in log_lines:
                 print(f'  {line}')
             print(f'  -> Applied: {n_ok}  Skipped: {n_skip}')
             # 6. Finalise metadata (Change History, History, title paragraph)
@@ -285,6 +545,8 @@ def main():
                             ts_doc, meta, pub_ym, old_v, new_v, rev, author, tc_date
                         )
                         print(f'    [Change History] {uid}: {ch_cells}')
                     except Exception as e:
                         errors.append(f'[{uid}] Change History ERROR: {e}')
                         print(f'    [Change History] {uid}: ERROR — {e}')

 import contextlib
 import datetime
 import io
+import json
+import os
 import re
 import sys
+import time
 from pathlib import Path
 import docx as docx_lib
     update_change_history_table,
     update_history_table,
     update_title_para,
+    NoChangeHistoryTable,
 )
 from docx_helpers import RevCounter, AUTHOR as DEFAULT_AUTHOR, DATE as DEFAULT_DATE
         description='Fully automated CR application pipeline.',
         formatter_class=argparse.RawDescriptionHelpFormatter,
     )
+    ap.add_argument(
+        'excel_path',
+        nargs='?',
+        default=None,
+        help='Path to .xls or .xlsx contribution list (not required in --retry-mode)',
+    )
     ap.add_argument(
         'person_name',
         nargs='?',
         default=DEFAULT_AUTHOR,
         help=f'Tracked change author name (default: "{DEFAULT_AUTHOR}")',
     )
+    ap.add_argument(
+        '--retry-mode',
+        action='store_true',
+        help='Skip steps 1-4; apply CRs to TSs listed in failed_ts.json that now have their DOCX on disk',
+    )
     args = ap.parse_args()
+    if not args.retry_mode and not args.excel_path:
+        ap.error('excel_path is required when not in --retry-mode')
+    eol_user = os.environ.get("EOL_USER", "")
+    eol_password = os.environ.get("EOL_PASSWORD", "")
+    if not eol_user or not eol_password:
+        sys.exit("ERROR: EOL_USER and EOL_PASSWORD must be set")
     output_dir = Path(wsl_path(args.output_dir)).expanduser()
     cr_dir = output_dir / 'CRs'
     ts_dir = output_dir / 'TS'   # spec subfolders created per-TS below
     author = args.author
     tc_date = DEFAULT_DATE
+    # ── Retry mode — skip steps 1-4, reconstruct state from failed_ts.json ───
+    if args.retry_mode:
+        failed_ts_path = output_dir / 'failed_ts.json'
+        if not failed_ts_path.exists():
+            sys.exit('ERROR: failed_ts.json not found in output directory')
+        failed_ts_entries = json.loads(failed_ts_path.read_text())
+        if not failed_ts_entries:
+            print('No failed TSs in failed_ts.json — nothing to retry.')
+            return
+        _section('Retry mode — Steps 5 & 6 only')
+        print(f'Retrying {len(failed_ts_entries)} TS(s) from failed_ts.json')
+        ts_groups = {}
+        spec_dirs = {}
+        ts_paths = {}
+        cr_paths = {}
+        for entry in failed_ts_entries:
+            spec_number = entry['spec_number']
+            version = entry['version']
+            key = (spec_number, version)
+            ts_groups[key] = entry['cr_uids']
+            spec_dir = Path(entry['spec_dir'])
+            spec_dirs[key] = spec_dir
+            expected = spec_dir / entry['expected_filename']
+            if expected.exists():
+                ts_paths[key] = expected
+                print(f'  [TS {spec_number} v{version}] DOCX found — will apply')
+            else:
+                print(f'  [TS {spec_number} v{version}] DOCX missing — skipping')
+            # Reconstruct cr_paths for each UID
+            cr_entry_dir = Path(entry['cr_dir'])
+            for uid in entry['cr_uids']:
+                extracted = cr_entry_dir / f'{uid}_extracted.docx'
+                plain = cr_entry_dir / f'{uid}.docx'
+                if extracted.exists():
+                    cr_paths[uid] = extracted
+                elif plain.exists():
+                    cr_paths[uid] = plain
+        # ── Steps 5 & 6 (retry mode runs its own copy of the apply/finalise loop, then returns) ──────
+        report = []
+        for (spec_number, version), uids in ts_groups.items():
+            ts_key = f'TS {spec_number} v{version}'
+            spec_compact = spec_number.replace(' ', '')
+            spec_dir = spec_dirs.get((spec_number, version), ts_dir / spec_compact)
+            spec_dir.mkdir(parents=True, exist_ok=True)
+            new_v = derive_new_version(version)
+            stem = f'ts_{spec_compact}_v{new_v}_was_v{version}'
+            ts_applied = spec_dir / f'ts_{spec_compact}_v{version}_applied.docx'
+            ts_final   = spec_dir / f'{stem}.docx'
+            log_path   = spec_dir / f'{stem}.log'
+            errors = []
+            print(f'\n-- {ts_key} ({len(uids)} CR(s): {", ".join(uids)}) --')
+            if (spec_number, version) not in ts_paths:
+                msg = 'TS DOCX not on disk — skipping'
+                print(f'  SKIP: {msg}')
+                report.append((ts_key, 0, 0, len(uids), None, log_path, [msg]))
+                continue
+            ts_in = ts_paths[(spec_number, version)]
+            log_buf = io.StringIO()
+            tee = _TeeWriter(sys.stdout, log_buf)
+            with contextlib.redirect_stdout(tee):
+                log_header = (
+                    f'Pipeline Log (retry)\n'
+                    f'TS:   {spec_number}  v{version} -> v{new_v}\n'
+                    f'CRs:  {", ".join(uids)}\n'
+                    f'Date: {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}\n'
+                    f'{"=" * 60}\n'
+                )
+                print(log_header, end='')
+                combined_manifest = []
+                participating_uids = []
+                for uid in uids:
+                    if uid not in cr_paths:
+                        errors.append(f'[{uid}] CR DOCX not found — skipped')
+                        continue
+                    print(f'  Parsing {uid}... ', end='', flush=True)
+                    try:
+                        changes = parse_cr(cr_paths[uid])
+                        combined_manifest.extend(changes)
+                        participating_uids.append(uid)
+                        print(f'{len(changes)} change(s)')
+                    except Exception as e:
+                        errors.append(f'[{uid}] parse ERROR: {e}')
+                        print(f'ERROR: {e}')
+                if not combined_manifest:
+                    print('  No changes parsed — skipping apply step.')
+                    report.append((ts_key, 0, 0, len(uids), None, log_path,
+                                    errors + ['No changes parsed']))
+                    log_path.write_text(log_buf.getvalue(), encoding='utf-8')
+                    continue
+                print(f'  Applying {len(combined_manifest)} change(s) to {ts_in.name}...')
+                try:
+                    n_ok, n_skip, log_lines = apply_manifest(
+                        ts_in, combined_manifest, ts_applied, author=author, date=tc_date
+                    )
+                except Exception as e:
+                    errors.append(f'apply_manifest ERROR: {e}')
+                    print(f'  ERROR: {e}')
+                    report.append((ts_key, 0, 0, len(uids), None, log_path, errors))
+                    log_path.write_text(log_buf.getvalue(), encoding='utf-8')
+                    continue
+                for line in log_lines:
+                    print(f'  {line}')
+                # Bubble every un-applied change into the warnings list
+                for line in log_lines:
+                    if line.strip().startswith('ERROR'):
+                        errors.append(line.strip())
+                print(f'  -> Applied: {n_ok}  Skipped: {n_skip}')
+                print('  Finalising metadata...')
+                try:
+                    ts_doc = docx_lib.Document(str(ts_applied))
+                    rev = RevCounter(ts_doc)
+                    pub_ym, pub_month_year = compute_pub_date()
+                    old_v = version
+                    title_text = ts_doc.paragraphs[0].text
+                    date_match = re.search(r'\((\d{4}-\d{2})\)', title_text)
+                    old_date_str = date_match.group(1) if date_match else ''
+                    print(f'    Version:     {old_v} -> {new_v}')
+                    print(f'    Publication: {pub_month_year} ({pub_ym})')
+                    for uid in participating_uids:
+                        try:
+                            meta = extract_cr_metadata(str(cr_paths[uid]))
+                            ch_cells = update_change_history_table(
+                                ts_doc, meta, pub_ym, old_v, new_v, rev, author, tc_date
+                            )
+                            print(f'    [Change History] {uid}: {ch_cells}')
+                        except NoChangeHistoryTable:
+                            print(f'    [Change History] {uid}: NOT PRESENT — this document has no Change History table (History table only)')
+                        except Exception as e:
+                            errors.append(f'[{uid}] Change History ERROR: {e}')
+                            print(f'    [Change History] {uid}: ERROR — {e}')
+                    try:
+                        h_cells = update_history_table(
+                            ts_doc, new_v, pub_month_year, rev, author, tc_date
+                        )
+                        print(f'    [History] {h_cells}')
+                    except Exception as e:
+                        errors.append(f'History table ERROR: {e}')
+                        print(f'    [History] ERROR — {e}')
+                    if old_date_str:
+                        try:
+                            update_title_para(
+                                ts_doc, old_v, new_v, old_date_str, pub_ym, rev, author, tc_date
+                            )
+                            print(f'    [Title] V{old_v} -> V{new_v}, ({old_date_str}) -> ({pub_ym})')
+                        except Exception as e:
+                            errors.append(f'Title update ERROR: {e}')
+                            print(f'    [Title] ERROR — {e}')
+                    else:
+                        print(f'    [Title] SKIP — no (YYYY-MM) pattern in: {title_text!r}')
+                    ts_doc.save(str(ts_final))
+                    print(f'  Saved: {spec_compact}/{ts_final.name}')
+                    print(f'  Log:   {spec_compact}/{log_path.name}')
+                    report.append((ts_key, n_ok, n_skip, len(uids), ts_final, log_path, errors))
+                except Exception as e:
+                    errors.append(f'Finalisation ERROR: {e}')
+                    print(f'  Finalisation ERROR: {e}')
+                    report.append((ts_key, n_ok, n_skip, len(uids), ts_applied, log_path, errors))
+            log_path.write_text(log_buf.getvalue(), encoding='utf-8')
+        # Update failed_ts.json — remove entries that are now resolved
+        still_failed = [
+            e for e in failed_ts_entries
+            if not (Path(e['spec_dir']) / e['expected_filename']).exists()
+        ]
+        failed_ts_path.write_text(json.dumps(still_failed, indent=2))
+        _section('Retry Summary')
+        n_success = sum(1 for r in report if r[4] is not None and not r[6])
+        n_partial  = sum(1 for r in report if r[4] is not None and r[6])
+        n_failed   = sum(1 for r in report if r[4] is None)
+        print(f'TSs processed:  {n_success} fully OK, {n_partial} with warnings, {n_failed} skipped/failed')
+        for ts_key, n_ok, n_skip, n_crs, out_path, log_path, errors in report:
+            status_tag = 'OK' if out_path and not errors else ('WARN' if out_path else 'SKIP')
+            print(f'  [{status_tag}] {ts_key}')
+            for err in errors:
+                print(f'         ! {err}')
+        return
+    excel_path = wsl_path(args.excel_path)
     # ── Step 1: Parse Excel ───────────────────────────────────────────────────
     _section('Step 1 — Parsing Excel')
     print(f'Excel:   {excel_path}')
     except Exception as e:
         sys.exit(f'ERROR parsing Excel: {e}')
+    print(f'Found {len(cr_list)} Accepted CR(s)')
     if not cr_list:
         print('Nothing to process.')
     cr_paths = {}  # uid -> Path
     for uid, _ in cr_list:
+        docx_path, note = download_cr(uid, cr_dir, eol_user, eol_password)
         if docx_path:
             cr_paths[uid] = docx_path
+            print(f'  [{uid}] OK ({note}) — {docx_path.name}')
+    n_cr_failed = len(cr_list) - len(cr_paths)
+    if n_cr_failed:
+        print(f'  {len(cr_paths)}/{len(cr_list)} downloaded ({n_cr_failed} failed — details in warnings)')
+    else:
+        print(f'  All {len(cr_list)} CR(s) downloaded successfully')
     # ── Step 3: Parse cover pages → group by target TS ───────────────────────
     _section('Step 3 — Parsing CR cover pages')
         spec_dirs[(spec_number, version)] = spec_dir
         print(f'  [TS {spec_number} v{version}] ', end='', flush=True)
+        filename, note = None, "not attempted"
+        for attempt in range(1, 4):
+            filename, note = download_ts(spec_number, version, spec_dir, eol_user, eol_password)
+            if filename:
+                break
+            if attempt < 3:
+                print(f'\n    [attempt {attempt}/3 failed — retrying in 5s: {note}]', flush=True)
+                print(f'  [TS {spec_number} v{version}] ', end='', flush=True)
+                time.sleep(5)
+            else:
+                print(f'\n    [all 3 attempts failed]', flush=True)
         if filename:
             ts_paths[(spec_number, version)] = spec_dir / filename
             print(f'OK ({note}) — {spec_compact}/{filename}')
         else:
             print(f'FAILED — {note}')
+    # Write failed_ts.json (even when empty so app.py can detect "no failures")
+    failed_ts_entries = [
+        {
+            "spec_number":       spec_number,
+            "version":           version,
+            "spec_compact":      spec_number.replace(' ', ''),
+            "spec_dir":          str(spec_dirs[(spec_number, version)]),
+            "expected_filename": f"ts_{spec_number.replace(' ', '')}_v{version}.docx",
+            "cr_uids":           ts_groups[(spec_number, version)],
+            "cr_dir":            str(cr_dir),
+        }
+        for (spec_number, version) in ts_groups
+        if (spec_number, version) not in ts_paths
+    ]
+    (output_dir / "failed_ts.json").write_text(
+        json.dumps(failed_ts_entries, indent=2)
+    )
     # ── Steps 5 & 6: Apply CRs + Finalise each TS ────────────────────────────
     _section('Steps 5 & 6 — Applying CRs and Finalising Metadata')
     report = []  # (ts_key, n_ok, n_skip, n_crs, out_path, log_path, errors)
             for line in log_lines:
                 print(f'  {line}')
+            # Bubble every un-applied change into the warnings list
+            for line in log_lines:
+                if line.strip().startswith('ERROR'):
+                    errors.append(line.strip())
             print(f'  -> Applied: {n_ok}  Skipped: {n_skip}')
             # 6. Finalise metadata (Change History, History, title paragraph)
                             ts_doc, meta, pub_ym, old_v, new_v, rev, author, tc_date
                         )
                         print(f'    [Change History] {uid}: {ch_cells}')
+                    except NoChangeHistoryTable:
+                        print(f'    [Change History] {uid}: NOT PRESENT — this document has no Change History table (History table only)')
                     except Exception as e:
                         errors.append(f'[{uid}] Change History ERROR: {e}')
                         print(f'    [Change History] {uid}: ERROR — {e}')

scripts/ts_applicator.py CHANGED Viewed

@@ -33,11 +33,22 @@ from docx_helpers import (
 # ── Text normalisation ────────────────────────────────────────────────────────
 def _norm(text):
-    """Normalise non-breaking spaces and common Unicode dashes for comparison."""
     return (text
-            .replace('\xa0', ' ')
-            .replace('\u2013', '-')
-            .replace('\u2014', '-')
             .strip())
@@ -60,12 +71,53 @@ def _norm_ws(text):
     Used as a third-level fallback (confidence 0.8) after exact and NBSP-norm.
     """
     base = (text
-            .replace('\xa0', '')
             .replace('\u2013', '-')
-            .replace('\u2014', '-'))
     return re.sub(r'\s+', '', base)
 # ── Document search helpers ───────────────────────────────────────────────────
 def _full_para_text(para):
@@ -189,11 +241,22 @@ def _find_row(tbl, anchor_text):
     """
     Find first row in tbl where col-0 cell text contains anchor_text.
     Returns (row_idx, confidence) or (-1, 0.0).
-    Three confidence levels: 1.0 exact, 0.9 norm, 0.8 whitespace-stripped.
     """
-    norm_anchor = _norm(anchor_text)
-    ws_anchor = _norm_ws(anchor_text)
     best = (-1, 0.0)
     for idx, row in enumerate(tbl.rows):
         cell0 = row.cells[0].text if row.cells else ''
         if anchor_text in cell0:
@@ -202,7 +265,42 @@ def _find_row(tbl, anchor_text):
             best = (idx, 0.9)
         elif ws_anchor and ws_anchor in _norm_ws(cell0) and best[1] < 0.8:
             best = (idx, 0.8)
-    return best
 # ── vMerge row insertion ──────────────────────────────────────────────────────
@@ -329,7 +427,7 @@ def _apply_section_replace(doc, change, rev, author, date, log):
                     break
     if ts_para_elem is None:
-        log.append(f'  SKIP section_replace: del_heading {del_heading!r} not found in TS')
         return False
     ts_body = ts_para_elem.getparent()
@@ -395,7 +493,7 @@ def _apply_text_replace(doc, change, rev, author, date, log):
     if loc['kind'] == 'table_cell':
         tbl, t_conf = _find_table(doc, loc['table_header'])
         if tbl is None:
-            log.append(f"  SKIP text_replace: table not found {loc['table_header'][:2]!r}")
             return False
         col_idx = loc['col_idx']
         row_anchor = loc['row_anchor']
@@ -403,11 +501,22 @@ def _apply_text_replace(doc, change, rev, author, date, log):
         if row_anchor:
             row_idx, r_conf = _find_row(tbl, row_anchor)
             if row_idx < 0:
-                log.append(f"  SKIP text_replace: row anchor not found {row_anchor!r}")
                 return False
             row = tbl.rows[row_idx]
             if col_idx >= len(row.cells):
-                log.append(f"  SKIP text_replace: col_idx {col_idx} out of range")
                 return False
             cell = row.cells[col_idx]
             for para in cell.paragraphs:
@@ -415,7 +524,7 @@ def _apply_text_replace(doc, change, rev, author, date, log):
                     tracked_modify_para(para, old, new, rev, author, date)
                     log.append(f"  OK  text_replace (table_cell row={row_idx} col={col_idx}): {old!r} → {new!r}")
                     return True
-            log.append(f"  SKIP text_replace: old text {old!r} not in cell (row={row_idx} col={col_idx})")
             return False
         else:
             # Empty row anchor: scan all rows in col_idx.
@@ -447,7 +556,7 @@ def _apply_text_replace(doc, change, rev, author, date, log):
                                 tracked_modify_para(para, old, new, rev, author, date)
                                 log.append(f"  OK  text_replace (table_cell any_col row={r_idx} col={c_idx}): {old!r} → {new!r}")
                                 return True
-            log.append(f"  SKIP text_replace: old text {old!r} not found in any table column")
             return False
     elif loc['kind'] == 'body_para':
@@ -458,16 +567,16 @@ def _apply_text_replace(doc, change, rev, author, date, log):
             # Fall back: find by paragraph context
             para, conf = _find_para(doc, ctx, prefer_not_in_table=True)
             if para is None:
-                log.append(f"  SKIP text_replace: old text {old!r} not found in TS")
                 return False
         if old in para.text:
             tracked_modify_para(para, old, new, rev, author, date)
             log.append(f"  OK  text_replace (body_para conf={conf:.1f}): {old!r} → {new!r}")
             return True
-        log.append(f"  SKIP text_replace: old text {old!r} not in resolved paragraph")
         return False
-    log.append(f"  SKIP text_replace: unknown kind {loc['kind']!r}")
     return False
@@ -479,7 +588,7 @@ def _apply_para_insert(doc, change, rev, author, date, log):
     anchor_para, conf = _find_para(doc, anchor_text)
     if anchor_para is None:
-        log.append(f"  SKIP para_insert: anchor not found {anchor_text[:60]!r}")
         return False
     items = [(p['text'], p['style'] or 'Normal') for p in paras_data]
@@ -500,13 +609,13 @@ def _apply_row_insert(doc, change, rev, author, date, log, last_inserted=None):
     else:
         tbl, t_conf = _find_table(doc, loc['table_header'])
         if tbl is None:
-            log.append(f"  SKIP row_insert: table not found {loc['table_header'][:2]!r}")
             return False
     after_anchor = loc.get('after_row_anchor', '')
     row_idx, r_conf = _find_row(tbl, after_anchor)
     if row_idx < 0:
-        log.append(f"  SKIP row_insert: anchor row not found {after_anchor!r}")
         return False
     cells_data = change.get('cells', [])

 # ── Text normalisation ────────────────────────────────────────────────────────
 def _norm(text):
+    """Normalise common Unicode invisible/whitespace/punctuation variants for comparison."""
     return (text
+            .replace('\xa0',   ' ')   # non-breaking space
+            .replace('\u202f', ' ')   # narrow no-break space
+            .replace('\u2007', ' ')   # figure space
+            .replace('\u2060', '')    # word joiner (invisible)
+            .replace('\u200b', '')    # zero-width space
+            .replace('\u00ad', '')    # soft hyphen (invisible)
+            .replace('\u2011', '-')   # non-breaking hyphen
+            .replace('\u2013', '-')   # en dash
+            .replace('\u2014', '-')   # em dash
+            .replace('\u2212', '-')   # minus sign
+            .replace('\u2018', "'")   # left single quote
+            .replace('\u2019', "'")   # right single quote
+            .replace('\u201c', '"')   # left double quote
+            .replace('\u201d', '"')   # right double quote
             .strip())
     Used as a third-level fallback (confidence 0.8) after exact and NBSP-norm.
     """
     base = (text
+            .replace('\xa0',   '')
+            .replace('\u202f', '')
+            .replace('\u2007', '')
+            .replace('\u2060', '')
+            .replace('\u200b', '')
+            .replace('\u00ad', '')
+            .replace('\u2011', '-')
             .replace('\u2013', '-')
+            .replace('\u2014', '-')
+            .replace('\u2212', '-')
+            .replace('\u2018', "'")
+            .replace('\u2019', "'")
+            .replace('\u201c', '"')
+            .replace('\u201d', '"'))
     return re.sub(r'\s+', '', base)
+def _norm_alnum(text):
+    """Keep only lowercase alphanumeric characters — last-resort matching.
+    Strips all punctuation, spaces, and Unicode variants so that only the
+    raw word/number content is compared.  Used as a confidence-0.6 fallback
+    in _find_row when even whitespace-stripped matching fails (e.g. different
+    bracket styles, quote variants, or punctuation differences between the CR
+    and the TS).
+    """
+    return re.sub(r'[^a-z0-9]', '', text.lower())
+def _clean_prefix(text: str) -> str:
+    """Return the longest leading substring that contains only standard printable
+    ASCII characters (ord 32–126).
+    Non-breaking spaces, curly quotes, and other Unicode characters embedded
+    mid-text (e.g. between spec number components like 'TS\xa0102\xa0226')
+    make the full anchor unmatchable.  The clean prefix — the part before the
+    first such character — is still reliable and specific enough to locate the
+    correct row.
+    """
+    end = 0
+    for ch in text:
+        if ord(ch) < 32 or ord(ch) > 126:
+            break
+        end += 1
+    return text[:end].strip()
 # ── Document search helpers ───────────────────────────────────────────────────
 def _full_para_text(para):
     """
     Find first row in tbl where col-0 cell text contains anchor_text.
     Returns (row_idx, confidence) or (-1, 0.0).
+    Matching levels, in order of confidence:
+      1.0 — exact substring match
+      0.9 — Unicode-normalised match   (_norm: xa0, dashes, quotes, …)
+      0.8 — whitespace-stripped match  (_norm_ws: also removes tabs/newlines)
+      0.6 — alphanumeric-only match    (_norm_alnum: strips all non a-z0-9)
+      0.55 — clean-prefix unique match: extract the leading ASCII-only part of
+             the anchor and find the single row that contains it.
+      0.5  — clean-prefix + token-overlap: when multiple rows share the prefix,
+             pick the one whose col-0 tokens overlap most with the anchor tokens.
     """
+    norm_anchor  = _norm(anchor_text)
+    ws_anchor    = _norm_ws(anchor_text)
+    alnum_anchor = _norm_alnum(anchor_text)
     best = (-1, 0.0)
     for idx, row in enumerate(tbl.rows):
         cell0 = row.cells[0].text if row.cells else ''
         if anchor_text in cell0:
             best = (idx, 0.9)
         elif ws_anchor and ws_anchor in _norm_ws(cell0) and best[1] < 0.8:
             best = (idx, 0.8)
+        elif alnum_anchor and alnum_anchor in _norm_alnum(cell0) and best[1] < 0.6:
+            best = (idx, 0.6)
+    if best[0] >= 0:
+        return best
+    # ── Prefix-based partial match ─────────────────────────────────────────────
+    # The anchor may have Unicode chars embedded mid-text that prevent all string
+    # comparisons above from matching, even after normalisation (e.g. when the CR
+    # extracts '\xa0' between spec-number parts but the TS has different encoding).
+    # Strategy: use only the clean ASCII prefix of the anchor as the search key.
+    # If that prefix is found in exactly one row → we've uniquely identified it.
+    # If it appears in several rows → pick the one whose col-0 token set shares
+    # the most tokens with the anchor's tokens (the first such row wins a tie).
+    prefix = _clean_prefix(anchor_text)
+    if prefix and len(prefix) > 8:
+        prefix_low = prefix.lower()
+        hits = [
+            idx for idx, row in enumerate(tbl.rows)
+            if row.cells and prefix_low in row.cells[0].text.lower()
+        ]
+        if len(hits) == 1:
+            return hits[0], 0.55
+        elif len(hits) > 1:
+            anchor_tokens = set(re.findall(r'[a-z0-9]+', anchor_text.lower()))
+            best_score, best_idx = -1, -1
+            for hit_idx in hits:
+                cell_tokens = set(re.findall(r'[a-z0-9]+',
+                                             tbl.rows[hit_idx].cells[0].text.lower()))
+                score = len(anchor_tokens & cell_tokens)
+                if score > best_score:
+                    best_score, best_idx = score, hit_idx
+            if best_idx >= 0:
+                return best_idx, 0.5
+    return (-1, 0.0)
 # ── vMerge row insertion ──────────────────────────────────────────────────────
                     break
     if ts_para_elem is None:
+        log.append(f'  ERROR section_replace: del_heading {del_heading!r} not found in TS')
         return False
     ts_body = ts_para_elem.getparent()
     if loc['kind'] == 'table_cell':
         tbl, t_conf = _find_table(doc, loc['table_header'])
         if tbl is None:
+            log.append(f"  ERROR text_replace: table not found {loc['table_header'][:2]!r}")
             return False
         col_idx = loc['col_idx']
         row_anchor = loc['row_anchor']
         if row_anchor:
             row_idx, r_conf = _find_row(tbl, row_anchor)
             if row_idx < 0:
+                # Primary table doesn't contain this row anchor — the CR may be
+                # targeting a different table than the one _find_table resolved.
+                # Try every other table in the document before giving up.
+                for alt_tbl in doc.tables:
+                    if alt_tbl is tbl:
+                        continue
+                    row_idx, r_conf = _find_row(alt_tbl, row_anchor)
+                    if row_idx >= 0:
+                        tbl = alt_tbl
+                        break
+            if row_idx < 0:
+                log.append(f"  ERROR text_replace: row anchor not found {row_anchor!r}")
                 return False
             row = tbl.rows[row_idx]
             if col_idx >= len(row.cells):
+                log.append(f"  ERROR text_replace: col_idx {col_idx} out of range")
                 return False
             cell = row.cells[col_idx]
             for para in cell.paragraphs:
                     tracked_modify_para(para, old, new, rev, author, date)
                     log.append(f"  OK  text_replace (table_cell row={row_idx} col={col_idx}): {old!r} → {new!r}")
                     return True
+            log.append(f"  ERROR text_replace: old text {old!r} not in cell (row={row_idx} col={col_idx})")
             return False
         else:
             # Empty row anchor: scan all rows in col_idx.
                                 tracked_modify_para(para, old, new, rev, author, date)
                                 log.append(f"  OK  text_replace (table_cell any_col row={r_idx} col={c_idx}): {old!r} → {new!r}")
                                 return True
+            log.append(f"  ERROR text_replace: old text {old!r} not found in any table column")
             return False
     elif loc['kind'] == 'body_para':
             # Fall back: find by paragraph context
             para, conf = _find_para(doc, ctx, prefer_not_in_table=True)
             if para is None:
+                log.append(f"  ERROR text_replace: old text {old!r} not found in TS")
                 return False
         if old in para.text:
             tracked_modify_para(para, old, new, rev, author, date)
             log.append(f"  OK  text_replace (body_para conf={conf:.1f}): {old!r} → {new!r}")
             return True
+        log.append(f"  ERROR text_replace: old text {old!r} not in resolved paragraph")
         return False
+    log.append(f"  ERROR text_replace: unknown kind {loc['kind']!r}")
     return False
     anchor_para, conf = _find_para(doc, anchor_text)
     if anchor_para is None:
+        log.append(f"  ERROR para_insert: anchor not found {anchor_text[:60]!r}")
         return False
     items = [(p['text'], p['style'] or 'Normal') for p in paras_data]
     else:
         tbl, t_conf = _find_table(doc, loc['table_header'])
         if tbl is None:
+            log.append(f"  ERROR row_insert: table not found {loc['table_header'][:2]!r}")
             return False
     after_anchor = loc.get('after_row_anchor', '')
     row_idx, r_conf = _find_row(tbl, after_anchor)
     if row_idx < 0:
+        log.append(f"  ERROR row_insert: anchor row not found {after_anchor!r}")
         return False
     cells_data = change.get('cells', [])