From 6cda3cf5239cf776ba522bcb3ddbc9e1a756251c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maxime=20R=C3=A9aux?= Date: Fri, 10 Apr 2026 11:00:20 +0200 Subject: [PATCH] simplify pass 3 & 4 --- prepare_pages_and_registry.py | 41 ++++++++++------------------------- 1 file changed, 12 insertions(+), 29 deletions(-) diff --git a/prepare_pages_and_registry.py b/prepare_pages_and_registry.py index 8da8f36..d97282a 100644 --- a/prepare_pages_and_registry.py +++ b/prepare_pages_and_registry.py @@ -392,38 +392,24 @@ def resolve_all(key): skipped_redirect = 0 -ignored_redirect = 0 valid_titles = { data["title"] for data in canonical_pages.values() } -for k, v in category_redirects.items(): +for k, v in {**redirects, **category_redirects}.items(): if k == v: continue - final = resolve_all(v) - if final in valid_titles and k != final: - equivalences[k] = final -for k, v in list(redirects.items()): - if k == v: - continue final = resolve_all(v) + if final in valid_titles and k != final: equivalences[k] = final else: skipped_redirect += 1 -for src, dst in list(redirects.items()): - final = equivalences.get(dst, dst) - if final in valid_titles and src != final: - equivalences[src] = final - else: - ignored_redirect += 1 - print(f"Skipped redirect to non-canonical: {skipped_redirect}") -print(f"Ignored redirect (non-canonical): {ignored_redirect}") # -------------------------------------------------- # PASS 4 — normalisation finale des equivalences @@ -437,20 +423,19 @@ def resolve_equivalence(key): return key -for k in list(equivalences): - final = resolve_equivalence(equivalences[k]) - if final in valid_titles: - equivalences[k] = final - +clean_equivalences = {} for k, v in equivalences.items(): - if v not in valid_titles: - problems.append(f"Non canonical mapping: {k} -> {v}") + final = resolve_equivalence(v) + + if final in valid_titles and k != final: + clean_equivalences[k] = final + else: + if final not in valid_titles: + problems.append(f"Non canonical mapping: {k} -> {final}") + +equivalences = clean_equivalences -equivalences = { - k: v for k, v in equivalences.items() - if k != v -} print(f"Equivalences kept: {len(equivalences)}") @@ -492,7 +477,6 @@ print(f"{copied} pages copiées") registry = { "canonical_pages": canonical_pages, "equivalences": equivalences, - "redirects": redirects, "potential_tags": potential_tags, "ignored_pages": ignored_pages, } @@ -509,7 +493,6 @@ with open(REPORT_PATH, "w", encoding="utf-8") as f: f.write("=== MIGRATION REPORT ===\n") f.write(f"Canonical pages: {len(canonical_pages)}\n") f.write(f"Equivalences: {len(equivalences)}\n") - f.write(f"Redirects: {len(redirects)}\n") f.write(f"Ignored: {len(ignored_pages)}\n") f.write(f"Problems: {len(problems)}\n\n") for p in problems: