simplify pass 3 & 4

This commit is contained in:
Maxime Réaux 2026-04-10 11:00:20 +02:00
parent 022a17221d
commit 6cda3cf523

View file

@@ -392,38 +392,24 @@ def resolve_all(key):
 skipped_redirect = 0
-ignored_redirect = 0
 valid_titles = {
     data["title"]
     for data in canonical_pages.values()
 }
-for k, v in category_redirects.items():
+for k, v in {**redirects, **category_redirects}.items():
     if k == v:
         continue
-    final = resolve_all(v)
-    if final in valid_titles and k != final:
-        equivalences[k] = final
-for k, v in list(redirects.items()):
-    if k == v:
-        continue
     final = resolve_all(v)
     if final in valid_titles and k != final:
         equivalences[k] = final
     else:
         skipped_redirect += 1
-for src, dst in list(redirects.items()):
-    final = equivalences.get(dst, dst)
-    if final in valid_titles and src != final:
-        equivalences[src] = final
-    else:
-        ignored_redirect += 1
 print(f"Skipped redirect to non-canonical: {skipped_redirect}")
-print(f"Ignored redirect (non-canonical): {ignored_redirect}")
 # --------------------------------------------------
 # PASS 4 — normalisation finale des equivalences
@@ -437,20 +423,19 @@ def resolve_equivalence(key):
     return key
-for k in list(equivalences):
-    final = resolve_equivalence(equivalences[k])
-    if final in valid_titles:
-        equivalences[k] = final
+clean_equivalences = {}
 for k, v in equivalences.items():
-    if v not in valid_titles:
-        problems.append(f"Non canonical mapping: {k} -> {v}")
+    final = resolve_equivalence(v)
+    if final in valid_titles and k != final:
+        clean_equivalences[k] = final
+    else:
+        if final not in valid_titles:
+            problems.append(f"Non canonical mapping: {k} -> {final}")
+equivalences = clean_equivalences
-equivalences = {
-    k: v for k, v in equivalences.items()
-    if k != v
-}
 print(f"Equivalences kept: {len(equivalences)}")
@@ -492,7 +477,6 @@ print(f"{copied} pages copiées")
 registry = {
     "canonical_pages": canonical_pages,
     "equivalences": equivalences,
-    "redirects": redirects,
     "potential_tags": potential_tags,
     "ignored_pages": ignored_pages,
 }
@@ -509,7 +493,6 @@ with open(REPORT_PATH, "w", encoding="utf-8") as f:
     f.write("=== MIGRATION REPORT ===\n")
     f.write(f"Canonical pages: {len(canonical_pages)}\n")
     f.write(f"Equivalences: {len(equivalences)}\n")
-    f.write(f"Redirects: {len(redirects)}\n")
     f.write(f"Ignored: {len(ignored_pages)}\n")
     f.write(f"Problems: {len(problems)}\n\n")
     for p in problems: