simplify pass 3 & 4

This commit is contained in:
Maxime Réaux 2026-04-10 11:00:20 +02:00
parent 022a17221d
commit 6cda3cf523

View file

@ -392,38 +392,24 @@ def resolve_all(key):
# NOTE(review): this span looks like a rendered diff hunk — the old and new
# versions of the code are interleaved without +/- markers and indentation is
# missing (see the two consecutive `for k, v in ...` headers below). Confirm
# statement order and nesting against the real file before relying on it.
# Counters reported by the two print() calls at the end of this span.
skipped_redirect = 0
ignored_redirect = 0
# Set of the "title" value of every entry in canonical_pages; used below for
# membership tests on resolved redirect targets.
valid_titles = {
data["title"]
for data in canonical_pages.values()
}
# Loop header iterating over category_redirects only.
for k, v in category_redirects.items():
# Loop header iterating over redirects merged with category_redirects; for a
# key present in both, the category_redirects value is used because it is
# unpacked last.
for k, v in {**redirects, **category_redirects}.items():
# Skip entries that map a key to itself.
if k == v:
continue
# resolve_all is defined outside this span; presumably it follows the chain
# to its final target — TODO confirm.
final = resolve_all(v)
# Record the mapping only when the resolved target is a canonical title and
# differs from the source key.
if final in valid_titles and k != final:
equivalences[k] = final
# Same resolution steps applied to a snapshot of redirects.items().
for k, v in list(redirects.items()):
if k == v:
continue
final = resolve_all(v)
if final in valid_titles and k != final:
equivalences[k] = final
else:
# Count an entry that was not recorded as an equivalence.
skipped_redirect += 1
# Look each redirect target up in equivalences, falling back to the target
# itself when it has no entry there.
for src, dst in list(redirects.items()):
final = equivalences.get(dst, dst)
if final in valid_titles and src != final:
equivalences[src] = final
else:
# Count an entry that was not recorded as an equivalence.
ignored_redirect += 1
print(f"Skipped redirect to non-canonical: {skipped_redirect}")
print(f"Ignored redirect (non-canonical): {ignored_redirect}")
# --------------------------------------------------
# PASS 4 — final normalization of equivalences
@ -437,20 +423,19 @@ def resolve_equivalence(key):
return key
# NOTE(review): this span looks like a rendered diff hunk — old and new lines
# are interleaved without +/- markers and indentation is missing. Confirm
# statement order and nesting against the real file.
# Iterate over a snapshot of the keys and overwrite each value with its
# resolved form when that form is a canonical title.
# resolve_equivalence is defined outside this span; presumably it follows
# equivalences transitively — TODO confirm.
for k in list(equivalences):
final = resolve_equivalence(equivalences[k])
if final in valid_titles:
equivalences[k] = final
# Fresh dict that receives only the mappings that pass the checks below.
clean_equivalences = {}
for k, v in equivalences.items():
# Report a mapping whose stored target is not a canonical title.
if v not in valid_titles:
problems.append(f"Non canonical mapping: {k} -> {v}")
final = resolve_equivalence(v)
# Keep the mapping only when the resolved target is canonical and is not the
# key itself.
if final in valid_titles and k != final:
clean_equivalences[k] = final
else:
# Report a mapping whose resolved target is not a canonical title.
if final not in valid_titles:
problems.append(f"Non canonical mapping: {k} -> {final}")
# Replace equivalences with the filtered dict.
equivalences = clean_equivalences
# Rebuild equivalences without entries that map a key to itself.
equivalences = {
k: v for k, v in equivalences.items()
if k != v
}
print(f"Equivalences kept: {len(equivalences)}")
@ -492,7 +477,6 @@ print(f"{copied} pages copiées")
# Dict gathering the data structures built earlier under fixed string keys.
# NOTE(review): the preceding hunk header reports 7 lines before and 6 after,
# so one of the lines in this span is presumably removed by the commit
# (possibly the "redirects" entry) — confirm against the real file.
registry = {
"canonical_pages": canonical_pages,
"equivalences": equivalences,
"redirects": redirects,
"potential_tags": potential_tags,
"ignored_pages": ignored_pages,
}
@ -509,7 +493,6 @@ with open(REPORT_PATH, "w", encoding="utf-8") as f:
f.write("=== MIGRATION REPORT ===\n")
f.write(f"Canonical pages: {len(canonical_pages)}\n")
f.write(f"Equivalences: {len(equivalences)}\n")
f.write(f"Redirects: {len(redirects)}\n")
f.write(f"Ignored: {len(ignored_pages)}\n")
f.write(f"Problems: {len(problems)}\n\n")
for p in problems: