simplify pass 3 & 4
This commit is contained in:
parent
022a17221d
commit
6cda3cf523
1 changed file with 12 additions and 29 deletions
|
|
@@ -392,38 +392,24 @@ def resolve_all(key):
|
|||
|
||||
|
||||
skipped_redirect = 0
|
||||
ignored_redirect = 0
|
||||
|
||||
valid_titles = {
|
||||
data["title"]
|
||||
for data in canonical_pages.values()
|
||||
}
|
||||
|
||||
for k, v in category_redirects.items():
|
||||
for k, v in {**redirects, **category_redirects}.items():
|
||||
if k == v:
|
||||
continue
|
||||
final = resolve_all(v)
|
||||
if final in valid_titles and k != final:
|
||||
equivalences[k] = final
|
||||
|
||||
for k, v in list(redirects.items()):
|
||||
if k == v:
|
||||
continue
|
||||
final = resolve_all(v)
|
||||
|
||||
if final in valid_titles and k != final:
|
||||
equivalences[k] = final
|
||||
else:
|
||||
skipped_redirect += 1
|
||||
|
||||
for src, dst in list(redirects.items()):
|
||||
final = equivalences.get(dst, dst)
|
||||
if final in valid_titles and src != final:
|
||||
equivalences[src] = final
|
||||
else:
|
||||
ignored_redirect += 1
|
||||
|
||||
print(f"Skipped redirect to non-canonical: {skipped_redirect}")
|
||||
print(f"Ignored redirect (non-canonical): {ignored_redirect}")
|
||||
|
||||
# --------------------------------------------------
|
||||
# PASS 4 — normalisation finale des equivalences
|
||||
|
|
@@ -437,20 +423,19 @@ def resolve_equivalence(key):
|
|||
return key
|
||||
|
||||
|
||||
for k in list(equivalences):
|
||||
final = resolve_equivalence(equivalences[k])
|
||||
if final in valid_titles:
|
||||
equivalences[k] = final
|
||||
|
||||
clean_equivalences = {}
|
||||
|
||||
for k, v in equivalences.items():
|
||||
if v not in valid_titles:
|
||||
problems.append(f"Non canonical mapping: {k} -> {v}")
|
||||
final = resolve_equivalence(v)
|
||||
|
||||
if final in valid_titles and k != final:
|
||||
clean_equivalences[k] = final
|
||||
else:
|
||||
if final not in valid_titles:
|
||||
problems.append(f"Non canonical mapping: {k} -> {final}")
|
||||
|
||||
equivalences = clean_equivalences
|
||||
|
||||
equivalences = {
|
||||
k: v for k, v in equivalences.items()
|
||||
if k != v
|
||||
}
|
||||
print(f"Equivalences kept: {len(equivalences)}")
|
||||
|
||||
|
||||
|
|
@@ -492,7 +477,6 @@ print(f"{copied} pages copiées")
|
|||
registry = {
|
||||
"canonical_pages": canonical_pages,
|
||||
"equivalences": equivalences,
|
||||
"redirects": redirects,
|
||||
"potential_tags": potential_tags,
|
||||
"ignored_pages": ignored_pages,
|
||||
}
|
||||
|
|
@@ -509,7 +493,6 @@ with open(REPORT_PATH, "w", encoding="utf-8") as f:
|
|||
f.write("=== MIGRATION REPORT ===\n")
|
||||
f.write(f"Canonical pages: {len(canonical_pages)}\n")
|
||||
f.write(f"Equivalences: {len(equivalences)}\n")
|
||||
f.write(f"Redirects: {len(redirects)}\n")
|
||||
f.write(f"Ignored: {len(ignored_pages)}\n")
|
||||
f.write(f"Problems: {len(problems)}\n\n")
|
||||
for p in problems:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue