simplify pass 3 & 4
This commit is contained in:
parent
022a17221d
commit
6cda3cf523
1 changed file with 12 additions and 29 deletions
|
|
@ -392,38 +392,24 @@ def resolve_all(key):
|
||||||
|
|
||||||
|
|
||||||
skipped_redirect = 0
|
skipped_redirect = 0
|
||||||
ignored_redirect = 0
|
|
||||||
|
|
||||||
valid_titles = {
|
valid_titles = {
|
||||||
data["title"]
|
data["title"]
|
||||||
for data in canonical_pages.values()
|
for data in canonical_pages.values()
|
||||||
}
|
}
|
||||||
|
|
||||||
for k, v in category_redirects.items():
|
for k, v in {**redirects, **category_redirects}.items():
|
||||||
if k == v:
|
if k == v:
|
||||||
continue
|
continue
|
||||||
final = resolve_all(v)
|
|
||||||
if final in valid_titles and k != final:
|
|
||||||
equivalences[k] = final
|
|
||||||
|
|
||||||
for k, v in list(redirects.items()):
|
|
||||||
if k == v:
|
|
||||||
continue
|
|
||||||
final = resolve_all(v)
|
final = resolve_all(v)
|
||||||
|
|
||||||
if final in valid_titles and k != final:
|
if final in valid_titles and k != final:
|
||||||
equivalences[k] = final
|
equivalences[k] = final
|
||||||
else:
|
else:
|
||||||
skipped_redirect += 1
|
skipped_redirect += 1
|
||||||
|
|
||||||
for src, dst in list(redirects.items()):
|
|
||||||
final = equivalences.get(dst, dst)
|
|
||||||
if final in valid_titles and src != final:
|
|
||||||
equivalences[src] = final
|
|
||||||
else:
|
|
||||||
ignored_redirect += 1
|
|
||||||
|
|
||||||
print(f"Skipped redirect to non-canonical: {skipped_redirect}")
|
print(f"Skipped redirect to non-canonical: {skipped_redirect}")
|
||||||
print(f"Ignored redirect (non-canonical): {ignored_redirect}")
|
|
||||||
|
|
||||||
# --------------------------------------------------
|
# --------------------------------------------------
|
||||||
# PASS 4 — normalisation finale des equivalences
|
# PASS 4 — normalisation finale des equivalences
|
||||||
|
|
@ -437,20 +423,19 @@ def resolve_equivalence(key):
|
||||||
return key
|
return key
|
||||||
|
|
||||||
|
|
||||||
for k in list(equivalences):
|
clean_equivalences = {}
|
||||||
final = resolve_equivalence(equivalences[k])
|
|
||||||
if final in valid_titles:
|
|
||||||
equivalences[k] = final
|
|
||||||
|
|
||||||
|
|
||||||
for k, v in equivalences.items():
|
for k, v in equivalences.items():
|
||||||
if v not in valid_titles:
|
final = resolve_equivalence(v)
|
||||||
problems.append(f"Non canonical mapping: {k} -> {v}")
|
|
||||||
|
if final in valid_titles and k != final:
|
||||||
|
clean_equivalences[k] = final
|
||||||
|
else:
|
||||||
|
if final not in valid_titles:
|
||||||
|
problems.append(f"Non canonical mapping: {k} -> {final}")
|
||||||
|
|
||||||
|
equivalences = clean_equivalences
|
||||||
|
|
||||||
equivalences = {
|
|
||||||
k: v for k, v in equivalences.items()
|
|
||||||
if k != v
|
|
||||||
}
|
|
||||||
print(f"Equivalences kept: {len(equivalences)}")
|
print(f"Equivalences kept: {len(equivalences)}")
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -492,7 +477,6 @@ print(f"{copied} pages copiées")
|
||||||
registry = {
|
registry = {
|
||||||
"canonical_pages": canonical_pages,
|
"canonical_pages": canonical_pages,
|
||||||
"equivalences": equivalences,
|
"equivalences": equivalences,
|
||||||
"redirects": redirects,
|
|
||||||
"potential_tags": potential_tags,
|
"potential_tags": potential_tags,
|
||||||
"ignored_pages": ignored_pages,
|
"ignored_pages": ignored_pages,
|
||||||
}
|
}
|
||||||
|
|
@ -509,7 +493,6 @@ with open(REPORT_PATH, "w", encoding="utf-8") as f:
|
||||||
f.write("=== MIGRATION REPORT ===\n")
|
f.write("=== MIGRATION REPORT ===\n")
|
||||||
f.write(f"Canonical pages: {len(canonical_pages)}\n")
|
f.write(f"Canonical pages: {len(canonical_pages)}\n")
|
||||||
f.write(f"Equivalences: {len(equivalences)}\n")
|
f.write(f"Equivalences: {len(equivalences)}\n")
|
||||||
f.write(f"Redirects: {len(redirects)}\n")
|
|
||||||
f.write(f"Ignored: {len(ignored_pages)}\n")
|
f.write(f"Ignored: {len(ignored_pages)}\n")
|
||||||
f.write(f"Problems: {len(problems)}\n\n")
|
f.write(f"Problems: {len(problems)}\n\n")
|
||||||
for p in problems:
|
for p in problems:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue