keep error pages with fallback content

This commit is contained in:
Maxime Réaux 2026-04-15 10:36:21 +02:00
parent 4e473ba2c9
commit 8e9289998b
2 changed files with 63 additions and 14 deletions

View file

@ -3,6 +3,7 @@ import json
import re
from bs4 import BeautifulSoup
from urllib.parse import urlparse, parse_qs, unquote
import unicodedata
# --------------------------------------------------
# PATHS
@ -29,16 +30,14 @@ valid_targets = set(canonical_pages.values())
# HELPERS
# --------------------------------------------------
# Typographic quotes mapped to their ASCII equivalents. Written as escapes so
# the table survives any re-encoding of this source file.
_QUOTE_TABLE = str.maketrans({
    "\u2018": "'",   # left single quotation mark
    "\u2019": "'",   # right single quotation mark
    "\u201c": '"',   # left double quotation mark
    "\u201d": '"',   # right double quotation mark
})


def normalize_title(title: str) -> str:
    """Return a canonical, case-insensitive comparison key for *title*.

    Steps, in order:

    1. Apply Unicode NFKC normalization (folds compatibility forms such as
       full-width letters and non-breaking spaces).
    2. Treat underscores as spaces.
    3. Replace curly single/double quotes with their ASCII counterparts.
    4. Collapse every run of whitespace to one space and trim both ends.
    5. Case-fold the result.

    Args:
        title: The raw title text.

    Returns:
        The normalized title; an empty string if nothing but
        whitespace/underscores was given.
    """
    title = unicodedata.normalize("NFKC", title)
    title = title.replace("_", " ")
    title = title.translate(_QUOTE_TABLE)
    # Trim *after* the underscore conversion so that leading/trailing
    # underscores do not leave stray spaces in the key.
    title = re.sub(r"\s+", " ", title).strip()
    return title.casefold()
# -------------------------
# Extract MediaWiki target
# -------------------------