keep error pages with fallback content
This commit is contained in:
parent
4e473ba2c9
commit
8e9289998b
2 changed files with 63 additions and 14 deletions
|
|
@ -3,6 +3,7 @@ import json
|
|||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urlparse, parse_qs, unquote
|
||||
import unicodedata
|
||||
|
||||
# --------------------------------------------------
|
||||
# PATHS
|
||||
|
|
@ -29,16 +30,14 @@ valid_targets = set(canonical_pages.values())
|
|||
# HELPERS
|
||||
# --------------------------------------------------
|
||||
|
||||
def normalize_title(title: str | None):
|
||||
if not title:
|
||||
return None
|
||||
|
||||
title = unquote(title)
|
||||
def normalize_title(title: str) -> str:
|
||||
title = title.strip()
|
||||
title = unicodedata.normalize("NFKC", title)
|
||||
title = title.replace("_", " ")
|
||||
title = re.sub(r"\s+", " ", title.strip())
|
||||
title = title.replace("’", "'").replace("‘", "'").replace("“", '"').replace("”", '"')
|
||||
title = re.sub(r"\s+", " ", title)
|
||||
return title.casefold()
|
||||
|
||||
|
||||
# -------------------------
|
||||
# Extract MediaWiki target
|
||||
# -------------------------
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue