diff --git a/boostack_create_pages.py b/boostack_create_pages.py
deleted file mode 100644
index 0dd4f23..0000000
--- a/boostack_create_pages.py
+++ /dev/null
@@ -1,89 +0,0 @@
-import requests
-import sys
-
-# ==========================
-# CONFIGURATION
-# ==========================
-
-BOOKSTACK_API_URL = "https://wiki-warmachine.ungol.fr/api"
-API_TOKEN_ID = "VOTRE_TOKEN_ID"
-API_TOKEN_SECRET = "VOTRE_TOKEN_SECRET"
-
-PAGES_FILE = "pages.txt"
-
-DEFAULT_CONTENT = """
-<p>Page restaurée automatiquement depuis l'ancien wiki.</p>
-""" - -# ========================== -# HEADERS -# ========================== - -HEADERS = { - "Authorization": f"Token {API_TOKEN_ID}:{API_TOKEN_SECRET}", - "Content-Type": "application/json" -} - -# ========================== -# FUNCTIONS -# ========================== - -def create_page(title, chapter_id, content=DEFAULT_CONTENT): - """Create a page in BookStack""" - url = f"{BOOKSTACK_API_URL}/pages" - - payload = { - "name": title, - "html": content, - "chapter_id": int(chapter_id) - } - - response = requests.post(url, headers=HEADERS, json=payload) - - if response.status_code == 200: - page_id = response.json().get("id") - print(f"[OK] Page créée : '{title}' (ID {page_id})") - return page_id - else: - print(f"[ERREUR] Impossible de créer '{title}'") - print(response.status_code, response.text) - return None - - -def load_pages(filename): - """Load pages list from file""" - pages = [] - with open(filename, "r", encoding="utf-8") as f: - for line in f: - line = line.strip() - if not line or line.startswith("#"): - continue - try: - title, chapter_id = line.split("|") - pages.append((title.strip(), chapter_id.strip())) - except ValueError: - print(f"[IGNORÉ] Ligne invalide : {line}") - return pages - - -# ========================== -# MAIN -# ========================== - -def main(): - pages = load_pages(PAGES_FILE) - - if not pages: - print("Aucune page à créer.") - sys.exit(0) - - print(f"{len(pages)} pages à créer...\n") - - for title, chapter_id in pages: - create_page(title, chapter_id) - - print("\nImport terminé.") - - -if __name__ == "__main__": - main() diff --git a/extract_content.py b/extract_content.py index 7d0b1df..18ceabc 100644 --- a/extract_content.py +++ b/extract_content.py @@ -51,30 +51,6 @@ STRIP_ATTRIBUTES = [ "border", ] -ERROR_PAGE_PATTERNS = [ - # 5xx - "503 service unavailable", - "502 bad gateway", - "500 internal server error", - "504 gateway time", - - # 4xx - "400 bad request", - "401 unauthorized", - "403 forbidden", - "404 not found", - "408 request time", - "419 page expired", - "429 too many requests", - - # génériques - "temporarily busy", - "server error", - "internal error", - "page not found", - "request could not be satisfied", -] - # ====================== # HELPERS # ====================== @@ -262,32 +238,13 @@ def remove_intro_rule_box(content): if getattr(el, "name", None) == "p": break -def is_error_page(soup: BeautifulSoup) -> bool: - text = soup.get_text(" ", strip=True).lower() - return any(p in text for p in ERROR_PAGE_PATTERNS) - -def build_fallback_html(title: str, filename: str) -> str: - safe_title = title or filename.replace("_", " ").replace(".html", "") - - return f""" - - - {safe_title} - - -

{safe_title}

-

Lost content (HTTracker) in {filename}

- - -""" - # ====================== # CORE FUNCTIONS # ====================== def clean_html_file(input_path: Path, output_path: Path): - html_page = input_path.read_text(encoding="utf-8", errors="ignore") - soup = BeautifulSoup(html_page, "html.parser") + html = input_path.read_text(encoding="utf-8", errors="ignore") + soup = BeautifulSoup(html, "html.parser") # Remove comments (HTTrack etc.) for comment in soup.find_all(string=lambda text: isinstance(text, Comment)): @@ -298,15 +255,11 @@ def clean_html_file(input_path: Path, output_path: Path): for el in soup.find_all(tag): el.decompose() + # Extract main content content = soup.select_one("#mw-content-text") - if not content or is_error_page(soup): + if not content: print(f"[WARN] No content in {input_path.name}") - fallback = build_fallback_html( - title=soup.title.get_text(strip=True) if soup.title else "", - filename=input_path.name - ) - output_path.write_text(fallback, encoding="utf-8") return remove_intro_rule_box(content) @@ -409,14 +362,11 @@ def clean_html_file(input_path: Path, output_path: Path): def process_all(): OUTPUT_DIR.mkdir(parents=True, exist_ok=True) - files = list(SOURCE_DIR.glob("*.html")) - total = len(files) - print(f"{total} fichiers trouvés") - for i, file in enumerate(files, start=1): + + for file in SOURCE_DIR.glob("*.html"): output_file = OUTPUT_DIR / file.name clean_html_file(file, output_file) - if i % 200 == 0 or i == total: - print(f"{i}/{total} analysés ({i/total:.1%})") + print("✅ Cleaning complete") diff --git a/prepare_pages_and_registry.py b/prepare_pages_and_registry.py index 1f0b471..f6517c6 100644 --- a/prepare_pages_and_registry.py +++ b/prepare_pages_and_registry.py @@ -9,7 +9,7 @@ from difflib import SequenceMatcher from bs4 import BeautifulSoup import unicodedata -SOURCE_DIR = Path("../original_index") +SOURCE_DIR = Path("../test") OUTPUT_DIR = Path("../output") PAGES_DIR = Path(OUTPUT_DIR / "pages") @@ -535,40 +535,34 @@ def title_to_filename(title: str) -> str: title.replace(" ", "_").replace("’", "'").replace("‘", "'").replace("“", '"').replace("”", '"').casefold() + ".html" ) -output_canonical_pages = {} -name_registry = {} + copied = 0 -collision = 0 total = len(canonical_pages) for i, (article_id, data) in enumerate(canonical_pages.items(), 1): + src = data["path"] - base_name = title_to_filename(data["title"]) - if base_name in name_registry: - base_name = Path(base_name).stem - base_name = f"{base_name}__{article_id}.html" - collision += 1 - problems.append(f"Resolved collision: {base_name} (from {src})") - name_registry[base_name] = article_id - dst = PAGES_DIR / base_name + dst_name = title_to_filename(data["title"]) + dst = PAGES_DIR / dst_name + try: shutil.copy2(src, dst) - output_canonical_pages[article_id] = base_name + canonical_pages[article_id] = dst_name copied += 1 except Exception as e: problems.append(f"Copy failed {src}: {e}") + if i % 200 == 0 or i == total: print(f"{i}/{total} copiés") print(f"{copied} pages copiées") -print(f"{collision} collisions détectées") # -------------------------------------------------- # SAVE REGISTRY # -------------------------------------------------- registry = { - "canonical_pages": output_canonical_pages, + "canonical_pages": canonical_pages, "equivalences": equivalences, "potential_tags": potential_tags, "ignored_pages": ignored_pages, @@ -585,7 +579,7 @@ with open(REGISTRY_PATH, "w", encoding="utf-8") as f: with open(REPORT_PATH, "w", encoding="utf-8") as f: f.write("=== MIGRATION REPORT ===\n") - f.write(f"Canonical pages: 
+    f.write(f"Canonical pages: {len(canonical_pages)}\n")
     f.write(f"Equivalences: {len(equivalences)}\n")
     f.write(f"Ignored: {len(ignored_pages)}\n")
     f.write(f"Problems: {len(problems)}\n\n")
diff --git a/scan_internal_links.py b/scan_internal_links.py
index e74680b..0858a57 100644
--- a/scan_internal_links.py
+++ b/scan_internal_links.py
@@ -3,27 +3,16 @@ import json
 import re
 from bs4 import BeautifulSoup
 from urllib.parse import urlparse, parse_qs, unquote
-import unicodedata
 
 # --------------------------------------------------
-# CONFIG
+# PATHS
 # --------------------------------------------------
 
-PAGES_DIR = Path("../output_ok/cleaned_pages")
-REGISTRY_PATH = Path("../output_ok/equivalence_registry.json")
-OUTPUT_DIR = Path("../output_ok/link_scan")
+PAGES_DIR = Path("../output/pages")
+REGISTRY_PATH = Path("../output/equivalence_registry.json")
+OUTPUT_DIR = Path("../output/link_scan")
 
 OUTPUT_DIR.mkdir(exist_ok=True)
 
-IGNORED_PREFIXES = (
-    "file ",
-    "image ",
-    "category ",
-    "template ",
-    "special ",
-    "help ",
-    "user ",
-    "talk ",
-)
 # --------------------------------------------------
 # LOAD REGISTRY
@@ -40,72 +29,92 @@ valid_targets = set(canonical_pages.values())
 # HELPERS
 # --------------------------------------------------
 
-def normalize_title(title: str) -> str:
+def normalize_title(title: str | None):
     if not title:
-        return
-    title = title.strip()
+        return None
+
     title = unquote(title)
-    title = Path(title).stem
-    title = unicodedata.normalize("NFKC", title)
     title = title.replace("_", " ")
-    title = title.replace("’", "'").replace("‘", "'").replace("“", '"').replace("”", '"')
-    title = re.sub(r"\s+", " ", title)
+    title = re.sub(r"\s+", " ", title.strip())
     return title.casefold()
 
+
+# -------------------------
+# Extract MediaWiki target
+# -------------------------
+
 def extract_mediawiki_target(href: str):
+
     if not href:
         return None
+
+    # ignore anchors
     if href.startswith("#"):
         return None
+
     parsed = urlparse(href)
+
+    # external link
     if parsed.scheme in ("http", "https"):
         return None
+
     path = parsed.path or ""
+
+    # /wiki/Page_Name
     if "/wiki/" in path:
         return path.split("/wiki/", 1)[1]
+
+    # index.php?title=Page
     if "index.php" in path:
         qs = parse_qs(parsed.query)
         if "title" in qs:
             return qs["title"][0]
+
+    # fallback filename-like
     return Path(path).stem
 
+# -------------------------
+# Ignore unwanted namespaces
+# -------------------------
+
+IGNORED_PREFIXES = (
+    "file:",
+    "image:",
+    "template:",
+    "special:",
+    "help:",
+    "user:",
+    "talk:",
+)
+
 def is_ignored_namespace(title_norm: str):
     return title_norm.startswith(IGNORED_PREFIXES)
 
+
+# -------------------------
+# Extract article content
+# -------------------------
+
 def extract_article_links(soup):
+
     content = soup.find("div", id="mw-content-text")
     if not content:
         return []
+
     links = []
+
     for a in content.select("a[href]"):
+
+        # ignore navboxes / metadata
         if a.find_parent(class_="navbox"):
             continue
-        links.append({
-            "href": a.get("href"),
-            "title": a.get("title"),
-            "text": a.get_text(strip=True),
-        })
+
+        href = a.get("href")
+        links.append(href)
+
     return links
 
-def resolve_link(raw_target, title_attr):
-    candidates = []
-    if title_attr:
-        candidates.append(title_attr)
-    if raw_target:
-        candidates.append(raw_target)
-    for candidate in candidates:
-        norm = normalize_title(candidate)
-        if not norm:
-            continue
-        if is_ignored_namespace(norm):
-            return None, "ignored"
-        if norm in equivalences:
-            return equivalences[norm], "equivalence"
-        filename = norm.replace(" ", "_") + ".html"
".html" - if filename in valid_targets: - return filename, "direct" - return None, "unresolved" # -------------------------------------------------- # MAIN SCAN @@ -113,29 +122,43 @@ def resolve_link(raw_target, title_attr): resolved_links = [] unresolved_links = [] + files = list(PAGES_DIR.glob("*.html")) print(f"{len(files)} pages à analyser") for i, file_path in enumerate(files, 1): + html = file_path.read_text(encoding="utf-8", errors="ignore") soup = BeautifulSoup(html, "html.parser") + links = extract_article_links(soup) - for link in links: - raw_target = extract_mediawiki_target(link["href"]) - resolved, method = resolve_link(raw_target, link["title"]) + + for href in links: + + raw_target = extract_mediawiki_target(href) + norm = normalize_title(raw_target) + + if not norm: + continue + + if is_ignored_namespace(norm): + continue + entry = { "source": file_path.name, - "href": link["href"], - "title": link["title"], - "method": method, + "href": href, + "normalized": norm, } + + resolved = equivalences.get(norm) + if resolved: - entry["resolved"] = resolved + entry["resolved_title"] = resolved resolved_links.append(entry) else: - entry["raw_target"] = raw_target unresolved_links.append(entry) - if i % 200 == 0: + + if i % 100 == 0: print(f"{i}/{len(files)} analysées") # --------------------------------------------------