tweak paths and find links
This commit is contained in:
parent
36c8bb2354
commit
e4aaa33137
7 changed files with 309 additions and 10 deletions
|
|
@@ -4,8 +4,8 @@ import re
|
|||
import json
|
||||
|
||||
|
||||
INPUT_DIR = Path(".")
|
||||
OUTPUT_DIR = Path("unique_pages")
|
||||
INPUT_DIR = Path("../original_index")
|
||||
OUTPUT_DIR = Path("../unique_pages")
|
||||
OUTPUT_DIR.mkdir(exist_ok=True)
|
||||
|
||||
ARTICLE_ID_RE = re.compile(r'"wgArticleId":\s*(\d+)')
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue