diff --git a/scripts/README.md b/scripts/README.md
new file mode 100644
index 0000000..04312ae
--- /dev/null
+++ b/scripts/README.md
@@ -0,0 +1,72 @@
+# Description of the process
+
+## Parsing of the tables
+
+### links.txt
+- `pl_from` -> Id of the "from" page of this link
+- (`pl_namespace`) -> We keep only if equals 0 (= namespace of the "from" page of this link)
+- `pl_target_id` -> Target of this link (foreign key to `linktarget`)
+
+### targets.txt
+- `lt_id` -> Id of this link (index)
+- (`lt_ns`) -> We keep only if equals 0 (= namespace of the targeted page)
+- `lt_title` -> Title of the targeted page
+
+### pages.txt
+- `page_id` -> Id of the page
+- (`page_namespace`) -> We keep only if equals 0 (= namespace of this page)
+- `page_title` -> Title of this page
+- `page_is_redirect` -> Boolean whether this page is a redirect
+- Ignore the eight following
+
+### redirects.txt
+- `rd_from` -> Id of the page from which we are redirected
+- (`rd_namespace`) -> We keep only if equals 0 (= namespace of the page we are redirected to)
+- `rd_title` -> Title of the page we are redirected to
+- Ignore the two following
+
+## Joining the tables
+
+### redirects.with_ids.txt (replace_titles_in_redirects_file.py)
+Replaces, for each redirection, `rd_title` with the targeted `page_id` by matching on `page_title`.
+The targeted `page_id` is then followed through further redirects recursively, until we reach a "final" page.
+- `rd_from` -> The id of the page we are redirected from
+- `page_id` -> The id of the page we get to by following redirections recursively
+
+### targets.with_ids.txt (replace_titles_and_redirects_in_targets_file.py)
+Replaces, for each linktarget, `lt_title` with the targeted `page_id` by matching on `page_title`.
+We then compute the "final" page obtained from this page by following redirections, using the file `redirects.with_ids.txt`.
+- `lt_id` -> Id of this link
+- `page_id` -> The id of the page this link is pointing to, after having followed all redirections
+
+### links.with_ids.txt (replace_titles_and_redirects_in_links_file.py)
+Replaces, for each pagelink, `lt_id` with the targeted `page_id` by joining with `targets.with_ids.txt`.
+- `pl_from` -> Id of the "from" page, after having followed all redirections
+- `page_id` -> Id of the "to" page, after having followed all redirections
+
+### pages.pruned.txt (prune_pages_file.py)
+Prunes the pages file by removing pages which are marked as redirects but have no corresponding redirect in the redirects file.
+
+## Sorting, grouping, and counting the links
+
+### links.sorted_by_XXX_id.txt
+We then sort `links.with_ids.txt` according to the first "source" id, into
+the file `links.sorted_by_source_id.txt`, and according to the second "target" id
+into the file `links.sorted_by_target_id.txt`.
+
+### links.grouped_by_XXX_id.txt
+Then, we use those two files to *GROUP BY* the links by source and by target.
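As a quick illustration of the join described in the README above, here is a minimal sketch (not part of the patch; all ids and titles are made up) of how one pagelink row is resolved through `linktarget`, `page` and `redirect` into a final (from, to) pair:

```python
# Toy data in the shapes documented above (made-up values).
link = (10, 7)                        # pagelinks row: (pl_from, pl_target_id)
linktarget_titles = {7: "Paris"}      # lt_id -> lt_title
page_ids_by_title = {"Paris": 42}     # page_title -> page_id
redirects = {42: 99}                  # page 42 is a redirect to page 99

source_id, lt_id = link
target_id = page_ids_by_title[linktarget_titles[lt_id]]  # 42
target_id = redirects.get(target_id, target_id)          # 99, the "final" page
print((source_id, target_id))                            # (10, 99)
```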
+The file `links.grouped_by_source_id.txt` looks like this:
+- `pl_from` -> Id of the "from" page
+- `targets` -> A `|`-separated string of the ids the "from" page targets
+
+The file `links.grouped_by_target_id.txt` looks like this:
+- `froms` -> A `|`-separated string of the ids of the pages targeting the "target" page
+- `pl_target` -> Id of the "target" page
+
+### links.with_counts.txt (combine_grouped_links_files.py)
+Combines the two grouped files into one line per page: `page_id`, outgoing link count, incoming link count, the outgoing ids and the incoming ids.
+
+## Making the database
+The files `redirects.with_ids.txt`, `pages.pruned.txt` and `links.with_counts.txt` are then loaded into the `sdow.sqlite` database using the SQL scripts in `sql/`.
diff --git a/scripts/buildDatabase.sh b/scripts/buildDatabase.sh
index 8103a58..5f622ed 100755
--- a/scripts/buildDatabase.sh
+++ b/scripts/buildDatabase.sh
@@ -1,15 +1,19 @@
 #!/bin/bash
-
 set -euo pipefail
 
 # Force default language for output sorting to be bytewise. Necessary to ensure uniformity amongst
 # UNIX commands.
 export LC_ALL=C
 
+# These variables can be set from the external environment
+WLANG="${WLANG:-en}"
+OUT_DIR="${OUT_DIR:-dump}"
+DELETE_PROGRESSIVELY=${DELETE_PROGRESSIVELY:-false}
+
 # By default, the latest Wikipedia dump will be downloaded. If a download date in the format
 # YYYYMMDD is provided as the first argument, it will be used instead.
 if [[ $# -eq 0 ]]; then
-  DOWNLOAD_DATE=$(wget -q -O- https://dumps.wikimedia.org/enwiki/ | grep -Po '\d{8}' | sort | tail -n1)
+  DOWNLOAD_DATE=$(wget -q -O- https://dumps.wikimedia.org/${WLANG}wiki/ | grep -Po '\d{8}' | sort | tail -n1)
 else
   if [ ${#1} -ne 8 ]; then
     echo "[ERROR] Invalid download date provided: $1"
@@ -19,17 +23,17 @@ else
   fi
 fi
 
-ROOT_DIR=`pwd`
-OUT_DIR="dump"
+# Root directory is that of this script
+ROOT_DIR=$(dirname "$0")
 
-DOWNLOAD_URL="https://dumps.wikimedia.org/enwiki/$DOWNLOAD_DATE"
-TORRENT_URL="https://dump-torrents.toolforge.org/enwiki/$DOWNLOAD_DATE"
-
-SHA1SUM_FILENAME="enwiki-$DOWNLOAD_DATE-sha1sums.txt"
-REDIRECTS_FILENAME="enwiki-$DOWNLOAD_DATE-redirect.sql.gz"
-PAGES_FILENAME="enwiki-$DOWNLOAD_DATE-page.sql.gz"
-LINKS_FILENAME="enwiki-$DOWNLOAD_DATE-pagelinks.sql.gz"
+DOWNLOAD_URL="https://dumps.wikimedia.org/${WLANG}wiki/$DOWNLOAD_DATE"
+TORRENT_URL="https://dump-torrents.toolforge.org/${WLANG}wiki/$DOWNLOAD_DATE"
+SHA1SUM_FILENAME="${WLANG}wiki-$DOWNLOAD_DATE-sha1sums.txt"
+REDIRECTS_FILENAME="${WLANG}wiki-$DOWNLOAD_DATE-redirect.sql.gz"
+PAGES_FILENAME="${WLANG}wiki-$DOWNLOAD_DATE-page.sql.gz"
+LINKS_FILENAME="${WLANG}wiki-$DOWNLOAD_DATE-pagelinks.sql.gz"
+TARGETS_FILENAME="${WLANG}wiki-$DOWNLOAD_DATE-linktarget.sql.gz"
 
 # Make the output directory if it doesn't already exist and move to it
 mkdir -p $OUT_DIR
@@ -79,6 +83,7 @@ download_file "sha1sums" $SHA1SUM_FILENAME
 download_file "redirects" $REDIRECTS_FILENAME
 download_file "pages" $PAGES_FILENAME
 download_file "links" $LINKS_FILENAME
+download_file "targets" $TARGETS_FILENAME
 ##########################
 # TRIM WIKIPEDIA DUMPS #
 ##########################
@@ -105,7 +110,7 @@ if [ ! -f redirects.txt.gz ]; then
 else
   echo "[WARN] Already trimmed redirects file"
 fi
-
+if $DELETE_PROGRESSIVELY; then rm $REDIRECTS_FILENAME; fi
 if [ ! -f pages.txt.gz ]; then
   echo
   echo "[INFO] Trimming pages file"
@@ -118,16 +123,16 @@ if [ ! -f pages.txt.gz ]; then
   # Splice out the page title and whether or not the page is a redirect
   # Zip into output file
   time pigz -dc $PAGES_FILENAME \
-    | sed -n 's/^INSERT INTO `page` VALUES (//p' \
-    | sed -e 's/),(/\'$'\n/g' \
-    | egrep "^[0-9]+,0," \
-    | sed -e $"s/,0,'/\t/" \
-    | sed -e $"s/',[^,]*,\([01]\).*/\t\1/" \
+    | sed -n 's/^INSERT INTO `page` VALUES //p' \
+    | egrep -o "\([0-9]+,0,'([^']*(\\\\')?)+',[01]," \
+    | sed -re $"s/^\(([0-9]+),0,'/\1\t/" \
+    | sed -re $"s/',([01]),/\t\1/" \
     | pigz --fast > pages.txt.gz.tmp
   mv pages.txt.gz.tmp pages.txt.gz
 else
   echo "[WARN] Already trimmed pages file"
 fi
+if $DELETE_PROGRESSIVELY; then rm $PAGES_FILENAME; fi
 
 if [ ! -f links.txt.gz ]; then
   echo
@@ -143,14 +148,38 @@ if [ ! -f links.txt.gz ]; then
   time pigz -dc $LINKS_FILENAME \
     | sed -n 's/^INSERT INTO `pagelinks` VALUES (//p' \
     | sed -e 's/),(/\'$'\n/g' \
-    | egrep "^[0-9]+,0,.*,0$" \
-    | sed -e $"s/,0,'/\t/g" \
-    | sed -e "s/',0//g" \
+    | egrep "^[0-9]+,0,[0-9]+$" \
+    | sed -e $"s/,0,/\t/g" \
     | pigz --fast > links.txt.gz.tmp
   mv links.txt.gz.tmp links.txt.gz
 else
   echo "[WARN] Already trimmed links file"
 fi
+if $DELETE_PROGRESSIVELY; then rm $LINKS_FILENAME; fi
+
+if [ ! -f targets.txt.gz ]; then
+  echo
+  echo "[INFO] Trimming targets file"
+
+  # Unzip
+  # Remove all lines that don't start with INSERT INTO...
+  # Split into individual records
+  # Only keep records in namespace 0
+  # Replace the namespace with a tab
+  # Remove everything starting at the target page name's closing apostrophe
+  # Zip into output file
+  time pigz -dc $TARGETS_FILENAME \
+    | sed -n 's/^INSERT INTO `linktarget` VALUES (//p' \
+    | sed -e 's/),(/\'$'\n/g' \
+    | egrep "^[0-9]+,0,.*$" \
+    | sed -e $"s/,0,'/\t/g" \
+    | sed -e "s/'$//g" \
+    | pigz --fast > targets.txt.gz.tmp
+  mv targets.txt.gz.tmp targets.txt.gz
+else
+  echo "[WARN] Already trimmed targets file"
+fi
+if $DELETE_PROGRESSIVELY; then rm $TARGETS_FILENAME; fi
 
 
 ###########################################
@@ -166,16 +195,29 @@ if [ ! -f redirects.with_ids.txt.gz ]; then
 else
   echo "[WARN] Already replaced titles in redirects file"
 fi
+if $DELETE_PROGRESSIVELY; then rm redirects.txt.gz; fi
+
+if [ ! -f targets.with_ids.txt.gz ]; then
+  echo
+  echo "[INFO] Replacing titles and redirects in targets file"
+  time python "$ROOT_DIR/replace_titles_and_redirects_in_targets_file.py" pages.txt.gz redirects.with_ids.txt.gz targets.txt.gz \
+    | pigz --fast > targets.with_ids.txt.gz.tmp
+  mv targets.with_ids.txt.gz.tmp targets.with_ids.txt.gz
+else
+  echo "[WARN] Already replaced titles and redirects in targets file"
+fi
+if $DELETE_PROGRESSIVELY; then rm targets.txt.gz; fi
 
 if [ ! -f links.with_ids.txt.gz ]; then
   echo
   echo "[INFO] Replacing titles and redirects in links file"
-  time python "$ROOT_DIR/replace_titles_and_redirects_in_links_file.py" pages.txt.gz redirects.with_ids.txt.gz links.txt.gz \
+  time python "$ROOT_DIR/replace_titles_and_redirects_in_links_file.py" pages.txt.gz redirects.with_ids.txt.gz targets.with_ids.txt.gz links.txt.gz \
    | pigz --fast > links.with_ids.txt.gz.tmp
   mv links.with_ids.txt.gz.tmp links.with_ids.txt.gz
 else
   echo "[WARN] Already replaced titles and redirects in links file"
 fi
+if $DELETE_PROGRESSIVELY; then rm links.txt.gz targets.with_ids.txt.gz; fi
 
 if [ ! -f pages.pruned.txt.gz ]; then
   echo
@@ -185,6 +227,7 @@ if [ ! -f pages.pruned.txt.gz ]; then
 else
   echo "[WARN] Already pruned pages which are marked as redirects but with no redirect"
 fi
+if $DELETE_PROGRESSIVELY; then rm pages.txt.gz; fi
 
 #####################
 # SORT LINKS FILE #
@@ -212,6 +255,7 @@ if [ ! -f links.sorted_by_target_id.txt.gz ]; then
 else
   echo "[WARN] Already sorted links file by target page ID"
 fi
+if $DELETE_PROGRESSIVELY; then rm links.with_ids.txt.gz; fi
 
 
 #############################
@@ -227,6 +271,7 @@ if [ ! -f links.grouped_by_source_id.txt.gz ]; then
 else
   echo "[WARN] Already grouped source links file by source page ID"
 fi
+if $DELETE_PROGRESSIVELY; then rm links.sorted_by_source_id.txt.gz; fi
 
 if [ ! -f links.grouped_by_target_id.txt.gz ]; then
   echo
@@ -237,6 +282,7 @@ if [ ! -f links.grouped_by_target_id.txt.gz ]; then
 else
   echo "[WARN] Already grouped target links file by target page ID"
 fi
+if $DELETE_PROGRESSIVELY; then rm links.sorted_by_target_id.txt.gz; fi
 
 
 ################################
@@ -251,6 +297,7 @@ if [ ! -f links.with_counts.txt.gz ]; then
 else
   echo "[WARN] Already combined grouped links files"
 fi
+if $DELETE_PROGRESSIVELY; then rm links.grouped_by_source_id.txt.gz links.grouped_by_target_id.txt.gz; fi
 
 
 ############################
@@ -260,14 +307,17 @@ if [ ! -f sdow.sqlite ]; then
   echo
   echo "[INFO] Creating redirects table"
   time pigz -dc redirects.with_ids.txt.gz | sqlite3 sdow.sqlite ".read $ROOT_DIR/../sql/createRedirectsTable.sql"
+  if $DELETE_PROGRESSIVELY; then rm redirects.with_ids.txt.gz; fi
 
   echo
   echo "[INFO] Creating pages table"
   time pigz -dc pages.pruned.txt.gz | sqlite3 sdow.sqlite ".read $ROOT_DIR/../sql/createPagesTable.sql"
+  if $DELETE_PROGRESSIVELY; then rm pages.pruned.txt.gz; fi
 
   echo
   echo "[INFO] Creating links table"
   time pigz -dc links.with_counts.txt.gz | sqlite3 sdow.sqlite ".read $ROOT_DIR/../sql/createLinksTable.sql"
+  if $DELETE_PROGRESSIVELY; then rm links.with_counts.txt.gz; fi
 
   echo
   echo "[INFO] Compressing SQLite file"
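For readers less familiar with the sed/egrep pipelines above, this is roughly what the pages trimming step extracts, sketched in Python. The sample INSERT line is invented and the regex is a simplification of the `egrep -o` pattern in the script, not a drop-in replacement:

```python
import re

# Invented, heavily truncated sample of a line from the page.sql dump.
sample = ("INSERT INTO `page` VALUES "
          "(10,0,'AccessibleComputing',1,0,0.33,'2024','2024',11,22,'wikitext',NULL),"
          "(12,0,'Anarchism',0,0,0.78,'2024','2024',33,44,'wikitext',NULL);")

# Keep namespace-0 records and pull out (page_id, page_title, page_is_redirect),
# tolerating backslash-escaped quotes inside titles.
record = re.compile(r"\((\d+),0,'((?:[^'\\]|\\.)*)',([01]),")

for page_id, title, is_redirect in record.findall(sample):
    print(f"{page_id}\t{title}\t{is_redirect}")  # e.g. 10<TAB>AccessibleComputing<TAB>1
```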
diff --git a/scripts/combine_grouped_links_files.py b/scripts/combine_grouped_links_files.py
index e8ce4fd..35a1c7b 100755
--- a/scripts/combine_grouped_links_files.py
+++ b/scripts/combine_grouped_links_files.py
@@ -28,26 +28,27 @@
 # Create a dictionary of page IDs to their incoming and outgoing links.
 LINKS = defaultdict(lambda: defaultdict(str))
-for line in io.BufferedReader(gzip.open(OUTGOING_LINKS_FILE, 'r')):
-  [source_page_id, target_page_ids] = line.rstrip('\n').split('\t')
-  LINKS[source_page_id]['outgoing'] = target_page_ids
+# Outgoing links are stored under key 0, incoming links under key 1.
+for line in io.BufferedReader(gzip.open(OUTGOING_LINKS_FILE, 'rb')):
+  [source_page_id, target_page_ids] = line.rstrip(b'\n').split(b'\t')
+  LINKS[int(source_page_id)][0] = target_page_ids
 
-for line in io.BufferedReader(gzip.open(INCOMING_LINKS_FILE, 'r')):
-  [target_page_id, source_page_ids] = line.rstrip('\n').split('\t')
-  LINKS[target_page_id]['incoming'] = source_page_ids
+for line in io.BufferedReader(gzip.open(INCOMING_LINKS_FILE, 'rb')):
+  [target_page_id, source_page_ids] = line.rstrip(b'\n').split(b'\t')
+  LINKS[int(target_page_id)][1] = source_page_ids
 
 # For each page in the links dictionary, print out its incoming and outgoing links as well as their
 # counts.
-for page_id, links in LINKS.iteritems():
-  outgoing_links = links.get('outgoing', '')
-  outgoing_links_count = 0 if outgoing_links is '' else len(
-      outgoing_links.split('|'))
+for page_id, links in LINKS.items():
+  outgoing_links = links.get(0, b'')
+  outgoing_links_count = 0 if outgoing_links == b'' else len(
+      outgoing_links.split(b'|'))
 
-  incoming_links = links.get('incoming', '')
-  incoming_links_count = 0 if incoming_links is '' else len(
-      incoming_links.split('|'))
+  incoming_links = links.get(1, b'')
+  incoming_links_count = 0 if incoming_links == b'' else len(
+      incoming_links.split(b'|'))
 
-  columns = [page_id, str(outgoing_links_count), str(
-      incoming_links_count), outgoing_links, incoming_links]
+  columns = [str(page_id).encode(), str(outgoing_links_count).encode(), str(
+      incoming_links_count).encode(), outgoing_links, incoming_links]
 
-  print('\t'.join(columns))
+  print(b'\t'.join(columns).decode())
diff --git a/scripts/prune_pages_file.py b/scripts/prune_pages_file.py
index 1459e88..a4bdf60 100644
--- a/scripts/prune_pages_file.py
+++ b/scripts/prune_pages_file.py
@@ -28,14 +28,14 @@
 # Create a dictionary of redirects.
 REDIRECTS = {}
-for line in io.BufferedReader(gzip.open(REDIRECTS_FILE, 'r')):
-  [source_page_id, _] = line.rstrip('\n').split('\t')
+for line in io.BufferedReader(gzip.open(REDIRECTS_FILE, 'rb')):
+  [source_page_id, _] = line.rstrip(b'\n').split(b'\t')
   REDIRECTS[source_page_id] = True
 
 # Loop through the pages file, ignoring pages which are marked as redirects but which do not have a
 # corresponding redirect in the redirects dictionary, printing the remaining pages to stdout.
-for line in io.BufferedReader(gzip.open(PAGES_FILE, 'r')):
-  [page_id, page_title, is_redirect] = line.rstrip('\n').split('\t')
+for line in io.BufferedReader(gzip.open(PAGES_FILE, 'rb')):
+  [page_id, page_title, is_redirect] = line.rstrip(b'\n').split(b'\t')
 
-  if is_redirect == '0' or page_id in REDIRECTS:
-    print('\t'.join([page_id, page_title, is_redirect]))
+  if is_redirect == b'0' or page_id in REDIRECTS:
+    print(b'\t'.join([page_id, page_title, is_redirect]).decode())
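The combined file written by `combine_grouped_links_files.py` above has one row per page. Here is a hypothetical helper showing that column layout; the function name is illustrative and not part of the scripts:

```python
# Columns: page id, outgoing count, incoming count, '|'-separated outgoing ids, incoming ids.
def with_counts_row(page_id, outgoing="", incoming=""):
    out_count = 0 if outgoing == "" else len(outgoing.split("|"))
    in_count = 0 if incoming == "" else len(incoming.split("|"))
    return "\t".join([str(page_id), str(out_count), str(in_count), outgoing, incoming])

print(with_counts_row(12, outgoing="34|56", incoming="78"))  # "12\t2\t1\t34|56\t78"
print(with_counts_row(99, incoming="12"))                    # "99\t0\t1\t\t12"
```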
diff --git a/scripts/replace_titles_and_redirects_in_links_file.py b/scripts/replace_titles_and_redirects_in_links_file.py
index b217017..5be8fb6 100755
--- a/scripts/replace_titles_and_redirects_in_links_file.py
+++ b/scripts/replace_titles_and_redirects_in_links_file.py
@@ -10,14 +10,15 @@ import gzip
 
 # Validate inputs
-if len(sys.argv) < 4:
+if len(sys.argv) < 5:
   print('[ERROR] Not enough arguments provided!')
-  print('[INFO] Usage: {0} <pages_file> <redirects_file> <links_file>'.format(sys.argv[0]))
+  print('[INFO] Usage: {0} <pages_file> <redirects_file> <targets_file> <links_file>'.format(sys.argv[0]))
   sys.exit()
 
 PAGES_FILE = sys.argv[1]
 REDIRECTS_FILE = sys.argv[2]
-LINKS_FILE = sys.argv[3]
+TARGETS_FILE = sys.argv[3]
+LINKS_FILE = sys.argv[4]
 
 if not PAGES_FILE.endswith('.gz'):
   print('[ERROR] Pages file must be gzipped.')
@@ -27,36 +28,43 @@
   print('[ERROR] Redirects file must be gzipped.')
   sys.exit()
 
+if not TARGETS_FILE.endswith('.gz'):
+  print('[ERROR] Targets file must be gzipped.')
+  sys.exit()
+
 if not LINKS_FILE.endswith('.gz'):
   print('[ERROR] Links file must be gzipped.')
   sys.exit()
 
 # Create a set of all page IDs and a dictionary of page titles to their corresponding IDs.
 ALL_PAGE_IDS = set()
-PAGE_TITLES_TO_IDS = {}
-for line in io.BufferedReader(gzip.open(PAGES_FILE, 'r')):
-  [page_id, page_title, _] = line.rstrip('\n').split('\t')
+for line in io.BufferedReader(gzip.open(PAGES_FILE, 'rb')):
+  [page_id, page_title, _] = line.rstrip(b'\n').split(b'\t')
   ALL_PAGE_IDS.add(page_id)
-  PAGE_TITLES_TO_IDS[page_title] = page_id
 
 # Create a dictionary of page IDs to the target page ID to which they redirect.
 REDIRECTS = {}
-for line in io.BufferedReader(gzip.open(REDIRECTS_FILE, 'r')):
-  [source_page_id, target_page_id] = line.rstrip('\n').split('\t')
+for line in io.BufferedReader(gzip.open(REDIRECTS_FILE, 'rb')):
+  [source_page_id, target_page_id] = line.rstrip(b'\n').split(b'\t')
   REDIRECTS[source_page_id] = target_page_id
 
+# Create a dictionary of linktarget IDs to the page ID they point to.
+TARGETS = {}
+for line in io.BufferedReader(gzip.open(TARGETS_FILE, 'rb')):
+  [target_id, target_page_id] = line.rstrip(b'\n').split(b'\t')
+  TARGETS[target_id] = target_page_id
+
 # Loop through each line in the links file, replacing titles with IDs, applying redirects, and
 # removing nonexistent pages, writing the result to stdout.
-for line in io.BufferedReader(gzip.open(LINKS_FILE, 'r')):
-  [source_page_id, target_page_title] = line.rstrip('\n').split('\t')
+for line in io.BufferedReader(gzip.open(LINKS_FILE, 'rb')):
+  [source_page_id, target_id] = line.rstrip(b'\n').split(b'\t')
 
   source_page_exists = source_page_id in ALL_PAGE_IDS
 
   if source_page_exists:
     source_page_id = REDIRECTS.get(source_page_id, source_page_id)
 
-    target_page_id = PAGE_TITLES_TO_IDS.get(target_page_title)
-
+    target_page_id = TARGETS.get(target_id)
     if target_page_id is not None and source_page_id != target_page_id:
       target_page_id = REDIRECTS.get(target_page_id, target_page_id)
-      print('\t'.join([source_page_id, target_page_id]))
+      print(b'\t'.join([source_page_id, target_page_id]).decode())
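A small design note on the I/O pattern in these ported scripts: they open the gzipped TSVs in binary mode and only `.decode()` when printing. An alternative sketch (not what the patch does) lets `gzip.open` handle decoding in text mode; `pages.txt.gz` is just a placeholder path and the three-column unpacking assumes the pages file format described in the README:

```python
import gzip

# Text-mode reading: gzip decompresses and decodes in one step.
with gzip.open("pages.txt.gz", "rt", encoding="utf-8", newline="\n") as fh:
    for line in fh:
        page_id, page_title, is_redirect = line.rstrip("\n").split("\t")
        # ... work with already-decoded str fields here ...
```

Keeping bytes end to end, as the patch does, avoids per-line decoding overhead and any encoding surprises in titles, which is presumably why these scripts stay in binary mode.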
diff --git a/scripts/replace_titles_and_redirects_in_targets_file.py b/scripts/replace_titles_and_redirects_in_targets_file.py
new file mode 100755
index 0000000..89fe86b
--- /dev/null
+++ b/scripts/replace_titles_and_redirects_in_targets_file.py
@@ -0,0 +1,57 @@
+"""
+Replaces page titles in the linktarget file with their corresponding page IDs, eliminates targets
+pointing to non-existing pages, and replaces redirects with the pages to which they redirect.
+
+Output is written to stdout.
+"""
+
+import io
+import sys
+import gzip
+
+# Validate inputs
+if len(sys.argv) < 4:
+  print('[ERROR] Not enough arguments provided!')
+  print('[INFO] Usage: {0} <pages_file> <redirects_file> <targets_file>'.format(sys.argv[0]))
+  sys.exit()
+
+PAGES_FILE = sys.argv[1]
+REDIRECTS_FILE = sys.argv[2]
+TARGETS_FILE = sys.argv[3]
+
+if not PAGES_FILE.endswith('.gz'):
+  print('[ERROR] Pages file must be gzipped.')
+  sys.exit()
+
+if not REDIRECTS_FILE.endswith('.gz'):
+  print('[ERROR] Redirects file must be gzipped.')
+  sys.exit()
+
+if not TARGETS_FILE.endswith('.gz'):
+  print('[ERROR] Targets file must be gzipped.')
+  sys.exit()
+
+# Create a set of all page IDs and a dictionary of page titles to their corresponding IDs.
+ALL_PAGE_IDS = set()
+PAGE_TITLES_TO_IDS = {}
+for line in io.BufferedReader(gzip.open(PAGES_FILE, 'rb')):
+  [page_id, page_title, _] = line.rstrip(b'\n').split(b'\t')
+  ALL_PAGE_IDS.add(page_id)
+  PAGE_TITLES_TO_IDS[page_title] = page_id
+
+# Create a dictionary of page IDs to the target page ID to which they redirect.
+REDIRECTS = {}
+for line in io.BufferedReader(gzip.open(REDIRECTS_FILE, 'rb')):
+  [source_page_id, target_page_id] = line.rstrip(b'\n').split(b'\t')
+  REDIRECTS[source_page_id] = target_page_id
+
+# Loop through each line in the targets file, replacing titles with IDs, applying redirects, and
+# removing nonexistent pages, writing the result to stdout.
+for line in io.BufferedReader(gzip.open(TARGETS_FILE, 'rb')):
+  [target_id, target_page_title] = line.rstrip(b'\n').split(b'\t')
+
+  target_page_id = PAGE_TITLES_TO_IDS.get(target_page_title)
+
+  if target_page_id is not None:
+    target_page_id = REDIRECTS.get(target_page_id, target_page_id)
+    print(b'\t'.join([target_id, target_page_id]).decode())
diff --git a/scripts/replace_titles_in_redirects_file.py b/scripts/replace_titles_in_redirects_file.py
index 946d190..ff7b5e6 100755
--- a/scripts/replace_titles_in_redirects_file.py
+++ b/scripts/replace_titles_in_redirects_file.py
@@ -28,16 +28,16 @@
 # Create a set of all page IDs and a dictionary of page titles to their corresponding IDs.
 ALL_PAGE_IDS = set()
 PAGE_TITLES_TO_IDS = {}
-for line in io.BufferedReader(gzip.open(PAGES_FILE, 'r')):
-  [page_id, page_title, _] = line.rstrip('\n').split('\t')
+for line in io.BufferedReader(gzip.open(PAGES_FILE, 'rb')):
+  [page_id, page_title, _] = line.rstrip(b'\n').split(b'\t')
   ALL_PAGE_IDS.add(page_id)
   PAGE_TITLES_TO_IDS[page_title] = page_id
 
 # Create a dictionary of redirects, replace page titles in the redirects file with their
 # corresponding IDs and ignoring pages which do not exist.
 REDIRECTS = {}
-for line in io.BufferedReader(gzip.open(REDIRECTS_FILE, 'r')):
-  [source_page_id, target_page_title] = line.rstrip('\n').split('\t')
+for line in io.BufferedReader(gzip.open(REDIRECTS_FILE, 'rb')):
+  [source_page_id, target_page_title] = line.rstrip(b'\n').split(b'\t')
 
   source_page_exists = source_page_id in ALL_PAGE_IDS
   target_page_id = PAGE_TITLES_TO_IDS.get(target_page_title)
@@ -47,7 +47,7 @@
 # Loop through the redirects dictionary and remove redirects which redirect to another redirect,
 # writing the remaining redirects to stdout.
-for source_page_id, target_page_id in REDIRECTS.iteritems():
+for source_page_id, target_page_id in REDIRECTS.items():
   start_target_page_id = target_page_id
 
   redirected_count = 0
@@ -62,4 +62,4 @@
       target_page_id = None
 
   if target_page_id is not None:
-    print('\t'.join([source_page_id, target_page_id]))
+    print(b'\t'.join([source_page_id, target_page_id]).decode())
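The loop above collapses redirect chains with a hop counter capped at 100 to guard against circular redirects. An equivalent formulation of the same idea, sketched with a visited set (illustrative only; the patch keeps the counter):

```python
def resolve_redirect(page_id, redirects):
    """Follow a redirect chain to its final page, or return None on a cycle."""
    seen = {page_id}
    while page_id in redirects:
        page_id = redirects[page_id]
        if page_id in seen:  # circular chain: there is no real article at the end
            return None
        seen.add(page_id)
    return page_id

redirects = {"A": "B", "B": "C", "X": "Y", "Y": "X"}
print(resolve_redirect("A", redirects))  # C
print(resolve_redirect("X", redirects))  # None
```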
diff --git a/scripts/replace_titles_in_redirects_file.py.dis b/scripts/replace_titles_in_redirects_file.py.dis
new file mode 100755
index 0000000..a423414
--- /dev/null
+++ b/scripts/replace_titles_in_redirects_file.py.dis
@@ -0,0 +1,65 @@
+"""
+Replaces page titles in the redirects file with their corresponding target IDs.
+
+Output is written to stdout.
+"""
+
+import io
+import sys
+import gzip
+
+# Validate input arguments.
+if len(sys.argv) < 3:
+  print('[ERROR] Not enough arguments provided!')
+  print('[INFO] Usage: {0} <targets_file> <redirects_file>'.format(sys.argv[0]))
+  sys.exit()
+
+TARGETS_FILE = sys.argv[1]
+REDIRECTS_FILE = sys.argv[2]
+
+if not TARGETS_FILE.endswith('.gz'):
+  print('[ERROR] Targets file must be gzipped.')
+  sys.exit()
+
+if not REDIRECTS_FILE.endswith('.gz'):
+  print('[ERROR] Redirects file must be gzipped.')
+  sys.exit()
+
+# Create a set of all target IDs and a dictionary of target titles to their corresponding IDs.
+ALL_TARGET_IDS = set()
+TARGET_TITLES_TO_IDS = {}
+for line in io.BufferedReader(gzip.open(TARGETS_FILE, 'rb')):
+  [page_id, page_title, _] = line.rstrip(b'\n').split(b'\t')
+  ALL_TARGET_IDS.add(page_id)
+  TARGET_TITLES_TO_IDS[page_title] = page_id
+
+# Create a dictionary of redirects, replace page titles in the redirects file with their
+# corresponding IDs and ignoring pages which do not exist.
+REDIRECTS = {}
+for line in io.BufferedReader(gzip.open(REDIRECTS_FILE, 'rb')):
+  [source_page_id, target_page_title] = line.rstrip(b'\n').split(b'\t')
+
+  source_page_exists = source_page_id in ALL_TARGET_IDS
+  target_page_id = TARGET_TITLES_TO_IDS.get(target_page_title)
+
+  if source_page_exists and target_page_id is not None:
+    REDIRECTS[source_page_id] = target_page_id
+
+# Loop through the redirects dictionary and remove redirects which redirect to another redirect,
+# writing the remaining redirects to stdout.
+for source_page_id, target_page_id in REDIRECTS.items():
+  start_target_page_id = target_page_id
+
+  redirected_count = 0
+  while target_page_id in REDIRECTS:
+    target_page_id = REDIRECTS[target_page_id]
+
+    redirected_count += 1
+
+    # Break out if there is a circular path, meaning the redirects only point to other redirects,
+    # not an actual page.
+    if target_page_id == start_target_page_id or redirected_count > 100:
+      target_page_id = None
+
+  if target_page_id is not None:
+    print(b'\t'.join([source_page_id, target_page_id]).decode())
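Finally, a hypothetical end-to-end invocation of the updated `buildDatabase.sh`, showing the new environment variables (`WLANG`, `OUT_DIR`, `DELETE_PROGRESSIVELY`) and the optional YYYYMMDD dump date. The language code, directory and date below are example values only, and the command assumes it is run from the `scripts/` directory:

```python
import os
import subprocess

# Build the French database into ./dump-fr and delete intermediate files as soon as
# they are no longer needed. Drop the date argument to use the latest available dump.
env = {**os.environ, "WLANG": "fr", "OUT_DIR": "dump-fr", "DELETE_PROGRESSIVELY": "true"}
subprocess.run(["./buildDatabase.sh", "20240601"], check=True, env=env)
```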