From ca64127d5779fd638c5485cfbf4c23d530437dde Mon Sep 17 00:00:00 2001 From: Jesir Vargas Date: Fri, 11 Sep 2015 10:40:41 -0400 Subject: [PATCH 1/4] add test for getDocFromUrl() to make sure it actually runs; also, add previously undefined exception 'e' --- boilerpy/extractors.py | 6 ++-- tests/unittests.py | 77 +++++++++++++++++++++++++----------------- 2 files changed, 49 insertions(+), 34 deletions(-) diff --git a/boilerpy/extractors.py b/boilerpy/extractors.py index 462c2f0..230cc1b 100644 --- a/boilerpy/extractors.py +++ b/boilerpy/extractors.py @@ -50,7 +50,7 @@ def getDocFromFile(self,filename): return self.getDoc(self.readFromFile(filename)) def getDocFromUrl(self,url): - return self.getDoc(self.readFromUrl(filename)) + return self.getDoc(self.readFromUrl(url)) def getDoc(self,text): doc=self.parseDoc(text) @@ -85,13 +85,13 @@ def parseDoc(self,inputStr): bpParser=parser.BoilerpipeHTMLParser() try: bpParser.feed(inputStr) - except: + except Exception as exc: #in case of error, try again, first removing script tag content bpParser=parser.BoilerpipeHTMLParser() inputStr=re.sub(r'<(?:script|SCRIPT)[^>]*>.*?','',inputStr,0,re.DOTALL) try: bpParser.feed(inputStr) - except: + except Exception as e: print "Error parsing HTML : "+str(e) return None doc=bpParser.toTextDocument() diff --git a/tests/unittests.py b/tests/unittests.py index ac0a97e..1716a36 100644 --- a/tests/unittests.py +++ b/tests/unittests.py @@ -1,5 +1,8 @@ import unittest import sys + +import mock + from boilerpy.document import TextDocument,TextBlock from boilerpy.filters import * from boilerpy.extractors import Extractor @@ -18,7 +21,7 @@ def runOneTest(): class TestFilters(unittest.TestCase): defaultWords="Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec fermentum tincidunt magna, eu pulvinar mauris dapibus pharetra. In varius, nisl a rutrum porta, sem sem semper lacus, et varius urna tellus vel lorem. Nullam urna eros, luctus eget blandit ac, imperdiet feugiat ipsum. Donec laoreet tristique mi a bibendum. Sed pretium bibendum scelerisque. Mauris id pellentesque turpis. Mauris porta adipiscing massa, quis tempus dui pharetra ac. Morbi lacus mauris, feugiat ac tempor ut, congue tincidunt risus. Pellentesque tincidunt adipiscing elit, in fringilla enim scelerisque vel. Nulla facilisi. 
".split(' ') - + def makedoc(self,wordsArr,numAnchorWordsArr=None,isContentArr=None,labelArr=None): textBlocks=[] for idx,words in enumerate(wordsArr): @@ -45,26 +48,26 @@ def makedoc(self,wordsArr,numAnchorWordsArr=None,isContentArr=None,labelArr=None else: block.addLabel(label) except TypeError,IndexError: pass - + textBlocks.append(block) - + return TextDocument(textBlocks) - + def verifyContent(self,filtr,doc,contentArr,show=False): isContentBefore=[block.isContent() for block in doc.getTextBlocks()] isChanged=filtr.process(doc) isContent=[block.isContent() for block in doc.getTextBlocks()] self.assertEqual(isContent,contentArr) self.assertEqual(isChanged,isContent!=isContentBefore) - + def test_markEveryhingContent(self): doc=self.makedoc([5,100,80],None,[False,True,False]) self.verifyContent(MarkEverythingContentFilter(),doc,[True,True,True]) - + def test_inverted(self): doc=self.makedoc([5,100,80],None,[False,True,False]) self.verifyContent(InvertedFilter(),doc,[True,False,True]) - + def test_boilerplateBlock(self): #keeps if isContent doc=self.makedoc([5,100,10,50,80],None,[False,True,False,True,False]) @@ -76,7 +79,7 @@ def test_boilerplateBlock(self): self.assertEqual(doc.getTextBlocks(),finalBlocks) self.assertEqual(isContent,[True,True]) self.assertEqual(isChanged,True) - + def test_minWords(self): #rejects if #words

THIS IS CONTENT

" doc=self.extractor.parseDoc(s) self.assertEqual(doc.getTitle(),titleText) - + def test_body(self): bodyText="THIS IS CONTENT" s="

NOT IN BODY

"+bodyText+"

" doc=self.extractor.parseDoc(s) textArr=[block.getText() for block in doc.getTextBlocks()] self.assertEqual(textArr,[bodyText]) - + def test_inline(self): template="

*

*

**
" content=['AA','BB','CC','DD'] doc=self.makedoc(template,content) - + blocks=doc.getTextBlocks() textArr=[block.getText() for block in blocks] numWords=[block.getNumWords() for block in blocks] self.assertEqual(textArr,[content[0],content[1],content[2]+content[3]]) - + def test_ignorable(self): template="

*

" content=self.makecontent([10,12]) doc=self.makedoc(template,content) - + blocks=doc.getTextBlocks() textArr=[block.getText() for block in blocks] self.assertEqual(textArr,[content[0]]) @@ -372,36 +375,36 @@ def test_textDensity(self): template="

*

*

" content=self.makecontent([80,"one, !!! two"]) doc=self.makedoc(template,content) - + blocks=doc.getTextBlocks() numArr=[[block.getNumWords(),block.numWordsInWrappedLines,block.numWrappedLines,block.getTextDensity()] for block in blocks] - + #exact values are unknown, approximate value range to check self.assertEqual(blocks[0].getNumWords(),80) self.assertRange(blocks[0].numWordsInWrappedLines,60,80) self.assertRange(blocks[0].numWrappedLines,4,7) self.assertRange(blocks[0].getTextDensity(),8,16) - + self.assertEqual(numArr[1],[2,2,1,2]) - + def test_blockIdxs(self): template="

*

*

*

*

" content=self.makecontent([11,12,13,14]) doc=self.makedoc(template,content) - + blocks=doc.getTextBlocks() idxArr=[[block.getOffsetBlocksStart(),block.getOffsetBlocksEnd()] for block in blocks] self.assertEqual(idxArr,[[0,0],[1,1],[2,2],[3,3]]) - + def test_tagLevel(self): template="

*

*
" content=self.makecontent([5,6]) doc=self.makedoc(template,content) - + blocks=doc.getTextBlocks() levelArr=[block.getTagLevel() for block in blocks] self.assertEqual(levelArr,[5,3]) - + def test_merge(self): block1=TextBlock("AA BB CC ",set([0]),3,3,3,1,0) block2=TextBlock("DD EE FF GG HH II JJ .",set([1]),6,0,6,2,1) @@ -417,4 +420,16 @@ def test_merge(self): self.assertEqual(block1.getOffsetBlocksStart(),0) self.assertEqual(block1.getOffsetBlocksEnd(),1) + + def test_getDocFromUrl(self): + """getDocFromUrl() should run (was dying because of undefined 'filename')""" + url = "http://www.example.com/" + fake_readFromUrl = mock.Mock(return_value=u"

Example

") + tmp_filter = MarkEverythingContentFilter() + + with mock.patch.object(self.extractor, "readFromUrl", fake_readFromUrl): + with mock.patch.object(self.extractor, "filter", tmp_filter): + self.assertIsInstance(self.extractor.getDocFromUrl(url), TextDocument) + + runTests() From 9b9dcda1e3213ebad0b16cda029ae8a940781b49 Mon Sep 17 00:00:00 2001 From: Jesir Vargas Date: Fri, 6 Apr 2018 08:40:54 -0400 Subject: [PATCH 2/4] quick port to py3 using futurize, with minor fixes and style cleanups --- boilerpy/__init__.py | 3 +-- boilerpy/document.py | 11 ++++++----- boilerpy/extractors.py | 10 ++++++---- boilerpy/filters.py | 6 +++--- boilerpy/parser.py | 17 ++++++++--------- tests/unittests.py | 11 +++++------ 6 files changed, 29 insertions(+), 29 deletions(-) diff --git a/boilerpy/__init__.py b/boilerpy/__init__.py index a796300..96ca1bd 100644 --- a/boilerpy/__init__.py +++ b/boilerpy/__init__.py @@ -16,5 +16,4 @@ # * See the License for the specific language governing permissions and # * limitations under the License. # - -import extractors,filters,parser,document \ No newline at end of file +from . import extractors, filters, parser, document diff --git a/boilerpy/document.py b/boilerpy/document.py index 8c6852d..c8e8df5 100644 --- a/boilerpy/document.py +++ b/boilerpy/document.py @@ -17,7 +17,8 @@ # * limitations under the License. # # package: de.l3s.boilerpipe.document -import copy,sys +import copy +import sys # # * Some pre-defined labels which can be used in conjunction with @@ -150,8 +151,8 @@ def initDensities(self): if self.numWordsInWrappedLines == 0: self.numWordsInWrappedLines = self.numWords self.numWrappedLines = 1 - self.textDensity = self.numWordsInWrappedLines / float(self.numWrappedLines) - self.linkDensity = 0 if self.numWords==0 else self.numWordsInAnchorText / float(self.numWords) + self.textDensity = self.numWordsInWrappedLines / self.numWrappedLines + self.linkDensity = 0 if self.numWords == 0 else self.numWordsInAnchorText / self.numWords def isContent(self): """ generated source for method isContent """ @@ -294,7 +295,7 @@ def setTagLevel(self, tagLevel): self.tagLevel = tagLevel TextBlock.EMPTY_START = TextBlock("", set(), 0, 0, 0, 0, -1) -TextBlock.EMPTY_END = TextBlock("", set(), 0, 0, 0, 0, sys.maxint) +TextBlock.EMPTY_END = TextBlock("", set(), 0, 0, 0, 0, sys.maxsize) @@ -325,7 +326,7 @@ def __init__(self, doc, contentOnly): # def avgNumWords(self): """ generated source for method avgNumWords """ - return self.numWords / float(self.numBlocks) + return self.numWords / self.numBlocks # # * Returns the overall number of words in all blocks. diff --git a/boilerpy/extractors.py b/boilerpy/extractors.py index 230cc1b..aa72175 100644 --- a/boilerpy/extractors.py +++ b/boilerpy/extractors.py @@ -27,10 +27,12 @@ from xml.sax import parseString, SAXException -import HTMLParser +import html.parser from . import filters from . 
import parser -import urllib2 +import urllib.request +import urllib.error +import urllib.parse import re class Extractor(object): @@ -67,7 +69,7 @@ def readFromFile(self,filename): return text def readFromUrl(self,url): - f=urllib2.urlopen(url) + f = urllib.request.urlopen(url) text=f.read() encoding=self.getUrlEncoding(f) f.close() @@ -92,7 +94,7 @@ def parseDoc(self,inputStr): try: bpParser.feed(inputStr) except Exception as e: - print "Error parsing HTML : "+str(e) + print("Error parsing HTML : " + str(e)) return None doc=bpParser.toTextDocument() return doc diff --git a/boilerpy/filters.py b/boilerpy/filters.py index c2885bb..43d04e3 100644 --- a/boilerpy/filters.py +++ b/boilerpy/filters.py @@ -59,7 +59,7 @@ import re from . import document -from document import DefaultLabels +from .document import DefaultLabels # Boilerpipe abstract interface @@ -72,11 +72,11 @@ def subtractBlocks(self,blockArr,blocksToRemove): if len(blocksToRemove)==0: return blockArr newBlockArr=[] removeIter=iter(blocksToRemove) - curBlockToRemove=removeIter.next() + curBlockToRemove = next(removeIter) for idx,block in enumerate(blockArr): if block==curBlockToRemove: try: - curBlockToRemove=removeIter.next() + curBlockToRemove = next(removeIter) except StopIteration: #add the rest newBlockArr.extend(blockArr[idx+1:]) diff --git a/boilerpy/parser.py b/boilerpy/parser.py index 5f07449..5e90c43 100644 --- a/boilerpy/parser.py +++ b/boilerpy/parser.py @@ -17,10 +17,10 @@ # * limitations under the License. # -from HTMLParser import HTMLParser +from html.parser import HTMLParser from xml.sax import ContentHandler from . import document -from document import DefaultLabels +from .document import DefaultLabels import re @@ -146,7 +146,7 @@ def start(self, contentHandler, tagName, attrs): sizeAttr = attrs.getValue("size") size=None if sizeAttr != None: - match=PAT_FONT_SIZE.match(sizeAttr) + match = self.PAT_FONT_SIZE.match(sizeAttr) if match!=None: rel=match.group(0) val=match.group(1) @@ -293,13 +293,13 @@ def changesTagLevel(self): def getAncestorLabels(self): """ generated source for method getAncestorLabels """ labelSet = set() - for labels in labelStack: + for labels in self.labelStack: if labels == None:continue labelSet.update(labels) return labelSet -class CommonTagActions: +class CommonTagActions(object): TA_IGNORABLE_ELEMENT=IgnorableElementTagAction() TA_ANCHOR_TEXT=AnchorTextTagAction() TA_BODY=BodyTagAction() @@ -374,7 +374,7 @@ def addTo(self, textBlock): if self.condition(textBlock): self.addLabelsTo(textBlock) -class SpecialTokens: +class SpecialTokens(object): ANCHOR_TEXT_START = u'\ue00astart' ANCHOR_TEXT_END = u'\ue00aend' @@ -397,9 +397,8 @@ class BoilerpipeBaseParser(object): EVENT_CHARACTERS=2 EVENT_WHITESPACE=3 #all word characters except underscore -- i.e. 
not (not word or underscore) - PAT_VALID_WORD_CHARACTER = re.compile(r"[^\W_]",re.UNICODE) -# PAT_WORD = re.compile(r"\ue00a?[\w]+",re.UNICODE) - PAT_WORD = re.compile(ur"\ue00a?[\w\"'\.,\!\@\-\:\;\$\?\(\)/]+",re.UNICODE) + PAT_VALID_WORD_CHARACTER = re.compile(r"[^\W_]", re.UNICODE) + PAT_WORD = re.compile(r"\ue00a?[\w\"'\.,\!\@\-\:\;\$\?\(\)/]+", re.UNICODE) """ generated source for class BoilerpipeHTMLContentHandler """ # diff --git a/tests/unittests.py b/tests/unittests.py index 1716a36..96e367b 100644 --- a/tests/unittests.py +++ b/tests/unittests.py @@ -1,7 +1,6 @@ import unittest import sys - -import mock +from unittest import mock from boilerpy.document import TextDocument,TextBlock from boilerpy.filters import * @@ -33,12 +32,12 @@ def makedoc(self,wordsArr,numAnchorWordsArr=None,isContentArr=None,labelArr=None numWords=text.count(' ') try: numAnchorWords=numAnchorWordsArr[idx] - except TypeError,IndexError: + except (TypeError, IndexError): numAnchorWords=0 block=TextBlock(text,set(),numWords,numAnchorWords,0,0,idx) try: block.setIsContent(isContentArr[idx]) - except TypeError,IndexError: + except (TypeError, IndexError): pass try: label=labelArr[idx] @@ -46,7 +45,7 @@ def makedoc(self,wordsArr,numAnchorWordsArr=None,isContentArr=None,labelArr=None elif type(label)==list: for l in label: block.addLabel(l) else: block.addLabel(label) - except TypeError,IndexError: + except (TypeError, IndexError): pass textBlocks.append(block) @@ -414,7 +413,7 @@ def test_merge(self): self.assertEqual(block1.getText(),"AA BB CC \nDD EE FF GG HH II JJ .") self.assertEqual(block1.getNumWords(),9) self.assertEqual(block1.getNumWordsInAnchorText(),3) - self.assertAlmostEqual(block1.getLinkDensity(),1.0/3.0) + self.assertAlmostEqual(block1.getLinkDensity(), 1.0 / 3.0) self.assertEqual(block1.getTextDensity(),3) self.assertEqual(block1.getLabels(),set([DefaultLabels.MIGHT_BE_CONTENT,DefaultLabels.ARTICLE_METADATA])) self.assertEqual(block1.getOffsetBlocksStart(),0) From 93fa23e7ef395b0124d4a81f1bafa99bc573adf8 Mon Sep 17 00:00:00 2001 From: Jesir Vargas Date: Fri, 6 Apr 2018 08:47:12 -0400 Subject: [PATCH 3/4] replace tabs with spaces across the board --- .gitattributes | 10 +- README.txt | 14 +- boilerpy/__init__.py | 2 +- boilerpy/document.py | 554 ++++++++-------- boilerpy/extractors.py | 192 +++--- boilerpy/filters.py | 1382 ++++++++++++++++++++-------------------- boilerpy/parser.py | 1042 +++++++++++++++--------------- setup.py | 34 +- tests/unittests.py | 828 ++++++++++++------------ 9 files changed, 2029 insertions(+), 2029 deletions(-) diff --git a/.gitattributes b/.gitattributes index 412eeda..2431c40 100644 --- a/.gitattributes +++ b/.gitattributes @@ -10,13 +10,13 @@ *.dbproj merge=union # Standard to msysgit -*.doc diff=astextplain -*.DOC diff=astextplain +*.doc diff=astextplain +*.DOC diff=astextplain *.docx diff=astextplain *.DOCX diff=astextplain *.dot diff=astextplain *.DOT diff=astextplain *.pdf diff=astextplain -*.PDF diff=astextplain -*.rtf diff=astextplain -*.RTF diff=astextplain +*.PDF diff=astextplain +*.rtf diff=astextplain +*.RTF diff=astextplain diff --git a/README.txt b/README.txt index bf0b259..078fa4b 100644 --- a/README.txt +++ b/README.txt @@ -20,19 +20,19 @@ Installation BoilerPy was packaged with distutils. 
It can be installed from the command-line with the following line: - ``>python setup.py install`` + ``>python setup.py install`` Usage --------------------------------------- - ``import boilerpy`` + ``import boilerpy`` - ``boilerpy.extractors.ARTICLE_EXTRACTOR.getContentFromUrl('http://www.example.com/')`` + ``boilerpy.extractors.ARTICLE_EXTRACTOR.getContentFromUrl('http://www.example.com/')`` - ``boilerpy.extractors.ARTICLE_EXTRACTOR.getContentFromFile('site/example.html')`` + ``boilerpy.extractors.ARTICLE_EXTRACTOR.getContentFromFile('site/example.html')`` - ``htmlText='

Example

'`` - ``boilerpy.extractors.ARTICLE_EXTRACTOR.getContent(htmlText)`` + ``htmlText='

Example

'`` + ``boilerpy.extractors.ARTICLE_EXTRACTOR.getContent(htmlText)`` @@ -83,4 +83,4 @@ A full-text extractor which is tuned towards extracting sentences from news arti Version --------------------------------------- -1.0 - Created 14 Feb 2013 \ No newline at end of file +1.0 - Created 14 Feb 2013 diff --git a/boilerpy/__init__.py b/boilerpy/__init__.py index 96ca1bd..6d36c52 100644 --- a/boilerpy/__init__.py +++ b/boilerpy/__init__.py @@ -8,7 +8,7 @@ # * (the "License"); you may not use this file except in compliance with # * the License. You may obtain a copy of the License at # * -# * http://www.apache.org/licenses/LICENSE-2.0 +# * http://www.apache.org/licenses/LICENSE-2.0 # * # * Unless required by applicable law or agreed to in writing, software # * distributed under the License is distributed on an "AS IS" BASIS, diff --git a/boilerpy/document.py b/boilerpy/document.py index c8e8df5..9f953ff 100644 --- a/boilerpy/document.py +++ b/boilerpy/document.py @@ -8,7 +8,7 @@ # * (the "License"); you may not use this file except in compliance with # * the License. You may obtain a copy of the License at # * -# * http://www.apache.org/licenses/LICENSE-2.0 +# * http://www.apache.org/licenses/LICENSE-2.0 # * # * Unless required by applicable law or agreed to in writing, software # * distributed under the License is distributed on an "AS IS" BASIS, @@ -27,14 +27,14 @@ # * @author Christian Kohlschtter # class DefaultLabels(object): - """ generated source for class DefaultLabels """ - TITLE = "de.l3s.boilerpipe/TITLE" - ARTICLE_METADATA = "de.l3s.boilerpipe/ARTICLE_METADATA" - INDICATES_END_OF_TEXT = "de.l3s.boilerpipe/INDICATES_END_OF_TEXT" - MIGHT_BE_CONTENT = "de.l3s.boilerpipe/MIGHT_BE_CONTENT" - STRICTLY_NOT_CONTENT = "de.l3s.boilerpipe/STRICTLY_NOT_CONTENT" - HR = "de.l3s.boilerpipe/HR" - MARKUP_PREFIX = "<" + """ generated source for class DefaultLabels """ + TITLE = "de.l3s.boilerpipe/TITLE" + ARTICLE_METADATA = "de.l3s.boilerpipe/ARTICLE_METADATA" + INDICATES_END_OF_TEXT = "de.l3s.boilerpipe/INDICATES_END_OF_TEXT" + MIGHT_BE_CONTENT = "de.l3s.boilerpipe/MIGHT_BE_CONTENT" + STRICTLY_NOT_CONTENT = "de.l3s.boilerpipe/STRICTLY_NOT_CONTENT" + HR = "de.l3s.boilerpipe/HR" + MARKUP_PREFIX = "<" # # * A text document, consisting of one or more {@link TextBlock}s. @@ -42,77 +42,77 @@ class DefaultLabels(object): # * @author Christian Kohlschtter # class TextDocument(object): - # * Creates a new {@link TextDocument} with given {@link TextBlock}s and - # * given title. - # * - # * @param title - # * The "main" title for this text document. - # * @param textBlocks - # * The text blocks of this document. - def __init__(self, textBlocks, title=None): - self.title = title - self.textBlocks = textBlocks - - # * Returns the {@link TextBlock}s of this document. - # * - # * @return A list of {@link TextBlock}s, in sequential order of appearance. - # - def getTextBlocks(self): - """ generated source for method getTextBlocks """ - return self.textBlocks - - def setTextBlocks(self,textBlocks): self.textBlocks=textBlocks - - # - # * Returns the "main" title for this document, or null if no - # * such title has ben set. - # * - # * @return The "main" title. - def getTitle(self): - """ generated source for method getTitle """ - return self.title - - # - # * Updates the "main" title for this document. - # * - # * @param title - def setTitle(self, title): - """ generated source for method setTitle """ - self.title = title - - # - # * Returns the {@link TextDocument}'s content. - # * - # * @return The content text. 
- def getContent(self): - """ generated source for method getContent """ - return self.getText(True, False) - - # - # * Returns the {@link TextDocument}'s content, non-content or both - # * - # * @param includeContent Whether to include TextBlocks marked as "content". - # * @param includeNonContent Whether to include TextBlocks marked as "non-content". - # * @return The text. - def getText(self, includeContent, includeNonContent): - sb = "" - for block in self.getTextBlocks(): - if block.isContent(): - if not includeContent: - continue - else: - if not includeNonContent: - continue - sb+=block.getText()+'\n' - return sb - - # * Returns detailed debugging information about the contained {@link TextBlock}s. - # * @return Debug information. - def debugString(self): - sb = "" - for tb in self.getTextBlocks(): - sb+=str(tb)+"\n" - return sb + # * Creates a new {@link TextDocument} with given {@link TextBlock}s and + # * given title. + # * + # * @param title + # * The "main" title for this text document. + # * @param textBlocks + # * The text blocks of this document. + def __init__(self, textBlocks, title=None): + self.title = title + self.textBlocks = textBlocks + + # * Returns the {@link TextBlock}s of this document. + # * + # * @return A list of {@link TextBlock}s, in sequential order of appearance. + # + def getTextBlocks(self): + """ generated source for method getTextBlocks """ + return self.textBlocks + + def setTextBlocks(self,textBlocks): self.textBlocks=textBlocks + + # + # * Returns the "main" title for this document, or null if no + # * such title has ben set. + # * + # * @return The "main" title. + def getTitle(self): + """ generated source for method getTitle """ + return self.title + + # + # * Updates the "main" title for this document. + # * + # * @param title + def setTitle(self, title): + """ generated source for method setTitle """ + self.title = title + + # + # * Returns the {@link TextDocument}'s content. + # * + # * @return The content text. + def getContent(self): + """ generated source for method getContent """ + return self.getText(True, False) + + # + # * Returns the {@link TextDocument}'s content, non-content or both + # * + # * @param includeContent Whether to include TextBlocks marked as "content". + # * @param includeNonContent Whether to include TextBlocks marked as "non-content". + # * @return The text. + def getText(self, includeContent, includeNonContent): + sb = "" + for block in self.getTextBlocks(): + if block.isContent(): + if not includeContent: + continue + else: + if not includeNonContent: + continue + sb+=block.getText()+'\n' + return sb + + # * Returns detailed debugging information about the contained {@link TextBlock}s. + # * @return Debug information. 
+ def debugString(self): + sb = "" + for tb in self.getTextBlocks(): + sb+=str(tb)+"\n" + return sb @@ -128,171 +128,171 @@ def debugString(self): # class TextBlock(object): - """ generated source for class TextBlock """ - - def __init__(self, text, containedTextElements=set(), numWords=0, numWordsInAnchorText=0, numWordsInWrappedLines=0, numWrappedLines=0, offsetBlocks=0): - self._isContent = False - self.labels = set() - self.numFullTextWords = 0 - self.tagLevel = 0 - - self.text = text - self.containedTextElements = containedTextElements - self.numWords = numWords - self.numWordsInAnchorText = numWordsInAnchorText - self.numWordsInWrappedLines = numWordsInWrappedLines - self.numWrappedLines = numWrappedLines - self.offsetBlocksStart = offsetBlocks - self.offsetBlocksEnd = offsetBlocks - self.initDensities() - - def initDensities(self): - """ generated source for method initDensities """ - if self.numWordsInWrappedLines == 0: - self.numWordsInWrappedLines = self.numWords - self.numWrappedLines = 1 - self.textDensity = self.numWordsInWrappedLines / self.numWrappedLines - self.linkDensity = 0 if self.numWords == 0 else self.numWordsInAnchorText / self.numWords - - def isContent(self): - """ generated source for method isContent """ - return self._isContent - - def setIsContent(self, isContent): - """ generated source for method setIsContent """ - if isContent != self._isContent: - self._isContent = isContent - return True - else: - return False - - def getText(self): - """ generated source for method getText """ - return self.text - - def getNumWords(self): - """ generated source for method getNumWords """ - return self.numWords - - def getNumWordsInAnchorText(self): - """ generated source for method getNumWordsInAnchorText """ - return self.numWordsInAnchorText - - def getTextDensity(self): - """ generated source for method getTextDensity """ - return self.textDensity - - def getLinkDensity(self): - """ generated source for method getLinkDensity """ - return self.linkDensity - - def mergeNext(self, nextTextBlock): - """ generated source for method mergeNext """ - if self.text==None: self.text="" - self.text+='\n'+nextTextBlock.text - self.numWords += nextTextBlock.numWords - self.numWordsInAnchorText += nextTextBlock.numWordsInAnchorText - self.numWordsInWrappedLines += nextTextBlock.numWordsInWrappedLines - self.numWrappedLines += nextTextBlock.numWrappedLines - self.offsetBlocksStart = min(self.offsetBlocksStart, nextTextBlock.offsetBlocksStart) - self.offsetBlocksEnd = max(self.offsetBlocksEnd, nextTextBlock.offsetBlocksEnd) - self.initDensities() - self._isContent |= nextTextBlock.isContent() - self.containedTextElements|=nextTextBlock.containedTextElements - self.numFullTextWords += nextTextBlock.numFullTextWords - self.labels|=nextTextBlock.labels - self.tagLevel = min(self.tagLevel, nextTextBlock.tagLevel) - - def getOffsetBlocksStart(self): - """ generated source for method getOffsetBlocksStart """ - return self.offsetBlocksStart - - def getOffsetBlocksEnd(self): - """ generated source for method getOffsetBlocksEnd """ - return self.offsetBlocksEnd - - def __repr__(self): - """ generated source for method toString """ - return "[" + str(self.offsetBlocksStart) + "-" + str(self.offsetBlocksEnd) + ";tl=" + str(self.tagLevel) + "; nw=" + str(self.numWords) + ";nwl=" + str(self.numWrappedLines) + ";ld=" + str(self.linkDensity) + "]\t" + ("CONTENT" if self.isContent else "boilerplate") + "," + str(self.labels) + "\n" + str(self.getText()) - - # - # * Adds an arbitrary String label to 
this {@link TextBlock}. - # * - # * @param label The label - # - def addLabel(self, label): - """ generated source for method addLabel """ - self.labels.add(label) - - # - # * Checks whether this TextBlock has the given label. - # * - # * @param label The label - # * @return true if this block is marked by the given label. - # - def hasLabel(self, label): - """ generated source for method hasLabel """ - return label in self.labels - - def removeLabel(self, label): - """ generated source for method removeLabel """ - try: - self.labels.remove(label) - return True - except KeyError: - return False - - # - # * Returns the labels associated to this TextBlock, or null if no such labels - # * exist. - # * - # * to the data structure. However it is recommended to use the label-specific methods in {@link TextBlock} - # * whenever possible. - # * - # * @return Returns the set of labels, or null if no labels was added yet. - # - def getLabels(self): - """ generated source for method getLabels """ - return self.labels - - # - # * Adds a set of labels to this {@link TextBlock}. - # * null-references are silently ignored. - # * - # * @param labels The labels to be added. - # - def addLabels(self, *labels): - """ generated source for method addLabels """ - if len(labels)==0 or labels[0] == None: return - if self.labels == None: self.labels = set() - elif len(labels)==1 and (type(labels[0])==set or type(labels[0])==list): self.labels|=set(labels[0]) - else: self.labels|=set(labels) - - - # - # * Returns the containedTextElements BitSet, or null. - # * @return - # - def getContainedTextElements(self): - """ generated source for method getContainedTextElements """ - return self.containedTextElements - - def clone(self): - try: - clone = copy.copy(self) - except copy.error: - raise copy.error - if self.labels != None: clone.labels = self.labels.copy() - if self.containedTextElements != None: clone.containedTextElements = self.containedTextElements.copy() - return clone - - def getTagLevel(self): - """ generated source for method getTagLevel """ - return self.tagLevel - - def setTagLevel(self, tagLevel): - """ generated source for method setTagLevel """ - self.tagLevel = tagLevel + """ generated source for class TextBlock """ + + def __init__(self, text, containedTextElements=set(), numWords=0, numWordsInAnchorText=0, numWordsInWrappedLines=0, numWrappedLines=0, offsetBlocks=0): + self._isContent = False + self.labels = set() + self.numFullTextWords = 0 + self.tagLevel = 0 + + self.text = text + self.containedTextElements = containedTextElements + self.numWords = numWords + self.numWordsInAnchorText = numWordsInAnchorText + self.numWordsInWrappedLines = numWordsInWrappedLines + self.numWrappedLines = numWrappedLines + self.offsetBlocksStart = offsetBlocks + self.offsetBlocksEnd = offsetBlocks + self.initDensities() + + def initDensities(self): + """ generated source for method initDensities """ + if self.numWordsInWrappedLines == 0: + self.numWordsInWrappedLines = self.numWords + self.numWrappedLines = 1 + self.textDensity = self.numWordsInWrappedLines / self.numWrappedLines + self.linkDensity = 0 if self.numWords == 0 else self.numWordsInAnchorText / self.numWords + + def isContent(self): + """ generated source for method isContent """ + return self._isContent + + def setIsContent(self, isContent): + """ generated source for method setIsContent """ + if isContent != self._isContent: + self._isContent = isContent + return True + else: + return False + + def getText(self): + """ generated source for method 
getText """ + return self.text + + def getNumWords(self): + """ generated source for method getNumWords """ + return self.numWords + + def getNumWordsInAnchorText(self): + """ generated source for method getNumWordsInAnchorText """ + return self.numWordsInAnchorText + + def getTextDensity(self): + """ generated source for method getTextDensity """ + return self.textDensity + + def getLinkDensity(self): + """ generated source for method getLinkDensity """ + return self.linkDensity + + def mergeNext(self, nextTextBlock): + """ generated source for method mergeNext """ + if self.text==None: self.text="" + self.text+='\n'+nextTextBlock.text + self.numWords += nextTextBlock.numWords + self.numWordsInAnchorText += nextTextBlock.numWordsInAnchorText + self.numWordsInWrappedLines += nextTextBlock.numWordsInWrappedLines + self.numWrappedLines += nextTextBlock.numWrappedLines + self.offsetBlocksStart = min(self.offsetBlocksStart, nextTextBlock.offsetBlocksStart) + self.offsetBlocksEnd = max(self.offsetBlocksEnd, nextTextBlock.offsetBlocksEnd) + self.initDensities() + self._isContent |= nextTextBlock.isContent() + self.containedTextElements|=nextTextBlock.containedTextElements + self.numFullTextWords += nextTextBlock.numFullTextWords + self.labels|=nextTextBlock.labels + self.tagLevel = min(self.tagLevel, nextTextBlock.tagLevel) + + def getOffsetBlocksStart(self): + """ generated source for method getOffsetBlocksStart """ + return self.offsetBlocksStart + + def getOffsetBlocksEnd(self): + """ generated source for method getOffsetBlocksEnd """ + return self.offsetBlocksEnd + + def __repr__(self): + """ generated source for method toString """ + return "[" + str(self.offsetBlocksStart) + "-" + str(self.offsetBlocksEnd) + ";tl=" + str(self.tagLevel) + "; nw=" + str(self.numWords) + ";nwl=" + str(self.numWrappedLines) + ";ld=" + str(self.linkDensity) + "]\t" + ("CONTENT" if self.isContent else "boilerplate") + "," + str(self.labels) + "\n" + str(self.getText()) + + # + # * Adds an arbitrary String label to this {@link TextBlock}. + # * + # * @param label The label + # + def addLabel(self, label): + """ generated source for method addLabel """ + self.labels.add(label) + + # + # * Checks whether this TextBlock has the given label. + # * + # * @param label The label + # * @return true if this block is marked by the given label. + # + def hasLabel(self, label): + """ generated source for method hasLabel """ + return label in self.labels + + def removeLabel(self, label): + """ generated source for method removeLabel """ + try: + self.labels.remove(label) + return True + except KeyError: + return False + + # + # * Returns the labels associated to this TextBlock, or null if no such labels + # * exist. + # * + # * to the data structure. However it is recommended to use the label-specific methods in {@link TextBlock} + # * whenever possible. + # * + # * @return Returns the set of labels, or null if no labels was added yet. + # + def getLabels(self): + """ generated source for method getLabels """ + return self.labels + + # + # * Adds a set of labels to this {@link TextBlock}. + # * null-references are silently ignored. + # * + # * @param labels The labels to be added. 
+ # + def addLabels(self, *labels): + """ generated source for method addLabels """ + if len(labels)==0 or labels[0] == None: return + if self.labels == None: self.labels = set() + elif len(labels)==1 and (type(labels[0])==set or type(labels[0])==list): self.labels|=set(labels[0]) + else: self.labels|=set(labels) + + + # + # * Returns the containedTextElements BitSet, or null. + # * @return + # + def getContainedTextElements(self): + """ generated source for method getContainedTextElements """ + return self.containedTextElements + + def clone(self): + try: + clone = copy.copy(self) + except copy.error: + raise copy.error + if self.labels != None: clone.labels = self.labels.copy() + if self.containedTextElements != None: clone.containedTextElements = self.containedTextElements.copy() + return clone + + def getTagLevel(self): + """ generated source for method getTagLevel """ + return self.tagLevel + + def setTagLevel(self, tagLevel): + """ generated source for method setTagLevel """ + self.tagLevel = tagLevel TextBlock.EMPTY_START = TextBlock("", set(), 0, 0, 0, 0, -1) TextBlock.EMPTY_END = TextBlock("", set(), 0, 0, 0, 0, sys.maxsize) @@ -304,35 +304,35 @@ def setTagLevel(self, tagLevel): # * @author Christian Kohlschuetter # class TextDocumentStatistics(object): - # - # * Computes statistics on a given {@link TextDocument}. - # * - # * @param doc The {@link TextDocument}. - # * @param contentOnly if true then o - # - def __init__(self, doc, contentOnly): - self.numWords=0 - self.numBlocks=0 - for tb in doc.getTextBlocks(): - if contentOnly and not tb.isContent(): continue - self.numWords += tb.getNumWords() - self.numBlocks += 1 - - - # * Returns the average number of words at block-level (= overall number of words divided by - # * the number of blocks). - # * - # * @return Average - # - def avgNumWords(self): - """ generated source for method avgNumWords """ - return self.numWords / self.numBlocks - - # - # * Returns the overall number of words in all blocks. - # * - # * @return Sum - # - def getNumWords(self): - """ generated source for method getNumWords """ - return self.numWords + # + # * Computes statistics on a given {@link TextDocument}. + # * + # * @param doc The {@link TextDocument}. + # * @param contentOnly if true then o + # + def __init__(self, doc, contentOnly): + self.numWords=0 + self.numBlocks=0 + for tb in doc.getTextBlocks(): + if contentOnly and not tb.isContent(): continue + self.numWords += tb.getNumWords() + self.numBlocks += 1 + + + # * Returns the average number of words at block-level (= overall number of words divided by + # * the number of blocks). + # * + # * @return Average + # + def avgNumWords(self): + """ generated source for method avgNumWords """ + return self.numWords / self.numBlocks + + # + # * Returns the overall number of words in all blocks. + # * + # * @return Sum + # + def getNumWords(self): + """ generated source for method getNumWords """ + return self.numWords diff --git a/boilerpy/extractors.py b/boilerpy/extractors.py index aa72175..d3e95e7 100644 --- a/boilerpy/extractors.py +++ b/boilerpy/extractors.py @@ -8,7 +8,7 @@ # * (the "License"); you may not use this file except in compliance with # * the License. 
You may obtain a copy of the License at # * -# * http://www.apache.org/licenses/LICENSE-2.0 +# * http://www.apache.org/licenses/LICENSE-2.0 # * # * Unless required by applicable law or agreed to in writing, software # * distributed under the License is distributed on an "AS IS" BASIS, @@ -36,68 +36,68 @@ import re class Extractor(object): - def __init__(self,filtr): - self.filter=filtr - - def getContent(self, text): - return self.getDoc(text).getContent() - - def getContentFromUrl(self, url): - return self.getDocFromUrl(url).getContent() - - def getContentFromFile(self, filename): - return self.getDocFromFile(filename).getContent() - - def getDocFromFile(self,filename): - return self.getDoc(self.readFromFile(filename)) - - def getDocFromUrl(self,url): - return self.getDoc(self.readFromUrl(url)) - - def getDoc(self,text): - doc=self.parseDoc(text) - self.filter.process(doc) - return doc - - def readFromFile(self,filename): - f=open(filename,'r') - text=f.read() - f.close() - try: - text=text.decode('utf8') - except UnicodeDecodeError: pass - return text - - def readFromUrl(self,url): - f = urllib.request.urlopen(url) - text=f.read() - encoding=self.getUrlEncoding(f) - f.close() - try: - text=text.decode(encoding) - except UnicodeDecodeError: pass - return text - - def getUrlEncoding(self,f): - try: - return f.headers['content-type'].split('charset=')[1].split(';')[0] - except: return 'utf8' - - def parseDoc(self,inputStr): - bpParser=parser.BoilerpipeHTMLParser() - try: - bpParser.feed(inputStr) - except Exception as exc: - #in case of error, try again, first removing script tag content - bpParser=parser.BoilerpipeHTMLParser() - inputStr=re.sub(r'<(?:script|SCRIPT)[^>]*>.*?','',inputStr,0,re.DOTALL) - try: - bpParser.feed(inputStr) - except Exception as e: - print("Error parsing HTML : " + str(e)) - return None - doc=bpParser.toTextDocument() - return doc + def __init__(self,filtr): + self.filter=filtr + + def getContent(self, text): + return self.getDoc(text).getContent() + + def getContentFromUrl(self, url): + return self.getDocFromUrl(url).getContent() + + def getContentFromFile(self, filename): + return self.getDocFromFile(filename).getContent() + + def getDocFromFile(self,filename): + return self.getDoc(self.readFromFile(filename)) + + def getDocFromUrl(self,url): + return self.getDoc(self.readFromUrl(url)) + + def getDoc(self,text): + doc=self.parseDoc(text) + self.filter.process(doc) + return doc + + def readFromFile(self,filename): + f=open(filename,'r') + text=f.read() + f.close() + try: + text=text.decode('utf8') + except UnicodeDecodeError: pass + return text + + def readFromUrl(self,url): + f = urllib.request.urlopen(url) + text=f.read() + encoding=self.getUrlEncoding(f) + f.close() + try: + text=text.decode(encoding) + except UnicodeDecodeError: pass + return text + + def getUrlEncoding(self,f): + try: + return f.headers['content-type'].split('charset=')[1].split(';')[0] + except: return 'utf8' + + def parseDoc(self,inputStr): + bpParser=parser.BoilerpipeHTMLParser() + try: + bpParser.feed(inputStr) + except Exception as exc: + #in case of error, try again, first removing script tag content + bpParser=parser.BoilerpipeHTMLParser() + inputStr=re.sub(r'<(?:script|SCRIPT)[^>]*>.*?','',inputStr,0,re.DOTALL) + try: + bpParser.feed(inputStr) + except Exception as e: + print("Error parsing HTML : " + str(e)) + return None + doc=bpParser.toTextDocument() + return doc @@ -105,28 +105,28 @@ def parseDoc(self,inputStr): # * A full-text extractor which is tuned towards news articles. 
In this scenario # * it achieves higher accuracy than {@link DefaultExtractor}. articleFilterChain=filters.FilterChain([ - filters.TerminatingBlocksFinder(), - filters.DocumentTitleMatchClassifier(None,True), - filters.NumWordsRulesClassifier(), - filters.IgnoreBlocksAfterContentFilter(), - filters.BlockProximityFusion(1,False,False), - filters.BoilerplateBlockFilter(), - filters.BlockProximityFusion(1,True,False), - filters.KeepLargestBlockFilter(), - filters.ExpandTitleToContentFilter() + filters.TerminatingBlocksFinder(), + filters.DocumentTitleMatchClassifier(None,True), + filters.NumWordsRulesClassifier(), + filters.IgnoreBlocksAfterContentFilter(), + filters.BlockProximityFusion(1,False,False), + filters.BoilerplateBlockFilter(), + filters.BlockProximityFusion(1,True,False), + filters.KeepLargestBlockFilter(), + filters.ExpandTitleToContentFilter() ]) -# * Works very well for most types of Article-like HTML. +# * Works very well for most types of Article-like HTML. ARTICLE_EXTRACTOR = Extractor(articleFilterChain) # class DefaultExtractor -# * Usually worse than {@link ArticleExtractor}, but simpler/no heuristics. +# * Usually worse than {@link ArticleExtractor}, but simpler/no heuristics. # * A quite generic full-text extractor. defaultFilterChain=filters.FilterChain([ - filters.SimpleBlockFusionProcessor(), - filters.BlockProximityFusion(1,False,False), - filters.DensityRulesClassifier() + filters.SimpleBlockFusionProcessor(), + filters.BlockProximityFusion(1,False,False), + filters.DensityRulesClassifier() ]) DEFAULT_EXTRACTOR = Extractor(defaultFilterChain) @@ -137,19 +137,19 @@ def parseDoc(self,inputStr): # * For news articles, it may perform better than the {@link DefaultExtractor}, # * but usually worse than {@link ArticleExtractor}. largestContentFilterChain=filters.FilterChain([ - filters.NumWordsRulesClassifier(), - filters.BlockProximityFusion(1,False,False), - filters.KeepLargestBlockFilter() + filters.NumWordsRulesClassifier(), + filters.BlockProximityFusion(1,False,False), + filters.KeepLargestBlockFilter() ]) -# * Like {@link DefaultExtractor}, but keeps the largest text block only. +# * Like {@link DefaultExtractor}, but keeps the largest text block only. LARGEST_CONTENT_EXTRACTOR = Extractor(largestContentFilterChain) # class CanolaExtractor -# * Trained on krdwrd Canola (different definition of "boilerplate"). You may -# * give it a try. +# * Trained on krdwrd Canola (different definition of "boilerplate"). You may +# * give it a try. CANOLA_EXTRACTOR = Extractor(filters.CanolaFilter()) @@ -157,9 +157,9 @@ def parseDoc(self,inputStr): # class KeepEverythingExtractor # * Marks everything as content. -# * Dummy Extractor; should return the input text. Use this to double-check -# * that your problem is within a particular {@link BoilerpipeExtractor}, or -# * somewhere else. +# * Dummy Extractor; should return the input text. Use this to double-check +# * that your problem is within a particular {@link BoilerpipeExtractor}, or +# * somewhere else. KEEP_EVERYTHING_EXTRACTOR = Extractor(filters.MarkEverythingContentFilter()) @@ -176,9 +176,9 @@ def parseDoc(self,inputStr): # class ArticleSentencesExtractor # * A full-text extractor which is tuned towards extracting sentences from news articles. 
ARTICLE_SENTENCES_EXTRACTOR=Extractor(filters.FilterChain([ - articleFilterChain, - filters.SplitParagraphBlocksFilter(), - filters.MinClauseWordsFilter() + articleFilterChain, + filters.SplitParagraphBlocksFilter(), + filters.MinClauseWordsFilter() ])) @@ -186,10 +186,10 @@ def parseDoc(self,inputStr): # * For news articles, it may perform better than the {@link DefaultExtractor}, # * but usually worse than {@link ArticleExtractor}. class KeepEverythingWithMinKWordsFilter(filters.FilterChain): - def __init__(self, kMin): - filterArr = [ - filters.SimpleBlockFusionProcessor(), - filters.MarkEverythingContentFilter(), - filters.MinWordsFilter(kMin) - ] - super(KeepEverythingWithMinKWordsFilter, self).__init__(filters) + def __init__(self, kMin): + filterArr = [ + filters.SimpleBlockFusionProcessor(), + filters.MarkEverythingContentFilter(), + filters.MinWordsFilter(kMin) + ] + super(KeepEverythingWithMinKWordsFilter, self).__init__(filters) diff --git a/boilerpy/filters.py b/boilerpy/filters.py index 43d04e3..a9714ed 100644 --- a/boilerpy/filters.py +++ b/boilerpy/filters.py @@ -9,7 +9,7 @@ # * (the "License"); you may not use this file except in compliance with # * the License. You may obtain a copy of the License at # * -# * http://www.apache.org/licenses/LICENSE-2.0 +# * http://www.apache.org/licenses/LICENSE-2.0 # * # * Unless required by applicable law or agreed to in writing, software # * distributed under the License is distributed on an "AS IS" BASIS, @@ -64,37 +64,37 @@ # Boilerpipe abstract interface class BoilerpipeFilter(object): - def process(self, doc): pass - - def subtractBlocks(self,blockArr,blocksToRemove): - #inefficient but in place: for block in blocksToRemove: blockArr.remove(blocksToRemove) - #efficiently subtracts second array from first assuming blocksToRemove shows up in the same order as blocArr - if len(blocksToRemove)==0: return blockArr - newBlockArr=[] - removeIter=iter(blocksToRemove) - curBlockToRemove = next(removeIter) - for idx,block in enumerate(blockArr): - if block==curBlockToRemove: - try: - curBlockToRemove = next(removeIter) - except StopIteration: - #add the rest - newBlockArr.extend(blockArr[idx+1:]) - break - else: newBlockArr.append(block) - return newBlockArr + def process(self, doc): pass + + def subtractBlocks(self,blockArr,blocksToRemove): + #inefficient but in place: for block in blocksToRemove: blockArr.remove(blocksToRemove) + #efficiently subtracts second array from first assuming blocksToRemove shows up in the same order as blocArr + if len(blocksToRemove)==0: return blockArr + newBlockArr=[] + removeIter=iter(blocksToRemove) + curBlockToRemove = next(removeIter) + for idx,block in enumerate(blockArr): + if block==curBlockToRemove: + try: + curBlockToRemove = next(removeIter) + except StopIteration: + #add the rest + newBlockArr.extend(blockArr[idx+1:]) + break + else: newBlockArr.append(block) + return newBlockArr # chain together multiple filters in sequence class FilterChain(BoilerpipeFilter): - def __init__(self,filterArr): - super(FilterChain, self).__init__() - self.filterArr=filterArr - - def process(self,doc): - isUpdated=False - for filtr in self.filterArr: - isUpdated|=filtr.process(doc) - return isUpdated + def __init__(self,filterArr): + super(FilterChain, self).__init__() + self.filterArr=filterArr + + def process(self,doc): + isUpdated=False + for filtr in self.filterArr: + isUpdated|=filtr.process(doc) + return isUpdated #----------------------------------------------------------------------- @@ -109,14 +109,14 @@ def 
process(self,doc): # * @author Christian Kohlschtter # class MarkEverythingContentFilter(BoilerpipeFilter): - def process(self, doc): - """ generated source for method process """ - changes = False - for tb in doc.getTextBlocks(): - if not tb.isContent(): - tb.setIsContent(True) - changes = True - return changes + def process(self, doc): + """ generated source for method process """ + changes = False + for tb in doc.getTextBlocks(): + if not tb.isContent(): + tb.setIsContent(True) + changes = True + return changes # @@ -126,12 +126,12 @@ def process(self, doc): # class InvertedFilter(BoilerpipeFilter): - def process(self, doc): - """ generated source for method process """ - tbs = doc.getTextBlocks() - if len(tbs)==0: return False - for tb in tbs: tb.setIsContent(not tb.isContent()) - return True + def process(self, doc): + """ generated source for method process """ + tbs = doc.getTextBlocks() + if len(tbs)==0: return False + for tb in tbs: tb.setIsContent(not tb.isContent()) + return True # @@ -140,14 +140,14 @@ def process(self, doc): # * @author Christian Kohlschtter # class BoilerplateBlockFilter(BoilerpipeFilter): - def process(self, doc): - """ generated source for method process """ - textBlocks = doc.getTextBlocks() - newBlocks=[tb for tb in textBlocks if tb.isContent()] - hasChanges = len(newBlocks)= self.minWords: return True - return n >= self.minWords + def __init__(self, minWords=5, acceptClausesWithoutDelimiter=False): + super(MinClauseWordsFilter, self).__init__() + self.minWords = minWords + self.acceptClausesWithoutDelimiter = acceptClausesWithoutDelimiter + + PAT_CLAUSE_DELIMITER = re.compile(r"\b[\,\.\:\;\!\?]+(?:\s+|\Z)",re.UNICODE) + PAT_WHITESPACE = re.compile("\s+") + + def process(self, doc): + """ generated source for method process """ + changes = False + for tb in doc.getTextBlocks(): + if not tb.isContent(): continue + hasClause = False + possibleClauseArr=self.PAT_CLAUSE_DELIMITER.split(tb.getText()) + for possibleClause in possibleClauseArr[:-1]: + hasClause = self.isClauseAccepted(possibleClause) + if hasClause: break + + # since clauses should *always end* with a delimiter, we normally + # don't consider text without one + if self.acceptClausesWithoutDelimiter: + hasClause |= self.isClauseAccepted(possibleClauseArr[-1]) + if not hasClause: + tb.setIsContent(False) + changes = True + # System.err.println("IS NOT CONTENT: " + text); + return changes + + def isClauseAccepted(self, text): + """ generated source for method isClause """ + n = 1 + for match in self.PAT_WHITESPACE.finditer(text): + n += 1 + if n >= self.minWords: return True + return n >= self.minWords # @@ -230,56 +230,56 @@ def isClauseAccepted(self, text): # * @see MinClauseWordsFilter # class SplitParagraphBlocksFilter(BoilerpipeFilter): - def process(self, doc): - changes = False - blocks = doc.getTextBlocks() - blocksNew = [] - for tb in blocks: - text = tb.getText(); - paragraphs = re.split(r"[\n\r]+",text) - if len(paragraphs)<2: - blocksNew.append(tb) - continue - isContent = tb.isContent() - labels = tb.getLabels() - for p in paragraphs: - tbP=document.TextBlock(p) - tbP.setIsContent(isContent) - tbP.addLabels(labels) - blocksNew.append(tbP) - changes = True - - if changes: doc.setTextBlocks(blocksNew) - return changes - + def process(self, doc): + changes = False + blocks = doc.getTextBlocks() + blocksNew = [] + for tb in blocks: + text = tb.getText(); + paragraphs = re.split(r"[\n\r]+",text) + if len(paragraphs)<2: + blocksNew.append(tb) + continue + isContent = tb.isContent() + labels 
= tb.getLabels() + for p in paragraphs: + tbP=document.TextBlock(p) + tbP.setIsContent(isContent) + tbP.addLabels(labels) + blocksNew.append(tbP) + changes = True + + if changes: doc.setTextBlocks(blocksNew) + return changes + class SurroundingToContentFilter(BoilerpipeFilter): - # this is now default when no arguments are passed - #INSTANCE_TEXT = SurroundingToContentFilter(TextBlockCondition()) - - #ctor - condition is an function for an additional condition to determine if it can be made content - def __init__(self, condition=lambda tb:tb.getLinkDensity()==0 and tb.getNumWords()>6): - super(SurroundingToContentFilter, self).__init__() - self.cond=condition - - def process(self, doc): - """ generated source for method process """ - tbs = doc.getTextBlocks() - n=len(tbs) - hasChanges=False - i=1 - while i6): + super(SurroundingToContentFilter, self).__init__() + self.cond=condition + + def process(self, doc): + """ generated source for method process """ + tbs = doc.getTextBlocks() + n=len(tbs) + hasChanges=False + i=1 + while i0: - newBlocks=self.subtractBlocks(textBlocks,blocksToRemove) - doc.setTextBlocks(newBlocks) - changes=True - - return changes + """ generated source for class BlockProximityFusion """ + #MAX_DISTANCE_1 = BlockProximityFusion(1, False, False) + #MAX_DISTANCE_1_SAME_TAGLEVEL = BlockProximityFusion(1, False, True) + #MAX_DISTANCE_1_CONTENT_ONLY = BlockProximityFusion(1, True, False) + #MAX_DISTANCE_1_CONTENT_ONLY_SAME_TAGLEVEL = BlockProximityFusion(1, True, True) + + # + # * Creates a new {@link BlockProximityFusion} instance. + # * + # * @param maxBlocksDistance The maximum distance in blocks. + # * @param contentOnly + # + def __init__(self, maxBlocksDistance=1, contentOnly=False, sameTagLevelOnly=False): + """ generated source for method __init__ """ + super(BlockProximityFusion, self).__init__() + self.maxBlocksDistance = maxBlocksDistance + self.contentOnly = contentOnly + self.sameTagLevelOnly = sameTagLevelOnly + + def process(self, doc): + """ generated source for method process """ + textBlocks = doc.getTextBlocks() + if len(textBlocks) < 2: return False + changes = False + + if self.contentOnly: + startIdx=None + for idx,block in enumerate(textBlocks): + if block.isContent(): + startIdx=idx + break + if startIdx == None: return False + else: + startIdx=0 + + prevBlock=textBlocks[startIdx] + blocksToRemove=[] + for block in textBlocks[startIdx+1:]: + if not block.isContent(): + prevBlock = block + continue + diffBlocks = block.getOffsetBlocksStart() - prevBlock.getOffsetBlocksEnd() - 1; + if diffBlocks <= self.maxBlocksDistance: + ok=True + if self.contentOnly: + if not prevBlock.isContent() or not block.isContent(): + ok = False + if self.sameTagLevelOnly and prevBlock.getTagLevel() != block.getTagLevel(): + ok = False + if ok: + prevBlock.mergeNext(block) + #remove current block + blocksToRemove.append(block) + changes = True + else: + prevBlock = block + else: + prevBlock = block + + if len(blocksToRemove)>0: + newBlocks=self.subtractBlocks(textBlocks,blocksToRemove) + doc.setTextBlocks(newBlocks) + changes=True + + return changes @@ -521,49 +521,49 @@ def process(self, doc): # * @author Christian Kohlschtter # class KeepLargestBlockFilter(BoilerpipeFilter): - """ generated source for class KeepLargestBlockFilter """ - #INSTANCE = KeepLargestBlockFilter(False) - #INSTANCE_EXPAND_TO_SAME_TAGLEVEL = KeepLargestBlockFilter(True) - - def __init__(self, expandToSameLevelText=False): - """ generated source for method __init__ """ - 
super(KeepLargestBlockFilter, self).__init__() - self.expandToSameLevelText = expandToSameLevelText - - def process(self, doc): - """ generated source for method process """ - textBlocks = doc.getTextBlocks() - if len(textBlocks) < 2: return False - - try: - contentBlockIter=(tb for tb in textBlocks if tb.isContent()) - largestBlock=max(contentBlockIter,key=lambda tb:tb.getNumWords()) - except ValueError: - #no content blocks exist / largest block not found - largestBlock=None - - for tb in textBlocks: - if tb == largestBlock: - tb.setIsContent(True) - else: - tb.setIsContent(False) - tb.addLabel(DefaultLabels.MIGHT_BE_CONTENT) - - if self.expandToSameLevelText and largestBlock!=None: - level = largestBlock.getTagLevel() - largestBlockIdx=textBlocks.index(largestBlock) - - for tb in textBlocks[largestBlockIdx::-1]: - tl=tb.getTagLevel() - if tl < level: break - elif tl == level: tb.setIsContent(True) - - for tb in textBlocks[largestBlockIdx:]: - tl=tb.getTagLevel() - if tl < level: break - elif tl == level: tb.setIsContent(True) - - return True + """ generated source for class KeepLargestBlockFilter """ + #INSTANCE = KeepLargestBlockFilter(False) + #INSTANCE_EXPAND_TO_SAME_TAGLEVEL = KeepLargestBlockFilter(True) + + def __init__(self, expandToSameLevelText=False): + """ generated source for method __init__ """ + super(KeepLargestBlockFilter, self).__init__() + self.expandToSameLevelText = expandToSameLevelText + + def process(self, doc): + """ generated source for method process """ + textBlocks = doc.getTextBlocks() + if len(textBlocks) < 2: return False + + try: + contentBlockIter=(tb for tb in textBlocks if tb.isContent()) + largestBlock=max(contentBlockIter,key=lambda tb:tb.getNumWords()) + except ValueError: + #no content blocks exist / largest block not found + largestBlock=None + + for tb in textBlocks: + if tb == largestBlock: + tb.setIsContent(True) + else: + tb.setIsContent(False) + tb.addLabel(DefaultLabels.MIGHT_BE_CONTENT) + + if self.expandToSameLevelText and largestBlock!=None: + level = largestBlock.getTagLevel() + largestBlockIdx=textBlocks.index(largestBlock) + + for tb in textBlocks[largestBlockIdx::-1]: + tl=tb.getTagLevel() + if tl < level: break + elif tl == level: tb.setIsContent(True) + + for tb in textBlocks[largestBlockIdx:]: + tl=tb.getTagLevel() + if tl < level: break + elif tl == level: tb.setIsContent(True) + + return True # * Marks all {@link TextBlock}s "content" which are between the headline and the part that @@ -574,44 +574,44 @@ def process(self, doc): # * @author Christian Kohlschtter # class ExpandTitleToContentFilter(BoilerpipeFilter): - def process(self, doc): - """ generated source for method process """ - i = 0 - titleIdx = -1 - contentStart = -1 - for tb in doc.getTextBlocks(): - if contentStart == -1 and tb.hasLabel(DefaultLabels.TITLE): - titleIdx = i - if contentStart == -1 and tb.isContent(): - contentStart = i - i += 1 - - if contentStart <= titleIdx or titleIdx == -1: return False - - changes = False - for tb in doc.getTextBlocks()[titleIdx:contentStart]: - if tb.hasLabel(DefaultLabels.MIGHT_BE_CONTENT): - changes |= tb.setIsContent(True) - return changes + def process(self, doc): + """ generated source for method process """ + i = 0 + titleIdx = -1 + contentStart = -1 + for tb in doc.getTextBlocks(): + if contentStart == -1 and tb.hasLabel(DefaultLabels.TITLE): + titleIdx = i + if contentStart == -1 and tb.isContent(): + contentStart = i + i += 1 + + if contentStart <= titleIdx or titleIdx == -1: return False + + changes = False + for tb in 
doc.getTextBlocks()[titleIdx:contentStart]: + if tb.hasLabel(DefaultLabels.MIGHT_BE_CONTENT): + changes |= tb.setIsContent(True) + return changes class ArticleMetadataFilter(BoilerpipeFilter): - #checks for date/time/author blocks - PATTERNS_SHORT = [re.compile(r"^[0-9 \,\./]*\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec|January|February|March|April|May|June|July|August|September|October|November|December)?\b[0-9 \,\:apm\./]*(?:[CPSDMGET]{2,3})?$"), re.compile("^[Bb]y ")]; - - def process(self, doc): - """ generated source for method process """ - changed = False - for tb in doc.getTextBlocks(): - if tb.getNumWords() > 10: continue - for p in self.PATTERNS_SHORT: - text = tb.getText() - if p.search(text): - changed = True - tb.setIsContent(True) - tb.addLabel(DefaultLabels.ARTICLE_METADATA) - break - return changed + #checks for date/time/author blocks + PATTERNS_SHORT = [re.compile(r"^[0-9 \,\./]*\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec|January|February|March|April|May|June|July|August|September|October|November|December)?\b[0-9 \,\:apm\./]*(?:[CPSDMGET]{2,3})?$"), re.compile("^[Bb]y ")]; + + def process(self, doc): + """ generated source for method process """ + changed = False + for tb in doc.getTextBlocks(): + if tb.getNumWords() > 10: continue + for p in self.PATTERNS_SHORT: + text = tb.getText() + if p.search(text): + changed = True + tb.setIsContent(True) + tb.addLabel(DefaultLabels.ARTICLE_METADATA) + break + return changed # @@ -620,36 +620,36 @@ def process(self, doc): # * @author Christian Kohlschtter # class AddPrecedingLabelsFilter(BoilerpipeFilter): - #INSTANCE = AddPrecedingLabelsFilter("") - #INSTANCE_PRE = AddPrecedingLabelsFilter("^") - - # - # * Creates a new {@link AddPrecedingLabelsFilter} instance. - # * - # * @param maxBlocksDistance The maximum distance in blocks. - # * @param contentOnly - # - def __init__(self, labelPrefix=""): - """ generated source for method __init__ """ - super(AddPrecedingLabelsFilter, self).__init__() - self.labelPrefix = labelPrefix - - def process(self, doc): - """ generated source for method process """ - textBlocks = doc.getTextBlocks() - if len(textBlocks) < 2: return False - changes = False - blockBelow = None - - for block in textBlocks[::-1]: - if blockBelow != None: - labels=block.getLabels() - if labels != None and len(labels)>0: - for l in labels: blockBelow.addLabel(self.labelPrefix + l) - changes = True - blockBelow = block - - return changes + #INSTANCE = AddPrecedingLabelsFilter("") + #INSTANCE_PRE = AddPrecedingLabelsFilter("^") + + # + # * Creates a new {@link AddPrecedingLabelsFilter} instance. + # * + # * @param maxBlocksDistance The maximum distance in blocks. 
+ # * @param contentOnly + # + def __init__(self, labelPrefix=""): + """ generated source for method __init__ """ + super(AddPrecedingLabelsFilter, self).__init__() + self.labelPrefix = labelPrefix + + def process(self, doc): + """ generated source for method process """ + textBlocks = doc.getTextBlocks() + if len(textBlocks) < 2: return False + changes = False + blockBelow = None + + for block in textBlocks[::-1]: + if blockBelow != None: + labels=block.getLabels() + if labels != None and len(labels)>0: + for l in labels: blockBelow.addLabel(self.labelPrefix + l) + changes = True + blockBelow = block + + return changes # @@ -661,67 +661,67 @@ def process(self, doc): # class DocumentTitleMatchClassifier(BoilerpipeFilter): - """ generated source for class DocumentTitleMatchClassifier """ - - def __init__(self, title, useDocTitle=False): - """ generated source for method __init__ """ - super(DocumentTitleMatchClassifier, self).__init__() - self.useDocTitle=useDocTitle - if useDocTitle: self.potentialTitles=None - else: self.potentialTitles=self.findPotentialTitles(title) - - def findPotentialTitles(self,title): - if title == None: return None - title = title.strip() - if len(title)==0: - return None - else: - potentialTitles = set() - potentialTitles.add(title) - p = self.getLongestPart(title, "[ ]*[\||:][ ]*") - if p != None: potentialTitles.add(p) - p = self.getLongestPart(title, "[ ]*[\||:\(\)][ ]*") - if p != None: potentialTitles.add(p) - p = self.getLongestPart(title, "[ ]*[\||:\(\)\-][ ]*") - if p != None: potentialTitles.add(p) - p = self.getLongestPart(title, "[ ]*[\||,|:\(\)\-][ ]*") - if p != None: potentialTitles.add(p) - return potentialTitles - - def getPotentialTitles(self): - """ generated source for method getPotentialTitles """ - return self.potentialTitles - - def getLongestPart(self, title, pattern): - """ generated source for method getLongestPart """ - parts = re.split(pattern,title) - if len(parts)==1: return None - - longestNumWords = 0 - longestPart = "" - for p in parts: - if ".com" in p: continue - numWords=self.getNumWords(p) - if numWords > longestNumWords or len(p)>len(longestPart): - longestNumWords = numWords - longestPart = p - if len(longestPart)==0: return None - else: return longestPart.strip() - - def getNumWords(self,text): - return len(re.findall("\w+",text,re.UNICODE)) - - def process(self, doc): - """ generated source for method process """ - if self.useDocTitle: self.potentialTitles=self.findPotentialTitles(doc.getTitle()) - if self.potentialTitles == None: return False - changes = False - for tb in doc.getTextBlocks(): - text=tb.getText().strip().lower() - if any(candidate.lower()==text for candidate in self.potentialTitles): - tb.addLabel(DefaultLabels.TITLE) - changes = True - return changes + """ generated source for class DocumentTitleMatchClassifier """ + + def __init__(self, title, useDocTitle=False): + """ generated source for method __init__ """ + super(DocumentTitleMatchClassifier, self).__init__() + self.useDocTitle=useDocTitle + if useDocTitle: self.potentialTitles=None + else: self.potentialTitles=self.findPotentialTitles(title) + + def findPotentialTitles(self,title): + if title == None: return None + title = title.strip() + if len(title)==0: + return None + else: + potentialTitles = set() + potentialTitles.add(title) + p = self.getLongestPart(title, "[ ]*[\||:][ ]*") + if p != None: potentialTitles.add(p) + p = self.getLongestPart(title, "[ ]*[\||:\(\)][ ]*") + if p != None: potentialTitles.add(p) + p = self.getLongestPart(title, "[ 
]*[\||:\(\)\-][ ]*") + if p != None: potentialTitles.add(p) + p = self.getLongestPart(title, "[ ]*[\||,|:\(\)\-][ ]*") + if p != None: potentialTitles.add(p) + return potentialTitles + + def getPotentialTitles(self): + """ generated source for method getPotentialTitles """ + return self.potentialTitles + + def getLongestPart(self, title, pattern): + """ generated source for method getLongestPart """ + parts = re.split(pattern,title) + if len(parts)==1: return None + + longestNumWords = 0 + longestPart = "" + for p in parts: + if ".com" in p: continue + numWords=self.getNumWords(p) + if numWords > longestNumWords or len(p)>len(longestPart): + longestNumWords = numWords + longestPart = p + if len(longestPart)==0: return None + else: return longestPart.strip() + + def getNumWords(self,text): + return len(re.findall("\w+",text,re.UNICODE)) + + def process(self, doc): + """ generated source for method process """ + if self.useDocTitle: self.potentialTitles=self.findPotentialTitles(doc.getTitle()) + if self.potentialTitles == None: return False + changes = False + for tb in doc.getTextBlocks(): + text=tb.getText().strip().lower() + if any(candidate.lower()==text for candidate in self.potentialTitles): + tb.addLabel(DefaultLabels.TITLE) + changes = True + return changes @@ -743,9 +743,9 @@ def process(self, doc): # * @author Christian Kohlschtter # class HeuristicFilterBase(BoilerpipeFilter): - def getNumFullTextWords(self, tb, minTextDensity=9): - if tb.getTextDensity() >= minTextDensity: return tb.getNumWords() - else: return 0 + def getNumFullTextWords(self, tb, minTextDensity=9): + if tb.getTextDensity() >= minTextDensity: return tb.getNumWords() + else: return 0 # # * Keeps only those content blocks which contain at least k full-text words @@ -754,17 +754,17 @@ def getNumFullTextWords(self, tb, minTextDensity=9): # * @author Christian Kohlschtter # class MinFulltextWordsFilter(HeuristicFilterBase): - def __init__(self, minWords=30): - self.minWords = minWords + def __init__(self, minWords=30): + self.minWords = minWords - def process(self, doc): - """ generated source for method process """ - changes = False - for tb in doc.getTextBlocks(): - if tb.isContent() and self.getNumFullTextWords(tb) < self.minWords: - tb.setIsContent(False) - changes = True - return changes + def process(self, doc): + """ generated source for method process """ + changes = False + for tb in doc.getTextBlocks(): + if tb.isContent() and self.getNumFullTextWords(tb) < self.minWords: + tb.setIsContent(False) + changes = True + return changes # @@ -785,21 +785,21 @@ def process(self, doc): # class KeepLargestFulltextBlockFilter(HeuristicFilterBase): - def process(self, doc): - """ generated source for method process """ - textBlocks = doc.getTextBlocks() - if len(textBlocks) < 2: return False - contentBlocks=[block for block in textBlocks if block.isContent()] - if len(contentBlocks)==0: return False - largestBlock=max(contentBlocks,key=self.getNumFullTextWords) - - for tb in textBlocks: - if tb == largestBlock: - tb.setIsContent(True) - else: - tb.setIsContent(False) - tb.addLabel(DefaultLabels.MIGHT_BE_CONTENT) - return True + def process(self, doc): + """ generated source for method process """ + textBlocks = doc.getTextBlocks() + if len(textBlocks) < 2: return False + contentBlocks=[block for block in textBlocks if block.isContent()] + if len(contentBlocks)==0: return False + largestBlock=max(contentBlocks,key=self.getNumFullTextWords) + + for tb in textBlocks: + if tb == largestBlock: + tb.setIsContent(True) + 
else: + tb.setIsContent(False) + tb.addLabel(DefaultLabels.MIGHT_BE_CONTENT) + return True # # * Marks all blocks as "non-content" that occur after blocks that have been @@ -811,28 +811,28 @@ def process(self, doc): # * @see TerminatingBlocksFinder # class IgnoreBlocksAfterContentFilter(HeuristicFilterBase): - """ generated source for class IgnoreBlocksAfterContentFilter """ - #DEFAULT_INSTANCE = IgnoreBlocksAfterContentFilter(60) - #INSTANCE_200 = IgnoreBlocksAfterContentFilter(200) - - def __init__(self, minNumWords=60): - self.minNumWords = minNumWords - - def process(self, doc): - """ generated source for method process """ - changes = False - numWords = 0 - foundEndOfText = False - for block in doc.getTextBlocks(): - if block.isContent(): - numWords += self.getNumFullTextWords(block) - if block.hasLabel(DefaultLabels.INDICATES_END_OF_TEXT) and numWords >= self.minNumWords: - foundEndOfText = True - if foundEndOfText: - changes = True - block.setIsContent(False) - - return changes + """ generated source for class IgnoreBlocksAfterContentFilter """ + #DEFAULT_INSTANCE = IgnoreBlocksAfterContentFilter(60) + #INSTANCE_200 = IgnoreBlocksAfterContentFilter(200) + + def __init__(self, minNumWords=60): + self.minNumWords = minNumWords + + def process(self, doc): + """ generated source for method process """ + changes = False + numWords = 0 + foundEndOfText = False + for block in doc.getTextBlocks(): + if block.isContent(): + numWords += self.getNumFullTextWords(block) + if block.hasLabel(DefaultLabels.INDICATES_END_OF_TEXT) and numWords >= self.minNumWords: + foundEndOfText = True + if foundEndOfText: + changes = True + block.setIsContent(False) + + return changes # # * Marks all blocks as "non-content" that occur after blocks that have been # * marked {@link DefaultLabels#INDICATES_END_OF_TEXT}, and after any content block. 
@@ -843,22 +843,22 @@ def process(self, doc): # class IgnoreBlocksAfterContentFromEndFilter(HeuristicFilterBase): - def process(self, doc): - """ generated source for method process """ - changes = False - words = 0 - blocks = doc.getTextBlocks() - if len(blocks)==0: return False - for tb in blocks[::-1]: - if tb.hasLabel(DefaultLabels.INDICATES_END_OF_TEXT): - tb.addLabel(DefaultLabels.STRICTLY_NOT_CONTENT) - tb.removeLabel(DefaultLabels.MIGHT_BE_CONTENT) - tb.setIsContent(False) - changes = True - elif tb.isContent(): - words += tb.getNumWords() - if words > 200: break - return changes + def process(self, doc): + """ generated source for method process """ + changes = False + words = 0 + blocks = doc.getTextBlocks() + if len(blocks)==0: return False + for tb in blocks[::-1]: + if tb.hasLabel(DefaultLabels.INDICATES_END_OF_TEXT): + tb.addLabel(DefaultLabels.STRICTLY_NOT_CONTENT) + tb.removeLabel(DefaultLabels.MIGHT_BE_CONTENT) + tb.setIsContent(False) + changes = True + elif tb.isContent(): + words += tb.getNumWords() + if words > 200: break + return changes # @@ -871,46 +871,46 @@ def process(self, doc): # class TerminatingBlocksFinder(BoilerpipeFilter): - # public static long timeSpent = 0; - def process(self, doc): - """ generated source for method process """ - changes = False - - for tb in doc.getTextBlocks(): - if tb.getNumWords() >=15: continue - text=tb.getText().strip() - if len(text)<8: continue - textLC = text.lower() - - startmatches=(" reuters","please rate this","post a comment") - inmatches=("what you think...","add your comment","add comment","reader views","have your say","reader comments","rtta artikeln") - eqmatch="thanks for your comments - this feedback is now closed" - - if textLC.startswith("comments") or self.startsWithNumber(textLC, " comments", " users responded in") or any(textLC.startswith(matchStr) for matchStr in startmatches) or any(matchStr in textLC for matchStr in inmatches) or textLC == eqmatch: - tb.addLabel(DefaultLabels.INDICATES_END_OF_TEXT) - changes = True - # timeSpent += System.currentTimeMillis() - t; - return changes - - # - # * Checks whether the given text t starts with a sequence of digits, - # * followed by one of the given strings. - # * - # * @param t - # * The text to examine - # * @param len - # * The length of the text to examine - # * @param str - # * Any strings that may follow the digits. 
- # * @return true if at least one combination matches - # - def startsWithNumber(self, text, *matchStrArr): - """ generated source for method startsWithNumber """ - numberMatch=re.search('\D',text) - if numberMatch==None: pos=len(text) - else: pos=numberMatch.start() - if pos==0: return False - else: return any(text.startswith(matchStr,pos) for matchStr in matchStrArr) + # public static long timeSpent = 0; + def process(self, doc): + """ generated source for method process """ + changes = False + + for tb in doc.getTextBlocks(): + if tb.getNumWords() >=15: continue + text=tb.getText().strip() + if len(text)<8: continue + textLC = text.lower() + + startmatches=(" reuters","please rate this","post a comment") + inmatches=("what you think...","add your comment","add comment","reader views","have your say","reader comments","rtta artikeln") + eqmatch="thanks for your comments - this feedback is now closed" + + if textLC.startswith("comments") or self.startsWithNumber(textLC, " comments", " users responded in") or any(textLC.startswith(matchStr) for matchStr in startmatches) or any(matchStr in textLC for matchStr in inmatches) or textLC == eqmatch: + tb.addLabel(DefaultLabels.INDICATES_END_OF_TEXT) + changes = True + # timeSpent += System.currentTimeMillis() - t; + return changes + + # + # * Checks whether the given text t starts with a sequence of digits, + # * followed by one of the given strings. + # * + # * @param t + # * The text to examine + # * @param len + # * The length of the text to examine + # * @param str + # * Any strings that may follow the digits. + # * @return true if at least one combination matches + # + def startsWithNumber(self, text, *matchStrArr): + """ generated source for method startsWithNumber """ + numberMatch=re.search('\D',text) + if numberMatch==None: pos=len(text) + else: pos=numberMatch.start() + if pos==0: return False + else: return any(text.startswith(matchStr,pos) for matchStr in matchStrArr) # @@ -923,46 +923,46 @@ def startsWithNumber(self, text, *matchStrArr): # class NumWordsRulesClassifier(BoilerpipeFilter): - def process(self, doc): - """ generated source for method process """ - textBlocks = doc.getTextBlocks() - hasChanges = False - - n=len(textBlocks) - for i,currentBlock in enumerate(textBlocks): - if i>0: prevBlock=textBlocks[i-1] - else: prevBlock=document.TextBlock.EMPTY_START - if i+10: prevBlock=textBlocks[i-1] + else: prevBlock=document.TextBlock.EMPTY_START + if i+10: prevBlock=textBlocks[i-1] - else: prevBlock=document.TextBlock.EMPTY_START - if i+10: prevBlock=textBlocks[i-1] + else: prevBlock=document.TextBlock.EMPTY_START + if i+1krdwrd 0: prevBlock=textBlocks[i-1] - else: prevBlock=document.TextBlock.EMPTY_START - if i+1 0 and next.getNumWords() > 11 - cond2=curr.getNumWords() > 19 - cond3=next.getNumWords() > 6 and next.getLinkDensity() == 0 and prev.getLinkDensity() == 0 and (curr.getNumWords() > 6 or prev.getNumWords() > 7 or next.getNumWords() > 19) - isContent = cond1 or cond2 or cond3 - return curr.setIsContent(isContent) + def process(self, doc): + """ generated source for method process """ + textBlocks = doc.getTextBlocks() + hasChanges = False + + n=len(textBlocks) + for i,currentBlock in enumerate(textBlocks): + if i>0: prevBlock=textBlocks[i-1] + else: prevBlock=document.TextBlock.EMPTY_START + if i+1 0 and next.getNumWords() > 11 + cond2=curr.getNumWords() > 19 + cond3=next.getNumWords() > 6 and next.getLinkDensity() == 0 and prev.getLinkDensity() == 0 and (curr.getNumWords() > 6 or prev.getNumWords() > 7 or 
next.getNumWords() > 19) + isContent = cond1 or cond2 or cond3 + return curr.setIsContent(isContent) diff --git a/boilerpy/parser.py b/boilerpy/parser.py index 5e90c43..fc0a835 100644 --- a/boilerpy/parser.py +++ b/boilerpy/parser.py @@ -8,7 +8,7 @@ # * (the "License"); you may not use this file except in compliance with # * the License. You may obtain a copy of the License at # * -# * http://www.apache.org/licenses/LICENSE-2.0 +# * http://www.apache.org/licenses/LICENSE-2.0 # * # * Unless required by applicable law or agreed to in writing, software # * distributed under the License is distributed on an "AS IS" BASIS, @@ -31,25 +31,25 @@ class TagAction(object): - def start(self, contentHandler, tagName, attrs): return False - def end(self, contentHandler, tagName): return False - def changesTagLevel(self): return False + def start(self, contentHandler, tagName, attrs): return False + def end(self, contentHandler, tagName): return False + def changesTagLevel(self): return False # # * Marks this tag as "ignorable", i.e. all its inner content is silently skipped. # class IgnorableElementTagAction(TagAction): - """ generated source for class TA_IGNORABLE_ELEMENT """ - def start(self, contentHandler, tagName, attrs): - contentHandler.inIgnorableElement += 1 - return True + """ generated source for class TA_IGNORABLE_ELEMENT """ + def start(self, contentHandler, tagName, attrs): + contentHandler.inIgnorableElement += 1 + return True - def end(self, contentHandler, tagName): - contentHandler.inIgnorableElement -= 1 - return True + def end(self, contentHandler, tagName): + contentHandler.inIgnorableElement -= 1 + return True - def changesTagLevel(self): - return True + def changesTagLevel(self): + return True # # * Marks this tag as "anchor" (this should usually only be set for the <A> tag). @@ -59,286 +59,286 @@ def changesTagLevel(self): # * If boilerpipe encounters such nestings, a SAXException is thrown. # class AnchorTextTagAction(TagAction): - """ generated source for class TA_ANCHOR_TEXT """ - def start(self, contentHandler, tagName, attrs): - contentHandler.inAnchor += 1 - if contentHandler.inAnchor > 1: - # as nested A elements are not allowed per specification, we - # are probably reaching this branch due to a bug in the XML - # parser - print("Warning: SAX input contains nested A elements -- You have probably hit a bug in your HTML parser (e.g., NekoHTML bug #2909310). Please clean the HTML externally and feed it to boilerpipe again. Trying to recover somehow...") - self.end(contentHandler, tagName) - if contentHandler.inIgnorableElement == 0: - contentHandler.addToken(SpecialTokens.ANCHOR_TEXT_START) - return False - - def end(self, contentHandler, tagName): - contentHandler.inAnchor -= 1 - if contentHandler.inAnchor == 0 and contentHandler.inIgnorableElement == 0: - contentHandler.addToken(SpecialTokens.ANCHOR_TEXT_END) - return False - - def changesTagLevel(self): - return True + """ generated source for class TA_ANCHOR_TEXT """ + def start(self, contentHandler, tagName, attrs): + contentHandler.inAnchor += 1 + if contentHandler.inAnchor > 1: + # as nested A elements are not allowed per specification, we + # are probably reaching this branch due to a bug in the XML + # parser + print("Warning: SAX input contains nested A elements -- You have probably hit a bug in your HTML parser (e.g., NekoHTML bug #2909310). Please clean the HTML externally and feed it to boilerpipe again. 
Trying to recover somehow...") + self.end(contentHandler, tagName) + if contentHandler.inIgnorableElement == 0: + contentHandler.addToken(SpecialTokens.ANCHOR_TEXT_START) + return False + + def end(self, contentHandler, tagName): + contentHandler.inAnchor -= 1 + if contentHandler.inAnchor == 0 and contentHandler.inIgnorableElement == 0: + contentHandler.addToken(SpecialTokens.ANCHOR_TEXT_END) + return False + + def changesTagLevel(self): + return True # # * Marks this tag the body element (this should usually only be set for the <BODY> tag). # class BodyTagAction(TagAction): - """ generated source for class TA_BODY """ - def start(self, contentHandler, tagName, attrs): - contentHandler.flushBlock() - contentHandler.inBody += 1 - return False + """ generated source for class TA_BODY """ + def start(self, contentHandler, tagName, attrs): + contentHandler.flushBlock() + contentHandler.inBody += 1 + return False - def end(self, contentHandler, tagName): - contentHandler.flushBlock() - contentHandler.inBody -= 1 - return False + def end(self, contentHandler, tagName): + contentHandler.flushBlock() + contentHandler.inBody -= 1 + return False - def changesTagLevel(self): - return True + def changesTagLevel(self): + return True # # * Marks this tag a simple "inline" element, which generates whitespace, but no new block. # class InlineWhitespaceTagAction(TagAction): - """ generated source for class TA_INLINE_WHITESPACE """ - def start(self, contentHandler, tagName, attrs): - contentHandler.addWhitespaceIfNecessary() - return False + """ generated source for class TA_INLINE_WHITESPACE """ + def start(self, contentHandler, tagName, attrs): + contentHandler.addWhitespaceIfNecessary() + return False - def end(self, contentHandler, tagName): - contentHandler.addWhitespaceIfNecessary() - return False + def end(self, contentHandler, tagName): + contentHandler.addWhitespaceIfNecessary() + return False - def changesTagLevel(self): return False + def changesTagLevel(self): return False # # * Marks this tag a simple "inline" element, which neither generates whitespace, nor a new block. # class InlineTagAction(TagAction): - """ generated source for class TA_INLINE_NO_WHITESPACE """ - def start(self, contentHandler, tagName, attrs): return False - def end(self, contentHandler, tagName): return False - def changesTagLevel(self): return False + """ generated source for class TA_INLINE_NO_WHITESPACE """ + def start(self, contentHandler, tagName, attrs): return False + def end(self, contentHandler, tagName): return False + def changesTagLevel(self): return False # # * Explicitly marks this tag a simple "block-level" element, which always generates whitespace # class BlockTagAction(TagAction): - """ generated source for class TA_BLOCK_LEVEL """ - def start(self, contentHandler, tagName, attrs): return True - def end(self, contentHandler, tagName): return True - def changesTagLevel(self): return True + """ generated source for class TA_BLOCK_LEVEL """ + def start(self, contentHandler, tagName, attrs): return True + def end(self, contentHandler, tagName): return True + def changesTagLevel(self): return True # # * Special TagAction for the <FONT> tag, which keeps track of the # * absolute and relative font size. 
# class FontTagAction(TagAction): - """ generated source for class TA_FONT """ - #WARNING: POSSIBLE BUG -- used to be [0-9] without + - PAT_FONT_SIZE = re.compile("([\+\-]?)([0-9]+)") - - def start(self, contentHandler, tagName, attrs): - """ generated source for method start """ - sizeAttr = attrs.getValue("size") - size=None - if sizeAttr != None: - match = self.PAT_FONT_SIZE.match(sizeAttr) - if match!=None: - rel=match.group(0) - val=match.group(1) - if len(rel)==0: - # absolute - size = val - else: - # relative - #last non-none element from stack, default 3 - lastNonNone=(s for s in contentHandler.fontSizeStack[::-1] if s!=None) - prevSize=next(lastNonNone,3) - if rel[0] == '+': size = prevSize + val - else: size = prevSize - val - contentHandler.fontSizeStack.append(size) - return False - - def end(self, contentHandler, tagName): - contentHandler.fontSizeStack.pop() - return False - - def changesTagLevel(self): return False + """ generated source for class TA_FONT """ + #WARNING: POSSIBLE BUG -- used to be [0-9] without + + PAT_FONT_SIZE = re.compile("([\+\-]?)([0-9]+)") + + def start(self, contentHandler, tagName, attrs): + """ generated source for method start """ + sizeAttr = attrs.getValue("size") + size=None + if sizeAttr != None: + match = self.PAT_FONT_SIZE.match(sizeAttr) + if match!=None: + rel=match.group(0) + val=match.group(1) + if len(rel)==0: + # absolute + size = val + else: + # relative + #last non-none element from stack, default 3 + lastNonNone=(s for s in contentHandler.fontSizeStack[::-1] if s!=None) + prevSize=next(lastNonNone,3) + if rel[0] == '+': size = prevSize + val + else: size = prevSize - val + contentHandler.fontSizeStack.append(size) + return False + + def end(self, contentHandler, tagName): + contentHandler.fontSizeStack.pop() + return False + + def changesTagLevel(self): return False # # * {@link CommonTagActions} for inline elements, which triggers some {@link LabelAction} on the generated # * {@link TextBlock}. # class InlineTagLabelAction(TagAction): - """ generated source for class InlineTagLabelAction """ + """ generated source for class InlineTagLabelAction """ - def __init__(self, action): - """ generated source for method __init__ """ - super(InlineTagLabelAction, self).__init__() - self.action = action + def __init__(self, action): + """ generated source for method __init__ """ + super(InlineTagLabelAction, self).__init__() + self.action = action - def start(self, contentHandler, tagName, attrs): - """ generated source for method start """ - contentHandler.addWhitespaceIfNecessary() - contentHandler.addLabelAction(self.action) - return False + def start(self, contentHandler, tagName, attrs): + """ generated source for method start """ + contentHandler.addWhitespaceIfNecessary() + contentHandler.addLabelAction(self.action) + return False - def end(self, contentHandler, tagName): - """ generated source for method end """ - contentHandler.addWhitespaceIfNecessary() - return False + def end(self, contentHandler, tagName): + """ generated source for method end """ + contentHandler.addWhitespaceIfNecessary() + return False - def changesTagLevel(self): - """ generated source for method changesTagLevel """ - return False + def changesTagLevel(self): + """ generated source for method changesTagLevel """ + return False # # * {@link CommonTagActions} for block-level elements, which triggers some {@link LabelAction} on the generated # * {@link TextBlock}. 
# class BlockTagLabelAction(TagAction): - """ generated source for class BlockTagLabelAction """ + """ generated source for class BlockTagLabelAction """ - def __init__(self, action): - """ generated source for method __init__ """ - super(BlockTagLabelAction, self).__init__() - self.action = action + def __init__(self, action): + """ generated source for method __init__ """ + super(BlockTagLabelAction, self).__init__() + self.action = action - def start(self, contentHandler, tagName, attrs): - """ generated source for method start """ - contentHandler.addLabelAction(self.action) - return True + def start(self, contentHandler, tagName, attrs): + """ generated source for method start """ + contentHandler.addLabelAction(self.action) + return True - def end(self, contentHandler, tagName): - """ generated source for method end """ - return True + def end(self, contentHandler, tagName): + """ generated source for method end """ + return True - def changesTagLevel(self): - """ generated source for method changesTagLevel """ - return True + def changesTagLevel(self): + """ generated source for method changesTagLevel """ + return True class Chained(TagAction): - def __init__(self, tagAction1, tagAction2): - """ generated source for method __init__ """ - super(Chained, self).__init__() - self.tagAction1 = tagAction1 - self.tagAction2 = tagAction2 + def __init__(self, tagAction1, tagAction2): + """ generated source for method __init__ """ + super(Chained, self).__init__() + self.tagAction1 = tagAction1 + self.tagAction2 = tagAction2 - def start(self, contentHandler, tagName, attrs): - """ generated source for method start """ - return self.tagAction1.start(contentHandler, tagName, attrs) | self.tagAction2.start(contentHandler, tagName, attrs) + def start(self, contentHandler, tagName, attrs): + """ generated source for method start """ + return self.tagAction1.start(contentHandler, tagName, attrs) | self.tagAction2.start(contentHandler, tagName, attrs) - def end(self, contentHandler, tagName): - """ generated source for method end """ - return self.tagAction1.end(contentHandler, tagName) | self.tagAction2.end(contentHandler, tagName) + def end(self, contentHandler, tagName): + """ generated source for method end """ + return self.tagAction1.end(contentHandler, tagName) | self.tagAction2.end(contentHandler, tagName) - def changesTagLevel(self): - """ generated source for method changesTagLevel """ - return self.tagAction1.changesTagLevel() or self.tagAction2.changesTagLevel() + def changesTagLevel(self): + """ generated source for method changesTagLevel """ + return self.tagAction1.changesTagLevel() or self.tagAction2.changesTagLevel() class MarkupTagAction(TagAction): - """ generated source for class MarkupTagAction """ - - def __init__(self, isBlockLevel): - """ generated source for method __init__ """ - super(MarkupTagAction, self).__init__() - self.isBlockLevel = isBlockLevel - self.labelStack = [] - - PAT_NUM = re.compile("[0-9]+") - - def start(self, contentHandler, tagName, attrs): - """ generated source for method start """ - labels = [] - labels.append(DefaultLabels.MARKUP_PREFIX + tagName) - classVal = attrs.getValue("class") - if classVal != None and len(classVal)>0: - classVal = self.PAT_NUM.sub("#",classVal).strip() - vals = classVal.split(r"[ ]+") - labels.append(DefaultLabels.MARKUP_PREFIX + "." + classVal.replace(' ', '.')) - if len(vals)>1: - for s in vals: - labels.append(DefaultLabels.MARKUP_PREFIX + "." 
+ s) - id = attrs.get("id") - if id != None and len(id)<0: - id = self.PAT_NUM.sub("#",id) - labels.append(DefaultLabels.MARKUP_PREFIX + "#" + id) - ancestors = self.getAncestorLabels() - labelsWithAncestors = [] - for l in labels: - for an in ancestors: - labelsWithAncestors.append(an) - labelsWithAncestors.append(an + " " + l) - labelsWithAncestors.append(l) - contentHandler.addLabelAction(LabelAction(labelsWithAncestors)) - self.labelStack.append(labels) - return self.isBlockLevel - - def end(self, contentHandler, tagName): - """ generated source for method end """ - self.labelStack.pop() - return self.isBlockLevel - - def changesTagLevel(self): - """ generated source for method changesTagLevel """ - return self.isBlockLevel - - def getAncestorLabels(self): - """ generated source for method getAncestorLabels """ - labelSet = set() - for labels in self.labelStack: - if labels == None:continue - labelSet.update(labels) - return labelSet + """ generated source for class MarkupTagAction """ + + def __init__(self, isBlockLevel): + """ generated source for method __init__ """ + super(MarkupTagAction, self).__init__() + self.isBlockLevel = isBlockLevel + self.labelStack = [] + + PAT_NUM = re.compile("[0-9]+") + + def start(self, contentHandler, tagName, attrs): + """ generated source for method start """ + labels = [] + labels.append(DefaultLabels.MARKUP_PREFIX + tagName) + classVal = attrs.getValue("class") + if classVal != None and len(classVal)>0: + classVal = self.PAT_NUM.sub("#",classVal).strip() + vals = classVal.split(r"[ ]+") + labels.append(DefaultLabels.MARKUP_PREFIX + "." + classVal.replace(' ', '.')) + if len(vals)>1: + for s in vals: + labels.append(DefaultLabels.MARKUP_PREFIX + "." + s) + id = attrs.get("id") + if id != None and len(id)<0: + id = self.PAT_NUM.sub("#",id) + labels.append(DefaultLabels.MARKUP_PREFIX + "#" + id) + ancestors = self.getAncestorLabels() + labelsWithAncestors = [] + for l in labels: + for an in ancestors: + labelsWithAncestors.append(an) + labelsWithAncestors.append(an + " " + l) + labelsWithAncestors.append(l) + contentHandler.addLabelAction(LabelAction(labelsWithAncestors)) + self.labelStack.append(labels) + return self.isBlockLevel + + def end(self, contentHandler, tagName): + """ generated source for method end """ + self.labelStack.pop() + return self.isBlockLevel + + def changesTagLevel(self): + """ generated source for method changesTagLevel """ + return self.isBlockLevel + + def getAncestorLabels(self): + """ generated source for method getAncestorLabels """ + labelSet = set() + for labels in self.labelStack: + if labels == None:continue + labelSet.update(labels) + return labelSet class CommonTagActions(object): - TA_IGNORABLE_ELEMENT=IgnorableElementTagAction() - TA_ANCHOR_TEXT=AnchorTextTagAction() - TA_BODY=BodyTagAction() - TA_INLINE_WHITESPACE=InlineWhitespaceTagAction() - TA_INLINE_NO_WHITESPACE=InlineTagAction() - TA_BLOCK_LEVEL=BlockTagAction() - TA_FONT=FontTagAction() + TA_IGNORABLE_ELEMENT=IgnorableElementTagAction() + TA_ANCHOR_TEXT=AnchorTextTagAction() + TA_BODY=BodyTagAction() + TA_INLINE_WHITESPACE=InlineWhitespaceTagAction() + TA_INLINE_NO_WHITESPACE=InlineTagAction() + TA_BLOCK_LEVEL=BlockTagAction() + TA_FONT=FontTagAction() defaultTagActionMap={ - "STYLE" : CommonTagActions.TA_IGNORABLE_ELEMENT, - "SCRIPT" : CommonTagActions.TA_IGNORABLE_ELEMENT, - "OPTION" : CommonTagActions.TA_IGNORABLE_ELEMENT, - "OBJECT" : CommonTagActions.TA_IGNORABLE_ELEMENT, - "EMBED" : CommonTagActions.TA_IGNORABLE_ELEMENT, - "APPLET" : 
CommonTagActions.TA_IGNORABLE_ELEMENT, - #Note: link removed because it can be self-closing in HTML5 - #"LINK" : CommonTagActions.TA_IGNORABLE_ELEMENT, - "A" : CommonTagActions.TA_ANCHOR_TEXT, - "BODY" : CommonTagActions.TA_BODY, - "STRIKE" : CommonTagActions.TA_INLINE_NO_WHITESPACE, - "U" : CommonTagActions.TA_INLINE_NO_WHITESPACE, - "B" : CommonTagActions.TA_INLINE_NO_WHITESPACE, - "I" : CommonTagActions.TA_INLINE_NO_WHITESPACE, - "EM" : CommonTagActions.TA_INLINE_NO_WHITESPACE, - "STRONG" : CommonTagActions.TA_INLINE_NO_WHITESPACE, - "SPAN" : CommonTagActions.TA_INLINE_NO_WHITESPACE, - # New in 1.1 (especially to improve extraction quality from Wikipedia etc., - "SUP" : CommonTagActions.TA_INLINE_NO_WHITESPACE, - # New in 1.2 - "CODE" : CommonTagActions.TA_INLINE_NO_WHITESPACE, - "TT" : CommonTagActions.TA_INLINE_NO_WHITESPACE, - "SUB" : CommonTagActions.TA_INLINE_NO_WHITESPACE, - "VAR" : CommonTagActions.TA_INLINE_NO_WHITESPACE, - "ABBR" : CommonTagActions.TA_INLINE_WHITESPACE, - "ACRONYM" : CommonTagActions.TA_INLINE_WHITESPACE, - "FONT" : CommonTagActions.TA_INLINE_NO_WHITESPACE, - # could also use TA_FONT - # added in 1.1.1 - "NOSCRIPT" : CommonTagActions.TA_IGNORABLE_ELEMENT + "STYLE" : CommonTagActions.TA_IGNORABLE_ELEMENT, + "SCRIPT" : CommonTagActions.TA_IGNORABLE_ELEMENT, + "OPTION" : CommonTagActions.TA_IGNORABLE_ELEMENT, + "OBJECT" : CommonTagActions.TA_IGNORABLE_ELEMENT, + "EMBED" : CommonTagActions.TA_IGNORABLE_ELEMENT, + "APPLET" : CommonTagActions.TA_IGNORABLE_ELEMENT, + #Note: link removed because it can be self-closing in HTML5 + #"LINK" : CommonTagActions.TA_IGNORABLE_ELEMENT, + "A" : CommonTagActions.TA_ANCHOR_TEXT, + "BODY" : CommonTagActions.TA_BODY, + "STRIKE" : CommonTagActions.TA_INLINE_NO_WHITESPACE, + "U" : CommonTagActions.TA_INLINE_NO_WHITESPACE, + "B" : CommonTagActions.TA_INLINE_NO_WHITESPACE, + "I" : CommonTagActions.TA_INLINE_NO_WHITESPACE, + "EM" : CommonTagActions.TA_INLINE_NO_WHITESPACE, + "STRONG" : CommonTagActions.TA_INLINE_NO_WHITESPACE, + "SPAN" : CommonTagActions.TA_INLINE_NO_WHITESPACE, + # New in 1.1 (especially to improve extraction quality from Wikipedia etc., + "SUP" : CommonTagActions.TA_INLINE_NO_WHITESPACE, + # New in 1.2 + "CODE" : CommonTagActions.TA_INLINE_NO_WHITESPACE, + "TT" : CommonTagActions.TA_INLINE_NO_WHITESPACE, + "SUB" : CommonTagActions.TA_INLINE_NO_WHITESPACE, + "VAR" : CommonTagActions.TA_INLINE_NO_WHITESPACE, + "ABBR" : CommonTagActions.TA_INLINE_WHITESPACE, + "ACRONYM" : CommonTagActions.TA_INLINE_WHITESPACE, + "FONT" : CommonTagActions.TA_INLINE_NO_WHITESPACE, + # could also use TA_FONT + # added in 1.1.1 + "NOSCRIPT" : CommonTagActions.TA_IGNORABLE_ELEMENT } @@ -353,30 +353,30 @@ class CommonTagActions(object): # * @author Christian Kohlschtter # class LabelAction(object): - def __init__(self, *labels): - self.labels = labels + def __init__(self, *labels): + self.labels = labels - def addTo(self, textBlock): - self.addLabelsTo(textBlock) + def addTo(self, textBlock): + self.addLabelsTo(textBlock) - def addLabelsTo(self, textBlock): - textBlock.addLabels(self.labels) + def addLabelsTo(self, textBlock): + textBlock.addLabels(self.labels) - def __str__(self): - return str(self.labels) + def __str__(self): + return str(self.labels) class ConditionalLabelAction(LabelAction): - def __init__(self, condition, *labels): - super(ConditionalLabelAction, self).__init__(*labels) - self.condition = condition + def __init__(self, condition, *labels): + super(ConditionalLabelAction, self).__init__(*labels) + self.condition = condition 
- def addTo(self, textBlock): - if self.condition(textBlock): self.addLabelsTo(textBlock) + def addTo(self, textBlock): + if self.condition(textBlock): self.addLabelsTo(textBlock) class SpecialTokens(object): - ANCHOR_TEXT_START = u'\ue00astart' - ANCHOR_TEXT_END = u'\ue00aend' + ANCHOR_TEXT_START = u'\ue00astart' + ANCHOR_TEXT_END = u'\ue00aend' #---------------------------------------------------------------------------- @@ -392,296 +392,296 @@ class SpecialTokens(object): class BoilerpipeBaseParser(object): - EVENT_START_TAG=0 - EVENT_END_TAG=1 - EVENT_CHARACTERS=2 - EVENT_WHITESPACE=3 - #all word characters except underscore -- i.e. not (not word or underscore) - PAT_VALID_WORD_CHARACTER = re.compile(r"[^\W_]", re.UNICODE) - PAT_WORD = re.compile(r"\ue00a?[\w\"'\.,\!\@\-\:\;\$\?\(\)/]+", re.UNICODE) - - """ generated source for class BoilerpipeHTMLContentHandler """ - # - # * Constructs a {@link BoilerpipeHTMLContentHandler} using the given - # * {@link TagActionMap}. - # * - # * @param tagActions - # * The {@link TagActionMap} to use, e.g. - # * {@link DefaultTagActionMap}. - # - def __init__(self, tagActions=None): - """ generated source for method __init___0 """ - #super(BoilerpipeHTMLContentHandler, self).__init__() - if tagActions==None: self.tagActions=defaultTagActionMap - else: self.tagActions = tagActions - - - self.clearTextBuffer() - self.inBody = 0 - self.inAnchor = 0 - self.inIgnorableElement = 0 - self.textElementIdx = 0 - self.lastStartTag = None - self.lastEndTag = None - self.lastEvent = None - self.offsetBlocks = 0 - self.currentContainedTextElements=set() - self.flush = False - self.inAnchorText = False - - self.title = None - self.tagLevel = 0 - self.blockTagLevel = -1 - self.textBlocks = [] - self.labelStacks = [] - self.fontSizeStack = [] - - # - # * Recycles this instance. - # - def recycle(self): - """ generated source for method recycle """ - self.clearTextBuffer() - self.inBody = 0 - self.inAnchor = 0 - self.inIgnorableElement = 0 - self.textElementIdx = 0 - self.lastStartTag = None - self.lastEndTag = None - self.lastEvent = None - self.offsetBlocks = 0 - self.currentContainedTextElements=set() - self.flush = False - self.inAnchorText = False - self.textBlocks=[] - - #--------- added ------- - self.title = None - self.tagLevel = 0 - self.blockTagLevel = -1 - self.labelStacks = [] - self.fontSizeStack = [] + EVENT_START_TAG=0 + EVENT_END_TAG=1 + EVENT_CHARACTERS=2 + EVENT_WHITESPACE=3 + #all word characters except underscore -- i.e. not (not word or underscore) + PAT_VALID_WORD_CHARACTER = re.compile(r"[^\W_]", re.UNICODE) + PAT_WORD = re.compile(r"\ue00a?[\w\"'\.,\!\@\-\:\;\$\?\(\)/]+", re.UNICODE) + + """ generated source for class BoilerpipeHTMLContentHandler """ + # + # * Constructs a {@link BoilerpipeHTMLContentHandler} using the given + # * {@link TagActionMap}. + # * + # * @param tagActions + # * The {@link TagActionMap} to use, e.g. + # * {@link DefaultTagActionMap}. 
+ # + def __init__(self, tagActions=None): + """ generated source for method __init___0 """ + #super(BoilerpipeHTMLContentHandler, self).__init__() + if tagActions==None: self.tagActions=defaultTagActionMap + else: self.tagActions = tagActions + + + self.clearTextBuffer() + self.inBody = 0 + self.inAnchor = 0 + self.inIgnorableElement = 0 + self.textElementIdx = 0 + self.lastStartTag = None + self.lastEndTag = None + self.lastEvent = None + self.offsetBlocks = 0 + self.currentContainedTextElements=set() + self.flush = False + self.inAnchorText = False + + self.title = None + self.tagLevel = 0 + self.blockTagLevel = -1 + self.textBlocks = [] + self.labelStacks = [] + self.fontSizeStack = [] + + # + # * Recycles this instance. + # + def recycle(self): + """ generated source for method recycle """ + self.clearTextBuffer() + self.inBody = 0 + self.inAnchor = 0 + self.inIgnorableElement = 0 + self.textElementIdx = 0 + self.lastStartTag = None + self.lastEndTag = None + self.lastEvent = None + self.offsetBlocks = 0 + self.currentContainedTextElements=set() + self.flush = False + self.inAnchorText = False + self.textBlocks=[] + + #--------- added ------- + self.title = None + self.tagLevel = 0 + self.blockTagLevel = -1 + self.labelStacks = [] + self.fontSizeStack = [] #------------------------------- SAX Parser methods ---------------------------------------- - # @Override - def endDocument(self): - """ generated source for method endDocument """ - self.flushBlock() - - # @Override - def startDocument(self): pass - - # @Override - def startElement(self, name,attrs): - self.labelStacks.append([]) - - tagAction = self.tagActions.get(name.strip().upper()) - - if tagAction != None: - self.flush |= tagAction.start(self, name, attrs) - if tagAction.changesTagLevel(): self.tagLevel += 1 - else: - self.tagLevel += 1 - self.flush = True - self.lastEvent = self.EVENT_START_TAG - self.lastStartTag = name - - # @Override - def endElement(self, name): - tagAction = self.tagActions.get(name.strip().upper()) - - - if tagAction != None: - self.flush |= tagAction.end(self, name) - if tagAction.changesTagLevel(): self.tagLevel -= 1 - else: - self.flush = True - self.tagLevel -= 1 - - if self.flush: self.flushBlock() - self.lastEvent = self.EVENT_END_TAG - self.lastEndTag = name - self.labelStacks.pop() - - # @Override - def characters(self, content): - self.textElementIdx += 1 - if self.flush: - self.flushBlock() - self.flush = False - if self.inIgnorableElement != 0: return - - if len(content) == 0: return - - strippedContent=content.strip() - - if len(strippedContent) == 0: - self.addWhitespaceIfNecessary() - self.lastEvent = self.EVENT_WHITESPACE - return - - startWhitespace=content[0].isspace() - if startWhitespace: self.addWhitespaceIfNecessary() - - if self.blockTagLevel == -1: - self.blockTagLevel = self.tagLevel - self.textBuffer+=strippedContent - self.tokenBuffer+=strippedContent - - endWhitespace=content[-1].isspace() - if endWhitespace: self.addWhitespaceIfNecessary() - - self.lastEvent = self.EVENT_CHARACTERS - self.currentContainedTextElements.add(self.textElementIdx) - - # @Override - def ignorableWhitespace(self, whitespace): - self.addWhitespaceIfNecessary() + # @Override + def endDocument(self): + """ generated source for method endDocument """ + self.flushBlock() + + # @Override + def startDocument(self): pass + + # @Override + def startElement(self, name,attrs): + self.labelStacks.append([]) + + tagAction = self.tagActions.get(name.strip().upper()) + + if tagAction != None: + self.flush |= 
tagAction.start(self, name, attrs) + if tagAction.changesTagLevel(): self.tagLevel += 1 + else: + self.tagLevel += 1 + self.flush = True + self.lastEvent = self.EVENT_START_TAG + self.lastStartTag = name + + # @Override + def endElement(self, name): + tagAction = self.tagActions.get(name.strip().upper()) + + + if tagAction != None: + self.flush |= tagAction.end(self, name) + if tagAction.changesTagLevel(): self.tagLevel -= 1 + else: + self.flush = True + self.tagLevel -= 1 + + if self.flush: self.flushBlock() + self.lastEvent = self.EVENT_END_TAG + self.lastEndTag = name + self.labelStacks.pop() + + # @Override + def characters(self, content): + self.textElementIdx += 1 + if self.flush: + self.flushBlock() + self.flush = False + if self.inIgnorableElement != 0: return + + if len(content) == 0: return + + strippedContent=content.strip() + + if len(strippedContent) == 0: + self.addWhitespaceIfNecessary() + self.lastEvent = self.EVENT_WHITESPACE + return + + startWhitespace=content[0].isspace() + if startWhitespace: self.addWhitespaceIfNecessary() + + if self.blockTagLevel == -1: + self.blockTagLevel = self.tagLevel + self.textBuffer+=strippedContent + self.tokenBuffer+=strippedContent + + endWhitespace=content[-1].isspace() + if endWhitespace: self.addWhitespaceIfNecessary() + + self.lastEvent = self.EVENT_CHARACTERS + self.currentContainedTextElements.add(self.textElementIdx) + + # @Override + def ignorableWhitespace(self, whitespace): + self.addWhitespaceIfNecessary() #------------------------------- utility methods ---------------------------------------- - def flushBlock(self): - """ generated source for method flushBlock """ - if self.inBody == 0: - if self.lastStartTag.lower()=="title": self.setTitle(self.textBuffer.strip()) - self.clearTextBuffer() - return - if len(self.tokenBuffer.strip())==0: - self.clearTextBuffer() - return - - tokens = self.tokenize(self.tokenBuffer) - numWords = 0 - numLinkedWords = 0 - numWrappedLines = 0 - currentLineLength = -1 - # don't count the first space - maxLineLength = 80 - numTokens = 0 - numWordsCurrentLine = 0 - - for token in tokens: - if token==SpecialTokens.ANCHOR_TEXT_START: self.inAnchorText = True - elif token==SpecialTokens.ANCHOR_TEXT_END: self.inAnchorText = False - elif self.isWord(token): - numTokens += 1 - numWords += 1 - numWordsCurrentLine += 1 - if self.inAnchorText: - numLinkedWords += 1 - currentLineLength += len(token) + 1 - if currentLineLength > maxLineLength: - numWrappedLines += 1 - currentLineLength = len(token) - numWordsCurrentLine = 1 - else: - numTokens += 1 - - #if only special tokens (numTokens excludes special tokens) - if numTokens == 0: - self.clearTextBuffer() - return - - if numWrappedLines == 0: - numWordsInWrappedLines = numWords - numWrappedLines = 1 - else: - numWordsInWrappedLines = numWords - numWordsCurrentLine - - tb = document.TextBlock(self.textBuffer.strip(), self.currentContainedTextElements, numWords, numLinkedWords, numWordsInWrappedLines, numWrappedLines, self.offsetBlocks) - self.currentContainedTextElements = set() - self.offsetBlocks += 1 - self.clearTextBuffer() - tb.setTagLevel(self.blockTagLevel) - self.addTextBlock(tb) - self.blockTagLevel = -1 - - def addTextBlock(self, tb): - """ generated source for method addTextBlock """ - for fontSize in self.fontSizeStack[::-1]: - if fontSize != None: - tb.addLabel("font-" + str(fontSize)) - break - for labelStack in self.labelStacks: - for labels in labelStack: - labels.addTo(tb) - self.textBlocks.append(tb) - - - def isWord(self, token): - """ 
generated source for method isWord """ - return self.PAT_VALID_WORD_CHARACTER.search(token)!=None - - def tokenize(self,text): - return self.PAT_WORD.findall(text) - - def getTextBlocks(self): - """ generated source for method getTextBlocks """ - return self.textBlocks - - def getTitle(self): - """ generated source for method getTitle """ - return self.title - - def setTitle(self, s): - """ generated source for method setTitle """ - if s == None or len(s)==0: return - self.title = s - - # - # * Returns a {@link TextDocument} containing the extracted {@link TextBlock} - # * s. NOTE: Only call this after parsing. - # * - # * @return The {@link TextDocument} - # - def toTextDocument(self): - """ generated source for method toTextDocument """ - # just to be sure - self.flushBlock() - return document.TextDocument(self.getTextBlocks(), self.getTitle()) - - def addWhitespaceIfNecessary(self): - """ generated source for method addWhitespaceIfNecessary """ - if len(self.textBuffer)==0 or not self.textBuffer[-1].isspace(): - self.textBuffer+=' ' - if len(self.tokenBuffer)==0 or not self.tokenBuffer[-1].isspace(): - self.tokenBuffer+=' ' - - def clearTextBuffer(self): - self.textBuffer='' - self.tokenBuffer='' - - def addToken(self,token): - self.addWhitespaceIfNecessary() - self.tokenBuffer+=token - self.addWhitespaceIfNecessary() - - def addLabelAction(self, la): - """ generated source for method addLabelAction """ - if len(self.labelStacks)==0: self.labelStacks.append([]) - self.labelStacks[-1].append(la) + def flushBlock(self): + """ generated source for method flushBlock """ + if self.inBody == 0: + if self.lastStartTag.lower()=="title": self.setTitle(self.textBuffer.strip()) + self.clearTextBuffer() + return + if len(self.tokenBuffer.strip())==0: + self.clearTextBuffer() + return + + tokens = self.tokenize(self.tokenBuffer) + numWords = 0 + numLinkedWords = 0 + numWrappedLines = 0 + currentLineLength = -1 + # don't count the first space + maxLineLength = 80 + numTokens = 0 + numWordsCurrentLine = 0 + + for token in tokens: + if token==SpecialTokens.ANCHOR_TEXT_START: self.inAnchorText = True + elif token==SpecialTokens.ANCHOR_TEXT_END: self.inAnchorText = False + elif self.isWord(token): + numTokens += 1 + numWords += 1 + numWordsCurrentLine += 1 + if self.inAnchorText: + numLinkedWords += 1 + currentLineLength += len(token) + 1 + if currentLineLength > maxLineLength: + numWrappedLines += 1 + currentLineLength = len(token) + numWordsCurrentLine = 1 + else: + numTokens += 1 + + #if only special tokens (numTokens excludes special tokens) + if numTokens == 0: + self.clearTextBuffer() + return + + if numWrappedLines == 0: + numWordsInWrappedLines = numWords + numWrappedLines = 1 + else: + numWordsInWrappedLines = numWords - numWordsCurrentLine + + tb = document.TextBlock(self.textBuffer.strip(), self.currentContainedTextElements, numWords, numLinkedWords, numWordsInWrappedLines, numWrappedLines, self.offsetBlocks) + self.currentContainedTextElements = set() + self.offsetBlocks += 1 + self.clearTextBuffer() + tb.setTagLevel(self.blockTagLevel) + self.addTextBlock(tb) + self.blockTagLevel = -1 + + def addTextBlock(self, tb): + """ generated source for method addTextBlock """ + for fontSize in self.fontSizeStack[::-1]: + if fontSize != None: + tb.addLabel("font-" + str(fontSize)) + break + for labelStack in self.labelStacks: + for labels in labelStack: + labels.addTo(tb) + self.textBlocks.append(tb) + + + def isWord(self, token): + """ generated source for method isWord """ + return 
self.PAT_VALID_WORD_CHARACTER.search(token)!=None + + def tokenize(self,text): + return self.PAT_WORD.findall(text) + + def getTextBlocks(self): + """ generated source for method getTextBlocks """ + return self.textBlocks + + def getTitle(self): + """ generated source for method getTitle """ + return self.title + + def setTitle(self, s): + """ generated source for method setTitle """ + if s == None or len(s)==0: return + self.title = s + + # + # * Returns a {@link TextDocument} containing the extracted {@link TextBlock} + # * s. NOTE: Only call this after parsing. + # * + # * @return The {@link TextDocument} + # + def toTextDocument(self): + """ generated source for method toTextDocument """ + # just to be sure + self.flushBlock() + return document.TextDocument(self.getTextBlocks(), self.getTitle()) + + def addWhitespaceIfNecessary(self): + """ generated source for method addWhitespaceIfNecessary """ + if len(self.textBuffer)==0 or not self.textBuffer[-1].isspace(): + self.textBuffer+=' ' + if len(self.tokenBuffer)==0 or not self.tokenBuffer[-1].isspace(): + self.tokenBuffer+=' ' + + def clearTextBuffer(self): + self.textBuffer='' + self.tokenBuffer='' + + def addToken(self,token): + self.addWhitespaceIfNecessary() + self.tokenBuffer+=token + self.addWhitespaceIfNecessary() + + def addLabelAction(self, la): + """ generated source for method addLabelAction """ + if len(self.labelStacks)==0: self.labelStacks.append([]) + self.labelStacks[-1].append(la) class BoilerpipeHTMLParser(HTMLParser,BoilerpipeBaseParser): - def __init__(self): - HTMLParser.__init__(self) - BoilerpipeBaseParser.__init__(self) - - def feed(self,data): - self.startDocument() - HTMLParser.feed(self,data) - self.endDocument() - - def handle_starttag(self, tag, attrs): self.startElement(tag,attrs) - def handle_endtag(self, tag): self.endElement(tag) - def handle_data(self, data): self.characters(data) + def __init__(self): + HTMLParser.__init__(self) + BoilerpipeBaseParser.__init__(self) + + def feed(self,data): + self.startDocument() + HTMLParser.feed(self,data) + self.endDocument() + + def handle_starttag(self, tag, attrs): self.startElement(tag,attrs) + def handle_endtag(self, tag): self.endElement(tag) + def handle_data(self, data): self.characters(data) class BoilerpipeSAXContentHandler(ContentHandler,BoilerpipeBaseParser): - def __init__(self): - ContentHandler.__init__(self) - BoilerpipeBaseParser.__init__(self) + def __init__(self): + ContentHandler.__init__(self) + BoilerpipeBaseParser.__init__(self) diff --git a/setup.py b/setup.py index 6088060..aebbf74 100644 --- a/setup.py +++ b/setup.py @@ -6,22 +6,22 @@ # README file and 2) it's easier to type in the README file than to put a raw # string in below ... 
def read(fname): - return open(os.path.join(os.path.dirname(__file__), fname)).read() + return open(os.path.join(os.path.dirname(__file__), fname)).read() setup( - name = "boilerpy", - version = "1.0", - author = "Sam Myer", - author_email = "mail@frozencavemanmedia.com", - description = "Python port of Boilerpipe, Boilerplate Removal and Fulltext Extraction from HTML pages", - license = "Apache 2.0", - keywords = "boilerpipe fulltext extraction", - url = "https://github.com/sammyer/BoilerPy", - packages=['boilerpy'], - long_description=read('README.txt'), - classifiers=[ - "Development Status :: 4 - Beta", - "Topic :: Utilities", - "License :: OSI Approved :: Apache License", - ] -) \ No newline at end of file + name = "boilerpy", + version = "1.0", + author = "Sam Myer", + author_email = "mail@frozencavemanmedia.com", + description = "Python port of Boilerpipe, Boilerplate Removal and Fulltext Extraction from HTML pages", + license = "Apache 2.0", + keywords = "boilerpipe fulltext extraction", + url = "https://github.com/sammyer/BoilerPy", + packages=['boilerpy'], + long_description=read('README.txt'), + classifiers=[ + "Development Status :: 4 - Beta", + "Topic :: Utilities", + "License :: OSI Approved :: Apache License", + ] +) diff --git a/tests/unittests.py b/tests/unittests.py index 96e367b..f12ab6e 100644 --- a/tests/unittests.py +++ b/tests/unittests.py @@ -7,428 +7,428 @@ from boilerpy.extractors import Extractor def runTests(): - suite = unittest.TestLoader().loadTestsFromTestCase(TestFilters) - unittest.TextTestRunner(verbosity=2).run(suite) - suite = unittest.TestLoader().loadTestsFromTestCase(TestParser) - unittest.TextTestRunner(verbosity=2).run(suite) + suite = unittest.TestLoader().loadTestsFromTestCase(TestFilters) + unittest.TextTestRunner(verbosity=2).run(suite) + suite = unittest.TestLoader().loadTestsFromTestCase(TestParser) + unittest.TextTestRunner(verbosity=2).run(suite) def runOneTest(): - testName='test_anchor' - suite = unittest.TestSuite() - suite.addTest(TestParser(testName)) - unittest.TextTestRunner(verbosity=2).run(suite) + testName='test_anchor' + suite = unittest.TestSuite() + suite.addTest(TestParser(testName)) + unittest.TextTestRunner(verbosity=2).run(suite) class TestFilters(unittest.TestCase): - defaultWords="Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec fermentum tincidunt magna, eu pulvinar mauris dapibus pharetra. In varius, nisl a rutrum porta, sem sem semper lacus, et varius urna tellus vel lorem. Nullam urna eros, luctus eget blandit ac, imperdiet feugiat ipsum. Donec laoreet tristique mi a bibendum. Sed pretium bibendum scelerisque. Mauris id pellentesque turpis. Mauris porta adipiscing massa, quis tempus dui pharetra ac. Morbi lacus mauris, feugiat ac tempor ut, congue tincidunt risus. Pellentesque tincidunt adipiscing elit, in fringilla enim scelerisque vel. Nulla facilisi. 
".split(' ') - - def makedoc(self,wordsArr,numAnchorWordsArr=None,isContentArr=None,labelArr=None): - textBlocks=[] - for idx,words in enumerate(wordsArr): - if type(words)==int: - numWords=words - text=' '.join(self.defaultWords[:numWords]) - else: - text=words - numWords=text.count(' ') - try: - numAnchorWords=numAnchorWordsArr[idx] - except (TypeError, IndexError): - numAnchorWords=0 - block=TextBlock(text,set(),numWords,numAnchorWords,0,0,idx) - try: - block.setIsContent(isContentArr[idx]) - except (TypeError, IndexError): - pass - try: - label=labelArr[idx] - if label==None: pass - elif type(label)==list: - for l in label: block.addLabel(l) - else: block.addLabel(label) - except (TypeError, IndexError): - pass - - textBlocks.append(block) - - return TextDocument(textBlocks) - - def verifyContent(self,filtr,doc,contentArr,show=False): - isContentBefore=[block.isContent() for block in doc.getTextBlocks()] - isChanged=filtr.process(doc) - isContent=[block.isContent() for block in doc.getTextBlocks()] - self.assertEqual(isContent,contentArr) - self.assertEqual(isChanged,isContent!=isContentBefore) - - def test_markEveryhingContent(self): - doc=self.makedoc([5,100,80],None,[False,True,False]) - self.verifyContent(MarkEverythingContentFilter(),doc,[True,True,True]) - - def test_inverted(self): - doc=self.makedoc([5,100,80],None,[False,True,False]) - self.verifyContent(InvertedFilter(),doc,[True,False,True]) - - def test_boilerplateBlock(self): - #keeps if isContent - doc=self.makedoc([5,100,10,50,80],None,[False,True,False,True,False]) - initBlocks=doc.getTextBlocks() - finalBlocks=[initBlocks[1],initBlocks[3]] - filtr=BoilerplateBlockFilter() - isChanged=filtr.process(doc) - isContent=[block.isContent() for block in doc.getTextBlocks()] - self.assertEqual(doc.getTextBlocks(),finalBlocks) - self.assertEqual(isContent,[True,True]) - self.assertEqual(isChanged,True) - - def test_minWords(self): - #rejects if #words6 - self.verifyContent(SurroundingToContentFilter(defaultCondition),doc,[True,True,True,False,True,False,False,True]) - - def test_labelToBoilerplate(self): - #reject block if it has a particular label - lb_not=DefaultLabels.STRICTLY_NOT_CONTENT - lb_maybe=DefaultLabels.MIGHT_BE_CONTENT - doc=self.makedoc([10,10,10,10],None,[True,True,True,True],[lb_not,lb_maybe,[lb_not,lb_maybe],None]) - self.verifyContent(LabelToBoilerplateFilter(DefaultLabels.STRICTLY_NOT_CONTENT),doc,[False,True,False,True]) - - def test_labelToContent(self): - #accept block if it has a particular label - lb_not=DefaultLabels.STRICTLY_NOT_CONTENT - lb_maybe=DefaultLabels.MIGHT_BE_CONTENT - doc=self.makedoc([10,10,10,10],None,[False,False,False,False],[lb_not,lb_maybe,[lb_not,lb_maybe],None]) - self.verifyContent(LabelToContentFilter(DefaultLabels.MIGHT_BE_CONTENT),doc,[False,True,True,False]) - - - def test_simpleBlockFusion(self): - #join blocks with the same number of words per line - doc=self.makedoc(["two words","three fucking words","another three words"],None,[False,False,False]) - filtr=SimpleBlockFusionProcessor() - isChanged=filtr.process(doc) - blockIdxs=[(block.getOffsetBlocksStart(),block.getOffsetBlocksEnd()) for block in doc.getTextBlocks()] - self.assertEqual(blockIdxs,[(0,0),(1,2)]) - self.assertEqual(isChanged,True) - - def test_contentFusion(self): - #join blocks with low link density - filtr=ContentFusion() - - #merge - doc=self.makedoc([10,10],[0,0],[True,False]) - isChanged=filtr.process(doc) - self.assertEqual(len(doc.getTextBlocks()),1) - self.assertEqual(isChanged,True) - - #dont merge if 
tagged not content - doc=self.makedoc([10,10],[0,0],[True,False],[None,DefaultLabels.STRICTLY_NOT_CONTENT]) - isChanged=filtr.process(doc) - self.assertEqual(len(doc.getTextBlocks()),2) - self.assertEqual(isChanged,False) - - #dont merge if link density is high - doc=self.makedoc([10,10],[0,8],[True,False]) - isChanged=filtr.process(doc) - self.assertEqual(len(doc.getTextBlocks()),2) - self.assertEqual(isChanged,False) - - #multiple pass merging - doc=self.makedoc([10,10,10,10],[0,0,0,0],[True,False,True,False]) - isChanged=filtr.process(doc) - self.assertEqual(len(doc.getTextBlocks()),1) - self.assertEqual(isChanged,True) - - def test_labelFusion(self): - #fuse blocks with identical labels - ONLY LOOKS AT LABELS with markup prefix - - lb1=DefaultLabels.MARKUP_PREFIX+".title" - lb2=DefaultLabels.MARKUP_PREFIX+".menu" - doc=self.makedoc([10,10,10,10,10,10,10],None,None,[None,None,lb1,lb1,lb2,lb2,[lb1,lb2]]) - filtr=LabelFusion() - isChanged=filtr.process(doc) - blockIdxs=[(block.getOffsetBlocksStart(),block.getOffsetBlocksEnd()) for block in doc.getTextBlocks()] - self.assertEqual(blockIdxs,[(0,1),(2,3),(4,5),(6,6)]) - self.assertEqual(isChanged,True) - - def test_blockProximity(self): - #fuse blocks close to each other - doc=self.makedoc([10,10,10,10,10,10,10],None,[False,True,True,True,True,True,False]) - filtr=BlockProximityFusion(1,True,False) - isChanged=filtr.process(doc) - blockIdxs=[(block.getOffsetBlocksStart(),block.getOffsetBlocksEnd()) for block in doc.getTextBlocks()] - self.assertEqual(blockIdxs,[(0,0),(1,5),(6,6)]) - self.assertEqual(isChanged,True) - - def test_largestBlock(self): - #choose largest block - doc=self.makedoc([10,10,50,10],None,[False,True,True,True]) - self.verifyContent(KeepLargestBlockFilter(),doc,[False,False,True,False]) - - def test_expandTitleToContent(self): - #marks all between title and content start - lb1=DefaultLabels.MIGHT_BE_CONTENT - doc=self.makedoc([10,10,10,10],None,[False,False,False,True],[lb1,[lb1,DefaultLabels.TITLE],lb1,lb1]) - self.verifyContent(ExpandTitleToContentFilter(),doc,[False,True,True,True]) - - def test_articleMetadata(self): - #marks as content and tags blocks with date/time data - doc=self.makedoc([" May 1, 2009 8:00pm EST","May not be date 1","By Frank Sinatra","By looking at this sentence, you can see there is no author"],None,[False,False,False,False]) - self.verifyContent(ArticleMetadataFilter(),doc,[True,False,True,False]) - labels=[block.getLabels() for block in doc.getTextBlocks()] - self.assertIn(DefaultLabels.ARTICLE_METADATA,labels[0]) - - def test_largestBlock(self): - #accept largest block and reject all others - doc=self.makedoc([10,10,50,10],None,[False,True,True,True]) - self.verifyContent(KeepLargestBlockFilter(),doc,[False,False,True,False]) - - def test_addPrecedingLabels(self): - #add prefix+preceding label to each block - lb1=DefaultLabels.TITLE - lb2=DefaultLabels.MIGHT_BE_CONTENT - prefix="^" - doc=self.makedoc([10,10,10],None,None,[lb1,lb2,None]) - filtr=AddPrecedingLabelsFilter(prefix) - isChanged=filtr.process(doc) - labels=[block.getLabels() for block in doc.getTextBlocks()] - self.assertEqual(labels,[set([lb1]),set([prefix+lb1,lb2]),set([prefix+lb2])]) - self.assertEqual(isChanged,True) - - def test_documentTitleMatch(self): - #add title label to blocks matching sections of the title - doc=self.makedoc(["News","This is the real title","Red herring"]) - doc.setTitle("News - This is the real title") - filtr=DocumentTitleMatchClassifier(None,True) - isChanged=filtr.process(doc) - 
labels=[block.getLabels() for block in doc.getTextBlocks()] - self.assertEqual(labels,[set(),set([DefaultLabels.TITLE]),set()]) - self.assertEqual(isChanged,True) - - def test_minFulltextWords(self): - #choose largest block - doc=self.makedoc([10,50],None,[True,True]) - self.verifyContent(MinFulltextWordsFilter(30),doc,[False,True]) - - def test_largestFulltextBlock(self): - #accept largest block that has been marked as content and reject all others - doc=self.makedoc([10,50,80,10],None,[True,True,False,False]) - self.verifyContent(KeepLargestFulltextBlockFilter(),doc,[False,True,False,False]) - - def test_ignoreBlocksAfterContent(self): - #rejects all blocks after(&including) first block with ENDOFTEXT label - #Also: ENDOFTEXT labels are ignored until the total number of words in content blocks reaches a certain number - lb=DefaultLabels.INDICATES_END_OF_TEXT - doc=self.makedoc([10,30,50,80,20],None,[False,True,True,True,True],[lb,None,None,lb,None]) - self.verifyContent(IgnoreBlocksAfterContentFilter(60),doc,[False,True,True,False,False]) - - def test_ignoreBlocksAfterContentFromEnd(self): - #rejects all blocks with ENDOFTEXT label - #works backwards until the total number of words in content blocks reaches 200 and then halts - lb=DefaultLabels.INDICATES_END_OF_TEXT - doc=self.makedoc([80,80,80,80,80],None,[True,True,True,True,True],[lb,None,None,lb,None]) - self.verifyContent(IgnoreBlocksAfterContentFromEndFilter(),doc,[True,True,True,False,True]) - - def test_terminatingBlocks(self): - #add ENDOFTEXT label at detected beginning of comments section - lb=DefaultLabels.INDICATES_END_OF_TEXT - s1="Comments can be the first word of article text. If there are many words in the block, it is not comments" - s2="Thanks for your comments - this feedback is now closed" - doc=self.makedoc(["Comments","Please have your say","48 Comments today",s1,s2]) - filtr=TerminatingBlocksFinder() - isChanged=filtr.process(doc) - hasLabel=[(lb in block.getLabels()) for block in doc.getTextBlocks()] - self.assertEqual(hasLabel,[True,True,True,False,True]) - self.assertEqual(isChanged,True) - - def test_numWordsClassifier(self): - #accepts or rejects block based on machine-trained decision tree rules - #using features from previous, current and next block - filtr=NumWordsRulesClassifier() - - doc=self.makedoc([2,10,10],[0,0,0],[True,True,True]) - isChanged=filtr.process(doc) - #test middle block only - self.assertEqual(doc.getTextBlocks()[1].isContent(),False) - - doc=self.makedoc([10,10,10],[0,0,0],[True,True,True]) - isChanged=filtr.process(doc) - self.assertEqual(doc.getTextBlocks()[1].isContent(),True) - - def test_densityClassifier(self): - #accepts or rejects block based on a different set of machine-trained decision tree rules - #using features from previous, current and next block - doc=self.makedoc([10,10,5],[10,0,0],[True,True,True]) - isChanged=DensityRulesClassifier().process(doc) - self.assertEqual(doc.getTextBlocks()[1].isContent(),False) - - def test_canolaClassifier(self): - #accepts or rejects block based on a different set of machine-trained decision tree rules - #using features from previous, current and next block - doc=self.makedoc([5,10,30],[5,10,0],[True,False,True]) - isChanged=CanolaFilter().process(doc) - self.assertEqual(doc.getTextBlocks()[1].isContent(),True) + defaultWords="Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec fermentum tincidunt magna, eu pulvinar mauris dapibus pharetra. In varius, nisl a rutrum porta, sem sem semper lacus, et varius urna tellus vel lorem. 
Nullam urna eros, luctus eget blandit ac, imperdiet feugiat ipsum. Donec laoreet tristique mi a bibendum. Sed pretium bibendum scelerisque. Mauris id pellentesque turpis. Mauris porta adipiscing massa, quis tempus dui pharetra ac. Morbi lacus mauris, feugiat ac tempor ut, congue tincidunt risus. Pellentesque tincidunt adipiscing elit, in fringilla enim scelerisque vel. Nulla facilisi. ".split(' ') + + def makedoc(self,wordsArr,numAnchorWordsArr=None,isContentArr=None,labelArr=None): + textBlocks=[] + for idx,words in enumerate(wordsArr): + if type(words)==int: + numWords=words + text=' '.join(self.defaultWords[:numWords]) + else: + text=words + numWords=text.count(' ') + try: + numAnchorWords=numAnchorWordsArr[idx] + except (TypeError, IndexError): + numAnchorWords=0 + block=TextBlock(text,set(),numWords,numAnchorWords,0,0,idx) + try: + block.setIsContent(isContentArr[idx]) + except (TypeError, IndexError): + pass + try: + label=labelArr[idx] + if label==None: pass + elif type(label)==list: + for l in label: block.addLabel(l) + else: block.addLabel(label) + except (TypeError, IndexError): + pass + + textBlocks.append(block) + + return TextDocument(textBlocks) + + def verifyContent(self,filtr,doc,contentArr,show=False): + isContentBefore=[block.isContent() for block in doc.getTextBlocks()] + isChanged=filtr.process(doc) + isContent=[block.isContent() for block in doc.getTextBlocks()] + self.assertEqual(isContent,contentArr) + self.assertEqual(isChanged,isContent!=isContentBefore) + + def test_markEveryhingContent(self): + doc=self.makedoc([5,100,80],None,[False,True,False]) + self.verifyContent(MarkEverythingContentFilter(),doc,[True,True,True]) + + def test_inverted(self): + doc=self.makedoc([5,100,80],None,[False,True,False]) + self.verifyContent(InvertedFilter(),doc,[True,False,True]) + + def test_boilerplateBlock(self): + #keeps if isContent + doc=self.makedoc([5,100,10,50,80],None,[False,True,False,True,False]) + initBlocks=doc.getTextBlocks() + finalBlocks=[initBlocks[1],initBlocks[3]] + filtr=BoilerplateBlockFilter() + isChanged=filtr.process(doc) + isContent=[block.isContent() for block in doc.getTextBlocks()] + self.assertEqual(doc.getTextBlocks(),finalBlocks) + self.assertEqual(isContent,[True,True]) + self.assertEqual(isChanged,True) + + def test_minWords(self): + #rejects if #words6 + self.verifyContent(SurroundingToContentFilter(defaultCondition),doc,[True,True,True,False,True,False,False,True]) + + def test_labelToBoilerplate(self): + #reject block if it has a particular label + lb_not=DefaultLabels.STRICTLY_NOT_CONTENT + lb_maybe=DefaultLabels.MIGHT_BE_CONTENT + doc=self.makedoc([10,10,10,10],None,[True,True,True,True],[lb_not,lb_maybe,[lb_not,lb_maybe],None]) + self.verifyContent(LabelToBoilerplateFilter(DefaultLabels.STRICTLY_NOT_CONTENT),doc,[False,True,False,True]) + + def test_labelToContent(self): + #accept block if it has a particular label + lb_not=DefaultLabels.STRICTLY_NOT_CONTENT + lb_maybe=DefaultLabels.MIGHT_BE_CONTENT + doc=self.makedoc([10,10,10,10],None,[False,False,False,False],[lb_not,lb_maybe,[lb_not,lb_maybe],None]) + self.verifyContent(LabelToContentFilter(DefaultLabels.MIGHT_BE_CONTENT),doc,[False,True,True,False]) + + + def test_simpleBlockFusion(self): + #join blocks with the same number of words per line + doc=self.makedoc(["two words","three fucking words","another three words"],None,[False,False,False]) + filtr=SimpleBlockFusionProcessor() + isChanged=filtr.process(doc) + blockIdxs=[(block.getOffsetBlocksStart(),block.getOffsetBlocksEnd()) for block 
in doc.getTextBlocks()] + self.assertEqual(blockIdxs,[(0,0),(1,2)]) + self.assertEqual(isChanged,True) + + def test_contentFusion(self): + #join blocks with low link density + filtr=ContentFusion() + + #merge + doc=self.makedoc([10,10],[0,0],[True,False]) + isChanged=filtr.process(doc) + self.assertEqual(len(doc.getTextBlocks()),1) + self.assertEqual(isChanged,True) + + #dont merge if tagged not content + doc=self.makedoc([10,10],[0,0],[True,False],[None,DefaultLabels.STRICTLY_NOT_CONTENT]) + isChanged=filtr.process(doc) + self.assertEqual(len(doc.getTextBlocks()),2) + self.assertEqual(isChanged,False) + + #dont merge if link density is high + doc=self.makedoc([10,10],[0,8],[True,False]) + isChanged=filtr.process(doc) + self.assertEqual(len(doc.getTextBlocks()),2) + self.assertEqual(isChanged,False) + + #multiple pass merging + doc=self.makedoc([10,10,10,10],[0,0,0,0],[True,False,True,False]) + isChanged=filtr.process(doc) + self.assertEqual(len(doc.getTextBlocks()),1) + self.assertEqual(isChanged,True) + + def test_labelFusion(self): + #fuse blocks with identical labels - ONLY LOOKS AT LABELS with markup prefix + + lb1=DefaultLabels.MARKUP_PREFIX+".title" + lb2=DefaultLabels.MARKUP_PREFIX+".menu" + doc=self.makedoc([10,10,10,10,10,10,10],None,None,[None,None,lb1,lb1,lb2,lb2,[lb1,lb2]]) + filtr=LabelFusion() + isChanged=filtr.process(doc) + blockIdxs=[(block.getOffsetBlocksStart(),block.getOffsetBlocksEnd()) for block in doc.getTextBlocks()] + self.assertEqual(blockIdxs,[(0,1),(2,3),(4,5),(6,6)]) + self.assertEqual(isChanged,True) + + def test_blockProximity(self): + #fuse blocks close to each other + doc=self.makedoc([10,10,10,10,10,10,10],None,[False,True,True,True,True,True,False]) + filtr=BlockProximityFusion(1,True,False) + isChanged=filtr.process(doc) + blockIdxs=[(block.getOffsetBlocksStart(),block.getOffsetBlocksEnd()) for block in doc.getTextBlocks()] + self.assertEqual(blockIdxs,[(0,0),(1,5),(6,6)]) + self.assertEqual(isChanged,True) + + def test_largestBlock(self): + #choose largest block + doc=self.makedoc([10,10,50,10],None,[False,True,True,True]) + self.verifyContent(KeepLargestBlockFilter(),doc,[False,False,True,False]) + + def test_expandTitleToContent(self): + #marks all between title and content start + lb1=DefaultLabels.MIGHT_BE_CONTENT + doc=self.makedoc([10,10,10,10],None,[False,False,False,True],[lb1,[lb1,DefaultLabels.TITLE],lb1,lb1]) + self.verifyContent(ExpandTitleToContentFilter(),doc,[False,True,True,True]) + + def test_articleMetadata(self): + #marks as content and tags blocks with date/time data + doc=self.makedoc([" May 1, 2009 8:00pm EST","May not be date 1","By Frank Sinatra","By looking at this sentence, you can see there is no author"],None,[False,False,False,False]) + self.verifyContent(ArticleMetadataFilter(),doc,[True,False,True,False]) + labels=[block.getLabels() for block in doc.getTextBlocks()] + self.assertIn(DefaultLabels.ARTICLE_METADATA,labels[0]) + + def test_largestBlock(self): + #accept largest block and reject all others + doc=self.makedoc([10,10,50,10],None,[False,True,True,True]) + self.verifyContent(KeepLargestBlockFilter(),doc,[False,False,True,False]) + + def test_addPrecedingLabels(self): + #add prefix+preceding label to each block + lb1=DefaultLabels.TITLE + lb2=DefaultLabels.MIGHT_BE_CONTENT + prefix="^" + doc=self.makedoc([10,10,10],None,None,[lb1,lb2,None]) + filtr=AddPrecedingLabelsFilter(prefix) + isChanged=filtr.process(doc) + labels=[block.getLabels() for block in doc.getTextBlocks()] + 
self.assertEqual(labels,[set([lb1]),set([prefix+lb1,lb2]),set([prefix+lb2])]) + self.assertEqual(isChanged,True) + + def test_documentTitleMatch(self): + #add title label to blocks matching sections of the title + doc=self.makedoc(["News","This is the real title","Red herring"]) + doc.setTitle("News - This is the real title") + filtr=DocumentTitleMatchClassifier(None,True) + isChanged=filtr.process(doc) + labels=[block.getLabels() for block in doc.getTextBlocks()] + self.assertEqual(labels,[set(),set([DefaultLabels.TITLE]),set()]) + self.assertEqual(isChanged,True) + + def test_minFulltextWords(self): + #choose largest block + doc=self.makedoc([10,50],None,[True,True]) + self.verifyContent(MinFulltextWordsFilter(30),doc,[False,True]) + + def test_largestFulltextBlock(self): + #accept largest block that has been marked as content and reject all others + doc=self.makedoc([10,50,80,10],None,[True,True,False,False]) + self.verifyContent(KeepLargestFulltextBlockFilter(),doc,[False,True,False,False]) + + def test_ignoreBlocksAfterContent(self): + #rejects all blocks after(&including) first block with ENDOFTEXT label + #Also: ENDOFTEXT labels are ignored until the total number of words in content blocks reaches a certain number + lb=DefaultLabels.INDICATES_END_OF_TEXT + doc=self.makedoc([10,30,50,80,20],None,[False,True,True,True,True],[lb,None,None,lb,None]) + self.verifyContent(IgnoreBlocksAfterContentFilter(60),doc,[False,True,True,False,False]) + + def test_ignoreBlocksAfterContentFromEnd(self): + #rejects all blocks with ENDOFTEXT label + #works backwards until the total number of words in content blocks reaches 200 and then halts + lb=DefaultLabels.INDICATES_END_OF_TEXT + doc=self.makedoc([80,80,80,80,80],None,[True,True,True,True,True],[lb,None,None,lb,None]) + self.verifyContent(IgnoreBlocksAfterContentFromEndFilter(),doc,[True,True,True,False,True]) + + def test_terminatingBlocks(self): + #add ENDOFTEXT label at detected beginning of comments section + lb=DefaultLabels.INDICATES_END_OF_TEXT + s1="Comments can be the first word of article text. 
If there are many words in the block, it is not comments" + s2="Thanks for your comments - this feedback is now closed" + doc=self.makedoc(["Comments","Please have your say","48 Comments today",s1,s2]) + filtr=TerminatingBlocksFinder() + isChanged=filtr.process(doc) + hasLabel=[(lb in block.getLabels()) for block in doc.getTextBlocks()] + self.assertEqual(hasLabel,[True,True,True,False,True]) + self.assertEqual(isChanged,True) + + def test_numWordsClassifier(self): + #accepts or rejects block based on machine-trained decision tree rules + #using features from previous, current and next block + filtr=NumWordsRulesClassifier() + + doc=self.makedoc([2,10,10],[0,0,0],[True,True,True]) + isChanged=filtr.process(doc) + #test middle block only + self.assertEqual(doc.getTextBlocks()[1].isContent(),False) + + doc=self.makedoc([10,10,10],[0,0,0],[True,True,True]) + isChanged=filtr.process(doc) + self.assertEqual(doc.getTextBlocks()[1].isContent(),True) + + def test_densityClassifier(self): + #accepts or rejects block based on a different set of machine-trained decision tree rules + #using features from previous, current and next block + doc=self.makedoc([10,10,5],[10,0,0],[True,True,True]) + isChanged=DensityRulesClassifier().process(doc) + self.assertEqual(doc.getTextBlocks()[1].isContent(),False) + + def test_canolaClassifier(self): + #accepts or rejects block based on a different set of machine-trained decision tree rules + #using features from previous, current and next block + doc=self.makedoc([5,10,30],[5,10,0],[True,False,True]) + isChanged=CanolaFilter().process(doc) + self.assertEqual(doc.getTextBlocks()[1].isContent(),True) class TestParser(unittest.TestCase): - extractor=Extractor(None) - defaultWords="Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec fermentum tincidunt magna, eu pulvinar mauris dapibus pharetra. In varius, nisl a rutrum porta, sem sem semper lacus, et varius urna tellus vel lorem. Nullam urna eros, luctus eget blandit ac, imperdiet feugiat ipsum. Donec laoreet tristique mi a bibendum. Sed pretium bibendum scelerisque. Mauris id pellentesque turpis. Mauris porta adipiscing massa, quis tempus dui pharetra ac. Morbi lacus mauris, feugiat ac tempor ut, congue tincidunt risus. Pellentesque tincidunt adipiscing elit, in fringilla enim scelerisque vel. Nulla facilisi. ".split(' ') - - def contentitem(self,s): - if type(s)==int: - return ' '.join(self.defaultWords[:s]) - else: return s - - def makecontent(self,strArr): - return [self.contentitem(s) for s in strArr] - - def makedoc(self,template,contentArr): - templateArr=template.split('*') - s="" - for i,j in zip(templateArr[:-1],contentArr): - s+=i+j - s+=templateArr[-1] - doc=self.extractor.parseDoc(s) - return doc - - def test_blocks(self): - template="

*

*

*

*
" - content=self.makecontent([4,5,6,7]) - doc=self.makedoc(template,content) - - blocks=doc.getTextBlocks() - textArr=[block.getText() for block in blocks] - numWords=[block.getNumWords() for block in blocks] - self.assertEqual(textArr,content) - self.assertEqual(numWords,[4,5,6,7]) - - def test_anchor(self): - template="

*

*

" - content=self.makecontent([6,"end with space ",3,6]) - doc=self.makedoc(template,content) - - blocks=doc.getTextBlocks() - textArr=[block.getText() for block in blocks] - densityArr=[block.getLinkDensity() for block in blocks] - numAnchorWords=[block.getNumWordsInAnchorText() for block in blocks] - self.assertEqual(textArr,[content[0],content[1]+content[2],content[3]]) - self.assertEqual(numAnchorWords,[0,3,6]) - self.assertEqual(densityArr,[0.0,0.5,1.0]) - - def test_title(self): - titleText="THIS IS TITLE" - s=""+titleText+"

THIS IS CONTENT

" - doc=self.extractor.parseDoc(s) - self.assertEqual(doc.getTitle(),titleText) - - def test_body(self): - bodyText="THIS IS CONTENT" - s="

NOT IN BODY

"+bodyText+"

" - doc=self.extractor.parseDoc(s) - textArr=[block.getText() for block in doc.getTextBlocks()] - self.assertEqual(textArr,[bodyText]) - - def test_inline(self): - template="

*

*

**
" - content=['AA','BB','CC','DD'] - doc=self.makedoc(template,content) - - blocks=doc.getTextBlocks() - textArr=[block.getText() for block in blocks] - numWords=[block.getNumWords() for block in blocks] - self.assertEqual(textArr,[content[0],content[1],content[2]+content[3]]) - - def test_ignorable(self): - template="

*

" - content=self.makecontent([10,12]) - doc=self.makedoc(template,content) - - blocks=doc.getTextBlocks() - textArr=[block.getText() for block in blocks] - self.assertEqual(textArr,[content[0]]) - - def assertRange(self,val,minval,maxval): - self.assertTrue(val>=minval and val<=maxval) - - def test_textDensity(self): - template="

*

*

" - content=self.makecontent([80,"one, !!! two"]) - doc=self.makedoc(template,content) - - blocks=doc.getTextBlocks() - numArr=[[block.getNumWords(),block.numWordsInWrappedLines,block.numWrappedLines,block.getTextDensity()] for block in blocks] - - #exact values are unknown, approximate value range to check - self.assertEqual(blocks[0].getNumWords(),80) - self.assertRange(blocks[0].numWordsInWrappedLines,60,80) - self.assertRange(blocks[0].numWrappedLines,4,7) - self.assertRange(blocks[0].getTextDensity(),8,16) - - self.assertEqual(numArr[1],[2,2,1,2]) - - def test_blockIdxs(self): - template="

*

*

*

*

" - content=self.makecontent([11,12,13,14]) - doc=self.makedoc(template,content) - - blocks=doc.getTextBlocks() - idxArr=[[block.getOffsetBlocksStart(),block.getOffsetBlocksEnd()] for block in blocks] - self.assertEqual(idxArr,[[0,0],[1,1],[2,2],[3,3]]) - - def test_tagLevel(self): - template="

*

*
" - content=self.makecontent([5,6]) - doc=self.makedoc(template,content) - - blocks=doc.getTextBlocks() - levelArr=[block.getTagLevel() for block in blocks] - self.assertEqual(levelArr,[5,3]) - - def test_merge(self): - block1=TextBlock("AA BB CC ",set([0]),3,3,3,1,0) - block2=TextBlock("DD EE FF GG HH II JJ .",set([1]),6,0,6,2,1) - block1.addLabels(DefaultLabels.MIGHT_BE_CONTENT) - block2.addLabels(DefaultLabels.ARTICLE_METADATA) - block1.mergeNext(block2) - self.assertEqual(block1.getText(),"AA BB CC \nDD EE FF GG HH II JJ .") - self.assertEqual(block1.getNumWords(),9) - self.assertEqual(block1.getNumWordsInAnchorText(),3) - self.assertAlmostEqual(block1.getLinkDensity(), 1.0 / 3.0) - self.assertEqual(block1.getTextDensity(),3) - self.assertEqual(block1.getLabels(),set([DefaultLabels.MIGHT_BE_CONTENT,DefaultLabels.ARTICLE_METADATA])) - self.assertEqual(block1.getOffsetBlocksStart(),0) - self.assertEqual(block1.getOffsetBlocksEnd(),1) - - - def test_getDocFromUrl(self): - """getDocFromUrl() should run (was dying because of undefined 'filename')""" - url = "http://www.example.com/" - fake_readFromUrl = mock.Mock(return_value=u"

Example

") - tmp_filter = MarkEverythingContentFilter() - - with mock.patch.object(self.extractor, "readFromUrl", fake_readFromUrl): - with mock.patch.object(self.extractor, "filter", tmp_filter): - self.assertIsInstance(self.extractor.getDocFromUrl(url), TextDocument) + extractor=Extractor(None) + defaultWords="Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec fermentum tincidunt magna, eu pulvinar mauris dapibus pharetra. In varius, nisl a rutrum porta, sem sem semper lacus, et varius urna tellus vel lorem. Nullam urna eros, luctus eget blandit ac, imperdiet feugiat ipsum. Donec laoreet tristique mi a bibendum. Sed pretium bibendum scelerisque. Mauris id pellentesque turpis. Mauris porta adipiscing massa, quis tempus dui pharetra ac. Morbi lacus mauris, feugiat ac tempor ut, congue tincidunt risus. Pellentesque tincidunt adipiscing elit, in fringilla enim scelerisque vel. Nulla facilisi. ".split(' ') + + def contentitem(self,s): + if type(s)==int: + return ' '.join(self.defaultWords[:s]) + else: return s + + def makecontent(self,strArr): + return [self.contentitem(s) for s in strArr] + + def makedoc(self,template,contentArr): + templateArr=template.split('*') + s="" + for i,j in zip(templateArr[:-1],contentArr): + s+=i+j + s+=templateArr[-1] + doc=self.extractor.parseDoc(s) + return doc + + def test_blocks(self): + template="

*

*

*

*
" + content=self.makecontent([4,5,6,7]) + doc=self.makedoc(template,content) + + blocks=doc.getTextBlocks() + textArr=[block.getText() for block in blocks] + numWords=[block.getNumWords() for block in blocks] + self.assertEqual(textArr,content) + self.assertEqual(numWords,[4,5,6,7]) + + def test_anchor(self): + template="

*

**

*

" + content=self.makecontent([6,"end with space ",3,6]) + doc=self.makedoc(template,content) + + blocks=doc.getTextBlocks() + textArr=[block.getText() for block in blocks] + densityArr=[block.getLinkDensity() for block in blocks] + numAnchorWords=[block.getNumWordsInAnchorText() for block in blocks] + self.assertEqual(textArr,[content[0],content[1]+content[2],content[3]]) + self.assertEqual(numAnchorWords,[0,3,6]) + self.assertEqual(densityArr,[0.0,0.5,1.0]) + + def test_title(self): + titleText="THIS IS TITLE" + s=""+titleText+"

THIS IS CONTENT

" + doc=self.extractor.parseDoc(s) + self.assertEqual(doc.getTitle(),titleText) + + def test_body(self): + bodyText="THIS IS CONTENT" + s="

NOT IN BODY

"+bodyText+"

" + doc=self.extractor.parseDoc(s) + textArr=[block.getText() for block in doc.getTextBlocks()] + self.assertEqual(textArr,[bodyText]) + + def test_inline(self): + template="

*

*

**
" + content=['AA','BB','CC','DD'] + doc=self.makedoc(template,content) + + blocks=doc.getTextBlocks() + textArr=[block.getText() for block in blocks] + numWords=[block.getNumWords() for block in blocks] + self.assertEqual(textArr,[content[0],content[1],content[2]+content[3]]) + + def test_ignorable(self): + template="

*

" + content=self.makecontent([10,12]) + doc=self.makedoc(template,content) + + blocks=doc.getTextBlocks() + textArr=[block.getText() for block in blocks] + self.assertEqual(textArr,[content[0]]) + + def assertRange(self,val,minval,maxval): + self.assertTrue(val>=minval and val<=maxval) + + def test_textDensity(self): + template="

*

*

" + content=self.makecontent([80,"one, !!! two"]) + doc=self.makedoc(template,content) + + blocks=doc.getTextBlocks() + numArr=[[block.getNumWords(),block.numWordsInWrappedLines,block.numWrappedLines,block.getTextDensity()] for block in blocks] + + #exact values are unknown, approximate value range to check + self.assertEqual(blocks[0].getNumWords(),80) + self.assertRange(blocks[0].numWordsInWrappedLines,60,80) + self.assertRange(blocks[0].numWrappedLines,4,7) + self.assertRange(blocks[0].getTextDensity(),8,16) + + self.assertEqual(numArr[1],[2,2,1,2]) + + def test_blockIdxs(self): + template="

*

*

*

*

" + content=self.makecontent([11,12,13,14]) + doc=self.makedoc(template,content) + + blocks=doc.getTextBlocks() + idxArr=[[block.getOffsetBlocksStart(),block.getOffsetBlocksEnd()] for block in blocks] + self.assertEqual(idxArr,[[0,0],[1,1],[2,2],[3,3]]) + + def test_tagLevel(self): + template="

*

*
" + content=self.makecontent([5,6]) + doc=self.makedoc(template,content) + + blocks=doc.getTextBlocks() + levelArr=[block.getTagLevel() for block in blocks] + self.assertEqual(levelArr,[5,3]) + + def test_merge(self): + block1=TextBlock("AA BB CC ",set([0]),3,3,3,1,0) + block2=TextBlock("DD EE FF GG HH II JJ .",set([1]),6,0,6,2,1) + block1.addLabels(DefaultLabels.MIGHT_BE_CONTENT) + block2.addLabels(DefaultLabels.ARTICLE_METADATA) + block1.mergeNext(block2) + self.assertEqual(block1.getText(),"AA BB CC \nDD EE FF GG HH II JJ .") + self.assertEqual(block1.getNumWords(),9) + self.assertEqual(block1.getNumWordsInAnchorText(),3) + self.assertAlmostEqual(block1.getLinkDensity(), 1.0 / 3.0) + self.assertEqual(block1.getTextDensity(),3) + self.assertEqual(block1.getLabels(),set([DefaultLabels.MIGHT_BE_CONTENT,DefaultLabels.ARTICLE_METADATA])) + self.assertEqual(block1.getOffsetBlocksStart(),0) + self.assertEqual(block1.getOffsetBlocksEnd(),1) + + + def test_getDocFromUrl(self): + """getDocFromUrl() should run (was dying because of undefined 'filename')""" + url = "http://www.example.com/" + fake_readFromUrl = mock.Mock(return_value=u"

Example

") + tmp_filter = MarkEverythingContentFilter() + + with mock.patch.object(self.extractor, "readFromUrl", fake_readFromUrl): + with mock.patch.object(self.extractor, "filter", tmp_filter): + self.assertIsInstance(self.extractor.getDocFromUrl(url), TextDocument) runTests() From e152a0653bd71c76a582526108d1fa58011e9d49 Mon Sep 17 00:00:00 2001 From: Jesir Vargas Date: Fri, 6 Apr 2018 11:49:41 -0400 Subject: [PATCH 4/4] remove accent for encoding sanity --- README.txt | 2 +- dist/boilerpy-1.0.zip | Bin 27822 -> 0 bytes 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 dist/boilerpy-1.0.zip diff --git a/README.txt b/README.txt index 078fa4b..5e06b9b 100644 --- a/README.txt +++ b/README.txt @@ -5,7 +5,7 @@ BoilerPy About --------------------------------------- -BoilerPy is a native Python port of Christian Kohlschütter's Boilerpipe library, released under the Apache 2.0 Licence. (http://code.google.com/p/boilerpipe/ +BoilerPy is a native Python port of Christian Kohlschutter's Boilerpipe library, released under the Apache 2.0 Licence. (http://code.google.com/p/boilerpipe/ ) I created this port since I don't have access to Java on my webhost and I wanted to create a pure Python version. Another Python version which consists of Python hooks to the original Java library can be found here : (https://github.com/misja/python-boilerpipe diff --git a/dist/boilerpy-1.0.zip b/dist/boilerpy-1.0.zip deleted file mode 100644 index a849c7c221511e0f39daa839cfff6b17cd8292f9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 27822 zcmZ^~V~j3L)Ghe5ZQHhO+qP}necHBd+qP}Mx4_R%HJL=6_-AU}bCO?C3?uM9;_|A}J&gh_u zvcj^U#K@}TC?luL$jr#9zObrxKQm1`DLbP?DLpGCM=QSy0#N*al2iZb;h%Hr zX{FR2`d`)vuBN0EF;2q>xpw?mN(7H*1P5^xgblC0s50~^bSrIa*RB*evR`iK+PE_^ zVp=*pyd$K4e^Qab`jl*fag*JFLX=EN#7OOkx#^=%?vKP{+DZquLw96Gmxr@mBPpO4 zIiuy;&^xlwqTiZFUodjsZ4^@cDniWqyTAECyCJBtZ2WRb{|TR7Xj>Jtxl{OJ#~ zNVVv+?z~p@rU+4ZGXKa=2I$w8Sxuc}vbq!+_+jesp>FGG-PoIJ2TAZK#y~!sum)+f zQb^9zO0&yyPC(&CU4uMeK@9Ge8zwb=BQLsHB?W zd5*C>160m}QWphmdECNLcq^B&M9FRXr|=be2vBLh0xKL^9(_e59%>KnA6O>Ip2z;; zn&oU0{Le{F`oo?3Ji1$1_2v+@44Rr{AfcYkkxw^^i@h%-5Y zz>+rmhAA8~O0sdJu|TC$qmj%1ehq7{;5`uv6MB|VMMxAV5wc_m5}~XZLIu+yA1p>P z1%hBeL5tQP!=_2ofqhz@^jea%w6{JvraxdH+LXjdO2Ta3-LU11EXx;}jtfg40-=vv z!fJvEG^2q$($W~kDpke2NVx*pkoRJ$$&k`M3Ogids(oN1J}M_ld`^PbsNk0)(pW)K z9Sc??lt}OTtinT-ek93E={iWnVvc~?hBhdIVkZ1I37<>lneX@-_3ljH*b8|FA`uxA z9Z#=t2gUg7<_LcDfb<0_qYX#`MT2q|yAt)%4K`Ocnwx_>XI($^l~6jLm+Ks_V}=P@ z^)_&{Ud09+(pyk2L(hudMgJ>!ya+QCfb3>XgDL|4xi9uZ4SGs(e#xsSa%U^_L{Ec3 z;p~;G@iYU%d(=H6Q!n7P*^%% zHX=JHk+@3Mf9X1(cg2_fhmeNPD*ZcHqgQ<>{;V(j!|~Ln(;u+itU^s&f#BgsvS+B2 zOpMe_S3-Z8^{7dhil&xo;UE$lWK|c4LN!h`j)pB6ITJA$F&IgAlq9bV zly?}6He{awb&$L?(j+<-w;oo|w4A`@-fk9`7xyofpL*Wsxp`22YP)VO{QUdFvdtpr ztgN7koP;VzlS>7s&FKuXXvmEzJ-CYk4ax3s+u@qB0-+y0<)lG0GHbls5J6wELMh;J z*^#>&PdDCT_@R%_mmcNvcZ{XjFw2}JDuG8TCV}Px8!{~FqcJvzubhE^Y&vl|HYPQp zxD3%kF62p&yHJ5;*hH%b_VXzM?(_IcDPUf3mi4%fI^y-tt!WJkHbXUdP-qAPCJPuc z1{6crdA|oLU1QN5+}{mzz{2_rKBg3BQ+F|N^%Ta+j|+Ei2VMZ4BHx05eoWwIvz+E^ z3Q-SFSxRNuB}7Eb4=86b#lZgJ8iKKsT2%OnSPoe2y<;Ytngo~{t;REGEJYNJwZYXY zJmyXhP>W;AdZN8}HNzBR)1mvh;#5dS+1*xUc?&F}5!gylr-jL z=s)m6zytU5dmPE#g0Dnkzb$oIl0)la ztQbEyZg-4B>hbdX^6PesER7#O3%|M*i9R!q%8s_`b`xp6il7Pocz5>ww%W0W17!(k z;9T;1ijkJ6;B{#gXk{hBffVtS@!ntV{e0NWoTBou^pRhtatiG}=ijNXFT5Rjc@C~K zRHdV7;@#1ey+XNJGx#4gCo7j1W+apFi30@y$YB5gjQ<_YmBoZa<^C5fm#WLTY>UJ4 zX|VhtWc*)v{(7>Nr)0+ML=q4~OEx>HpOnz?;GDjRz|!@c{<0>ZZYVYBmStrume)w- z4aCa}K=AS1@pJd?$EsY=uXpzzk7w}kE}jAHU&is>|3kMHC&-Oh5SESrxlUt%4cFR; 