Skip to content

Commit 249795e

Browse files
authored
Merge pull request #20 from jaysonsantos/speed-improvements
Make the code faster by compiling the regexes once at module level and reusing a single module-level namedtuple class.
2 parents: 16c4cef + d470a9d; commit: 249795e

File tree

1 file changed

+44
-46
lines changed

1 file changed

+44
-46
lines changed

giturlparse/parser.py

Lines changed: 44 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,43 @@
2323
import collections
2424
import re
2525

26+
Parsed = collections.namedtuple('Parsed', [
27+
'pathname',
28+
'protocols',
29+
'protocol',
30+
'href',
31+
'resource',
32+
'user',
33+
'port',
34+
'name',
35+
'owner',
36+
])
37+
38+
POSSIBLE_REGEXES = (
39+
re.compile(r'^(?P<protocol>https?|git|ssh|rsync)\://'
40+
r'(?:(?P<user>.+)@)*'
41+
r'(?P<resource>[a-z0-9_.-]*)'
42+
r'[:/]*'
43+
r'(?P<port>[\d]+){0,1}'
44+
r'(?P<pathname>\/(?P<owner>.+)/(?P<name>.+).git)'),
45+
re.compile(r'(git\+)?'
46+
r'((?P<protocol>\w+)://)'
47+
r'((?P<user>\w+)@)?'
48+
r'((?P<resource>[\w\.\-]+))'
49+
r'(:(?P<port>\d+))?'
50+
r'(?P<pathname>(\/(?P<owner>\w+)/)?'
51+
r'(\/?(?P<name>[\w\-]+)(\.git)?)?)'),
52+
re.compile(r'^(?:(?P<user>.+)@)*'
53+
r'(?P<resource>[a-z0-9_.-]*)[:/]*'
54+
r'(?P<port>[\d]+){0,1}'
55+
r'[:](?P<pathname>\/?(?P<owner>.+)/(?P<name>.+).git)'),
56+
re.compile(r'((?P<user>\w+)@)?'
57+
r'((?P<resource>[\w\.\-]+))'
58+
r'[\:\/]{1,2}'
59+
r'(?P<pathname>((?P<owner>\w+)/)?'
60+
r'((?P<name>[\w\-]+)(\.git)?)?)'),
61+
)
62+
2663

2764
class ParserError(Exception):
2865
""" Error raised when a URL can't be parsed. """
@@ -37,19 +74,6 @@ class Parser(object):
3774
def __init__(self, url):
3875
self._url = url
3976

40-
def get_parsed(self):
41-
return collections.namedtuple('Parsed', [
42-
'pathname',
43-
'protocols',
44-
'protocol',
45-
'href',
46-
'resource',
47-
'user',
48-
'port',
49-
'name',
50-
'owner',
51-
])
52-
5377
def parse(self):
5478
"""
5579
Parses a GIT URL and returns an object. Raises an exception on invalid
@@ -69,47 +93,21 @@ def parse(self):
6993
'name': None,
7094
'owner': None,
7195
}
72-
regexes = [
73-
(r'^(?P<protocol>https?|git|ssh|rsync)\://'
74-
r'(?:(?P<user>.+)@)*'
75-
r'(?P<resource>[a-z0-9_.-]*)'
76-
r'[:/]*'
77-
r'(?P<port>[\d]+){0,1}'
78-
r'(?P<pathname>\/(?P<owner>.+)/(?P<name>.+).git)'),
79-
(r'(git\+)?'
80-
r'((?P<protocol>\w+)://)'
81-
r'((?P<user>\w+)@)?'
82-
r'((?P<resource>[\w\.\-]+))'
83-
r'(:(?P<port>\d+))?'
84-
r'(?P<pathname>(\/(?P<owner>\w+)/)?'
85-
r'(\/?(?P<name>[\w\-]+)(\.git)?)?)'),
86-
(r'^(?:(?P<user>.+)@)*'
87-
r'(?P<resource>[a-z0-9_.-]*)[:/]*'
88-
r'(?P<port>[\d]+){0,1}'
89-
r'[:](?P<pathname>\/?(?P<owner>.+)/(?P<name>.+).git)'),
90-
(r'((?P<user>\w+)@)?'
91-
r'((?P<resource>[\w\.\-]+))'
92-
r'[\:\/]{1,2}'
93-
r'(?P<pathname>((?P<owner>\w+)/)?'
94-
r'((?P<name>[\w\-]+)(\.git)?)?)'),
95-
]
96-
for regex in regexes:
97-
if re.search(regex, self._url):
98-
m = re.search(regex, self._url)
99-
d.update(m.groupdict())
96+
for regex in POSSIBLE_REGEXES:
97+
match = regex.search(self._url)
98+
if match:
99+
d.update(match.groupdict())
100100
break
101101
else:
102102
msg = "Invalid URL '{}'".format(self._url)
103103
raise ParserError(msg)
104104

105-
p = self.get_parsed()
106-
107-
return p(**d)
105+
return Parsed(**d)
108106

109107
def _get_protocols(self):
110108
try:
111109
index = self._url.index('://')
112-
113-
return self._url[0:index].split('+')
114110
except ValueError:
115111
return []
112+
113+
return self._url[:index].split('+')

0 commit comments

Comments
 (0)