diff --git a/crawler/src/crawler/main.py b/crawler/src/crawler/main.py index 0b0ea61..a76935d 100644 --- a/crawler/src/crawler/main.py +++ b/crawler/src/crawler/main.py @@ -33,10 +33,7 @@ class Crawler(object): def __init__(self, url, delay, ignore): self.url = url self.delay = delay - if ignore: - self.ignore = ignore.split(',') - else: - self.ignore = [] + self.ignore = ignore.split(',') if ignore else [] def get(self, url): """ @@ -59,10 +56,7 @@ def crawl(self): for tag in soup.findAll('a', href=True): link = tag['href'] parsed = urlparse(link) - if parsed.scheme: - to_get = link - else: - to_get = self.url + link + to_get = link if parsed.scheme else self.url + link if should_ignore(self.ignore, to_get): print('Ignoring URL: {url}'.format(url=to_get)) continue diff --git a/crawler/src/crawler/utils.py b/crawler/src/crawler/utils.py index a4a6d82..0779966 100644 --- a/crawler/src/crawler/utils.py +++ b/crawler/src/crawler/utils.py @@ -9,10 +9,7 @@ def log(url, status): :param status: A status code for the `Response`. """ - if 200 <= int(status) < 300: - prose = 'OK' - else: - prose = 'ERR' + prose = 'OK' if 200 <= int(status) < 300 else 'ERR' print("{prose}: {status} {url}".format(prose=prose, url=url, status=status))