Cleaned logging
This commit is contained in:
parent
5ca786b1ae
commit
0462614a22
18
main.py
18
main.py
|
@@ -1,6 +1,11 @@
|
||||||
import requests
|
import requests
|
||||||
import queue
|
import queue
|
||||||
import re
|
import re
|
||||||
|
import logging as log
|
||||||
|
|
||||||
|
# Init logging
|
||||||
|
log.basicConfig(level=log.INFO, format="%(asctime)-15s %(levelname)-8s %(message)s")
|
||||||
|
log.info("Shammer is getting ready...")
|
||||||
|
|
||||||
# Init static vars
|
# Init static vars
|
||||||
INDEX_URL = "https://bitscuit.be/"
|
INDEX_URL = "https://bitscuit.be/"
|
||||||
|
@@ -20,32 +25,29 @@ while not urls.empty():
|
||||||
url = urls.get()
|
url = urls.get()
|
||||||
|
|
||||||
# Perform request
|
# Perform request
|
||||||
print("Fetching url '%s'..."%url, end="")
|
log.info("Fetching '%s'..."%url)
|
||||||
try:
|
try:
|
||||||
r = session.get(url)
|
r = session.get(url)
|
||||||
print("\tdone")
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("\tfailed")
|
log.info("failed")
|
||||||
print(e)
|
log.info(e)
|
||||||
finally:
|
finally:
|
||||||
r.close()
|
r.close()
|
||||||
|
|
||||||
# Read response
|
# Read response
|
||||||
if r.status_code != 200:
|
if r.status_code != 200:
|
||||||
print("returned %d"%r.status_code)
|
log.info("returned status %d"%r.status_code)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Filter page for hrefs
|
# Filter page for hrefs
|
||||||
hrefs = [res[0] for res in re.findall("(https*:\/\/([a-z0-9]+\.)*[a-z0-9]{3,}\.[a-z0-9]{2,}\/)", r.text)]
|
hrefs = [res[0] for res in re.findall("(https*:\/\/([a-z0-9]+\.)*[a-z0-9]{3,}\.[a-z0-9]{2,}\/)", r.text)]
|
||||||
print(hrefs)
|
|
||||||
|
|
||||||
# Add to queue
|
# Add to queue
|
||||||
print("found %d new urls"%len(hrefs), end="")
|
|
||||||
numAdded = 0
|
numAdded = 0
|
||||||
for href in hrefs:
|
for href in hrefs:
|
||||||
if href not in visited:
|
if href not in visited:
|
||||||
urls.put(href)
|
urls.put(href)
|
||||||
visited.add(href)
|
visited.add(href)
|
||||||
numAdded += 1
|
numAdded += 1
|
||||||
print(", of which %d new"%numAdded)
|
log.info("%d urls, %d new, queue length %d"%(len(hrefs), numAdded, urls.qsize()))
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue