Cleaned logging

This commit is contained in:
bitscuit 2021-05-08 19:58:05 +02:00
parent 5ca786b1ae
commit 0462614a22
1 changed file with 10 additions and 8 deletions

18
main.py
View File

@@ -1,6 +1,11 @@
import requests import requests
import queue import queue
import re import re
import logging as log
# Init logging
log.basicConfig(level=log.INFO, format="%(asctime)-15s %(levelname)-8s %(message)s")
log.info("Shammer is getting ready...")
# Init static vars # Init static vars
INDEX_URL = "https://bitscuit.be/" INDEX_URL = "https://bitscuit.be/"
@@ -20,32 +25,29 @@ while not urls.empty():
url = urls.get() url = urls.get()
# Perform request # Perform request
print("Fetching url '%s'..."%url, end="") log.info("Fetching '%s'..."%url)
try: try:
r = session.get(url) r = session.get(url)
print("\tdone")
except Exception as e: except Exception as e:
print("\tfailed") log.info("failed")
print(e) log.info(e)
finally: finally:
r.close() r.close()
# Read response # Read response
if r.status_code != 200: if r.status_code != 200:
print("returned %d"%r.status_code) log.info("returned status %d"%r.status_code)
continue continue
# Filter page for hrefs # Filter page for hrefs
hrefs = [res[0] for res in re.findall("(https*:\/\/([a-z0-9]+\.)*[a-z0-9]{3,}\.[a-z0-9]{2,}\/)", r.text)] hrefs = [res[0] for res in re.findall("(https*:\/\/([a-z0-9]+\.)*[a-z0-9]{3,}\.[a-z0-9]{2,}\/)", r.text)]
print(hrefs)
# Add to queue # Add to queue
print("found %d new urls"%len(hrefs), end="")
numAdded = 0 numAdded = 0
for href in hrefs: for href in hrefs:
if href not in visited: if href not in visited:
urls.put(href) urls.put(href)
visited.add(href) visited.add(href)
numAdded += 1 numAdded += 1
print(", of which %d new"%numAdded) log.info("%d urls, %d new, queue length %d"%(len(hrefs), numAdded, urls.qsize()))