From 0462614a223f64bff0ce3521b8bc91281e75c425 Mon Sep 17 00:00:00 2001
From: bitscuit
Date: Sat, 8 May 2021 19:58:05 +0200
Subject: [PATCH] Cleaned logging

---
NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. The 4-space indentation and the position of
blank context lines are a reconstruction -- verify them against main.py
(blob 6280fc5) before applying. The original second hunk header recorded 32
old lines; one blank context line could not be located.

Changed relative to the original commit: when session.get() raises, `r` is
never assigned (or still holds the previous iteration's response), so the
old `finally: r.close()` and the status check after it would fail or act on
the wrong page. The except branch now skips to the next URL, and the
response is closed only after a successful request. The commit id in the
From line and the post-image blob id in the index line predate this change.

 main.py | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/main.py b/main.py
index 6280fc5..6357c9b 100644
--- a/main.py
+++ b/main.py
@@ -1,6 +1,11 @@
 import requests
 import queue
 import re
+import logging as log
+
+# Init logging
+log.basicConfig(level=log.INFO, format="%(asctime)-15s %(levelname)-8s %(message)s")
+log.info("Shammer is getting ready...")

 # Init static vars
 INDEX_URL = "https://bitscuit.be/"
@@ -20,31 +25,30 @@ while not urls.empty():
     url = urls.get()

     # Perform request
-    print("Fetching url '%s'..."%url, end="")
+    log.info("Fetching '%s'..."%url)
     try:
         r = session.get(url)
-        print("\tdone")
     except Exception as e:
-        print("\tfailed")
-        print(e)
-    finally:
-        r.close()
+        log.info("failed")
+        log.info(e)
+        # No response exists when the request itself raised: skip this URL
+        # instead of closing/parsing an unbound or stale `r`.
+        continue
+    r.close()

     # Read response
     if r.status_code != 200:
-        print("returned %d"%r.status_code)
+        log.info("returned status %d"%r.status_code)
         continue

     # Filter page for hrefs
     hrefs = [res[0] for res in re.findall("(https*:\/\/([a-z0-9]+\.)*[a-z0-9]{3,}\.[a-z0-9]{2,}\/)", r.text)]
-    print(hrefs)

     # Add to queue
-    print("found %d new urls"%len(hrefs), end="")
     numAdded = 0
     for href in hrefs:
         if href not in visited:
             urls.put(href)
             visited.add(href)
             numAdded += 1
-    print(", of which %d new"%numAdded)
+    log.info("%d urls, %d new, queue length %d"%(len(hrefs), numAdded, urls.qsize()))