"""Minimal breadth-first web crawler (``main.py``).

Starting from ``INDEX_URL``, every queued page is fetched, its body is
scanned for absolute site-root URLs, and each URL that has not been seen
before is added to the queue.

The ``shammer`` launcher script changes into this file's directory and
runs ``python3 main.py``.
"""
import queue
import re

# Init static vars
INDEX_URL = "https://bitscuit.be/"

# Seconds to wait for a server before giving up on a single request, so one
# unresponsive host cannot stall the whole crawl.
REQUEST_TIMEOUT = 10

# Matches "http(s)://[sub.]domain.tld/": lowercase letters and digits only,
# a domain label of at least 3 characters and a TLD of at least 2.
# Raw string + non-capturing group, so findall() yields the full URL strings.
HREF_PATTERN = re.compile(
    r"https?://(?:[a-z0-9]+\.)*[a-z0-9]{3,}\.[a-z0-9]{2,}/"
)


def extract_hrefs(text):
    """Return every site-root URL found in *text*, in order of appearance.

    Duplicates are kept; an empty list is returned when nothing matches.
    """
    return HREF_PATTERN.findall(text)


def crawl(index_url=INDEX_URL):
    """Crawl breadth-first from *index_url*, printing progress to stdout.

    Each distinct URL is fetched at most once. Requests that raise a
    ``requests.RequestException`` (connection error, timeout, ...) and
    responses with a status other than 200 are reported and skipped.
    """
    # Imported here so extract_hrefs() stays importable without the
    # third-party dependency installed.
    import requests

    # Create session
    session = requests.Session()

    # Create website queue, plus the set of URLs that were ever queued so
    # pages linking to each other do not make the crawl loop forever.
    urls = queue.Queue()
    urls.put(index_url)
    seen = {index_url}

    # Loop
    while not urls.empty():
        url = urls.get()

        # Perform request; flush so the progress text is visible while
        # the request is still in flight.
        print("Fetching url '%s'..." % url, end="", flush=True)
        try:
            r = session.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as err:
            print("\tfailed (%s)" % err)
            continue
        print("\tdone")

        # Read response
        if r.status_code != 200:
            print("returned %d" % r.status_code)
            continue

        # Filter page for hrefs
        hrefs = extract_hrefs(r.text)
        print(hrefs)

        # Add to queue: only URLs never queued before, first occurrence
        # order preserved.
        fresh = [href for href in dict.fromkeys(hrefs) if href not in seen]
        print("adding %d new urls" % len(fresh))
        for href in fresh:
            seen.add(href)
            urls.put(href)


if __name__ == "__main__":
    crawl()