Wrote first version

This commit is contained in:
bitscuit 2021-05-08 17:43:31 +02:00
parent b3e781bd98
commit 3866b2be0f
2 changed files with 40 additions and 0 deletions

37
main.py Normal file
View File

@@ -0,0 +1,37 @@
import requests
import queue
import re
# Init static vars
INDEX_URL = "https://bitscuit.be/"
# Seconds to wait on a single request before giving up on that URL.
REQUEST_TIMEOUT = 10
# Matches an absolute http(s) URL up to and including the slash that follows
# the host name (lowercase letters and digits only). Compiled once because it
# is applied to every fetched page.
HREF_PATTERN = re.compile(
    r"https?://(?:[a-z0-9]+\.)*[a-z0-9]{3,}\.[a-z0-9]{2,}/"
)


def extract_hrefs(text):
    """Return every URL matched by HREF_PATTERN in *text*.

    Matches are returned in order of appearance and may contain duplicates.
    Each match ends at the first slash after the host name, so any path
    that follows in *text* is not part of the result.
    """
    # The pattern has no capturing groups, so findall yields plain strings.
    return HREF_PATTERN.findall(text)


def crawl(start_url, session):
    """Fetch *start_url* and every URL reachable from it, each at most once.

    *session* is used for all requests via ``session.get(url, timeout=...)``.
    Progress is reported on stdout. URLs whose request fails or whose
    response status is not 200 are reported and skipped.
    """
    # Create website queue
    urls = queue.Queue()
    urls.put(start_url)
    # Every URL that has ever been queued; without this, pages that link to
    # each other (or to themselves) would be fetched again and again.
    seen = {start_url}

    # Loop
    while not urls.empty():
        url = urls.get()

        # Perform request
        # flush so the progress line is visible while the request blocks
        print("Fetching url '%s'..." % url, end="", flush=True)
        try:
            r = session.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as err:
            # One unreachable or misbehaving host must not end the crawl.
            print("\tfailed: %s" % err)
            continue
        print("\tdone")

        # Read response
        if r.status_code != 200:
            print("returned %d" % r.status_code)
            continue

        # Filter page for hrefs
        hrefs = extract_hrefs(r.text)
        print(hrefs)

        # Add to queue
        # dict.fromkeys drops repeats within this page while keeping order.
        new_hrefs = [href for href in dict.fromkeys(hrefs) if href not in seen]
        print("adding %d new urls" % len(new_hrefs))
        for href in new_hrefs:
            seen.add(href)
            urls.put(href)


def main():
    """Crawl starting from INDEX_URL using a single shared HTTP session."""
    # Create session; the context manager closes it when the crawl ends.
    with requests.Session() as session:
        crawl(INDEX_URL, session)


if __name__ == "__main__":
    main()

3
shammer Executable file
View File

@@ -0,0 +1,3 @@
#!/bin/bash
# Launch main.py from the directory this script lives in, so the relative
# path to main.py resolves regardless of the caller's working directory.

# Quote the substitution so directories containing spaces are not word-split,
# and stop if the cd fails rather than running main.py from the wrong place.
cd "$(dirname "$0")" || exit 1
python3 main.py