"""Tiny web crawler: fetch pages starting at INDEX_URL and follow site links.

URLs are processed in FIFO order. Every URL is fetched at most once; pages
that fail to download or do not answer with HTTP 200 are skipped.
"""

import queue
import re

import requests

# Init static vars
INDEX_URL = "https://bitscuit.be/"

# Seconds to wait on a server before giving up on that URL; without a
# timeout a single unresponsive host would stall the crawl forever.
REQUEST_TIMEOUT = 10

# Matches "http(s)://host/" where host is made of dot-separated labels of
# lowercase letters and digits. Only the scheme, host and the first slash are
# captured, so any path after the host is dropped from the result.
HREF_PATTERN = re.compile(
    r"(https?:\/\/([a-z0-9]+\.)*[a-z0-9]{3,}\.[a-z0-9]{2,}\/)"
)

# Create session
session = requests.Session()

# Create website queue
urls = queue.Queue()
urls.put(INDEX_URL)

# URLs that were already enqueued; prevents fetching the same URL repeatedly
# (pages commonly link back to themselves and to each other).
seen = {INDEX_URL}

# Loop
while not urls.empty():
    url = urls.get()

    # Perform request
    print("Fetching url '%s'..." % url, end="")
    try:
        r = session.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # A dead or misbehaving host must not abort the whole crawl.
        print("\tfailed (%s)" % exc)
        continue
    print("\tdone")

    # Read response
    if r.status_code != 200:
        print("returned %d" % r.status_code)
        continue

    # Filter page for hrefs (group 1 is the full URL match)
    hrefs = [match.group(1) for match in HREF_PATTERN.finditer(r.text)]
    print(hrefs)

    # Add to queue, keeping only URLs not enqueued before. dict.fromkeys
    # removes duplicates within this page while preserving discovery order.
    new_hrefs = [href for href in dict.fromkeys(hrefs) if href not in seen]
    print("adding %d new urls" % len(new_hrefs))
    for href in new_hrefs:
        seen.add(href)
        urls.put(href)