From 95e4b27bc3b66fdbf6a4b32ca20f1b41f8f8e6e2 Mon Sep 17 00:00:00 2001
From: bitscuit <thomas@bitscuit.be>
Date: Sat, 8 May 2021 21:07:00 +0200
Subject: [PATCH] Add timeout=5 to session.get calls; remove debug print

---
 main.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/main.py b/main.py
index f606d28..39d39ae 100644
--- a/main.py
+++ b/main.py
@@ -28,7 +28,7 @@ while not urls.empty():
     # Perform request
     log.info("Fetching '%s'..."%url)
     try:
-        r = session.get(url, stream=True)
+        r = session.get(url, stream=True, timeout=5)
         
         # Check file size
         if ("Content-Length" in r.headers.keys() and int(r.headers["Content-Length"]) > MAX_CONTENT_LENGTH) or ("content-length" in r.headers.keys() and int(r.headers["content-length"]) > MAX_CONTENT_LENGTH):
@@ -36,7 +36,7 @@ while not urls.empty():
             continue
 
         # Download full
-        r = session.get(url)
+        r = session.get(url, timeout=5)
 
     except Exception as e:
         log.info("failed")
@@ -55,7 +55,6 @@ while not urls.empty():
         hrefs = [res[0] for res in re.findall("(https*:\/\/([a-z0-9]+\.)*[a-z0-9]{3,}\.[a-z0-9]{2,}\/)", r.text)]
         pages = [(url[:url.rfind("/")+1]+res[0][(1 if res[0][0] == "/" else 0):]) for res in re.findall("href *= *[\"'](\/*([a-z0-9]*\/*)+(\.[a-z0-9]*)*)[\"']", r.text)]
         hrefs.extend(pages)
-        print(hrefs)
     except Exception as e:
         log.info("failed to parse page")
         log.info(e)