Added timeout

This commit is contained in:
bitscuit 2021-05-08 21:07:00 +02:00
parent a09bc1da54
commit 95e4b27bc3
1 changed file with 2 additions and 3 deletions

@@ -28,7 +28,7 @@ while not urls.empty():
     # Perform request
     log.info("Fetching '%s'..."%url)
     try:
-        r = session.get(url, stream=True)
+        r = session.get(url, stream=True, timeout=5)
         # Check file size
         if ("Content-Length" in r.headers.keys() and int(r.headers["Content-Length"]) > MAX_CONTENT_LENGTH) or ("content-length" in r.headers.keys() and int(r.headers["content-length"]) > MAX_CONTENT_LENGTH):
@@ -36,7 +36,7 @@ while not urls.empty():
             continue
         # Download full
-        r = session.get(url)
+        r = session.get(url, timeout=5)
     except Exception as e:
         log.info("failed")
@@ -55,7 +55,6 @@ while not urls.empty():
         hrefs = [res[0] for res in re.findall("(https*:\/\/([a-z0-9]+\.)*[a-z0-9]{3,}\.[a-z0-9]{2,}\/)", r.text)]
         pages = [(url[:url.rfind("/")+1]+res[0][(1 if res[0][0] == "/" else 0):]) for res in re.findall("href *= *[\"'](\/*([a-z0-9]*\/*)+(\.[a-z0-9]*)*)[\"']", r.text)]
         hrefs.extend(pages)
-        print(hrefs)
     except Exception as e:
         log.info("failed to parse page")
         log.info(e)
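
Note (not part of the commit): a minimal standalone sketch of what the added timeout=5 does in requests. The session, url and log names mirror the script above; the example URL and the logger setup are placeholders, not taken from the commit.

import logging
import requests

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("crawler")

session = requests.Session()
url = "https://example.com/"  # placeholder URL, not taken from the commit

try:
    # A scalar timeout=5 limits both the connect and the read phase to
    # 5 seconds each; without it, a stalled server can hang session.get()
    # indefinitely and block the crawl loop.
    r = session.get(url, stream=True, timeout=5)
except requests.exceptions.Timeout:
    # Raised when either limit is exceeded; the script's broader
    # `except Exception` also catches this and just logs "failed".
    log.info("failed (timed out)")
except requests.exceptions.RequestException as e:
    log.info("failed")
    log.info(e)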