Fixed a small issue and moved the delay to the beginning of the loop
This commit is contained in:
parent
9850fdf2b3
commit
4d88f14b9d
13
main.py
13
main.py
|
@ -82,6 +82,11 @@ while not urls.empty():
|
||||||
time.sleep(60)
|
time.sleep(60)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Wait random time
|
||||||
|
if REQUESTS_PER_MINUTE != 0:
|
||||||
|
l = random.uniform(0, 2*60/REQUESTS_PER_MINUTE)
|
||||||
|
time.sleep(l)
|
||||||
|
|
||||||
# Get next url from queue
|
# Get next url from queue
|
||||||
url = urls.get()
|
url = urls.get()
|
||||||
|
|
||||||
|
@ -109,12 +114,11 @@ while not urls.empty():
|
||||||
|
|
||||||
# Download full
|
# Download full
|
||||||
r = session.get(url, timeout=5)
|
r = session.get(url, timeout=5)
|
||||||
|
r.close()
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.info("failed")
|
log.info("failed")
|
||||||
log.info(e)
|
log.info(e)
|
||||||
finally:
|
|
||||||
r.close()
|
|
||||||
|
|
||||||
# Filter page for hrefs
|
# Filter page for hrefs
|
||||||
timeStart = time.perf_counter()
|
timeStart = time.perf_counter()
|
||||||
|
@ -144,9 +148,4 @@ while not urls.empty():
|
||||||
numAdded += 1
|
numAdded += 1
|
||||||
log.info("%d urls, %d new, queue length %d"%(len(hrefs), numAdded, urls.qsize()))
|
log.info("%d urls, %d new, queue length %d"%(len(hrefs), numAdded, urls.qsize()))
|
||||||
|
|
||||||
# Wait random time
|
|
||||||
if REQUESTS_PER_MINUTE != 0:
|
|
||||||
l = random.uniform(0, 2*60/REQUESTS_PER_MINUTE)
|
|
||||||
time.sleep(l)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue