initial commit of project

This commit is contained in:
2021-04-11 19:51:12 +02:00
commit a21a8186d9
110 changed files with 16326178 additions and 0 deletions

View File

View File

@@ -0,0 +1,23 @@
import os
import sys
import json
def get_input(file_path, print_config=False):
    """Load the user configuration from a JSON file.

    Args:
        file_path: Path of the JSON file to read.
        print_config: When true, pretty-print the parsed configuration
            (4-space indent, sorted keys) to stdout before returning.

    Returns:
        The parsed JSON document, as produced by ``json.load``.

    Raises:
        SystemExit: With exit code 1 when ``file_path`` does not exist.
    """
    # Fail with a readable message instead of a traceback.
    if not os.path.exists(file_path):
        print('GetInput error: Input path does not exist:\n%s' %(file_path))
        sys.exit(1)
    # Read as UTF-8 explicitly so non-ASCII values do not depend on the
    # platform's default locale encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        config = json.load(f)
    # Print nicely as json
    if print_config:
        print(json.dumps(config, indent=4, sort_keys=True))
    return config

View File

@@ -0,0 +1,28 @@
import requests
# from tools import get_user_agent
class HTMLresponse(object):
    """Outcome of trying to fetch an HTML page.

    Attributes set by the constructor:
        error: 0 when the request succeeded, 1 when it failed.
        error_message: "" on success, otherwise "Get error: <url>".
        response: "" on success, otherwise the string form of the
            ``requests`` exception that was raised.
        text: The response body encoded as UTF-8 bytes on success; the
            initial empty string "" is left in place on failure.
    """

    def __init__(self, url, user_agent=None, timeout=None):
        """Request ``url`` and record either its body or the failure.

        Args:
            url: Address to fetch.
            user_agent: Optional dict of HTTP headers, passed to
                ``requests.get`` as ``headers``.
            timeout: Optional timeout in seconds forwarded to
                ``requests.get``. The default ``None`` keeps the previous
                behaviour of waiting indefinitely.
        """
        self.error = 0
        self.error_message = ""
        # Always defined, so callers can read it without checking `error`.
        self.response = ""
        self.text = ""
        # Try to get HTML response
        try:
            # headers=None is what requests uses when no headers are given,
            # so a single call covers both the with- and without-agent case.
            r = requests.get(url, headers=user_agent, timeout=timeout)
            r.raise_for_status()
            self.text = r.text.encode('utf-8')
        # If error, return error message and flag
        except requests.exceptions.RequestException as err:
            self.error = 1
            self.response = str(err)
            self.error_message = "".join(["Get error: ", url])

View File

@@ -0,0 +1,17 @@
"""
Read user agents from list (local file) and select one for the request.
"""
import random
def user_agent_list(user_agent_list_path):
    """Return the raw lines of the local user-agent file.

    Each entry is one line of the file exactly as read, including its
    trailing newline when the line has one.
    """
    with open(user_agent_list_path, "r") as handle:
        # Iterating a text file yields the same lines readlines() returns.
        return list(handle)
def select_user_agent(agent_list):
    """Select a random user agent from a pre-loaded list of agents.

    Args:
        agent_list: Non-empty sequence of user-agent strings, for example
            the raw lines read from the user-agent file.

    Returns:
        A dict with the single key "user-agent" whose value is the chosen
        entry with newlines, double quotes and surrounding whitespace
        removed.

    Raises:
        ValueError: If ``agent_list`` is empty.
    """
    if len(agent_list) == 0:
        # Same exception type the old randint-based code raised here, but
        # with a message that names the actual problem.
        raise ValueError("select_user_agent: agent_list is empty")
    chosen = random.choice(agent_list)
    # Strip after removing quotes so padding inside the quotes goes too.
    cleaned = chosen.replace("\n", "").replace("\"", "").strip()
    return {"user-agent": cleaned}

View File

@@ -0,0 +1,50 @@
import os
import sys
import shutil
import re
def make_output_folder(folder_path, debug=False):
    """Create the output folder, asking what to do if it already exists.

    When the folder exists the user is prompted on stdin to add to it
    ('a'), overwrite it ('o') or exit ('e'). The prompt repeats until the
    answer is exactly one of those letters; surrounding whitespace and
    letter case are ignored.

    Args:
        folder_path: Path of the folder to create.
        debug: When true, nothing is created and no prompt is shown.

    Returns:
        None.

    Raises:
        SystemExit: With exit code 1 when the user chooses to exit.
    """
    # Skip if debug (avoids replace prompt)
    if debug:
        print("FolderSetup warning: Not creating directory because debug = True")
        return None

    # If destination folder does not exist then create it
    if not os.path.exists(folder_path):
        os.mkdir(folder_path)
        return None

    # Otherwise give a choice to add, overwrite, or exit
    confirm_prompt = "The following folder exists:" + "\n" + \
        str(folder_path) + "\n" + \
        "Would you like to add to it ('a'), overwrite ('o'), or exit ('e'): "
    confirm = input(confirm_prompt).strip().lower()
    # Require an exact choice. A substring match would accept answers such
    # as "yes" and then fall through every branch without doing anything.
    while confirm not in ("a", "e", "o"):
        confirm_prompt = "Please confirm what you want to do." + "\n" + \
            "Would you like to add to it ('a'), overwrite ('o'), or exit ('e'):"
        confirm = input(confirm_prompt).strip().lower()

    if confirm == "e":
        print("OK exiting.")
        sys.exit(1)
    elif confirm == "o":
        # Replace the folder and everything in it with an empty one
        shutil.rmtree(folder_path)
        os.mkdir(folder_path)
        print("Created output folder: %s" %(folder_path))
    else:
        # confirm == "a": keep the existing contents
        print("OK adding to folder")
    return None

View File

@@ -0,0 +1,20 @@
import os
import sys
import re
def remove_pyc(folderpath):
    """Remove all .pyc files from a folder and its subfolders.

    Only files whose own name ends in ".pyc" are deleted. Files that
    merely have ".pyc" somewhere in their path (for example "x.pyc.bak",
    or anything inside a ".pycharm_helpers" directory) are left alone.

    Args:
        folderpath: Root folder to search recursively.

    Returns:
        None.
    """
    # Collect first, then delete, so the walk never sees a changing tree.
    pyc_paths = [
        os.path.join(folder, filename)
        for folder, _subs, files in os.walk(folderpath)
        for filename in files
        if filename.endswith(".pyc")
    ]
    for path in pyc_paths:
        os.remove(path)

View File

@@ -0,0 +1,23 @@
import os
import sys
def write_text(input_text, file_path, option="Exit"):
    """Write text to disk, with a choice of what to do if the file exists.

    Args:
        input_text: Content to write. Either ``str`` or UTF-8 encoded
            ``bytes``; bytes are decoded before writing.
        file_path: Destination file.
        option: What to do when ``file_path`` already exists:
            "Exit" prints an error and exits, "Append" adds to the end of
            the file, "Overwrite" replaces its content. Any other value
            leaves the existing file untouched.

    Raises:
        SystemExit: With exit code 1 when the file exists and ``option``
            is "Exit".
    """
    # Accept both types for every option. Previously a new file required
    # bytes while Append/Overwrite required str.
    if isinstance(input_text, bytes):
        input_text = input_text.decode("utf-8")

    if not os.path.exists(file_path):
        mode = 'w'
    elif option == "Exit":
        print("Write file error: This file already exists.\n %s \nExiting..." %(file_path))
        sys.exit(1)
    elif option == "Append":
        mode = 'a'
    elif option == "Overwrite":
        mode = 'w'
    else:
        # Unrecognised option on an existing file: do nothing, as before.
        return

    # Write as UTF-8 explicitly so the result does not depend on the
    # platform's default locale encoding.
    with open(file_path, mode, encoding="utf-8") as f:
        f.write(input_text)