now caches gotten ips

master
Nicholas Hope 2023-01-03 21:54:09 -05:00
parent 119bb50dc3
commit 51e0a1944a
1 changed files with 77 additions and 81 deletions

View File

@ -3,6 +3,53 @@ import requests
from math import radians, cos, sin, acos, asin, sqrt
from pathlib import Path
from os import chdir
from datetime import datetime, timezone
from dateutil import tz
import pickle
class IPCache:
    """Cache of ip-api.com geolocation lookups, persisted to a pickle file.

    Intended to be used as a context manager: entering loads a previously
    pickled cache (if one exists) from the directory the instance was
    created in, and exiting writes the cache back to that file.

    Each entry maps an ip string to
    ``((latitude, longitude), fixed_offset_timezone)``.
    """

    picklefile_name = 'cached_ips.pkl'

    def __init__(self, /):
        # Remember the directory we were created in so the pickle file is
        # always read from / written to the same place, even if the working
        # directory has been changed in the meantime.
        self.creation_dir = Path('.').resolve()
        # Start with an empty cache so get() is usable (just not persisted)
        # even when the instance is not used in a `with` block.
        self.filename = self.picklefile_name
        self.cache = {}

    def get(self, ip, /):
        """Return ``((lat, lon), timezone)`` for *ip*.

        The result is served from the cache when present; otherwise
        ip-api.com is queried and the answer is stored in the cache.

        Raises:
            RuntimeError: if the HTTP request fails, the service reports
                the ip as invalid, or the reported timezone is unknown.
        """
        if ip in self.cache:
            return self.cache[ip]

        addr = f'http://ip-api.com/json/{ip}'
        response = requests.get(addr)
        if not response.ok:
            raise RuntimeError(f'request for ip failed with {response.status_code}')

        # Parse the body as JSON. Never eval() data that came off the
        # network: it would execute arbitrary code and also chokes on JSON
        # literals such as true/false/null.
        resulting_dict = response.json()
        if resulting_dict['status'] == 'fail':
            raise RuntimeError('ip was invalid')

        # the given timezone is like, 'Australia/Sydney'. we need to convert to
        # a datetime.timezone type
        timezone_str = resulting_dict['timezone']
        tzfile = tz.gettz(timezone_str)
        if tzfile is None:
            raise RuntimeError(f'unknown timezone {timezone_str!r} for ip')
        # Take the zone's UTC offset at the current instant. The stored
        # timezone is a fixed offset, so later DST changes are not tracked.
        as_timedelta = datetime.now(tzfile).utcoffset()
        as_timezone_type = timezone(as_timedelta)

        entry = ((resulting_dict['lat'], resulting_dict['lon']), as_timezone_type)
        self.cache[ip] = entry
        return entry

    def __enter__(self, filename=picklefile_name, /):
        """Load the pickled cache (if any) and return ``self``.

        Changes the working directory to the directory the instance was
        created in before looking for *filename*.
        """
        chdir(self.creation_dir)
        if Path(filename).exists():
            # NOTE: pickle.load can execute arbitrary code; only safe as long
            # as the cache file is one this program wrote itself.
            with open(filename, 'rb') as f:
                cache = pickle.load(f)
        else:
            cache = {}
        self.filename = filename
        self.cache = cache
        return self

    def __exit__(self, err_type, err_value, traceback, /):
        """Write the cache back to the pickle file.

        The cache is saved whether or not an exception is propagating, and
        the exception (if any) is not suppressed.
        """
        chdir(self.creation_dir)
        with open(self.filename, 'wb') as f:
            pickle.dump(self.cache, f)
def filter_logs(filename: str):
with open(filename, 'r') as f:
@ -13,24 +60,8 @@ def filter_logs(filename: str):
if (
status != '200'
or method != 'GET'
or file not in validnames
): continue
date, time, timezone = timethings.split(' ')
yield ip, date, time, timezone, status, method, file
def get_ip_latlon(ip: str) -> 'tuple[float, float] | None':
    """Look up the latitude and longitude of *ip* via ip-api.com.

    Returns:
        ``(lat, lon)`` as reported by the service, or ``None`` when the
        service answers with status ``'fail'``.

    Raises:
        RuntimeError: if the HTTP request itself fails.
    """
    # make a request to ip-api.com to associate an ip to a
    # latitude and longitude
    addr = f'http://ip-api.com/json/{ip}'
    response = requests.get(addr)
    if not response.ok:
        raise RuntimeError(f'request for ip failed with {response.status_code}')
    # Parse the body as JSON; eval() on network data would execute
    # arbitrary code and fails on JSON literals like true/false/null.
    resulting_dict = response.json()
    if resulting_dict['status'] == 'fail':
        return None
    return resulting_dict['lat'], resulting_dict['lon']
def value_sort(d: dict, reverse=True) -> list:
    """Return the ``(key, value)`` pairs of *d* as a list sorted by value.

    Sorted largest value first by default; pass ``reverse=False`` for
    ascending order. The sort is stable, so pairs with equal values keep
    the dict's iteration order.
    """
    return sorted(d.items(), key=lambda kv_pair: kv_pair[1], reverse=reverse)
yield ip, timethings, status, method, file
def latlon_distance(p1, p2) -> float:
# black magic do not touch. use the haversine formula to find the distance
@ -54,45 +85,31 @@ def analyze_server(server: Path, serverip: str) -> None:
if not server.is_dir():
return
result = ''
filename = f'{server.name}/access.log'
hitfiles = {}
requesters = {}
distances = {}
times = {}
self_latlon = get_ip_latlon(serverip)
for ip, date, time_str, timezone, status, method, file in filter_logs(filename):
if file in hitfiles:
hitfiles[file] += 1
else:
hitfiles[file] = 1
self_latlon, _ = ip_cache.get(serverip)
for ip, timethings, status, method, file in filter_logs(filename):
# ip_cache.get() returns the (latitude, longitude) pair and the timezone
# of an ip; lookups are cached in a picklefile by IPCache
latlon, timezone = ip_cache.get(ip)
if ip in requesters:
requesters[ip] += 1
else:
requesters[ip] = 1
latlon = get_ip_latlon(ip)
if latlon is not None:
distances[ip] = latlon_distance(self_latlon, latlon)
# convert `timethings` to a datetime object
time_of_hit = datetime.strptime(timethings, log_date_format)
# convert from its default timezone in GMT to the timezone of the requester
localtime = time_of_hit.replace(tzinfo=timezone.utc).astimezone(timezone)
# convert this time back to a string for logging purposes
localtime_str = localtime.strftime(log_date_format)
hour = time_str.split(':')[0]
if hour in times:
times[hour] += 1
else:
times[hour] = 1
print(f'\n\n--- ANALYSIS FOR {server.name.upper()} ---\n')
for dict_name in ['hitfiles', 'requesters', 'distances', 'times']:
print(
dict_name + ': {\n '
+ ',\n '.join(
f'{k!r}: {v!r}'
for k, v in value_sort(eval(dict_name))
)
+ '\n}'
distance = latlon_distance(self_latlon, latlon)
date, time = timethings.split(' ')[:2]
result += (
f'{server.name},{ip},{distance},{date},{time},'
+ f'{localtime_str},{method},{file}\n'
)
print(f'average: {sum(distances.values())/len(distances)}')
return result
def get_server_ip(servername: str) -> str:
# associate servers with ips
@ -115,45 +132,24 @@ def main(args: list) -> int:
print('no logdir provided')
return 1
outfile = 'analysis.csv'
start_dir = Path('.').resolve()
f = open(outfile, 'a')
for logdir in args[1:]:
chdir(logdir)
serverdir = Path('.')
for subdir in serverdir.iterdir():
serverip = get_server_ip(subdir.name)
analyze_server(subdir, serverip)
csv_lines = analyze_server(subdir, serverip)
chdir(start_dir)
f.write(csv_lines)
f.close()
return 0
# Requested file names that are kept by filter_logs(): a log entry whose
# file is not in this set is skipped.
# NOTE(review): these look like paths commonly probed by automated
# scanners (WordPress, .env, .git, ...) — confirm the intent.
validnames = {
'wp-login.php',
'.env',
'plugins/system/debug/debug.xml',
'administrator/language/en-GB/en-GB.xml',
'administrator/help/en-GB/toc.json',
'.git/config',
'vendor/phpunit/phpunit/src/Util/PHP/eval-stdin.php',
'xmlrpc.php',
'wp1/wp-includes/wlwmanifest.xml',
'wp/wp-includes/wlwmanifest.xml',
'wordpress/wp-includes/wlwmanifest.xml',
'web/wp-includes/wlwmanifest.xml',
'test/wp-includes/wlwmanifest.xml',
'site/wp-includes/wlwmanifest.xml',
'shop/wp-includes/wlwmanifest.xml',
'cms/wp-includes/wlwmanifest.xml',
'blog/wp-includes/wlwmanifest.xml',
'2019/wp-includes/wlwmanifest.xml',
'wp-load.php',
'public/_ignition/health-check',
'_ignition/health-check',
'admin/.env',
'protected/.env',
'wp-includes/wp-class.php',
'wp-commentin.php',
'wp-signin.php'
}
# strptime/strftime format used for the timestamp of a log entry:
# two-digit year/month/day, 24-hour time, then a numeric UTC offset.
log_date_format = r'%y/%m/%d %H:%M:%S %z'
if __name__ == '__main__':
sys.exit(main(sys.argv))
with IPCache() as ip_cache:
sys.exit(main(sys.argv))