Nicholas Hope Science Fair 2023
https://nickhope.world/
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
173 lines
5.5 KiB
173 lines
5.5 KiB
import sys |
|
import requests |
|
from math import radians, cos, sin, acos, asin, sqrt |
|
from pathlib import Path |
|
from os import chdir |
|
from datetime import datetime, timezone |
|
from dateutil import tz |
|
import pickle |
|
from time import sleep |
|
|
|
class IPCache:
    """Disk-backed cache of IP geolocation lookups made against ip-api.com.

    Meant to be used as a context manager: entering loads previously cached
    results from a pickle file located in the directory the object was
    created in, and exiting writes the (possibly extended) cache back to
    that same file.

    Each entry maps an IP string to ``((lat, lon), tz)`` where ``tz`` is a
    fixed-offset ``datetime.timezone``.
    """

    picklefile_name = 'cached_ips.pkl'

    def __init__(self, /):
        # Remember where we were created: other code chdir()s around, but the
        # pickle file must always be read from / written to the same place.
        self.creation_dir = Path('.').resolve()
        # Usable defaults so get() and __exit__() also work when __enter__()
        # was never called.
        self.filename = self.picklefile_name
        self.cache = {}

    def get(self, ip, /):
        """Return ``((lat, lon), tz)`` for *ip*, asking ip-api.com on a miss.

        A successful lookup is stored in the cache before being returned.

        Raises:
            RuntimeError: if all 5 requests come back with a non-OK HTTP
                status, if the service reports ``status == 'fail'``, or if
                the reported timezone name cannot be resolved.
        """
        if ip in self.cache:
            return self.cache[ip]
        print(f'{ip} not in cache')

        addr = f'http://ip-api.com/json/{ip}'
        max_attempts = 5
        for attempt in range(1, max_attempts + 1):
            response = requests.get(addr)
            # Sleep after every request, successful or not. On failures this
            # is an exponential backoff; on success it still throttles us
            # (presumably to stay under the service's rate limit -- confirm
            # before removing).
            sleep(2 ** attempt)
            if response.ok:
                break
            print(f'request for {ip} failed with {response.status_code}')
        else:
            # loop ran out of attempts without a single OK response
            raise RuntimeError('critical failure (> 5 retries)')

        # Parse the body as JSON. Never eval() it: the text comes from the
        # network over plain HTTP and eval() would execute whatever it holds.
        resulting_dict = response.json()
        if resulting_dict['status'] == 'fail':
            raise RuntimeError('ip was invalid')

        # the given timezone is like 'Australia/Sydney'. we need to convert
        # it to a datetime.timezone type
        timezone_str = resulting_dict['timezone']
        if timezone_str == 'Europe/Kyiv':
            # presumably the local tz database only knows the older spelling
            timezone_str = 'Europe/Kiev'
        tzfile = tz.gettz(timezone_str)
        if tzfile is None:
            raise RuntimeError(f'unknown timezone {timezone_str!r} for {ip}')
        # NOTE: this freezes the UTC offset in effect right now; the stored
        # timezone is a fixed offset and does not follow DST changes.
        as_timedelta = datetime.now(tzfile).utcoffset()
        as_timezone_type = timezone(as_timedelta)

        self.cache[ip] = (
            (resulting_dict['lat'], resulting_dict['lon']),
            as_timezone_type,
        )
        return self.cache[ip]

    def __enter__(self, filename=picklefile_name, /):
        """Load the cache from *filename* (if it exists) and return self.

        Changes the working directory to the directory the object was
        created in, so *filename* is resolved relative to it.
        """
        chdir(self.creation_dir)
        if Path(filename).exists():
            # NOTE(security): unpickling can execute arbitrary code; only
            # point this at cache files this program wrote itself.
            with open(filename, 'rb') as f:
                cache = pickle.load(f)
        else:
            cache = {}
        self.filename = filename
        self.cache = cache
        return self

    def __exit__(self, err_type, err_value, traceback, /):
        """Write the cache back to disk; exceptions are not suppressed."""
        # The cache is saved even when the block is exiting on an error, so
        # lookups already paid for are not lost.
        chdir(self.creation_dir)
        with open(self.filename, 'wb') as f:
            pickle.dump(self.cache, f)
|
|
|
def filter_logs(filename: str):
    """Yield the successful GET requests recorded in an access log.

    Every non-blank line is expected to have five comma-separated fields:
    ``IP,<date time timezone>,STATUS,METHOD,FILE``. Only the first four
    commas are treated as separators, so FILE may itself contain commas.

    Yields:
        ``(ip, timethings, status, method, file)`` tuples of strings for
        lines whose status is exactly ``'200'`` and whose method is exactly
        ``'GET'``. ``file`` is the logged path with its first four
        '/'-separated components dropped and surrounding whitespace removed.

    Raises:
        ValueError: if a non-blank line has fewer than five fields.
    """
    with open(filename, 'r') as f:
        for line in f:
            # a blank line (e.g. a trailing newline at the end of the log)
            # carries no record; skip it instead of failing the unpack below
            if not line.strip():
                continue

            ip, timethings, status, method, filepath = line.split(',', 4)
            if status != '200' or method != 'GET':
                continue

            # only trim the path for rows we are actually going to emit
            served_path = '/'.join(filepath.split('/')[4:]).strip()
            yield ip, timethings, status, method, served_path
|
|
|
def latlon_distance(p1, p2) -> float:
    """Return the great-circle distance in kilometres between two points.

    ``p1`` and ``p2`` are ``(latitude, longitude)`` pairs in degrees. The
    distance is computed with the haversine formula on a sphere of radius
    6371 km.
    """
    # work in radians from here on
    lat1, lon1 = (radians(degrees) for degrees in p1)
    lat2, lon2 = (radians(degrees) for degrees in p2)

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    # haversine of the central angle between the two points
    lat_term = sin(dlat / 2)**2
    lon_term = cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    a = lat_term + lon_term

    # central angle (radians) scaled by the Earth's radius
    earth_radius_km = 6371
    return 2 * asin(sqrt(a)) * earth_radius_km
|
|
|
def analyze_server(server: Path) -> str:
    """Build the CSV rows for one server's access log.

    ``server`` is a directory named after a known server (see
    ``get_server_ip``); its log is opened as ``<server.name>/access.log``
    relative to the current working directory. Geolocation lookups go
    through the module-level ``ip_cache``.

    Returns:
        One line per successful GET request, each of the form
        ``server,ip,lat,lon,distance,sv date,sv time,local date,local time,
        verb,"filename"`` and terminated by a newline, all concatenated into
        a single string. Returns ``''`` when ``server`` is not a directory,
        so the result can always be written to a file as-is.

    Raises:
        ValueError: if ``server.name`` is not a known server name.
    """
    if not server.is_dir():
        return ''
    serverip = get_server_ip(server.name)

    filename = f'{server.name}/access.log'
    self_latlon, _ = ip_cache.get(serverip)

    # collect rows in a list and join once at the end instead of growing a
    # string with += on every log line
    rows = []
    for ip, timethings, status, method, file in filter_logs(filename):
        # location and fixed-offset timezone of the requester; named
        # requester_tz so it does not shadow the imported `timezone` class
        latlon, requester_tz = ip_cache.get(ip)

        # convert `timethings` to a datetime object
        time_of_hit = datetime.strptime(timethings, log_date_format)
        # treat the logged time as GMT and convert it to the requester's
        # timezone. NOTE(review): replace() discards whatever offset was
        # parsed from the log line -- fine as long as logs are written in GMT
        local_hit = time_of_hit.replace(tzinfo=timezone.utc).astimezone(requester_tz)
        # convert this time back to a string for logging purposes, without timezone
        localdate, localtime = local_hit.strftime(log_date_format).split(' ')[:2]
        # the log format uses a two-digit year; expand it to four digits
        localdate = '20' + localdate

        distance = latlon_distance(self_latlon, latlon)
        date, time = timethings.split(' ')[:2]
        lat, lon = latlon
        rows.append(
            f'{server.name},{ip},{lat},{lon},{distance:.5f},20{date},{time},'
            f'{localdate},{localtime},{method},"{file}"\n'
        )

    return ''.join(rows)
|
|
|
def get_server_ip(servername: str) -> str:
    """Return the IP address associated with a known server name.

    Raises:
        ValueError: if ``servername`` is not one of the known servers.
    """
    # server name -> ip, checked in order
    known_servers = (
        ('nova', '184.73.25.153'),
        ('singapore', '18.139.108.77'),
        ('sydney', '54.206.216.118'),
        ('dublin', '54.194.92.137'),
        ('brazil', '18.228.245.48'),
    )
    for name, address in known_servers:
        if servername == name:
            return address

    raise ValueError(f'{servername} is not a known server')
|
|
|
def main(args: list) -> int:
    """Analyze every server directory under a log directory into a CSV file.

    ``args`` is an argv-style list: ``args[1]`` is the directory holding one
    sub-directory per server. The result is written to ``analysis.csv`` in
    the directory the program was started from.

    Returns:
        0 on success, 1 when no log directory was given.
    """
    if len(args) < 2:
        # no log dirs provided
        print('no logdir provided')
        return 1

    outfile = 'analysis.csv'
    start_dir = Path('.').resolve()
    logdir = Path(args[1]).resolve()

    with open(outfile, 'w') as f:
        # header columns match the rows produced by analyze_server()
        f.write(
            'server,ip,lat,lon,distance,sv date,sv time,'
            'local date,local time,verb,filename\n'
        )
        # analyze_server() opens logs relative to the working directory
        chdir(logdir)
        try:
            for subdir in Path('.').iterdir():
                csv_lines = analyze_server(subdir)
                # entries that produce nothing (e.g. plain files) are skipped
                if csv_lines:
                    f.write(csv_lines)
        finally:
            # always go back, even if analysis fails part-way through
            chdir(start_dir)

    return 0
|
|
|
# strptime/strftime pattern shared by log parsing and CSV output:
# two-digit year, 24-hour time, numeric UTC offset
log_date_format = r'%y/%m/%d %H:%M:%S %z'


if __name__ == '__main__':
    # analyze_server() looks up `ip_cache` as a module-level name, so the
    # cache has to be bound under exactly this name before main() runs.
    with IPCache() as ip_cache:
        exit_status = main(sys.argv)
    # the cache has been written back to disk by the time we exit
    sys.exit(exit_status)