now caches gotten ips

master
Nicholas Hope 2023-01-03 21:54:09 -05:00
parent 119bb50dc3
commit 51e0a1944a
1 changed files with 77 additions and 81 deletions

View File

@ -3,6 +3,53 @@ import requests
from math import radians, cos, sin, acos, asin, sqrt from math import radians, cos, sin, acos, asin, sqrt
from pathlib import Path from pathlib import Path
from os import chdir from os import chdir
from datetime import datetime, timezone
from dateutil import tz
import pickle
class IPCache:
    """Look up the location and UTC offset of IP addresses, caching the results.

    Intended to be used as a context manager: entering loads previously cached
    lookups from a pickle file in the directory the object was created in, and
    exiting writes the cache back to that same file.
    """

    picklefile_name = 'cached_ips.pkl'

    def __init__(self, /):
        # Remember where we were created so the cache file is always read from
        # and written to the same directory, even if the caller chdir()s away.
        # Path objects are no longer context managers (removed in Python 3.13),
        # so resolve directly instead of going through `with Path('.')`.
        self.creation_dir = Path('.').resolve()
        # Start with an empty cache and the default file name so the object is
        # usable even before (or without) entering the `with` block.
        self.cache = {}
        self.filename = self.picklefile_name

    def get(self, ip, /):
        """Return ``((lat, lon), timezone)`` for `ip`, querying ip-api.com on a miss.

        The timezone is a fixed-offset `datetime.timezone` built from the UTC
        offset that the reported zone has at the moment of the lookup, so a
        cached entry does not follow later daylight-saving changes.

        Raises:
            RuntimeError: if the HTTP request fails, if the service reports
                the ip as invalid, or if the reported timezone name is unknown.
        """
        if ip in self.cache:
            return self.cache[ip]

        addr = f'http://ip-api.com/json/{ip}'
        response = requests.get(addr)
        if not response.ok:
            raise RuntimeError(f'request for ip failed with {response.status_code}')

        # Parse the body as JSON; never eval() data received over the network.
        resulting_dict = response.json()
        if resulting_dict['status'] == 'fail':
            raise RuntimeError('ip was invalid')

        # the given timezone is like, 'Australia/Sydney'. we need to convert to
        # a datetime.timezone type
        timezone_str = resulting_dict['timezone']
        tzfile = tz.gettz(timezone_str)
        if tzfile is None:
            raise RuntimeError(f'unknown timezone {timezone_str!r} for ip')
        # Ask for "now" directly in the target zone so the offset reflects the
        # zone's current rules (datetime.utcnow() is deprecated and naive).
        as_timedelta = datetime.now(tz=tzfile).utcoffset()
        as_timezone_type = timezone(as_timedelta)

        entry = ((resulting_dict['lat'], resulting_dict['lon']), as_timezone_type)
        self.cache[ip] = entry
        return entry

    def __enter__(self, filename=picklefile_name, /):
        """Load the cache from `filename` (relative to the creation directory)."""
        # Note: this changes the process working directory as a side effect.
        chdir(self.creation_dir)
        if Path(filename).exists():
            # SECURITY: unpickling can execute arbitrary code. This is only
            # acceptable because the file is one this class wrote itself;
            # never point `filename` at a file from an untrusted source.
            with open(filename, 'rb') as f:
                cache = pickle.load(f)
        else:
            cache = {}
        self.filename = filename
        self.cache = cache
        return self

    def __exit__(self, err_type, err_value, traceback, /):
        """Write the cache back to disk; any in-flight exception still propagates."""
        # The cache is saved even when the block raised, so lookups that
        # succeeded before the failure are not lost.
        chdir(self.creation_dir)
        with open(self.filename, 'wb') as f:
            pickle.dump(self.cache, f)
def filter_logs(filename: str): def filter_logs(filename: str):
with open(filename, 'r') as f: with open(filename, 'r') as f:
@ -13,24 +60,8 @@ def filter_logs(filename: str):
if ( if (
status != '200' status != '200'
or method != 'GET' or method != 'GET'
or file not in validnames
): continue ): continue
date, time, timezone = timethings.split(' ') yield ip, timethings, status, method, file
yield ip, date, time, timezone, status, method, file
def get_ip_latlon(ip: str) -> 'tuple[float, float] | None':
    """Return the (latitude, longitude) that ip-api.com reports for `ip`.

    Returns None when the service answers but reports the lookup as failed.

    Raises:
        RuntimeError: if the HTTP request itself does not succeed.
    """
    # make a request to ip-api.com to associate an ip to a
    # latitude and longitude
    addr = f'http://ip-api.com/json/{ip}'
    response = requests.get(addr)
    if not response.ok:
        raise RuntimeError(f'request for ip failed with {response.status_code}')

    # Parse the body as JSON; never eval() data received over the network.
    resulting_dict = response.json()
    if resulting_dict['status'] == 'fail':
        return None
    return resulting_dict['lat'], resulting_dict['lon']
def value_sort(d: dict, reverse=True) -> list:
    """Return the items of `d` as a list of (key, value) pairs ordered by value.

    Pairs are ordered from largest to smallest value by default; pass
    ``reverse=False`` for ascending order. Pairs with equal values keep the
    order they have in `d`.
    """
    return sorted(d.items(), key=lambda kv_pair: kv_pair[1], reverse=reverse)
def latlon_distance(p1, p2) -> float: def latlon_distance(p1, p2) -> float:
# black magic do not touch. use the haversine formula to find the distance # black magic do not touch. use the haversine formula to find the distance
@ -54,45 +85,31 @@ def analyze_server(server: Path, serverip: str) -> None:
if not server.is_dir(): if not server.is_dir():
return return
result = ''
filename = f'{server.name}/access.log' filename = f'{server.name}/access.log'
hitfiles = {} self_latlon, _ = ip_cache.get(serverip)
requesters = {} for ip, timethings, status, method, file in filter_logs(filename):
distances = {} # get_ip_latlon_tz() returns the latitude, longitude, and timezone
times = {} # of an ip
self_latlon = get_ip_latlon(serverip) # TODO: cache results in a picklefile
for ip, date, time_str, timezone, status, method, file in filter_logs(filename): latlon, timezone = ip_cache.get(ip)
if file in hitfiles:
hitfiles[file] += 1
else:
hitfiles[file] = 1
if ip in requesters: # convert `timethings` to a datetime object
requesters[ip] += 1 time_of_hit = datetime.strptime(timethings, log_date_format)
else: # convert from its default timezone in GMT to the timezone of the requester
requesters[ip] = 1 localtime = time_of_hit.replace(tzinfo=timezone.utc).astimezone(timezone)
latlon = get_ip_latlon(ip) # convert this time back to a string for logging purposes
if latlon is not None: localtime_str = localtime.strftime(log_date_format)
distances[ip] = latlon_distance(self_latlon, latlon)
hour = time_str.split(':')[0] distance = latlon_distance(self_latlon, latlon)
if hour in times: date, time = timethings.split(' ')[:2]
times[hour] += 1 result += (
else: f'{server.name},{ip},{distance},{date},{time},'
times[hour] = 1 + f'{localtime_str},{method},{file}\n'
print(f'\n\n--- ANALYSIS FOR {server.name.upper()} ---\n')
for dict_name in ['hitfiles', 'requesters', 'distances', 'times']:
print(
dict_name + ': {\n '
+ ',\n '.join(
f'{k!r}: {v!r}'
for k, v in value_sort(eval(dict_name))
)
+ '\n}'
) )
print(f'average: {sum(distances.values())/len(distances)}') return result
def get_server_ip(servername: str) -> str: def get_server_ip(servername: str) -> str:
# associate servers with ips # associate servers with ips
@ -115,45 +132,24 @@ def main(args: list) -> int:
print('no logdir provided') print('no logdir provided')
return 1 return 1
outfile = 'analysis.csv'
start_dir = Path('.').resolve() start_dir = Path('.').resolve()
f = open(outfile, 'a')
for logdir in args[1:]: for logdir in args[1:]:
chdir(logdir) chdir(logdir)
serverdir = Path('.') serverdir = Path('.')
for subdir in serverdir.iterdir(): for subdir in serverdir.iterdir():
serverip = get_server_ip(subdir.name) serverip = get_server_ip(subdir.name)
analyze_server(subdir, serverip) csv_lines = analyze_server(subdir, serverip)
chdir(start_dir) chdir(start_dir)
f.write(csv_lines)
f.close()
return 0 return 0
validnames = { log_date_format = r'%y/%m/%d %H:%M:%S %z'
'wp-login.php',
'.env',
'plugins/system/debug/debug.xml',
'administrator/language/en-GB/en-GB.xml',
'administrator/help/en-GB/toc.json',
'.git/config',
'vendor/phpunit/phpunit/src/Util/PHP/eval-stdin.php',
'xmlrpc.php',
'wp1/wp-includes/wlwmanifest.xml',
'wp/wp-includes/wlwmanifest.xml',
'wordpress/wp-includes/wlwmanifest.xml',
'web/wp-includes/wlwmanifest.xml',
'test/wp-includes/wlwmanifest.xml',
'site/wp-includes/wlwmanifest.xml',
'shop/wp-includes/wlwmanifest.xml',
'cms/wp-includes/wlwmanifest.xml',
'blog/wp-includes/wlwmanifest.xml',
'2019/wp-includes/wlwmanifest.xml',
'wp-load.php',
'public/_ignition/health-check',
'_ignition/health-check',
'admin/.env',
'protected/.env',
'wp-includes/wp-class.php',
'wp-commentin.php',
'wp-signin.php'
}
if __name__ == '__main__': if __name__ == '__main__':
sys.exit(main(sys.argv)) with IPCache() as ip_cache:
sys.exit(main(sys.argv))