# Patch for analyze.py: switch the log analysis from a single log file to one
# report per server directory.
#
# NOTE(review): this patch was recovered from a copy in which newlines and
# indentation had been collapsed. Indentation, statement nesting and the position
# of blank context lines below are reconstructed -- verify against the target
# file before applying.
#
# What the patch does:
#   * Imports: removes `from itertools import combinations`; adds
#     `from pathlib import Path` and `from os import chdir`.
#   * Removes the old `main(filename)`, which counted hits per file and per
#     requester IP, looked up a lat/lon for IPs, and printed every non-zero
#     pairwise distance between the located IPs.
#   * Renames `filterwords(filename, validnames)` to `filter_logs(filename)`.
#     Only the first lines of its body are visible in this patch.
#   * `get_ip_latlon`: raises RuntimeError when `response.ok` is false.
#   * Replaces `sorted_dict` (a dict rebuilt in value order) with `value_sort`,
#     which returns `sorted(d.items(), ...)`.
#   * Adds `analyze_server(server, serverip)`: returns early when `server` is not
#     a directory, reads `<server.name>/access.log`, tallies hits per file, per
#     requester IP and per "hour" key, records the distance from the server to
#     requester IPs, then prints each table sorted by value plus the average
#     distance.
#   * Adds `get_server_ip(servername)`: fixed name -> IP mapping for five names;
#     raises ValueError for any other name.
#   * Adds a new `main(args)`: prints a message and returns 1 when no log
#     directory is given; otherwise, for each directory, chdir into it, run
#     `get_server_ip` + `analyze_server` on every entry, chdir back, return 0.
#   * Moves `validnames` to module level and changes the entry point to
#     `sys.exit(main(sys.argv))`.
#
# Review notes on the patched code (not changed here):
#   * `resulting_dict = eval(response.content)` (unchanged context line)
#     evaluates bytes received over HTTP as a Python expression. Flagged as a
#     security risk; a JSON parser would avoid executing remote content.
#   * `value_sort(eval(dict_name))` uses eval only to look up four local dicts by
#     name; an explicit name -> dict mapping would do the same without eval.
#   * `sum(distances.values())/len(distances)` raises ZeroDivisionError when
#     `distances` is empty (no log rows, or every lookup returned None).
#   * `get_ip_latlon` returns None implicitly when status is 'fail', and
#     `self_latlon` is passed to `latlon_distance` without a None check. How
#     `latlon_distance` treats None is not visible here -- confirm.
#   * `main` calls `get_server_ip(subdir.name)` for every directory entry before
#     `analyze_server` performs its `is_dir()` check, so any entry whose name is
#     not one of the five known servers raises ValueError.
#   * `value_sort` is annotated `-> dict` but `sorted(...)` returns a list of
#     (key, value) tuples.
#   * The hour key is `time_str.split(':')[0]`, while the format comment in
#     `filter_logs` shows the time as `hhmmss`. If the field has no ':' the key
#     is the whole time string -- confirm against `filter_logs`.
#   * `chdir(start_dir)` is skipped if `analyze_server` raises.
#   * Whether `filter_logs` still consults the module-level `validnames` is not
#     visible in this patch.
#   * Assumed nesting: the `get_ip_latlon(ip)` lookup is placed under `else:`
#     (first sighting of an IP) in both the removed and the added code -- confirm.
diff --git a/analyze.py b/analyze.py
index f7f2559..5e756c9 100644
--- a/analyze.py
+++ b/analyze.py
@@ -1,61 +1,10 @@
 import sys
 import requests
 from math import radians, cos, sin, acos, asin, sqrt
-from itertools import combinations
+from pathlib import Path
+from os import chdir
 
-def main(filename: str):
-    validnames = {
-        'wp-login.php',
-        '.env',
-        'plugins/system/debug/debug.xml',
-        'administrator/language/en-GB/en-GB.xml',
-        'administrator/help/en-GB/toc.json',
-        '.git/config',
-        'vendor/phpunit/phpunit/src/Util/PHP/eval-stdin.php',
-        'xmlrpc.php',
-        'wp1/wp-includes/wlwmanifest.xml',
-        'wp/wp-includes/wlwmanifest.xml',
-        'wordpress/wp-includes/wlwmanifest.xml',
-        'web/wp-includes/wlwmanifest.xml',
-        'test/wp-includes/wlwmanifest.xml',
-        'site/wp-includes/wlwmanifest.xml',
-        'shop/wp-includes/wlwmanifest.xml',
-        'cms/wp-includes/wlwmanifest.xml',
-        'blog/wp-includes/wlwmanifest.xml',
-        '2019/wp-includes/wlwmanifest.xml',
-        'wp-load.php',
-        'public/_ignition/health-check',
-        '_ignition/health-check',
-        'admin/.env',
-        'protected/.env',
-        'wp-includes/wp-class.php',
-        'wp-commentin.php',
-        'wp-signin.php'
-    }
-    hitfiles = {}.fromkeys(validnames, 0)
-    requesters = {}
-    locations = {}
-    for ip, date, time, timezone, status, method, file in filterwords(filename, validnames):
-        hitfiles[file] += 1
-
-        if ip in requesters:
-            requesters[ip] += 1
-        else:
-            requesters[ip] = 1
-            latlon = get_ip_latlon(ip)
-            if latlon is not None:
-                locations[ip] = latlon
-
-    hitfiles = sorted_dict(hitfiles)
-    print(f'{hitfiles = }')
-    requesters = sorted_dict(requesters)
-    print(f'{requesters = }')
-    for p1, p2 in combinations(locations.values(), 2):
-        d = latlon_distance(p1, p2)
-        if d != 0:
-            print(d)
-
-def filterwords(filename: str, validnames: set):
+def filter_logs(filename: str):
     with open(filename, 'r') as f:
         for line in f:
             # IP,YYMMDD hhmmss TIMEZONE,STATUS,METHOD,FILE
@@ -74,15 +23,14 @@ def get_ip_latlon(ip: str) -> (int, int):
     # latitude and longitude
     addr = f'http://ip-api.com/json/{ip}'
     response = requests.get(addr)
+    if not response.ok:
+        raise RuntimeError(f'request for ip failed with {response.status_code}')
     resulting_dict = eval(response.content)
     if resulting_dict['status'] != 'fail':
         return resulting_dict['lat'], resulting_dict['lon']
 
-def sorted_dict(d: dict, reverse=True) -> dict:
-    return {
-        key: d[key]
-        for key in sorted(d, key=d.get, reverse=reverse)
-    }
+def value_sort(d: dict, reverse=True) -> dict:
+    return sorted(d.items(), key=lambda kv_pair: kv_pair[1], reverse=reverse)
 
 def latlon_distance(p1, p2) -> float:
     # black magic do not touch. use the haversine formula to find the distance
@@ -102,5 +50,110 @@ def latlon_distance(p1, p2) -> float:
     return c * earth_radius_km
 
 
+def analyze_server(server: Path, serverip: str) -> None:
+    if not server.is_dir():
+        return
+
+    filename = f'{server.name}/access.log'
+    hitfiles = {}
+    requesters = {}
+    distances = {}
+    times = {}
+    self_latlon = get_ip_latlon(serverip)
+    for ip, date, time_str, timezone, status, method, file in filter_logs(filename):
+        if file in hitfiles:
+            hitfiles[file] += 1
+        else:
+            hitfiles[file] = 1
+
+        if ip in requesters:
+            requesters[ip] += 1
+        else:
+            requesters[ip] = 1
+            latlon = get_ip_latlon(ip)
+            if latlon is not None:
+                distances[ip] = latlon_distance(self_latlon, latlon)
+
+        hour = time_str.split(':')[0]
+        if hour in times:
+            times[hour] += 1
+        else:
+            times[hour] = 1
+
+    print(f'\n\n--- ANALYSIS FOR {server.name.upper()} ---\n')
+
+    for dict_name in ['hitfiles', 'requesters', 'distances', 'times']:
+        print(
+            dict_name + ': {\n ' +
+            ',\n '.join(
+                f'{k!r}: {v!r}'
+                for k, v in value_sort(eval(dict_name))
+            ) +
+            '\n}'
+        )
+
+    print(f'average: {sum(distances.values())/len(distances)}')
+
+def get_server_ip(servername: str) -> str:
+    # associate servers with ips
+    if servername == 'nova':
+        return '184.73.25.153'
+    elif servername == 'singapore':
+        return '18.139.108.77'
+    elif servername == 'sydney':
+        return '54.206.216.118'
+    elif servername == 'dublin':
+        return '54.194.92.137'
+    elif servername == 'brazil':
+        return '18.228.245.48'
+
+    raise ValueError(f'{servername} is not a known server')
+
+def main(args: list) -> int:
+    if len(args) == 1:
+        # no log dirs provided
+        print('no logdir provided')
+        return 1
+
+    start_dir = Path('.').resolve()
+
+    for logdir in args[1:]:
+        chdir(logdir)
+        serverdir = Path('.')
+        for subdir in serverdir.iterdir():
+            serverip = get_server_ip(subdir.name)
+            analyze_server(subdir, serverip)
+        chdir(start_dir)
+
+    return 0
+
+validnames = {
+    'wp-login.php',
+    '.env',
+    'plugins/system/debug/debug.xml',
+    'administrator/language/en-GB/en-GB.xml',
+    'administrator/help/en-GB/toc.json',
+    '.git/config',
+    'vendor/phpunit/phpunit/src/Util/PHP/eval-stdin.php',
+    'xmlrpc.php',
+    'wp1/wp-includes/wlwmanifest.xml',
+    'wp/wp-includes/wlwmanifest.xml',
+    'wordpress/wp-includes/wlwmanifest.xml',
+    'web/wp-includes/wlwmanifest.xml',
+    'test/wp-includes/wlwmanifest.xml',
+    'site/wp-includes/wlwmanifest.xml',
+    'shop/wp-includes/wlwmanifest.xml',
+    'cms/wp-includes/wlwmanifest.xml',
+    'blog/wp-includes/wlwmanifest.xml',
+    '2019/wp-includes/wlwmanifest.xml',
+    'wp-load.php',
+    'public/_ignition/health-check',
+    '_ignition/health-check',
+    'admin/.env',
+    'protected/.env',
+    'wp-includes/wp-class.php',
+    'wp-commentin.php',
+    'wp-signin.php'
+}
 if __name__ == '__main__':
-    main(sys.argv[1])
\ No newline at end of file
+    sys.exit(main(sys.argv))
\ No newline at end of file