"""Summarise probe requests recorded in a comma-separated access log.

The script counts successful GET requests for a fixed list of commonly
probed paths, counts requests per client IP, looks each IP up on
ip-api.com, and prints the pairwise distances between the resolved
locations.
"""
import sys
from itertools import combinations
from math import radians, cos, sin, acos, asin, sqrt
from typing import Iterator, Optional, Tuple

try:
    import requests
except ImportError:
    # Only get_ip_latlon() needs the HTTP client; the parsing and geometry
    # helpers stay importable without it.
    requests = None

# Upper bound, in seconds, on a single geolocation lookup so that one
# stalled connection cannot hang the whole run.
_LOOKUP_TIMEOUT_SECONDS = 10


def main(filename: str):
    """Print hit counts, requester counts and pairwise distances.

    Args:
        filename: Path of the log file to analyse (see ``filterwords`` for
            the expected line format).
    """
    validnames = {
        'wp-login.php',
        '.env',
        'plugins/system/debug/debug.xml',
        'administrator/language/en-GB/en-GB.xml',
        'administrator/help/en-GB/toc.json',
        '.git/config',
        'vendor/phpunit/phpunit/src/Util/PHP/eval-stdin.php',
        'xmlrpc.php',
        'wp1/wp-includes/wlwmanifest.xml',
        'wp/wp-includes/wlwmanifest.xml',
        'wordpress/wp-includes/wlwmanifest.xml',
        'web/wp-includes/wlwmanifest.xml',
        'test/wp-includes/wlwmanifest.xml',
        'site/wp-includes/wlwmanifest.xml',
        'shop/wp-includes/wlwmanifest.xml',
        'cms/wp-includes/wlwmanifest.xml',
        'blog/wp-includes/wlwmanifest.xml',
        '2019/wp-includes/wlwmanifest.xml',
        'wp-load.php',
        'public/_ignition/health-check',
        '_ignition/health-check',
        'admin/.env',
        'protected/.env',
        'wp-includes/wp-class.php',
        'wp-commentin.php',
        'wp-signin.php',
    }

    # Every watched file starts at zero so files that were never hit still
    # appear in the report.
    hitfiles = dict.fromkeys(validnames, 0)
    requesters = {}
    locations = {}

    for ip, date, time, timezone, status, method, file in filterwords(
            filename, validnames):
        hitfiles[file] += 1
        if ip in requesters:
            requesters[ip] += 1
        else:
            requesters[ip] = 1
            # Geolocate each address once, the first time it is seen.
            latlon = get_ip_latlon(ip)
            if latlon is not None:
                locations[ip] = latlon

    hitfiles = sorted_dict(hitfiles)
    print(f'{hitfiles = }')

    requesters = sorted_dict(requesters)
    print(f'{requesters = }')

    for p1, p2 in combinations(locations.values(), 2):
        d = latlon_distance(p1, p2)
        # Addresses that resolve to the same coordinates are not reported.
        if d != 0:
            print(d)


def filterwords(filename: str, validnames: set) -> Iterator[Tuple[str, ...]]:
    """Yield the log entries that are successful GETs for a watched file.

    Each line is expected to look like
    ``IP,YYMMDD hhmmss TIMEZONE,STATUS,METHOD,FILE``. The first four
    ``/``-separated components of FILE are discarded before the remainder
    is compared against ``validnames``.

    Lines that do not have five comma-separated fields (for example blank
    lines), or whose timestamp field does not have exactly three
    space-separated parts, are skipped instead of aborting the run.

    Args:
        filename: Path of the log file to read.
        validnames: Relative file names that count as a hit.

    Yields:
        ``(ip, date, time, timezone, status, method, file)`` tuples of
        strings, in file order.
    """
    with open(filename, 'r') as f:
        for line in f:
            # IP,YYMMDD hhmmss TIMEZONE,STATUS,METHOD,FILE
            fields = line.split(',', 4)
            if len(fields) != 5:
                continue
            ip, timethings, status, method, filepath = fields

            if status != '200' or method != 'GET':
                continue

            file = '/'.join(filepath.split('/')[4:]).strip()
            if file not in validnames:
                continue

            timeparts = timethings.split(' ')
            if len(timeparts) != 3:
                continue
            date, time, timezone = timeparts

            yield ip, date, time, timezone, status, method, file


def get_ip_latlon(ip: str) -> Optional[Tuple[float, float]]:
    """Look up the latitude and longitude of ``ip`` on ip-api.com.

    Args:
        ip: The address to look up.

    Returns:
        ``(lat, lon)`` as reported by the service, or ``None`` when the
        service answers with status ``'fail'``.

    Raises:
        RuntimeError: If the ``requests`` package is not installed.

    Network errors, timeouts and undecodable responses propagate as the
    exceptions raised by ``requests``.
    """
    if requests is None:
        raise RuntimeError(
            "the 'requests' package is required for IP geolocation lookups")

    addr = f'http://ip-api.com/json/{ip}'
    response = requests.get(addr, timeout=_LOOKUP_TIMEOUT_SECONDS)

    # SECURITY: the body comes from a remote service and must be parsed as
    # JSON. It was previously passed to eval(), which would execute
    # whatever the remote end (or anyone on the plain-HTTP path) sent back.
    result = response.json()

    if result['status'] != 'fail':
        return result['lat'], result['lon']
    return None


def sorted_dict(d: dict, reverse=True) -> dict:
    """Return a copy of ``d`` whose keys are ordered by their values.

    Args:
        d: Mapping with mutually comparable values.
        reverse: Largest value first when true (the default), smallest
            first otherwise.

    Returns:
        A new dict; keys with equal values keep their original relative
        order because the sort is stable.
    """
    return {
        key: d[key]
        for key in sorted(d, key=d.get, reverse=reverse)
    }


def latlon_distance(p1, p2) -> float:
    """Return the great-circle distance between two points in kilometres.

    Uses the haversine formula with an Earth radius of 6371 km.

    Args:
        p1: ``(latitude, longitude)`` of the first point, in degrees.
        p2: ``(latitude, longitude)`` of the second point, in degrees.
    """
    lat1, lon1 = p1
    lat2, lon2 = p2

    lon1 = radians(lon1)
    lat1 = radians(lat1)
    lon2 = radians(lon2)
    lat2 = radians(lat2)

    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Mathematically a <= 1; clamp so that floating-point rounding for
    # (near-)antipodal points can never push asin() outside its domain.
    a = min(1.0, a)
    c = 2 * asin(sqrt(a))

    earth_radius_km = 6371
    return c * earth_radius_km


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit(f'usage: {sys.argv[0]} LOGFILE')
    main(sys.argv[1])