correctly analyzes distances, documents times

master
Nicholas Hope 2023-01-01 20:57:52 -05:00
parent 12e624c641
commit c69236ed9b
1 changed files with 113 additions and 60 deletions

View File

@ -1,61 +1,10 @@
import sys import sys
import requests import requests
from math import radians, cos, sin, acos, asin, sqrt from math import radians, cos, sin, acos, asin, sqrt
from itertools import combinations from pathlib import Path
from os import chdir
def main(filename: str): def filter_logs(filename: str):
validnames = {
'wp-login.php',
'.env',
'plugins/system/debug/debug.xml',
'administrator/language/en-GB/en-GB.xml',
'administrator/help/en-GB/toc.json',
'.git/config',
'vendor/phpunit/phpunit/src/Util/PHP/eval-stdin.php',
'xmlrpc.php',
'wp1/wp-includes/wlwmanifest.xml',
'wp/wp-includes/wlwmanifest.xml',
'wordpress/wp-includes/wlwmanifest.xml',
'web/wp-includes/wlwmanifest.xml',
'test/wp-includes/wlwmanifest.xml',
'site/wp-includes/wlwmanifest.xml',
'shop/wp-includes/wlwmanifest.xml',
'cms/wp-includes/wlwmanifest.xml',
'blog/wp-includes/wlwmanifest.xml',
'2019/wp-includes/wlwmanifest.xml',
'wp-load.php',
'public/_ignition/health-check',
'_ignition/health-check',
'admin/.env',
'protected/.env',
'wp-includes/wp-class.php',
'wp-commentin.php',
'wp-signin.php'
}
hitfiles = {}.fromkeys(validnames, 0)
requesters = {}
locations = {}
for ip, date, time, timezone, status, method, file in filterwords(filename, validnames):
hitfiles[file] += 1
if ip in requesters:
requesters[ip] += 1
else:
requesters[ip] = 1
latlon = get_ip_latlon(ip)
if latlon is not None:
locations[ip] = latlon
hitfiles = sorted_dict(hitfiles)
print(f'{hitfiles = }')
requesters = sorted_dict(requesters)
print(f'{requesters = }')
for p1, p2 in combinations(locations.values(), 2):
d = latlon_distance(p1, p2)
if d != 0:
print(d)
def filterwords(filename: str, validnames: set):
with open(filename, 'r') as f: with open(filename, 'r') as f:
for line in f: for line in f:
# IP,YYMMDD hhmmss TIMEZONE,STATUS,METHOD,FILE # IP,YYMMDD hhmmss TIMEZONE,STATUS,METHOD,FILE
@ -74,15 +23,14 @@ def get_ip_latlon(ip: str) -> (int, int):
# latitude and longitude # latitude and longitude
addr = f'http://ip-api.com/json/{ip}' addr = f'http://ip-api.com/json/{ip}'
response = requests.get(addr) response = requests.get(addr)
if not response.ok:
raise RuntimeError(f'request for ip failed with {response.status_code}')
resulting_dict = eval(response.content) resulting_dict = eval(response.content)
if resulting_dict['status'] != 'fail': if resulting_dict['status'] != 'fail':
return resulting_dict['lat'], resulting_dict['lon'] return resulting_dict['lat'], resulting_dict['lon']
def value_sort(d: dict, reverse=True) -> list:
    """Return the items of *d* as a list ordered by value.

    Args:
        d: mapping whose values can be compared with one another.
        reverse: when True (the default) the largest value comes first.

    Returns:
        A new list of ``(key, value)`` tuples; *d* itself is not modified.
    """
    # sorted() is stable, so keys with equal values keep d's iteration order
    return sorted(d.items(), key=lambda kv_pair: kv_pair[1], reverse=reverse)
def latlon_distance(p1, p2) -> float: def latlon_distance(p1, p2) -> float:
# black magic do not touch. use the haversine formula to find the distance # black magic do not touch. use the haversine formula to find the distance
@ -102,5 +50,110 @@ def latlon_distance(p1, p2) -> float:
return c * earth_radius_km return c * earth_radius_km
def analyze_server(server: Path, serverip: str) -> None:
    """Print a traffic summary for one server's access log.

    Reads ``access.log`` inside *server* through ``filter_logs`` and prints
    four tables, each ordered by value with the largest first: hits per
    requested file, hits per requester IP, the ``latlon_distance`` between
    the server and each requester, and hits per hour field. The mean of the
    collected distances is printed last.

    Args:
        server: directory holding the server's ``access.log``; anything that
            is not a directory is silently ignored.
        serverip: IP address of the server, handed to ``get_ip_latlon`` to
            obtain the point that distances are measured from.
    """
    if not server.is_dir():
        return

    # open the log inside the directory that was just checked, rather than a
    # path rebuilt from the bare directory name
    filename = str(server / 'access.log')
    hitfiles = {}
    requesters = {}
    distances = {}
    times = {}
    # None when the lookup reports failure; distances are skipped in that case
    self_latlon = get_ip_latlon(serverip)

    for ip, _date, time_str, _timezone, _status, _method, file in filter_logs(filename):
        hitfiles[file] = hitfiles.get(file, 0) + 1

        if ip in requesters:
            requesters[ip] += 1
        else:
            requesters[ip] = 1
            # geolocate an address only the first time it is seen
            if self_latlon is not None:
                latlon = get_ip_latlon(ip)
                if latlon is not None:
                    distances[ip] = latlon_distance(self_latlon, latlon)

        # the hour is whatever precedes the first ':' of the time field
        hour = time_str.split(':')[0]
        times[hour] = times.get(hour, 0) + 1

    print(f'\n\n--- ANALYSIS FOR {server.name.upper()} ---\n')
    # explicit name -> table pairs; no eval() needed to find the local dicts
    tables = [
        ('hitfiles', hitfiles),
        ('requesters', requesters),
        ('distances', distances),
        ('times', times),
    ]
    for dict_name, table in tables:
        print(
            dict_name + ': {\n '
            + ',\n '.join(f'{k!r}: {v!r}' for k, v in value_sort(table))
            + '\n}'
        )

    if distances:
        print(f'average: {sum(distances.values())/len(distances)}')
    else:
        # nothing could be geolocated, so there is no mean to report
        print('average: n/a')
def get_server_ip(servername: str) -> str:
    """Return the IP address associated with a named server.

    Args:
        servername: one of 'nova', 'singapore', 'sydney', 'dublin' or
            'brazil'; the match is exact and case-sensitive.

    Returns:
        The address as a dotted-decimal string.

    Raises:
        ValueError: if *servername* is not one of the known servers.
    """
    # associate servers with ips
    server_ips = {
        'nova': '184.73.25.153',
        'singapore': '18.139.108.77',
        'sydney': '54.206.216.118',
        'dublin': '54.194.92.137',
        'brazil': '18.228.245.48',
    }
    try:
        return server_ips[servername]
    except KeyError:
        # callers expect ValueError; the KeyError itself carries no extra information
        raise ValueError(f'{servername} is not a known server') from None
def main(args: list) -> int:
    """Analyze every server directory found under each given log directory.

    Args:
        args: argv-style list; ``args[0]`` is ignored and each later entry is
            a directory whose sub-directories are named after known servers.

    Returns:
        1 when no log directory was supplied, otherwise 0.

    Raises:
        ValueError: propagated from ``get_server_ip`` when a sub-directory's
            name is not a known server.
    """
    if len(args) < 2:
        # no log dirs provided
        print('no logdir provided')
        return 1

    # absolute, so it stays valid as a return target after chdir()
    start_dir = Path('.').resolve()
    for logdir in args[1:]:
        # entries yielded by Path('.').iterdir() are relative, so they only
        # resolve correctly while the log directory is the working directory
        chdir(logdir)
        try:
            for subdir in sorted(Path('.').iterdir()):
                if not subdir.is_dir():
                    # stray files are not servers; skip them before the name
                    # lookup, which would otherwise raise
                    continue
                serverip = get_server_ip(subdir.name)
                analyze_server(subdir, serverip)
        finally:
            # go back even on error so a later relative logdir is resolved
            # against the original directory
            chdir(start_dir)
    return 0
# Request paths of interest to the log analysis.
# NOTE(review): presumably consulted by filter_logs, whose body is outside
# this view -- confirm before changing the contents.
validnames = {
    # wp-* PHP scripts and xmlrpc
    'wp-login.php',
    'wp-load.php',
    'wp-commentin.php',
    'wp-signin.php',
    'wp-includes/wp-class.php',
    'xmlrpc.php',
    # environment and version-control configuration files
    '.env',
    'admin/.env',
    'protected/.env',
    '.git/config',
    # other application paths
    'plugins/system/debug/debug.xml',
    'administrator/language/en-GB/en-GB.xml',
    'administrator/help/en-GB/toc.json',
    'vendor/phpunit/phpunit/src/Util/PHP/eval-stdin.php',
    'public/_ignition/health-check',
    '_ignition/health-check',
} | {
    # the same manifest file requested under ten different site prefixes
    f'{site}/wp-includes/wlwmanifest.xml'
    for site in (
        'wp1', 'wp', 'wordpress', 'web', 'test',
        'site', 'shop', 'cms', 'blog', '2019',
    )
}
if __name__ == '__main__': if __name__ == '__main__':
main(sys.argv[1]) sys.exit(main(sys.argv))