correctly analyzes distances, documents times
parent
12e624c641
commit
c69236ed9b
173
analyze.py
173
analyze.py
|
@ -1,61 +1,10 @@
|
||||||
import sys
|
import sys
|
||||||
import requests
|
import requests
|
||||||
from math import radians, cos, sin, acos, asin, sqrt
|
from math import radians, cos, sin, acos, asin, sqrt
|
||||||
from itertools import combinations
|
from pathlib import Path
|
||||||
|
from os import chdir
|
||||||
|
|
||||||
def main(filename: str):
|
def filter_logs(filename: str):
|
||||||
validnames = {
|
|
||||||
'wp-login.php',
|
|
||||||
'.env',
|
|
||||||
'plugins/system/debug/debug.xml',
|
|
||||||
'administrator/language/en-GB/en-GB.xml',
|
|
||||||
'administrator/help/en-GB/toc.json',
|
|
||||||
'.git/config',
|
|
||||||
'vendor/phpunit/phpunit/src/Util/PHP/eval-stdin.php',
|
|
||||||
'xmlrpc.php',
|
|
||||||
'wp1/wp-includes/wlwmanifest.xml',
|
|
||||||
'wp/wp-includes/wlwmanifest.xml',
|
|
||||||
'wordpress/wp-includes/wlwmanifest.xml',
|
|
||||||
'web/wp-includes/wlwmanifest.xml',
|
|
||||||
'test/wp-includes/wlwmanifest.xml',
|
|
||||||
'site/wp-includes/wlwmanifest.xml',
|
|
||||||
'shop/wp-includes/wlwmanifest.xml',
|
|
||||||
'cms/wp-includes/wlwmanifest.xml',
|
|
||||||
'blog/wp-includes/wlwmanifest.xml',
|
|
||||||
'2019/wp-includes/wlwmanifest.xml',
|
|
||||||
'wp-load.php',
|
|
||||||
'public/_ignition/health-check',
|
|
||||||
'_ignition/health-check',
|
|
||||||
'admin/.env',
|
|
||||||
'protected/.env',
|
|
||||||
'wp-includes/wp-class.php',
|
|
||||||
'wp-commentin.php',
|
|
||||||
'wp-signin.php'
|
|
||||||
}
|
|
||||||
hitfiles = {}.fromkeys(validnames, 0)
|
|
||||||
requesters = {}
|
|
||||||
locations = {}
|
|
||||||
for ip, date, time, timezone, status, method, file in filterwords(filename, validnames):
|
|
||||||
hitfiles[file] += 1
|
|
||||||
|
|
||||||
if ip in requesters:
|
|
||||||
requesters[ip] += 1
|
|
||||||
else:
|
|
||||||
requesters[ip] = 1
|
|
||||||
latlon = get_ip_latlon(ip)
|
|
||||||
if latlon is not None:
|
|
||||||
locations[ip] = latlon
|
|
||||||
|
|
||||||
hitfiles = sorted_dict(hitfiles)
|
|
||||||
print(f'{hitfiles = }')
|
|
||||||
requesters = sorted_dict(requesters)
|
|
||||||
print(f'{requesters = }')
|
|
||||||
for p1, p2 in combinations(locations.values(), 2):
|
|
||||||
d = latlon_distance(p1, p2)
|
|
||||||
if d != 0:
|
|
||||||
print(d)
|
|
||||||
|
|
||||||
def filterwords(filename: str, validnames: set):
|
|
||||||
with open(filename, 'r') as f:
|
with open(filename, 'r') as f:
|
||||||
for line in f:
|
for line in f:
|
||||||
# IP,YYMMDD hhmmss TIMEZONE,STATUS,METHOD,FILE
|
# IP,YYMMDD hhmmss TIMEZONE,STATUS,METHOD,FILE
|
||||||
|
@ -74,15 +23,14 @@ def get_ip_latlon(ip: str) -> (int, int):
|
||||||
# latitude and longitude
|
# latitude and longitude
|
||||||
addr = f'http://ip-api.com/json/{ip}'
|
addr = f'http://ip-api.com/json/{ip}'
|
||||||
response = requests.get(addr)
|
response = requests.get(addr)
|
||||||
|
if not response.ok:
|
||||||
|
raise RuntimeError(f'request for ip failed with {response.status_code}')
|
||||||
resulting_dict = eval(response.content)
|
resulting_dict = eval(response.content)
|
||||||
if resulting_dict['status'] != 'fail':
|
if resulting_dict['status'] != 'fail':
|
||||||
return resulting_dict['lat'], resulting_dict['lon']
|
return resulting_dict['lat'], resulting_dict['lon']
|
||||||
|
|
||||||
def sorted_dict(d: dict, reverse=True) -> dict:
|
def value_sort(d: dict, reverse=True) -> list:
    """Return the (key, value) pairs of *d* ordered by value.

    Args:
        d: mapping whose values can be compared with each other.
        reverse: when true (the default) the largest value comes first;
            otherwise the smallest value comes first.

    Returns:
        A list of ``(key, value)`` tuples. Note that this is a list of
        pairs and not a dict. The sort is stable, so pairs with equal
        values keep the order they had in *d*.
    """
    return sorted(d.items(), key=lambda kv_pair: kv_pair[1], reverse=reverse)
|
||||||
key: d[key]
|
|
||||||
for key in sorted(d, key=d.get, reverse=reverse)
|
|
||||||
}
|
|
||||||
|
|
||||||
def latlon_distance(p1, p2) -> float:
|
def latlon_distance(p1, p2) -> float:
|
||||||
# black magic do not touch. use the haversine formula to find the distance
|
# black magic do not touch. use the haversine formula to find the distance
|
||||||
|
@ -102,5 +50,110 @@ def latlon_distance(p1, p2) -> float:
|
||||||
|
|
||||||
return c * earth_radius_km
|
return c * earth_radius_km
|
||||||
|
|
||||||
|
def analyze_server(server: Path, serverip: str) -> None:
    """Print request statistics gathered from one server's access log.

    Reads ``access.log`` inside the *server* directory through
    ``filter_logs`` and prints four tables, each ordered by value through
    ``value_sort``:

    * ``hitfiles``   - number of requests per requested file
    * ``requesters`` - number of requests per client ip
    * ``distances``  - distance in km between the server and each client
    * ``times``      - number of requests per hour

    followed by the average of the collected distances.

    Args:
        server: directory holding the server's ``access.log``. Nothing is
            printed when this is not a directory.
        serverip: ip of the server itself, used as the reference point for
            the distances.
    """
    if not server.is_dir():
        return

    # Build the log path from the directory that was just checked, so the
    # lookup does not depend on the current working directory.
    filename = str(server / 'access.log')

    hitfiles = {}
    requesters = {}
    distances = {}
    times = {}

    # May be None when the lookup reports a failure; in that case no
    # distance can be computed for any client.
    self_latlon = get_ip_latlon(serverip)

    for ip, _date, time_str, _timezone, _status, _method, file in filter_logs(filename):
        hitfiles[file] = hitfiles.get(file, 0) + 1

        if ip in requesters:
            requesters[ip] += 1
        else:
            requesters[ip] = 1
            # Locate every client only once, the first time it shows up.
            if self_latlon is not None:
                latlon = get_ip_latlon(ip)
                if latlon is not None:
                    distances[ip] = latlon_distance(self_latlon, latlon)

        # The hour is whatever precedes the first ':' of the time field.
        hour = time_str.split(':')[0]
        times[hour] = times.get(hour, 0) + 1

    print(f'\n\n--- ANALYSIS FOR {server.name.upper()} ---\n')

    # Explicit name -> table pairs instead of eval() on variable names.
    tables = (
        ('hitfiles', hitfiles),
        ('requesters', requesters),
        ('distances', distances),
        ('times', times),
    )
    for dict_name, table in tables:
        print(
            dict_name + ': {\n '
            + ',\n '.join(
                f'{k!r}: {v!r}'
                for k, v in value_sort(table)
            )
            + '\n}'
        )

    # No client may have been located; avoid dividing by zero in that case.
    if distances:
        print(f'average: {sum(distances.values())/len(distances)}')
    else:
        print('average: n/a (no distances available)')
|
||||||
|
|
||||||
|
def get_server_ip(servername: str) -> str:
    """Return the ip associated with the server called *servername*.

    Args:
        servername: one of 'nova', 'singapore', 'sydney', 'dublin' or
            'brazil'.

    Returns:
        The ip of that server as a dotted string.

    Raises:
        ValueError: if *servername* is not one of the known servers.
    """
    # associate servers with ips
    known_ips = {
        'nova': '184.73.25.153',
        'singapore': '18.139.108.77',
        'sydney': '54.206.216.118',
        'dublin': '54.194.92.137',
        'brazil': '18.228.245.48',
    }

    try:
        return known_ips[servername]
    except KeyError:
        raise ValueError(f'{servername} is not a known server') from None
|
||||||
|
|
||||||
|
def main(args: list) -> int:
    """Analyze every server directory found inside the given log dirs.

    Args:
        args: command line in ``sys.argv`` form; ``args[0]`` is ignored and
            every following element is a directory whose sub-directories
            are named after known servers.

    Returns:
        1 when no log directory was given, 0 otherwise.

    Raises:
        ValueError: (from ``get_server_ip``) when a sub-directory is not
            named after a known server.
    """
    if len(args) < 2:
        # no log dirs provided
        print('no logdir provided')
        return 1

    start_dir = Path('.').resolve()

    for logdir in args[1:]:
        # Work from inside the log dir so the server directories can be
        # addressed by their bare names.
        chdir(logdir)
        try:
            serverdir = Path('.')
            for subdir in serverdir.iterdir():
                # Plain files are not servers; skip them before the name
                # lookup so a stray file does not abort the whole run.
                if not subdir.is_dir():
                    continue
                serverip = get_server_ip(subdir.name)
                analyze_server(subdir, serverip)
        finally:
            # Always go back, so a later (possibly relative) logdir still
            # resolves and the caller's working directory is left intact.
            chdir(start_dir)

    return 0
|
||||||
|
|
||||||
|
# Requested file paths of interest.
# NOTE(review): presumably the set that filter_logs matches requested files
# against - confirm against its body.
validnames = {
    # WordPress entry points
    'wp-login.php',
    'wp-load.php',
    'xmlrpc.php',
    'wp-includes/wp-class.php',
    'wp-commentin.php',
    'wp-signin.php',
    # wlwmanifest.xml below a number of install prefixes
    *(
        f'{prefix}/wp-includes/wlwmanifest.xml'
        for prefix in (
            'wp1', 'wp', 'wordpress', 'web', 'test',
            'site', 'shop', 'cms', 'blog', '2019',
        )
    ),
    # Environment and repository configuration files
    '.env',
    'admin/.env',
    'protected/.env',
    '.git/config',
    # Paths under plugins/ and administrator/
    'plugins/system/debug/debug.xml',
    'administrator/language/en-GB/en-GB.xml',
    'administrator/help/en-GB/toc.json',
    # PHPUnit and _ignition paths
    'vendor/phpunit/phpunit/src/Util/PHP/eval-stdin.php',
    'public/_ignition/health-check',
    '_ignition/health-check',
}
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
    # Hand main()'s return value to the interpreter as the exit status.
    raise SystemExit(main(sys.argv))
|
Loading…
Reference in New Issue