now caches gotten ips
parent
119bb50dc3
commit
51e0a1944a
158
analyze.py
158
analyze.py
|
@ -3,6 +3,53 @@ import requests
|
|||
from math import radians, cos, sin, acos, asin, sqrt
|
||||
from pathlib import Path
|
||||
from os import chdir
|
||||
from datetime import datetime, timezone
|
||||
from dateutil import tz
|
||||
import pickle
|
||||
|
||||
class IPCache:
    """Look up IP geolocation on ip-api.com and persist the results on disk.

    Meant to be used as a context manager: entering loads a previously
    pickled cache (if one exists) from the directory the instance was
    created in, and exiting writes the cache back to that same file.

    Each cache entry maps an ip string to
    ``((latitude, longitude), datetime.timezone)``.
    """

    picklefile_name = 'cached_ips.pkl'

    def __init__(self, /):
        # Remember where we were created so the pickle file is always
        # resolved against the same directory, whatever the cwd is later.
        self.creation_dir = Path('.').resolve()
        # Sensible defaults so get() also works outside a `with` block.
        self.filename = self.picklefile_name
        self.cache = {}

    def get(self, ip, /):
        """Return ``((lat, lon), timezone)`` for *ip*, querying on a cache miss.

        Raises:
            RuntimeError: if the HTTP request is not OK, the service reports
                the lookup as failed, or the reported timezone is unknown.
        """
        if ip in self.cache:
            return self.cache[ip]

        addr = f'http://ip-api.com/json/{ip}'
        # A timeout keeps one stalled lookup from hanging the whole run.
        response = requests.get(addr, timeout=10)
        if not response.ok:
            raise RuntimeError(f'request for ip failed with {response.status_code}')

        # Parse the body as JSON; never eval() text received from the network.
        resulting_dict = response.json()
        if resulting_dict['status'] == 'fail':
            raise RuntimeError('ip was invalid')

        # the given timezone is like, 'Australia/Sydney'. we need to convert to
        # a datetime.timezone type
        timezone_str = resulting_dict['timezone']
        tzfile = tz.gettz(timezone_str)
        if tzfile is None:
            raise RuntimeError(f'unknown timezone {timezone_str!r} for ip')
        # Ask the zone for its offset right now. The cached value is a fixed
        # offset, so it reflects the DST state at lookup time.
        as_timezone_type = timezone(datetime.now(tzfile).utcoffset())

        entry = ((resulting_dict['lat'], resulting_dict['lon']), as_timezone_type)
        self.cache[ip] = entry
        return entry

    def __enter__(self, filename=picklefile_name, /):
        """Load the cache from *filename*, resolved against the creation dir.

        A missing file simply yields an empty cache.
        """
        self.filename = filename
        try:
            # NOTE: unpickling can execute arbitrary code; only point this at
            # a cache file that this program wrote itself.
            with open(self.creation_dir / filename, 'rb') as f:
                self.cache = pickle.load(f)
        except FileNotFoundError:
            self.cache = {}
        return self

    def __exit__(self, err_type, err_value, traceback, /):
        """Write the cache back to disk; exceptions from the body propagate."""
        with open(self.creation_dir / self.filename, 'wb') as f:
            pickle.dump(self.cache, f)
|
||||
|
||||
def filter_logs(filename: str):
|
||||
with open(filename, 'r') as f:
|
||||
|
@ -13,24 +60,8 @@ def filter_logs(filename: str):
|
|||
if (
|
||||
status != '200'
|
||||
or method != 'GET'
|
||||
or file not in validnames
|
||||
): continue
|
||||
date, time, timezone = timethings.split(' ')
|
||||
yield ip, date, time, timezone, status, method, file
|
||||
|
||||
def get_ip_latlon(ip: str) -> 'tuple[float, float] | None':
    """Ask ip-api.com for the latitude and longitude associated with *ip*.

    Returns:
        ``(lat, lon)`` as reported by the service, or ``None`` when the
        service answers with status ``'fail'``.

    Raises:
        RuntimeError: if the HTTP response status is not OK.
    """
    addr = f'http://ip-api.com/json/{ip}'
    # A timeout keeps one stalled lookup from hanging the whole run.
    response = requests.get(addr, timeout=10)
    if not response.ok:
        raise RuntimeError(f'request for ip failed with {response.status_code}')
    # Parse the body as JSON; never eval() text received from the network.
    resulting_dict = response.json()
    if resulting_dict['status'] == 'fail':
        return None
    return resulting_dict['lat'], resulting_dict['lon']
|
||||
|
||||
def value_sort(d: dict, reverse=True) -> list:
    """Return the items of *d* as ``(key, value)`` pairs ordered by value.

    The largest values come first by default; pass ``reverse=False`` for
    ascending order. Items with equal values keep their dict order.
    """
    return sorted(d.items(), key=lambda kv_pair: kv_pair[1], reverse=reverse)
|
||||
yield ip, timethings, status, method, file
|
||||
|
||||
def latlon_distance(p1, p2) -> float:
|
||||
# black magic do not touch. use the haversine formula to find the distance
|
||||
|
@ -54,45 +85,31 @@ def analyze_server(server: Path, serverip: str) -> None:
|
|||
if not server.is_dir():
|
||||
return
|
||||
|
||||
result = ''
|
||||
|
||||
filename = f'{server.name}/access.log'
|
||||
hitfiles = {}
|
||||
requesters = {}
|
||||
distances = {}
|
||||
times = {}
|
||||
self_latlon = get_ip_latlon(serverip)
|
||||
for ip, date, time_str, timezone, status, method, file in filter_logs(filename):
|
||||
if file in hitfiles:
|
||||
hitfiles[file] += 1
|
||||
else:
|
||||
hitfiles[file] = 1
|
||||
self_latlon, _ = ip_cache.get(serverip)
|
||||
for ip, timethings, status, method, file in filter_logs(filename):
|
||||
# ip_cache.get() returns the (latitude, longitude) pair and the timezone
|
||||
# of an ip
|
||||
# results are cached in a picklefile by IPCache
|
||||
latlon, timezone = ip_cache.get(ip)
|
||||
|
||||
if ip in requesters:
|
||||
requesters[ip] += 1
|
||||
else:
|
||||
requesters[ip] = 1
|
||||
latlon = get_ip_latlon(ip)
|
||||
if latlon is not None:
|
||||
distances[ip] = latlon_distance(self_latlon, latlon)
|
||||
# convert `timethings` to a datetime object
|
||||
time_of_hit = datetime.strptime(timethings, log_date_format)
|
||||
# convert from its default timezone in GMT to the timezone of the requester
|
||||
localtime = time_of_hit.replace(tzinfo=timezone.utc).astimezone(timezone)
|
||||
# convert this time back to a string for logging purposes
|
||||
localtime_str = localtime.strftime(log_date_format)
|
||||
|
||||
hour = time_str.split(':')[0]
|
||||
if hour in times:
|
||||
times[hour] += 1
|
||||
else:
|
||||
times[hour] = 1
|
||||
|
||||
print(f'\n\n--- ANALYSIS FOR {server.name.upper()} ---\n')
|
||||
|
||||
for dict_name in ['hitfiles', 'requesters', 'distances', 'times']:
|
||||
print(
|
||||
dict_name + ': {\n '
|
||||
+ ',\n '.join(
|
||||
f'{k!r}: {v!r}'
|
||||
for k, v in value_sort(eval(dict_name))
|
||||
)
|
||||
+ '\n}'
|
||||
distance = latlon_distance(self_latlon, latlon)
|
||||
date, time = timethings.split(' ')[:2]
|
||||
result += (
|
||||
f'{server.name},{ip},{distance},{date},{time},'
|
||||
+ f'{localtime_str},{method},{file}\n'
|
||||
)
|
||||
|
||||
print(f'average: {sum(distances.values())/len(distances)}')
|
||||
return result
|
||||
|
||||
def get_server_ip(servername: str) -> str:
|
||||
# associate servers with ips
|
||||
|
@ -115,45 +132,24 @@ def main(args: list) -> int:
|
|||
print('no logdir provided')
|
||||
return 1
|
||||
|
||||
outfile = 'analysis.csv'
|
||||
start_dir = Path('.').resolve()
|
||||
|
||||
f = open(outfile, 'a')
|
||||
for logdir in args[1:]:
|
||||
chdir(logdir)
|
||||
serverdir = Path('.')
|
||||
for subdir in serverdir.iterdir():
|
||||
serverip = get_server_ip(subdir.name)
|
||||
analyze_server(subdir, serverip)
|
||||
csv_lines = analyze_server(subdir, serverip)
|
||||
chdir(start_dir)
|
||||
f.write(csv_lines)
|
||||
f.close()
|
||||
|
||||
return 0
|
||||
|
||||
# Request paths of interest: filter_logs skips every log entry whose
# requested file is not in this set.
validnames = {
|
||||
'wp-login.php',
|
||||
'.env',
|
||||
'plugins/system/debug/debug.xml',
|
||||
'administrator/language/en-GB/en-GB.xml',
|
||||
'administrator/help/en-GB/toc.json',
|
||||
'.git/config',
|
||||
'vendor/phpunit/phpunit/src/Util/PHP/eval-stdin.php',
|
||||
'xmlrpc.php',
|
||||
'wp1/wp-includes/wlwmanifest.xml',
|
||||
'wp/wp-includes/wlwmanifest.xml',
|
||||
'wordpress/wp-includes/wlwmanifest.xml',
|
||||
'web/wp-includes/wlwmanifest.xml',
|
||||
'test/wp-includes/wlwmanifest.xml',
|
||||
'site/wp-includes/wlwmanifest.xml',
|
||||
'shop/wp-includes/wlwmanifest.xml',
|
||||
'cms/wp-includes/wlwmanifest.xml',
|
||||
'blog/wp-includes/wlwmanifest.xml',
|
||||
'2019/wp-includes/wlwmanifest.xml',
|
||||
'wp-load.php',
|
||||
'public/_ignition/health-check',
|
||||
'_ignition/health-check',
|
||||
'admin/.env',
|
||||
'protected/.env',
|
||||
'wp-includes/wp-class.php',
|
||||
'wp-commentin.php',
|
||||
'wp-signin.php'
|
||||
}
|
||||
# strptime/strftime pattern for log timestamps: two-digit year, month, day,
# 24-hour time, then the UTC offset (%z).
log_date_format = r'%y/%m/%d %H:%M:%S %z'
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main(sys.argv))
|
||||
with IPCache() as ip_cache:
|
||||
sys.exit(main(sys.argv))
|
Loading…
Reference in New Issue