now caches gotten ips
parent
119bb50dc3
commit
51e0a1944a
158
analyze.py
158
analyze.py
|
@ -3,6 +3,53 @@ import requests
|
||||||
from math import radians, cos, sin, acos, asin, sqrt
|
from math import radians, cos, sin, acos, asin, sqrt
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from os import chdir
|
from os import chdir
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from dateutil import tz
|
||||||
|
import pickle
|
||||||
|
|
||||||
|
class IPCache:
    """Pickle-backed cache of ip-api.com lookups, keyed by ip.

    Each cached value is ``((lat, lon), tzinfo)`` where ``tzinfo`` is a
    fixed-offset ``datetime.timezone``. Intended to be used as a context
    manager: entering loads previously cached results from a picklefile in
    the directory the object was created in, and exiting writes the cache
    back to that same file.
    """

    picklefile_name = 'cached_ips.pkl'

    def __init__(self, /):
        # Remember where we were created: callers chdir() around while
        # working, and the picklefile must always be found in one place.
        self.creation_dir = Path('.').resolve()
        # Start with an empty cache so get() is usable even outside a `with`
        # block; __enter__ replaces both attributes.
        self.filename = self.picklefile_name
        self.cache = {}

    def get(self, ip, /):
        """Return ``((lat, lon), tzinfo)`` for *ip*, querying ip-api.com on a miss.

        Raises:
            RuntimeError: if the HTTP request fails, if ip-api.com reports
                ``status == 'fail'``, or if the reported timezone name is
                not known to dateutil.
        """
        if ip in self.cache:
            return self.cache[ip]

        addr = f'http://ip-api.com/json/{ip}'
        response = requests.get(addr)
        if not response.ok:
            raise RuntimeError(f'request for ip failed with {response.status_code}')

        # Parse the body as JSON. Never eval() data that came off the
        # network: that would execute whatever the remote end sends.
        resulting_dict = response.json()
        if resulting_dict['status'] == 'fail':
            raise RuntimeError('ip was invalid')

        # the given timezone is like, 'Australia/Sydney'. we need to convert to
        # a datetime.timezone type
        timezone_str = resulting_dict['timezone']
        tzfile = tz.gettz(timezone_str)
        if tzfile is None:
            raise RuntimeError(f'unknown timezone {timezone_str!r}')
        # Ask for "now" directly in the target zone so the offset is computed
        # for the correct instant (a naive UTC time would be misread as local
        # wall-clock time in that zone).
        # NOTE(review): the offset is frozen at lookup time and then pickled,
        # so it will not follow later DST changes -- confirm this is acceptable.
        as_timedelta = datetime.now(tz=tzfile).utcoffset()
        as_timezone_type = timezone(as_timedelta)

        self.cache[ip] = ((resulting_dict['lat'], resulting_dict['lon']), as_timezone_type)
        return self.cache[ip]

    def __enter__(self, filename=picklefile_name, /):
        """Load the cache from *filename* (relative to the creation directory).

        Changes the process working directory to the creation directory, as
        the original implementation did. Starts with an empty cache when the
        file does not exist. Returns ``self``.
        """
        chdir(self.creation_dir)
        if Path(filename).exists():
            # NOTE: unpickling can execute arbitrary code. Only load a
            # picklefile this program wrote itself.
            with open(filename, 'rb') as f:
                cache = pickle.load(f)
        else:
            cache = {}
        self.filename = filename
        self.cache = cache
        return self

    def __exit__(self, err_type, err_value, traceback, /):
        """Write the cache back to the picklefile; exceptions are not suppressed."""
        # The caller may have chdir()ed elsewhere; go back so the relative
        # filename resolves to the file that __enter__ read.
        chdir(self.creation_dir)
        with open(self.filename, 'wb') as f:
            pickle.dump(self.cache, f)
|
||||||
|
|
||||||
def filter_logs(filename: str):
|
def filter_logs(filename: str):
|
||||||
with open(filename, 'r') as f:
|
with open(filename, 'r') as f:
|
||||||
|
@ -13,24 +60,8 @@ def filter_logs(filename: str):
|
||||||
if (
|
if (
|
||||||
status != '200'
|
status != '200'
|
||||||
or method != 'GET'
|
or method != 'GET'
|
||||||
or file not in validnames
|
|
||||||
): continue
|
): continue
|
||||||
date, time, timezone = timethings.split(' ')
|
yield ip, timethings, status, method, file
|
||||||
yield ip, date, time, timezone, status, method, file
|
|
||||||
|
|
||||||
def get_ip_latlon(ip: str) -> 'tuple[float, float] | None':
    """Look up the latitude and longitude of *ip* via ip-api.com.

    Returns:
        ``(lat, lon)`` as reported by the service, or ``None`` when the
        service answers with ``status == 'fail'``.

    Raises:
        RuntimeError: if the HTTP request itself does not succeed.
    """
    # make a request to ip-api.com to associate an ip to a
    # latitude and longitude
    addr = f'http://ip-api.com/json/{ip}'
    response = requests.get(addr)
    if not response.ok:
        raise RuntimeError(f'request for ip failed with {response.status_code}')
    # Parse the body as JSON. Never eval() data that came off the network:
    # that would execute whatever the remote end sends.
    resulting_dict = response.json()
    if resulting_dict['status'] != 'fail':
        return resulting_dict['lat'], resulting_dict['lon']
    # Failed lookups are reported to the caller as None rather than raised.
    return None
|
|
||||||
|
|
||||||
def value_sort(d: dict, reverse=True) -> list:
    """Return the ``(key, value)`` pairs of *d* as a list sorted by value.

    Args:
        d: mapping whose values can be compared with each other.
        reverse: when true (the default) the largest value comes first.

    Returns:
        A list of ``(key, value)`` tuples, not a dict.
    """
    return sorted(d.items(), key=lambda kv_pair: kv_pair[1], reverse=reverse)
|
|
||||||
|
|
||||||
def latlon_distance(p1, p2) -> float:
|
def latlon_distance(p1, p2) -> float:
|
||||||
# black magic do not touch. use the haversine formula to find the distance
|
# black magic do not touch. use the haversine formula to find the distance
|
||||||
|
@ -54,45 +85,31 @@ def analyze_server(server: Path, serverip: str) -> None:
|
||||||
if not server.is_dir():
|
if not server.is_dir():
|
||||||
return
|
return
|
||||||
|
|
||||||
|
result = ''
|
||||||
|
|
||||||
filename = f'{server.name}/access.log'
|
filename = f'{server.name}/access.log'
|
||||||
hitfiles = {}
|
self_latlon, _ = ip_cache.get(serverip)
|
||||||
requesters = {}
|
for ip, timethings, status, method, file in filter_logs(filename):
|
||||||
distances = {}
|
# ip_cache.get() returns the (latitude, longitude) pair and the timezone
|
||||||
times = {}
|
# of an ip
|
||||||
self_latlon = get_ip_latlon(serverip)
|
# TODO: cache results in a picklefile
|
||||||
for ip, date, time_str, timezone, status, method, file in filter_logs(filename):
|
latlon, timezone = ip_cache.get(ip)
|
||||||
if file in hitfiles:
|
|
||||||
hitfiles[file] += 1
|
|
||||||
else:
|
|
||||||
hitfiles[file] = 1
|
|
||||||
|
|
||||||
if ip in requesters:
|
# convert `timethings` to a datetime object
|
||||||
requesters[ip] += 1
|
time_of_hit = datetime.strptime(timethings, log_date_format)
|
||||||
else:
|
# convert from its default timezone in GMT to the timezone of the requester
|
||||||
requesters[ip] = 1
|
localtime = time_of_hit.replace(tzinfo=timezone.utc).astimezone(timezone)
|
||||||
latlon = get_ip_latlon(ip)
|
# convert this time back to a string for logging purposes
|
||||||
if latlon is not None:
|
localtime_str = localtime.strftime(log_date_format)
|
||||||
distances[ip] = latlon_distance(self_latlon, latlon)
|
|
||||||
|
|
||||||
hour = time_str.split(':')[0]
|
distance = latlon_distance(self_latlon, latlon)
|
||||||
if hour in times:
|
date, time = timethings.split(' ')[:2]
|
||||||
times[hour] += 1
|
result += (
|
||||||
else:
|
f'{server.name},{ip},{distance},{date},{time},'
|
||||||
times[hour] = 1
|
+ f'{localtime_str},{method},{file}\n'
|
||||||
|
|
||||||
print(f'\n\n--- ANALYSIS FOR {server.name.upper()} ---\n')
|
|
||||||
|
|
||||||
for dict_name in ['hitfiles', 'requesters', 'distances', 'times']:
|
|
||||||
print(
|
|
||||||
dict_name + ': {\n '
|
|
||||||
+ ',\n '.join(
|
|
||||||
f'{k!r}: {v!r}'
|
|
||||||
for k, v in value_sort(eval(dict_name))
|
|
||||||
)
|
|
||||||
+ '\n}'
|
|
||||||
)
|
)
|
||||||
|
|
||||||
print(f'average: {sum(distances.values())/len(distances)}')
|
return result
|
||||||
|
|
||||||
def get_server_ip(servername: str) -> str:
|
def get_server_ip(servername: str) -> str:
|
||||||
# associate servers with ips
|
# associate servers with ips
|
||||||
|
@ -115,45 +132,24 @@ def main(args: list) -> int:
|
||||||
print('no logdir provided')
|
print('no logdir provided')
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
outfile = 'analysis.csv'
|
||||||
start_dir = Path('.').resolve()
|
start_dir = Path('.').resolve()
|
||||||
|
|
||||||
|
f = open(outfile, 'a')
|
||||||
for logdir in args[1:]:
|
for logdir in args[1:]:
|
||||||
chdir(logdir)
|
chdir(logdir)
|
||||||
serverdir = Path('.')
|
serverdir = Path('.')
|
||||||
for subdir in serverdir.iterdir():
|
for subdir in serverdir.iterdir():
|
||||||
serverip = get_server_ip(subdir.name)
|
serverip = get_server_ip(subdir.name)
|
||||||
analyze_server(subdir, serverip)
|
csv_lines = analyze_server(subdir, serverip)
|
||||||
chdir(start_dir)
|
chdir(start_dir)
|
||||||
|
f.write(csv_lines)
|
||||||
|
f.close()
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
validnames = {
|
log_date_format = r'%y/%m/%d %H:%M:%S %z'
|
||||||
'wp-login.php',
|
|
||||||
'.env',
|
|
||||||
'plugins/system/debug/debug.xml',
|
|
||||||
'administrator/language/en-GB/en-GB.xml',
|
|
||||||
'administrator/help/en-GB/toc.json',
|
|
||||||
'.git/config',
|
|
||||||
'vendor/phpunit/phpunit/src/Util/PHP/eval-stdin.php',
|
|
||||||
'xmlrpc.php',
|
|
||||||
'wp1/wp-includes/wlwmanifest.xml',
|
|
||||||
'wp/wp-includes/wlwmanifest.xml',
|
|
||||||
'wordpress/wp-includes/wlwmanifest.xml',
|
|
||||||
'web/wp-includes/wlwmanifest.xml',
|
|
||||||
'test/wp-includes/wlwmanifest.xml',
|
|
||||||
'site/wp-includes/wlwmanifest.xml',
|
|
||||||
'shop/wp-includes/wlwmanifest.xml',
|
|
||||||
'cms/wp-includes/wlwmanifest.xml',
|
|
||||||
'blog/wp-includes/wlwmanifest.xml',
|
|
||||||
'2019/wp-includes/wlwmanifest.xml',
|
|
||||||
'wp-load.php',
|
|
||||||
'public/_ignition/health-check',
|
|
||||||
'_ignition/health-check',
|
|
||||||
'admin/.env',
|
|
||||||
'protected/.env',
|
|
||||||
'wp-includes/wp-class.php',
|
|
||||||
'wp-commentin.php',
|
|
||||||
'wp-signin.php'
|
|
||||||
}
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
sys.exit(main(sys.argv))
|
with IPCache() as ip_cache:
|
||||||
|
sys.exit(main(sys.argv))
|
Loading…
Reference in New Issue