sciencefair2023/analyze.py

106 lines
3.3 KiB
Python
Raw Normal View History

2022-12-28 23:31:24 -05:00
import sys
import requests
2022-12-30 14:14:29 -05:00
from math import radians, cos, sin, acos, asin, sqrt
from itertools import combinations
2022-12-30 12:35:08 -05:00
2022-12-28 23:31:24 -05:00
def main(filename: str):
    """Summarise requests for a fixed set of watched paths in a log file.

    Reads *filename* through ``filterwords`` and prints:

    * ``hitfiles``   - hit count per watched path, highest first;
    * ``requesters`` - matching-request count per IP, highest first;
    * the distance in km between every pair of geolocated IPs, skipping
      pairs whose distance is exactly 0 (same coordinates).
    """
    validnames = {
        'wp-login.php',
        '.env',
        'plugins/system/debug/debug.xml',
        'administrator/language/en-GB/en-GB.xml',
        'administrator/help/en-GB/toc.json',
        '.git/config',
        'vendor/phpunit/phpunit/src/Util/PHP/eval-stdin.php',
        'xmlrpc.php',
        'wp1/wp-includes/wlwmanifest.xml',
        'wp/wp-includes/wlwmanifest.xml',
        'wordpress/wp-includes/wlwmanifest.xml',
        'web/wp-includes/wlwmanifest.xml',
        'test/wp-includes/wlwmanifest.xml',
        'site/wp-includes/wlwmanifest.xml',
        'shop/wp-includes/wlwmanifest.xml',
        'cms/wp-includes/wlwmanifest.xml',
        'blog/wp-includes/wlwmanifest.xml',
        '2019/wp-includes/wlwmanifest.xml',
        'wp-load.php',
        'public/_ignition/health-check',
        '_ignition/health-check',
        'admin/.env',
        'protected/.env',
        'wp-includes/wp-class.php',
        'wp-commentin.php',
        'wp-signin.php',
    }
    # Every watched path starts at zero so paths with no hits still show up.
    hitfiles = dict.fromkeys(validnames, 0)
    requesters = {}
    locations = {}

    # Only the IP and the matched path are needed here; the remaining fields
    # yielded by filterwords are deliberately ignored.
    for ip, _date, _time, _timezone, _status, _method, file in filterwords(filename, validnames):
        hitfiles[file] += 1

        first_sighting = ip not in requesters
        requesters[ip] = requesters.get(ip, 0) + 1
        if not first_sighting:
            continue

        # Geolocate each distinct IP exactly once: the lookup is a network
        # round trip, so repeating it per log line would be slow and wasteful.
        latlon = get_ip_latlon(ip)
        if latlon is not None:
            locations[ip] = latlon

    hitfiles = sorted_dict(hitfiles)
    print(f'{hitfiles = }')

    requesters = sorted_dict(requesters)
    print(f'{requesters = }')

    # Pairwise distances between all located requesters.
    for p1, p2 in combinations(locations.values(), 2):
        d = latlon_distance(p1, p2)
        if d != 0:
            print(d)
2022-12-28 23:31:24 -05:00
def filterwords(filename: str, validnames: set):
    """Yield the parsed fields of log lines that request a watched path.

    Each line of *filename* is expected to look like
    ``IP,YYMMDD hhmmss TIMEZONE,STATUS,METHOD,FILE``.  The first four
    '/'-separated components of FILE are dropped and the remainder is compared
    against *validnames*.

    Only lines with status ``'200'``, method ``'GET'`` and a path in
    *validnames* are yielded, as the tuple
    ``(ip, date, time, timezone, status, method, file)`` (all strings).
    Blank, truncated or otherwise malformed lines are skipped instead of
    raising ``ValueError``.
    """
    # errors='replace': requested paths are arbitrary client input and may
    # contain bytes that do not decode; one bad line must not abort the run.
    with open(filename, 'r', errors='replace') as f:
        for line in f:
            # IP,YYMMDD hhmmss TIMEZONE,STATUS,METHOD,FILE
            # maxsplit=4 keeps any commas inside FILE intact.
            fields = line.split(',', 4)
            if len(fields) != 5:
                continue  # blank or truncated line
            ip, timethings, status, method, filepath = fields

            file = '/'.join(filepath.split('/')[4:]).strip()
            if status != '200' or method != 'GET' or file not in validnames:
                continue

            stamp = timethings.split(' ')
            if len(stamp) != 3:
                continue  # timestamp is not "DATE TIME TIMEZONE"
            date, time, timezone = stamp
            yield ip, date, time, timezone, status, method, file
2022-12-28 23:31:24 -05:00
2022-12-30 14:14:29 -05:00
def get_ip_latlon(ip: str) -> 'tuple[float, float] | None':
    """Look up the latitude/longitude of *ip* via ip-api.com.

    Returns ``(lat, lon)`` as reported by the service, or ``None`` when the
    service answers with status ``'fail'``, the request fails, or the
    response body is not valid JSON.
    """
    # make a request to ip-api.com to associate an ip to a
    # latitude and longitude
    addr = f'http://ip-api.com/json/{ip}'
    try:
        # Without a timeout a stalled connection would hang the whole run.
        response = requests.get(addr, timeout=10)
        # NOTE(security): this used to be eval(response.content). The body
        # arrives over plain HTTP from a third party, so eval() would execute
        # whatever the peer (or anyone on the path) sent. It also breaks on
        # JSON true/false/null. Parse it as JSON instead.
        resulting_dict = response.json()
    except (requests.RequestException, ValueError) as err:
        # Report and carry on: callers already treat None as "no location".
        print(f'geolocation lookup for {ip} failed: {err}', file=sys.stderr)
        return None
    if resulting_dict['status'] != 'fail':
        return resulting_dict['lat'], resulting_dict['lon']
    return None
def sorted_dict(d: dict, reverse=True) -> dict:
    """Return a new dict with the entries of *d* ordered by value.

    Values are ordered largest-first by default; pass ``reverse=False`` for
    smallest-first.  Entries with equal values keep their original relative
    order.
    """
    ordered_items = sorted(
        d.items(),
        key=lambda item: item[1],
        reverse=reverse,
    )
    return dict(ordered_items)
def latlon_distance(p1, p2) -> float:
    """Return the great-circle distance in kilometres between two points.

    *p1* and *p2* are ``(latitude, longitude)`` pairs in degrees.  The
    distance is computed with the haversine formula on a sphere of radius
    6371 km.
    """
    lat1, lon1 = p1
    lat2, lon2 = p2
    lon1 = radians(lon1)
    lat1 = radians(lat1)
    lon2 = radians(lon2)
    lat2 = radians(lat2)
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    # Haversine: `a` is the squared half-chord length between the points.
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Rounding can push `a` a hair outside [0, 1] for (near-)antipodal
    # points, which would make asin() raise a math domain error; clamp it.
    a = min(1.0, max(0.0, a))
    # `c` is the central angle in radians.
    c = 2 * asin(sqrt(a))
    earth_radius_km = 6371
    return c * earth_radius_km
2022-12-28 23:31:24 -05:00
if __name__ == '__main__':
    # Usage: analyze.py LOGFILE
    # Exit with a usage message instead of an IndexError traceback when the
    # log file argument is missing.
    if len(sys.argv) < 2:
        sys.exit(f'usage: {sys.argv[0]} LOGFILE')
    main(sys.argv[1])