geolog/tail_service.py
2025-08-30 23:49:30 +02:00

66 lines
1.9 KiB
Python

mport re
import ipaddress
from datetime import datetime, timezone
import time
ACCESS_LOG = "/var/log/nginx/access.log"
OUTPUT_LOG = "./file.log"
INTERNAL_NETWORKS = [
ipaddress.ip_network("10.0.0.0/8"),
ipaddress.ip_network("192.168.0.0/16"),
ipaddress.ip_network("172.16.0.0/12"),
]
log_line_re = re.compile(
r'(?P<ip>\S+) - - \[(?P<time>[^\]]+)\] "(?P<method>\S+) (?P<path>\S+) \S+"'
)
def is_external(ip):
ip_addr = ipaddress.ip_address(ip)
return not any(ip_addr in net for net in INTERNAL_NETWORKS)
def parse_nginx_line(line):
match = log_line_re.match(line)
if not match:
return None
data = match.groupdict()
if not is_external(data["ip"]):
return None
dt = datetime.strptime(data["time"], "%d/%b/%Y:%H:%M:%S %z")
dt_utc = dt.astimezone(timezone.utc)
iso_time = dt_utc.strftime("%Y-%m-%dT%H:%M:%SZ")
return f'{iso_time} {data["ip"]} {data["method"]} {data["path"]}'
def tail(f):
f.seek(0, 2) # Go to the end of the file
while True:
line = f.readline()
if not line:
time.sleep(0.01) # Sleep very briefly
continue
yield line
def main():
buffer = []
buffer_size = 10 # adjust for your throughput
flush_interval = 0.5 # seconds
with open(ACCESS_LOG, "r") as f:
tail_lines = tail(f)
last_flush = time.time()
for line in tail_lines:
parsed = parse_nginx_line(line)
if parsed:
buffer.append(parsed)
# Flush buffer if size reached or interval passed
if len(buffer) >= buffer_size or (time.time() - last_flush) > flush_interval:
if buffer:
with open(OUTPUT_LOG, "a") as out:
out.write("\n".join(buffer) + "\n")
buffer.clear()
last_flush = time.time()
if __name__ == "__main__":
main()