# Python, 66 lines, 1.9 KiB
import re
|
|
import ipaddress
|
|
from datetime import datetime, timezone
|
|
import time
|
|
|
|
# Log file that is followed for newly appended entries.
ACCESS_LOG = "/var/log/nginx/access.log"
# File that the filtered, reformatted entries are appended to.
OUTPUT_LOG = "./file.log"

# Address ranges treated as internal; clients inside any of them are skipped.
INTERNAL_NETWORKS = [
    ipaddress.ip_network("10.0.0.0/8"),
    ipaddress.ip_network("192.168.0.0/16"),
    ipaddress.ip_network("172.16.0.0/12"),
]

# Matches the leading part of an access-log entry:
#   <ip> - <user> [<time>] "<method> <path> <protocol>"
# The user field is matched with \S+ rather than a literal "-" so that
# entries carrying a user name there are not silently dropped. Anything
# after the closing quote of the request is ignored.
log_line_re = re.compile(
    r'(?P<ip>\S+) - \S+ \[(?P<time>[^\]]+)\] "(?P<method>\S+) (?P<path>\S+) \S+"'
)
|
|
|
|
def is_external(ip):
    """Report whether an address lies outside every internal network.

    Args:
        ip: Textual IPv4 or IPv6 address.

    Returns:
        True if the address is in none of INTERNAL_NETWORKS, else False.

    Raises:
        ValueError: If ``ip`` is not a valid IP address.
    """
    addr = ipaddress.ip_address(ip)
    # An IPv4 client may be written as an IPv4-mapped IPv6 address
    # ("::ffff:10.0.0.1"). Unwrap it so it is compared against the IPv4
    # ranges instead of never matching any of them. IPv4 addresses have no
    # such attribute, hence the getattr default.
    mapped = getattr(addr, "ipv4_mapped", None)
    if mapped is not None:
        addr = mapped
    return all(addr not in net for net in INTERNAL_NETWORKS)
|
|
|
|
def parse_nginx_line(line):
    """Convert one access-log line into a compact, UTC-stamped summary.

    Args:
        line: A raw line read from the access log.

    Returns:
        The string "<UTC time> <ip> <method> <path>", with the time formatted
        as YYYY-MM-DDTHH:MM:SSZ, for a request from an external client.
        None when the line does not match the expected format, carries an
        unparseable address or timestamp, or comes from an internal network.
    """
    match = log_line_re.match(line)
    if match is None:
        return None
    fields = match.groupdict()

    try:
        if not is_external(fields["ip"]):
            return None
        stamp = datetime.strptime(fields["time"], "%d/%b/%Y:%H:%M:%S %z")
    except ValueError:
        # The regex only checks the overall shape of the line, so the
        # address or timestamp can still be invalid; skip such entries
        # instead of letting one bad line raise out of the caller's loop.
        return None

    iso_time = stamp.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    return f'{iso_time} {fields["ip"]} {fields["method"]} {fields["path"]}'
|
|
|
|
def tail(f):
    """Follow a text file, yielding each complete line appended to it.

    Reading starts at the end of the file, so content already present is
    skipped. The generator never finishes on its own: when no new data is
    available it sleeps briefly and polls again.

    Args:
        f: An open, seekable text-mode file object.

    Yields:
        Each newly appended line, including its trailing newline.
    """
    f.seek(0, 2)  # whence=2: jump to the end of the file
    pending = ""  # start of a line whose newline has not arrived yet
    while True:
        chunk = f.readline()
        if not chunk:
            time.sleep(0.01)  # nothing new yet; poll again shortly
            continue
        pending += chunk
        if not pending.endswith("\n"):
            # readline() reached EOF mid-line: the writer has not finished
            # this entry, so keep the fragment and wait for the rest rather
            # than handing out a truncated line.
            continue
        line, pending = pending, ""
        yield line
|
|
|
|
def main():
    """Follow ACCESS_LOG and append external requests to OUTPUT_LOG.

    Parsed entries are collected in memory and written in batches. A batch
    is flushed once it holds ``buffer_size`` entries, or when a log line
    arrives more than ``flush_interval`` seconds after the previous flush.
    Runs until interrupted; anything still buffered is written on the way
    out.
    """
    buffer = []
    buffer_size = 10  # adjust for your throughput
    flush_interval = 0.5  # seconds

    def flush():
        # Append the pending batch to OUTPUT_LOG with a single write.
        if buffer:
            with open(OUTPUT_LOG, "a") as out:
                out.write("\n".join(buffer) + "\n")
            buffer.clear()

    with open(ACCESS_LOG, "r") as f:
        # monotonic(): an interval must not be affected by wall-clock jumps.
        last_flush = time.monotonic()
        try:
            for line in tail(f):
                parsed = parse_nginx_line(line)
                if parsed:
                    buffer.append(parsed)

                # NOTE: this check only runs when tail() yields a line, so
                # during a quiet period a partial batch waits for the next
                # log entry before it is written.
                batch_full = len(buffer) >= buffer_size
                overdue = time.monotonic() - last_flush > flush_interval
                if batch_full or overdue:
                    flush()
                    last_flush = time.monotonic()
        finally:
            # Ctrl-C or an unexpected error must not discard buffered entries.
            flush()
|
|
|
|
# Start following the log only when run as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
|