first setup
This commit is contained in:
commit
b2a5f85e4e
7 changed files with 745 additions and 0 deletions
66
tail_service.py
Normal file
66
tail_service.py
Normal file
|
@ -0,0 +1,66 @@
|
|||
import re
|
||||
import ipaddress
|
||||
from datetime import datetime, timezone
|
||||
import time
|
||||
|
||||
# nginx access log that the service follows for new entries.
ACCESS_LOG = "/var/log/nginx/access.log"
# File that filtered entries are appended to; the path is relative, so it is
# resolved against the working directory the service is started from.
OUTPUT_LOG = "./file.log"
|
||||
|
||||
# Networks whose clients count as "internal"; requests from these ranges are
# filtered out. These are the three RFC 1918 private IPv4 ranges.
# NOTE(review): loopback (127.0.0.0/8) and IPv6 private ranges are not listed,
# so such clients are treated as external - confirm that this is intended.
INTERNAL_NETWORKS = [
    ipaddress.ip_network("10.0.0.0/8"),
    ipaddress.ip_network("192.168.0.0/16"),
    ipaddress.ip_network("172.16.0.0/12"),
]
|
||||
|
||||
# Matches the leading part of an nginx access-log entry:
#   <ip> - <remote_user> [<time>] "<method> <path> <protocol>" ...
# The remote-user field is matched with \S+ rather than a literal "-" so that
# requests made by an authenticated user are not silently discarded.
# Anything after the quoted request (status, size, referer, ...) is ignored.
log_line_re = re.compile(
    r'(?P<ip>\S+) - \S+ \[(?P<time>[^\]]+)\] "(?P<method>\S+) (?P<path>\S+) \S+"'
)
|
||||
|
||||
def is_external(ip):
    """Return True when *ip* lies outside every network in INTERNAL_NETWORKS.

    Args:
        ip: Address in text form (anything ``ipaddress.ip_address`` accepts).

    Returns:
        bool: False if the address belongs to one of INTERNAL_NETWORKS,
        True otherwise.

    Raises:
        ValueError: If *ip* is not a valid IPv4 or IPv6 address.
    """
    ip_addr = ipaddress.ip_address(ip)
    # An IPv4-mapped IPv6 address ("::ffff:a.b.c.d") never tests as a member
    # of an IPv4 network, so compare its embedded IPv4 address instead.
    # getattr() is used because IPv4Address objects lack this attribute.
    mapped = getattr(ip_addr, "ipv4_mapped", None)
    if mapped is not None:
        ip_addr = mapped
    return not any(ip_addr in net for net in INTERNAL_NETWORKS)
|
||||
|
||||
def parse_nginx_line(line):
    """Convert one nginx access-log line into a compact output record.

    Args:
        line: A raw line read from the access log.

    Returns:
        str | None: ``"<UTC ISO-8601 time> <ip> <method> <path>"`` for a
        request from an external client, or None when the line does not match
        ``log_line_re``, comes from an internal address, or carries a client
        field / timestamp that cannot be parsed.
    """
    match = log_line_re.match(line)
    if not match:
        return None

    data = match.groupdict()
    try:
        if not is_external(data["ip"]):
            return None
        dt = datetime.strptime(data["time"], "%d/%b/%Y:%H:%M:%S %z")
    except ValueError:
        # Either the client field is not an IP address or the timestamp is
        # malformed. Skip the line instead of letting one bad entry stop the
        # whole service.
        return None

    # Normalise to UTC so every output record uses the same "Z" suffix.
    iso_time = dt.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    return f'{iso_time} {data["ip"]} {data["method"]} {data["path"]}'
|
||||
|
||||
def tail(f):
    """Yield complete lines appended to *f*, like ``tail -f``.

    The file position is moved to the end first, so existing content is
    skipped. The generator never finishes: while no new data is available it
    sleeps briefly and polls again.

    Args:
        f: An open, seekable file object (text or binary mode).

    Yields:
        Each newly appended line, including its trailing newline.
    """
    f.seek(0, 2)  # whence=2: go to the end of the file
    pending = None  # fragment of a line whose newline has not arrived yet
    while True:
        chunk = f.readline()
        if not chunk:
            time.sleep(0.01)  # nothing new yet; sleep very briefly
            continue
        # readline() at EOF can return a line the writer has only partly
        # written; keep collecting until the terminating newline shows up.
        pending = chunk if pending is None else pending + chunk
        newline = b"\n" if isinstance(pending, bytes) else "\n"
        if pending.endswith(newline):
            yield pending
            pending = None
|
||||
|
||||
def _flush_buffer(buffer):
    """Append the buffered records to OUTPUT_LOG and empty *buffer* in place."""
    if not buffer:
        return
    with open(OUTPUT_LOG, "a", encoding="utf-8") as out:
        out.write("\n".join(buffer) + "\n")
    buffer.clear()


def main():
    """Follow ACCESS_LOG and append parsed external requests to OUTPUT_LOG.

    Records are collected in memory and written in batches: a batch is
    flushed once it holds ``buffer_size`` records or when more than
    ``flush_interval`` seconds have passed since the previous flush check
    fired. Whatever is still buffered is written out when the loop is left
    (for example on Ctrl-C), so no parsed record is lost on shutdown.
    """
    buffer = []
    buffer_size = 10  # adjust for your throughput
    flush_interval = 0.5  # seconds

    # errors="replace" so an undecodable byte in the log cannot stop the
    # service with a UnicodeDecodeError.
    with open(ACCESS_LOG, "r", encoding="utf-8", errors="replace") as f:
        # A monotonic clock keeps the interval immune to system clock changes.
        last_flush = time.monotonic()
        try:
            for line in tail(f):
                parsed = parse_nginx_line(line)
                if parsed:
                    buffer.append(parsed)

                # NOTE: tail() blocks while the log is idle, so this check
                # only runs when a new line arrives.
                interval_elapsed = (
                    time.monotonic() - last_flush > flush_interval
                )
                if len(buffer) >= buffer_size or interval_elapsed:
                    _flush_buffer(buffer)
                    last_flush = time.monotonic()
        finally:
            # Write out anything still pending before the process goes away.
            _flush_buffer(buffer)


if __name__ == "__main__":
    main()
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue