lichene 2025-10-23 23:41:38 +02:00
parent 8f56cbd5c8
commit b816412306
6 changed files with 304 additions and 3 deletions

0
access.log Normal file

219
backend-new.py Normal file

@@ -0,0 +1,219 @@
#!/usr/bin/env python3
import os
import re
import pandas as pd
import geoip2.database
from fastapi import FastAPI, Query
from fastapi.middleware.cors import CORSMiddleware
from typing import Optional, List
from datetime import datetime
#logging
import logging
# ----------------------------
# Logging Setup
# ----------------------------
LOG_FILE = os.path.join(os.path.dirname(__file__), "backend.log")
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler(LOG_FILE),
        logging.StreamHandler()  # also print to console
    ]
)
logger = logging.getLogger(__name__)
# ----------------------------
# Configuration
# ----------------------------
LOG_DIR = os.path.join(os.path.dirname(__file__), "logs")
LOG_PREFIX = "filtered_" # matches cron-generated files
GEO_DB_PATH = "GeoLite2-City.mmdb"
FILENAME_RE = re.compile(r"filtered_(\d{4}-\d{2}-\d{2})_(\d+)\.log")
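# e.g. matches "filtered_2025-10-23_1.log" as written by tail_cron.py (date, then rotation index)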
# ----------------------------
# FastAPI Setup
# ----------------------------
app = FastAPI(title="Reverse Proxy Connections Map API")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"]
)
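# NOTE: allow_origins=["*"] lets any origin call this API; fine on a private network,
# but worth narrowing if the map is ever exposed beyond the LAN.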
# ----------------------------
# GeoIP Setup
# ----------------------------
reader = geoip2.database.Reader(GEO_DB_PATH)
geo_cache = {} # cache IP lookups to save CPU
def ip_to_geo(ip):
    if ip in geo_cache:
        return geo_cache[ip]
    try:
        response = reader.city(ip)
        latlon = (response.location.latitude, response.location.longitude)
    except Exception:
        latlon = (None, None)
    geo_cache[ip] = latlon
    return latlon
# ----------------------------
# Helper: Parse timestamp from line
# ----------------------------
def line_timestamp(line: str):
    try:
        ts_str = line.split(" ", 1)[0]
        return pd.to_datetime(ts_str)
    except Exception:
        return None
# ----------------------------
# Binary search on lines
# ----------------------------
def find_line_index(lines, target_time, seek_start=True):
    lo, hi = 0, len(lines) - 1
    best_idx = None
    while lo <= hi:
        mid = (lo + hi) // 2
        ts = line_timestamp(lines[mid])
        if ts is None:
            if seek_start:
                lo = mid + 1
            else:
                hi = mid - 1
            continue
        if seek_start:
            if ts >= target_time:
                best_idx = mid
                hi = mid - 1
            else:
                lo = mid + 1
        else:
            if ts <= target_time:
                best_idx = mid
                lo = mid + 1
            else:
                hi = mid - 1
    if best_idx is None:
        return len(lines) - 1 if not seek_start else 0
    return best_idx
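# Illustrative example (hypothetical lines and times): given lines timestamped
#   ["2025-10-23T10:00:00Z ...", "2025-10-23T11:00:00Z ...", "2025-10-23T12:00:00Z ..."]
# find_line_index(lines, pd.to_datetime("2025-10-23T10:30:00Z"), seek_start=True)  -> 1
# find_line_index(lines, pd.to_datetime("2025-10-23T10:30:00Z"), seek_start=False) -> 0
# i.e. the first index at/after the start time, or the last index at/before the end time.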
# ----------------------------
# List log files and parse dates
# ----------------------------
def list_log_files() -> List[tuple[str, datetime]]:
    files = []
    for f in os.listdir(LOG_DIR):
        if f.startswith(LOG_PREFIX) and f.endswith(".log"):
            match = FILENAME_RE.match(f)
            if not match:
                continue
            date_str = match.group(1)
            try:
                date = datetime.strptime(date_str, "%Y-%m-%d")
                files.append((os.path.join(LOG_DIR, f), date))
            except Exception:
                continue
    # sort by date and index
    return sorted(files, key=lambda x: (x[1], x[0]))
# ----------------------------
# Load logs efficiently using filename dates
# ----------------------------
def load_logs_binary(service: Optional[str], start: Optional[str], end: Optional[str]):
    start_dt = pd.to_datetime(start) if start else None
    end_dt = pd.to_datetime(end) if end else None
    records = []
    files = list_log_files()
    if not files:
        return []
    for file_path, file_date in files:
        # Skip file if outside range based on filename date
        if start_dt and file_date.date() < start_dt.date():
            continue
        if end_dt and file_date.date() > end_dt.date():
            continue
        with open(file_path, "r", errors="ignore") as f:
            lines = f.readlines()
        if not lines:
            continue
        start_idx = find_line_index(lines, start_dt, seek_start=True) if start_dt else 0
        end_idx = find_line_index(lines, end_dt, seek_start=False) if end_dt else len(lines) - 1
        for line in lines[start_idx:end_idx + 1]:
            try:
                parts = line.strip().split(" ", 3)
                if len(parts) != 4:
                    continue
                timestamp, ip, method, path = parts
                ts = pd.to_datetime(timestamp)
                if start_dt and ts < start_dt:
                    continue
                if end_dt and ts > end_dt:
                    break
                if service and service not in path:
                    continue
                lat, lon = ip_to_geo(ip)
                if lat is None or lon is None:
                    continue
                records.append({
                    "timestamp": ts.isoformat(),
                    "path": path,
                    "lat": lat,
                    "lon": lon
                })
            except Exception:
                continue
    return records
# ----------------------------
# API Endpoints
# ----------------------------
@app.get("/connections")
def get_connections(
    service: Optional[str] = Query(None, description="Filter by service path"),
    start: Optional[str] = Query(None, description="Start datetime (ISO format)"),
    end: Optional[str] = Query(None, description="End datetime (ISO format)")
):
    logger.info("Endpoint hit: /connections service=%s start=%s end=%s", service, start, end)
    return load_logs_binary(service, start, end)
@app.get("/health")
def health():
    files = list_log_files()
    total_size = sum(os.path.getsize(f[0]) for f in files)
    return {
        "status": "ok",
        "log_files": len(files),
        "total_log_size_bytes": total_size,
        "cached_ips": len(geo_cache)
    }
# ----------------------------
# Run with Uvicorn
# ----------------------------
if __name__ == "__main__":
    import uvicorn
    # NOTE: the import string "backend:app" expects this module to be importable as "backend"
    # (i.e. the file saved as backend.py); reload=True requires an import string rather than the app object.
    uvicorn.run("backend:app", host="0.0.0.0", port=8000, reload=True)
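For reference, a minimal client-side sketch of querying the new API once it is running (host and port taken from the uvicorn.run() call above; the "jellyfin" service filter and the time window are made-up values for illustration):

#!/usr/bin/env python3
# Hypothetical smoke test for the /connections and /health endpoints.
import json
from urllib.request import urlopen
from urllib.parse import urlencode

BASE = "http://localhost:8000"  # assumed from uvicorn.run(..., host="0.0.0.0", port=8000)

params = urlencode({
    "service": "jellyfin",            # made-up service path filter
    "start": "2025-10-23T00:00:00",   # ISO datetimes, as the endpoint expects
    "end": "2025-10-23T23:59:59",
})
with urlopen(f"{BASE}/connections?{params}") as resp:
    points = json.load(resp)
print(len(points), "geolocated connections")

with urlopen(f"{BASE}/health") as resp:
    print(json.load(resp))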

2
backend.log Normal file

@@ -0,0 +1,2 @@
2025-10-23 23:25:31,585 [ERROR] 1
2025-10-23 23:25:32,555 [ERROR] 1

4
requirements.txt Normal file

@@ -0,0 +1,4 @@
geoip2
pandas
fastapi
uvicorn
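One thing pip will not pull in is the GeoLite2-City.mmdb database (MaxMind's GeoLite2 City data) that backend-new.py opens at startup. A small pre-flight check along these lines, using the same relative path as GEO_DB_PATH above, makes the failure mode clearer; this is a sketch, not part of the committed code:

import os
import sys

GEO_DB_PATH = "GeoLite2-City.mmdb"  # same literal the backend uses
if not os.path.exists(GEO_DB_PATH):
    sys.exit(f"Missing {GEO_DB_PATH}: download the GeoLite2 City database from MaxMind first")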

78
tail_cron.py Normal file

@@ -0,0 +1,78 @@
#!/usr/bin/env python3
import re
import ipaddress
from datetime import datetime, timezone
import os
from local import * # Make sure this defines ACCESS_LOG, LOG_DIR, etc.
# ==== CONFIGURATION ====
MAX_LOG_LINES = 50000 # adjust as needed
LOG_DIR = os.path.join(os.path.dirname(__file__), "logs")
os.makedirs(LOG_DIR, exist_ok=True)
INTERNAL_NETWORKS = [
    ipaddress.ip_network("10.0.0.0/8"),
    ipaddress.ip_network("192.168.0.0/16"),
    ipaddress.ip_network("172.16.0.0/12"),
]
log_line_re = re.compile(
    r'(?P<ip>\S+) - - \[(?P<time>[^\]]+)\] "(?P<method>\S+) (?P<path>\S+) \S+"'
)
def is_external(ip):
    ip_addr = ipaddress.ip_address(ip)
    return not any(ip_addr in net for net in INTERNAL_NETWORKS)
def parse_nginx_line(line):
    match = log_line_re.match(line)
    if not match:
        return None
    data = match.groupdict()
    if not is_external(data["ip"]):
        return None
    dt = datetime.strptime(data["time"], "%d/%b/%Y:%H:%M:%S %z")
    dt_utc = dt.astimezone(timezone.utc)
    iso_time = dt_utc.strftime("%Y-%m-%dT%H:%M:%SZ")
    return f'{iso_time} {data["ip"]} {data["method"]} {data["path"]}'
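# Illustrative example (hypothetical IP and path) of the rewrite this performs:
#   input : 203.0.113.7 - - [23/Oct/2025:21:15:02 +0200] "GET /jellyfin/web/ HTTP/1.1" ...
#   output: 2025-10-23T19:15:02Z 203.0.113.7 GET /jellyfin/web/
# i.e. exactly the four-field "timestamp ip method path" layout that backend-new.py splits on.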
def get_current_logfile():
    """Find or create the latest log file with line limit."""
    today = datetime.now().strftime("%Y-%m-%d")
    base_name = os.path.join(LOG_DIR, f"filtered_{today}")
    index = 1
    while True:
        log_file = f"{base_name}_{index}.log"
        if not os.path.exists(log_file):
            return log_file
        # Check line count
        with open(log_file, "r") as f:
            line_count = sum(1 for _ in f)
        if line_count < MAX_LOG_LINES:
            return log_file
        index += 1
def process_log():
    output_file = get_current_logfile()
    buffer = []
    with open(ACCESS_LOG, "r") as f:
        for line in f:
            parsed = parse_nginx_line(line)
            if parsed:
                buffer.append(parsed)
    if buffer:
        with open(output_file, "a") as out:
            out.write("\n".join(buffer) + "\n")
def flush_access_log():
    """Safely truncate the access log after processing."""
    with open(ACCESS_LOG, "w"):
        pass  # Opening with 'w' truncates file
def main():
    process_log()
    #flush_access_log()
if __name__ == "__main__":
    main()


@@ -2,9 +2,7 @@ import re
import ipaddress
from datetime import datetime, timezone
import time
ACCESS_LOG = "/var/log/nginx/access.log"
OUTPUT_LOG = "/home/proxi/geolog/file.log"
from local import *
INTERNAL_NETWORKS = [
ipaddress.ip_network("10.0.0.0/8"),