From 26985812a40791734454affb9d2e399e82400689 Mon Sep 17 00:00:00 2001
From: lichene
Date: Sun, 26 Oct 2025 11:31:11 +0100
Subject: [PATCH] completed

---
 README        |   2 +-
 access.log    |  24 ++++++
 backend2.py   | 226 ++++++++++++++++++++++++++++++++++++++++++++++++++
 compressor.py |  76 +++++++++++++++++
 config.js     |   1 +
 index.html    |   4 +-
 tail_cron.py  |  79 +++++++++++++-----
 7 files changed, 387 insertions(+), 25 deletions(-)
 create mode 100644 backend2.py
 create mode 100644 compressor.py
 create mode 100644 config.js

diff --git a/README b/README
index 86ff4df..15dddd2 100644
--- a/README
+++ b/README
@@ -1,4 +1,4 @@
 # geolog
 Tool to monitor the internet background radiation.
-This readme is still to be done.
+The nginx logs rotate every day at midnight, so a daily cron job reads the last completed access log.
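+For example, a crontab entry such as the one below could drive this (the install path is illustrative; any time shortly after midnight works):
+
+  5 0 * * * python3 /opt/geolog/tail_cron.py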
diff --git a/access.log b/access.log
index e69de29..7d62c0c 100644
--- a/access.log
+++ b/access.log
@@ -0,0 +1,24 @@
+192.168.1.1 - - [25/Oct/2025:00:03:14 +0200] "POST /api/actions/runner.v1.RunnerService/FetchTask HTTP/2.0" 200 2 "-" "connect-go/1.18.1 (go1.24.4)"
+192.168.1.1 - - [25/Oct/2025:00:03:16 +0200] "POST /api/actions/runner.v1.RunnerService/FetchTask HTTP/2.0" 200 2 "-" "connect-go/1.18.1 (go1.24.4)"
+192.168.1.1 - - [25/Oct/2025:00:03:18 +0200] "POST /api/actions/runner.v1.RunnerService/FetchTask HTTP/2.0" 200 2 "-" "connect-go/1.18.1 (go1.24.4)"
+192.168.1.1 - - [25/Oct/2025:00:03:20 +0200] "POST /api/actions/runner.v1.RunnerService/FetchTask HTTP/2.0" 200 2 "-" "connect-go/1.18.1 (go1.24.4)"
+192.168.1.1 - - [25/Oct/2025:00:03:22 +0200] "POST /api/actions/runner.v1.RunnerService/FetchTask HTTP/2.0" 200 2 "-" "connect-go/1.18.1 (go1.24.4)"
+192.168.1.1 - - [25/Oct/2025:00:03:24 +0200] "POST /api/actions/runner.v1.RunnerService/FetchTask HTTP/2.0" 200 2 "-" "connect-go/1.18.1 (go1.24.4)"
+192.168.1.1 - - [25/Oct/2025:00:03:26 +0200] "POST /api/actions/runner.v1.RunnerService/FetchTask HTTP/2.0" 200 2 "-" "connect-go/1.18.1 (go1.24.4)"
+192.168.1.1 - - [25/Oct/2025:00:03:28 +0200] "POST /api/actions/runner.v1.RunnerService/FetchTask HTTP/2.0" 200 2 "-" "connect-go/1.18.1 (go1.24.4)"
+151.62.125.56 - cur [25/Oct/2025:00:03:30 +0200] "GET /ocs/v2.php/apps/notifications/api/v2/notifications?format=json HTTP/1.1" 304 0 "-" "Mozilla/5.0 (Linux) mirall/3.17.2daily (Nextcloud, manjaro-6.12.48-1-MANJARO ClientArchitecture: >
+151.62.125.56 - cur [25/Oct/2025:00:03:30 +0200] "GET /ocs/v2.php/apps/user_status/api/v1/user_status?format=json HTTP/1.1" 200 149 "-" "Mozilla/5.0 (Linux) mirall/3.17.2daily (Nextcloud, manjaro-6.12.48-1-MANJARO ClientArchitecture: x8>
+151.62.125.56 - Freekkettone [25/Oct/2025:00:03:30 +0200] "GET /ocs/v2.php/apps/user_status/api/v1/user_status?format=json HTTP/1.1" 200 173 "-" "Mozilla/5.0 (Linux) mirall/3.17.2daily (Nextcloud, manjaro-6.12.48-1-MANJARO ClientArchite>
+151.62.125.56 - Freekkettone [25/Oct/2025:00:03:30 +0200] "GET /ocs/v2.php/apps/notifications/api/v2/notifications?format=json HTTP/1.1" 304 0 "-" "Mozilla/5.0 (Linux) mirall/3.17.2daily (Nextcloud, manjaro-6.12.48-1-MANJARO ClientArchi>
+151.62.125.56 - Freekkettone [25/Oct/2025:00:03:30 +0200] "PROPFIND /remote.php/dav/files/Freekkettone/ HTTP/1.1" 207 254 "-" "Mozilla/5.0 (Linux) mirall/3.17.2daily (Nextcloud, manjaro-6.12.48-1-MANJARO ClientArchitecture: x86_64 OsArc>
+151.62.125.56 - cur [25/Oct/2025:00:03:30 +0200] "PROPFIND /remote.php/dav/files/cur/ HTTP/1.1" 207 250 "-" "Mozilla/5.0 (Linux) mirall/3.17.2daily (Nextcloud, manjaro-6.12.48-1-MANJARO ClientArchitecture: x86_64 OsArchitecture: x86_64)"
+192.168.1.1 - - [25/Oct/2025:00:03:30 +0200] "POST /api/actions/runner.v1.RunnerService/FetchTask HTTP/2.0" 200 2 "-" "connect-go/1.18.1 (go1.24.4)"
+192.168.1.1 - - [25/Oct/2025:00:03:32 +0200] "POST /api/actions/runner.v1.RunnerService/FetchTask HTTP/2.0" 200 2 "-" "connect-go/1.18.1 (go1.24.4)"
+192.168.1.1 - - [25/Oct/2025:00:03:35 +0200] "POST /api/actions/runner.v1.RunnerService/FetchTask HTTP/2.0" 200 2 "-" "connect-go/1.18.1 (go1.24.4)"
+192.168.1.1 - - [25/Oct/2025:00:03:36 +0200] "POST /api/actions/runner.v1.RunnerService/FetchTask HTTP/2.0" 200 2 "-" "connect-go/1.18.1 (go1.24.4)"
+192.168.1.1 - - [25/Oct/2025:00:03:38 +0200] "POST /api/actions/runner.v1.RunnerService/FetchTask HTTP/2.0" 200 2 "-" "connect-go/1.18.1 (go1.24.4)"
+192.168.1.1 - - [25/Oct/2025:00:03:40 +0200] "POST /api/actions/runner.v1.RunnerService/FetchTask HTTP/2.0" 200 2 "-" "connect-go/1.18.1 (go1.24.4)"
+192.168.1.1 - - [25/Oct/2025:00:03:42 +0200] "POST /api/actions/runner.v1.RunnerService/FetchTask HTTP/2.0" 200 2 "-" "connect-go/1.18.1 (go1.24.4)"
+192.168.1.1 - - [25/Oct/2025:00:03:44 +0200] "POST /api/actions/runner.v1.RunnerService/FetchTask HTTP/2.0" 200 2 "-" "connect-go/1.18.1 (go1.24.4)"
+192.168.1.1 - - [25/Oct/2025:00:03:46 +0200] "POST /api/actions/runner.v1.RunnerService/FetchTask HTTP/2.0" 200 2 "-" "connect-go/1.18.1 (go1.24.4)"
+192.168.1.1 - - [25/Oct/2025:00:03:48 +0200] "POST /api/actions/runner.v1.RunnerService/FetchTask HTTP/2.0" 200 2 "-" "connect-go/1.18.1 (go1.24.4)"
diff --git a/backend2.py b/backend2.py
new file mode 100644
index 0000000..d77531b
--- /dev/null
+++ b/backend2.py
@@ -0,0 +1,226 @@
+#!/usr/bin/env python3
+import os
+import re
+import pandas as pd
+import geoip2.database
+from fastapi import FastAPI, Query
+from fastapi.middleware.cors import CORSMiddleware
+from typing import Optional, List
+from datetime import datetime
+from compressor import *
+
+# Logging
+import logging
+logger = logging.getLogger('uvicorn.error')
+
+# ----------------------------
+# Configuration
+# ----------------------------
+LOG_DIR = os.path.join(os.path.dirname(__file__), "logs")
+LOG_PREFIX = "filtered_"  # matches cron-generated files
+GEO_DB_PATH = "GeoLite2-City.mmdb"
+FILENAME_RE = re.compile(r"filtered_(\d{4}-\d{2}-\d{2})_(\d+)\.bin")
+
+# ----------------------------
+# FastAPI Setup
+# ----------------------------
+app = FastAPI(title="Reverse Proxy Connections Map API")
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"]
+)
+
+# ----------------------------
+# GeoIP Setup
+# ----------------------------
+reader = geoip2.database.Reader(GEO_DB_PATH)
+geo_cache = {}  # cache IP lookups to save CPU
+
+def ip_to_geo(ip):
+    if ip in geo_cache:
+        return geo_cache[ip]
+    try:
+        response = reader.city(ip)
+        latlon = (response.location.latitude, response.location.longitude)
+    except Exception:
+        latlon = (None, None)
+    geo_cache[ip] = latlon
+    return latlon
+
+
+# ----------------------------
+# Helper: Parse timestamp from line
+# ----------------------------
+def line_timestamp(line: str):
+    try:
+        ts_str = line.split(" ", 1)[0]
+        return pd.to_datetime(ts_str)
+    except Exception:
+        return None
+
+
+# ----------------------------
+# Binary search on lines
+# ----------------------------
+def find_line_index(lines, target_time, seek_start=True):
+    lo, hi = 0, len(lines) - 1
+    best_idx = None
+
+    while lo <= hi:
+        mid = (lo + hi) // 2
+        ts = line_timestamp(lines[mid])
+        if ts is None:
+            if seek_start:
+                lo = mid + 1
+            else:
+                hi = mid - 1
+            continue
+
+        if seek_start:
+            if ts >= target_time:
+                best_idx = mid
+                hi = mid - 1
+            else:
+                lo = mid + 1
+        else:
+            if ts <= target_time:
+                best_idx = mid
+                lo = mid + 1
+            else:
+                hi = mid - 1
+
+    if best_idx is None:
+        return len(lines) - 1 if not seek_start else 0
+    return best_idx
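+
+# Illustrative behaviour of the search above (example data, not from the repo):
+# for lines timestamped 00:00, 00:01 and 00:02 and a target of 00:00:30,
+# seek_start=True returns index 1 (the first line at or after the target) and
+# seek_start=False returns index 0 (the last line at or before the target).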
+
+
+# ----------------------------
+# List log files and parse dates
+# ----------------------------
+def list_log_files() -> List[tuple[str, datetime]]:
+    files = []
+    for f in os.listdir(LOG_DIR):
+        if f.startswith(LOG_PREFIX) and f.endswith(".bin"):
+            match = FILENAME_RE.match(f)
+            if not match:
+                continue
+            date_str = match.group(1)
+            try:
+                date = datetime.strptime(date_str, "%Y-%m-%d")
+                files.append((os.path.join(LOG_DIR, f), date))
+            except Exception:
+                continue
+    # sort by date, then by path (the per-day index is embedded in the name)
+    return sorted(files, key=lambda x: (x[1], x[0]))
+
+def read_compressed_log(log_file):
+    """Helper to read and decompress a log file."""
+    compressor = load_or_create_compressor()
+    entries = []
+    print(log_file)
+
+    with open(log_file, "rb") as f:
+        while chunk := f.read(13):
+            if len(chunk) < 13:  # incomplete entry at end of file
+                print(f"Warning: Incomplete entry at end of {log_file} ({len(chunk)} bytes), skipping")
+                break
+            try:
+                iso_time, ip, method, path = compressor.decompress_entry(chunk)
+                entries.append(f"{iso_time} {ip} {method} {path}")
+            except Exception as e:
+                print(f"Warning: Failed to decompress entry: {e}")
+                continue
+
+    return entries
+
+# ----------------------------
+# Load logs efficiently using filename dates
+# ----------------------------
+def load_logs_binary(service: Optional[str], start: Optional[str], end: Optional[str]):
+    start_dt = pd.to_datetime(start) if start else None
+    end_dt = pd.to_datetime(end) if end else None
+    records = []
+
+    files = list_log_files()
+    logger.error(files)
+    if not files:
+        return []
+
+    for file_path, file_date in files:
+        # Skip file if outside range based on filename date
+        if start_dt and file_date.date() < start_dt.date():
+            continue
+        if end_dt and file_date.date() > end_dt.date():
+            continue
+
+        #with open(file_path, "r", errors="ignore") as f:
+            #lines = f.readlines()
+        lines = read_compressed_log(file_path)
+
+        if not lines:
+            continue
+
+        start_idx = find_line_index(lines, start_dt, seek_start=True) if start_dt else 0
+        end_idx = find_line_index(lines, end_dt, seek_start=False) if end_dt else len(lines) - 1
+
+        for line in lines[start_idx:end_idx + 1]:
+            try:
+                parts = line.strip().split(" ", 3)
+                if len(parts) != 4:
+                    continue
+                timestamp, ip, method, path = parts
+                ts = pd.to_datetime(timestamp)
+                if start_dt and ts < start_dt:
+                    continue
+                if end_dt and ts > end_dt:
+                    break
+                if service and service not in path:
+                    continue
+                lat, lon = ip_to_geo(ip)
+                if lat is None or lon is None:
+                    continue
+                records.append({
+                    "timestamp": ts.isoformat(),
+                    "path": path,
+                    "lat": lat,
+                    "lon": lon
+                })
+            except Exception:
+                continue
+
+    return records
+
+
+# ----------------------------
+# API Endpoints
+# ----------------------------
+@app.get("/connections")
+def get_connections(
+    service: Optional[str] = Query(None, description="Filter by service path"),
+    start: Optional[str] = Query(None, description="Start datetime (ISO format)"),
+    end: Optional[str] = Query(None, description="End datetime (ISO format)")
+):
+    logger.error("Endpoint hit!")
+    return load_logs_binary(service, start, end)
+
+
+@app.get("/health")
+def health():
+    files = list_log_files()
+    total_size = sum(os.path.getsize(f[0]) for f in files)
+    return {
+        "status": "ok",
+        "log_files": len(files),
+        "total_log_size_bytes": total_size,
+        "cached_ips": len(geo_cache)
+    }
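+
+# Example requests against the endpoints above, assuming the default host/port
+# configured below (the dates are illustrative):
+#   curl "http://localhost:8000/health"
+#   curl "http://localhost:8000/connections?start=2025-10-25T00:00:00&end=2025-10-25T23:59:59"
+#   curl "http://localhost:8000/connections?service=/remote.php"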
+
+
+# ----------------------------
+# Run with Uvicorn
+# ----------------------------
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run("backend2:app", host="0.0.0.0", port=8000, reload=True)
diff --git a/compressor.py b/compressor.py
new file mode 100644
index 0000000..39a6668
--- /dev/null
+++ b/compressor.py
@@ -0,0 +1,76 @@
+import struct
+from datetime import datetime, timezone
+import pickle
+import os
+
+LOG_DIR = os.path.join(os.path.dirname(__file__), "logs")
+# Global compressor instance (needs to persist across runs)
+COMPRESSOR_FILE = os.path.join(LOG_DIR, "compressor_state.pkl")
+
+def load_or_create_compressor():
+    """Load existing compressor state or create a new one."""
+    if os.path.exists(COMPRESSOR_FILE):
+        with open(COMPRESSOR_FILE, "rb") as f:
+            return pickle.load(f)
+    return LogCompressor()
+
+def save_compressor(compressor):
+    """Save compressor state to preserve the path dictionary."""
+    with open(COMPRESSOR_FILE, "wb") as f:
+        pickle.dump(compressor, f)
+
+class LogCompressor:
+    def __init__(self):
+        self.path_to_id = {}
+        self.id_to_path = {}
+        self.next_path_id = 0
+        self.method_map = {"GET": 0, "POST": 1, "PUT": 2, "DELETE": 3, "HEAD": 4, "PATCH": 5, "OPTIONS": 6}
+
+    def get_path_id(self, path):
+        """Get or create an ID for a path."""
+        if path not in self.path_to_id:
+            self.path_to_id[path] = self.next_path_id
+            self.id_to_path[self.next_path_id] = path
+            self.next_path_id += 1
+        return self.path_to_id[path]
+
+    def compress_entry(self, iso_time, ip, method, path):
+        """
+        Compress a log entry to bytes.
+        Format: 4 bytes (timestamp) + 4 bytes (IP) + 1 byte (method) + 4 bytes (path_id) = 13 bytes
+        """
+        # Timestamp: Unix timestamp as 4-byte unsigned int (good until 2106)
+        dt = datetime.strptime(iso_time, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
+        timestamp = int(dt.timestamp())
+
+        # IP: convert dotted quad to 4 bytes
+        ip_parts = [int(part) for part in ip.split('.')]
+        ip_int = (ip_parts[0] << 24) + (ip_parts[1] << 16) + (ip_parts[2] << 8) + ip_parts[3]
+
+        # Method: 1 byte
+        method_id = self.method_map.get(method, 255)  # 255 for unknown
+
+        # Path: get ID (4 bytes for path index)
+        path_id = self.get_path_id(path)
+
+        # Pack into bytes: I=unsigned int (4 bytes), B=unsigned char (1 byte);
+        # '<' means little-endian with no padding, so the record is exactly 13 bytes
+        return struct.pack('<IIBI', timestamp, ip_int, method_id, path_id)
+
+    def decompress_entry(self, data):
+        """Decompress a 13-byte record back to (iso_time, ip, method, path)."""
+        timestamp, ip_int, method_id, path_id = struct.unpack('<IIBI', data)
+
+        # Timestamp
+        iso_time = datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+        # IP
+        ip = f"{(ip_int >> 24) & 0xFF}.{(ip_int >> 16) & 0xFF}.{(ip_int >> 8) & 0xFF}.{ip_int & 0xFF}"
+
+        # Method
+        method = {v: k for k, v in self.method_map.items()}.get(method_id, "UNKNOWN")
+
+        # Path
+        path = self.id_to_path.get(path_id, "UNKNOWN")
+
+        return iso_time, ip, method, path
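+
+
+if __name__ == "__main__":
+    # Minimal round-trip sketch; the sample entry is illustrative.
+    compressor = load_or_create_compressor()
+    blob = compressor.compress_entry("2025-10-25T00:03:30Z", "151.62.125.56", "GET", "/health")
+    assert len(blob) == 13
+    print(compressor.decompress_entry(blob))
+    # -> ('2025-10-25T00:03:30Z', '151.62.125.56', 'GET', '/health')
+    save_compressor(compressor)  # persist the path dictionary for later reads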
diff --git a/config.js b/config.js
new file mode 100644
index 0000000..fbb2138
--- /dev/null
+++ b/config.js
@@ -0,0 +1 @@
+const apiBaseUrl = 'http://localhost:8000';
\ No newline at end of file
diff --git a/index.html b/index.html
index 245f64d..ecb6221 100644
--- a/index.html
+++ b/index.html
@@ -49,6 +49,7 @@
+    <script src="config.js"></script>