/usr/local/bin/connection-monitor-avg.py
#!/usr/bin/env python3
# /usr/local/bin/connection-monitor-avg.py
"""Monitor ESTABLISHED TCP connections and keep rolling averages.

The monitor samples the connection count at a fixed interval, keeps
5/15/30/60-minute rolling windows of those samples, rewrites a small JSON
stats file on every sample and appends a CSV-like line to a log file at a
(possibly slower) logging interval.  The log file is rotated and gzipped
once it exceeds a size limit, and old rotated files are pruned.
"""
import argparse
import collections
import datetime
import gzip
import json
import os
import shutil
import signal
import subprocess
import sys
import time

# Lengths of the rolling-average windows, in minutes.
WINDOW_MINUTES = (5, 15, 30, 60)


def _emit(message):
    """Print *message* to stdout and flush immediately.

    stdout is block-buffered when it is not a terminal, so without the
    explicit flush, status and error messages could sit in the buffer
    instead of showing up promptly when the output is piped or captured.
    """
    print(message, flush=True)


def _positive_int(value):
    """argparse ``type=`` callable: parse *value* as an int greater than 0."""
    number = int(value)
    if number <= 0:
        raise argparse.ArgumentTypeError(
            f"must be a positive integer, got {value!r}")
    return number


class ConnectionMonitor:
    """Sample the ESTABLISHED connection count and track rolling averages."""

    def __init__(self, sample_interval=60, log_dir="/var/log", log_interval=60,
                 max_log_size_mb=100, keep_days=7, console_output=False):
        """
        Initialize the monitor.

        sample_interval: seconds between samples (default 60); must be > 0
        log_dir: directory holding the stats file and the log file
        log_interval: seconds between log writes (default 60)
        max_log_size_mb: maximum log file size before rotation (default 100MB)
        keep_days: days to keep old logs (default 7)
        console_output: whether to print to console (default False)

        Raises ValueError if sample_interval is not positive.
        Installs SIGTERM and SIGINT handlers that call ``shutdown``.
        """
        self.sample_interval = sample_interval
        self.log_interval = log_interval
        self.log_dir = log_dir
        self.max_log_size_bytes = max_log_size_mb * 1024 * 1024
        self.keep_days = keep_days
        self.console_output = console_output

        # One bounded deque per window; old samples fall off automatically.
        self.windows = self._build_windows(sample_interval)

        # Track last log write time (0 means "never", so the first sample
        # is always logged).
        self.last_log_time = 0

        # Stats file for current averages
        self.stats_file = os.path.join(log_dir, "connection-stats.json")
        self.log_file = os.path.join(log_dir, "connection-averages.log")

        # Setup graceful shutdown
        signal.signal(signal.SIGTERM, self.shutdown)
        signal.signal(signal.SIGINT, self.shutdown)

    @staticmethod
    def _build_windows(sample_interval):
        """Return ``{minutes: deque}`` sized for the given sampling interval.

        For 5-second samples: 5 min = 60 samples, 15 min = 180 samples, etc.
        Each window holds at least one sample, so a sampling interval longer
        than a window still yields a meaningful (latest-value) average
        instead of a permanently empty window.
        """
        if sample_interval <= 0:
            raise ValueError(
                f"sample_interval must be positive, got {sample_interval!r}")
        samples_per_minute = 60 / sample_interval
        return {
            minutes: collections.deque(
                maxlen=max(1, int(minutes * samples_per_minute)))
            for minutes in WINDOW_MINUTES
        }

    @staticmethod
    def _count_ss_rows(stdout):
        """Count data rows in ``ss`` output, excluding the header line."""
        rows = stdout.strip().splitlines()
        return max(len(rows) - 1, 0)

    def get_established_count(self):
        """Get current count of ESTABLISHED connections.

        Tries ``ss`` first and falls back to ``netstat`` when ``ss`` exits
        with an error or cannot be started at all (e.g. not installed).
        Returns -1 if neither command succeeds.
        """
        try:
            # Using ss as it's faster than netstat
            result = subprocess.run(['ss', '-tan', 'state', 'established'],
                                    capture_output=True, text=True, check=True)
        except (subprocess.CalledProcessError, OSError):
            # OSError covers FileNotFoundError when ss is missing; fall
            # through to netstat below.
            pass
        else:
            return self._count_ss_rows(result.stdout)

        try:
            result = subprocess.run(['netstat', '-an'],
                                    capture_output=True, text=True, check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            _emit(f"Error getting connection count: {e}")
            return -1
        return sum('ESTABLISHED' in line
                   for line in result.stdout.split('\n'))

    def update_samples(self, count):
        """Add new sample to all windows."""
        for window in self.windows.values():
            window.append(count)

    def calculate_averages(self):
        """Return ``{"<N>min": average}`` for every window.

        Averages are rounded to 2 decimals; an empty window reports 0.
        """
        averages = {}
        for minutes, samples in self.windows.items():
            key = f"{minutes}min"
            if samples:
                averages[key] = round(sum(samples) / len(samples), 2)
            else:
                averages[key] = 0
        return averages

    def write_stats(self, count, averages):
        """Write current stats to the JSON stats file (overwritten each time).

        I/O errors are reported on stdout and otherwise ignored so that a
        failed write does not stop monitoring.
        """
        stats = {
            "timestamp": datetime.datetime.now().isoformat(),
            "current": count,
            "averages": averages,
            "samples_collected": {
                f"{m}min": len(self.windows[m]) for m in self.windows
            },
        }
        try:
            with open(self.stats_file, 'w') as f:
                json.dump(stats, f, indent=2)
        except OSError as e:
            _emit(f"Error writing stats file: {e}")

    def rotate_log_if_needed(self):
        """Rotate and gzip the log file if it exceeds the size limit.

        Does nothing when the log file does not exist or is small enough.
        After a rotation, old rotated logs are pruned.
        """
        try:
            file_size = os.path.getsize(self.log_file)
        except FileNotFoundError:
            return
        if file_size <= self.max_log_size_bytes:
            return

        timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
        rotated_name = f"{self.log_file}.{timestamp}"

        # Move current log to timestamped file
        shutil.move(self.log_file, rotated_name)

        # Compress the rotated file, then drop the uncompressed copy
        with open(rotated_name, 'rb') as f_in, \
                gzip.open(f"{rotated_name}.gz", 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
        os.remove(rotated_name)
        _emit(f"Rotated log file to {rotated_name}.gz")

        self.cleanup_old_logs()

    def cleanup_old_logs(self):
        """Remove rotated ``.gz`` logs older than keep_days.

        A failure on one file is reported and does not stop the cleanup of
        the remaining files.
        """
        cutoff_time = time.time() - (self.keep_days * 24 * 3600)
        # Rotated files are named "<log file name>.<timestamp>.gz".
        prefix = os.path.basename(self.log_file) + '.'
        for filename in os.listdir(self.log_dir):
            if not (filename.startswith(prefix) and filename.endswith('.gz')):
                continue
            filepath = os.path.join(self.log_dir, filename)
            try:
                if os.path.getmtime(filepath) < cutoff_time:
                    os.remove(filepath)
                    _emit(f"Removed old log: {filename}")
            except OSError as e:
                _emit(f"Error removing old log {filename}: {e}")

    def should_write_log(self):
        """Return True (and restart the timer) once log_interval has elapsed."""
        current_time = time.time()
        if current_time - self.last_log_time >= self.log_interval:
            self.last_log_time = current_time
            return True
        return False

    def log_averages(self, count, averages):
        """Append averages to log file only at specified intervals.

        A rotation failure is reported but does not prevent the entry from
        being written.
        """
        if not self.should_write_log():
            return

        # Check if rotation is needed
        try:
            self.rotate_log_if_needed()
        except OSError as e:
            _emit(f"Error rotating log file: {e}")

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        log_entry = (f"{timestamp},current={count},"
                     f"avg_5min={averages.get('5min', 0)},"
                     f"avg_15min={averages.get('15min', 0)},"
                     f"avg_30min={averages.get('30min', 0)},"
                     f"avg_60min={averages.get('60min', 0)}")
        try:
            with open(self.log_file, 'a') as f:
                f.write(log_entry + '\n')
        except OSError as e:
            _emit(f"Error writing log file: {e}")

    def print_status(self, count, averages):
        """Print current status to console (no-op unless console_output)."""
        if not self.console_output:
            return

        def minutes_of(period):
            return int(period.replace('min', ''))

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        _emit(f"\n[{timestamp}] Connections Monitor")
        _emit(f"Current ESTABLISHED connections: {count}")
        _emit("Rolling Averages:")
        # Sort by window length, not by label text ("15min" < "5min" as
        # strings), so the windows print shortest first.
        ordered = sorted(averages.items(),
                         key=lambda item: minutes_of(item[0]))
        for period, avg in ordered:
            window = self.windows[minutes_of(period)]
            _emit(f" {period:6s}: {avg:6.1f} "
                  f"(based on {len(window)}/{window.maxlen} samples)")

    def run(self):
        """Main monitoring loop; runs until interrupted or signalled."""
        _emit("Starting connection monitor")
        _emit(f" Sampling every: {self.sample_interval} seconds")
        _emit(f" Logging every: {self.log_interval} seconds")
        _emit(f" Max log size: {self.max_log_size_bytes / (1024*1024):.1f} MB")
        _emit(f" Keep logs for: {self.keep_days} days")
        _emit(f" Stats file: {self.stats_file}")
        _emit(f" Log file: {self.log_file}")

        while True:
            try:
                # Get current connection count (-1 means it could not be read)
                count = self.get_established_count()
                if count >= 0:
                    # Update samples
                    self.update_samples(count)
                    # Calculate averages
                    averages = self.calculate_averages()
                    # Always write stats file (it's small and gets overwritten)
                    self.write_stats(count, averages)
                    # Write to log only at specified intervals
                    self.log_averages(count, averages)
                    # Print to console if enabled
                    self.print_status(count, averages)
                # Sleep until next sample
                time.sleep(self.sample_interval)
            except KeyboardInterrupt:
                break
            except Exception as e:
                # Top-level boundary: report and keep the monitor alive.
                _emit(f"Error in main loop: {e}")
                time.sleep(self.sample_interval)

    def shutdown(self, signum, frame):
        """Signal handler: announce shutdown and exit with status 0."""
        _emit("\nShutting down connection monitor...")
        sys.exit(0)


def main():
    """Parse command-line options and run the monitor."""
    parser = argparse.ArgumentParser(
        description='Monitor ESTABLISHED connections with rolling averages')
    # The interval is a divisor when sizing the windows, so reject <= 0 here
    # with a usage error instead of a traceback.
    parser.add_argument('--interval', type=_positive_int, default=60,
                        help='Sampling interval in seconds (default: 60)')
    parser.add_argument('--log-interval', type=int, default=60,
                        help='Log writing interval in seconds (default: 60)')
    parser.add_argument('--log-dir', default='/var/log',
                        help='Directory for log files (default: /var/log)')
    parser.add_argument('--max-log-size', type=int, default=100,
                        help='Max log file size in MB before rotation (default: 100)')
    parser.add_argument('--keep-days', type=int, default=7,
                        help='Days to keep old logs (default: 7)')
    parser.add_argument('--console', action='store_true',
                        help='Enable console output')
    args = parser.parse_args()

    monitor = ConnectionMonitor(
        sample_interval=args.interval,
        log_dir=args.log_dir,
        log_interval=args.log_interval,
        max_log_size_mb=args.max_log_size,
        keep_days=args.keep_days,
        console_output=args.console,
    )
    monitor.run()


if __name__ == "__main__":
    main()

# /etc/systemd/system/connection-monitor.service
[Unit]
Description=Network Connection Monitor with Rolling Averages
After=network.target

[Service]
Type=simple
User=root
# Sample every 5 seconds, log every 60 seconds, rotate at 50MB, keep 7 days
ExecStart=/usr/bin/python3 /usr/local/bin/connection-monitor-avg.py --interval 5 --log-interval 60 --max-log-size 50 --keep-days 7
Restart=always
RestartSec=10
StandardOutput=journal
StandardError=journal

[Install]
WantedBy=multi-user.target

执行
sudo chmod +x /usr/local/bin/connection-monitor-avg.py
sudo systemctl daemon-reload
sudo systemctl enable connection-monitor.service
sudo systemctl start connection-monitor.service

logs在
/var/log/connection-stats.json
和
/var/log/connection-averages.log
有错误请指正!!!
评论 (0)