Files
botlimiter/classes/BotLogger.php
2026-03-22 09:23:41 +02:00

290 lines
9.0 KiB
PHP

<?php
/**
 * Ban logger for the bot limiter.
 *
 * Responsibilities visible in this class:
 *  - append ban events to a size-capped log file (one ".old" backup is kept);
 *  - decide whether an IP is whitelisted (server itself, PrestaShop maintenance
 *    IPs, or a crawler range downloaded from the vendors' published JSON lists);
 *  - resolve the client IP, honouring proxy headers only when the TCP peer is
 *    inside one of the hard-coded Cloudflare ranges.
 */
class BotLogger
{
    /** Absolute path of the ban log. */
    const LOG_FILE = _PS_ROOT_DIR_ . '/var/logs/botlimiter_ban.log';

    /** Generated PHP file that returns the list of whitelisted CIDR blocks. */
    const WHITELIST_FILE = _PS_CACHE_DIR_ . DIRECTORY_SEPARATOR . 'botlimiter_whitelist.php';

    /** Log size (bytes) above which the log is rotated: 10 MB. */
    const MAX_SIZE = 10485760;

    /** Minimum age (seconds) of the whitelist file before it is refreshed: 24 hours. */
    const UPDATE_INTERVAL = 86400;

    /**
     * Whitelist kept in memory for the duration of the request so the file is
     * read at most once. Null means "not loaded yet".
     *
     * @var array|null
     */
    private static $cachedWhitelist = null;

    /**
     * Appends a ban entry to the log unless the IP is whitelisted.
     *
     * @param mixed $ip     Address being banned.
     * @param mixed $reason Free-text reason; may originate from request data.
     *
     * @return void
     */
    public static function logBan($ip, $reason)
    {
        // Known good bots, the server itself and maintenance IPs are never logged.
        if (self::isWhitelisted($ip)) {
            return;
        }

        // Safety valve: rotate before writing once the log exceeds MAX_SIZE.
        if (file_exists(self::LOG_FILE) && filesize(self::LOG_FILE) > self::MAX_SIZE) {
            self::rotateLog();
        }

        $message = sprintf(
            '[%s] [IP:%s] [REASON:%s]' . PHP_EOL,
            date('Y-m-d H:i:s'),
            self::sanitizeLogField($ip),
            self::sanitizeLogField($reason)
        );

        // Best-effort append; a failed write must not break the request.
        file_put_contents(self::LOG_FILE, $message, FILE_APPEND | LOCK_EX);
    }

    /**
     * Tells whether the given IP must never be banned.
     *
     * Checks, in order: loopback / SERVER_ADDR, the PS_MAINTENANCE_IP
     * configuration entries (exact addresses or CIDR blocks), then the
     * downloaded crawler ranges.
     *
     * @param mixed $ip Address to test.
     *
     * @return bool
     */
    public static function isWhitelisted($ip)
    {
        // 1. Cheapest check: the server talking to itself.
        $serverIps = ['127.0.0.1', '::1'];
        if (!empty($_SERVER['SERVER_ADDR'])) {
            $serverIps[] = $_SERVER['SERVER_ADDR'];
        }
        if (in_array($ip, $serverIps, true)) {
            return true;
        }

        // 2. PrestaShop maintenance IPs (comma separated). ipMatch() falls back
        //    to strict equality for entries without a "/", so plain addresses
        //    behave exactly as before while CIDR entries are now honoured too.
        $maintenanceIps = Configuration::get('PS_MAINTENANCE_IP');
        if ($maintenanceIps) {
            $adminIps = array_filter(array_map('trim', explode(',', $maintenanceIps)));
            foreach ($adminIps as $adminIp) {
                if (self::ipMatch($ip, $adminIp)) {
                    return true;
                }
            }
        }

        // 3. Crawler ranges, refreshed at most once per UPDATE_INTERVAL.
        self::updateWhitelistIfNeeded();

        if (self::$cachedWhitelist === null) {
            $loaded = file_exists(self::WHITELIST_FILE) ? include self::WHITELIST_FILE : [];
            // A truncated or empty file makes include return a non-array
            // (e.g. int 1); treat that as "no whitelist" instead of iterating it.
            self::$cachedWhitelist = is_array($loaded) ? $loaded : [];
        }

        foreach (self::$cachedWhitelist as $cidr) {
            if (self::ipMatch($ip, $cidr)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Refreshes the whitelist file when it is missing or older than
     * UPDATE_INTERVAL by downloading the published crawler IP ranges.
     *
     * On failure the retry is postponed by a full interval: an existing file is
     * touched, and when no file exists yet an empty list is written so that the
     * downloads are not repeated on every call.
     *
     * @return void
     */
    private static function updateWhitelistIfNeeded()
    {
        $fileExists = file_exists(self::WHITELIST_FILE);
        if ($fileExists) {
            $lastModified = filemtime(self::WHITELIST_FILE);
            if ($lastModified !== false && (time() - $lastModified) < self::UPDATE_INTERVAL) {
                return;
            }
        }

        $urls = [
            'https://developers.google.com/search/apis/ipranges/googlebot.json',
            'https://www.bing.com/toolbox/bingbot.json',
            'https://openai.com/chatgpt-user.json',
            'https://openai.com/searchbot.json',
            'https://openai.com/gptbot.json'
        ];

        $cidrs = [];
        foreach ($urls as $url) {
            $cidrs = array_merge($cidrs, self::extractIpPrefix($url));
        }

        if (!empty($cidrs)) {
            $cidrs = array_values(array_unique($cidrs));
            self::writeWhitelistFile($cidrs);
            self::$cachedWhitelist = $cidrs;

            return;
        }

        // Nothing could be fetched (network issue, format change, ...).
        if ($fileExists) {
            // Keep the previous ranges and just push the next retry back.
            touch(self::WHITELIST_FILE);

            return;
        }

        // First run with no data at all: persist an empty list, otherwise every
        // call would repeat all the blocking HTTP requests above.
        self::writeWhitelistFile([]);
        self::$cachedWhitelist = [];
    }

    /**
     * Writes the whitelist as a PHP file returning an array and drops any stale
     * OPcache entry for it.
     *
     * @param array $cidrs List of CIDR strings.
     *
     * @return bool True when the file was written.
     */
    private static function writeWhitelistFile(array $cidrs)
    {
        $phpCode = "<?php\n\n// Auto-generated whitelist\nreturn " . var_export($cidrs, true) . ";\n";

        $written = file_put_contents(self::WHITELIST_FILE, $phpCode, LOCK_EX) !== false;

        // Without this OPcache could keep serving the previous version.
        if ($written && function_exists('opcache_invalidate')) {
            opcache_invalidate(self::WHITELIST_FILE, true);
        }

        return $written;
    }

    /**
     * Downloads one JSON range list and returns every IPv4/IPv6 prefix found
     * under its "prefixes" key.
     *
     * @param string $url Location of the JSON document.
     *
     * @return array List of CIDR strings; empty when the download or decoding fails.
     */
    public static function extractIpPrefix(string $url): array
    {
        $cidrs = [];

        $json = @Tools::file_get_contents($url);
        if (!is_string($json) || $json === '') {
            return $cidrs;
        }

        $data = json_decode($json, true);
        if (!is_array($data) || !isset($data['prefixes']) || !is_array($data['prefixes'])) {
            return $cidrs;
        }

        foreach ($data['prefixes'] as $prefix) {
            if (!is_array($prefix)) {
                continue;
            }
            // Only strings are kept: the values come from a remote document and
            // are later handed to string functions in ipMatch().
            foreach (['ipv4Prefix', 'ipv6Prefix'] as $key) {
                if (isset($prefix[$key]) && is_string($prefix[$key])) {
                    $cidrs[] = $prefix[$key];
                }
            }
        }

        return $cidrs;
    }

    /**
     * Matches an IPv4 or IPv6 address against a CIDR block.
     *
     * An entry without "/" is compared by strict string equality. Both sides
     * must belong to the same address family; malformed or out-of-range prefix
     * lengths never match.
     *
     * @param mixed $ip   Address to test.
     * @param mixed $cidr Address or "network/prefix-length".
     *
     * @return bool
     */
    private static function ipMatch($ip, $cidr)
    {
        if (!is_string($ip) || !is_string($cidr)) {
            return false;
        }

        if (strpos($cidr, '/') === false) {
            return $ip === $cidr;
        }

        list($subnet, $mask) = explode('/', $cidr, 2);

        // The prefix length must be a plain decimal number.
        if (preg_match('/^[0-9]{1,3}\z/', $mask) !== 1) {
            return false;
        }
        $prefixLength = (int) $mask;

        if (filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)
            && filter_var($subnet, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)
        ) {
            $maxBits = 32;
        } elseif (filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6)
            && filter_var($subnet, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6)
        ) {
            $maxBits = 128;
        } else {
            // Invalid address or mixed families.
            return false;
        }

        if ($prefixLength > $maxBits) {
            return false;
        }

        // Compare the packed (binary) forms; this works identically for both
        // families and does not depend on the platform integer width.
        $ipBin = inet_pton($ip);
        $subnetBin = inet_pton($subnet);
        if ($ipBin === false || $subnetBin === false) {
            return false;
        }

        // intdiv keeps the byte count an int so it is a valid string offset.
        $fullBytes = intdiv($prefixLength, 8);
        $remainingBits = $prefixLength % 8;

        if ($fullBytes > 0 && substr($ipBin, 0, $fullBytes) !== substr($subnetBin, 0, $fullBytes)) {
            return false;
        }

        if ($remainingBits === 0) {
            return true;
        }

        // Here $prefixLength < $maxBits, so offset $fullBytes is inside the string.
        $bitmask = (0xFF << (8 - $remainingBits)) & 0xFF;

        return (ord($ipBin[$fullBytes]) & $bitmask) === (ord($subnetBin[$fullBytes]) & $bitmask);
    }

    /**
     * Rotates the log:
     * 1. Deletes the previous ".old" backup, if any.
     * 2. Renames the current log to ".old".
     * 3. The next write creates a fresh log file.
     *
     * @return void
     */
    private static function rotateLog()
    {
        $backupFile = self::LOG_FILE . '.old';

        if (file_exists($backupFile)) {
            @unlink($backupFile);
        }

        // Best effort: if the rename fails, logging simply continues in the
        // current file.
        @rename(self::LOG_FILE, $backupFile);
    }

    /**
     * Returns the client IP.
     *
     * Proxy headers are honoured only when the TCP peer (REMOTE_ADDR) is inside
     * a Cloudflare range; otherwise REMOTE_ADDR is returned as is.
     *
     * @return string Client address, or '' when REMOTE_ADDR is not set.
     */
    public static function getRealIp()
    {
        // The address of the TCP peer: the only value not supplied by the client.
        $remoteAddr = $_SERVER['REMOTE_ADDR'] ?? '';

        if (self::isCloudflareIp($remoteAddr)) {
            if (!empty($_SERVER['HTTP_CF_CONNECTING_IP'])
                && filter_var($_SERVER['HTTP_CF_CONNECTING_IP'], FILTER_VALIDATE_IP)
            ) {
                return $_SERVER['HTTP_CF_CONNECTING_IP'];
            }

            if (!empty($_SERVER['HTTP_X_FORWARDED_FOR'])) {
                // Cloudflare appends the connecting address to any
                // X-Forwarded-For value the client already sent, so only the
                // LAST entry is trustworthy; earlier ones are client-controlled.
                $hops = explode(',', $_SERVER['HTTP_X_FORWARDED_FOR']);
                $candidate = trim(end($hops));
                if (filter_var($candidate, FILTER_VALIDATE_IP)) {
                    return $candidate;
                }
            }
        }

        // Not behind Cloudflare (or no usable header): ignore every header.
        return $remoteAddr;
    }

    /**
     * Checks whether the IP belongs to one of the hard-coded Cloudflare ranges.
     *
     * @param mixed $ip Address to test.
     *
     * @return bool
     */
    private static function isCloudflareIp($ip)
    {
        static $cf_ips = [
            // IPv4
            '173.245.48.0/20',
            '103.21.244.0/22',
            '103.22.200.0/22',
            '103.31.4.0/22',
            '141.101.64.0/18',
            '108.162.192.0/18',
            '190.93.240.0/20',
            '188.114.96.0/20',
            '197.234.240.0/22',
            '198.41.128.0/17',
            '162.158.0.0/15',
            '104.16.0.0/13',
            '104.24.0.0/14',
            '172.64.0.0/13',
            '131.0.72.0/22',
            // IPv6
            '2400:cb00::/32',
            '2606:4700::/32',
            '2803:f800::/32',
            '2405:b500::/32',
            '2405:8100::/32',
            '2a06:98c0::/29',
            '2c0f:f248::/32'
        ];

        foreach ($cf_ips as $cidr) {
            if (self::ipMatch($ip, $cidr)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Makes a value safe to embed in a single log line by replacing control
     * characters (including CR/LF) with a space, so that request-derived text
     * cannot forge extra log entries.
     *
     * @param mixed $value Value to log; non-scalars are logged as an empty string.
     *
     * @return string
     */
    private static function sanitizeLogField($value)
    {
        $text = is_scalar($value) ? (string) $value : '';

        return (string) preg_replace('/[\x00-\x1F\x7F]+/', ' ', $text);
    }
}