<?php

/**
 * Ban logging and trusted-network detection for the bot limiter.
 *
 * Responsibilities:
 *  - append ban events to a size-capped log file (logBan / rotateLog);
 *  - decide whether an IP must never be banned: the server itself, the shop's
 *    maintenance IPs, or a crawler network published by the URLs listed in
 *    updateWhitelistIfNeeded() (isWhitelisted);
 *  - resolve the client IP when the site sits behind Cloudflare (getRealIp).
 *
 * Depends on the PrestaShop `Configuration` and `Tools` classes and on the
 * `_PS_ROOT_DIR_` / `_PS_CACHE_DIR_` constants being defined by the host application.
 */
class BotLogger
{
    /** Path of the ban log. */
    const LOG_FILE = _PS_ROOT_DIR_ . '/var/logs/botlimiter_ban.log';

    /** Generated PHP file that returns the list of whitelisted CIDR blocks. */
    const WHITELIST_FILE = _PS_CACHE_DIR_ . DIRECTORY_SEPARATOR . 'botlimiter_whitelist.php';

    /** Log size (bytes) above which the log is rotated: 10 MB. */
    const MAX_SIZE = 10485760;

    /** Minimum age (seconds) of the whitelist file before it is refreshed: 24 hours. */
    const UPDATE_INTERVAL = 86400;

    /**
     * Whitelist kept in memory for the duration of the request so the file is
     * read at most once. `null` means "not loaded yet".
     *
     * @var array|null
     */
    private static $cachedWhitelist = null;

    /**
     * Appends one ban record to the log unless the IP is whitelisted.
     *
     * The record format is `[Y-m-d H:i:s] [IP:<ip>] [REASON:<reason>]` followed by PHP_EOL.
     * The log is rotated first when it already exceeds MAX_SIZE.
     *
     * @param string $ip     IP address being banned
     * @param string $reason free-text reason stored in the record
     *
     * @return void
     */
    public static function logBan($ip, $reason)
    {
        // Trusted addresses are never logged as banned.
        if (self::isWhitelisted($ip)) {
            return;
        }

        // Safety valve: keep the log from growing without bound.
        if (file_exists(self::LOG_FILE) && filesize(self::LOG_FILE) > self::MAX_SIZE) {
            self::rotateLog();
        }

        $message = sprintf(
            "[%s] [IP:%s] [REASON:%s]" . PHP_EOL,
            date('Y-m-d H:i:s'),
            self::sanitizeLogField($ip),
            self::sanitizeLogField($reason)
        );

        file_put_contents(self::LOG_FILE, $message, FILE_APPEND | LOCK_EX);
    }

    /**
     * Makes a value safe to embed in a single log line.
     *
     * Line breaks are replaced by spaces so a caller-supplied value cannot
     * forge additional log records.
     *
     * @param mixed $value value to embed (cast to string)
     *
     * @return string
     */
    private static function sanitizeLogField($value)
    {
        return str_replace(["\r", "\n"], ' ', (string) $value);
    }

    /**
     * Checks if the given IP belongs to the whitelisted networks.
     *
     * Checks run from cheapest to most expensive:
     *  1. loopback addresses and `$_SERVER['SERVER_ADDR']`;
     *  2. the comma-separated `PS_MAINTENANCE_IP` configuration value;
     *  3. the CIDR blocks stored in WHITELIST_FILE (refreshed first when stale).
     *
     * @param string $ip IP address to test
     *
     * @return bool true when the IP must not be banned
     */
    public static function isWhitelisted($ip)
    {
        // 1. The server itself (localhost or the server's own address).
        $serverIps = ['127.0.0.1', '::1'];
        if (!empty($_SERVER['SERVER_ADDR'])) {
            $serverIps[] = $_SERVER['SERVER_ADDR'];
        }
        if (in_array($ip, $serverIps, true)) {
            return true;
        }

        // 2. PrestaShop maintenance IPs.
        $maintenanceIps = Configuration::get('PS_MAINTENANCE_IP');
        if ($maintenanceIps) {
            $adminIps = array_filter(array_map('trim', explode(',', $maintenanceIps)));
            if (in_array($ip, $adminIps, true)) {
                return true;
            }
        }

        // 3. Published crawler networks.
        self::updateWhitelistIfNeeded();

        if (self::$cachedWhitelist === null) {
            $loaded = file_exists(self::WHITELIST_FILE) ? include self::WHITELIST_FILE : [];
            // A damaged file can make include return something that is not an array.
            self::$cachedWhitelist = is_array($loaded) ? $loaded : [];
        }

        foreach (self::$cachedWhitelist as $cidr) {
            if (self::ipMatch($ip, $cidr)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Refreshes WHITELIST_FILE from the published JSON range lists when the file
     * is missing or older than UPDATE_INTERVAL.
     *
     * When no range could be fetched, the file's modification time is bumped
     * (or an empty whitelist is created when no file exists yet) so the next
     * attempt only happens after another UPDATE_INTERVAL instead of on every call.
     *
     * @return void
     */
    private static function updateWhitelistIfNeeded()
    {
        if (file_exists(self::WHITELIST_FILE)) {
            $lastModified = filemtime(self::WHITELIST_FILE);
            if ((time() - $lastModified) < self::UPDATE_INTERVAL) {
                return;
            }
        }

        $urls = [
            'https://developers.google.com/search/apis/ipranges/googlebot.json',
            'https://www.bing.com/toolbox/bingbot.json',
            'https://openai.com/chatgpt-user.json',
            'https://openai.com/searchbot.json',
            'https://openai.com/gptbot.json',
        ];

        $cidrs = [];
        foreach ($urls as $url) {
            $cidrs = array_merge($cidrs, self::extractIpPrefix($url));
        }

        if (!empty($cidrs)) {
            $cidrs = array_values(array_unique($cidrs));
            self::writeWhitelistFile($cidrs);
            // Use the fresh list for the rest of this request even if the write failed.
            self::$cachedWhitelist = $cidrs;

            return;
        }

        // Fetching failed (e.g. network issue): delay the next retry.
        if (file_exists(self::WHITELIST_FILE)) {
            touch(self::WHITELIST_FILE);
        } else {
            // Without a file every call would retry all downloads; an empty
            // whitelist gives the retry delay something to be measured against.
            self::writeWhitelistFile([]);
        }
    }

    /**
     * Writes the whitelist as a PHP file returning the given array.
     *
     * The code is written to a temporary file and then renamed over
     * WHITELIST_FILE, so a concurrent `include` never sees a half-written file.
     * The OPcache entry for the file is invalidated when OPcache is available.
     *
     * @param array $cidrs list of CIDR strings to store
     *
     * @return bool true when the file was replaced
     */
    private static function writeWhitelistFile(array $cidrs)
    {
        $phpCode = "<?php\n\n// Auto-generated whitelist\nreturn " . var_export($cidrs, true) . ";\n";
        $tmpFile = self::WHITELIST_FILE . '.' . uniqid('', true) . '.tmp';

        if (@file_put_contents($tmpFile, $phpCode, LOCK_EX) === false) {
            return false;
        }

        if (!@rename($tmpFile, self::WHITELIST_FILE)) {
            @unlink($tmpFile);

            return false;
        }

        // Make PHP drop the compiled copy of the previous file.
        if (function_exists('opcache_invalidate')) {
            opcache_invalidate(self::WHITELIST_FILE, true);
        }

        return true;
    }

    /**
     * Downloads a JSON range list and returns the CIDR blocks it contains.
     *
     * The document is expected to have a top-level `prefixes` list whose items
     * carry an `ipv4Prefix` and/or `ipv6Prefix` string. Anything that does not
     * have that shape is ignored.
     *
     * @param string $url location of the JSON document
     *
     * @return array list of CIDR strings; empty when the download or decoding fails
     */
    public static function extractIpPrefix(string $url): array
    {
        $json = @Tools::file_get_contents($url);
        if (!$json) {
            return [];
        }

        $data = json_decode($json, true);
        if (!is_array($data) || !isset($data['prefixes']) || !is_array($data['prefixes'])) {
            return [];
        }

        $cidrs = [];
        foreach ($data['prefixes'] as $prefix) {
            if (!is_array($prefix)) {
                continue;
            }
            foreach (['ipv4Prefix', 'ipv6Prefix'] as $key) {
                if (isset($prefix[$key]) && is_string($prefix[$key])) {
                    $cidrs[] = $prefix[$key];
                }
            }
        }

        return $cidrs;
    }

    /**
     * Matches an IPv4 or IPv6 address against a CIDR block.
     *
     * A `$cidr` without a `/` is compared to `$ip` as a plain string. A block
     * whose prefix length is not a decimal number, or exceeds 32 (IPv4) or
     * 128 (IPv6), never matches. Address families must agree.
     *
     * @param string $ip   address to test
     * @param string $cidr `address/prefix-length`, or a bare address
     *
     * @return bool
     */
    private static function ipMatch($ip, $cidr)
    {
        if (!is_string($ip) || !is_string($cidr)) {
            return false;
        }

        if (strpos($cidr, '/') === false) {
            return $ip === $cidr;
        }

        $parts = explode('/', $cidr, 2);
        $subnet = $parts[0];

        // The list comes from remote JSON: refuse anything that is not a plain number.
        if (!preg_match('/^\d{1,3}$/D', $parts[1])) {
            return false;
        }
        $mask = (int) $parts[1];

        // Match IPv4
        if (filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)
            && filter_var($subnet, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)
        ) {
            if ($mask > 32) {
                return false;
            }
            // Ones in the network part, zeros in the host part.
            $maskLong = ~((1 << (32 - $mask)) - 1);

            return (ip2long($ip) & $maskLong) === (ip2long($subnet) & $maskLong);
        }

        // Match IPv6
        if (filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6)
            && filter_var($subnet, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6)
        ) {
            if ($mask > 128) {
                return false;
            }

            $ipBin = inet_pton($ip);
            $subnetBin = inet_pton($subnet);
            if (!$ipBin || !$subnetBin) {
                return false;
            }

            // intdiv keeps the byte count an int: it is used as a string offset below.
            $bytes = intdiv($mask, 8);
            $bits = $mask % 8;

            // Compare the whole bytes covered by the prefix.
            if ($bytes > 0 && substr($ipBin, 0, $bytes) !== substr($subnetBin, 0, $bytes)) {
                return false;
            }

            // Compare the leading bits of the first partially covered byte.
            if ($bits > 0) {
                $bitmask = ~((1 << (8 - $bits)) - 1) & 0xFF;
                if ((ord($ipBin[$bytes]) & $bitmask) !== (ord($subnetBin[$bytes]) & $bitmask)) {
                    return false;
                }
            }

            return true;
        }

        return false;
    }

    /**
     * Rotates the log:
     *  1. deletes the existing `.old` backup, if any;
     *  2. renames the current log to `.old`.
     * The next write then starts a new log file.
     *
     * @return void
     */
    private static function rotateLog()
    {
        $backupFile = self::LOG_FILE . '.old';

        // Remove the previous backup.
        if (file_exists($backupFile)) {
            @unlink($backupFile);
        }

        // The current log becomes the backup.
        @rename(self::LOG_FILE, $backupFile);
    }

    /**
     * Returns the client IP without trusting client-controlled headers.
     *
     * Forwarding headers are read only when `REMOTE_ADDR` is inside a Cloudflare
     * range. `CF-Connecting-IP` is preferred; otherwise the LAST entry of
     * `X-Forwarded-For` is used, because earlier entries may have been sent by
     * the client itself while the last one is the address appended by Cloudflare.
     * In every other case `REMOTE_ADDR` is returned.
     *
     * @return string the resolved IP, or '' when `REMOTE_ADDR` is not set
     */
    public static function getRealIp()
    {
        // 1. Address of the peer of the actual connection.
        $remoteAddr = $_SERVER['REMOTE_ADDR'] ?? '';

        // 2. Headers are only meaningful when the peer is Cloudflare.
        if (self::isCloudflareIp($remoteAddr)) {
            if (!empty($_SERVER['HTTP_CF_CONNECTING_IP'])
                && filter_var($_SERVER['HTTP_CF_CONNECTING_IP'], FILTER_VALIDATE_IP)
            ) {
                return $_SERVER['HTTP_CF_CONNECTING_IP'];
            }

            if (!empty($_SERVER['HTTP_X_FORWARDED_FOR'])) {
                $hops = explode(',', $_SERVER['HTTP_X_FORWARDED_FOR']);
                // Last hop = the one added by Cloudflare; the first can be forged.
                $ip = trim(end($hops));
                if (filter_var($ip, FILTER_VALIDATE_IP)) {
                    return $ip;
                }
            }
        }

        // 3. Direct connection (or unusable headers): ignore headers entirely.
        return $remoteAddr;
    }

    /**
     * Checks if the IP belongs to the hardcoded list of Cloudflare networks.
     *
     * NOTE(review): the list is static; compare it periodically with the ranges
     * Cloudflare publishes.
     *
     * @param string $ip address to test
     *
     * @return bool
     */
    private static function isCloudflareIp($ip)
    {
        static $cf_ips = [
            // IPv4
            '173.245.48.0/20',
            '103.21.244.0/22',
            '103.22.200.0/22',
            '103.31.4.0/22',
            '141.101.64.0/18',
            '108.162.192.0/18',
            '190.93.240.0/20',
            '188.114.96.0/20',
            '197.234.240.0/22',
            '198.41.128.0/17',
            '162.158.0.0/15',
            '104.16.0.0/13',
            '104.24.0.0/14',
            '172.64.0.0/13',
            '131.0.72.0/22',
            // IPv6
            '2400:cb00::/32',
            '2606:4700::/32',
            '2803:f800::/32',
            '2405:b500::/32',
            '2405:8100::/32',
            '2a06:98c0::/29',
            '2c0f:f248::/32',
        ];

        foreach ($cf_ips as $cidr) {
            if (self::ipMatch($ip, $cidr)) {
                return true;
            }
        }

        return false;
    }
}