<?php
defined('ABSPATH') || exit;

require_once dirname(dirname(__FILE__)) . '/whitelist.php';

/**
 * Score a PHP file against weighted regex heuristics and report suspicious behavior.
 *
 * Each pattern group (obfuscation, network, WordPress manipulation, encoding)
 * contributes the weight of every pattern that matches at least once in the
 * file. The summed score is mapped to a severity/confidence pair, and a single
 * finding is returned when the score reaches 1 or more.
 *
 * Results are cached in a transient keyed by the MD5 of the file contents
 * (6 hours for critical/high findings, 24 hours otherwise). When the global
 * $wpsec_force_deep_scan is non-empty the cached entry is deleted and the file
 * is rescanned.
 *
 * NOTE(review): the cache key depends on file contents only, while the
 * "very_low" risk skip depends on the path, so two identical files in different
 * locations can influence each other's result depending on scan order.
 *
 * @param string $file_path Path of the file to inspect.
 * @param string $ext       File extension; only the exact value 'php' is scanned.
 * @return array Empty array when nothing is reported (whitelisted, non-PHP,
 *               unreadable, empty, larger than 5MB, very-low-risk context, or
 *               score below 1); otherwise a list with one finding containing the
 *               keys type, name, severity, confidence, description, score,
 *               metrics and file_hash.
 */
function wpsec_check_file_heuristics($file_path, $ext) {
    // Early exit for whitelisted files (e.g. our own plugin) to prevent self-flagging.
    if (function_exists('wpsec_is_file_whitelisted') && wpsec_is_file_whitelisted($file_path)) {
        return [];
    }

    // Test fixtures get enhanced logging. 'test-' also covers 'test-heuristic'.
    $is_test_file = (stripos($file_path, 'test-') !== false ||
                     stripos($file_path, 'eicar') !== false);
    $is_deep_scan = get_option('wpsec_deep_scan_mode', false);
    $verbose = ($is_test_file || $is_deep_scan);

    if ($verbose) {
        wpsec_debug_log(" HEURISTIC ENGINE: Starting scan for $file_path", 'info');
    }

    // Skip non-PHP files before hashing or consulting the cache: the cache is
    // keyed by content hash only, so a non-PHP file with the same bytes as a
    // scanned PHP file must not inherit that file's cached findings.
    if ($ext !== 'php') {
        if ($verbose) {
            wpsec_debug_log(" HEURISTIC ENGINE: Skipping non-PHP file: $file_path (ext: $ext)", 'info');
        }
        return [];
    }

    // Hash the file for the cache key; guard first so unreadable paths do not
    // raise PHP warnings from md5_file().
    $file_hash = (is_file($file_path) && is_readable($file_path)) ? md5_file($file_path) : false;
    if ($file_hash === false) {
        if ($verbose) {
            wpsec_debug_log(" HEURISTIC ENGINE: Could not calculate file hash for $file_path", 'info');
        }
        return [];
    }

    global $wpsec_current_scan_id, $wpsec_force_deep_scan;

    $cache_key = 'wpsec_heuristic_' . $file_hash;

    if (!empty($wpsec_force_deep_scan)) {
        // Forced deep scan: drop any cached entry so the file is analysed afresh.
        delete_transient($cache_key);

        if (!empty($wpsec_current_scan_id)) {
            wpsec_debug_log("WPFort Deep Scan: Bypassing heuristic cache for {$file_path}", 'info');
        }

        if ($is_test_file) {
            wpsec_debug_log(" HEURISTIC ENGINE: Deep scan - bypassing cache for: " . basename($file_path), 'info');
        }
    } else {
        // Normal scan: reuse a cached result when one exists. An empty array is
        // a valid cached value, so only a strict false means "not cached".
        $cached_result = get_transient($cache_key);
        if ($cached_result !== false) {
            wpsec_debug_log(" HEURISTIC ENGINE: Using cached result for: $file_path", 'info');
            return $cached_result;
        }
    }

    if ($verbose) {
        wpsec_debug_log(" HEURISTIC ENGINE: Processing PHP file: $file_path", 'info');
    }

    $context = wpsec_get_file_context($file_path);
    $risk_level = (is_array($context) && isset($context['risk_level'])) ? $context['risk_level'] : null;

    // Skip files whose context is classified as very low risk.
    // NOTE(review): this skip also applies to test files, even though the log
    // message below says test files are processed "regardless of risk level".
    // Confirm which behavior is intended before changing it.
    if ($risk_level === 'very_low') {
        if ($verbose) {
            wpsec_debug_log(" HEURISTIC ENGINE: Skipping very low-risk file: $file_path", 'info');
        }
        return [];
    }

    if ($is_test_file) {
        wpsec_debug_log(" HEURISTIC ENGINE: Processing test file regardless of risk level: $file_path", 'info');
    }

    // Enforce the 5MB limit BEFORE reading, so oversized files are never
    // loaded into memory. If the size cannot be determined, fall through to the
    // post-read length check below.
    $max_bytes = 5 * 1024 * 1024;
    $size_on_disk = filesize($file_path);
    if ($size_on_disk !== false && $size_on_disk > $max_bytes) {
        return [];
    }

    $content = file_get_contents($file_path);
    if ($content === false || $content === '') {
        return [];
    }

    if (strlen($content) > $max_bytes) {
        return [];
    }

    // Pattern groups keyed by the metric they feed. Each entry maps a regex
    // body (wrapped as /.../i below) to the weight added when it matches at
    // least once. No /s modifier is used, so '.' does not cross line breaks.
    $pattern_groups = [
        // Check 1: Code Obfuscation
        'obfuscation_score' => [
            // eval() of decoded/decompressed data (weight: 2.0)
            'eval\s*\(\s*base64_decode' => 2.0,
            'eval\s*\(\s*gzinflate' => 2.0,
            'eval\s*\(\s*str_rot13' => 2.0,
            'eval\s*\(\s*gzuncompress' => 2.0,
            'eval\s*\(\s*convert_uudecode' => 2.0,

            // Dynamic code execution helpers (weight: 1.0)
            'preg_replace\s*\(\s*[\'"]\/\.\*\/e[\'"]\s*,' => 1.0,
            'create_function\s*\(' => 1.0,

            // Hex escapes and very long single-quoted string assignments (weight: 0.5)
            '\\\\x[0-9a-fA-F]{2}' => 0.5,
            '\$[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*\s*=\s*\'[^\']{100,}\'' => 0.5
        ],

        // Check 2: Suspicious Network Communication
        'network_score' => [
            // Socket opened to a literal IPv4 address (weight: 2.0)
            'fsockopen\s*\(\s*[\'"]\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}[\'"]\s*,' => 2.0,

            // curl_exec() on a variable handle (weight: 1.0)
            'curl_exec\s*\(\s*\$[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*\s*\)' => 1.0,

            // Fetches from a literal http(s) URL (weights: 0.5 and 0.25)
            'file_get_contents\s*\(\s*[\'"]https?:\/\/' => 0.5,
            'wp_remote_get\s*\(\s*[\'"]https?:\/\/' => 0.25
        ],

        // Check 3: WordPress Variable/Database Manipulation
        'wp_manipulation_score' => [
            // Database manipulation patterns
            '\$wpdb->query\s*\(\s*\$' => 1.0,
            'update_option\s*\(\s*\$' => 0.5,
            '\$table_prefix\s*=' => 1.0,
            'ALTER\s+TABLE\s+\$' => 1.0,

            // User manipulation patterns
            'wp_create_user\s*\(' => 1.5,            // User creation backdoors
            'wp_insert_user\s*\(' => 1.5,            // User insertion backdoors
            'wp_update_user\s*\(' => 1.0,            // User modification
            'wp_set_current_user\s*\(' => 1.5,       // User impersonation

            // Hook injection patterns
            'add_action\s*\(\s*[\'"]wp_head[\'"]' => 1.0,     // Header injection
            'add_action\s*\(\s*[\'"]wp_footer[\'"]' => 1.0,   // Footer injection
            'add_action\s*\(\s*[\'"]init[\'"]' => 0.5,        // Init hook abuse
            'add_action\s*\(\s*[\'"]wp_enqueue_scripts[\'"]' => 0.8, // Script injection

            // Database direct access patterns
            'global\s+\$wpdb.*->query' => 1.0,       // Global wpdb usage
            '\$wpdb.*INSERT\s+INTO' => 1.0,          // Direct insertions
            '\$wpdb.*UPDATE\s+.*SET' => 0.8,         // Direct updates
            '\$wpdb.*DELETE\s+FROM' => 1.2,          // Direct deletions

            // Admin/capability bypass patterns
            'wp_set_auth_cookie\s*\(' => 1.3,        // Authentication bypass
            'current_user_can\s*\(\s*[\'"]administrator[\'"]' => 0.7, // Admin checks (suspicious in some contexts)
            'add_role\s*\(' => 1.0,                  // Role creation
            'remove_role\s*\(' => 1.0,               // Role manipulation
        ],

        // Check 4: Encoded Content
        'encoding_score' => [
            // Long literal payload passed to base64_decode() (weight: 2.0)
            'base64_decode\s*\(\s*[\'"][A-Za-z0-9+\/=]{100,}[\'"]\s*\)' => 2.0,

            // Long literal passed to str_rot13(), or inflated base64 data (weight: 1.0)
            'str_rot13\s*\(\s*[\'"][A-Za-z0-9+\/=]{50,}[\'"]\s*\)' => 1.0,
            'gzinflate\s*\(\s*base64_decode' => 1.0
        ],
    ];

    // Each pattern counts at most once per file, no matter how often it occurs.
    $metrics = [];
    foreach ($pattern_groups as $metric => $patterns) {
        $metrics[$metric] = 0;
        foreach ($patterns as $pattern => $weight) {
            if (preg_match('/' . $pattern . '/i', $content)) {
                $metrics[$metric] += $weight;
            }
        }
    }

    $total_score = array_sum($metrics);

    // Severity stays 'info' (and nothing is reported) for scores below 1.
    $severity = 'info';
    $results = [];

    if ($total_score >= 1) {
        if ($total_score >= 6) {
            $severity = 'critical';
            $confidence = 90;
            $summary = 'Multiple high-risk code patterns detected';
        } elseif ($total_score >= 4) {
            $severity = 'high';
            $confidence = 80;
            $summary = 'Several suspicious code patterns detected';
        } elseif ($total_score >= 2) {
            $severity = 'medium';
            $confidence = 70;
            $summary = 'Some potentially suspicious code patterns';
        } else {
            $severity = 'low';
            $confidence = 60;
            $summary = 'Minor code anomalies detected';
        }

        // Append one detail sentence per metric that contributed to the score.
        $metric_labels = [
            'obfuscation_score' => 'Code obfuscation detected (score: %.1f)',
            'network_score' => 'Suspicious network activity (score: %.1f)',
            'wp_manipulation_score' => 'WordPress core manipulation (score: %.1f)',
            'encoding_score' => 'Suspicious encoding patterns (score: %.1f)',
        ];

        $description = [$summary];
        foreach ($metric_labels as $metric => $label) {
            if ($metrics[$metric] > 0) {
                $description[] = sprintf($label, $metrics[$metric]);
            }
        }

        $results[] = [
            'type' => 'heuristic',
            'name' => 'Suspicious Code Behavior',
            'severity' => $severity,
            'confidence' => $confidence,
            'description' => implode('. ', $description),
            'score' => $total_score,
            'metrics' => $metrics,
            'file_hash' => $file_hash // Include hash for cache validation
        ];
    }

    // Cache for 24 hours by default; critical/high findings expire after
    // 6 hours so those files are rescanned more frequently.
    $cache_duration = ($severity === 'critical' || $severity === 'high') ? 6 * HOUR_IN_SECONDS : DAY_IN_SECONDS;
    set_transient($cache_key, $results, $cache_duration);

    if ($verbose) {
        wpsec_debug_log(" HEURISTIC ENGINE: Scan results for $file_path: " . json_encode($results), 'info');
    }

    return $results;
}