<?php
/**
 * UTF-8 Sanitization Tests
 * Tests that malformed UTF-8 characters are properly handled
 */

require_once __DIR__ . '/../api/config.php';
require_once __DIR__ . '/../src/classes/autoload.php';

/**
 * Self-contained test runner for the sanitizeUtf8() helper.
 *
 * sanitizeUtf8() is not defined in this file; it is presumably provided by one
 * of the files required at the top of this script. Each test feeds it a string
 * or (nested) array and records one or more pass/fail assertions, which are
 * echoed as they happen and summarised at the end of run().
 */
class Utf8SanitizationTest
{
    /**
     * Outcome of every assertion, in execution order.
     *
     * @var array<int, array{passed: bool, message: string, error: string}>
     */
    private array $results = [];

    /**
     * Executes every test case in a fixed order and prints a summary.
     */
    public function run(): void
    {
        echo "Utf8SanitizationTest:\n";

        $this->testSanitizeUtf8String();
        $this->testSanitizeUtf8Array();
        $this->testSanitizeUtf8NestedArray();
        $this->testMalformedUtf8Characters();
        $this->testControlCharacters();
        $this->testJsonEncodingWithSanitization();
        $this->testSendResponseWithMalformedData();

        $this->displayResults();
    }

    /**
     * A plain ASCII string must come back unchanged.
     */
    private function testSanitizeUtf8String(): void
    {
        $validString = "Hello World";
        $result = sanitizeUtf8($validString);

        $this->assert(
            $result === $validString,
            'sanitizeUtf8 preserves valid UTF-8 strings'
        );
    }

    /**
     * A flat associative array of clean strings must stay an array with its
     * values intact (only the 'name' entry is checked).
     */
    private function testSanitizeUtf8Array(): void
    {
        $data = [
            'name' => 'John Doe',
            'address' => '123 Main St',
            'city' => 'London'
        ];

        $result = sanitizeUtf8($data);

        $this->assert(
            is_array($result) && $result['name'] === 'John Doe',
            'sanitizeUtf8 handles arrays correctly'
        );
    }

    /**
     * A nested array must keep its shape: the 'pii_blocks' list is still
     * present and still has two entries after sanitization.
     */
    private function testSanitizeUtf8NestedArray(): void
    {
        $data = [
            'page_number' => 1,
            'pii_blocks' => [
                ['text' => 'John Doe', 'type' => 'NAME'],
                ['text' => '123456789', 'type' => 'SSN']
            ]
        ];

        $result = sanitizeUtf8($data);

        $this->assert(
            is_array($result) && isset($result['pii_blocks']) && count($result['pii_blocks']) === 2,
            'sanitizeUtf8 handles nested arrays'
        );
    }

    /**
     * A string containing an invalid UTF-8 sequence must be encodable by
     * json_encode() once sanitized.
     */
    private function testMalformedUtf8Characters(): void
    {
        // 0xC3 opens a two-byte sequence, but 0x28 is not a valid continuation byte.
        $malformed = "Hello\xC3\x28World";

        $result = sanitizeUtf8($malformed);

        // json_encode() returns false on invalid UTF-8 unless told otherwise.
        $json = json_encode(['text' => $result]);

        $this->assert(
            $json !== false,
            'Malformed UTF-8 characters are sanitized and JSON-encodable'
        );
    }

    /**
     * NUL and 0x01 bytes must be stripped, and the result must be
     * JSON-encodable. (The input also contains 0x1F, which is not asserted on.)
     */
    private function testControlCharacters(): void
    {
        $withControls = "Hello\x00World\x01Test\x1F";

        $result = sanitizeUtf8($withControls);

        $this->assert(
            strpos($result, "\x00") === false && strpos($result, "\x01") === false,
            'Control characters are removed'
        );

        $json = json_encode(['text' => $result]);
        $this->assert(
            $json !== false,
            'Text with control characters is JSON-encodable after sanitization'
        );
    }

    /**
     * A response-shaped payload mixing clean text, invalid UTF-8 and a NUL byte
     * must encode to JSON and decode back with the same page structure.
     */
    private function testJsonEncodingWithSanitization(): void
    {
        $apiResponse = [
            'success' => true,
            'total_pages' => 3,
            'pages' => [
                [
                    'page_number' => 1,
                    'text' => "Normal text"
                ],
                [
                    'page_number' => 2,
                    'text' => "Text with\xC3\x28invalid UTF-8"
                ],
                [
                    'page_number' => 3,
                    'text' => "Text with\x00null byte"
                ]
            ]
        ];

        $sanitized = sanitizeUtf8($apiResponse);
        $json = json_encode($sanitized, JSON_PRETTY_PRINT);

        $this->assert(
            $json !== false,
            'Complex API response with malformed UTF-8 is sanitized and encodable'
        );

        // Round-trip through JSON to confirm the structure survived.
        $decoded = json_decode($json, true);
        $this->assert(
            $decoded !== null && $decoded['total_pages'] === 3 && count($decoded['pages']) === 3,
            'Sanitization preserves data structure'
        );
    }

    /**
     * Reproduces the sanitize-then-encode sequence by hand (sendResponse itself
     * is not called) on a payload with invalid UTF-8 and control bytes.
     *
     * Records a failure carrying the exception message if an Exception is thrown.
     */
    private function testSendResponseWithMalformedData(): void
    {
        $data = [
            'success' => true,
            'message' => "Test\xC3\x28with\x00bad\x01chars"
        ];

        $success = false;
        $error = null;

        // Swallow anything the code under test might print. The buffer is
        // released in `finally` so it is closed on every exit path, including
        // throwables that are not caught here.
        ob_start();
        try {
            $sanitized = sanitizeUtf8($data);
            $json = json_encode($sanitized, JSON_PRETTY_PRINT);
            $success = ($json !== false);
        } catch (Exception $e) {
            $error = $e->getMessage();
        } finally {
            ob_end_clean();
        }

        // Assertions echo their outcome, so they must run after the buffer is closed.
        if ($error !== null) {
            $this->assert(false, 'sendResponse handles malformed data', $error);
            return;
        }

        $this->assert(
            $success,
            'sendResponse sanitization prevents JSON encoding errors'
        );
    }

    /**
     * Records an assertion outcome and echoes a one-line report for it.
     *
     * @param bool   $condition True when the check passed.
     * @param string $message   Human-readable description of the check.
     * @param string $error     Optional detail, shown only when the check failed.
     */
    private function assert(bool $condition, string $message, string $error = ''): void
    {
        $this->results[] = [
            'passed' => $condition,
            'message' => $message,
            'error' => $error
        ];

        $status = $condition ? '✓' : '✗';
        $output = "  $status $message";
        if (!$condition && $error) {
            $output .= " - Error: $error";
        }
        echo "$output\n";
    }

    /**
     * Prints how many of the recorded assertions passed.
     */
    private function displayResults(): void
    {
        $total = count($this->results);
        $passed = count(array_filter(
            $this->results,
            static fn(array $r): bool => $r['passed']
        ));

        echo "\n  {$passed}/{$total} assertions passed\n\n";
    }
}

// Execute the suite only when this file is the script being run (its basename
// matches that of the entry script), not when it is pulled in via include/require.
if (basename($_SERVER['PHP_SELF']) === basename(__FILE__)) {
    $test = new Utf8SanitizationTest();

    $test->run();
}
