Navigation

Php

How to Handle Binary File Operations

Work with binary files in PHP for image processing, file format validation, and low-level data manipulation using proper binary-safe functions.

Table Of Contents

Working Solution

Binary file operations require special handling to prevent data corruption. Unlike text files, binary files contain raw bytes that must be read and written without any newline or character-encoding translation. This byte-exact handling is essential when working with images, executables, and custom file formats.

<?php

/**
 * Read an entire file into a string using binary-safe stream I/O.
 *
 * The file is read in 8192-byte chunks until end-of-file is reported.
 *
 * @param string $filename Path of the file to read.
 * @return string The raw bytes of the file (may contain NUL bytes).
 * @throws RuntimeException If the file cannot be opened or a read fails.
 */
function readBinaryFile(string $filename): string {
    $handle = fopen($filename, 'rb'); // 'b' flag for binary mode

    if (!$handle) {
        throw new RuntimeException("Cannot open binary file: $filename");
    }

    try {
        $content = '';
        while (!feof($handle)) {
            $chunk = fread($handle, 8192);
            if ($chunk === false) {
                // A failed read must not be passed off as a complete, shorter file.
                throw new RuntimeException("Cannot read binary file: $filename");
            }
            $content .= $chunk;
        }

        return $content;
    } finally {
        // Runs on both the success and the failure path.
        fclose($handle);
    }
}

/**
 * Write a byte string to a file, replacing any existing contents.
 *
 * @param string $filename Path of the file to create or truncate.
 * @param string $data     Raw bytes to write.
 * @return bool True only if every byte was written and the file closed
 *              cleanly; false if the file could not be opened, the write
 *              was short or failed, or closing failed.
 */
function writeBinaryFile(string $filename, string $data): bool {
    $handle = fopen($filename, 'wb');

    if (!$handle) {
        return false;
    }

    $written = fwrite($handle, $data);
    $closed = fclose($handle);

    // fwrite() may report fewer bytes than requested; a partial write is a
    // failure, the same rule patchBinaryFile() applies.
    return $written === strlen($data) && $closed;
}

/**
 * Read the leading bytes of a file so its format can be identified.
 *
 * @param string $filename Path of the file to inspect.
 * @param int    $bytes    Maximum number of bytes to read from the start.
 *                         Passed straight to fread(), which rejects values
 *                         below 1 (a ValueError on PHP 8).
 * @return string Up to $bytes raw bytes; shorter (possibly empty) when the
 *                file is smaller than requested.
 * @throws RuntimeException If the file cannot be opened or read.
 */
function getFileSignature(string $filename, int $bytes = 16): string {
    $handle = fopen($filename, 'rb');

    if (!$handle) {
        throw new RuntimeException("Cannot open file: $filename");
    }

    try {
        $signature = fread($handle, $bytes);
    } finally {
        fclose($handle);
    }

    if ($signature === false) {
        // Without this check a read error would be returned as false and
        // silently coerced to an empty signature.
        throw new RuntimeException("Cannot read file: $filename");
    }

    return $signature;
}

/**
 * Identify a file's format from the magic number at the start of the file.
 *
 * @param string $filename Path of the file to inspect.
 * @return string A format label such as 'JPEG' or 'PNG', or 'Unknown' when
 *                no known signature matches.
 */
function detectFileType(string $filename): string {
    $head = getFileSignature($filename, 16);

    // Leading byte sequence => format label, checked in declaration order.
    $magicNumbers = [
        "\xFF\xD8\xFF"      => 'JPEG',
        "\x89PNG\r\n\x1A\n" => 'PNG',
        "GIF87a"            => 'GIF',
        "GIF89a"            => 'GIF',
        "PK\x03\x04"        => 'ZIP',
        "%PDF"              => 'PDF',
        "\x7fELF"           => 'ELF Executable',
        "MZ"                => 'Windows Executable',
        "\x00\x00\x01\x00"  => 'ICO',
    ];

    foreach ($magicNumbers as $magic => $label) {
        if (str_starts_with($head, $magic)) {
            return $label;
        }
    }

    // WEBP needs two checks: the "RIFF" prefix plus the "WEBP" tag at
    // byte offset 8.
    $isRiff = str_starts_with($head, "RIFF");
    if ($isRiff && substr($head, 8, 4) === "WEBP") {
        return 'WEBP';
    }

    return 'Unknown';
}

/**
 * Collect basic metadata about a binary file.
 *
 * Always returns 'size', 'signature' (hex of the first 16 bytes) and 'type'.
 * For JPEG files, 'width', 'height' and 'channels' are added when
 * getimagesize() can parse the image. For PNG files, 'width', 'height',
 * 'bit_depth' and 'color_type' are added when a complete IHDR chunk directly
 * follows the signature. Callers must not assume the image keys exist.
 *
 * @param string $filename Path of the file to inspect.
 * @return array<string, mixed> The metadata described above.
 * @throws RuntimeException If the file cannot be opened or read.
 */
function extractBinaryMetadata(string $filename): array {
    $handle = fopen($filename, 'rb');
    if (!$handle) {
        throw new RuntimeException("Cannot open file: $filename");
    }

    // One read covers both the 16-byte signature and the format-specific
    // header; the handle is released before any other helper runs.
    try {
        $header = fread($handle, 512);
    } finally {
        fclose($handle);
    }

    if ($header === false) {
        throw new RuntimeException("Cannot read file: $filename");
    }

    $metadata = [
        'size' => filesize($filename),
        'signature' => bin2hex(substr($header, 0, 16)),
        'type' => detectFileType($filename)
    ];

    // Additional analysis based on file type
    if (str_starts_with($header, "\xFF\xD8\xFF")) {
        // JPEG specific metadata
        $imageInfo = getimagesize($filename);
        if ($imageInfo) {
            $metadata['width'] = $imageInfo[0];
            $metadata['height'] = $imageInfo[1];
            $metadata['channels'] = $imageInfo['channels'] ?? null;
        }
    } elseif (str_starts_with($header, "\x89PNG")) {
        // PNG specific metadata. Layout: 8-byte signature, 4-byte chunk
        // length, 4-byte chunk type, then 13 bytes of IHDR data at offset 16.
        $ihdr = substr($header, 16, 13);

        // Parse only a complete IHDR chunk; a truncated or malformed file
        // would otherwise produce warnings and meaningless values.
        if (substr($header, 12, 4) === 'IHDR' && strlen($ihdr) === 13) {
            $fields = unpack('Nwidth/Nheight/Cbit_depth/Ccolor_type', $ihdr);
            if ($fields !== false) {
                $metadata += $fields;
            }
        }
    }

    return $metadata;
}

/**
 * Compare two files byte for byte.
 *
 * Both files are read in parallel in 8192-byte chunks. The comparison
 * relies on both reads returning equally sized chunks until one file ends.
 *
 * @param string $file1 Path of the first file.
 * @param string $file2 Path of the second file.
 * @return array{identical: bool, first_difference_at: int|null, file1_size: int|false, file2_size: int|false}
 *         'first_difference_at' is the zero-based offset of the first
 *         differing byte, or null when the files are identical. If one file
 *         is a prefix of the other, it is the length of the shorter file.
 * @throws RuntimeException If either file cannot be opened or read.
 */
function compareBinaryFiles(string $file1, string $file2): array {
    $handle1 = fopen($file1, 'rb');
    $handle2 = fopen($file2, 'rb');

    if (!$handle1 || !$handle2) {
        // Release whichever handle did open before reporting the failure.
        foreach ([$handle1, $handle2] as $opened) {
            if ($opened) {
                fclose($opened);
            }
        }
        throw new RuntimeException('Cannot open files for comparison');
    }

    $identical = true;
    $position = 0;
    $firstDifference = null;

    try {
        while (true) {
            $chunk1 = fread($handle1, 8192);
            $chunk2 = fread($handle2, 8192);

            if ($chunk1 === false || $chunk2 === false) {
                throw new RuntimeException('Cannot read files for comparison');
            }

            if ($chunk1 !== $chunk2) {
                $identical = false;
                // XOR-ing two strings gives a string as long as the shorter
                // one, with "\0" wherever the bytes match. The count of
                // leading "\0" bytes is therefore the index of the first
                // mismatch, or the shorter length when one chunk is a prefix
                // of the other.
                $firstDifference = $position + strspn($chunk1 ^ $chunk2, "\0");
                break;
            }

            if ($chunk1 === '') {
                // Both reads came back empty together: no differences found.
                break;
            }

            $position += strlen($chunk1);
        }
    } finally {
        fclose($handle1);
        fclose($handle2);
    }

    return [
        'identical' => $identical,
        'first_difference_at' => $firstDifference,
        'file1_size' => filesize($file1),
        'file2_size' => filesize($file2)
    ];
}

/**
 * Overwrite bytes in an existing file, starting at a given offset.
 *
 * The file is opened for update ('r+b'), so it is not truncated and must
 * already exist.
 *
 * @param string $filename Path of the file to patch.
 * @param int    $offset   Byte position, from the start of the file, at
 *                         which writing begins.
 * @param string $newData  Replacement bytes.
 * @return bool True if every byte of $newData was written; false if the file
 *              could not be opened, the seek failed, or the write was short.
 */
function patchBinaryFile(string $filename, int $offset, string $newData): bool {
    $stream = fopen($filename, 'r+b');
    if ($stream === false) {
        return false;
    }

    // Stays false unless the seek succeeds and the full payload is written.
    $patched = false;
    if (fseek($stream, $offset) === 0) {
        $patched = fwrite($stream, $newData) === strlen($newData);
    }

    fclose($stream);

    return $patched;
}

/**
 * Read a run of bytes from an arbitrary position in a file.
 *
 * @param string $filename Path of the file to read.
 * @param int    $offset   Byte position, from the start of the file, at
 *                         which reading begins.
 * @param int    $length   Maximum number of bytes to read. Passed straight
 *                         to fread(), which rejects values below 1 (a
 *                         ValueError on PHP 8).
 * @return string Up to $length raw bytes; shorter (possibly empty) when the
 *                file ends before $length bytes are available.
 * @throws RuntimeException If the file cannot be opened, the seek fails, or
 *                          the read fails.
 */
function extractBytes(string $filename, int $offset, int $length): string {
    $handle = fopen($filename, 'rb');

    if (!$handle) {
        throw new RuntimeException("Cannot open file: $filename");
    }

    try {
        if (fseek($handle, $offset) !== 0) {
            throw new RuntimeException("Cannot seek to offset: $offset");
        }

        $data = fread($handle, $length);
    } finally {
        // A single close covers the seek-failure, read-failure and success paths.
        fclose($handle);
    }

    if ($data === false) {
        // Without this check a read error would be returned as false and
        // silently coerced to an empty string.
        throw new RuntimeException("Cannot read from file: $filename");
    }

    return $data;
}

/**
 * Render binary data as a classic hex dump.
 *
 * Each output line holds an 8-digit uppercase hexadecimal offset, the bytes
 * as space-separated uppercase hex pairs (padded so the text column lines
 * up), and the printable-ASCII rendering with '.' for every byte outside
 * the range 0x20-0x7E.
 *
 * @param string $data         Raw bytes to dump.
 * @param int    $bytesPerLine Number of bytes shown per line; must be >= 1.
 * @return string The dump, one "\n"-terminated line per group of bytes;
 *                an empty string for empty input.
 * @throws InvalidArgumentException If $bytesPerLine is less than 1.
 */
function binaryToHexDump(string $data, int $bytesPerLine = 16): string {
    if ($bytesPerLine < 1) {
        // A non-positive step would never advance through the data.
        throw new InvalidArgumentException(
            "bytesPerLine must be at least 1, got: $bytesPerLine"
        );
    }

    if ($data === '') {
        return '';
    }

    $output = '';

    foreach (str_split($data, $bytesPerLine) as $index => $line) {
        // "XX " per byte, padded to full width on the final, shorter line.
        $hex = implode(' ', str_split(strtoupper(bin2hex($line)), 2)) . ' ';
        $hex = str_pad($hex, $bytesPerLine * 3, ' ');

        // No /u modifier: the pattern works on single bytes, not on
        // UTF-8 code points.
        $ascii = preg_replace('/[^\x20-\x7E]/', '.', $line);

        $offset = sprintf('%08X', $index * $bytesPerLine);

        $output .= "$offset  $hex $ascii\n";
    }

    return $output;
}

/**
 * Run basic integrity checks on a file.
 *
 * @param string      $filename         Path of the file to check.
 * @param string|null $expectedChecksum Optional hex MD5 or SHA-1 digest to
 *                                      verify against, in upper or lower case.
 * @return array<string, mixed> Keys 'exists', 'readable', 'size', 'md5' and
 *         'sha1'; 'checksum_valid' only when a checksum was supplied; and,
 *         when the file can be opened, 'can_read_start' / 'can_read_end',
 *         which are true only if a byte was actually read from that end of
 *         the file (both are false for an empty file).
 * @throws InvalidArgumentException If the file does not exist.
 */
function validateBinaryFile(string $filename, ?string $expectedChecksum = null): array {
    if (!file_exists($filename)) {
        throw new InvalidArgumentException("File not found: $filename");
    }

    $result = [
        'exists' => true,
        'readable' => is_readable($filename),
        'size' => filesize($filename),
        'md5' => md5_file($filename),
        'sha1' => sha1_file($filename)
    ];

    if ($expectedChecksum) {
        // md5_file()/sha1_file() emit lowercase hex, so normalise the
        // expected value to accept digests published in upper case.
        // Strict in_array() keeps a failed digest (false) from matching.
        $result['checksum_valid'] = in_array(
            strtolower($expectedChecksum),
            [$result['md5'], $result['sha1']],
            true
        );
    }

    // Basic corruption check
    $handle = fopen($filename, 'rb');
    if ($handle) {
        // fread() returns '' rather than false at end-of-file, so test that
        // one byte really arrived instead of comparing against false.
        $first = fread($handle, 1);
        $result['can_read_start'] = is_string($first) && strlen($first) === 1;

        // Try the last byte only if the seek succeeded.
        $last = fseek($handle, -1, SEEK_END) === 0 ? fread($handle, 1) : false;
        $result['can_read_end'] = is_string($last) && strlen($last) === 1;

        fclose($handle);
    }

    return $result;
}

// Usage examples: exercises each helper above against sample files.
// Helpers that throw are covered by the single catch block at the end.
try {
    $binaryFile = 'image.jpg';

    // Detect file type
    $fileType = detectFileType($binaryFile);
    echo "File type: $fileType\n";

    // Extract metadata
    $metadata = extractBinaryMetadata($binaryFile);
    echo "Size: {$metadata['size']} bytes\n";
    // 'width' and 'height' are only set in the JPEG and PNG branches of
    // extractBinaryMetadata(), so guard against their absence.
    if (isset($metadata['width'], $metadata['height'])) {
        echo "Dimensions: {$metadata['width']}x{$metadata['height']}\n";
    }
    echo "Signature: {$metadata['signature']}\n";

    // Read file signature
    $signature = getFileSignature($binaryFile, 8);
    echo "First 8 bytes: " . bin2hex($signature) . "\n";

    // Compare two files
    $comparison = compareBinaryFiles('file1.bin', 'file2.bin');
    if ($comparison['identical']) {
        echo "Files are identical\n";
    } else {
        echo "Files differ at byte: {$comparison['first_difference_at']}\n";
    }

    // Extract specific bytes
    $headerBytes = extractBytes($binaryFile, 0, 32);
    echo "Header hex dump:\n";
    echo binaryToHexDump($headerBytes);

    // Validate file integrity
    $validation = validateBinaryFile($binaryFile);
    echo "MD5: {$validation['md5']}\n";
    echo "SHA1: {$validation['sha1']}\n";

    // Patch binary file (be careful!). Keep the original bytes so the
    // change can be undone.
    $backupData = extractBytes('config.bin', 10, 4);
    // patchBinaryFile() reports failure through its return value rather
    // than an exception, so check it before claiming success.
    if (patchBinaryFile('config.bin', 10, "\x01\x02\x03\x04")) {
        echo "File patched\n";
    } else {
        echo "Patch failed\n";
    }

    // Restore if needed
    // patchBinaryFile('config.bin', 10, $backupData);

} catch (Exception $e) {
    echo "Binary operation error: " . $e->getMessage() . "\n";
}

Key Points

Binary file operations should always pass the 'b' flag to fopen(). The flag forces binary mode, which prevents the text-mode translation of line endings (\n to \r\n) that Windows can otherwise apply. This ensures that bytes are read and written exactly as they appear in the file, without any translation or modification.

The key difference from text files is that binary files can contain null bytes, control characters, and byte sequences that would be invalid in text encoding. Functions like fread(), fwrite(), and fseek() work at the byte level, making them perfect for precise binary manipulation.

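These techniques are essential for file format analysis, image processing, executable modification, and any application requiring low-level file access. Always open non-text files in binary mode and treat their contents as raw bytes rather than as encoded text: PHP strings are plain byte sequences, so use byte-oriented functions such as strlen() and substr() instead of their multibyte (mb_*) counterparts.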

Share this article

Add Comment

No comments yet. Be the first to comment!

More from Php