Here's how to process large files without running out of memory:
Code Example
<?php
// Stream large file download
function streamFileDownload(string $filepath, ?string $filename = null): void {
    if (!file_exists($filepath) || !is_readable($filepath)) {
        http_response_code(404);
        exit('File not found');
    }
    $filename = $filename ?: basename($filepath);
    $filesize = filesize($filepath);

    // Set appropriate headers
    header('Content-Type: application/octet-stream');
    header('Content-Disposition: attachment; filename="' . $filename . '"');
    header('Content-Length: ' . $filesize);
    header('Cache-Control: must-revalidate');
    header('Pragma: public');

    // Disable all output buffering levels so chunks reach the client immediately
    while (ob_get_level()) {
        ob_end_clean();
    }

    $handle = fopen($filepath, 'rb');
    if (!$handle) {
        http_response_code(500);
        exit('Cannot open file');
    }

    // Stream in chunks
    while (!feof($handle)) {
        echo fread($handle, 8192); // 8KB chunks
        flush(); // Force output
    }
    fclose($handle);
}
// Stream with range support (partial downloads)
function streamFileWithRanges(string $filepath, ?string $filename = null): void {
    if (!file_exists($filepath)) {
        http_response_code(404);
        exit();
    }
    $filename = $filename ?: basename($filepath);
    $filesize = filesize($filepath);
    $start = 0;
    $end = $filesize - 1;

    // Handle range requests (e.g. "Range: bytes=0-1023")
    if (isset($_SERVER['HTTP_RANGE'])) {
        if (preg_match('/bytes=(\d+)-(\d*)/', $_SERVER['HTTP_RANGE'], $matches)) {
            $start = intval($matches[1]);
            // Compare against '' so an explicit end of 0 is not treated as "no end",
            // and clamp the end to the last byte of the file
            $end = $matches[2] !== '' ? min(intval($matches[2]), $filesize - 1) : $filesize - 1;
            if ($start > $end || $start >= $filesize) {
                http_response_code(416);
                header("Content-Range: bytes */$filesize");
                exit();
            }
            http_response_code(206);
            header("Content-Range: bytes $start-$end/$filesize");
        }
    }

    $contentLength = $end - $start + 1;
    header('Accept-Ranges: bytes');
    header('Content-Type: application/octet-stream');
    header('Content-Disposition: attachment; filename="' . $filename . '"');
    header('Content-Length: ' . $contentLength);

    $handle = fopen($filepath, 'rb');
    if (!$handle) {
        http_response_code(500);
        exit('Cannot open file');
    }
    fseek($handle, $start);

    $bytesRemaining = $contentLength;
    while ($bytesRemaining > 0 && !feof($handle)) {
        $chunk = fread($handle, min(8192, $bytesRemaining));
        if ($chunk === false) break;
        echo $chunk;
        $bytesRemaining -= strlen($chunk); // subtract actual bytes read, not requested
        flush();
    }
    fclose($handle);
}
// Stream large file upload processing
function processLargeUpload(string $inputName, callable $processor): array {
    if (!isset($_FILES[$inputName])) {
        return ['success' => false, 'error' => 'No file uploaded'];
    }
    $file = $_FILES[$inputName];
    if ($file['error'] !== UPLOAD_ERR_OK) {
        return ['success' => false, 'error' => 'Upload error: ' . $file['error']];
    }
    $handle = fopen($file['tmp_name'], 'rb');
    if (!$handle) {
        return ['success' => false, 'error' => 'Cannot open uploaded file'];
    }
    $processedBytes = 0;
    $chunkIndex = 0;
    while (!feof($handle)) {
        $chunk = fread($handle, 1024 * 1024); // 1MB chunks
        if ($chunk === false) break;
        $chunkSize = strlen($chunk);
        $processor($chunk, $chunkIndex, $chunkSize);
        $processedBytes += $chunkSize;
        $chunkIndex++;
    }
    fclose($handle);
    return [
        'success' => true,
        'bytes_processed' => $processedBytes,
        'chunks_processed' => $chunkIndex
    ];
}
// Copy large files efficiently
function copyLargeFile(string $source, string $destination): bool {
    $sourceHandle = fopen($source, 'rb');
    $destHandle = fopen($destination, 'wb');
    if (!$sourceHandle || !$destHandle) {
        // Close whichever handle did open so it doesn't leak
        if ($sourceHandle) fclose($sourceHandle);
        if ($destHandle) fclose($destHandle);
        return false;
    }
    while (!feof($sourceHandle)) {
        $chunk = fread($sourceHandle, 1024 * 1024);
        if ($chunk === false) break;
        if (fwrite($destHandle, $chunk) === false) {
            fclose($sourceHandle);
            fclose($destHandle);
            return false;
        }
    }
    fclose($sourceHandle);
    fclose($destHandle);
    return true;
}
// Stream remote file download (requires allow_url_fopen for http(s) URLs)
function streamRemoteFile(string $url, string $localPath): array {
    $remoteHandle = fopen($url, 'rb');
    $localHandle = fopen($localPath, 'wb');
    if (!$remoteHandle || !$localHandle) {
        if ($remoteHandle) fclose($remoteHandle);
        if ($localHandle) fclose($localHandle);
        return ['success' => false, 'error' => 'Cannot open files'];
    }
    $totalBytes = 0;
    $reportedMB = 0;
    $startTime = microtime(true);
    while (!feof($remoteHandle)) {
        $chunk = fread($remoteHandle, 8192);
        if ($chunk === false) break;
        $written = fwrite($localHandle, $chunk);
        if ($written === false) break;
        $totalBytes += $written;
        // Optional: report progress each time another full MB has arrived.
        // Network reads rarely land on exact MB boundaries, so track the
        // last reported MB instead of testing totalBytes % 1MB.
        $currentMB = intdiv($totalBytes, 1024 * 1024);
        if ($currentMB > $reportedMB) {
            $reportedMB = $currentMB;
            $elapsed = microtime(true) - $startTime;
            $speed = $totalBytes / $elapsed / 1024 / 1024; // MB/s
            echo "Downloaded: " . round($totalBytes / 1024 / 1024, 2) . " MB (Speed: " .
                round($speed, 2) . " MB/s)\r";
        }
    }
    fclose($remoteHandle);
    fclose($localHandle);
    $elapsed = max(microtime(true) - $startTime, 0.000001); // avoid division by zero
    return [
        'success' => true,
        'bytes_downloaded' => $totalBytes,
        'time_elapsed' => $elapsed,
        'average_speed' => $totalBytes / $elapsed / 1024 / 1024 // MB/s
    ];
}
// Process CSV file in streaming mode
function streamProcessCSV(string $csvFile, callable $rowProcessor): array {
    $handle = fopen($csvFile, 'r');
    if (!$handle) {
        throw new RuntimeException("Cannot open CSV file: $csvFile");
    }
    $header = fgetcsv($handle);
    if ($header === false) {
        fclose($handle);
        throw new RuntimeException("CSV file is empty: $csvFile");
    }
    $rowCount = 0;
    $errors = [];
    while (($row = fgetcsv($handle)) !== false) {
        try {
            // array_combine() throws a ValueError (not an Exception) on
            // mismatched counts in PHP 8, so check explicitly
            if (count($row) !== count($header)) {
                throw new RuntimeException('Column count does not match header');
            }
            $rowData = array_combine($header, $row);
            $rowProcessor($rowData, $rowCount);
            $rowCount++;
        } catch (Exception $e) {
            $errors[] = "Row $rowCount: " . $e->getMessage();
        }
    }
    fclose($handle);
    return [
        'rows_processed' => $rowCount,
        'errors' => $errors
    ];
}
// Memory-efficient file compression
function compressFileStream(string $sourceFile, string $compressedFile): bool {
    $source = fopen($sourceFile, 'rb');
    $compressed = gzopen($compressedFile, 'wb9'); // Maximum compression
    if (!$source || !$compressed) {
        // Close whichever handle did open so it doesn't leak
        if ($source) fclose($source);
        if ($compressed) gzclose($compressed);
        return false;
    }
    while (!feof($source)) {
        $chunk = fread($source, 8192);
        if ($chunk === false) break;
        if (gzwrite($compressed, $chunk) === false) {
            fclose($source);
            gzclose($compressed);
            return false;
        }
    }
    fclose($source);
    gzclose($compressed);
    return true;
}
// Usage examples
try {
    // Stream a large file for download
    if (isset($_GET['download'])) {
        streamFileWithRanges('large_file.zip', 'download.zip');
        exit;
    }

    // Process large upload
    if (isset($_FILES['large_file'])) {
        $result = processLargeUpload('large_file', function($chunk, $index, $size) {
            echo "Processing chunk $index: $size bytes\n";
            // Process chunk data here
        });
        if ($result['success']) {
            echo "Processed {$result['bytes_processed']} bytes in {$result['chunks_processed']} chunks\n";
        }
    }

    // Copy large file efficiently
    if (copyLargeFile('huge_source.dat', 'huge_backup.dat')) {
        echo "Large file copied successfully\n";
    }

    // Download remote file
    $download = streamRemoteFile('https://example.com/large_file.zip', 'local_copy.zip');
    if ($download['success']) {
        echo "Downloaded {$download['bytes_downloaded']} bytes in {$download['time_elapsed']} seconds\n";
        echo "Average speed: {$download['average_speed']} MB/s\n";
    }

    // Process large CSV
    $csvResult = streamProcessCSV('large_data.csv', function($row, $index) {
        // Process each row without loading the entire file
        if ($index % 1000 === 0) {
            echo "Processed $index rows\n";
        }
    });
} catch (Exception $e) {
    echo "Streaming error: " . $e->getMessage() . "\n";
}
Behind the Scenes
Streaming techniques prevent memory exhaustion with large files:
- Chunked Processing: Read and write small chunks instead of the entire file (see the generator sketch after this list)
- Buffer Management: Control memory usage with appropriate chunk sizes
- Stream Functions: Use file handles instead of loading content to variables
- Progressive Output: Send data as it's processed rather than buffering
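The chunked pattern used throughout the example above can also be packaged as a generator, which keeps the read loop and the processing logic decoupled while still holding only one chunk in memory. A minimal sketch (readChunks() is an illustrative name, not a function from the example):
<?php
// Yields fixed-size chunks from a file without loading it whole.
function readChunks(string $filepath, int $chunkSize = 8192): Generator {
    $handle = fopen($filepath, 'rb');
    if (!$handle) {
        throw new RuntimeException("Cannot open file: $filepath");
    }
    try {
        while (!feof($handle)) {
            $chunk = fread($handle, $chunkSize);
            if ($chunk === false || $chunk === '') break;
            yield $chunk;
        }
    } finally {
        fclose($handle); // runs even if the consumer stops iterating early
    }
}

// Usage: only one chunk is resident at any time.
foreach (readChunks('large_file.dat') as $chunk) {
    // process $chunk here
}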
Memory Benefits:
- Constant memory usage regardless of file size (a quick way to verify this is sketched after this list)
- Ability to process files larger than available RAM
- Better server performance under load
- Responsive user experience with progress updates
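To verify the constant-memory claim on your own files, compare memory_get_peak_usage() after a streamed pass; peak memory should stay near the chunk size no matter how large the input is. A rough sketch, hashing a file in chunks purely as a stand-in workload ('large_file.dat' is a placeholder path):
<?php
// Hash a large file chunk by chunk, then report peak memory.
$ctx = hash_init('sha256');
$handle = fopen('large_file.dat', 'rb');
if ($handle) {
    while (!feof($handle)) {
        $chunk = fread($handle, 8192);
        if ($chunk === false) break;
        hash_update($ctx, $chunk); // each chunk is discarded after hashing
    }
    fclose($handle);
    echo "SHA-256: " . hash_final($ctx) . "\n";
    echo "Peak memory: " . round(memory_get_peak_usage(true) / 1024 / 1024, 2) . " MB\n";
}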
Key Techniques:
- Use fread()/fwrite() with small chunks (8KB-1MB); for plain copies, PHP's built-in stream_copy_to_stream() wraps the same loop (see the sketch below)
- Call flush() to force output immediately
- Handle HTTP ranges for resumable downloads
- Process data incrementally rather than loading files entirely
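For plain stream-to-stream copies, PHP already implements the chunked loop internally: stream_copy_to_stream() could stand in for the manual loop in copyLargeFile() above. A minimal sketch using the same placeholder filenames:
<?php
// Built-in chunked copy between two open streams.
$src = fopen('huge_source.dat', 'rb');
$dst = fopen('huge_backup.dat', 'wb');
if ($src && $dst) {
    $bytes = stream_copy_to_stream($src, $dst); // copies in internal chunks
    echo "Copied $bytes bytes\n";
}
if ($src) fclose($src);
if ($dst) fclose($dst);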
Essential for file servers, backup systems, data processing applications, and any scenario involving large files that could exceed memory limits.