Quick Fix: Basic File Hashing
const crypto = require('crypto');
const fs = require('fs');
const fsPromises = require('fs').promises;
// Calculate SHA-256 hash of file content
async function calculateFileHash(filePath, algorithm = 'sha256') {
try {
const data = await fsPromises.readFile(filePath);
const hash = crypto.createHash(algorithm);
hash.update(data);
const hashValue = hash.digest('hex');
console.log(`${algorithm.toUpperCase()} hash:`, hashValue);
return hashValue;
} catch (error) {
console.error('Error calculating hash:', error);
throw error;
}
}
// Calculate hash using streams (for large files)
async function calculateFileHashStream(filePath, algorithm = 'sha256') {
return new Promise((resolve, reject) => {
const hash = crypto.createHash(algorithm);
const stream = fs.createReadStream(filePath);
stream.on('data', (data) => {
hash.update(data);
});
stream.on('end', () => {
const hashValue = hash.digest('hex');
resolve(hashValue);
});
stream.on('error', (error) => {
reject(error);
});
});
}
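If you prefer promise-based stream plumbing, the same calculation can be written with stream.pipeline from stream/promises (Node 15+). This is an alternative sketch, not part of the original snippet: once a Hash object is consumed as a stream, the digest is pushed to its readable side on flush, so read it from there instead of calling digest() again.
// Alternative sketch using stream/promises (assumes Node 15+)
const { pipeline } = require('stream/promises');
async function calculateFileHashPipeline(filePath, algorithm = 'sha256') {
const hash = crypto.createHash(algorithm).setEncoding('hex');
// pipeline ends the hash stream, which pushes the digest to its readable side
await pipeline(fs.createReadStream(filePath), hash);
return hash.read(); // hex string; calling hash.digest() here would throw
}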
// Calculate multiple hash algorithms at once
async function calculateMultipleHashes(filePath, algorithms = ['md5', 'sha1', 'sha256']) {
return new Promise((resolve, reject) => {
const hashes = {};
// Create hash objects for each algorithm
algorithms.forEach(algorithm => {
hashes[algorithm] = crypto.createHash(algorithm);
});
const stream = fs.createReadStream(filePath);
stream.on('data', (data) => {
// Update all hashes with the same data
algorithms.forEach(algorithm => {
hashes[algorithm].update(data);
});
});
stream.on('end', () => {
const results = {};
algorithms.forEach(algorithm => {
results[algorithm] = hashes[algorithm].digest('hex');
});
resolve(results);
});
stream.on('error', reject);
});
}
// Verify file hash against expected value
async function verifyFileHash(filePath, expectedHash, algorithm = 'sha256') {
try {
const actualHash = await calculateFileHashStream(filePath, algorithm);
const isValid = actualHash === expectedHash.toLowerCase();
console.log('Hash verification:', isValid ? 'PASSED' : 'FAILED');
return {
isValid,
expectedHash: expectedHash.toLowerCase(),
actualHash,
algorithm
};
} catch (error) {
console.error('Error verifying hash:', error);
throw error;
}
}
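A note on the comparison above: === is fine when checking a download against a published checksum, but if the expected hash arrives from an untrusted caller, a constant-time comparison avoids leaking how many characters matched. A minimal sketch using Node's crypto.timingSafeEqual:
// Constant-time comparison (sketch); timingSafeEqual throws on
// unequal lengths, so compare lengths first
function hashesMatch(expectedHex, actualHex) {
const expected = Buffer.from(expectedHex.toLowerCase(), 'hex');
const actual = Buffer.from(actualHex.toLowerCase(), 'hex');
return expected.length === actual.length && crypto.timingSafeEqual(expected, actual);
}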
// Calculate hash synchronously (not recommended for large files)
function calculateFileHashSync(filePath, algorithm = 'sha256') {
try {
const data = fs.readFileSync(filePath);
const hash = crypto.createHash(algorithm);
hash.update(data);
return hash.digest('hex');
} catch (error) {
console.error('Error calculating hash:', error);
throw error;
}
}
// Usage examples
async function examples() {
const testFile = './test-file.txt';
// Create test file
await fsPromises.writeFile(testFile, 'Hello, World!');
// Calculate SHA-256 hash
const sha256Hash = await calculateFileHash(testFile, 'sha256');
console.log('SHA-256:', sha256Hash);
// Calculate hash using stream
const streamHash = await calculateFileHashStream(testFile, 'sha256');
console.log('Stream hash:', streamHash);
// Calculate multiple hashes
const multiHashes = await calculateMultipleHashes(testFile);
console.log('Multiple hashes:', multiHashes);
// Verify hash
const verification = await verifyFileHash(testFile, sha256Hash);
console.log('Verification passed:', verification.isValid);
// Cleanup
await fsPromises.unlink(testFile);
}
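A common way to run the demo is to guard it so it only executes when the file is invoked directly:
// Run the demo only when this file is executed directly
if (require.main === module) {
examples().catch(console.error);
}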
The Problem: Production File Integrity and Hash Management System
const crypto = require('crypto');
const fs = require('fs');
const fsPromises = require('fs').promises;
const path = require('path');
// Advanced file hashing service with comprehensive features
class FileHashService {
constructor(options = {}) {
this.defaultAlgorithm = options.defaultAlgorithm || 'sha256';
this.supportedAlgorithms = options.supportedAlgorithms || ['md5', 'sha1', 'sha256', 'sha512'];
this.chunkSize = options.chunkSize || 64 * 1024; // 64KB
this.enableProgress = options.enableProgress !== false;
this.enableCache = options.enableCache === true;
this.cacheTimeout = options.cacheTimeout || 300000; // 5 minutes
this.maxFileSize = options.maxFileSize || 1024 * 1024 * 1024; // 1GB
this.enableVerification = options.enableVerification !== false;
this.cache = new Map();
}
// Calculate file hash with comprehensive options
async calculateHash(filePath, options = {}) {
const {
algorithm = this.defaultAlgorithm,
encoding = 'hex',
useCache = this.enableCache,
includeMetadata = true,
onProgress = null,
validateFile = true
} = options;
try {
const startTime = Date.now();
const absolutePath = path.resolve(filePath);
// Validate algorithm
if (!this.supportedAlgorithms.includes(algorithm)) {
throw new Error(`Unsupported algorithm: ${algorithm}`);
}
// Check cache first
if (useCache) {
const cached = await this.getCachedHash(absolutePath, algorithm, encoding);
if (cached) {
return cached;
}
}
// Validate file
if (validateFile) {
await this.validateFile(absolutePath);
}
// Get file metadata
const stats = await fsPromises.stat(absolutePath);
// Calculate hash
const hashResult = await this.performHashCalculation(absolutePath, algorithm, {
encoding,
fileSize: stats.size,
onProgress
});
const result = {
success: true,
filePath: absolutePath,
algorithm,
encoding,
hash: hashResult.hash,
fileSize: stats.size,
processingTime: Date.now() - startTime,
chunkCount: hashResult.chunkCount
};
if (includeMetadata) {
result.metadata = {
modified: stats.mtime,
created: stats.birthtime,
mode: stats.mode,
isFile: stats.isFile()
};
}
// Cache the result
if (useCache) {
await this.setCachedHash(absolutePath, algorithm, encoding, result);
}
return result;
} catch (error) {
return this.handleHashError(error, filePath, algorithm);
}
}
// Perform the actual hash calculation
async performHashCalculation(filePath, algorithm, options) {
const { encoding, fileSize, onProgress } = options;
return new Promise((resolve, reject) => {
const hash = crypto.createHash(algorithm);
const stream = fs.createReadStream(filePath, {
highWaterMark: this.chunkSize
});
let processedBytes = 0;
let chunkCount = 0;
const startTime = Date.now();
stream.on('data', (chunk) => {
hash.update(chunk);
processedBytes += chunk.length;
chunkCount++;
// Report progress
if (onProgress && this.enableProgress) {
const progress = {
processedBytes,
totalBytes: fileSize,
percentage: (processedBytes / fileSize) * 100,
chunksProcessed: chunkCount,
elapsedTime: Date.now() - startTime,
estimatedTimeRemaining: this.calculateETA(processedBytes, fileSize, startTime)
};
onProgress(progress);
}
});
stream.on('end', () => {
const hashValue = hash.digest(encoding);
resolve({
hash: hashValue,
chunkCount,
processedBytes
});
});
stream.on('error', reject);
});
}
// Calculate hashes for multiple algorithms simultaneously
async calculateMultipleHashes(filePath, algorithms, options = {}) {
const {
encoding = 'hex',
onProgress = null,
includeMetadata = true
} = options;
try {
const startTime = Date.now();
const absolutePath = path.resolve(filePath);
// Validate algorithms
for (const algorithm of algorithms) {
if (!this.supportedAlgorithms.includes(algorithm)) {
throw new Error(`Unsupported algorithm: ${algorithm}`);
}
}
const stats = await fsPromises.stat(absolutePath);
const hashes = {};
const hashObjects = {};
// Create hash objects for each algorithm
algorithms.forEach(algorithm => {
hashObjects[algorithm] = crypto.createHash(algorithm);
});
// await here so stream errors reject inside this try/catch
// (a bare `return new Promise(...)` would bypass the catch below)
return await new Promise((resolve, reject) => {
const stream = fs.createReadStream(absolutePath, {
highWaterMark: this.chunkSize
});
let processedBytes = 0;
let chunkCount = 0;
const processingStartTime = Date.now();
stream.on('data', (chunk) => {
// Update all hashes with the same data
algorithms.forEach(algorithm => {
hashObjects[algorithm].update(chunk);
});
processedBytes += chunk.length;
chunkCount++;
// Report progress
if (onProgress && this.enableProgress) {
const progress = {
processedBytes,
totalBytes: stats.size,
percentage: (processedBytes / stats.size) * 100,
chunksProcessed: chunkCount,
algorithmsProcessing: algorithms.length,
elapsedTime: Date.now() - processingStartTime
};
onProgress(progress);
}
});
stream.on('end', () => {
// Generate final hashes
algorithms.forEach(algorithm => {
hashes[algorithm] = hashObjects[algorithm].digest(encoding);
});
const result = {
success: true,
filePath: absolutePath,
algorithms,
encoding,
hashes,
fileSize: stats.size,
processingTime: Date.now() - startTime,
chunkCount
};
if (includeMetadata) {
result.metadata = {
modified: stats.mtime,
created: stats.birthtime,
mode: stats.mode
};
}
resolve(result);
});
stream.on('error', reject);
});
} catch (error) {
return this.handleHashError(error, filePath, algorithms.join(','));
}
}
// Verify file integrity against expected hash
async verifyIntegrity(filePath, expectedHash, options = {}) {
const {
algorithm = this.defaultAlgorithm,
encoding = 'hex',
strict = true,
includeDetails = true
} = options;
try {
const hashResult = await this.calculateHash(filePath, {
algorithm,
encoding,
includeMetadata: includeDetails
});
if (!hashResult.success) {
return {
success: false,
isValid: false,
error: hashResult.error
};
}
const actualHash = hashResult.hash;
const expectedHashNormalized = expectedHash.toLowerCase();
const actualHashNormalized = actualHash.toLowerCase();
const isValid = actualHashNormalized === expectedHashNormalized;
const result = {
success: true,
isValid,
filePath,
algorithm,
encoding,
expectedHash: expectedHashNormalized,
actualHash: actualHashNormalized,
fileSize: hashResult.fileSize,
processingTime: hashResult.processingTime
};
if (includeDetails) {
result.metadata = hashResult.metadata;
result.chunkCount = hashResult.chunkCount;
}
if (!isValid && strict) {
result.error = 'Hash verification failed: hashes do not match';
}
return result;
} catch (error) {
return {
success: false,
isValid: false,
error: error.message,
filePath,
algorithm
};
}
}
// Calculate hash for multiple files
async calculateBatchHashes(filePaths, options = {}) {
const {
algorithm = this.defaultAlgorithm,
concurrency = 5,
continueOnError = true,
onFileComplete = null,
onProgress = null
} = options;
const results = [];
const errors = [];
let completedFiles = 0;
// Process files in batches
for (let i = 0; i < filePaths.length; i += concurrency) {
const batch = filePaths.slice(i, i + concurrency);
const batchPromises = batch.map(async (filePath, index) => {
try {
const result = await this.calculateHash(filePath, {
algorithm,
onProgress: onProgress ? (progress) => {
onProgress({
...progress,
fileIndex: i + index,
fileName: path.basename(filePath),
completedFiles,
totalFiles: filePaths.length
});
} : null
});
completedFiles++;
if (onFileComplete) {
onFileComplete({
index: i + index,
filePath,
result,
completedFiles,
totalFiles: filePaths.length
});
}
return { index: i + index, filePath, ...result };
} catch (error) {
const errorResult = {
index: i + index,
filePath,
success: false,
error: error.message
};
if (continueOnError) {
return errorResult;
} else {
throw errorResult;
}
}
});
const batchResults = await Promise.allSettled(batchPromises);
batchResults.forEach(promiseResult => {
if (promiseResult.status === 'fulfilled') {
const result = promiseResult.value;
if (result.error) {
errors.push(result);
} else {
results.push(result);
}
} else {
// Promise.allSettled never rejects on its own, so re-throw here to make
// continueOnError = false actually abort the batch; the reason is the
// errorResult object thrown in the catch above
if (!continueOnError) {
throw promiseResult.reason;
}
errors.push(promiseResult.reason);
}
});
}
return {
success: errors.length === 0 || continueOnError,
results: results.sort((a, b) => a.index - b.index),
errors,
total: filePaths.length,
successful: results.length,
failed: errors.length,
algorithm
};
}
// Generate hash manifest file
async generateManifest(directoryPath, options = {}) {
const {
algorithm = this.defaultAlgorithm,
outputPath = path.join(directoryPath, `manifest-${algorithm}.txt`),
recursive = true,
includePattern = null,
excludePattern = null,
format = 'standard' // 'standard', 'json', 'csv'
} = options;
try {
const files = await this.findFiles(directoryPath, {
recursive,
includePattern,
excludePattern
});
const batchResult = await this.calculateBatchHashes(files, {
algorithm,
continueOnError: true
});
// Generate manifest content
let manifestContent;
switch (format) {
case 'json':
manifestContent = this.generateJSONManifest(batchResult);
break;
case 'csv':
manifestContent = this.generateCSVManifest(batchResult);
break;
default:
manifestContent = this.generateStandardManifest(batchResult);
}
// Write manifest file
await fsPromises.writeFile(outputPath, manifestContent, 'utf8');
return {
success: true,
manifestPath: outputPath,
directoryPath,
algorithm,
format,
filesProcessed: batchResult.successful,
errors: batchResult.failed,
totalFiles: batchResult.total
};
} catch (error) {
return {
success: false,
error: error.message,
directoryPath,
algorithm
};
}
}
// Verify files against manifest
async verifyManifest(manifestPath, options = {}) {
const {
algorithm = this.defaultAlgorithm,
continueOnError = true,
onFileVerified = null
} = options;
try {
const manifestContent = await fsPromises.readFile(manifestPath, 'utf8');
const hashes = this.parseManifest(manifestContent);
const results = [];
const errors = [];
let verifiedFiles = 0;
for (const { filePath, expectedHash } of hashes) {
try {
const verification = await this.verifyIntegrity(filePath, expectedHash, {
algorithm,
strict: false
});
verifiedFiles++;
if (onFileVerified) {
onFileVerified({
filePath,
isValid: verification.isValid,
verifiedFiles,
totalFiles: hashes.length
});
}
results.push({
filePath,
isValid: verification.isValid,
expectedHash,
actualHash: verification.actualHash,
...verification
});
} catch (error) {
const errorResult = {
filePath,
isValid: false,
error: error.message
};
if (continueOnError) {
errors.push(errorResult);
} else {
throw error;
}
}
}
return {
success: true,
manifestPath,
algorithm,
results,
errors,
total: hashes.length,
valid: results.filter(r => r.isValid).length,
invalid: results.filter(r => !r.isValid).length,
failed: errors.length
};
} catch (error) {
return {
success: false,
error: error.message,
manifestPath
};
}
}
// Helper methods
async validateFile(filePath) {
const stats = await fsPromises.stat(filePath);
if (!stats.isFile()) {
throw new Error(`Not a file: ${filePath}`);
}
if (stats.size > this.maxFileSize) {
throw new Error(`File too large: ${stats.size} bytes (max: ${this.maxFileSize})`);
}
if (stats.size === 0) {
console.warn(`Warning: Empty file: ${filePath}`);
}
}
async findFiles(directoryPath, options) {
const { recursive, includePattern, excludePattern } = options;
const files = [];
const traverse = async (currentPath) => {
const entries = await fsPromises.readdir(currentPath, { withFileTypes: true });
for (const entry of entries) {
const fullPath = path.join(currentPath, entry.name);
if (entry.isFile()) {
// Apply pattern filters
if (includePattern && !includePattern.test(entry.name)) {
continue;
}
if (excludePattern && excludePattern.test(entry.name)) {
continue;
}
files.push(fullPath);
} else if (entry.isDirectory() && recursive) {
await traverse(fullPath);
}
}
};
await traverse(directoryPath);
return files;
}
generateStandardManifest(batchResult) {
// Two spaces between hash and path matches the shasum/sha256sum format,
// so the manifest can also be checked with `sha256sum -c`
const lines = batchResult.results.map(result =>
`${result.hash}  ${path.relative(process.cwd(), result.filePath)}`
);
return lines.join('\n') + '\n';
}
generateJSONManifest(batchResult) {
const manifest = {
algorithm: batchResult.algorithm,
generated: new Date().toISOString(),
files: batchResult.results.map(result => ({
path: path.relative(process.cwd(), result.filePath),
hash: result.hash,
size: result.fileSize,
modified: result.metadata?.modified
}))
};
return JSON.stringify(manifest, null, 2);
}
generateCSVManifest(batchResult) {
const header = 'path,hash,size,modified\n';
const rows = batchResult.results.map(result =>
`"${path.relative(process.cwd(), result.filePath)}","${result.hash}",${result.fileSize},"${result.metadata?.modified}"`
);
return header + rows.join('\n') + '\n';
}
parseManifest(content) {
const lines = content.split('\n').filter(line => line.trim());
const hashes = [];
for (const line of lines) {
// Parse standard format: "hash  filename" (one or more spaces, so both
// this service's manifests and shasum/sha256sum output parse)
const match = line.match(/^([a-fA-F0-9]+)\s+(.+)$/);
if (match) {
hashes.push({
expectedHash: match[1],
filePath: path.resolve(match[2])
});
}
}
return hashes;
}
calculateETA(processedBytes, totalBytes, startTime) {
const elapsedTime = Date.now() - startTime;
// Guard against division by zero on the very first chunk
if (elapsedTime === 0 || processedBytes === 0) return Infinity;
const processingRate = processedBytes / elapsedTime; // bytes per ms
const remainingBytes = totalBytes - processedBytes;
return remainingBytes / processingRate; // milliseconds remaining
}
// Cache management
async getCachedHash(filePath, algorithm, encoding) {
if (!this.enableCache) return null;
const key = `${filePath}:${algorithm}:${encoding}`;
const cached = this.cache.get(key);
if (!cached) return null;
// Check if file has been modified since cache
try {
const stats = await fsPromises.stat(filePath);
if (stats.mtime.getTime() <= cached.fileModified) {
return cached.result;
}
} catch {
// File doesn't exist, remove from cache
this.cache.delete(key);
}
return null;
}
async setCachedHash(filePath, algorithm, encoding, result) {
if (!this.enableCache) return;
const key = `${filePath}:${algorithm}:${encoding}`;
const stats = await fsPromises.stat(filePath);
this.cache.set(key, {
result,
fileModified: stats.mtime.getTime(),
timestamp: Date.now()
});
// Schedule cleanup of stale entries; unref() so the timer does not
// keep the process alive after other work finishes
setTimeout(() => this.cleanCache(), this.cacheTimeout).unref();
}
cleanCache() {
const now = Date.now();
for (const [key, value] of this.cache.entries()) {
if (now - value.timestamp > this.cacheTimeout) {
this.cache.delete(key);
}
}
}
handleHashError(error, filePath, algorithm) {
let errorCode = error.code || 'UNKNOWN';
let errorMessage = error.message;
switch (error.code) {
case 'ENOENT':
errorMessage = `File not found: ${filePath}`;
break;
case 'EACCES':
errorMessage = `Permission denied: ${filePath}`;
break;
case 'EISDIR':
errorMessage = `Is a directory: ${filePath}`;
break;
}
return {
success: false,
error: errorMessage,
code: errorCode,
filePath,
algorithm
};
}
}
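For standalone use outside the HTTP layer, the service can be driven directly; the file path below is purely illustrative:
// Example driver (path is hypothetical; not invoked automatically)
async function hashWithProgress(filePath = './downloads/big-archive.zip') {
const service = new FileHashService({ enableCache: true });
const result = await service.calculateHash(filePath, {
onProgress: ({ percentage }) => process.stdout.write(`\rHashing: ${percentage.toFixed(1)}%`)
});
console.log('\nDone:', result.hash);
return result;
}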
// Express application for hash operations
const express = require('express');
const multer = require('multer');
const app = express();
const upload = multer({ dest: './uploads/' });
// Initialize hash service
const hashService = new FileHashService({
enableProgress: true,
enableCache: true,
supportedAlgorithms: ['md5', 'sha1', 'sha256', 'sha512']
});
app.use(express.json());
// Routes
app.post('/api/hash/calculate', upload.single('file'), async (req, res) => {
try {
if (!req.file) {
return res.status(400).json({
error: 'File required',
code: 'FILE_MISSING'
});
}
const { algorithm = 'sha256', encoding = 'hex' } = req.body;
const result = await hashService.calculateHash(req.file.path, {
algorithm,
encoding,
includeMetadata: true
});
res.json(result);
} catch (error) {
res.status(500).json({
error: error.message,
code: 'HASH_ERROR'
});
}
});
app.post('/api/hash/verify', upload.single('file'), async (req, res) => {
try {
if (!req.file) {
return res.status(400).json({
error: 'File required',
code: 'FILE_MISSING'
});
}
const { expectedHash, algorithm = 'sha256' } = req.body;
if (!expectedHash) {
return res.status(400).json({
error: 'Expected hash required',
code: 'HASH_MISSING'
});
}
const result = await hashService.verifyIntegrity(req.file.path, expectedHash, {
algorithm,
includeDetails: true
});
res.json(result);
} catch (error) {
res.status(500).json({
error: error.message,
code: 'VERIFY_ERROR'
});
}
});
app.post('/api/hash/multiple', upload.single('file'), async (req, res) => {
try {
if (!req.file) {
return res.status(400).json({
error: 'File required',
code: 'FILE_MISSING'
});
}
// Multipart form fields arrive as strings, so accept either an array
// (JSON body) or a comma-separated list (form field)
let { algorithms = ['md5', 'sha256'] } = req.body;
if (typeof algorithms === 'string') {
algorithms = algorithms.split(',').map(a => a.trim());
}
const result = await hashService.calculateMultipleHashes(req.file.path, algorithms, {
includeMetadata: true
});
res.json(result);
} catch (error) {
res.status(500).json({
error: error.message,
code: 'MULTIPLE_HASH_ERROR'
});
}
});
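One gap in the routes above: the temporary files multer writes to ./uploads/ are never removed, so they accumulate over time. A sketch of a wrapper that deletes the upload after each handler runs (withUploadCleanup is an illustrative helper, not a multer API):
// Illustrative cleanup wrapper: remove the multer temp file once the
// handler finishes, whether it succeeded or failed
function withUploadCleanup(handler) {
return async (req, res) => {
try {
await handler(req, res);
} finally {
if (req.file) {
await fsPromises.unlink(req.file.path).catch(() => {});
}
}
};
}
// Usage: app.post('/api/hash/calculate', upload.single('file'),
// withUploadCleanup(async (req, res) => { /* same body as above */ }));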
// Demonstration
async function demonstrateUsage() {
try {
console.log('Starting file hashing demonstration...');
// Create test file
const testFile = './demo-file.txt';
await fsPromises.writeFile(testFile, 'Hello, World! This is a test file for hashing.');
// Calculate single hash
const hashResult = await hashService.calculateHash(testFile, {
algorithm: 'sha256',
includeMetadata: true
});
console.log('SHA-256 hash:', hashResult.hash);
// Calculate multiple hashes
const multiHashResult = await hashService.calculateMultipleHashes(testFile,
['md5', 'sha1', 'sha256', 'sha512']
);
console.log('Multiple hashes calculated:', Object.keys(multiHashResult.hashes).length);
// Verify integrity
const verification = await hashService.verifyIntegrity(testFile, hashResult.hash, {
algorithm: 'sha256'
});
console.log('Verification passed:', verification.isValid);
// Generate manifest for current directory
const manifestResult = await hashService.generateManifest('.', {
algorithm: 'sha256',
includePattern: /\.txt$/,
format: 'json'
});
console.log('Manifest generated:', manifestResult.success);
// Cleanup
await fsPromises.unlink(testFile);
try {
await fsPromises.unlink(manifestResult.manifestPath);
} catch {
// Ignore cleanup error
}
} catch (error) {
console.error('Demonstration error:', error);
}
}
module.exports = {
FileHashService,
app
};
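To try the API directly, guard a listener at the bottom of the module; port 3000 is an arbitrary choice for illustration:
// Start the API when this file is run directly (port is an assumption)
if (require.main === module) {
app.listen(3000, () => console.log('Hash API listening on port 3000'));
}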
File hash calculation addresses data integrity, file verification, and security validation. Use the built-in crypto module for reliable hashing, stream large files instead of buffering them in memory, and compute several algorithms in a single pass when multiple digests are needed. Layer verification, manifest generation, and batch processing on top, and round it out with modification-time-aware caching, progress reporting, and filesystem-aware error handling. Keep in mind that MD5 and SHA-1 are acceptable for accidental-corruption checksums but cryptographically broken; prefer SHA-256 or stronger whenever integrity matters against an adversary. Alternatives: system utilities (shasum, md5sum), cloud-based hash services, and specialized cryptographic libraries.
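As a quick sanity check against those system utilities, the Node result can be compared with shasum output (this sketch assumes shasum is on PATH, as on macOS and most Linux distributions, and the file path is illustrative):
// Cross-check against the shasum CLI (assumes the tool is installed)
const { execFileSync } = require('child_process');
const out = execFileSync('shasum', ['-a', '256', './demo-file.txt']).toString();
const systemHash = out.split(/\s+/)[0]; // output is "hash  filename"
console.log('shasum says:', systemHash);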