How to Tune Hyperparameters with GridSearchCV

Optimize machine learning model performance by systematically searching hyperparameter combinations with scikit-learn's GridSearchCV.

Finding the Sweet Spot

Default hyperparameters rarely yield the best performance for a given dataset. GridSearchCV automates the search: it exhaustively evaluates every combination in a parameter grid with cross-validation and reports the best one.

Basic GridSearchCV Usage

from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Generate sample data
X, y = make_classification(n_samples=1000, n_features=10, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define parameter grid
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [0.001, 0.01, 0.1, 1],
    'kernel': ['rbf', 'linear']
}

# Create and fit GridSearchCV
svm = SVC(random_state=42)
grid_search = GridSearchCV(svm, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

print(f"Best parameters: {grid_search.best_params_}")
print(f"Best cross-validation score: {grid_search.best_score_:.3f}")
print(f"Test set score: {grid_search.score(X_test, y_test):.3f}")

Random Forest Hyperparameter Tuning

from sklearn.ensemble import RandomForestClassifier

# Random Forest parameter grid
rf_param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [3, 5, 7, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

rf = RandomForestClassifier(random_state=42)
rf_grid = GridSearchCV(rf, rf_param_grid, cv=3, scoring='accuracy', n_jobs=-1)
rf_grid.fit(X_train, y_train)

print(f"RF Best parameters: {rf_grid.best_params_}")
print(f"RF Best CV score: {rf_grid.best_score_:.3f}")

Multiple Scoring Metrics

from sklearn.metrics import make_scorer, precision_score, recall_score

# Define multiple scoring metrics
scoring = {
    'accuracy': 'accuracy',
    'precision': make_scorer(precision_score, average='weighted'),
    'recall': make_scorer(recall_score, average='weighted'),
    'f1': 'f1_weighted'
}

# Grid search with multiple metrics
multi_grid = GridSearchCV(
    svm, param_grid, cv=5, scoring=scoring, 
    refit='f1', n_jobs=-1  # Refit on F1 score
)
multi_grid.fit(X_train, y_train)

print(f"Best params (F1): {multi_grid.best_params_}")
print(f"Best F1 score: {multi_grid.best_score_:.3f}")

# Access all scores
results = multi_grid.cv_results_
print(f"Mean test accuracy: {results['mean_test_accuracy'][:3].round(3)}")

Pipeline Hyperparameter Tuning

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif

# Create pipeline
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('selector', SelectKBest(f_classif)),
    ('classifier', SVC(random_state=42))
])

# Pipeline parameter grid (use step__parameter format)
pipeline_params = {
    'selector__k': [5, 10, 15],
    'classifier__C': [0.1, 1, 10],
    'classifier__gamma': [0.01, 0.1, 1],
    'classifier__kernel': ['rbf', 'linear']
}

pipeline_grid = GridSearchCV(pipeline, pipeline_params, cv=3, n_jobs=-1)
pipeline_grid.fit(X_train, y_train)

print(f"Pipeline best params: {pipeline_grid.best_params_}")
print(f"Pipeline best score: {pipeline_grid.best_score_:.3f}")

RandomizedSearchCV Alternative

from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform

# Random parameter distributions
random_param_dist = {
    'C': uniform(0.1, 100),  # Continuous distribution
    'gamma': uniform(0.001, 1),
    'kernel': ['rbf', 'linear']
}

# Randomized search
random_search = RandomizedSearchCV(
    svm, random_param_dist, n_iter=20, cv=5, 
    scoring='accuracy', random_state=42, n_jobs=-1
)
random_search.fit(X_train, y_train)

print(f"Random search best params: {random_search.best_params_}")
print(f"Random search best score: {random_search.best_score_:.3f}")

Nested Cross-Validation

from sklearn.model_selection import cross_val_score

# Nested CV for unbiased performance estimate
def nested_cv_score(estimator, param_grid, X, y, outer_cv=5, inner_cv=3):
    """Perform nested cross-validation"""
    
    # Inner loop: hyperparameter tuning
    grid_search = GridSearchCV(estimator, param_grid, cv=inner_cv, scoring='accuracy')
    
    # Outer loop: performance estimation
    nested_scores = cross_val_score(grid_search, X, y, cv=outer_cv)
    
    return nested_scores

# Apply nested CV
nested_scores = nested_cv_score(svm, param_grid, X_train, y_train)
print(f"Nested CV scores: {nested_scores.round(3)}")
print(f"Nested CV mean: {nested_scores.mean():.3f} (+/- {nested_scores.std() * 2:.3f})")

Regression Hyperparameter Tuning

from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Regression data
X_reg, y_reg = make_regression(n_samples=1000, n_features=10, random_state=42)
X_reg_train, X_reg_test, y_reg_train, y_reg_test = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)

# Regression parameter grid
reg_param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [3, 5, 10],
    'min_samples_split': [2, 5]
}

# Grid search for regression
rf_reg = RandomForestRegressor(random_state=42)
reg_grid = GridSearchCV(
    rf_reg, reg_param_grid, cv=5, 
    scoring='neg_mean_squared_error', n_jobs=-1
)
reg_grid.fit(X_reg_train, y_reg_train)

print(f"Regression best params: {reg_grid.best_params_}")
print(f"Best CV MSE: {-reg_grid.best_score_:.3f}")

Advanced Parameter Grids

# Different parameter combinations
advanced_param_grid = [
    # RBF kernel parameters
    {
        'kernel': ['rbf'],
        'C': [0.1, 1, 10],
        'gamma': [0.01, 0.1, 1]
    },
    # Linear kernel parameters
    {
        'kernel': ['linear'],
        'C': [0.1, 1, 10]
    },
    # Polynomial kernel parameters
    {
        'kernel': ['poly'],
        'C': [0.1, 1, 10],
        'degree': [2, 3, 4],
        'gamma': [0.01, 0.1]
    }
]

advanced_grid = GridSearchCV(svm, advanced_param_grid, cv=3, n_jobs=-1)
advanced_grid.fit(X_train, y_train)

print(f"Advanced grid best params: {advanced_grid.best_params_}")

Hyperparameter Tuning Best Practices

import time

def efficient_grid_search(X, y):
    """Demonstrate efficient grid search practices"""
    
    # Start with coarse grid
    coarse_grid = {
        'C': [0.01, 1, 100],
        'gamma': [0.001, 0.1, 10]
    }
    
    # Coarse search
    start_time = time.time()
    coarse_search = GridSearchCV(svm, coarse_grid, cv=3, n_jobs=-1)
    coarse_search.fit(X, y)
    coarse_time = time.time() - start_time
    
    # Fine-tune around best parameters
    best_C = coarse_search.best_params_['C']
    best_gamma = coarse_search.best_params_['gamma']
    
    fine_grid = {
        'C': [best_C * 0.1, best_C, best_C * 10],
        'gamma': [best_gamma * 0.1, best_gamma, best_gamma * 10]
    }
    
    # Fine search
    start_time = time.time()
    fine_search = GridSearchCV(svm, fine_grid, cv=5, n_jobs=-1)
    fine_search.fit(X, y)
    fine_time = time.time() - start_time
    
    print(f"Coarse search time: {coarse_time:.2f}s")
    print(f"Fine search time: {fine_time:.2f}s")
    print(f"Final best params: {fine_search.best_params_}")
    
    return fine_search

# Apply efficient search
efficient_model = efficient_grid_search(X_train, y_train)

Model Comparison with Tuning

from sklearn.linear_model import LogisticRegression

# Compare multiple models with tuning
models = {
    'SVM': (SVC(random_state=42), {
        'C': [0.1, 1, 10],
        'gamma': [0.01, 0.1, 1],
        'kernel': ['rbf', 'linear']
    }),
    'Random Forest': (RandomForestClassifier(random_state=42), {
        'n_estimators': [50, 100],
        'max_depth': [3, 5, None],
        'min_samples_split': [2, 5]
    }),
    'Logistic Regression': (LogisticRegression(random_state=42, max_iter=1000), {
        'C': [0.1, 1, 10],
        'penalty': ['l1', 'l2'],
        'solver': ['liblinear']
    })
}

# Tune and compare all models
results = {}
for name, (model, params) in models.items():
    grid = GridSearchCV(model, params, cv=3, scoring='accuracy', n_jobs=-1)
    grid.fit(X_train, y_train)
    
    results[name] = {
        'best_score': grid.best_score_,
        'best_params': grid.best_params_,
        'test_score': grid.score(X_test, y_test)
    }

# Display results
for name, result in results.items():
    print(f"\n{name}:")
    print(f"  Best CV score: {result['best_score']:.3f}")
    print(f"  Test score: {result['test_score']:.3f}")
    print(f"  Best params: {result['best_params']}")

Optimization Tips

  • Start with coarse grids, then refine
  • Use RandomizedSearchCV for large parameter spaces (or successive halving, sketched after this list)
  • Limit CV folds for initial exploration (3-5 folds)
  • Use n_jobs=-1 for parallel processing
  • Consider early stopping for iterative algorithms
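
One way to act on the first two tips together is scikit-learn's successive-halving search, which trains all candidates on a small budget and repeatedly discards the weakest ones. A minimal sketch; note that HalvingGridSearchCV still sits behind an experimental enable flag (scikit-learn >= 0.24):

# HalvingGridSearchCV requires the experimental enable import
from sklearn.experimental import enable_halving_search_cv  # noqa: F401
from sklearn.model_selection import HalvingGridSearchCV

halving_search = HalvingGridSearchCV(
    svm, param_grid, cv=3, factor=3, scoring='accuracy',
    random_state=42, n_jobs=-1
)
halving_search.fit(X_train, y_train)
print(f"Halving search best params: {halving_search.best_params_}")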

Master Model Optimization

Explore automated hyperparameter optimization, learn Bayesian optimization techniques, and discover advanced model selection strategies.
