Table Of Contents
- Time is Your Most Valuable Data Dimension
- Parsing and Creating DateTime Data
- DateTime Index and Time Series
- DateTime Components and Attributes
- Time Zone Handling
- DateTime Arithmetic and Calculations
- Resampling and Frequency Conversion
- Rolling Windows and Moving Averages
- Business Analytics with DateTime
- Advanced DateTime Operations
- Performance Tips for DateTime Operations
- Master Time Series Analysis
Time is Your Most Valuable Data Dimension
DateTime data turns static snapshots into trends, seasonality, and lead-lag relationships. Master pandas' datetime capabilities to reveal the patterns that only show up across time.
Parsing and Creating DateTime Data
import pandas as pd
import numpy as np
from datetime import datetime, date, timedelta
# Various datetime input formats
datetime_strings = [
'2025-01-15',
'15/01/2025',
'January 15, 2025',
'2025-01-15 14:30:00',
'15-Jan-2025',
'2025-01-15T14:30:00Z'
]
# Parse different formats
parsed_dates = []
for date_str in datetime_strings:
    try:
        parsed = pd.to_datetime(date_str)
        parsed_dates.append(parsed)
        print(f"'{date_str}' → {parsed}")
    except (ValueError, TypeError):
        print(f"Could not parse: '{date_str}'")
# Create DataFrame with datetime data
df = pd.DataFrame({
'event': ['Meeting', 'Deadline', 'Launch', 'Review', 'Planning'],
'date_str': ['2025-01-15', '2025-02-20', '2025-03-10', '2025-04-05', '2025-05-12'],
'time_str': ['09:00', '17:30', '14:15', '11:45', '16:20']
})
# Convert string columns to datetime
df['date'] = pd.to_datetime(df['date_str'])
df['datetime'] = pd.to_datetime(df['date_str'] + ' ' + df['time_str'])
print("\nDataFrame with datetime columns:")
print(df[['event', 'date', 'datetime']])
print(f"\nData types:\n{df[['date', 'datetime']].dtypes}")
DateTime Index and Time Series
# Create time series with datetime index
date_range = pd.date_range(start='2025-01-01', end='2025-12-31', freq='D')
time_series = pd.DataFrame({
'sales': np.random.randint(100, 1000, len(date_range)),
'customers': np.random.randint(20, 200, len(date_range))
}, index=date_range)
print("Time series with datetime index:")
print(time_series.head())
print(f"Index type: {type(time_series.index)}")
# Time-based slicing
january_data = time_series.loc['2025-01']  # .loc is needed for partial-string row selection
print(f"\nJanuary data shape: {january_data.shape}")
# Date range slicing
q1_data = time_series.loc['2025-01-01':'2025-03-31']
print(f"Q1 data shape: {q1_data.shape}")
# Specific date access
new_years_data = time_series.loc['2025-01-01']  # plain [] would look for a column instead
print(f"\nNew Year's Day data: {new_years_data}")
# Recent data (last 7 days)
recent_data = time_series.tail(7)
print(f"\nLast 7 days:")
print(recent_data)
DateTime Components and Attributes
# Extract datetime components
sample_dates = pd.DataFrame({
'datetime': pd.date_range('2025-01-01', periods=10, freq='3D')
})
# Extract various datetime components
sample_dates['year'] = sample_dates['datetime'].dt.year
sample_dates['month'] = sample_dates['datetime'].dt.month
sample_dates['day'] = sample_dates['datetime'].dt.day
sample_dates['weekday'] = sample_dates['datetime'].dt.weekday # 0=Monday
sample_dates['day_name'] = sample_dates['datetime'].dt.day_name()
sample_dates['month_name'] = sample_dates['datetime'].dt.month_name()
sample_dates['quarter'] = sample_dates['datetime'].dt.quarter
sample_dates['week_of_year'] = sample_dates['datetime'].dt.isocalendar().week
print("DateTime components:")
print(sample_dates)
# Business day calculations
sample_dates['is_weekend'] = sample_dates['datetime'].dt.weekday >= 5
sample_dates['days_to_weekend'] = 4 - sample_dates['datetime'].dt.weekday
sample_dates['days_to_weekend'] = sample_dates['days_to_weekend'].clip(lower=0)
print("\nBusiness day analysis:")
print(sample_dates[['datetime', 'day_name', 'is_weekend', 'days_to_weekend']])
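Beyond individual components, dt.to_period() collapses each timestamp into a calendar period (month, quarter, and so on), which is often a cleaner grouping key than separate year/month columns. A short sketch on the sample_dates frame from above:
# Collapse timestamps into calendar periods for grouping
sample_dates['month_period'] = sample_dates['datetime'].dt.to_period('M')
sample_dates['quarter_period'] = sample_dates['datetime'].dt.to_period('Q')
print(sample_dates[['datetime', 'month_period', 'quarter_period']].head())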
Time Zone Handling
# Create timezone-aware datetime
utc_time = pd.Timestamp('2025-01-15 14:30:00', tz='UTC')
print(f"UTC time: {utc_time}")
# Convert to different timezones
ny_time = utc_time.tz_convert('America/New_York')
tokyo_time = utc_time.tz_convert('Asia/Tokyo')
london_time = utc_time.tz_convert('Europe/London')
print(f"New York time: {ny_time}")
print(f"Tokyo time: {tokyo_time}")
print(f"London time: {london_time}")
# DataFrame with timezone-aware data
global_meetings = pd.DataFrame({
'meeting': ['Team Standup', 'Client Call', 'Project Review'],
'utc_time': pd.to_datetime([
'2025-01-15 09:00:00',
'2025-01-15 14:00:00',
'2025-01-15 18:30:00'
], utc=True)
})
# Add timezone conversions
global_meetings['ny_time'] = global_meetings['utc_time'].dt.tz_convert('America/New_York')
global_meetings['tokyo_time'] = global_meetings['utc_time'].dt.tz_convert('Asia/Tokyo')
print("\nGlobal meeting times:")
print(global_meetings)
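tz_convert assumes the data is already timezone-aware. Timestamps recorded in local wall-clock time must first be localized with tz_localize; a minimal sketch, assuming the readings below were logged in New York local time:
import pandas as pd
# Naive timestamps (no timezone attached) -- hypothetical local log entries
local_times = pd.Series(pd.to_datetime(['2025-03-08 12:00', '2025-03-09 12:00']))
# tz_localize attaches a timezone; tz_convert then translates to another one
eastern = local_times.dt.tz_localize('America/New_York')
as_utc = eastern.dt.tz_convert('UTC')
print(pd.DataFrame({'eastern': eastern, 'utc': as_utc}))
Note how the UTC offset shifts from -05:00 to -04:00 across the March 9, 2025 daylight saving transition.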
DateTime Arithmetic and Calculations
# Date arithmetic
base_date = pd.Timestamp('2025-01-15')
# Add/subtract time periods
print("Date arithmetic:")
print(f"Base date: {base_date}")
print(f"+ 7 days: {base_date + pd.Timedelta(days=7)}")
print(f"+ 2 weeks: {base_date + pd.Timedelta(weeks=2)}")
print(f"+ 3 months: {base_date + pd.DateOffset(months=3)}")
print(f"- 1 year: {base_date - pd.DateOffset(years=1)}")
# Calculate age from birth date
people = pd.DataFrame({
'name': ['Alice', 'Bob', 'Charlie'],
'birth_date': pd.to_datetime(['1995-06-15', '1988-03-20', '1992-11-08'])
})
today = pd.Timestamp.now()
people['age_days'] = (today - people['birth_date']).dt.days
people['age_years'] = people['age_days'] / 365.25 # Account for leap years
print("\nAge calculations:")
print(people)
# Time differences in dataset
events = pd.DataFrame({
'event': ['Start', 'Milestone 1', 'Milestone 2', 'End'],
'date': pd.to_datetime(['2025-01-01', '2025-02-15', '2025-04-10', '2025-06-30'])
})
events['days_since_start'] = (events['date'] - events['date'].iloc[0]).dt.days
events['days_to_next'] = events['date'].diff(-1).dt.days * -1
print("\nProject timeline:")
print(events)
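Fixed Timedeltas ignore the calendar; pandas' offset classes are calendar-aware, skipping weekends or snapping to period boundaries. A short sketch using built-in offsets:
import pandas as pd
anchor = pd.Timestamp('2025-01-15')  # a Wednesday
# BDay skips Saturdays and Sundays; MonthEnd / QuarterEnd snap to period ends
print(f"+ 5 business days: {anchor + pd.offsets.BDay(5)}")       # 2025-01-22
print(f"Next month end:    {anchor + pd.offsets.MonthEnd()}")    # 2025-01-31
print(f"Next quarter end:  {anchor + pd.offsets.QuarterEnd()}")  # 2025-03-31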
Resampling and Frequency Conversion
# Create high-frequency time series
hourly_data = pd.DataFrame({
'value': np.random.randn(24*7), # 1 week of hourly data
'category': np.random.choice(['A', 'B'], 24*7)
}, index=pd.date_range('2025-01-01', periods=24*7, freq='h'))  # lowercase 'h'; uppercase 'H' is deprecated
print("Original hourly data (first 10 rows):")
print(hourly_data.head(10))
# Resample to daily data
daily_mean = hourly_data['value'].resample('D').mean()
daily_sum = hourly_data['value'].resample('D').sum()
daily_count = hourly_data['category'].resample('D').count()
daily_summary = pd.DataFrame({
'daily_mean': daily_mean,
'daily_sum': daily_sum,
'daily_count': daily_count
})
print("\nDaily resampled data:")
print(daily_summary)
# Business day resampling
business_day_data = hourly_data['value'].resample('B').mean() # Business days only
print(f"\nBusiness day data shape: {business_day_data.shape}")
# Custom resampling with multiple functions
custom_resample = hourly_data['value'].resample('D').agg(['mean', 'max', 'min', 'std', 'count'])
custom_resample.columns = ['mean_value', 'max_value', 'min_value', 'std_value', 'count']
print("\nCustom resampling:")
print(custom_resample)
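Resampling also works in the other direction: upsampling to a finer frequency creates empty slots that need a fill strategy. A minimal sketch reusing the daily_mean series from above:
# Upsample daily means to 6-hour slots: asfreq leaves gaps, ffill/interpolate fill them
sparse = daily_mean.resample('6h').asfreq()          # NaN between the daily points
filled = daily_mean.resample('6h').ffill()           # carry each day's value forward
smoothed = daily_mean.resample('6h').interpolate()   # linear interpolation between days
print(filled.head(8))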
Rolling Windows and Moving Averages
# Stock price simulation
np.random.seed(42)
stock_prices = pd.DataFrame({
'price': 100 + np.random.randn(100).cumsum()
}, index=pd.date_range('2025-01-01', periods=100, freq='D'))
# Moving averages
stock_prices['MA_7'] = stock_prices['price'].rolling(window=7).mean()
stock_prices['MA_21'] = stock_prices['price'].rolling(window=21).mean()
stock_prices['MA_50'] = stock_prices['price'].rolling(window=50).mean()
# Rolling statistics
stock_prices['volatility_7d'] = stock_prices['price'].rolling(window=7).std()
stock_prices['min_7d'] = stock_prices['price'].rolling(window=7).min()
stock_prices['max_7d'] = stock_prices['price'].rolling(window=7).max()
print("Stock price analysis (last 10 days):")
print(stock_prices.tail(10))
# Expanding windows (cumulative)
stock_prices['cumulative_mean'] = stock_prices['price'].expanding().mean()
stock_prices['cumulative_max'] = stock_prices['price'].expanding().max()
print("\nCumulative statistics (first 10 days):")
print(stock_prices[['price', 'cumulative_mean', 'cumulative_max']].head(10))
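Rolling means weight every observation in the window equally; an exponentially weighted window reacts faster to recent changes. A short sketch on the same stock_prices frame (span=10 is an arbitrary choice):
# Exponentially weighted moving average and volatility: recent prices weigh more
stock_prices['EWMA_10'] = stock_prices['price'].ewm(span=10, adjust=False).mean()
stock_prices['EW_vol_10'] = stock_prices['price'].ewm(span=10, adjust=False).std()
print(stock_prices[['price', 'MA_7', 'EWMA_10', 'EW_vol_10']].tail(5))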
Business Analytics with DateTime
# Sales analytics with datetime
sales_data = pd.DataFrame({
'date': pd.date_range('2024-01-01', '2024-12-31', freq='D'),
'sales': np.random.randint(500, 2000, 366), # 2024 is leap year
'product': np.random.choice(['A', 'B', 'C'], 366)
})
# Add datetime features for analysis
sales_data['month'] = sales_data['date'].dt.month
sales_data['quarter'] = sales_data['date'].dt.quarter
sales_data['day_of_week'] = sales_data['date'].dt.day_name()
sales_data['is_weekend'] = sales_data['date'].dt.weekday >= 5
sales_data['week_of_year'] = sales_data['date'].dt.isocalendar().week
# Monthly sales analysis
monthly_sales = sales_data.groupby('month')['sales'].agg(
    total_sales='sum',
    avg_daily_sales='mean',
    sales_std='std'
).round(2)
print("Monthly sales analysis:")
print(monthly_sales)
# Seasonal patterns
seasonal_analysis = sales_data.groupby(['quarter', 'is_weekend'])['sales'].mean().unstack()
print("\nSeasonal patterns (weekday vs weekend):")
print(seasonal_analysis.round(2))
# Day of week performance
dow_performance = sales_data.groupby('day_of_week')['sales'].agg(
    avg_sales='mean',
    total_sales='sum'
)
# Reorder by actual day sequence
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
dow_performance = dow_performance.reindex(day_order)
print("\nDay of week performance:")
print(dow_performance.round(2))
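The same groupings condense into a single weekday-by-month view with pivot_table; a sketch built on sales_data and the day_order list defined above:
# Average daily sales by weekday and month in one table
weekday_month = sales_data.pivot_table(
    values='sales', index='day_of_week', columns='month', aggfunc='mean'
).reindex(day_order)  # keep Monday..Sunday ordering
print(weekday_month.round(1))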
Advanced DateTime Operations
# Holiday and business day handling
from pandas.tseries.holiday import USFederalHolidayCalendar
# Create business day calendar
bday_calendar = pd.bdate_range('2025-01-01', '2025-12-31')
print(f"Business days in 2025: {len(bday_calendar)}")
# Generate custom business calendar
us_holidays = USFederalHolidayCalendar()
holiday_dates = us_holidays.holidays(start='2025-01-01', end='2025-12-31')
print(f"\nUS Federal Holidays in 2025:")
for holiday in holiday_dates:
    print(f"  {holiday.strftime('%Y-%m-%d %A')}")
# Create work schedule analysis
work_schedule = pd.DataFrame({
'date': pd.date_range('2025-01-01', '2025-01-31', freq='D')
})
work_schedule['is_business_day'] = work_schedule['date'].isin(bday_calendar)
work_schedule['is_holiday'] = work_schedule['date'].isin(holiday_dates)
work_schedule['is_work_day'] = (work_schedule['is_business_day'] &
~work_schedule['is_holiday'])
print(f"\nJanuary 2025 work days: {work_schedule['is_work_day'].sum()}")
# Lag and lead operations
financial_data = pd.DataFrame({
'date': pd.date_range('2025-01-01', periods=10, freq='D'),
'price': [100, 102, 98, 105, 107, 103, 109, 111, 108, 115]
})
# Calculate returns
financial_data['price_lag1'] = financial_data['price'].shift(1)
financial_data['daily_return'] = (financial_data['price'] / financial_data['price_lag1'] - 1)
financial_data['price_lead1'] = financial_data['price'].shift(-1)
print("\nFinancial data with lags and leads:")
print(financial_data)
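The holiday calendar can also drive date arithmetic directly: CustomBusinessDay skips both weekends and the listed holidays. A minimal sketch:
import pandas as pd
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay
# Business-day offset that also skips US federal holidays
us_bday = CustomBusinessDay(calendar=USFederalHolidayCalendar())
# 2025-07-03 is a Thursday; adding one working day skips July 4 and the weekend
print(pd.Timestamp('2025-07-03') + us_bday)  # 2025-07-07
# Working days in 2025 once federal holidays are excluded
print(len(pd.date_range('2025-01-01', '2025-12-31', freq=us_bday)))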
Performance Tips for DateTime Operations
# Performance comparison
large_dates = pd.date_range('2020-01-01', '2025-12-31', freq='h')  # hourly; 'h' replaces deprecated 'H'
large_df = pd.DataFrame({
'datetime': large_dates,
'value': np.random.randn(len(large_dates))
})
print(f"Large dataset: {len(large_df):,} rows")
# Efficient datetime operations
import time
# Method 1: Direct datetime access
start = time.time()
result1 = large_df['datetime'].dt.month
time1 = time.time() - start
# Method 2: Set as index first (faster for multiple operations)
start = time.time()
indexed_df = large_df.set_index('datetime')
result2 = indexed_df.index.month
time2 = time.time() - start
print(f"\nDirect access time: {time1:.4f}s")
print(f"Index access time: {time2:.4f}s")
# Tips for optimization
print("\n=== DateTime Performance Tips ===")
print("✅ Use datetime index for time series data")
print("✅ Vectorize datetime operations instead of loops")
print("✅ Use pd.to_datetime() with format parameter for parsing")
print("✅ Consider timezone-naive data if timezones aren't needed")
print("✅ Use categorical data type for repeated datetime components")
Master Time Series Analysis
Explore advanced time series forecasting, learn seasonal decomposition techniques, and discover financial time series analysis.