16. Report Generation
Chapter 16 of 18 · 20 min
Automated report generation saves time and ensures consistency. Local LLMs can produce formatted reports from analysis results without external services.
Structured Report Templates
def generate_analysis_report(df, analysis_results):
    """Render analysis results as a Markdown report string.

    Args:
        df: The analysed DataFrame. For a non-empty frame, ``.date()`` is
            called on the index minimum and maximum, so the index must hold
            timestamp-like values.
        analysis_results: Mapping that provides the keys ``total``,
            ``trend_summary``, ``top_performers``, ``anomaly_count``,
            ``anomaly_summary`` and ``recommendations``.

    Returns:
        The report as Markdown text (with a leading and a trailing newline).

    Raises:
        KeyError: If ``analysis_results`` lacks one of the keys listed above.
    """
    generated_at = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M')

    # The min/max of an empty index is NaT/NaN, which has no usable .date(),
    # so report the range as unavailable instead of failing.
    if len(df) == 0:
        date_range = "N/A"
    else:
        date_range = f"{df.index.min().date()} to {df.index.max().date()}"

    # The empty first and last entries reproduce the newline that follows the
    # opening and precedes the closing of the original template.
    lines = [
        "",
        "# Data Analysis Report",
        f"Generated: {generated_at}",
        "## Executive Summary",
        f"- Total Records: {len(df):,}",
        f"- Date Range: {date_range}",
        f"- Key Metric Total: {analysis_results['total']:,.2f}",
        "## Key Findings",
        "### Trend Analysis",
        f"{analysis_results['trend_summary']}",
        "### Top Performers",
        f"{analysis_results['top_performers']}",
        "### Anomalies Detected",
        f"Total anomalies: {analysis_results['anomaly_count']}",
        f"{analysis_results['anomaly_summary']}",
        "## Recommendations",
        f"{analysis_results['recommendations']}",
        "Report generated locally using Ollama",
        "",
    ]
    return "\n".join(lines)
Integrating Visualizations
from weasyprint import HTML
import base64
def image_to_base64(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is read in binary mode and the base64-encoded bytes are decoded
    to a ``str`` so the result can be embedded directly in markup.
    """
    with open(image_path, 'rb') as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode()
def generate_pdf_report(df, charts, narrative, output_path='analysis_report.pdf'):
    """Render a narrative plus chart images into a styled PDF.

    Args:
        df: Not referenced by this function; kept so existing callers that
            pass it positionally continue to work.
        charts: Iterable of image file paths. Each file is embedded inline as
            a base64 ``data:image/png`` URI via ``image_to_base64``.
        narrative: Markup placed verbatim inside ``<body>``. It is NOT
            escaped, so it must already be trusted HTML.
        output_path: Destination of the PDF. Defaults to
            ``'analysis_report.pdf'``, the location that used to be
            hard-coded.
    """
    # One <img> tag per chart, concatenated without separators.
    chart_html = ''.join(
        f'<img src="data:image/png;base64,{image_to_base64(c)}" style="width:100%;margin:20px 0;"/>'
        for c in charts
    )

    # Template lines start at column 0 so the generated markup is unchanged;
    # doubled braces are literal CSS braces inside the f-string.
    html = f"""
<html>
<head><style>
body {{ font-family: Arial, sans-serif; margin: 40px; }}
h1 {{ color: #2c3e50; border-bottom: 2px solid #3498db; }}
h2 {{ color: #34495e; margin-top: 30px; }}
.metric {{ background: #ecf0f1; padding: 15px; border-radius: 5px; }}
table {{ border-collapse: collapse; width: 100%; }}
th, td {{ border: 1px solid #bdc3c7; padding: 10px; text-align: left; }}
</style></head>
<body>
{narrative}
{chart_html}
</body>
</html>"""
    HTML(string=html).write_pdf(output_path)
Automated Scheduled Reports
import schedule
import time
def daily_report_job():
    """Run the daily pipeline: load data, analyse it, write the report.

    The Markdown report is written to ``reports/report_<YYYYMMDD>.md``
    (relative to the current working directory), after which the dashboard
    is generated and a completion message is printed.
    """
    # Function-scope import so this snippet does not rely on an import that
    # appears elsewhere in the file.
    from pathlib import Path

    df = load_data()
    results = perform_analysis(df)
    report = generate_analysis_report(df, results)

    report_path = Path("reports") / f"report_{pd.Timestamp.now().strftime('%Y%m%d')}.md"
    # Create the output directory on first run; open() would otherwise raise
    # FileNotFoundError.
    report_path.parent.mkdir(parents=True, exist_ok=True)
    # Explicit UTF-8 so the output does not depend on the platform's default
    # encoding.
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write(report)

    # Generate visualizations
    create_dashboard(df, results)
    print(f"Daily report generated: {pd.Timestamp.now()}")
# Schedule for 8 AM daily
_daily_at_eight = schedule.every().day.at("08:00")
_daily_at_eight.do(daily_report_job)

# Poll once a minute and run whatever is due; this loop never exits.
while True:
    schedule.run_pending()
    time.sleep(60)
Report Versioning
import json
from pathlib import Path
def save_report_metadata(report_path, analysis_results, data_hash):
    """Write a JSON metadata sidecar next to a generated report.

    The sidecar has the same path as the report with its suffix replaced by
    ``.json`` and records the report path, generation time, data hash,
    record count and analysis parameters.

    Args:
        report_path: Location of the report, as a ``str`` or a
            ``pathlib.Path``.
        analysis_results: Mapping providing ``record_count`` and
            ``parameters``; both must be JSON-serialisable.
        data_hash: Fingerprint of the input data to store with the report.

    Raises:
        KeyError: If ``analysis_results`` lacks ``record_count`` or
            ``parameters``.
    """
    metadata = {
        'report_path': str(report_path),
        'generated_at': pd.Timestamp.now().isoformat(),
        'data_hash': data_hash,
        'record_count': analysis_results['record_count'],
        'parameters': analysis_results['parameters']
    }
    # Coerce to Path so plain string paths work too; str has no with_suffix().
    metadata_path = Path(report_path).with_suffix('.json')
    # Explicit UTF-8 so the file does not depend on the platform's default
    # encoding.
    with open(metadata_path, 'w', encoding='utf-8') as f:
        json.dump(metadata, f, indent=2)
# Compute data hash for reproducibility
import hashlib
def compute_data_hash(df):
    """Return a 12-character hex fingerprint of ``df``.

    The per-row hashes from ``pd.util.hash_pandas_object`` are fed as raw
    bytes into SHA-256, and the first 12 hex digits of the digest are
    returned.
    """
    row_hashes = pd.util.hash_pandas_object(df)
    digest = hashlib.sha256()
    digest.update(row_hashes.values.tobytes())
    return digest.hexdigest()[:12]
Multi-Section Reports
def generate_full_report(df):
    """Build a multi-section Markdown report describing ``df``.

    Sections: overview, statistical summary, anomalies and highly correlated
    column pairs (absolute correlation above 0.7).

    Args:
        df: DataFrame to describe. The index min/max difference must expose
            ``.days`` (e.g. a datetime index), and the frame must contain an
            ``anomaly`` column that can be summed and averaged.

    Returns:
        The report sections joined with newlines.

    Raises:
        KeyError: If ``df`` has no ``anomaly`` column.
    """
    sections = []

    # Section 1: Overview
    sections.append("## Overview")
    sections.append(f"- Records: {len(df):,}")
    sections.append(f"- Time span: {(df.index.max() - df.index.min()).days} days")
    sections.append(f"- Columns: {len(df.columns)}")

    # Section 2: Statistical Summary
    sections.append("\n## Statistical Summary")
    stats = df.describe().T[['mean', 'std', 'min', 'max']]
    try:
        stats_table = stats.to_markdown()
    except ImportError:
        # to_markdown() needs the optional "tabulate" package; without it,
        # fall back to a plain-text table inside a code fence.
        stats_table = f"```\n{stats.to_string()}\n```"
    sections.append(stats_table)

    # Section 3: Anomalies
    sections.append("\n## Anomalies")
    sections.append(f"- Detected: {df['anomaly'].sum()} ({df['anomaly'].mean()*100:.1f}%)")

    # Section 4: Correlations
    sections.append("\n## High Correlations (|r| > 0.7)")
    # numeric_only=True skips text/object columns, which would otherwise make
    # corr() raise on recent pandas versions.
    corr = df.corr(numeric_only=True).abs()
    # `a < b` keeps each unordered pair once and drops the diagonal; it is
    # checked first so the .loc lookup only runs for pairs that can qualify.
    high_corr = [
        (a, b, corr.loc[a, b])
        for a in corr.index
        for b in corr.columns
        if a < b and corr.loc[a, b] > 0.7
    ]
    for a, b, r in high_corr:
        sections.append(f"- {a} ↔ {b}: {r:.3f}")

    return '\n'.join(sections)
EXERCISE
Create a report generation script that takes an analysis results dictionary, produces a Markdown report, converts it to PDF using WeasyPrint, and saves metadata for versioning.