KEY INSIGHT
Understanding how accuracy degrades as model size decreases enables principled selection of compression targets based on acceptable loss thresholds.
The relationship between model size and accuracy is rarely linear. A clear understanding of this relationship guides compression decisions and helps avoid both over-compression (unacceptable accuracy loss) and under-compression (leaving achievable size savings unused).
### Degradation Patterns
Different model architectures exhibit different degradation patterns:
```python
def analyze_degradation(model, size_targets, test_loader):
    """
    Analyze how accuracy changes across different model sizes.

    The baseline model is evaluated once; then, for every target size, the
    model is compressed with ``compress_to_size`` and evaluated again.

    Args:
        model: Baseline model. Must expose ``num_parameters()``.
        size_targets: Iterable of target sizes in MB (each must be > 0).
        test_loader: Evaluation data handed to ``evaluate``.

    Returns:
        A list with one dict per target size, holding the keys
        ``'target_size'``, ``'accuracy'``, ``'accuracy_drop'``,
        ``'compression_ratio'`` and ``'efficiency'``. ``'efficiency'`` is the
        accuracy drop divided by the fraction of the size that was removed;
        it is NaN when the target equals the baseline size, because nothing
        is removed and the quotient is undefined.

    Raises:
        ValueError: If a target size is zero or negative.
    """
    baseline_acc = evaluate(model, test_loader)
    # Size in MB, assuming 4 bytes (float32) per parameter.
    baseline_size = model.num_parameters() * 4 / 1e6

    results = []
    for target_size_mb in size_targets:
        if target_size_mb <= 0:
            raise ValueError(
                f"target size must be positive, got {target_size_mb!r}"
            )

        # Compression ratio needed to go from the baseline to the target.
        ratio = baseline_size / target_size_mb

        compressed = compress_to_size(model, ratio)
        acc = evaluate(compressed, test_loader)
        acc_drop = baseline_acc - acc

        # Fraction of the original size that the target removes.
        removed_fraction = 1 - 1 / ratio
        if removed_fraction == 0:
            # Target equals the baseline: avoid dividing by zero.
            efficiency = float("nan")
        else:
            efficiency = acc_drop / removed_fraction

        results.append({
            'target_size': target_size_mb,
            'accuracy': acc,
            'accuracy_drop': acc_drop,
            'compression_ratio': ratio,
            'efficiency': efficiency,  # accuracy drop per fraction of size removed
        })
    return results
def compress_to_size(model, target_ratio):
    """
    Bisect over the pruning ratio to reach a target compression ratio.

    Each step prunes ``model`` with ``magnitude_pruning`` at the midpoint of
    the current search interval and compares the resulting compression ratio
    (original parameter count / pruned parameter count) with ``target_ratio``.

    Args:
        model: Model to compress. Must expose ``num_parameters()``.
        target_ratio: Desired original-size / compressed-size ratio.

    Returns:
        The least-pruned candidate found that reaches ``target_ratio``. If no
        probed candidate reaches it, the last (most heavily pruned) candidate
        is returned instead.
    """
    # NOTE(review): assumes magnitude_pruning returns a new model and leaves
    # `model` untouched - confirm against its implementation.
    original_params = model.num_parameters()  # loop-invariant, computed once

    low, high = 0.0, 0.99
    reaches_target = None  # latest candidate whose ratio is >= target_ratio
    pruned = None
    for _ in range(20):  # bisection steps
        mid = (low + high) / 2
        pruned = magnitude_pruning(model, mid)

        remaining = pruned.num_parameters()
        # A candidate with no parameters left counts as infinitely compressed
        # instead of raising ZeroDivisionError.
        current_ratio = original_params / remaining if remaining else float("inf")

        if current_ratio < target_ratio:
            low = mid
        else:
            high = mid
            # `high` only ever decreases, so the most recent hit is also the
            # least aggressive pruning level known to reach the target.
            reaches_target = pruned

    return reaches_target if reaches_target is not None else pruned
```
### Acceptable Loss Thresholds
Different applications tolerate different accuracy losses:
```python
def recommend_compression_target(task, baseline_acc):
    """
    Return the lowest accuracy a compressed model may have for ``task``.

    The result is ``baseline_acc`` minus the accuracy drop tolerated by the
    task category. Categories that are not listed use a 0.05 allowance.
    """
    fallback_drop = 0.05
    # Maximum tolerated accuracy drop per task category.
    allowed_drop_by_task = dict(
        safety_critical=0.01,
        medical_diagnosis=0.02,
        standard_classification=0.05,
        ranking_recommendation=0.10,
        generative_creative=0.15,
    )

    if task in allowed_drop_by_task:
        allowed_drop = allowed_drop_by_task[task]
    else:
        allowed_drop = fallback_drop

    return baseline_acc - allowed_drop
```
### Identifying Critical Layers
Some layers degrade faster under compression than others. Identifying these layers allows targeted preservation of important capacity:
```python
def identify_critical_layers(model, train_loader, test_loader=None):
    """
    Identify layers where pruning causes the largest accuracy drops.
    These layers should be pruned less aggressively.

    Every ``nn.Linear`` / ``nn.Conv2d`` module is pruned on its own at a 0.5
    ratio via ``prune_single_layer``, and the accuracy drop relative to the
    unpruned model is recorded.

    Args:
        model: Model to probe. Must expose ``named_modules()``.
        train_loader: Data used for evaluation when ``test_loader`` is not
            given.
        test_loader: Optional evaluation data. The original body referred to
            a ``test_loader`` name that was never passed in; it is now an
            explicit, optional argument.

    Returns:
        A list of ``(layer_name, accuracy_drop)`` tuples sorted by accuracy
        drop, largest first.
    """
    eval_loader = train_loader if test_loader is None else test_loader

    original_acc = evaluate(model, eval_loader)
    layer_importance = {}
    for name, module in model.named_modules():
        if isinstance(module, (nn.Linear, nn.Conv2d)):
            # Test sensitivity by pruning this layer alone.
            # NOTE(review): assumes prune_single_layer returns a pruned copy
            # and does not modify `model` - confirm.
            pruned = prune_single_layer(model, name, 0.5)
            pruned_acc = evaluate(pruned, eval_loader)
            layer_importance[name] = original_acc - pruned_acc

    # Most sensitive layers first.
    return sorted(
        layer_importance.items(),
        key=lambda item: item[1],
        reverse=True,
    )
def compress_with_layer_sensitivity(model, sensitivity_scores, target_ratio):
    """
    Apply variable pruning ratios based on layer sensitivity.
    Critical layers get higher retention (less pruning).

    Pruning ratios fall in [0.3, 0.8]: the most sensitive layer is pruned at
    0.3 and a layer with zero (or negative) sensitivity at 0.8.

    Args:
        model: Model whose layers are pruned through ``prune_layer``.
        sensitivity_scores: Mapping of layer name to sensitivity, or an
            iterable of ``(layer_name, sensitivity)`` pairs such as the
            sorted list produced by ``identify_critical_layers``.
        target_ratio: Not used by the current implementation; kept so the
            signature stays stable for callers.

    Returns:
        ``model``, after ``prune_layer`` has been called for each layer.
        With no scores the model is returned untouched.
    """
    # Accept both a dict and a list of (name, score) pairs.
    scores = dict(sensitivity_scores)
    if not scores:
        return model

    max_sens = max(scores.values())
    for name, sens in scores.items():
        if max_sens > 0:
            # Relative sensitivity in [0, 1]; layers whose accuracy did not
            # drop (sens <= 0) are treated as not sensitive at all.
            relative = min(max(sens / max_sens, 0.0), 1.0)
        else:
            # No layer showed any accuracy drop: avoid dividing by zero.
            relative = 0.0

        # High sensitivity = low pruning ratio.
        layer_prune_ratio = 0.8 - 0.5 * relative
        prune_layer(model, name, layer_prune_ratio)
    return model
```
### Degradation Recovery
Fine-tuning partially recovers accuracy lost during compression:
```python
def gradual_degradation_recovery(model, train_loader, eval_loader, *,
                                 stages=(0.2, 0.4, 0.6, 0.8), epochs=5):
    """
    Apply compression in stages with recovery between each.

    For every stage the current model is pruned with ``magnitude_pruning``,
    fine-tuned with ``finetune_recovery``, evaluated, and a one-line progress
    report is printed.

    Args:
        model: Starting model.
        train_loader: Training data for the recovery fine-tuning.
        eval_loader: Evaluation data, passed to ``finetune_recovery`` and
            ``evaluate``.
        stages: Pruning ratios to apply in order. Defaults to the original
            schedule ``(0.2, 0.4, 0.6, 0.8)``.
        epochs: Fine-tuning epochs after each stage (default 5).

    Returns:
        The model after the last stage, or ``model`` itself when ``stages``
        is empty.
    """
    current_model = model
    for stage_ratio in stages:
        # Apply stage compression.
        current_model = magnitude_pruning(current_model, stage_ratio)

        # Recovery fine-tuning.
        current_model = finetune_recovery(
            current_model,
            train_loader,
            epochs=epochs,
            eval_loader=eval_loader,
        )

        acc = evaluate(current_model, eval_loader)
        size = count_parameters(current_model)
        print(f"Stage {stage_ratio}: acc={acc:.4f}, size={size/1e6:.2f}M")
    return current_model
```
### Common Failure: Over-Compression
The most common error is aggressive compression without verifying the accuracy impact:
```python
def validate_compression_target(compressed_model, original_model, test_loader,
                                max_accuracy_drop=0.05):
    """
    Check that a compressed model stays within the allowed accuracy loss.

    Both models are evaluated on ``test_loader``. Returns ``True`` when the
    accuracy drop does not exceed ``max_accuracy_drop``; otherwise prints a
    warning with a remediation hint and returns ``False``.
    """
    reference_acc = evaluate(original_model, test_loader)
    candidate_acc = evaluate(compressed_model, test_loader)
    drop = reference_acc - candidate_acc

    # Guard clause: nothing to report when the drop is within budget.
    if not (drop > max_accuracy_drop):
        return True

    warning = (f"WARNING: Accuracy drop {drop:.4f} exceeds threshold "
               f"{max_accuracy_drop:.4f}")
    print(warning)
    print("Consider reducing compression intensity or using more recovery epochs.")
    return False
```