11. Emotion and Sentiment
Chapter 11 of 18 · 15 min
Sentiment analysis identifies overall polarity (positive, negative, neutral), while emotion detection classifies specific emotional states (joy, sadness, anger, fear, surprise, disgust). Both tasks require understanding beyond surface-level word counts.
Classification Approaches
RoBERTa-based models fine-tuned for sentiment achieve strong performance on standard benchmarks:
import json

import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer
class EmotionClassifier:
    """Emotion and sentiment classifier built on a sequence-classification model.

    The model is loaded with one output per entry in ``EMOTION_LABELS``.
    NOTE(review): the code assumes the checkpoint's class indices follow the
    order of ``EMOTION_LABELS`` -- confirm against the model's config.
    """

    EMOTION_LABELS = ["anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise"]

    # Polarity bucket for each emotion when collapsing to coarse sentiment.
    # "surprise" has no fixed valence, so it is counted as neutral here.
    _EMOTION_TO_SENTIMENT = {
        "anger": "negative",
        "disgust": "negative",
        "fear": "negative",
        "sadness": "negative",
        "neutral": "neutral",
        "surprise": "neutral",
        "joy": "positive",
    }

    def __init__(self, model_path="./models/roberta-emotion"):
        """Load the tokenizer and classification model from ``model_path``."""
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForSequenceClassification.from_pretrained(
            model_path,
            num_labels=len(self.EMOTION_LABELS),
        )

    def predict_emotions(self, texts: list[str], threshold: float = 0.3) -> list[dict]:
        """Return, per input text, the emotions whose probability exceeds ``threshold``.

        Args:
            texts: Batch of input strings.
            threshold: Strict lower bound on the softmax probability for an
                emotion to be reported.

        Returns:
            One dict per input text mapping emotion label to probability.
            A dict may be empty when no emotion clears the threshold.
            An empty ``texts`` yields an empty list.
        """
        if not texts:
            # Nothing to classify; avoid handing an empty batch to the tokenizer.
            return []

        inputs = self.tokenizer(texts, padding=True, truncation=True,
                                return_tensors="pt", max_length=256)
        with torch.no_grad():
            outputs = self.model(**inputs)
        probs = F.softmax(outputs.logits, dim=-1)

        return [
            {
                label: prob
                for label, prob in zip(self.EMOTION_LABELS, row.tolist())
                if prob > threshold
            }
            for row in probs
        ]

    def predict_sentiment(self, text: str) -> dict:
        """Return sentiment with intensity score.

        The model emits one probability per emotion, not per polarity, so the
        emotion probabilities are summed into negative / neutral / positive
        buckets via ``_EMOTION_TO_SENTIMENT``.

        Args:
            text: A single input string.

        Returns:
            A dict with ``"sentiment"`` (the highest-scoring polarity),
            ``"confidence"`` (that polarity's aggregated probability) and
            ``"scores"`` (aggregated probability for every polarity).
        """
        # truncation=True makes the max_length cap explicit, matching
        # predict_emotions.
        inputs = self.tokenizer(text, truncation=True, return_tensors="pt",
                                max_length=256)
        with torch.no_grad():
            outputs = self.model(**inputs)
        emotion_probs = F.softmax(outputs.logits, dim=-1)[0].tolist()

        scores = {"negative": 0.0, "neutral": 0.0, "positive": 0.0}
        for label, prob in zip(self.EMOTION_LABELS, emotion_probs):
            scores[self._EMOTION_TO_SENTIMENT[label]] += prob

        sentiment = max(scores, key=scores.get)
        return {
            "sentiment": sentiment,
            "confidence": scores[sentiment],
            "scores": scores,
        }
Aspect-Aware Sentiment with LLM Integration
Local LLMs can perform nuanced sentiment analysis through structured prompting:
class LLMSentimentAnalyzer:
    """Sentiment analysis by prompting a causal language model for JSON output."""

    def __init__(self, model_path="./models/llama-2-13b"):
        """Load the tokenizer and causal LM from ``model_path``."""
        from transformers import AutoModelForCausalLM
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForCausalLM.from_pretrained(model_path)

    def analyze_with_explanation(self, text: str) -> dict:
        """Ask the model for sentiment, intensity and key phrases for ``text``.

        Args:
            text: The text to analyze; it is interpolated into the prompt.

        Returns:
            The JSON object parsed from the model's reply, or a dict with
            ``"error"`` and ``"raw_response"`` keys when no JSON object could
            be extracted.
        """
        prompt = f"""Analyze the sentiment of this text. Provide:
1. Overall sentiment (positive/negative/neutral)
2. Intensity (1-10)
3. Key phrases that contributed to the sentiment
Text: {text}
Output as JSON:"""
        # NOTE(review): truncation cuts from the end of the prompt, so a very
        # long `text` can push the trailing "Output as JSON:" cue out of the
        # window -- consider truncating `text` before building the prompt.
        inputs = self.tokenizer(prompt, truncation=True, return_tensors="pt",
                                max_length=512)
        outputs = self.model.generate(
            **inputs,
            max_new_tokens=200,
            temperature=0.3,
            do_sample=True,
        )
        # The generated sequence starts with the prompt tokens; decode only the
        # continuation so the prompt text is not fed to the JSON parser.
        prompt_len = inputs["input_ids"].shape[1]
        response = self.tokenizer.decode(outputs[0][prompt_len:],
                                         skip_special_tokens=True)
        return self._parse_json_response(response)

    @staticmethod
    def _parse_json_response(response: str) -> dict:
        """Extract the first JSON object embedded in ``response``.

        Text before the first ``{`` and anything after the end of the object
        is ignored. If no valid JSON object is found, a dict carrying the raw
        response is returned instead of raising, so callers always get a dict.
        """
        start = response.find("{")
        if start != -1:
            try:
                parsed, _ = json.JSONDecoder().raw_decode(response, start)
            except json.JSONDecodeError:
                parsed = None
            if isinstance(parsed, dict):
                return parsed
        return {
            "error": "could not parse JSON from model response",
            "raw_response": response,
        }
EXERCISE
Build a real-time sentiment monitor that processes streaming text data. Implement sliding window analysis to detect sentiment trends over time, not just individual text classifications.