12. Aspect-Based Sentiment

Chapter 12 of 18 · 15 min

Aspect-based sentiment analysis (ABSA) extracts the specific aspects mentioned in a text and determines the sentiment expressed toward each one. "The battery lasts long but the screen scratches easily" contains two aspects with opposite sentiments: the battery (positive) and the screen (negative).

Aspect Extraction with Sequence Labeling

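Sequence-labeling models in the style of named entity recognition (NER) tag every token as B-ASPECT, I-ASPECT, or O, and the tagged runs are merged into aspect terms: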

from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline
import torch

class AspectExtractor:
    """Extract aspect terms from text with a BIO token-classification model.

    Two entry points are offered: ``extract_aspects`` decodes the BIO tags by
    hand and returns the aspect strings, while ``extract_aspects_ner`` goes
    through the ``transformers`` NER pipeline and also returns character
    offsets and scores.
    """

    def __init__(self, model_path="./models/deberta-aspect"):
        """Load the tokenizer and token-classification model from *model_path*."""
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForTokenClassification.from_pretrained(model_path)
        self.label_map = {0: "O", 1: "B-ASPECT", 2: "I-ASPECT"}
        # Built lazily by extract_aspects_ner so it is constructed only once.
        self._ner_pipeline = None

    def extract_aspects(self, text: str) -> list[str]:
        """Return the aspect terms found in *text*, in order of appearance.

        Each aspect is the slice of *text* covered by one ``B-ASPECT`` token
        and the ``I-ASPECT`` tokens that directly follow it.

        Args:
            text: The review (or sentence) to analyse.

        Returns:
            A list of aspect strings; empty when no aspect is tagged.
        """
        # NOTE(review): return_offsets_mapping presumably requires a "fast"
        # tokenizer -- confirm for the checkpoint in use.
        inputs = self.tokenizer(text, return_tensors="pt", truncation=True,
                                max_length=256, return_offsets_mapping=True)
        # The model does not accept offsets, so remove them before the call.
        offsets = inputs.pop("offset_mapping")[0].tolist()

        with torch.no_grad():
            outputs = self.model(**inputs)
        predictions = outputs.logits.argmax(dim=-1)[0].tolist()

        spans = []
        span_start = span_end = None

        for (tok_start, tok_end), label_id in zip(offsets, predictions):
            if tok_start == tok_end:
                # Zero-width token (presumably a special token such as
                # [CLS]/[SEP]); it maps to no text, so ignore its tag.
                continue

            # Unknown label ids are treated as "outside".
            label = self.label_map.get(label_id, "O")

            if label == "B-ASPECT":
                if span_start is not None:
                    spans.append((span_start, span_end))
                span_start, span_end = tok_start, tok_end
            elif label == "I-ASPECT" and span_start is not None:
                # Continuation token: stretch the open span to cover it.
                span_end = tok_end
            elif span_start is not None:
                # "O" (or anything else) closes the open span so that a later
                # stray I-ASPECT cannot glue unrelated text onto it.
                spans.append((span_start, span_end))
                span_start = span_end = None

        # Flush an aspect that runs to the end of the sequence.
        if span_start is not None:
            spans.append((span_start, span_end))

        # strip(): some tokenizers may include adjacent whitespace in offsets.
        return [text[start:end].strip() for start, end in spans]

    def extract_aspects_ner(self, text: str) -> list[dict]:
        """Alternative using NER pipeline for aspect extraction.

        Args:
            text: The review (or sentence) to analyse.

        Returns:
            One dict per entity whose group is ``"ASPECT"``, with the keys
            ``text``, ``start``, ``end`` and ``score``.
        """
        # getattr keeps this working on instances created without __init__.
        ner_pipeline = getattr(self, "_ner_pipeline", None)
        if ner_pipeline is None:
            # Building a pipeline is not free; create it once and reuse it.
            ner_pipeline = pipeline(
                "ner",
                model=self.model,
                tokenizer=self.tokenizer,
                aggregation_strategy="simple",
            )
            self._ner_pipeline = ner_pipeline

        return [
            {
                "text": e["word"],
                "start": e["start"],
                "end": e["end"],
                # float(): the score is presumably a NumPy scalar; a plain
                # float keeps the result JSON-serialisable.
                "score": float(e["score"]),
            }
            for e in ner_pipeline(text)
            if e["entity_group"] == "ASPECT"
        ]

Joint Aspect-Sentiment Prediction

Modern approaches predict aspects and their sentiments jointly:

class AspectSentimentAnalyzer:
    """Prompt a causal language model to list aspects and their sentiment.

    The model is asked for ``Aspect | Sentiment | Evidence`` rows, which are
    then parsed into a list of dicts.
    """

    def __init__(self, model_path="./models/llama-2-7b-chat-hf"):
        """Load the tokenizer and causal LM from *model_path*."""
        from transformers import AutoModelForCausalLM, AutoTokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForCausalLM.from_pretrained(model_path)

    def analyze(self, text: str) -> dict:
        """Run the model on *text* and return the parsed aspect table.

        Args:
            text: The review to analyse.

        Returns:
            ``{"text": <generated completion>, "aspects": [...]}`` where each
            aspect is a dict with ``aspect``, ``sentiment`` and ``evidence``.
        """
        prompt = f"""Extract all aspects mentioned in this review with their sentiment polarity.
For each aspect, indicate: aspect name, sentiment (positive/negative/neutral), and supporting evidence.

Review: {text}

Format:
Aspect | Sentiment | Evidence"""

        # max_length only takes effect together with truncation=True.
        # NOTE(review): truncation presumably cuts from the right, so a very
        # long review would lose the trailing "Format:" instruction -- confirm
        # whether the review should be shortened before building the prompt.
        inputs = self.tokenizer(prompt, return_tensors="pt",
                                truncation=True, max_length=512)
        # Greedy decoding: temperature is meaningless without sampling, so it
        # is not passed.
        outputs = self.model.generate(
            **inputs,
            max_new_tokens=150,
            do_sample=False,
        )

        # A causal LM returns prompt + completion. Decode only the completion
        # so that prompt lines (and any "|" inside the review) never reach the
        # parser.
        prompt_len = inputs["input_ids"].shape[1]
        response = self.tokenizer.decode(outputs[0][prompt_len:],
                                         skip_special_tokens=True)
        return self._parse_results(response)

    def _parse_results(self, response: str) -> dict:
        """Parse LLM output into structured format.

        Lines containing ``|`` are read as table rows. The header row,
        markdown separator rows (``---|---``) and rows with an empty aspect
        cell are skipped; leading/trailing pipes are tolerated.

        Args:
            response: Raw text produced by the model.

        Returns:
            ``{"text": response, "aspects": [...]}``; ``evidence`` is ``""``
            when a row has only two cells.
        """
        aspects = []
        for line in response.splitlines():
            if "|" not in line:
                continue

            # Markdown tables may wrap each row in pipes: "| a | b | c |".
            cells = [cell.strip() for cell in line.strip().strip("|").split("|")]
            if len(cells) < 2 or not cells[0]:
                continue

            # Skip only the real header row, not every row that happens to
            # contain the word "Aspect" (e.g. "Aspect ratio").
            if cells[0].lower() == "aspect" and cells[1].lower() == "sentiment":
                continue

            # Skip separator rows made only of dashes, colons and spaces.
            if all(not cell.strip("-: ") for cell in cells):
                continue

            aspects.append({
                "aspect": cells[0],
                "sentiment": cells[1],
                "evidence": cells[2] if len(cells) > 2 else "",
            })
        return {"text": response, "aspects": aspects}
EXERCISE

Create an ABSA system that processes product reviews and outputs structured data suitable for a business dashboard. Aggregate sentiment scores per aspect across multiple reviews to identify product trends.