14. Natural Language Insights
Chapter 14 of 18 · 20 min
Local LLMs can extract meaningful insights from unstructured text without sending sensitive data to an external service.
Sentiment Analysis with Local Models
import ollama
def analyze_sentiment(text, model='llama3.2'):
    """Classify the sentiment of *text* with a local Ollama model.

    The model is asked to answer with a single word. Because chat models
    often decorate that word (``"Positive."``, ``"**positive**"``) or wrap
    it in a sentence, the reply is normalised to one of the three labels
    whenever that can be done unambiguously.

    Args:
        text: The text to classify.
        model: Name of the Ollama model to query.

    Returns:
        ``"positive"``, ``"negative"`` or ``"neutral"`` when the reply can
        be normalised. Blank or ``None`` input returns ``"neutral"`` without
        calling the model. If the reply mentions none or several of the
        labels, the stripped, lower-cased reply is returned unchanged.
    """
    labels = ('positive', 'negative', 'neutral')
    decoration = ' \t\r\n"\'`*_.,:;!?()[]{}-'

    # Nothing to analyse: skip the model call instead of letting it guess.
    if text is None or not str(text).strip():
        return 'neutral'

    prompt = f"""Analyze the sentiment of this text. Return ONLY a single word:
- "positive" for positive sentiment
- "negative" for negative sentiment
- "neutral" for neutral sentiment
Text: {text}"""
    response = ollama.chat(
        model=model,
        messages=[{'role': 'user', 'content': prompt}],
    )
    answer = response['message']['content'].strip().lower()

    # Common case: the bare label, possibly wrapped in quotes/punctuation.
    bare = answer.strip(decoration)
    if bare in labels:
        return bare

    # Verbose reply: accept it only when exactly one label is mentioned.
    words = {word.strip(decoration) for word in answer.split()}
    mentioned = [label for label in labels if label in words]
    if len(mentioned) == 1:
        return mentioned[0]

    # Ambiguous or unexpected reply: hand it back for the caller to inspect.
    return answer
# Label every review: each value is converted to a string and cut to its
# first 500 characters before being passed to analyze_sentiment.
df['sentiment'] = df['review_text'].apply(
    lambda review: analyze_sentiment(str(review)[:500])
)
Topic Extraction
def extract_topics(texts, n_topics=5, model='llama3.2', max_chars=2000):
    """Ask a local Ollama model for the main topics in *texts*.

    Args:
        texts: Either one string or an iterable of texts. An iterable is
            joined with newlines (``None`` items skipped) so the prompt
            contains plain text rather than a Python list repr.
        n_topics: Number of topics requested in the prompt.
        model: Name of the Ollama model to query.
        max_chars: Only the first ``max_chars`` characters of the combined
            text are placed in the prompt.

    Returns:
        A list of topic names parsed from the model's comma-separated
        reply, with surrounding whitespace removed and empty entries
        dropped. Returns ``[]`` without calling the model when there is no
        non-blank text to analyse.
    """
    if texts is None:
        return []
    if not isinstance(texts, str):
        texts = '\n'.join(str(item) for item in texts if item is not None)

    excerpt = texts[:max_chars]
    if not excerpt.strip():
        return []

    prompt = f"""From these texts, identify {n_topics} main topics.
Return a comma-separated list of topic names.
Texts:
{excerpt}"""
    response = ollama.chat(
        model=model,
        messages=[{'role': 'user', 'content': prompt}],
    )
    reply = response['message']['content']

    # A trailing comma or doubled separator would otherwise yield '' topics.
    candidates = (piece.strip() for piece in reply.split(','))
    return [topic for topic in candidates if topic]
# Concatenate every non-null comment into one space-separated string,
# then request five topics for that combined text.
corpus = ' '.join(
    df['comments']
    .dropna()
    .tolist()
)
main_topics = extract_topics(texts=corpus, n_topics=5)
Named Entity Recognition (NER)
Extract entities from text using structured prompting:
def extract_entities(text, model='llama3.2'):
    """Extract named entities from *text* with a local Ollama model.

    The prompt asks for one ``Entity | Type`` pair per entity, with the type
    drawn from ORGANIZATION, PERSON, LOCATION, DATE and PRODUCT. The reply
    is not parsed or validated here.

    Args:
        text: The text to scan for entities.
        model: Name of the Ollama model to query.

    Returns:
        The model's reply as a string. Blank or ``None`` input returns ``""``
        without calling the model, since there is nothing to extract.
    """
    if text is None or not str(text).strip():
        return ''

    prompt = f"""Extract named entities from this text. Format as:
Entity | Type (ORGANIZATION, PERSON, LOCATION, DATE, PRODUCT)
Text: {text}
Only include actual entities found in the text."""
    response = ollama.chat(
        model=model,
        messages=[{'role': 'user', 'content': prompt}],
    )
    return response['message']['content']
# Demo sentence mentioning an organization, a product, a date and a location.
sample_text = (
    "Apple released iPhone 15 on September 12, 2023 "
    "at their Cupertino headquarters."
)
entities = extract_entities(text=sample_text)
print(entities)
# Example output:
#   Apple | ORGANIZATION
#   iPhone 15 | PRODUCT
#   September 12, 2023 | DATE
#   Cupertino | LOCATION
Text Summarization
def summarize_text(text, max_length=200, model='llama3.2'):
    """Summarise *text* with a local Ollama model.

    Args:
        text: The text to summarise.
        max_length: Character budget stated in the prompt. It is a request
            to the model only; the reply is not truncated here.
        model: Name of the Ollama model to query.

    Returns:
        The model's reply as a string. Blank or ``None`` input returns ``""``
        without calling the model, so no summary is invented for nothing.
    """
    if text is None or not str(text).strip():
        return ''

    prompt = f"""Summarize this text in no more than {max_length} characters.
Include the main points and key numbers if present.
Text: {text}"""
    response = ollama.chat(
        model=model,
        messages=[{'role': 'user', 'content': prompt}],
    )
    return response['message']['content']
# Take the first 100 non-null feedback entries, join them with spaces and
# summarise the combined text.
feedback_corpus = (
    df['feedback']
    .dropna()
    .head(100)
    .tolist()
)
summary = summarize_text(text=' '.join(feedback_corpus))
Question Answering from Data
def answer_question(documents, question, model='llama3.2'):
    """Answer *question* from *documents* with a local Ollama model.

    Args:
        documents: Either one string or an iterable of documents. An
            iterable is rendered as blank-line-separated text (``None`` and
            blank items skipped) so the prompt does not contain a Python
            list repr with brackets and escaped quotes.
        question: The question to answer.
        model: Name of the Ollama model to query.

    Returns:
        The model's reply as a string. When there is no non-blank document
        text, returns ``"I don't know"`` without calling the model, which is
        the answer the prompt requests for unanswerable questions.
    """
    if documents is None:
        documents = ''
    elif not isinstance(documents, str):
        stripped = (str(doc).strip() for doc in documents if doc is not None)
        documents = '\n\n'.join(doc for doc in stripped if doc)

    if not documents.strip():
        return "I don't know"

    prompt = f"""Based on the following documents, answer the question.
If the answer is not in the documents, say "I don't know".
Documents:
{documents}
Question: {question}"""
    response = ollama.chat(
        model=model,
        messages=[{'role': 'user', 'content': prompt}],
    )
    return response['message']['content']
# Use the first 10 descriptions from rows categorised as 'technical_issue'
# as the documents for the question.
relevant_docs = (
    df[df['category'] == 'technical_issue']['description']
    .head(10)
    .tolist()
)
answer = answer_question(
    documents=relevant_docs,
    question="What are the most common technical issues?",
)
EXERCISE
Load a text dataset (reviews, comments, or tickets), run sentiment analysis, extract the top 5 topics, and generate a summary report using Ollama.