This repository has been archived on 2025-02-10. You can view files and clone it, but cannot push or open issues or pull requests.
lastin-ai/src/analysis/analysis_engine.py

53 lines
2.0 KiB
Python

from langchain.schema import Document
from langchain.chains.summarize import load_summarize_chain
from langchain_community.llms import OpenAI
from typing import List, Dict
import yaml
import os
class TechnicalConceptExtractor:
    """Ask an LLM to list the key technical concepts found in a text."""

    def __init__(self, llm):
        """Keep a reference to the LLM.

        Args:
            llm: Object invoked as ``llm(prompt)``; its result is treated as
                a string of comma-separated values.
        """
        self.llm = llm

    def extract_concepts(self, text: str) -> List[str]:
        """Return the concepts the LLM lists for ``text``.

        Only the first 5000 characters of ``text`` are placed in the prompt.

        Args:
            text: Source text to analyze.

        Returns:
            The comma-separated items of the LLM reply, each stripped of
            surrounding whitespace. Blank items (from an empty reply, a
            trailing comma, or doubled commas) are omitted.
        """
        prompt = (
            "Identify key technical concepts from this text:\n"
            f"{text[:5000]}\n"
            "List them as comma-separated values:"
        )
        result = self.llm(prompt)
        stripped_items = (item.strip() for item in result.split(","))
        # "".split(",") yields [""], so drop blanks instead of reporting an
        # empty string as a concept.
        return [item for item in stripped_items if item]
class InsightAggregator:
    """Ask an LLM for themes that cut across several paper summaries."""

    def __init__(self, llm):
        """Keep a reference to the LLM.

        Args:
            llm: Object invoked as ``llm(prompt)``; its result is returned
                to the caller unchanged.
        """
        self.llm = llm

    def find_connections(self, papers: List[Dict]) -> str:
        """Return the LLM's cross-paper analysis of the given summaries.

        The ``"summary"`` values are joined with blank lines, and only the
        first 10000 characters of the joined text are placed in the prompt.

        Args:
            papers: Dicts that each provide a ``"summary"`` entry.

        Returns:
            The LLM reply, or ``""`` when there is no summary text to
            analyze (empty ``papers`` or only blank summaries).

        Raises:
            KeyError: If a paper has no ``"summary"`` key.
        """
        summaries = "\n\n".join(paper["summary"] for paper in papers)
        if not summaries.strip():
            # Nothing to analyze: skip the LLM call rather than asking it to
            # find themes in an empty prompt.
            return ""
        prompt = (
            "Analyze these paper summaries and identify cross-cutting themes:\n"
            f"{summaries[:10000]}\n"
            "Provide a structured analysis:"
        )
        return self.llm(prompt)
class PaperAnalyzer:
    """Summarize a document and extract its technical concepts via one LLM.

    ``__init__`` sets these attributes:
        config: Parsed YAML settings (``{}`` when the file is empty). It is
            not read anywhere else in this class.
        llm: ``OpenAI`` LLM client configured for the DeepSeek endpoint.
        summary_chain: ``map_reduce`` summarization chain built on ``llm``.
        concept_extractor: ``TechnicalConceptExtractor`` sharing ``llm``.
        insight_aggregator: ``InsightAggregator`` sharing ``llm``.
    """

    def __init__(self, config_path: str = "config/settings.yaml") -> None:
        """Load the settings file and build the LLM-backed helpers.

        Args:
            config_path: Path to the YAML settings file.

        Raises:
            OSError: If ``config_path`` cannot be opened.
        """
        # Decode explicitly as UTF-8 so parsing does not depend on the
        # platform's locale encoding.
        with open(config_path, encoding="utf-8") as f:
            # safe_load gives None for an empty document; normalize to a
            # dict so later lookups on self.config do not hit None.
            self.config = yaml.safe_load(f) or {}
        # NOTE(review): os.getenv returns None when DEEPSEEK_API_KEY is
        # unset; confirm how the client behaves with api_key=None.
        self.llm = OpenAI(
            temperature=0.5,  # More conservative temperature for academic analysis
            model_name="deepseek-r1",
            api_key=os.getenv("DEEPSEEK_API_KEY"),
            base_url="https://api.deepseek.com/v1",
            max_tokens=4096,  # Increased token limit for complex papers
        )
        self.summary_chain = load_summarize_chain(self.llm, chain_type="map_reduce")
        self.concept_extractor = TechnicalConceptExtractor(self.llm)
        self.insight_aggregator = InsightAggregator(self.llm)

    def analyze_document(self, document: "Document") -> Dict:
        """Summarize ``document`` and list its technical concepts.

        Args:
            document: Object exposing ``page_content`` and ``metadata``.

        Returns:
            A dict with three keys: ``"summary"`` (result of running the
            summarization chain on the document), ``"concepts"`` (result of
            the concept extractor on ``document.page_content``), and
            ``"metadata"`` (``document.metadata``, passed through as-is).
        """
        summary = self.summary_chain.run([document])
        concepts = self.concept_extractor.extract_concepts(document.page_content)
        return {
            "summary": summary,
            "concepts": concepts,
            "metadata": document.metadata,
        }