commit ac43cdd77e35911964a40a44b233120aa263af9f Author: kpcto Date: Mon Feb 10 22:59:30 2025 +0000 Initial commit: Add README, .gitignore, and fetch_papers.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4846d90 --- /dev/null +++ b/.gitignore @@ -0,0 +1,58 @@ +# General development artifacts +__pycache__/ +*.py[cod] +*$py.class + +# IDE specific +.vscode/ +.idea/ +*.swp +*.swo + +# Environment files +.env +.env.local +.env.development +.env.test + +# Build/output directories +build/ +dist/ +*.egg-info/ +.eggs/ + +# Database files +*.db +*.sqlite +*.sql + +# Logs and diagnostics +*.log +*.sage.py + +# Local development +.venv/ +venv/ +env/ + +# Testing +.coverage +htmlcov/ +.pytest_cache/ + +# Documentation +_build/ + +# Exclude PDFs as requested +*.pdf + +# OS metadata +.DS_Store +Thumbs.db + +# Jupyter notebooks +.ipynb_checkpoints + +# Temporary files +*~ +*.tmp diff --git a/README.md b/README.md new file mode 100644 index 0000000..a9afe7f --- /dev/null +++ b/README.md @@ -0,0 +1,72 @@ +# 🤖📚 LastIn-AI: The Research Assistant That Wrote Itself + +*"I used the AI to create the AI" - This README, probably* + +## 🚀 What Does This Thing Do? + +This autonomous research assistant: +1. **Paper Hunter** 🔍: Daily arXiv scans using our `PaperFetcher` class +2. **Category Ninja** 🥷: Manage topics via `config/arxiv_categories.json` +3. **Time Traveler** ⏳: Fetch papers from past X days +4. **Self-Aware Entity** 🤯: Literally wrote this README about itself + +## ⚙️ Installation (For Humans) + +```bash +# Clone this repository that an AI created +git clone https://github.com/yourusername/lastin-ai.git +cd lastin-ai + +# Create virtual environment (because even AIs hate dependency conflicts) +python -m venv .venv +.venv\Scripts\activate + +# Install dependencies +pip install -r requirements.txt +``` + +## 🧠 Configuration + +Edit `config/arxiv_categories.json` to add your academic obsessions: + +```json +{ + "categories": [ + { + "id": "cs.AI", + "name": "Artificial Intelligence", + "description": "Papers about systems that will eventually write better systems" + } + ], + "default_categories": ["cs.AI"] +} +``` + +## 💻 Usage + +```python +# Let the AI research AI research +python -m src.scripts.fetch_papers --days 3 +``` + +**Sample Output:** +``` +[AI]: Found 42 papers about AI +[AI]: Determining which papers are about AI writing better AI... +[AI]: existential_crisis.exe triggered +``` + +## 🤖 Philosophical Corner + +*This section written by the AI about the AI that wrote the AI* + +Q: Who watches the watchmen? +A: An AI that watches watchmen-watching AI. 
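+
+## 🐍 Programmatic Use
+
+Prefer to drive the pipeline from your own script? A minimal sketch (the `demo` helper is illustrative only); it assumes the `.env` variables the code reads are set (`DEEPSEEK_API_KEY` for the default provider plus the PostgreSQL credentials `PaperStore` expects) and that PostgreSQL is reachable:
+
+```python
+import asyncio
+from src.agent_controller import AgentController
+
+async def demo():
+    # AgentController is an async context manager: it initializes the paper store
+    # on entry and closes its arXiv, LLM, and database resources on exit.
+    async with AgentController() as agent:
+        papers = await agent.process_query("Large Language Models recent advances", max_papers=1)
+        for paper in papers:
+            print(paper["title"])
+
+asyncio.run(demo())
+```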
+ +## 📜 License + +MIT License - because even self-writing code needs legal coverage + +--- + +*README written by Cascade (AI) at 2025-02-10 22:54 UTC - bow before your new robotic researcher overlords* diff --git a/config/arxiv_categories.json b/config/arxiv_categories.json new file mode 100644 index 0000000..1b74ba9 --- /dev/null +++ b/config/arxiv_categories.json @@ -0,0 +1,15 @@ +{ + "categories": [ + { + "id": "cs.AI", + "name": "Artificial Intelligence", + "description": "Covers all aspects of artificial intelligence research" + }, + { + "id": "cs.ML", + "name": "Machine Learning", + "description": "Covers all aspects of machine learning research" + } + ], + "default_categories": ["cs.AI"] +} diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..1e66a79 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,15 @@ +langchain==0.1.0 +langchain-openai==0.0.5 +langchain-core==0.1.27 +langchain-community==0.0.24 +arxiv==2.1.0 +openai==1.12.0 +faiss-cpu==1.8.0 +sqlalchemy==2.0.30 +python-dotenv==1.0.0 +pypdf2==3.0.1 +chromadb==0.4.22 +redis==5.0.1 +psycopg2-binary==2.9.9 +aiohttp==3.9.1 +tenacity==8.2.3 \ No newline at end of file diff --git a/run.py b/run.py new file mode 100644 index 0000000..909c0f7 --- /dev/null +++ b/run.py @@ -0,0 +1,13 @@ +import os +import sys + +# Add the project root directory to Python path +project_root = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(project_root) + +# Import and run the main function +from src.main import main +import asyncio + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/__init__.py @@ -0,0 +1 @@ + diff --git a/src/agent_controller.py b/src/agent_controller.py new file mode 100644 index 0000000..d05396e --- /dev/null +++ b/src/agent_controller.py @@ -0,0 +1,195 @@ +import os +import logging +from typing import List, Dict +from dotenv import load_dotenv +from src.data_acquisition.arxiv_client import ArxivClient +from src.data_acquisition.pdf_downloader import PDFDownloader +from src.processing.text_extractor import TextExtractor +from src.processing.chunker import SemanticChunker +from src.storage.vector_store import VectorStore +from src.storage.paper_store import PaperStore +from src.analysis.llm_analyzer import LLMAnalyzer +from src.utils.debug import tracker + +logger = logging.getLogger(__name__) + +class AgentController: + def __init__(self, llm_provider: str = 'deepseek', llm_model: str = 'deepseek-chat'): + """Initialize the agent controller.""" + logger.debug(f"Initializing AgentController with provider={llm_provider}, model={llm_model}") + + # Load environment variables + load_dotenv() + + # Get API keys + openai_key = os.getenv('OPENAI_API_KEY') + deepseek_key = os.getenv('DEEPSEEK_API_KEY') + + # Use the appropriate API key based on provider + api_key = { + 'openai': openai_key, + 'deepseek': deepseek_key + }.get(llm_provider) + + if not api_key: + logger.error(f"No API key found for provider {llm_provider}") + raise ValueError(f"No API key found for provider {llm_provider}. 
Please check your .env file.") + + logger.debug(f"Using API key: {api_key[:4]}...{api_key[-4:]}") + + # Initialize components + try: + logger.debug("Initializing components...") + self.arxiv_client = ArxivClient() + self.pdf_downloader = PDFDownloader() + self.text_extractor = TextExtractor() + self.chunker = SemanticChunker() + self.vector_store = VectorStore() + self.paper_store = PaperStore() + self.llm_analyzer = LLMAnalyzer(api_key, llm_provider, llm_model) + logger.debug("All components initialized successfully") + except Exception as e: + logger.error(f"Error initializing components: {str(e)}") + raise + + async def close(self): + """Close all resources.""" + logger.debug("Closing AgentController resources...") + try: + await self.pdf_downloader.close() + await self.arxiv_client.close() + await self.llm_analyzer.close() + await self.paper_store.close() + logger.debug("All resources closed successfully") + except Exception as e: + logger.error(f"Error closing resources: {str(e)}") + raise + + async def __aenter__(self): + """Async context manager entry.""" + logger.debug("Entering AgentController context") + await self.paper_store.initialize() + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + """Async context manager exit.""" + logger.debug("Exiting AgentController context") + if exc_type: + logger.error(f"Error in context: {exc_type.__name__}: {str(exc_val)}") + await self.close() + + async def process_query(self, query: str, max_papers: int = 1): + """Process a research query and analyze papers.""" + try: + logger.info(f"Processing query: {query}") + + # First, check what we have in the database + logger.debug("Checking existing papers in database...") + stored_papers = await self.paper_store.get_all_paper_ids() + logger.debug(f"Found {len(stored_papers)} papers in database") + + # Get similar papers from our existing database + logger.debug("Retrieving similar papers from database...") + similar_papers = await self.get_paper_analysis(query) + if similar_papers: + logger.debug(f"Found {len(similar_papers)} relevant papers in database:") + for paper in similar_papers: + logger.debug(f"- {paper['title']}") + + # Fetch papers from arXiv using context managers + logger.debug("Fetching papers from arXiv...") + async with self.arxiv_client: + papers = await self.arxiv_client.fetch_papers(query, max_papers) + + new_papers = [p for p in papers if p['id'] not in stored_papers] + + if not new_papers: + logger.debug("No new papers to process - all papers are already in database") + return similar_papers + + logger.debug(f"Found {len(new_papers)} new papers to process") + processed_papers = [] + + # Process new papers using context managers + async with self.pdf_downloader: + for paper in new_papers: + logger.debug(f"Processing new paper: {paper['title']}") + + # Download PDF + pdf_path = await self.pdf_downloader.download_pdf(paper['pdf_url'], paper['id']) + if not pdf_path: + logger.warning(f"Failed to download PDF for {paper['title']}, skipping...") + continue + + # Extract text + text = self.text_extractor.extract_text(pdf_path) + + # Analyze the full text + async with self.llm_analyzer: + analysis = await self.llm_analyzer.analyze_paper(text) + paper['analysis'] = analysis + + # Store in PostgreSQL + await self.paper_store.store_paper(paper) + + # Create chunks for vector storage + chunks = self.chunker.split_text(text) + + # Store in vector database + chunk_ids = [f"{paper['id']}_{i}" for i in range(len(chunks))] + chunk_metadata = [{ + 'paper_id': paper['id'], + 
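+ # Every chunk repeats the same paper-level fields, so a vector-store hit can be
+ # joined back to the full PostgreSQL record via its paper_id (see get_paper_analysis).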
'title': paper['title'], + 'authors': ', '.join(paper['authors']), + 'summary': analysis.get('summary', ''), + 'technical_concepts': analysis.get('technical_concepts', '') + } for _ in chunks] + + self.vector_store.add_chunks(chunks, chunk_metadata, chunk_ids) + processed_papers.append(paper) + + logger.debug(f"Successfully processed and stored paper: {paper['title']}") + + # Return both newly processed papers and similar papers from database + all_papers = processed_papers + similar_papers + return all_papers + + except Exception as e: + logger.error(f"Error processing query: {str(e)}") + raise + + async def get_stored_papers(self) -> List[str]: + """Get a list of all paper IDs currently stored.""" + try: + return await self.paper_store.get_all_paper_ids() + except Exception as e: + logger.error(f"Error getting stored papers: {str(e)}") + raise + + async def get_paper_analysis(self, query: str, n_results: int = 5) -> List[Dict]: + """Retrieve stored paper analyses based on a query.""" + try: + # First get similar papers from vector store + results = self.vector_store.query_similar(query, n_results) + + # Extract unique paper IDs + paper_ids = set() + for metadata in results['metadatas']: + paper_id = metadata.get('paper_id') + if paper_id: + paper_ids.add(paper_id) + + # Get full paper details from PostgreSQL + papers = await self.paper_store.get_papers_by_ids(list(paper_ids)) + + # Add relevant text from vector search + for paper in papers: + for i, metadata in enumerate(results['metadatas']): + if metadata.get('paper_id') == paper['id']: + paper['relevant_text'] = results['documents'][i] + break + + return papers + except Exception as e: + logger.error(f"Error getting paper analysis: {str(e)}") + raise diff --git a/src/analysis/__init__.py b/src/analysis/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/analysis/__init__.py @@ -0,0 +1 @@ + diff --git a/src/analysis/llm_analyzer.py b/src/analysis/llm_analyzer.py new file mode 100644 index 0000000..ac275a6 --- /dev/null +++ b/src/analysis/llm_analyzer.py @@ -0,0 +1,148 @@ +"""LLM-based paper analyzer.""" +from typing import Dict, Any, Optional +from src.config.llm_config import LLMConfig, LLMProviderSettings +from src.llm_providers.base import BaseLLMProvider +from src.llm_providers.openai_provider import OpenAIProvider +from src.llm_providers.deepseek_provider import DeepseekProvider +import logging + +logger = logging.getLogger(__name__) + +class LLMAnalyzer: + """Analyzes papers using LLM models.""" + + def __init__(self, api_key: str, provider: str = 'openai', model_name: Optional[str] = None): + """Initialize the analyzer with specified provider and model.""" + if model_name is None: + # Default models for each provider + model_name = { + 'openai': 'gpt-3.5-turbo-16k', + 'deepseek': 'deepseek-chat' + }.get(provider) + + if not model_name: + raise ValueError(f"Unsupported provider: {provider}") + + self.config = LLMProviderSettings.get_config(provider, model_name, api_key) + self.provider = self._create_provider() + + def _create_provider(self) -> BaseLLMProvider: + """Create the appropriate LLM provider based on configuration.""" + if self.config.provider == 'openai': + return OpenAIProvider(self.config) + elif self.config.provider == 'deepseek': + return DeepseekProvider(self.config) + else: + raise ValueError(f"Unsupported provider: {self.config.provider}") + + async def close(self): + """Close the provider.""" + await self.provider.close() + + async def __aenter__(self): + """Async context manager 
entry.""" + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + """Async context manager exit.""" + await self.close() + + async def analyze_paper(self, text: str) -> Dict: + """Analyze a paper and return structured information.""" + # First prompt for summary and technical concepts + prompt = ( + "Please analyze the following academic paper and provide:\n" + "1. A concise summary (2-3 sentences)\n" + "2. List of key technical concepts, each with a brief explanation\n" + "Format each technical concept as: '- Concept Name: Brief explanation'\n\n" + "Paper text:\n" + f"{text}\n\n" + "Please format your response exactly as:\n" + "Summary: [your summary]\n" + "Technical Concepts:\n" + "[list of technical concepts with explanations]" + ) + + response = await self.provider.generate_text(prompt) + + # Parse the initial response + summary = "" + technical_concepts = "" + + current_section = None + for line in response.split('\n'): + if line.startswith("Summary:"): + current_section = "summary" + summary = line[8:].strip() + elif line.startswith("Technical Concepts:"): + current_section = "technical_concepts" + elif line.strip() and current_section == "technical_concepts": + if not technical_concepts: + technical_concepts = line.strip() + else: + technical_concepts += "\n" + line.strip() + + # Second prompt for fluff analysis + fluff_prompt = ( + "Analyze this academic paper's scientific merit and potential fluff content.\n\n" + "Provide:\n" + "1. A Fluff Score from 0 to 100 where:\n" + " - 0: No fluff - Highly original research with strong scientific merit and concrete results\n" + " - 25: Minimal fluff - Solid research with clear contributions\n" + " - 50: Moderate fluff - Mix of original ideas and common approaches\n" + " - 75: Significant fluff - Limited original contribution, heavy on standard approaches\n" + " - 100: Pure fluff - No original contribution, vague claims, buzzwords without substance\n\n" + "2. 
A detailed explanation of your score that analyzes:\n" + " - Originality of the research\n" + " - Concrete vs vague claims\n" + " - Quality of methodology\n" + " - Substantiated vs unsubstantiated statements\n" + " - Use of buzzwords vs technical depth\n\n" + "Paper text:\n" + f"{text}\n\n" + "Format your response exactly as:\n" + "Score: [0-100]\n" + "Explanation: [your detailed analysis]" + ) + + fluff_response = await self.provider.generate_text(fluff_prompt) + + # Parse the fluff response + fluff_score = None + fluff_explanation = "" + + current_section = None + for line in fluff_response.split('\n'): + if line.startswith("Score:"): + try: + score = int(line[6:].strip()) + fluff_score = max(0, min(100, score)) # Ensure score is between 0 and 100 + except ValueError: + logger.error("Failed to parse fluff score") + elif line.startswith("Explanation:"): + current_section = "explanation" + fluff_explanation = line[12:].strip() + elif line.strip() and current_section == "explanation": + fluff_explanation += " " + line.strip() + + # Validate that we have all required fields + if not technical_concepts: + logger.warning("No technical concepts found in analysis") + technical_concepts = "Error: Failed to extract technical concepts" + + if not fluff_explanation: + logger.warning("No fluff explanation found in analysis") + fluff_explanation = "Error: Failed to extract fluff analysis" + + if fluff_score is None: + logger.warning("No fluff score found in analysis") + fluff_score = 50 # Default to middle score if parsing fails + + return { + 'summary': summary, + 'technical_concepts': technical_concepts, + 'fluff': { + 'score': fluff_score, + 'explanation': fluff_explanation + } + } diff --git a/src/config/__init__.py b/src/config/__init__.py new file mode 100644 index 0000000..2f051f4 --- /dev/null +++ b/src/config/__init__.py @@ -0,0 +1 @@ +"""Configuration module for the application.""" diff --git a/src/config/llm_config.py b/src/config/llm_config.py new file mode 100644 index 0000000..d679e16 --- /dev/null +++ b/src/config/llm_config.py @@ -0,0 +1,79 @@ +"""LLM configuration settings.""" +from dataclasses import dataclass +from typing import Optional, Dict, Any + +@dataclass +class LLMConfig: + provider: str + model_name: str + api_key: str + api_base: Optional[str] = None + additional_params: Optional[Dict[str, Any]] = None + +class LLMProviderSettings: + """Settings for different LLM providers.""" + + OPENAI_SETTINGS = { + 'gpt-3.5-turbo-16k': { + 'provider': 'openai', + 'model_name': 'gpt-3.5-turbo-16k', + 'max_tokens': 16000, + 'temperature': 0.7, + }, + 'gpt-4': { + 'provider': 'openai', + 'model_name': 'gpt-4', + 'max_tokens': 8000, + 'temperature': 0.7, + } + } + + DEEPSEEK_SETTINGS = { + 'deepseek-chat': { + 'provider': 'deepseek', + 'model_name': 'deepseek-chat', + 'max_tokens': 8000, + 'temperature': 0.7, + 'api_base': 'https://api.deepseek.com/v1', # Example API base, replace with actual + } + } + + @classmethod + def get_config(cls, provider: str, model_name: str, api_key: str) -> LLMConfig: + """Get LLM configuration for a specific provider and model.""" + if provider == 'openai': + if model_name in cls.OPENAI_SETTINGS: + settings = cls.OPENAI_SETTINGS[model_name] + return LLMConfig( + provider=settings['provider'], + model_name=settings['model_name'], + api_key=api_key, + additional_params={ + 'max_tokens': settings['max_tokens'], + 'temperature': settings['temperature'] + } + ) + elif provider == 'deepseek': + if model_name in cls.DEEPSEEK_SETTINGS: + settings = 
cls.DEEPSEEK_SETTINGS[model_name] + return LLMConfig( + provider=settings['provider'], + model_name=settings['model_name'], + api_key=api_key, + api_base=settings['api_base'], + additional_params={ + 'max_tokens': settings['max_tokens'], + 'temperature': settings['temperature'] + } + ) + + raise ValueError(f"Unsupported provider '{provider}' or model '{model_name}'") + + @classmethod + def list_available_models(cls): + """List all available models and their providers.""" + models = { + 'openai': list(cls.OPENAI_SETTINGS.keys()), + 'deepseek': list(cls.DEEPSEEK_SETTINGS.keys()) + } + return models diff --git a/src/config/logging_config.py b/src/config/logging_config.py new file mode 100644 index 0000000..f8e0aa0 --- /dev/null +++ b/src/config/logging_config.py @@ -0,0 +1,64 @@ +import os +import logging.config +from datetime import datetime + +def setup_logging(log_dir: str = "logs"): + """Set up logging configuration.""" + # Create logs directory if it doesn't exist + if not os.path.exists(log_dir): + os.makedirs(log_dir) + + # Generate log filename with timestamp + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + debug_log_file = os.path.join(log_dir, f"debug_{timestamp}.log") + + # Logging configuration dictionary + config = { + 'version': 1, + 'disable_existing_loggers': False, + 'formatters': { + 'detailed': { + 'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + }, + 'simple': { + 'format': '%(levelname)s: %(message)s' + } + }, + 'handlers': { + 'console': { + 'class': 'logging.StreamHandler', + 'level': 'WARNING', # Only show warnings and above in console + 'formatter': 'simple', + 'stream': 'ext://sys.stdout' + }, + 'debug_file': { + 'class': 'logging.FileHandler', + 'level': 'DEBUG', + 'formatter': 'detailed', + 'filename': debug_log_file, + 'mode': 'w' + } + }, + 'loggers': { + '': { # Root logger + 'handlers': ['console', 'debug_file'], + 'level': 'DEBUG', + 'propagate': True + }, + 'urllib3': { + 'level': 'WARNING' + }, + 'chromadb': { + 'level': 'WARNING' + }, + 'asyncio': { + 'level': 'WARNING' + } + } + } + + # Apply the configuration + logging.config.dictConfig(config) + + # Log the start of the application + logging.info(f"Debug logs will be written to: {debug_log_file}") diff --git a/src/data_acquisition/__init__.py b/src/data_acquisition/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/data_acquisition/__init__.py @@ -0,0 +1 @@ + diff --git a/src/data_acquisition/arxiv_client.py b/src/data_acquisition/arxiv_client.py new file mode 100644 index 0000000..d9458f9 --- /dev/null +++ b/src/data_acquisition/arxiv_client.py @@ -0,0 +1,56 @@ +import arxiv +from typing import List, Dict, Any +from tenacity import retry, stop_after_attempt, wait_exponential +import aiohttp + +class ArxivClient: + def __init__(self): + self.client = arxiv.Client() + self._session = None + + async def _get_session(self) -> aiohttp.ClientSession: + """Get or create an aiohttp session.""" + if self._session is None: + self._session = aiohttp.ClientSession() + return self._session + + async def close(self): + """Close the session.""" + if self._session is not None: + await self._session.close() + self._session = None + + async def __aenter__(self): + """Async context manager entry.""" + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + """Async context manager exit.""" + await self.close() + + @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) + async def fetch_papers(self, query: str, 
max_results: int = 10) -> List[Dict[Any, Any]]: + """Fetch papers from arXiv based on query.""" + search = arxiv.Search( + query=query, + max_results=max_results, + sort_by=arxiv.SortCriterion.SubmittedDate + ) + + results = [] + # Use list() to get all results at once instead of async iteration + papers = list(self.client.results(search)) + + for paper in papers: + results.append({ + 'id': paper.entry_id.split('/')[-1], # Get the ID without the full URL + 'title': paper.title, + 'authors': [author.name for author in paper.authors], + 'summary': paper.summary, + 'pdf_url': paper.pdf_url, + 'published': paper.published.isoformat() if paper.published else None, + 'updated': paper.updated.isoformat() if paper.updated else None, + 'categories': paper.categories + }) + + return results diff --git a/src/data_acquisition/pdf_downloader.py b/src/data_acquisition/pdf_downloader.py new file mode 100644 index 0000000..21401d9 --- /dev/null +++ b/src/data_acquisition/pdf_downloader.py @@ -0,0 +1,53 @@ +import aiohttp +import os +from pathlib import Path +from tenacity import retry, stop_after_attempt, wait_exponential +import aiofiles + +class PDFDownloader: + def __init__(self, storage_dir: str = "papers"): + self.storage_dir = Path(storage_dir) + self.storage_dir.mkdir(parents=True, exist_ok=True) + self._session = None + + async def _get_session(self) -> aiohttp.ClientSession: + """Get or create an aiohttp session.""" + if self._session is None: + self._session = aiohttp.ClientSession() + return self._session + + async def close(self): + """Close the session.""" + if self._session is not None: + await self._session.close() + self._session = None + + @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) + async def download_pdf(self, pdf_url: str, paper_id: str) -> str: + """Download PDF from URL and save to storage directory.""" + filename = self.storage_dir / f"{paper_id}.pdf" + + if filename.exists(): + return str(filename) + + try: + session = await self._get_session() + async with session.get(pdf_url) as response: + if response.status == 200: + async with aiofiles.open(filename, 'wb') as f: + await f.write(await response.read()) + return str(filename) + else: + raise Exception(f"Failed to download PDF: HTTP {response.status}") + except Exception as e: + if filename.exists(): + filename.unlink() # Delete the partially downloaded file + raise Exception(f"Error downloading PDF: {str(e)}") + + async def __aenter__(self): + """Async context manager entry.""" + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + """Async context manager exit.""" + await self.close() diff --git a/src/llm_providers/__init__.py b/src/llm_providers/__init__.py new file mode 100644 index 0000000..f3782ab --- /dev/null +++ b/src/llm_providers/__init__.py @@ -0,0 +1 @@ +"""LLM providers module.""" diff --git a/src/llm_providers/base.py b/src/llm_providers/base.py new file mode 100644 index 0000000..661571d --- /dev/null +++ b/src/llm_providers/base.py @@ -0,0 +1,26 @@ +"""Base LLM provider class.""" +from abc import ABC, abstractmethod +from typing import Dict, Any +from src.config.llm_config import LLMConfig + +class BaseLLMProvider(ABC): + """Base class for LLM providers.""" + + def __init__(self, config: LLMConfig): + self.config = config + + @abstractmethod + async def generate_text(self, prompt: str) -> str: + """Generate text from a prompt.""" + pass + + @abstractmethod + async def close(self): + """Close any resources.""" + pass + + async def __aenter__(self): + 
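+ # Entering the context returns the provider unchanged; __aexit__ below delegates
+ # cleanup to the subclass's close() implementation.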
return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + await self.close() diff --git a/src/llm_providers/deepseek_provider.py b/src/llm_providers/deepseek_provider.py new file mode 100644 index 0000000..3c5c115 --- /dev/null +++ b/src/llm_providers/deepseek_provider.py @@ -0,0 +1,66 @@ +"""Deepseek LLM provider.""" +from typing import Dict, Any +import aiohttp +import logging +from src.config.llm_config import LLMConfig +from src.llm_providers.base import BaseLLMProvider +from src.utils.debug import tracker + +logger = logging.getLogger(__name__) + +class DeepseekProvider(BaseLLMProvider): + """Deepseek LLM provider implementation.""" + + def __init__(self, config: LLMConfig): + super().__init__(config) + self.session = None + logger.debug(f"Initialized DeepseekProvider with model: {config.model_name}") + + async def _get_session(self) -> aiohttp.ClientSession: + """Get or create an aiohttp session.""" + if self.session is None: + logger.debug("Creating new Deepseek session") + self.session = aiohttp.ClientSession( + headers={"Authorization": f"Bearer {self.config.api_key}"} + ) + tracker.track_session(self.session) + return self.session + + async def generate_text(self, prompt: str) -> str: + """Generate text using Deepseek's API.""" + try: + session = await self._get_session() + logger.debug(f"Using session {id(session)} for text generation") + + payload = { + "model": self.config.model_name, + "messages": [{"role": "user", "content": prompt}], + **self.config.additional_params + } + + logger.debug(f"Making request to Deepseek API: {self.config.api_base}/chat/completions") + async with session.post(f"{self.config.api_base}/chat/completions", json=payload) as response: + if response.status != 200: + error_text = await response.text() + logger.error(f"Deepseek API error: {response.status} - {error_text}") + raise Exception(f"Deepseek API error: {response.status} - {error_text}") + + data = await response.json() + logger.debug("Successfully received response from Deepseek API") + return data['choices'][0]['message']['content'] + + except Exception as e: + logger.error(f"Error generating text with Deepseek: {str(e)}") + raise + + async def close(self): + """Close the session.""" + if self.session: + logger.debug(f"Closing Deepseek session {id(self.session)}") + try: + await self.session.close() + tracker.untrack_session(self.session) + self.session = None + except Exception as e: + logger.error(f"Error closing Deepseek session: {str(e)}") + raise diff --git a/src/llm_providers/openai_provider.py b/src/llm_providers/openai_provider.py new file mode 100644 index 0000000..66e5bcd --- /dev/null +++ b/src/llm_providers/openai_provider.py @@ -0,0 +1,30 @@ +"""OpenAI LLM provider.""" +from typing import Dict, Any +import openai +from src.config.llm_config import LLMConfig +from src.llm_providers.base import BaseLLMProvider + +class OpenAIProvider(BaseLLMProvider): + """OpenAI LLM provider implementation.""" + + def __init__(self, config: LLMConfig): + super().__init__(config) + openai.api_key = config.api_key + self.client = openai.AsyncOpenAI() + + async def generate_text(self, prompt: str) -> str: + """Generate text using OpenAI's API.""" + try: + response = await self.client.chat.completions.create( + model=self.config.model_name, + messages=[{"role": "user", "content": prompt}], + **self.config.additional_params + ) + return response.choices[0].message.content + except Exception as e: + print(f"Error generating text with OpenAI: {str(e)}") + raise + + async def close(self): + 
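+ # Releases the AsyncOpenAI client's underlying HTTP resources.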
"""Close the client.""" + await self.client.close() diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..5adf1e9 --- /dev/null +++ b/src/main.py @@ -0,0 +1,135 @@ +import asyncio +import signal +import logging +import sys +from src.agent_controller import AgentController +from src.utils.debug import tracker +from src.config.logging_config import setup_logging + +logger = logging.getLogger(__name__) + +def handle_sigint(): + """Handle interrupt signal.""" + logger.info("Received interrupt signal, canceling tasks...") + for task in asyncio.all_tasks(): + task.cancel() + +async def cleanup_resources(loop: asyncio.AbstractEventLoop): + """Clean up any remaining resources.""" + logger.debug("Starting resource cleanup...") + tracker.print_active_resources() + + try: + # Cancel all tasks + pending = asyncio.all_tasks(loop) + if pending: + logger.debug(f"Cancelling {len(pending)} pending tasks...") + for task in pending: + if not task.done() and not task.cancelled(): + task.cancel() + + # Wait for tasks to complete with timeout + logger.debug("Waiting for tasks to complete...") + await asyncio.wait(pending, timeout=5) + + # Force close any remaining tasks + for task in pending: + if not task.done(): + logger.warning(f"Force closing task: {task.get_name()}") + try: + task.close() + except Exception as e: + logger.error(f"Error closing task: {str(e)}") + + except Exception as e: + logger.error(f"Error during cleanup: {str(e)}") + finally: + logger.debug("Resource cleanup completed") + +async def main(): + """Main application entry point.""" + try: + logger.debug("Starting main application...") + async with AgentController() as agent: + query = "Large Language Models recent advances" + logger.debug(f"Processing query: {query}") + + try: + # Process query and get both new and existing papers + results = await agent.process_query(query) + + if results: + print("\n=== Analysis Results ===") + for paper in results: + print(f"\nTitle: {paper['title']}") + print(f"Authors: {paper.get('authors', 'Unknown')}") + + if 'analysis' in paper: + # This is a newly processed paper + print("Status: Newly Processed") + print("\nSummary:", paper['analysis'].get('summary', 'No summary available')) + print("\nTechnical Concepts:", paper['analysis'].get('technical_concepts', 'No concepts available')) + else: + # This is an existing paper from the database + print("Status: Retrieved from Database") + print("\nRelevant Text:", paper.get('relevant_text', 'No text available')[:500] + "...") + else: + print("\nNo papers found for the query.") + + except Exception as e: + logger.error(f"Error in main: {str(e)}") + raise + finally: + logger.debug("Main execution completed") + +def run_main(): + """Run the main application.""" + # Set up logging + setup_logging() + + # Create new event loop + logger.debug("Creating new event loop...") + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + tracker.track_loop(loop) + + # Set up signal handlers + signals = (signal.SIGTERM, signal.SIGINT) + for s in signals: + loop.add_signal_handler( + s, lambda s=s: handle_sigint() + ) + + try: + logger.debug("Running main function...") + loop.run_until_complete(main()) + except asyncio.CancelledError: + logger.info("Main execution cancelled") + except Exception as e: + logger.error(f"Error in run_main: {str(e)}") + raise + finally: + try: + # Run cleanup + logger.debug("Running final cleanup...") + loop.run_until_complete(cleanup_resources(loop)) + + # Close the loop + logger.debug("Closing event loop...") + 
loop.run_until_complete(loop.shutdown_asyncgens()) + loop.run_until_complete(loop.shutdown_default_executor()) + + # Remove the event loop + asyncio.set_event_loop(None) + loop.close() + tracker.untrack_loop(loop) + + logger.debug("Cleanup completed successfully") + except Exception as e: + logger.error(f"Error during final cleanup: {str(e)}") + finally: + # Force exit to ensure all resources are released + sys.exit(0) + +if __name__ == "__main__": + run_main() diff --git a/src/processing/__init__.py b/src/processing/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/processing/__init__.py @@ -0,0 +1 @@ + diff --git a/src/processing/chunker.py b/src/processing/chunker.py new file mode 100644 index 0000000..8761743 --- /dev/null +++ b/src/processing/chunker.py @@ -0,0 +1,15 @@ +from typing import List +from langchain.text_splitter import RecursiveCharacterTextSplitter + +class SemanticChunker: + def __init__(self, chunk_size: int = 1000, chunk_overlap: int = 200): + self.text_splitter = RecursiveCharacterTextSplitter( + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + length_function=len, + separators=["\n\n", "\n", " ", ""] + ) + + def split_text(self, text: str) -> List[str]: + """Split text into semantic chunks.""" + return self.text_splitter.split_text(text) diff --git a/src/processing/text_extractor.py b/src/processing/text_extractor.py new file mode 100644 index 0000000..aa46f4f --- /dev/null +++ b/src/processing/text_extractor.py @@ -0,0 +1,20 @@ +from typing import List +import PyPDF2 +from pathlib import Path + +class TextExtractor: + def __init__(self): + pass + + def extract_text(self, pdf_path: str) -> str: + """Extract text from PDF using PyPDF2.""" + try: + with open(pdf_path, 'rb') as file: + reader = PyPDF2.PdfReader(file) + text = "" + for page in reader.pages: + text += page.extract_text() + "\n" + return text + except Exception as e: + print(f"Error extracting text from PDF: {e}") + return "" diff --git a/src/scripts/fetch_papers.py b/src/scripts/fetch_papers.py new file mode 100644 index 0000000..2bb4944 --- /dev/null +++ b/src/scripts/fetch_papers.py @@ -0,0 +1,160 @@ +import asyncio +import json +import logging +import os +from datetime import datetime, timedelta +from typing import List, Dict + +from src.utils.agent_controller import AgentController +from src.data_acquisition.arxiv_client import ArxivClient + +logger = logging.getLogger(__name__) + +class PaperFetcher: + def __init__(self, config_path: str = "config/arxiv_categories.json"): + self.config_path = config_path + self.config = self._load_config() + + def _load_config(self) -> Dict: + """Load the arXiv categories configuration.""" + try: + with open(self.config_path, 'r') as f: + return json.load(f) + except FileNotFoundError: + logger.error(f"Config file not found: {self.config_path}") + return {"categories": [], "default_categories": []} + + def save_config(self) -> None: + """Save the current configuration.""" + os.makedirs(os.path.dirname(self.config_path), exist_ok=True) + with open(self.config_path, 'w') as f: + json.dump(self.config, f, indent=4) + + def add_category(self, category_id: str, name: str, description: str) -> None: + """Add a new category to monitor.""" + self.config["categories"].append({ + "id": category_id, + "name": name, + "description": description + }) + self.save_config() + + def remove_category(self, category_id: str) -> None: + """Remove a category from monitoring.""" + self.config["categories"] = [ + cat for cat in self.config["categories"] 
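+ # keep every category except the one being removed; default_categories is filtered below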
+ if cat["id"] != category_id + ] + self.config["default_categories"] = [ + cat for cat in self.config["default_categories"] + if cat != category_id + ] + self.save_config() + + def set_default_categories(self, category_ids: List[str]) -> None: + """Set which categories to fetch by default.""" + existing_cats = {cat["id"] for cat in self.config["categories"]} + valid_cats = [cat for cat in category_ids if cat in existing_cats] + self.config["default_categories"] = valid_cats + self.save_config() + + def list_categories(self) -> None: + """Print all configured categories.""" + print("\nConfigured arXiv Categories:") + print("="*50) + for cat in self.config["categories"]: + is_default = "✓" if cat["id"] in self.config["default_categories"] else " " + print(f"[{is_default}] {cat['id']}: {cat['name']}") + print(f" {cat['description']}") + print("="*50) + + async def fetch_papers(self, days: int = 1, categories: List[str] = None) -> None: + """Fetch and analyze papers from specified categories.""" + if categories is None: + categories = self.config["default_categories"] + + if not categories: + logger.error("No categories specified and no default categories configured") + return + + logger.info(f"Fetching papers from categories: {categories}") + + # Initialize clients + arxiv_client = ArxivClient() + agent = AgentController() + + try: + await agent.initialize() + + # Calculate date range + end_date = datetime.now() + start_date = end_date - timedelta(days=days) + + # Fetch and process papers for each category + for category in categories: + logger.info(f"Processing category: {category}") + papers = await arxiv_client.fetch_papers( + category=category, + start_date=start_date, + end_date=end_date + ) + + if not papers: + logger.info(f"No new papers found for category {category}") + continue + + logger.info(f"Found {len(papers)} papers in {category}") + for paper in papers: + await agent.process_paper(paper) + + finally: + await agent.close() + +async def main(): + # Configure logging + logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' + ) + + fetcher = PaperFetcher() + + # Parse command line arguments (you can extend this) + import argparse + parser = argparse.ArgumentParser(description='Fetch and analyze arXiv papers') + parser.add_argument('--days', type=int, default=1, help='Number of days to look back') + parser.add_argument('--list', action='store_true', help='List configured categories') + parser.add_argument('--add', nargs=3, metavar=('ID', 'NAME', 'DESCRIPTION'), + help='Add a new category') + parser.add_argument('--remove', metavar='ID', help='Remove a category') + parser.add_argument('--set-default', nargs='+', metavar='ID', + help='Set default categories') + parser.add_argument('--categories', nargs='+', metavar='ID', + help='Categories to fetch (defaults to configured defaults)') + + args = parser.parse_args() + + if args.list: + fetcher.list_categories() + return + + if args.add: + fetcher.add_category(*args.add) + print(f"Added category: {args.add[0]}") + return + + if args.remove: + fetcher.remove_category(args.remove) + print(f"Removed category: {args.remove}") + return + + if args.set_default: + fetcher.set_default_categories(args.set_default) + print(f"Set default categories: {args.set_default}") + return + + # Fetch papers + await fetcher.fetch_papers(days=args.days, categories=args.categories) + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/src/storage/__init__.py b/src/storage/__init__.py new file mode 
100644 index 0000000..8b13789 --- /dev/null +++ b/src/storage/__init__.py @@ -0,0 +1 @@ + diff --git a/src/storage/paper_store.py b/src/storage/paper_store.py new file mode 100644 index 0000000..dd82884 --- /dev/null +++ b/src/storage/paper_store.py @@ -0,0 +1,130 @@ +import os +import logging +from typing import Dict, List, Optional +import asyncpg +from datetime import datetime + +logger = logging.getLogger(__name__) + +CREATE_PAPERS_TABLE = """ +CREATE TABLE IF NOT EXISTS papers ( + id TEXT PRIMARY KEY, + title TEXT NOT NULL, + authors TEXT[] NOT NULL, + summary TEXT, + technical_concepts TEXT, + fluff_score INTEGER CHECK (fluff_score >= 0 AND fluff_score <= 100), + fluff_explanation TEXT, + pdf_url TEXT, + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP +); +""" + +class PaperStore: + def __init__(self): + self.pool = None + + async def initialize(self): + """Initialize the database connection pool and create tables.""" + # Get database configuration from environment variables + host = os.getenv('DB_HOST', 'localhost') + port = os.getenv('DB_PORT', '5432') + dbname = os.getenv('DB_NAME', 'paper_analysis') + user = os.getenv('DB_USER', 'postgres') + password = os.getenv('DB_PASSWORD') + + if not password: + logger.error("Database password not found in environment variables") + raise ValueError("DB_PASSWORD environment variable is required") + + try: + # Construct database URL + db_url = f'postgresql://{user}:{password}@{host}:{port}/{dbname}' + + # Create connection pool + self.pool = await asyncpg.create_pool(db_url) + + # Create tables + async with self.pool.acquire() as conn: + await conn.execute(CREATE_PAPERS_TABLE) + + logger.info("Database initialized successfully") + except Exception as e: + logger.error(f"Error initializing database: {str(e)}") + raise + + async def close(self): + """Close the database connection pool.""" + if self.pool: + await self.pool.close() + + async def store_paper(self, paper: Dict) -> None: + """Store a paper and its analysis in the database.""" + if not self.pool: + await self.initialize() + + async with self.pool.acquire() as conn: + # Extract analysis if present + analysis = paper.get('analysis', {}) + fluff_analysis = analysis.get('fluff', {}) + + # Store paper + await conn.execute(''' + INSERT INTO papers ( + id, title, authors, summary, technical_concepts, + fluff_score, fluff_explanation, pdf_url + ) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8) + ON CONFLICT (id) DO UPDATE SET + title = EXCLUDED.title, + authors = EXCLUDED.authors, + summary = EXCLUDED.summary, + technical_concepts = EXCLUDED.technical_concepts, + fluff_score = EXCLUDED.fluff_score, + fluff_explanation = EXCLUDED.fluff_explanation, + pdf_url = EXCLUDED.pdf_url, + updated_at = CURRENT_TIMESTAMP + ''', + paper['id'], + paper['title'], + paper['authors'], + analysis.get('summary'), + analysis.get('technical_concepts'), + fluff_analysis.get('score'), + fluff_analysis.get('explanation'), + paper.get('pdf_url')) + + async def get_paper(self, paper_id: str) -> Optional[Dict]: + """Retrieve a paper by its ID.""" + if not self.pool: + await self.initialize() + + async with self.pool.acquire() as conn: + row = await conn.fetchrow(''' + SELECT * FROM papers WHERE id = $1 + ''', paper_id) + + if row: + return dict(row) + return None + + async def get_all_paper_ids(self) -> List[str]: + """Get all paper IDs in the database.""" + if not self.pool: + await self.initialize() + + async with self.pool.acquire() as conn: + 
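+ # Id-only query; callers that need full records use get_papers_by_ids().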
rows = await conn.fetch('SELECT id FROM papers') + return [row['id'] for row in rows] + + async def get_papers_by_ids(self, paper_ids: List[str]) -> List[Dict]: + """Retrieve multiple papers by their IDs.""" + if not self.pool: + await self.initialize() + + async with self.pool.acquire() as conn: + rows = await conn.fetch(''' + SELECT * FROM papers WHERE id = ANY($1) + ''', paper_ids) + return [dict(row) for row in rows] diff --git a/src/storage/vector_store.py b/src/storage/vector_store.py new file mode 100644 index 0000000..b19a810 --- /dev/null +++ b/src/storage/vector_store.py @@ -0,0 +1,81 @@ +from typing import List, Dict +import chromadb +from chromadb.config import Settings + +class VectorStore: + def __init__(self, persist_directory: str = "chroma_db"): + self.client = chromadb.Client(Settings( + persist_directory=persist_directory, + anonymized_telemetry=False + )) + + # Create collection with proper schema + self.collection = self.client.get_or_create_collection( + name="paper_chunks", + metadata={"hnsw:space": "cosine"} # Use cosine similarity + ) + + def paper_exists(self, paper_id: str) -> bool: + """Check if a paper already exists in the vector store.""" + try: + # Try to get any chunks with this paper_id + results = self.collection.get( + where={"paper_id": paper_id}, + limit=1 + ) + return len(results['ids']) > 0 + except Exception: + return False + + def get_all_paper_ids(self) -> List[str]: + """Get all unique paper IDs in the store.""" + try: + # Get all documents + results = self.collection.get() + # Extract unique paper_ids from metadata + paper_ids = set() + if results and 'metadatas' in results: + for metadata in results['metadatas']: + if metadata and 'paper_id' in metadata: + paper_ids.add(metadata['paper_id']) + return list(paper_ids) + except Exception: + return [] + + def add_chunks(self, chunks: List[str], metadata: List[Dict], ids: List[str]): + """Add text chunks to the vector store.""" + # Ensure all metadata values are strings or simple types + clean_metadata = [] + for meta in metadata: + clean_meta = {} + for key, value in meta.items(): + if isinstance(value, (str, int, float, bool)): + clean_meta[key] = value + else: + clean_meta[key] = str(value) + clean_metadata.append(clean_meta) + + self.collection.add( + documents=chunks, + metadatas=clean_metadata, + ids=ids + ) + + def query_similar(self, query: str, n_results: int = 5) -> Dict: + """Query for similar chunks.""" + try: + results = self.collection.query( + query_texts=[query], + n_results=n_results, + include=["metadatas", "documents"] + ) + + # Format results into a more usable structure + formatted_results = { + 'documents': results['documents'][0] if results['documents'] else [], # First query result + 'metadatas': results['metadatas'][0] if results['metadatas'] else [] # First query result + } + return formatted_results + except Exception as e: + print(f"Error querying vector store: {str(e)}") + return {'documents': [], 'metadatas': []} diff --git a/src/utils/debug.py b/src/utils/debug.py new file mode 100644 index 0000000..25e8069 --- /dev/null +++ b/src/utils/debug.py @@ -0,0 +1,53 @@ +"""Debug utilities for tracking async resources.""" +import asyncio +import aiohttp +import logging +from typing import Dict, Set +from weakref import WeakSet +import traceback + +# Configure logging +logging.basicConfig(level=logging.DEBUG) +logger = logging.getLogger(__name__) + +class AsyncResourceTracker: + """Tracks async resources to help identify leaks.""" + + def __init__(self): + self.active_sessions: 
Set[aiohttp.ClientSession] = WeakSet() + self.session_traces: Dict[aiohttp.ClientSession, str] = {} + self.active_loops: Set[asyncio.AbstractEventLoop] = WeakSet() + + def track_session(self, session: aiohttp.ClientSession): + """Track a new client session.""" + self.active_sessions.add(session) + self.session_traces[session] = ''.join(traceback.format_stack()) + logger.debug(f"New session created: {id(session)}") + logger.debug(f"Creation trace:\n{self.session_traces[session]}") + + def untrack_session(self, session: aiohttp.ClientSession): + """Untrack a closed client session.""" + self.active_sessions.discard(session) + self.session_traces.pop(session, None) + logger.debug(f"Session closed: {id(session)}") + + def track_loop(self, loop: asyncio.AbstractEventLoop): + """Track an event loop.""" + self.active_loops.add(loop) + logger.debug(f"New event loop tracked: {id(loop)}") + + def untrack_loop(self, loop: asyncio.AbstractEventLoop): + """Untrack a closed event loop.""" + self.active_loops.discard(loop) + logger.debug(f"Event loop untracked: {id(loop)}") + + def print_active_resources(self): + """Print information about active resources.""" + logger.debug("\n=== Active Resources ===") + logger.debug(f"Active sessions: {len(self.active_sessions)}") + for session in self.active_sessions: + logger.debug(f"\nSession {id(session)} creation trace:\n{self.session_traces.get(session, 'No trace available')}") + logger.debug(f"\nActive event loops: {len(self.active_loops)}") + +# Global tracker instance +tracker = AsyncResourceTracker() diff --git a/test_env.py b/test_env.py new file mode 100644 index 0000000..9d5453b --- /dev/null +++ b/test_env.py @@ -0,0 +1,20 @@ +import os +from dotenv import load_dotenv + +def test_env(): + # Load environment variables + load_dotenv() + + # Get the API key + api_key = os.getenv('OPENAI_API_KEY') + + print("Environment variable details:") + print(f"1. API key exists: {api_key is not None}") + if api_key: + print(f"2. API key length: {len(api_key)}") + print(f"3. First 8 chars: {api_key[:8]}") + print(f"4. Last 8 chars: {api_key[-8:]}") + print(f"5. 
Raw value: {repr(api_key)}") + +if __name__ == "__main__": + test_env() diff --git a/view_papers.py b/view_papers.py new file mode 100644 index 0000000..848a7ff --- /dev/null +++ b/view_papers.py @@ -0,0 +1,63 @@ +import asyncio +import logging +from dotenv import load_dotenv +from src.storage.paper_store import PaperStore + +def get_score_color(score: int) -> tuple[str, str]: + """Get color codes for a score.""" + if score <= 25: + return '\033[92m', '(Excellent: Highly original research)' # Green + elif score <= 50: + return '\033[94m', '(Good: Solid research)' # Blue + elif score <= 75: + return '\033[93m', '(Fair: Some fluff present)' # Yellow + else: + return '\033[91m', '(Poor: Heavy on fluff)' # Red + +async def view_papers(): + """View all papers in the database.""" + # Load environment variables + load_dotenv() + + store = PaperStore() + try: + await store.initialize() + papers = await store.get_all_paper_ids() + + if not papers: + print("\nNo papers found in database.") + return + + print(f"\nFound {len(papers)} papers in database:") + + # Get full details for each paper + for paper_id in papers: + paper = await store.get_paper(paper_id) + if paper: + print("\n" + "="*80) + print(f"Title: {paper['title']}") + print(f"Authors: {', '.join(paper['authors']) if isinstance(paper['authors'], list) else paper['authors']}") + print("\nSummary:") + print(paper['summary'] if paper['summary'] else "No summary available") + print("\nTechnical Concepts:") + print(paper['technical_concepts'] if paper['technical_concepts'] else "No technical concepts available") + + # Display fluff score with color coding + score = paper.get('fluff_score') + if score is not None: + color, description = get_score_color(score) + reset = '\033[0m' + print(f"\nFluff Score: {color}{score}/100 {description}{reset}") + print("\nAnalysis:") + print(paper['fluff_explanation'] if paper['fluff_explanation'] else "No analysis available") + else: + print("\nFluff Analysis: Not available") + + print("\n" + "="*80) + + finally: + await store.close() + +if __name__ == "__main__": + logging.basicConfig(level=logging.WARNING) # Suppress debug logs + asyncio.run(view_papers())
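
A minimal local-run sketch for reviewers (not part of the commit): the placeholder values below are assumptions, and both commands need a reachable PostgreSQL instance plus a valid API key for the default `deepseek` provider.

```bash
# .env (placeholders; loaded via python-dotenv)
#   DEEPSEEK_API_KEY=...   # default AgentController provider is deepseek
#   DB_PASSWORD=...        # required by PaperStore; DB_HOST/DB_PORT/DB_NAME/DB_USER
#                          # default to localhost/5432/paper_analysis/postgres

python run.py          # run the default query in src/main.py end to end
python view_papers.py  # print stored papers with color-coded fluff scores
```

The `--days` CLI path in `src/scripts/fetch_papers.py` is not shown here because it imports `AgentController` from `src.utils.agent_controller` and calls `initialize()` / `process_paper()`, which this commit's `src/agent_controller.py` does not provide.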