import argparse
import asyncio
import logging
import signal
import sys
from datetime import datetime, timedelta
from typing import List, Optional

from src.utils.agent_controller import AgentController
from src.data_acquisition.arxiv_client import ArxivClient
from src.utils.debug import tracker
from src.config.logging_config import setup_logging

logger = logging.getLogger(__name__)


def handle_sigint(signum, frame):
    """Handle interrupt signal."""
    logger.info("Received interrupt signal, exiting...")
    sys.exit(0)


async def cleanup_resources():
    """Clean up any remaining resources."""
    logger.debug("Starting resource cleanup...")
    tracker.print_active_resources()
    try:
        # Cancel all tasks except the current one
        pending = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
        if pending:
            logger.debug(f"Cancelling {len(pending)} pending tasks...")
            for task in pending:
                if not task.done() and not task.cancelled():
                    task.cancel()
            # Wait for the cancelled tasks to finish, with a timeout
            try:
                logger.debug("Waiting for tasks to complete...")
                await asyncio.wait(pending, timeout=5)
            except asyncio.CancelledError:
                logger.debug("Task wait cancelled")
        logger.debug("Cleanup completed successfully")
    except Exception as e:
        logger.error(f"Error during cleanup: {e}")


def print_fluff_score(analysis: dict) -> None:
    """Print a colour-coded fluff score, if the analysis contains one."""
    fluff = analysis.get('fluff', {})
    score = fluff.get('score')
    if score is None:
        return
    # Green below 30, yellow below 70, red otherwise
    color = '\033[92m' if score < 30 else '\033[93m' if score < 70 else '\033[91m'
    reset = '\033[0m'
    print(f"\nFluff Score: {color}{score}/100{reset}")
    print("Analysis:")
    print(fluff.get('explanation', 'No explanation available'))


async def fetch_papers(days: int = 7, categories: Optional[List[str]] = None) -> None:
    """Fetch and analyze papers from arXiv."""
    logger.info(f"Fetching papers from the last {days} days")
    if categories is None:
        categories = ["cs.AI"]

    async with ArxivClient() as client, AgentController() as agent:
        try:
            # Calculate the date range
            end_date = datetime.now()
            start_date = end_date - timedelta(days=days)
            logger.info(
                f"Date range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}"
            )

            # Fetch and analyze papers for each category
            for category in categories:
                logger.info(f"Fetching papers for category: {category}")
                papers = await client.fetch_papers(
                    category=category, start_date=start_date, end_date=end_date
                )
                if not papers:
                    print(f"\nNo recent papers found in {category}")
                    continue

                print(f"\nProcessing papers in {category}:")
                print("=" * 80)
                print()
                for i, paper in enumerate(papers, 1):
                    print(f"Processing paper {i}/{len(papers)}: {paper['title']}")
                    try:
                        # Analyze the paper
                        analysis = await agent.analyze_paper(paper)

                        # Print the analysis
                        print("\nAnalysis:")
                        print(f"Summary: {analysis.get('summary', 'No summary available')}")
                        print("\nTechnical Concepts:")
                        print(analysis.get('technical_concepts', 'No technical concepts available'))
                        print_fluff_score(analysis)
                    except Exception as e:
                        logger.error(f"Error processing paper: {e}")
                        print("Failed to analyze paper")
                    print("-" * 80)
                    print()
        except Exception as e:
            logger.error(f"Error fetching papers: {e}")
            raise


async def fetch_all_papers(categories: List[str], max_results: int = 1000) -> None:
    """Fetch all papers from the specified categories."""
    async with ArxivClient() as client, AgentController() as agent:
        for category in categories:
            papers = await client.fetch_papers(category=category, max_results=max_results)
            print(f"Found {len(papers)} papers in {category}")
            for paper in papers:
                await agent.analyze_paper(paper)
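
# For reference, the paper and analysis dictionaries are assumed to look
# roughly like the sketch below, inferred from how they are accessed in this
# module. The actual ArxivClient / AgentController return values may carry
# additional fields.
#
#   paper = {
#       'title': str,
#       'authors': List[str],
#       'entry_id': str,       # arXiv ID, e.g. '2502.06788v1'
#       'pdf_url': str,
#       'abstract': str,
#   }
#   analysis = {
#       'summary': str,
#       'technical_concepts': str,
#       'fluff': {'score': int, 'explanation': str},  # score on a 0-100 scale
#   }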
ID.""" print(f"\nFetching paper: {paper_id}") async with ArxivClient() as client, AgentController() as agent: try: # Get paper from arXiv paper = await client.get_paper_by_id(paper_id) if not paper: print(f"\nPaper {paper_id} not found on arXiv.") return print(f"\nFound paper: {paper['title']}") print(f"Authors: {', '.join(paper['authors'])}") # Analyze the paper analysis = await agent.analyze_paper(paper) if analysis: print("\nAnalysis:") print("=" * 80) print("\nSummary:") print(analysis.get('summary', 'No summary available')) print("\nTechnical Concepts:") print(analysis.get('technical_concepts', 'No technical concepts available')) fluff = analysis.get('fluff', {}) score = fluff.get('score') if score is not None: color = '\033[92m' if score < 30 else '\033[93m' if score < 70 else '\033[91m' reset = '\033[0m' print(f"\nFluff Score: {color}{score}/100{reset}") print("Analysis:") print(fluff.get('explanation', 'No explanation available')) else: print("\nPaper was already analyzed or an error occurred during analysis.") except Exception as e: logger.error(f"Error processing paper: {e}") raise async def process_query(query: str) -> None: """Process a search query and display results.""" async with AgentController() as agent: try: results = await agent.process_query(query) if not results: print("\nNo matching papers found.") return print(f"\nFound {len(results)} matching papers:") print("=" * 80) for i, paper in enumerate(results, 1): print(f"\n{i}. {paper['title']}") print(f" Authors: {', '.join(paper['authors'])}") analysis = paper.get('analysis', {}) print("\nSummary:") print(analysis.get('summary', 'No summary available')) print("\nTechnical Concepts:") print(analysis.get('technical_concepts', 'No technical concepts available')) fluff = analysis.get('fluff', {}) score = fluff.get('score') if score is not None: color = '\033[92m' if score < 30 else '\033[93m' if score < 70 else '\033[91m' reset = '\033[0m' print(f"\nFluff Score: {color}{score}/100{reset}") print("Analysis:") print(fluff.get('explanation', 'No explanation available')) print("-" * 80) except Exception as e: logger.error(f"Error processing query: {e}") raise async def search_arxiv(query: str, category: Optional[str] = None, max_results: int = 10) -> None: """Search papers directly on arXiv.""" print(f"\nSearching arXiv for: {query}") if category: print(f"Category: {category}") async with ArxivClient() as client: try: papers = await client.fetch_papers(query=query, category=category, max_results=max_results) if not papers: print("\nNo papers found matching your query.") return print(f"\nFound {len(papers)} papers:") print("=" * 80) for i, paper in enumerate(papers, 1): print(f"\n{i}. 
{paper['title']}") print(f" Authors: {', '.join(paper['authors'])}") print(f" arXiv ID: {paper['entry_id']}") print(f" PDF: {paper['pdf_url']}") print("\nAbstract:") print(paper.get('abstract', 'No abstract available')) print("-" * 80) except Exception as e: logger.error(f"Error searching arXiv: {e}") raise async def main(): """Main application entry point.""" parser = argparse.ArgumentParser(description='AI Paper Analysis System') subparsers = parser.add_subparsers(dest='command', help='Command to run') # Fetch papers command fetch_parser = subparsers.add_parser('fetch', help='Fetch recent papers') fetch_parser.add_argument('--days', type=int, default=7, help='Number of days to look back') fetch_parser.add_argument('--categories', nargs='+', default=['cs.AI'], help='arXiv categories to fetch') # Fetch all papers command fetch_all_parser = subparsers.add_parser('fetch-all', help='Fetch all papers from categories') fetch_all_parser.add_argument('--categories', nargs='+', default=['cs.AI'], help='arXiv categories to fetch') fetch_all_parser.add_argument('--max-results', type=int, default=1000, help='Maximum number of papers to fetch per category') # Fetch single paper command fetch_one_parser = subparsers.add_parser('fetch-paper', help='Fetch and analyze a single paper') fetch_one_parser.add_argument('paper_id', help='arXiv paper ID (e.g., 2502.06788v1)') # Search local papers command search_parser = subparsers.add_parser('search', help='Search papers in local database') search_parser.add_argument('query', help='Search query') # Search arXiv directly command arxiv_parser = subparsers.add_parser('arxiv-search', help='Search papers directly on arXiv') arxiv_parser.add_argument('query', help='Search query') arxiv_parser.add_argument('--category', help='arXiv category (e.g., cs.AI)') arxiv_parser.add_argument('--max-results', type=int, default=10, help='Maximum number of results to return') args = parser.parse_args() if args.command == 'fetch': await fetch_papers(days=args.days, categories=args.categories) elif args.command == 'fetch-all': await fetch_all_papers(args.categories, args.max_results) elif args.command == 'fetch-paper': await fetch_single_paper(args.paper_id) elif args.command == 'search': await process_query(args.query) elif args.command == 'arxiv-search': await search_arxiv(args.query, args.category, args.max_results) else: parser.print_help() def run_main(): """Run the main application.""" # Set up logging setup_logging() # Set up signal handlers for Windows signal.signal(signal.SIGINT, handle_sigint) signal.signal(signal.SIGTERM, handle_sigint) try: # Run main application asyncio.run(main()) except KeyboardInterrupt: logger.info("Application cancelled by user") except Exception as e: logger.error(f"Application error: {e}") sys.exit(1) finally: # Clean up try: asyncio.run(cleanup_resources()) except Exception as e: logger.error(f"Error during cleanup: {e}") if __name__ == "__main__": import argparse run_main()