diff --git a/check_db.py b/check_db.py
new file mode 100644
index 0000000..75b3dbc
--- /dev/null
+++ b/check_db.py
@@ -0,0 +1,16 @@
+import asyncio
+from src.storage.paper_store import PaperStore
+
+async def check_papers():
+    store = PaperStore()
+    await store.initialize()
+    papers = await store.get_all_papers()
+    print('Papers in PostgreSQL:', len(papers))
+    if papers:
+        print('\nPaper IDs:')
+        for paper in papers:
+            print(f"- {paper['id']}")
+    await store.close()
+
+if __name__ == "__main__":
+    asyncio.run(check_papers())
diff --git a/papers/2501_12948v1.json b/papers/2501_12948v1.json
new file mode 100644
index 0000000..aeeef50
--- /dev/null
+++ b/papers/2501_12948v1.json
@@ -0,0 +1,213 @@
+{
+  "entry_id": "http://arxiv.org/abs/2501.12948v1",
+  "title": "DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning",
+  "authors": [
+    "DeepSeek-AI",
+    "Daya Guo",
+    "Dejian Yang",
+    "Haowei Zhang",
+    "Junxiao Song",
+    "Ruoyu Zhang",
+    "Runxin Xu",
+    "Qihao Zhu",
+    "Shirong Ma",
+    "Peiyi Wang",
+    "Xiao Bi",
+    "Xiaokang Zhang",
+    "Xingkai Yu",
+    "Yu Wu",
+    "Z. F. Wu",
+    "Zhibin Gou",
+    "Zhihong Shao",
+    "Zhuoshu Li",
+    "Ziyi Gao",
+    "Aixin Liu",
+    "Bing Xue",
+    "Bingxuan Wang",
+    "Bochao Wu",
+    "Bei Feng",
+    "Chengda Lu",
+    "Chenggang Zhao",
+    "Chengqi Deng",
+    "Chenyu Zhang",
+    "Chong Ruan",
+    "Damai Dai",
+    "Deli Chen",
+    "Dongjie Ji",
+    "Erhang Li",
+    "Fangyun Lin",
+    "Fucong Dai",
+    "Fuli Luo",
+    "Guangbo Hao",
+    "Guanting Chen",
+    "Guowei Li",
+    "H. Zhang",
+    "Han Bao",
+    "Hanwei Xu",
+    "Haocheng Wang",
+    "Honghui Ding",
+    "Huajian Xin",
+    "Huazuo Gao",
+    "Hui Qu",
+    "Hui Li",
+    "Jianzhong Guo",
+    "Jiashi Li",
+    "Jiawei Wang",
+    "Jingchang Chen",
+    "Jingyang Yuan",
+    "Junjie Qiu",
+    "Junlong Li",
+    "J. L. Cai",
+    "Jiaqi Ni",
+    "Jian Liang",
+    "Jin Chen",
+    "Kai Dong",
+    "Kai Hu",
+    "Kaige Gao",
+    "Kang Guan",
+    "Kexin Huang",
+    "Kuai Yu",
+    "Lean Wang",
+    "Lecong Zhang",
+    "Liang Zhao",
+    "Litong Wang",
+    "Liyue Zhang",
+    "Lei Xu",
+    "Leyi Xia",
+    "Mingchuan Zhang",
+    "Minghua Zhang",
+    "Minghui Tang",
+    "Meng Li",
+    "Miaojun Wang",
+    "Mingming Li",
+    "Ning Tian",
+    "Panpan Huang",
+    "Peng Zhang",
+    "Qiancheng Wang",
+    "Qinyu Chen",
+    "Qiushi Du",
+    "Ruiqi Ge",
+    "Ruisong Zhang",
+    "Ruizhe Pan",
+    "Runji Wang",
+    "R. J. Chen",
+    "R. L. Jin",
+    "Ruyi Chen",
+    "Shanghao Lu",
+    "Shangyan Zhou",
+    "Shanhuang Chen",
+    "Shengfeng Ye",
+    "Shiyu Wang",
+    "Shuiping Yu",
+    "Shunfeng Zhou",
+    "Shuting Pan",
+    "S. S. Li",
+    "Shuang Zhou",
+    "Shaoqing Wu",
+    "Shengfeng Ye",
+    "Tao Yun",
+    "Tian Pei",
+    "Tianyu Sun",
+    "T. Wang",
+    "Wangding Zeng",
+    "Wanjia Zhao",
+    "Wen Liu",
+    "Wenfeng Liang",
+    "Wenjun Gao",
+    "Wenqin Yu",
+    "Wentao Zhang",
+    "W. L. Xiao",
+    "Wei An",
+    "Xiaodong Liu",
+    "Xiaohan Wang",
+    "Xiaokang Chen",
+    "Xiaotao Nie",
+    "Xin Cheng",
+    "Xin Liu",
+    "Xin Xie",
+    "Xingchao Liu",
+    "Xinyu Yang",
+    "Xinyuan Li",
+    "Xuecheng Su",
+    "Xuheng Lin",
+    "X. Q. Li",
+    "Xiangyue Jin",
+    "Xiaojin Shen",
+    "Xiaosha Chen",
+    "Xiaowen Sun",
+    "Xiaoxiang Wang",
+    "Xinnan Song",
+    "Xinyi Zhou",
+    "Xianzu Wang",
+    "Xinxia Shan",
+    "Y. K. Li",
+    "Y. Q. Wang",
+    "Y. X. Wei",
+    "Yang Zhang",
+    "Yanhong Xu",
+    "Yao Li",
+    "Yao Zhao",
+    "Yaofeng Sun",
+    "Yaohui Wang",
+    "Yi Yu",
+    "Yichao Zhang",
+    "Yifan Shi",
+    "Yiliang Xiong",
+    "Ying He",
+    "Yishi Piao",
+    "Yisong Wang",
+    "Yixuan Tan",
+    "Yiyang Ma",
+    "Yiyuan Liu",
+    "Yongqiang Guo",
+    "Yuan Ou",
+    "Yuduan Wang",
+    "Yue Gong",
+    "Yuheng Zou",
+    "Yujia He",
+    "Yunfan Xiong",
+    "Yuxiang Luo",
+    "Yuxiang You",
+    "Yuxuan Liu",
+    "Yuyang Zhou",
+    "Y. X. Zhu",
+    "Yanhong Xu",
+    "Yanping Huang",
+    "Yaohui Li",
+    "Yi Zheng",
+    "Yuchen Zhu",
+    "Yunxian Ma",
+    "Ying Tang",
+    "Yukun Zha",
+    "Yuting Yan",
+    "Z. Z. Ren",
+    "Zehui Ren",
+    "Zhangli Sha",
+    "Zhe Fu",
+    "Zhean Xu",
+    "Zhenda Xie",
+    "Zhengyan Zhang",
+    "Zhewen Hao",
+    "Zhicheng Ma",
+    "Zhigang Yan",
+    "Zhiyu Wu",
+    "Zihui Gu",
+    "Zijia Zhu",
+    "Zijun Liu",
+    "Zilin Li",
+    "Ziwei Xie",
+    "Ziyang Song",
+    "Zizheng Pan",
+    "Zhen Huang",
+    "Zhipeng Xu",
+    "Zhongyu Zhang",
+    "Zhen Zhang"
+  ],
+  "summary": "We introduce our first-generation reasoning models, DeepSeek-R1-Zero and\nDeepSeek-R1. DeepSeek-R1-Zero, a model trained via large-scale reinforcement\nlearning (RL) without supervised fine-tuning (SFT) as a preliminary step,\ndemonstrates remarkable reasoning capabilities. Through RL, DeepSeek-R1-Zero\nnaturally emerges with numerous powerful and intriguing reasoning behaviors.\nHowever, it encounters challenges such as poor readability, and language\nmixing. To address these issues and further enhance reasoning performance, we\nintroduce DeepSeek-R1, which incorporates multi-stage training and cold-start\ndata before RL. DeepSeek-R1 achieves performance comparable to OpenAI-o1-1217\non reasoning tasks. To support the research community, we open-source\nDeepSeek-R1-Zero, DeepSeek-R1, and six dense models (1.5B, 7B, 8B, 14B, 32B,\n70B) distilled from DeepSeek-R1 based on Qwen and Llama.",
+  "pdf_url": "http://arxiv.org/pdf/2501.12948v1",
+  "categories": [
+    "cs.CL",
+    "cs.AI",
+    "cs.LG"
+  ]
+}
\ No newline at end of file
diff --git a/src/main.py b/src/main.py
index 16059f1..0351012 100644
--- a/src/main.py
+++ b/src/main.py
@@ -109,6 +109,47 @@ async def fetch_papers(days: int = 7, categories: Optional[List[str]] = None) ->
         logger.error(f"Error fetching papers: {e}")
         raise
 
+async def fetch_single_paper(paper_id: str) -> None:
+    """Fetch and analyze a single paper by ID."""
+    print(f"\nFetching paper: {paper_id}")
+
+    async with ArxivClient() as client, AgentController() as agent:
+        try:
+            # Get paper from arXiv
+            paper = await client.get_paper_by_id(paper_id)
+            if not paper:
+                print(f"\nPaper {paper_id} not found on arXiv.")
+                return
+
+            print(f"\nFound paper: {paper['title']}")
+            print(f"Authors: {', '.join(paper['authors'])}")
+
+            # Analyze the paper
+            analysis = await agent.analyze_paper(paper)
+            if analysis:
+                print("\nAnalysis:")
+                print("=" * 80)
+                print("\nSummary:")
+                print(analysis.get('summary', 'No summary available'))
+
+                print("\nTechnical Concepts:")
+                print(analysis.get('technical_concepts', 'No technical concepts available'))
+
+                fluff = analysis.get('fluff', {})
+                score = fluff.get('score')
+                if score is not None:
+                    color = '\033[92m' if score < 30 else '\033[93m' if score < 70 else '\033[91m'
+                    reset = '\033[0m'
+                    print(f"\nFluff Score: {color}{score}/100{reset}")
+                    print("Analysis:")
+                    print(fluff.get('explanation', 'No explanation available'))
+            else:
+                print("\nPaper was already analyzed or an error occurred during analysis.")
+
+        except Exception as e:
+            logger.error(f"Error processing paper: {e}")
+            raise
+
 async def process_query(query: str) -> None:
     """Process a search query and display results."""
     async with AgentController() as agent:
@@ -148,9 +189,39 @@ async def process_query(query: str) -> None:
         logger.error(f"Error processing query: {e}")
         raise
 
+async def search_arxiv(query: str, category: Optional[str] = None, max_results: int = 10) -> None:
+    """Search papers directly on arXiv."""
+    print(f"\nSearching arXiv for: {query}")
+    if category:
+        print(f"Category: {category}")
+
+    async with ArxivClient() as client:
+        try:
+            papers = await client.fetch_papers(query=query, category=category, max_results=max_results)
+
+            if not papers:
+                print("\nNo papers found matching your query.")
+                return
+
+            print(f"\nFound {len(papers)} papers:")
+            print("=" * 80)
+
+            for i, paper in enumerate(papers, 1):
+                print(f"\n{i}. {paper['title']}")
+                print(f"   Authors: {', '.join(paper['authors'])}")
+                print(f"   arXiv ID: {paper['entry_id']}")
+                print(f"   PDF: {paper['pdf_url']}")
+                print("\nAbstract:")
+                print(paper.get('abstract', 'No abstract available'))
+                print("-" * 80)
+
+        except Exception as e:
+            logger.error(f"Error searching arXiv: {e}")
+            raise
+
 async def main():
     """Main application entry point."""
-    parser = argparse.ArgumentParser(description='Fetch and analyze arXiv papers')
+    parser = argparse.ArgumentParser(description='AI Paper Analysis System')
     subparsers = parser.add_subparsers(dest='command', help='Command to run')
 
     # Fetch papers command
@@ -160,16 +231,31 @@
     fetch_parser.add_argument('--categories', nargs='+', default=['cs.AI'],
                               help='arXiv categories to fetch')
 
-    # Search papers command
-    search_parser = subparsers.add_parser('search', help='Search papers')
+    # Fetch single paper command
+    fetch_one_parser = subparsers.add_parser('fetch-paper', help='Fetch and analyze a single paper')
+    fetch_one_parser.add_argument('paper_id', help='arXiv paper ID (e.g., 2502.06788v1)')
+
+    # Search local papers command
+    search_parser = subparsers.add_parser('search', help='Search papers in local database')
     search_parser.add_argument('query', help='Search query')
 
+    # Search arXiv directly command
+    arxiv_parser = subparsers.add_parser('arxiv-search', help='Search papers directly on arXiv')
+    arxiv_parser.add_argument('query', help='Search query')
+    arxiv_parser.add_argument('--category', help='arXiv category (e.g., cs.AI)')
+    arxiv_parser.add_argument('--max-results', type=int, default=10,
+                              help='Maximum number of results to return')
+
     args = parser.parse_args()
 
     if args.command == 'fetch':
         await fetch_papers(days=args.days, categories=args.categories)
+    elif args.command == 'fetch-paper':
+        await fetch_single_paper(args.paper_id)
    elif args.command == 'search':
         await process_query(args.query)
+    elif args.command == 'arxiv-search':
+        await search_arxiv(args.query, args.category, args.max_results)
     else:
         parser.print_help()
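
The fetch-paper flow above leans on ArxivClient.get_paper_by_id(), which this diff calls but does not define. The sketch below shows one way such a lookup could work against the public arXiv Atom API (http://export.arxiv.org/api/query); the function name and the returned keys (entry_id, title, authors, abstract, pdf_url) mirror what fetch_single_paper() and search_arxiv() read, while the aiohttp/ElementTree plumbing, the endpoint, and the simplified error handling are assumptions rather than the repository's actual implementation.

import xml.etree.ElementTree as ET
from typing import Optional

import aiohttp

ATOM = "{http://www.w3.org/2005/Atom}"  # Atom namespace used by the arXiv API feed

async def get_paper_by_id(paper_id: str) -> Optional[dict]:
    """Hypothetical standalone lookup of one paper's metadata by arXiv ID."""
    url = f"http://export.arxiv.org/api/query?id_list={paper_id}"
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            resp.raise_for_status()
            feed = ET.fromstring(await resp.text())

    # Error handling is simplified: a malformed ID may still yield an error entry.
    entry = feed.find(f"{ATOM}entry")
    if entry is None:
        return None

    entry_id = entry.findtext(f"{ATOM}id", default="")
    return {
        "entry_id": entry_id,
        "title": " ".join(entry.findtext(f"{ATOM}title", default="").split()),
        "authors": [a.findtext(f"{ATOM}name", default="")
                    for a in entry.findall(f"{ATOM}author")],
        "abstract": entry.findtext(f"{ATOM}summary", default="").strip(),
        # The PDF URL mirrors the abstract URL, as in papers/2501_12948v1.json.
        "pdf_url": entry_id.replace("/abs/", "/pdf/"),
    }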
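
With these hunks applied, the CLI grows from two subcommands to four. Assuming the module is still launched as python -m src.main (the entry point itself is outside this diff, so the exact invocation is an assumption), the new paths look roughly like:

    python -m src.main fetch-paper 2501.12948v1
    python -m src.main arxiv-search "reasoning models" --category cs.AI --max-results 5
    python -m src.main search "reinforcement learning"

fetch-paper routes to fetch_single_paper() and analyzes a single paper; arxiv-search routes to search_arxiv() and only prints arXiv metadata without touching the local store; search keeps querying the local database via process_query().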