Improved fetch-all: skip papers already in the DB; added a progress bar
This commit is contained in:
parent
d66697e897
commit
e4adde5484
@@ -1,213 +0,0 @@
{
  "entry_id": "http://arxiv.org/abs/2501.12948v1",
  "title": "DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning",
  "authors": [
    "DeepSeek-AI",
    "Daya Guo",
    "Dejian Yang",
    "Haowei Zhang",
    "Junxiao Song",
    "Ruoyu Zhang",
    "Runxin Xu",
    "Qihao Zhu",
    "Shirong Ma",
    "Peiyi Wang",
    "Xiao Bi",
    "Xiaokang Zhang",
    "Xingkai Yu",
    "Yu Wu",
    "Z. F. Wu",
    "Zhibin Gou",
    "Zhihong Shao",
    "Zhuoshu Li",
    "Ziyi Gao",
    "Aixin Liu",
    "Bing Xue",
    "Bingxuan Wang",
    "Bochao Wu",
    "Bei Feng",
    "Chengda Lu",
    "Chenggang Zhao",
    "Chengqi Deng",
    "Chenyu Zhang",
    "Chong Ruan",
    "Damai Dai",
    "Deli Chen",
    "Dongjie Ji",
    "Erhang Li",
    "Fangyun Lin",
    "Fucong Dai",
    "Fuli Luo",
    "Guangbo Hao",
    "Guanting Chen",
    "Guowei Li",
    "H. Zhang",
    "Han Bao",
    "Hanwei Xu",
    "Haocheng Wang",
    "Honghui Ding",
    "Huajian Xin",
    "Huazuo Gao",
    "Hui Qu",
    "Hui Li",
    "Jianzhong Guo",
    "Jiashi Li",
    "Jiawei Wang",
    "Jingchang Chen",
    "Jingyang Yuan",
    "Junjie Qiu",
    "Junlong Li",
    "J. L. Cai",
    "Jiaqi Ni",
    "Jian Liang",
    "Jin Chen",
    "Kai Dong",
    "Kai Hu",
    "Kaige Gao",
    "Kang Guan",
    "Kexin Huang",
    "Kuai Yu",
    "Lean Wang",
    "Lecong Zhang",
    "Liang Zhao",
    "Litong Wang",
    "Liyue Zhang",
    "Lei Xu",
    "Leyi Xia",
    "Mingchuan Zhang",
    "Minghua Zhang",
    "Minghui Tang",
    "Meng Li",
    "Miaojun Wang",
    "Mingming Li",
    "Ning Tian",
    "Panpan Huang",
    "Peng Zhang",
    "Qiancheng Wang",
    "Qinyu Chen",
    "Qiushi Du",
    "Ruiqi Ge",
    "Ruisong Zhang",
    "Ruizhe Pan",
    "Runji Wang",
    "R. J. Chen",
    "R. L. Jin",
    "Ruyi Chen",
    "Shanghao Lu",
    "Shangyan Zhou",
    "Shanhuang Chen",
    "Shengfeng Ye",
    "Shiyu Wang",
    "Shuiping Yu",
    "Shunfeng Zhou",
    "Shuting Pan",
    "S. S. Li",
    "Shuang Zhou",
    "Shaoqing Wu",
    "Shengfeng Ye",
    "Tao Yun",
    "Tian Pei",
    "Tianyu Sun",
    "T. Wang",
    "Wangding Zeng",
    "Wanjia Zhao",
    "Wen Liu",
    "Wenfeng Liang",
    "Wenjun Gao",
    "Wenqin Yu",
    "Wentao Zhang",
    "W. L. Xiao",
    "Wei An",
    "Xiaodong Liu",
    "Xiaohan Wang",
    "Xiaokang Chen",
    "Xiaotao Nie",
    "Xin Cheng",
    "Xin Liu",
    "Xin Xie",
    "Xingchao Liu",
    "Xinyu Yang",
    "Xinyuan Li",
    "Xuecheng Su",
    "Xuheng Lin",
    "X. Q. Li",
    "Xiangyue Jin",
    "Xiaojin Shen",
    "Xiaosha Chen",
    "Xiaowen Sun",
    "Xiaoxiang Wang",
    "Xinnan Song",
    "Xinyi Zhou",
    "Xianzu Wang",
    "Xinxia Shan",
    "Y. K. Li",
    "Y. Q. Wang",
    "Y. X. Wei",
    "Yang Zhang",
    "Yanhong Xu",
    "Yao Li",
    "Yao Zhao",
    "Yaofeng Sun",
    "Yaohui Wang",
    "Yi Yu",
    "Yichao Zhang",
    "Yifan Shi",
    "Yiliang Xiong",
    "Ying He",
    "Yishi Piao",
    "Yisong Wang",
    "Yixuan Tan",
    "Yiyang Ma",
    "Yiyuan Liu",
    "Yongqiang Guo",
    "Yuan Ou",
    "Yuduan Wang",
    "Yue Gong",
    "Yuheng Zou",
    "Yujia He",
    "Yunfan Xiong",
    "Yuxiang Luo",
    "Yuxiang You",
    "Yuxuan Liu",
    "Yuyang Zhou",
    "Y. X. Zhu",
    "Yanhong Xu",
    "Yanping Huang",
    "Yaohui Li",
    "Yi Zheng",
    "Yuchen Zhu",
    "Yunxian Ma",
    "Ying Tang",
    "Yukun Zha",
    "Yuting Yan",
    "Z. Z. Ren",
    "Zehui Ren",
    "Zhangli Sha",
    "Zhe Fu",
    "Zhean Xu",
    "Zhenda Xie",
    "Zhengyan Zhang",
    "Zhewen Hao",
    "Zhicheng Ma",
    "Zhigang Yan",
    "Zhiyu Wu",
    "Zihui Gu",
    "Zijia Zhu",
    "Zijun Liu",
    "Zilin Li",
    "Ziwei Xie",
    "Ziyang Song",
    "Zizheng Pan",
    "Zhen Huang",
    "Zhipeng Xu",
    "Zhongyu Zhang",
    "Zhen Zhang"
  ],
  "summary": "We introduce our first-generation reasoning models, DeepSeek-R1-Zero and\nDeepSeek-R1. DeepSeek-R1-Zero, a model trained via large-scale reinforcement\nlearning (RL) without supervised fine-tuning (SFT) as a preliminary step,\ndemonstrates remarkable reasoning capabilities. Through RL, DeepSeek-R1-Zero\nnaturally emerges with numerous powerful and intriguing reasoning behaviors.\nHowever, it encounters challenges such as poor readability, and language\nmixing. To address these issues and further enhance reasoning performance, we\nintroduce DeepSeek-R1, which incorporates multi-stage training and cold-start\ndata before RL. DeepSeek-R1 achieves performance comparable to OpenAI-o1-1217\non reasoning tasks. To support the research community, we open-source\nDeepSeek-R1-Zero, DeepSeek-R1, and six dense models (1.5B, 7B, 8B, 14B, 32B,\n70B) distilled from DeepSeek-R1 based on Qwen and Llama.",
  "pdf_url": "http://arxiv.org/pdf/2501.12948v1",
  "categories": [
    "cs.CL",
    "cs.AI",
    "cs.LG"
  ]
}
@@ -1,15 +0,0 @@
{
  "title": "ApplE: An Applied Ethics Ontology with Event Context",
  "authors": [
    "Aisha Aijaz",
    "Raghava Mutharaju",
    "Manohar Kumar"
  ],
  "abstract": "Applied ethics is ubiquitous in most domains, requiring much deliberation due\nto its philosophical nature. Varying views often lead to conflicting courses of\naction where ethical dilemmas become challenging to resolve. Although many\nfactors contribute to such a decision, the major driving forces can be\ndiscretized and thus simplified to provide an indicative answer. Knowledge\nrepresentation and reasoning offer a way to explicitly translate abstract\nethical concepts into applicable principles within the context of an event. To\nachieve this, we propose ApplE, an Applied Ethics ontology that captures\nphilosophical theory and event context to holistically describe the morality of\nan action. The development process adheres to a modified version of the\nSimplified Agile Methodology for Ontology Development (SAMOD) and utilizes\nstandard design and publication practices. Using ApplE, we model a use case\nfrom the bioethics domain that demonstrates our ontology's social and\nscientific value. Apart from the ontological reasoning and quality checks,\nApplE is also evaluated using the three-fold testing process of SAMOD. ApplE\nfollows FAIR principles and aims to be a viable resource for applied ethicists\nand ontology engineers.",
  "pdf_url": "http://arxiv.org/pdf/2502.05110v1",
  "entry_id": "http://arxiv.org/abs/2502.05110v1",
  "categories": [
    "cs.CY",
    "cs.AI"
  ]
}
@@ -1,15 +0,0 @@
{
  "title": "Flexible and Efficient Grammar-Constrained Decoding",
  "authors": [
    "Kanghee Park",
    "Timothy Zhou",
    "Loris D'Antoni"
  ],
  "abstract": "Large Language Models (LLMs) are often asked to generate structured outputs\nthat obey precise syntactic rules, such as code snippets or formatted data.\nGrammar-constrained decoding (GCD) can guarantee that LLM outputs matches such\nrules by masking out tokens that will provably lead to outputs that do not\nbelong to a specified context-free grammar (CFG). To guarantee soundness, GCD\nalgorithms have to compute how a given LLM subword tokenizer can align with the\ntokens used\n by a given context-free grammar and compute token masks based on this\ninformation. Doing so efficiently is challenging and existing GCD algorithms\nrequire tens of minutes to preprocess common grammars. We present a new GCD\nalgorithm together with an implementation that offers 17.71x faster offline\npreprocessing than existing approaches while preserving state-of-the-art\nefficiency in online mask computation.",
  "pdf_url": "http://arxiv.org/pdf/2502.05111v1",
  "entry_id": "http://arxiv.org/abs/2502.05111v1",
  "categories": [
    "cs.CL",
    "cs.AI"
  ]
}
@@ -1,21 +0,0 @@
{
  "title": "\"It Felt Like I Was Left in the Dark\": Exploring Information Needs and Design Opportunities for Family Caregivers of Older Adult Patients in Critical Care Settings",
  "authors": [
    "Shihan Fu",
    "Bingsheng Yao",
    "Smit Desai",
    "Yuqi Hu",
    "Yuling Sun",
    "Samantha Stonbraker",
    "Yanjun Gao",
    "Elizabeth M. Goldberg",
    "Dakuo Wang"
  ],
  "abstract": "Older adult patients constitute a rapidly growing subgroup of Intensive Care\nUnit (ICU) patients. In these situations, their family caregivers are expected\nto represent the unconscious patients to access and interpret patients' medical\ninformation. However, caregivers currently have to rely on overloaded\nclinicians for information updates and typically lack the health literacy to\nunderstand complex medical information. Our project aims to explore the\ninformation needs of caregivers of ICU older adult patients, from which we can\npropose design opportunities to guide future AI systems. The project begins\nwith formative interviews with 11 caregivers to identify their challenges in\naccessing and interpreting medical information; From these findings, we then\nsynthesize design requirements and propose an AI system prototype to cope with\ncaregivers' challenges. The system prototype has two key features: a timeline\nvisualization to show the AI extracted and summarized older adult patients' key\nmedical events; and an LLM-based chatbot to provide context-aware informational\nsupport. We conclude our paper by reporting on the follow-up user evaluation of\nthe system and discussing future AI-based systems for ICU caregivers of older\nadults.",
  "pdf_url": "http://arxiv.org/pdf/2502.05115v1",
  "entry_id": "http://arxiv.org/abs/2502.05115v1",
  "categories": [
    "cs.HC",
    "cs.AI"
  ]
}
@@ -1,25 +0,0 @@
{
  "title": "Latent Swap Joint Diffusion for Long-Form Audio Generation",
  "authors": [
    "Yusheng Dai",
    "Chenxi Wang",
    "Chang Li",
    "Chen Wang",
    "Jun Du",
    "Kewei Li",
    "Ruoyu Wang",
    "Jiefeng Ma",
    "Lei Sun",
    "Jianqing Gao"
  ],
  "abstract": "Previous work on long-form audio generation using global-view diffusion or\niterative generation demands significant training or inference costs. While\nrecent advancements in multi-view joint diffusion for panoramic generation\nprovide an efficient option, they struggle with spectrum generation with severe\noverlap distortions and high cross-view consistency costs. We initially explore\nthis phenomenon through the connectivity inheritance of latent maps and uncover\nthat averaging operations excessively smooth the high-frequency components of\nthe latent map. To address these issues, we propose Swap Forward (SaFa), a\nframe-level latent swap framework that synchronizes multiple diffusions to\nproduce a globally coherent long audio with more spectrum details in a\nforward-only manner. At its core, the bidirectional Self-Loop Latent Swap is\napplied between adjacent views, leveraging stepwise diffusion trajectory to\nadaptively enhance high-frequency components without disrupting low-frequency\ncomponents. Furthermore, to ensure cross-view consistency, the unidirectional\nReference-Guided Latent Swap is applied between the reference and the\nnon-overlap regions of each subview during the early stages, providing\ncentralized trajectory guidance. Quantitative and qualitative experiments\ndemonstrate that SaFa significantly outperforms existing joint diffusion\nmethods and even training-based long audio generation models. Moreover, we find\nthat it also adapts well to panoramic generation, achieving comparable\nstate-of-the-art performance with greater efficiency and model\ngeneralizability. Project page is available at https://swapforward.github.io/.",
  "pdf_url": "http://arxiv.org/pdf/2502.05130v1",
  "entry_id": "http://arxiv.org/abs/2502.05130v1",
  "categories": [
    "cs.SD",
    "cs.AI",
    "cs.CV",
    "cs.MM",
    "eess.AS"
  ]
}
@@ -1,17 +0,0 @@
{
  "title": "LP-DETR: Layer-wise Progressive Relations for Object Detection",
  "authors": [
    "Zhengjian Kang",
    "Ye Zhang",
    "Xiaoyu Deng",
    "Xintao Li",
    "Yongzhe Zhang"
  ],
  "abstract": "This paper presents LP-DETR (Layer-wise Progressive DETR), a novel approach\nthat enhances DETR-based object detection through multi-scale relation\nmodeling. Our method introduces learnable spatial relationships between object\nqueries through a relation-aware self-attention mechanism, which adaptively\nlearns to balance different scales of relations (local, medium and global)\nacross decoder layers. This progressive design enables the model to effectively\ncapture evolving spatial dependencies throughout the detection pipeline.\nExtensive experiments on COCO 2017 dataset demonstrate that our method improves\nboth convergence speed and detection accuracy compared to standard\nself-attention module. The proposed method achieves competitive results,\nreaching 52.3\\% AP with 12 epochs and 52.5\\% AP with 24 epochs using ResNet-50\nbackbone, and further improving to 58.0\\% AP with Swin-L backbone. Furthermore,\nour analysis reveals an interesting pattern: the model naturally learns to\nprioritize local spatial relations in early decoder layers while gradually\nshifting attention to broader contexts in deeper layers, providing valuable\ninsights for future research in object detection.",
  "pdf_url": "http://arxiv.org/pdf/2502.05147v1",
  "entry_id": "http://arxiv.org/abs/2502.05147v1",
  "categories": [
    "cs.CV",
    "cs.AI"
  ]
}
@@ -1,28 +0,0 @@
{
  "title": "Transforming Science with Large Language Models: A Survey on AI-assisted Scientific Discovery, Experimentation, Content Generation, and Evaluation",
  "authors": [
    "Steffen Eger",
    "Yong Cao",
    "Jennifer D'Souza",
    "Andreas Geiger",
    "Christian Greisinger",
    "Stephanie Gross",
    "Yufang Hou",
    "Brigitte Krenn",
    "Anne Lauscher",
    "Yizhi Li",
    "Chenghua Lin",
    "Nafise Sadat Moosavi",
    "Wei Zhao",
    "Tristan Miller"
  ],
  "abstract": "With the advent of large multimodal language models, science is now at a\nthreshold of an AI-based technological transformation. Recently, a plethora of\nnew AI models and tools has been proposed, promising to empower researchers and\nacademics worldwide to conduct their research more effectively and efficiently.\nThis includes all aspects of the research cycle, especially (1) searching for\nrelevant literature; (2) generating research ideas and conducting\nexperimentation; generating (3) text-based and (4) multimodal content (e.g.,\nscientific figures and diagrams); and (5) AI-based automatic peer review. In\nthis survey, we provide an in-depth overview over these exciting recent\ndevelopments, which promise to fundamentally alter the scientific research\nprocess for good. Our survey covers the five aspects outlined above, indicating\nrelevant datasets, methods and results (including evaluation) as well as\nlimitations and scope for future research. Ethical concerns regarding\nshortcomings of these tools and potential for misuse (fake science, plagiarism,\nharms to research integrity) take a particularly prominent place in our\ndiscussion. We hope that our survey will not only become a reference guide for\nnewcomers to the field but also a catalyst for new AI-based initiatives in the\narea of \"AI4Science\".",
  "pdf_url": "http://arxiv.org/pdf/2502.05151v1",
  "entry_id": "http://arxiv.org/abs/2502.05151v1",
  "categories": [
    "cs.CL",
    "cs.AI",
    "cs.CV",
    "cs.LG"
  ]
}
@@ -1,24 +0,0 @@
{
  "title": "Joint MoE Scaling Laws: Mixture of Experts Can Be Memory Efficient",
  "authors": [
    "Jan Ludziejewski",
    "Maciej Pióro",
    "Jakub Krajewski",
    "Maciej Stefaniak",
    "Michał Krutul",
    "Jan Małaśnicki",
    "Marek Cygan",
    "Piotr Sankowski",
    "Kamil Adamczewski",
    "Piotr Miłoś",
    "Sebastian Jaszczur"
  ],
  "abstract": "Mixture of Experts (MoE) architectures have significantly increased\ncomputational efficiency in both research and real-world applications of\nlarge-scale machine learning models. However, their scalability and efficiency\nunder memory constraints remain relatively underexplored. In this work, we\npresent joint scaling laws for dense and MoE models, incorporating key factors\nsuch as the number of active parameters, dataset size, and the number of\nexperts. Our findings provide a principled framework for selecting the optimal\nMoE configuration under fixed memory and compute budgets. Surprisingly, we show\nthat MoE models can be more memory-efficient than dense models, contradicting\nconventional wisdom. To derive and validate the theoretical predictions of our\nscaling laws, we conduct over 280 experiments with up to 2.7B active parameters\nand up to 5B total parameters. These results offer actionable insights for\ndesigning and deploying MoE models in practical large-scale training scenarios.",
  "pdf_url": "http://arxiv.org/pdf/2502.05172v1",
  "entry_id": "http://arxiv.org/abs/2502.05172v1",
  "categories": [
    "cs.LG",
    "cs.AI",
    "cs.CL"
  ]
}
@@ -1,17 +0,0 @@
{
  "title": "MELON: Indirect Prompt Injection Defense via Masked Re-execution and Tool Comparison",
  "authors": [
    "Kaijie Zhu",
    "Xianjun Yang",
    "Jindong Wang",
    "Wenbo Guo",
    "William Yang Wang"
  ],
  "abstract": "Recent research has explored that LLM agents are vulnerable to indirect\nprompt injection (IPI) attacks, where malicious tasks embedded in\ntool-retrieved information can redirect the agent to take unauthorized actions.\nExisting defenses against IPI have significant limitations: either require\nessential model training resources, lack effectiveness against sophisticated\nattacks, or harm the normal utilities. We present MELON (Masked re-Execution\nand TooL comparisON), a novel IPI defense. Our approach builds on the\nobservation that under a successful attack, the agent's next action becomes\nless dependent on user tasks and more on malicious tasks. Following this, we\ndesign MELON to detect attacks by re-executing the agent's trajectory with a\nmasked user prompt modified through a masking function. We identify an attack\nif the actions generated in the original and masked executions are similar. We\nalso include three key designs to reduce the potential false positives and\nfalse negatives. Extensive evaluation on the IPI benchmark AgentDojo\ndemonstrates that MELON outperforms SOTA defenses in both attack prevention and\nutility preservation. Moreover, we show that combining MELON with a SOTA prompt\naugmentation defense (denoted as MELON-Aug) further improves its performance.\nWe also conduct a detailed ablation study to validate our key designs.",
  "pdf_url": "http://arxiv.org/pdf/2502.05174v1",
  "entry_id": "http://arxiv.org/abs/2502.05174v1",
  "categories": [
    "cs.CR",
    "cs.AI"
  ]
}
@@ -1,16 +0,0 @@
{
  "title": "RelGNN: Composite Message Passing for Relational Deep Learning",
  "authors": [
    "Tianlang Chen",
    "Charilaos Kanatsoulis",
    "Jure Leskovec"
  ],
  "abstract": "Predictive tasks on relational databases are critical in real-world\napplications spanning e-commerce, healthcare, and social media. To address\nthese tasks effectively, Relational Deep Learning (RDL) encodes relational data\nas graphs, enabling Graph Neural Networks (GNNs) to exploit relational\nstructures for improved predictions. However, existing heterogeneous GNNs often\noverlook the intrinsic structural properties of relational databases, leading\nto modeling inefficiencies. Here we introduce RelGNN, a novel GNN framework\nspecifically designed to capture the unique characteristics of relational\ndatabases. At the core of our approach is the introduction of atomic routes,\nwhich are sequences of nodes forming high-order tripartite structures. Building\nupon these atomic routes, RelGNN designs new composite message passing\nmechanisms between heterogeneous nodes, allowing direct single-hop interactions\nbetween them. This approach avoids redundant aggregations and mitigates\ninformation entanglement, ultimately leading to more efficient and accurate\npredictive modeling. RelGNN is evaluated on 30 diverse real-world tasks from\nRelBench (Fey et al., 2024), and consistently achieves state-of-the-art\naccuracy with up to 25% improvement.",
  "pdf_url": "http://arxiv.org/pdf/2502.06784v1",
  "entry_id": "http://arxiv.org/abs/2502.06784v1",
  "categories": [
    "cs.LG",
    "cs.AI",
    "cs.DB"
  ]
}
@@ -1,17 +0,0 @@
{
  "title": "Matryoshka Quantization",
  "authors": [
    "Pranav Nair",
    "Puranjay Datta",
    "Jeff Dean",
    "Prateek Jain",
    "Aditya Kusupati"
  ],
  "abstract": "Quantizing model weights is critical for reducing the communication and\ninference costs of large models. However, quantizing models -- especially to\nlow precisions like int4 or int2 -- requires a trade-off in model quality;\nint2, in particular, is known to severely degrade model quality. Consequently,\npractitioners are often forced to maintain multiple models with different\nquantization levels or serve a single model that best satisfies the\nquality-latency trade-off. On the other hand, integer data types, such as int8,\ninherently possess a nested (Matryoshka) structure where smaller bit-width\nintegers, like int4 or int2, are nested within the most significant bits. This\npaper proposes Matryoshka Quantization (MatQuant), a novel multi-scale\nquantization technique that addresses the challenge of needing multiple\nquantized models. It allows training and maintaining just one model, which can\nthen be served at different precision levels. Furthermore, due to the\nco-training and co-distillation regularization provided by MatQuant, the int2\nprecision models extracted by MatQuant can be up to $10\\%$ more accurate than\nstandard int2 quantization (using techniques like QAT or OmniQuant). This\nrepresents significant progress in model quantization, demonstrated by the fact\nthat, with the same recipe, an int2 FFN-quantized Gemma-2 9B model is more\naccurate than an int8 FFN-quantized Gemma-2 2B model.",
  "pdf_url": "http://arxiv.org/pdf/2502.06786v1",
  "entry_id": "http://arxiv.org/abs/2502.06786v1",
  "categories": [
    "cs.LG",
    "cs.AI"
  ]
}
@@ -1,21 +0,0 @@
{
  "title": "EVEv2: Improved Baselines for Encoder-Free Vision-Language Models",
  "authors": [
    "Haiwen Diao",
    "Xiaotong Li",
    "Yufeng Cui",
    "Yueze Wang",
    "Haoge Deng",
    "Ting Pan",
    "Wenxuan Wang",
    "Huchuan Lu",
    "Xinlong Wang"
  ],
  "abstract": "Existing encoder-free vision-language models (VLMs) are rapidly narrowing the\nperformance gap with their encoder-based counterparts, highlighting the\npromising potential for unified multimodal systems with structural simplicity\nand efficient deployment. We systematically clarify the performance gap between\nVLMs using pre-trained vision encoders, discrete tokenizers, and minimalist\nvisual layers from scratch, deeply excavating the under-examined\ncharacteristics of encoder-free VLMs. We develop efficient strategies for\nencoder-free VLMs that rival mainstream encoder-based ones. After an in-depth\ninvestigation, we launch EVEv2.0, a new and improved family of encoder-free\nVLMs. We show that: (i) Properly decomposing and hierarchically associating\nvision and language within a unified model reduces interference between\nmodalities. (ii) A well-designed training strategy enables effective\noptimization for encoder-free VLMs. Through extensive evaluation, our EVEv2.0\nrepresents a thorough study for developing a decoder-only architecture across\nmodalities, demonstrating superior data efficiency and strong vision-reasoning\ncapability. Code is publicly available at: https://github.com/baaivision/EVE.",
  "pdf_url": "http://arxiv.org/pdf/2502.06788v1",
  "entry_id": "http://arxiv.org/abs/2502.06788v1",
  "categories": [
    "cs.CV",
    "cs.AI"
  ]
}
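The twelve removed files above are cached arXiv paper records that share a single shape; the only variation is that the first record stores its abstract under a "summary" key while the later ones use "abstract". A minimal, hypothetical loader sketch follows; PaperRecord and load_record are illustrative names, not part of this repository.

# Hypothetical sketch, not part of this commit: models the shape of the
# removed JSON records for type-safe loading.
import json
from typing import List, TypedDict

class PaperRecord(TypedDict):
    entry_id: str        # e.g. "http://arxiv.org/abs/2501.12948v1"
    title: str
    authors: List[str]
    summary: str         # first record uses "summary"; the others use "abstract"
    pdf_url: str
    categories: List[str]

def load_record(path: str) -> PaperRecord:
    """Read one cached paper record and normalize its key variants."""
    with open(path, encoding="utf-8") as f:
        data = json.load(f)
    # Normalize the two key variants seen in the removed files.
    if "abstract" in data and "summary" not in data:
        data["summary"] = data.pop("abstract")
    return data  # type: ignore[return-value]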
src/main.py (38 lines changed)
@@ -113,10 +113,46 @@ async def fetch_all_papers(categories: List[str], max_results: int = 1000):
     """Fetch all papers from specified categories."""
     async with ArxivClient() as client, AgentController() as agent:
         for category in categories:
+            print(f"\nProcessing category: {category}")
             papers = await client.fetch_papers(category=category, max_results=max_results)
             print(f"Found {len(papers)} papers in {category}")
+
+            processed = 0
+            skipped = 0
+            failed = 0
+
             for paper in papers:
-                await agent.analyze_paper(paper)
+                try:
+                    # Extract paper ID
+                    paper_id = paper.get('entry_id', '').split('/')[-1]
+
+                    # Check if paper exists in database
+                    existing = await agent.paper_store.get_paper(paper_id)
+                    if existing:
+                        print(f"Skipping existing paper: {paper['title'][:60]}...")
+                        skipped += 1
+                        continue
+
+                    # Process new paper
+                    print(f"\nAnalyzing: {paper['title']}")
+                    await agent.analyze_paper(paper)
+                    processed += 1
+                    print(f"Successfully analyzed paper {processed}/{len(papers)}")
+
+                except Exception as e:
+                    logger.error(f"Error processing paper: {e}")
+                    print(f"Failed to analyze paper: {paper.get('title', 'Unknown Title')}")
+                    failed += 1
+
+                print("-" * 80)
+
+            # Print summary for this category
+            print(f"\nCategory {category} Summary:")
+            print(f"Total papers found: {len(papers)}")
+            print(f"Successfully processed: {processed}")
+            print(f"Skipped (already exists): {skipped}")
+            print(f"Failed to process: {failed}")
+            print("=" * 80)

 async def fetch_single_paper(paper_id: str) -> None:
     """Fetch and analyze a single paper by ID."""
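For reference, the skip logic keys on the arXiv ID taken from the record's entry_id (e.g. "http://arxiv.org/abs/2502.05110v1" yields "2502.05110v1"), and the "progress bar" in this commit is the counter-based print output above. If an actual terminal progress bar were wanted instead, the same loop could be wrapped with tqdm. A minimal sketch under that assumption; fetch_all_papers_with_bar is a hypothetical name, and the ArxivClient/AgentController interfaces are taken from the diff:

# Minimal sketch, NOT the committed code: the same skip-if-exists loop,
# but driven by a tqdm progress bar instead of per-paper prints.
# Assumes `tqdm` is installed (pip install tqdm).
from tqdm import tqdm

async def fetch_all_papers_with_bar(categories, max_results=1000):
    async with ArxivClient() as client, AgentController() as agent:
        for category in categories:
            papers = await client.fetch_papers(category=category, max_results=max_results)
            bar = tqdm(papers, desc=category, unit="paper")
            for paper in bar:
                # entry_id like ".../abs/2502.05110v1" -> "2502.05110v1"
                paper_id = paper.get('entry_id', '').split('/')[-1]
                if await agent.paper_store.get_paper(paper_id):
                    bar.set_postfix_str("skipped")  # flag duplicates on the bar
                    continue
                await agent.analyze_paper(paper)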