{
  "entry_id": "http://arxiv.org/abs/2501.12948v1",
  "title": "DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning",
  "authors": [
    "DeepSeek-AI",
    "Daya Guo",
    "Dejian Yang",
    "Haowei Zhang",
    "Junxiao Song",
    "Ruoyu Zhang",
    "Runxin Xu",
    "Qihao Zhu",
    "Shirong Ma",
    "Peiyi Wang",
    "Xiao Bi",
    "Xiaokang Zhang",
    "Xingkai Yu",
    "Yu Wu",
    "Z. F. Wu",
    "Zhibin Gou",
    "Zhihong Shao",
    "Zhuoshu Li",
    "Ziyi Gao",
    "Aixin Liu",
    "Bing Xue",
    "Bingxuan Wang",
    "Bochao Wu",
    "Bei Feng",
    "Chengda Lu",
    "Chenggang Zhao",
    "Chengqi Deng",
    "Chenyu Zhang",
    "Chong Ruan",
    "Damai Dai",
    "Deli Chen",
    "Dongjie Ji",
    "Erhang Li",
    "Fangyun Lin",
    "Fucong Dai",
    "Fuli Luo",
    "Guangbo Hao",
    "Guanting Chen",
    "Guowei Li",
    "H. Zhang",
    "Han Bao",
    "Hanwei Xu",
    "Haocheng Wang",
    "Honghui Ding",
    "Huajian Xin",
    "Huazuo Gao",
    "Hui Qu",
    "Hui Li",
    "Jianzhong Guo",
    "Jiashi Li",
    "Jiawei Wang",
    "Jingchang Chen",
    "Jingyang Yuan",
    "Junjie Qiu",
    "Junlong Li",
    "J. L. Cai",
    "Jiaqi Ni",
    "Jian Liang",
    "Jin Chen",
    "Kai Dong",
    "Kai Hu",
    "Kaige Gao",
    "Kang Guan",
    "Kexin Huang",
    "Kuai Yu",
    "Lean Wang",
    "Lecong Zhang",
    "Liang Zhao",
    "Litong Wang",
    "Liyue Zhang",
    "Lei Xu",
    "Leyi Xia",
    "Mingchuan Zhang",
    "Minghua Zhang",
    "Minghui Tang",
    "Meng Li",
    "Miaojun Wang",
    "Mingming Li",
    "Ning Tian",
    "Panpan Huang",
    "Peng Zhang",
    "Qiancheng Wang",
    "Qinyu Chen",
    "Qiushi Du",
    "Ruiqi Ge",
    "Ruisong Zhang",
    "Ruizhe Pan",
    "Runji Wang",
    "R. J. Chen",
    "R. L. Jin",
    "Ruyi Chen",
    "Shanghao Lu",
    "Shangyan Zhou",
    "Shanhuang Chen",
    "Shengfeng Ye",
    "Shiyu Wang",
    "Shuiping Yu",
    "Shunfeng Zhou",
    "Shuting Pan",
    "S. S. Li",
    "Shuang Zhou",
    "Shaoqing Wu",
    "Shengfeng Ye",
    "Tao Yun",
    "Tian Pei",
    "Tianyu Sun",
    "T. Wang",
    "Wangding Zeng",
    "Wanjia Zhao",
    "Wen Liu",
    "Wenfeng Liang",
    "Wenjun Gao",
    "Wenqin Yu",
    "Wentao Zhang",
    "W. L. Xiao",
    "Wei An",
    "Xiaodong Liu",
    "Xiaohan Wang",
    "Xiaokang Chen",
    "Xiaotao Nie",
    "Xin Cheng",
    "Xin Liu",
    "Xin Xie",
    "Xingchao Liu",
    "Xinyu Yang",
    "Xinyuan Li",
    "Xuecheng Su",
    "Xuheng Lin",
    "X. Q. Li",
    "Xiangyue Jin",
    "Xiaojin Shen",
    "Xiaosha Chen",
    "Xiaowen Sun",
    "Xiaoxiang Wang",
    "Xinnan Song",
    "Xinyi Zhou",
    "Xianzu Wang",
    "Xinxia Shan",
    "Y. K. Li",
    "Y. Q. Wang",
    "Y. X. Wei",
    "Yang Zhang",
    "Yanhong Xu",
    "Yao Li",
    "Yao Zhao",
    "Yaofeng Sun",
    "Yaohui Wang",
    "Yi Yu",
    "Yichao Zhang",
    "Yifan Shi",
    "Yiliang Xiong",
    "Ying He",
    "Yishi Piao",
    "Yisong Wang",
    "Yixuan Tan",
    "Yiyang Ma",
    "Yiyuan Liu",
    "Yongqiang Guo",
    "Yuan Ou",
    "Yuduan Wang",
    "Yue Gong",
    "Yuheng Zou",
    "Yujia He",
    "Yunfan Xiong",
    "Yuxiang Luo",
    "Yuxiang You",
    "Yuxuan Liu",
    "Yuyang Zhou",
    "Y. X. Zhu",
    "Yanhong Xu",
    "Yanping Huang",
    "Yaohui Li",
    "Yi Zheng",
    "Yuchen Zhu",
    "Yunxian Ma",
    "Ying Tang",
    "Yukun Zha",
    "Yuting Yan",
    "Z. Z. Ren",
    "Zehui Ren",
    "Zhangli Sha",
    "Zhe Fu",
    "Zhean Xu",
    "Zhenda Xie",
    "Zhengyan Zhang",
    "Zhewen Hao",
    "Zhicheng Ma",
    "Zhigang Yan",
    "Zhiyu Wu",
    "Zihui Gu",
    "Zijia Zhu",
    "Zijun Liu",
    "Zilin Li",
    "Ziwei Xie",
    "Ziyang Song",
    "Zizheng Pan",
    "Zhen Huang",
    "Zhipeng Xu",
    "Zhongyu Zhang",
    "Zhen Zhang"
  ],
  "summary": "We introduce our first-generation reasoning models, DeepSeek-R1-Zero and\nDeepSeek-R1. DeepSeek-R1-Zero, a model trained via large-scale reinforcement\nlearning (RL) without supervised fine-tuning (SFT) as a preliminary step,\ndemonstrates remarkable reasoning capabilities. Through RL, DeepSeek-R1-Zero\nnaturally emerges with numerous powerful and intriguing reasoning behaviors.\nHowever, it encounters challenges such as poor readability, and language\nmixing. To address these issues and further enhance reasoning performance, we\nintroduce DeepSeek-R1, which incorporates multi-stage training and cold-start\ndata before RL. DeepSeek-R1 achieves performance comparable to OpenAI-o1-1217\non reasoning tasks. To support the research community, we open-source\nDeepSeek-R1-Zero, DeepSeek-R1, and six dense models (1.5B, 7B, 8B, 14B, 32B,\n70B) distilled from DeepSeek-R1 based on Qwen and Llama.",
  "pdf_url": "http://arxiv.org/pdf/2501.12948v1",
  "categories": [
    "cs.CL",
    "cs.AI",
    "cs.LG"
  ]
}