{
  "title": "LP-DETR: Layer-wise Progressive Relations for Object Detection",
  "authors": [
    "Zhengjian Kang",
    "Ye Zhang",
    "Xiaoyu Deng",
    "Xintao Li",
    "Yongzhe Zhang"
  ],
  "abstract": "This paper presents LP-DETR (Layer-wise Progressive DETR), a novel approach\nthat enhances DETR-based object detection through multi-scale relation\nmodeling. Our method introduces learnable spatial relationships between object\nqueries through a relation-aware self-attention mechanism, which adaptively\nlearns to balance different scales of relations (local, medium and global)\nacross decoder layers. This progressive design enables the model to effectively\ncapture evolving spatial dependencies throughout the detection pipeline.\nExtensive experiments on COCO 2017 dataset demonstrate that our method improves\nboth convergence speed and detection accuracy compared to standard\nself-attention module. The proposed method achieves competitive results,\nreaching 52.3\\% AP with 12 epochs and 52.5\\% AP with 24 epochs using ResNet-50\nbackbone, and further improving to 58.0\\% AP with Swin-L backbone. Furthermore,\nour analysis reveals an interesting pattern: the model naturally learns to\nprioritize local spatial relations in early decoder layers while gradually\nshifting attention to broader contexts in deeper layers, providing valuable\ninsights for future research in object detection.",
  "pdf_url": "http://arxiv.org/pdf/2502.05147v1",
  "entry_id": "http://arxiv.org/abs/2502.05147v1",
  "categories": [
    "cs.CV",
    "cs.AI"
  ]
}