<template>
  <div>
    <!-- Page heading, taken from the `mainTitle` data field -->
    <el-row>
      <el-col class="title-admin">
        {{ mainTitle }}
      </el-col>
    </el-row>

    <!--
      One wrapper per item of `contents`. The item's `type` picks exactly one
      of the layouts in the v-if / v-else-if chain below; an item whose type
      matches none of them leaves its wrapper empty.
      Except for `primaryTitle`, every layout injects `text` with v-html, so
      the string is rendered as raw HTML rather than escaped.
    -->
    <div v-for="(entry, position) in contents" :key="position">
      <!-- primaryTitle: icon column followed by the heading text -->
      <el-row v-if="entry.type === 'primaryTitle'" class="primaryTitle">
        <!-- Panda icon (empty alt, so it is treated as decorative) -->
        <el-col :span="2">
          <img class="icon" :src="iconImage" alt="">
        </el-col>
        <!-- Heading text, rendered through escaped interpolation -->
        <el-col :span="22">
          <p class="title">{{ entry.text }}</p>
        </el-col>
      </el-row>

      <!-- text: body paragraph -->
      <el-row v-else-if="entry.type === 'text'">
        <el-col>
          <p class="demo-text" v-html="entry.text"></p>
        </el-col>
      </el-row>

      <!-- secondaryBlack: secondary heading with the extra `secondaryBlack` class -->
      <el-row v-else-if="entry.type === 'secondaryBlack'">
        <el-col class="secondaryTitle">
          <p class="secondaryBlack" v-html="entry.text"></p>
        </el-col>
      </el-row>

      <!-- secondaryTitle: secondary heading -->
      <el-row v-else-if="entry.type === 'secondaryTitle'">
        <el-col class="secondaryTitle">
          <p v-html="entry.text"></p>
        </el-col>
      </el-row>

      <!-- paperTitle: one line per paper title -->
      <el-row v-else-if="entry.type === 'paperTitle'">
        <el-col class="paperTitle">
          <p v-html="entry.text"></p>
        </el-col>
      </el-row>

      <!-- paperAuthor: author line shown under a paper title -->
      <el-row v-else-if="entry.type === 'paperAuthor'">
        <el-col class="paperAuthor">
          <p v-html="entry.text"></p>
        </el-col>
      </el-row>

      <!--
        List-item layouts. The six variants differ only in the column class
        (none / `retract` / `retract2`) and in whether the item also carries
        `black-text`.
        NOTE(review): the <li> elements are not wrapped in a <ul>/<ol> in this
        template; confirm whether that is intentional before changing it,
        since the stylesheet is not visible from here.
      -->
      <el-row v-else-if="entry.type === 'noList'">
        <el-col>
          <li class="noList">
            <div v-html="entry.text"></div>
          </li>
        </el-col>
      </el-row>

      <el-row v-else-if="entry.type === 'retract'">
        <el-col class="retract">
          <li class="noList">
            <div v-html="entry.text"></div>
          </li>
        </el-col>
      </el-row>

      <el-row v-else-if="entry.type === 'noListBlack'">
        <el-col>
          <li class="noList black-text">
            <div v-html="entry.text"></div>
          </li>
        </el-col>
      </el-row>

      <el-row v-else-if="entry.type === 'retractBlack'">
        <el-col class="retract">
          <li class="noList black-text">
            <div v-html="entry.text"></div>
          </li>
        </el-col>
      </el-row>

      <el-row v-else-if="entry.type === 'retract2'">
        <el-col class="retract2">
          <li class="noList">
            <div v-html="entry.text"></div>
          </li>
        </el-col>
      </el-row>

      <el-row v-else-if="entry.type === 'retract2Black'">
        <el-col class="retract2">
          <li class="noList black-text">
            <div v-html="entry.text"></div>
          </li>
        </el-col>
      </el-row>

      <!-- textBlue: paragraph with the `textBlue` class -->
      <el-row v-else-if="entry.type === 'textBlue'">
        <el-col>
          <p class="textBlue" v-html="entry.text"></p>
        </el-col>
      </el-row>
    </div>
  </div>
</template>

<script>
export default {
  name: "Accepted Papers",
  data() {
    return {
		iconImage: require("@/assets/content/bullet_point.png"),
		mainTitle: 'Accepted Papers',
		contents:[
      {
        "type": "paperTitle",
        "text": "5 <b>AV-Deepfake1M: A Large-Scale LLM-Driven Audio-Visual Deepfake Dataset<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhixi Cai, Shreya Ghosh, Aman Pankaj Adatia, Munawar Hayat, Abhinav Dhall, Tom Gedeon, Kalin Stefanov"
      },
      {
        "type": "paperTitle",
        "text": "7 <b>DERD: Data-free Adversarial Robustness Distillation through Self-adversarial Teacher Group<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuhang Zhou, Yushu Zhang, Leo Zhang, Zhongyun Hua"
      },
      {
        "type": "paperTitle",
        "text": "9 <b>Diffusion Facial Forgery Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Harry Cheng, Yangyang Guo, Tianyi Wang, Liqiang Nie, Mohan Kankanhalli"
      },
      {
        "type": "paperTitle",
        "text": "19 <b>Object-Level Pseudo-3D Lifting for Distance-Aware Tracking<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Haoyuan Jin, Xuesong Nie, Yunfeng Yan, Xi Chen, Zhihang Zhu, Donglian Qi"
      },
      {
        "type": "paperTitle",
        "text": "22 <b>Heterogeneous Graph Guided Contrastive Learning for Spatially Resolved Transcriptomics Data<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiao He, Chang Tang, Xinwang Liu, Chuankun Li, Shan An, Zhenglai Li"
      },
      {
        "type": "paperTitle",
        "text": "24 <b>SceneExpander: Real-Time Scene Synthesis for Interactive Floor Plan Editing<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shao-Kui Zhang, Junkai Huang, Liang Yue, Jia-Tong Zhang, Jia-Hong Liu, Yu-Kun Lai, Song-Hai Zhang"
      },
      {
        "type": "paperTitle",
        "text": "25 <b>ScenePhotographer: Object-Oriented Photography for Residential Scenes<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shao-Kui Zhang, Hanxi Zhu, XUEBIN CHEN, Jinghuan Chen, Zhike Peng, Ziyang Chen, Yong-Liang Yang, Song-Hai Zhang"
      },
      {
        "type": "paperTitle",
        "text": "26 <b>GG-Editor: Locally Editing 3D Avatars with Multimodal Large Language Model Guidance<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yunqiu Xu, Linchao Zhu, Yi Yang"
      },
      {
        "type": "paperTitle",
        "text": "30 <b>Controllable Procedural Generation of Landscapes<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jia-Hong Liu, Shao-Kui Zhang, Chuyue Zhang, Song-Hai Zhang"
      },
      {
        "type": "paperTitle",
        "text": "34 <b>Disentangled-Multimodal Privileged Knowledge Distillation for Depression Recognition with Incomplete Multimodal Data<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuchen Pan, Junjun Jiang, Kui Jiang, Xianming Liu"
      },
      {
        "type": "paperTitle",
        "text": "40 <b>All rivers run into the sea: Unified Modality Brain-Inspired Emotional Central Mechanism<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xinji Mai, Junxiong Lin, Haoran Wang, Zeng Tao, Yan Wang, Shaoqi Yan, Xuan Tong, Jiawen Yu, Boyang Wang, Ziheng Zhou, Qing Zhao, Shuyong Gao, Wenqiang Zhang"
      },
      {
        "type": "paperTitle",
        "text": "41 <b>Lite-Mind: Towards Efficient and Robust Brain Representation Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zixuan Gong, Qi Zhang, Guangyin Bao, Lei Zhu, Yu Zhang, KE LIU, Liang Hu, Duoqian Miao"
      },
      {
        "type": "paperTitle",
        "text": "45 <b>Suppressing Uncertainties in Degradation Estimation for Blind Super-Resolution<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Junxiong Lin, Zeng Tao, Xuan Tong, Xinji Mai, Haoran Wang, Boyang Wang, Yan Wang, Qing Zhao, Jiawen Yu, Yuxuan Lin, Shaoqi Yan, Shuyong Gao, Wenqiang Zhang"
      },
      {
        "type": "paperTitle",
        "text": "47 <b>Parameter-Efficient Complementary Expert Learning for Long-Tailed Visual Recognition<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Lixiang Ru, Xin Guo, Lei Yu, Yingying Zhang, Jiangwei Lao, Wang Jian, Jingdong Chen, Yansheng Li, Ming Yang"
      },
      {
        "type": "paperTitle",
        "text": "57 <b>Harmony Everything! Masked Autoencoders for Video Harmonization<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuhang Li, Jincen Jiang, Xiaosong Yang, Youdong Ding, Jian Jun Zhang"
      },
      {
        "type": "paperTitle",
        "text": "86 <b>V2A-Mark: Versatile Deep Visual-Audio Watermarking for Manipulation Localization and Copyright Protection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xuanyu Zhang, Youmin Xu, Runyi Li, Jiwen Yu, Weiqi Li, Zhipei Xu, Jian Zhang"
      },
      {
        "type": "paperTitle",
        "text": "88 <b>ResVR: Joint Rescaling and Viewport Rendering of Omnidirectional Images<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Weiqi Li, Shijie Zhao, Bin Chen, Xinhua Cheng, Junlin Li, Li zhang, Jian Zhang"
      },
      {
        "type": "paperTitle",
        "text": "90 <b>iControl3D: An Interactive System for Controllable 3D Scene Generation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xingyi Li, Yizheng Wu, Jun CEN, Juewen Peng, Kewei Wang, Ke Xian, Zhe Wang, Zhiguo Cao, Guosheng Lin"
      },
      {
        "type": "paperTitle",
        "text": "92 <b>Unveiling and Mitigating Bias in Audio Visual Segmentation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Peiwen Sun, Honggang Zhang, Di Hu"
      },
      {
        "type": "paperTitle",
        "text": "94 <b>Boosting Audio Visual Question Answering via Key Semantic-Aware Cues<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Guangyao Li,  HenghuiDu, Di Hu"
      },
      {
        "type": "paperTitle",
        "text": "97 <b>Spatiotemporal Graph Guided Multi-modal Network for Livestreaming Product Retrieval<b>"
      },
      {
        "type": "paperAuthor",
        "text": " HuXiaowan, Yiyi Chen, Yan Li, Minquan Wang, Haoqian Wang, Quan Chen, Han Li, Peng Jiang"
      },
      {
        "type": "paperTitle",
        "text": "99 <b>An End-to-End Real-World Camera  Imaging Pipeline<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Kepeng Xu, Zijia Ma, Li Xu, Gang He, Yunsong Li, Wenxin Yu, Taichu Han, Cheng Yang"
      },
      {
        "type": "paperTitle",
        "text": "103 <b>Robust Contrastive Cross-modal Hashing with Noisy Labels<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wang Longan, Yang Qin, Yuan Sun, Dezhong Peng, Xi Peng, Peng Hu"
      },
      {
        "type": "paperTitle",
        "text": "108 <b>Semantic Codebook Learning for Dynamic Recommendation Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zheqi Lv, Shaoxuan He, Tianyu Zhan, Shengyu Zhang, Wenqiao Zhang, Jingyuan Chen, Zhou Zhao, Fei Wu"
      },
      {
        "type": "paperTitle",
        "text": "113 <b>Sniffing Threatening Open-World Objects in Autonomous Driving by Open-Vocabulary Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yulin He, Siqi Wang, Wei Chen, Tianci Xun, Yusong Tan"
      },
      {
        "type": "paperTitle",
        "text": "123 <b>DisControlFace: Adding Disentangled Control to Diffusion Autoencoder for One-shot Explicit Facial Image Editing<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Haozhe Jia, Yan Li, Hengfei Cui, Di Xu, Yuwang Wang, Tao Yu"
      },
      {
        "type": "paperTitle",
        "text": "124 <b>ReWiTe: Realistic Wide-angle and Telephoto Dual Camera Fusion Dataset via Beam Splitter Camera Rig<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chunli Peng, Xuan Dong, Tiantian Cao,  Lizhengqing, Kun Dong, Weixin Li"
      },
      {
        "type": "paperTitle",
        "text": "131 <b>Distribution Consistency Guided Hashing for Cross-Modal Retrieval<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuan Sun, Liu kaiming, Yongxiang Li, Zhenwen Ren, Jian Dai, Dezhong Peng"
      },
      {
        "type": "paperTitle",
        "text": "138 <b>Affinity3D: Propagating Instance-Level Semantic Affinity for Zero-Shot Point Cloud Semantic Segmentation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Haizhuang Liu, Junbao Zhuo, Chen Liang, Jiansheng Chen, Huimin Ma"
      },
      {
        "type": "paperTitle",
        "text": "139 <b>Continual Panoptic Perception: Towards Multi-modal Incremental Interpretation of Remote Sensing Images<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Bo Yuan, Danpei Zhao, Zhuoran Liu, Wentao Li, Tian Li"
      },
      {
        "type": "paperTitle",
        "text": "143 <b>FakingRecipe: Detecting Fake News on Short Video Platforms from the Perspective of Creative Process<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuyan Bu, Qiang Sheng, Juan Cao, Peng Qi, Danding Wang, Jintao Li"
      },
      {
        "type": "paperTitle",
        "text": "152 <b>EvilEdit: Backdooring Text-to-Image Diffusion Models in One Second<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hao Wang, Shangwei Guo, Jialing He, Kangjie Chen, Shudong Zhang, Tianwei Zhang, Tao Xiang"
      },
      {
        "type": "paperTitle",
        "text": "164 <b>MoTrans: Customized Motion Transfer with Text-driven Video Diffusion Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiaomin Li, Xu Jia, Qinghe Wang, Haiwen Diao, mengmeng Ge, Pengxiang Li, You He, Huchuan Lu"
      },
      {
        "type": "paperTitle",
        "text": "174 <b>Maskable Retentive Network for Video Moment Retrieval<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jingjing Hu, Dan Guo, Kun Li, Zhan Si, Xun Yang, Meng Wang"
      },
      {
        "type": "paperTitle",
        "text": "179 <b>MaterialSeg3D: Segmenting Dense Materials from 2D Priors for 3D Assets<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zeyu Li, Ruitong Gan, Chuanchen Luo, Yuxi Wang, Liu Jiaheng, Ziwei Zhu, Man Zhang, Qing Li, Zhaoxiang Zhang, Junran Peng, Xu-cheng Yin"
      },
      {
        "type": "paperTitle",
        "text": "181 <b>Harmony in Diversity: Improving All-in-One Image Restoration via Multi-Task Collaboration<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Gang Wu, Junjun Jiang, Kui Jiang, Xianming Liu"
      },
      {
        "type": "paperTitle",
        "text": "202 <b>Group-aware Parameter-efficient Updating for Content-Adaptive Neural Video Compression<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhenghao Chen, Luping Zhou, Zhihao Hu, Dong Xu"
      },
      {
        "type": "paperTitle",
        "text": "208 <b>MambaTrack: a simple baseline for multiple object tracking with State Space Model<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Changcheng Xiao, Qiong Cao, Zhigang Luo, Long Lan"
      },
      {
        "type": "paperTitle",
        "text": "226 <b>LoopGaussian: Creating 3D Cinemagraph with Multi-view Images via Eulerian Motion Field<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiyang Li, Lechao Cheng, Zhangye Wang, Tingting Mu, Jingxuan He"
      },
      {
        "type": "paperTitle",
        "text": "233 <b>HideMIA: Hidden Wavelet Mining for Privacy-Enhancing Medical Image Analysis<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xun Lin, Yi Yu, Zitong YU, Ruohan Meng, Jiale Zhou, A Liu, Yizhong Liu, Shuai Wang, Wenzhong Tang, Zhen Lei, Alex Kot"
      },
      {
        "type": "paperTitle",
        "text": "234 <b>DPO: Dual-Perturbation Optimization for Test-time Adaptation in 3D Object Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhuoxiao Chen, Zixin Wang, Yadan Luo, Sen Wang, Zi Huang"
      },
      {
        "type": "paperTitle",
        "text": "238 <b>QE-BEV: Query Evolution for Bird's Eye View Object Detection in Varied Contexts<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiawei Yao, Yingxing Lai, Hongrui Kou, Tong Wu, Ruixi Liu"
      },
      {
        "type": "paperTitle",
        "text": "244 <b>Improving Composed Image Retrieval via Contrastive Learning with Scaling Positives and Negatives<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhangchi Feng, Richong Zhang, Zhijie Nie"
      },
      {
        "type": "paperTitle",
        "text": "254 <b>Are handcrafted filters helpful for attributing AI-generated images?<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jialiang Li, haoyue Wang, Sheng Li, Zhenxing Qian, Xinpeng Zhang, ATHANASIOS V. VASILAKOS"
      },
      {
        "type": "paperTitle",
        "text": "269 <b>Transferring to Real-World Layouts: A Depth-aware Framework for Scene Adaptation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Mu Chen, Zhedong Zheng, Yi Yang"
      },
      {
        "type": "paperTitle",
        "text": "272 <b>SynopGround: A Large-Scale Dataset for Multi-Paragraph Video Grounding from TV Dramas and Synopses<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chaolei Tan, Zihang Lin, Junfu Pu, Zhongang Qi, Wei-yi Pei, Zhi Qu, Yexin Wang, Ying Shan, Wei-Shi Zheng, Jian-Fang Hu"
      },
      {
        "type": "paperTitle",
        "text": "273 <b>SparseFormer: Detecting Objects in HRW Shots via Sparse Vision Transformer<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenxi Li, Yuchen Guo, Jilai Zheng, Haozhe Lin, Chao Ma, LU FANG, Xiaokang Yang"
      },
      {
        "type": "paperTitle",
        "text": "274 <b>FlashSpeech: Efficient Zero-Shot Speech Synthesis<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhen Ye, Zeqian Ju, Haohe Liu, Xu Tan, Jianyi Chen, Yiwen Lu, Peiwen Sun, Jiahao Pan,  Bianweizhen, Shulin He, Wei Xue, Qifeng Liu, Yike Guo"
      },
      {
        "type": "paperTitle",
        "text": "279 <b>MovingColor: Seamless Fusion of Fine-grained Video Color Enhancement<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yi Dong, Wang Yuxi, ZHENG FANG, Wenqi Ouyang, Xianhui Lin, Zhiqi Shen, Peiran Ren, Xuansong Xie, Qingming Huang"
      },
      {
        "type": "paperTitle",
        "text": "281 <b>VoCAPTER: Voting-based Pose Tracking for Category-level Articulated Object via Inter-frame Priors<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Li Zhang, Zean Han, Yan Zhong, Qiaojun Yu, Xingyu Wu, xue Wang,  RujingWang"
      },
      {
        "type": "paperTitle",
        "text": "282 <b>Text-prompt Camouflaged Instance Segmentation with Graduated Camouflage Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "zhentao he, Changqun Xia, Shengye Qiao, Jia Li"
      },
      {
        "type": "paperTitle",
        "text": "285 <b>Event-ID: Intrinsic Decomposition Using an Event Camera<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zehao Chen, Zhan Lu, De Ma, Huajin Tang, Xudong Jiang, Qian Zheng, Gang Pan"
      },
      {
        "type": "paperTitle",
        "text": "289 <b>Group Vision Transformer<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yaopeng Peng, Milan Sonka, Danny Chen"
      },
      {
        "type": "paperTitle",
        "text": "293 <b>VeCAF: Vision-language Collaborative Active Finetuning with Training Objective Awareness<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Rongyu Zhang, Zefan Cai, Huanrui Yang, Zidong Liu, Denis Gudovskiy, Tomoyuki Okuno, Yohei Nakata, Kurt Keutzer, Baobao Chang, Yuan Du, LI DU, Shanghang Zhang"
      },
      {
        "type": "paperTitle",
        "text": "299 <b>Learning Exposure Correction in Dynamic Scenes<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jin Liu, Bo Wang, Chuanming Wang, Huiyuan Fu, Huadong Ma"
      },
      {
        "type": "paperTitle",
        "text": "302 <b>Multi-view Feature Extraction via Tunable Prompts is Enough for Image Manipulation Localization<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xuntao Liu, Yuzhou Yang, haoyue Wang, Qichao Ying, Zhenxing Qian, Xinpeng Zhang, Sheng Li"
      },
      {
        "type": "paperTitle",
        "text": "316 <b>Image-free Pre-training for Low-Level Vision<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Siyang Wang, JingHao Zhang, Huang Jie, Feng Zhao"
      },
      {
        "type": "paperTitle",
        "text": "317 <b>RobustFace: Adaptive Mining of Noise and Hard Samples for Robust Face Recognitions<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yang Xin, Yu Zhou, Jianmin Jiang"
      },
      {
        "type": "paperTitle",
        "text": "323 <b>OpenAVE: Moving towards Open Set Audio-Visual Event Localization<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiale Yu, Baopeng Zhang, Zhu Teng, Jianping Fan"
      },
      {
        "type": "paperTitle",
        "text": "337 <b>InsVP: Efficient Instance Visual Prompting from Image Itself<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zichen Liu, Yuxin Peng, Jiahuan Zhou"
      },
      {
        "type": "paperTitle",
        "text": "340 <b>Progressive Prototype Evolving for Dual-Forgetting Mitigation in Non-Exemplar Online Continual Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Qiwei Li, Yuxin Peng, Jiahuan Zhou"
      },
      {
        "type": "paperTitle",
        "text": "341 <b>Mitigate Catastrophic Remembering via Continual Knowledge Purification for Noisy Lifelong Person Re-Identification<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Kunlun Xu, Haozhuo Zhang, Yu Li, Yuxin Peng, Jiahuan Zhou"
      },
      {
        "type": "paperTitle",
        "text": "342 <b>Timeline and Boundary Guided Diffusion Network for Video Shadow Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "haipeng zhou, Hongqiu Wang, Tian Ye,  ZhaohuXing, Jun Ma, Ping Li, Qiong Wang, Lei Zhu"
      },
      {
        "type": "paperTitle",
        "text": "353 <b>SEDS: Semantically Enhanced Dual-Stream Encoder for Sign Language Retrieval<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Longtao Jiang, Min Wang, Zecheng Li, Yao Fang, Wengang Zhou, Houqiang Li"
      },
      {
        "type": "paperTitle",
        "text": "356 <b>FD2Talk: Towards Generalized Talking Head Generation with Facial Decoupled Diffusion Model<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ziyu Yao, Xuxin Cheng, Zhiqi Huang"
      },
      {
        "type": "paperTitle",
        "text": "365 <b>Unraveling Motion Uncertainty for Local Motion Deblurring<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zeyu Xiao, Zhihe Lu, Michael Bi Mi, Zhiwei Xiong, Xinchao Wang"
      },
      {
        "type": "paperTitle",
        "text": "368 <b>Prompting Continual Person Search<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Pengcheng Zhang, Xiaohan Yu, Xiao Bai, Jin Zheng, Xin Ning"
      },
      {
        "type": "paperTitle",
        "text": "369 <b>GenUDC: High Quality 3D Mesh Generation With Unsigned Dual Contouring Representation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ruowei Wang, Jiaqi Li, Dan Zeng, Xueqi Ma, Xu Zixiang, Jianwei Zhang, Qijun Zhao"
      },
      {
        "type": "paperTitle",
        "text": "370 <b>Spatial-Temporal Context Model for Remote Sensing Imagery Compression<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jinxiao Zhang, Runmin Dong, Juepeng Zheng, Mengxuan Chen, Lixian Zhang, Yi Zhao, Haohuan Fu"
      },
      {
        "type": "paperTitle",
        "text": "387 <b>Disrupting Diffusion: Token-Level Attention Erasure Attack against Diffusion-based Customization<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yisu Liu, Jinyang An, Wanqian Zhang, Dayan Wu,  JingziGU, zheng Lin, Weiping Wang"
      },
      {
        "type": "paperTitle",
        "text": "396 <b>RCA: Region Conditioned Adaptation for Visual Abductive Reasoning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hao Zhang, Ee Yeo Keat, Basura Fernando"
      },
      {
        "type": "paperTitle",
        "text": "404 <b>Towards Artist-Like Painting Agents with Multi-Granularity Semantic Alignment<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhangli Hu, Ye Chen, Zhongyin Zhao,  JinfanLiu, Bilian Ke, Bingbing Ni"
      },
      {
        "type": "paperTitle",
        "text": "408 <b>Hierarchical Perceptual and Predictive Analogy-Inference Network for Abstract Visual Reasoning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wentao He, Jianfeng Ren, Ruibin Bai, Xudong Jiang"
      },
      {
        "type": "paperTitle",
        "text": "415 <b>DGMamba: Domain Generalization via Generalized State Space Model<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shaocong Long, Qianyu Zhou, Xiangtai Li, Xuequan Lu, Chenhao Ying, Yuan Luo, Lizhuang Ma, Shuicheng Yan"
      },
      {
        "type": "paperTitle",
        "text": "416 <b>Embodied Contrastive Learning with Geometric Consistency and Behavioral Awareness for Object Navigation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Bolei Chen, Jiaxu Kang, Ping Zhong, Yixiong Liang, Yu Sheng, Jianxin Wang"
      },
      {
        "type": "paperTitle",
        "text": "418 <b>Cantor:  Inspiring Multimodal Chain-of-Thought of MLLM<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Timin Gao, Peixian Chen, Mengdan Zhang, Chaoyou Fu, Yunhang Shen, Yan Zhang, Shengchuan Zhang, Xiawu Zheng, Xing Sun, Liujuan Cao, Rongrong Ji"
      },
      {
        "type": "paperTitle",
        "text": "422 <b>Dual-Criterion Quality Loss for Blind Image Quality Assessment<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Desen Yuan, Lei Wang"
      },
      {
        "type": "paperTitle",
        "text": "423 <b>Hallu-PI: Evaluating Hallucination in Multi-modal Large Language Models within Perturbed Inputs<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Peng Ding, Jingyu Wu, Jun Kuang, Dan Ma, Xuezhi Cao, Xunliang Cai, Shi Chen, Jiajun Chen, Shujian Huang"
      },
      {
        "type": "paperTitle",
        "text": "424 <b>TextGaze: Gaze-Controllable Face Generation with Natural Language<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hengfei Wang, Zhongqun Zhang, Yihua Cheng, Hyung Jin Chang"
      },
      {
        "type": "paperTitle",
        "text": "430 <b>Rethinking Impersonation and Dodging Attacks on Face Recognition Systems<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Fengfan Zhou, Qianyu Zhou, Bangjie Yin, Hui Zheng, Xuequan Lu, Lizhuang Ma, Hefei Ling"
      },
      {
        "type": "paperTitle",
        "text": "431 <b>GeNSeg-Net: A General Segmentation Framework for Any Nucleus in Immunohistochemistry Images<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Siyuan Xu, Guannan Li, Haofei Song, Jiansheng Wang, Yan Wang, Qingli Li"
      },
      {
        "type": "paperTitle",
        "text": "433 <b>Weakly Supervised Video Anomaly Detection and Localization with Spatio-Temporal Prompts<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Peng Wu, Xuerong Zhou, Guansong Pang, Zhi Yang, Qingsen Yan, PENG WANG, Yanning Zhang"
      },
      {
        "type": "paperTitle",
        "text": "435 <b>Towards End-to-End Explainable Facial Action Unit Recognition via Vision-Language Joint Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xuri Ge, Junchen Fu, Fuhai Chen, Shan An, Nicu Sebe, Joemon Jose"
      },
      {
        "type": "paperTitle",
        "text": "443 <b>Multi-Instance Multi-Label Learning for Text-motion Retrieval<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yang Yang, Liyuan Cao, Haoyu Shi, huaiwen zhang"
      },
      {
        "type": "paperTitle",
        "text": "445 <b>Rethinking Image Editing Detection in the Era of Generative AI Revolution<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhihao Sun, Haipeng Fang, Juan Cao, Xinying Zhao, Danding Wang"
      },
      {
        "type": "paperTitle",
        "text": "449 <b>FSL-QuickBoost: Minimal-Cost Ensemble for Few-Shot Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yunwei Bai, Bill Cai, Ying Kiat Tan, Zangwei Zheng, Shiming Chen, Tsuhan Chen"
      },
      {
        "type": "paperTitle",
        "text": "450 <b>Reference-based Burst Super-resolution<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Seonggwan Ko, Yeong Koh, Donghyeon Cho"
      },
      {
        "type": "paperTitle",
        "text": "451 <b>Future Motion Dynamic Modeling via Hybrid Supervision for Multi-Person Motion Prediction Uncertainty Reduction<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yan Zhuang, Yanlu Cai, Weizhong Zhang, Cheng Jin"
      },
      {
        "type": "paperTitle",
        "text": "454 <b>StealthDiffusion: Towards Evading Diffusion Forensic Detection through Diffusion Model<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ziyin Zhou, Ke Sun, Zhongxi Chen, Huafeng Kuang, Xiaoshuai Sun, Rongrong Ji"
      },
      {
        "type": "paperTitle",
        "text": "455 <b>Caterpillar: A Pure-MLP Architecture with Shifted-Pillars-Concatenation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jin Sun, Xiaoshuang Shi, Zhiyuan Wang, Kaidi Xu, Hengtao Shen, Xiaofeng Zhu"
      },
      {
        "type": "paperTitle",
        "text": "457 <b>Hypergraph Multi-modal Large Language Model: Exploiting EEG and Eye-tracking Modalities to Evaluate Heterogeneous Responses for Video Understanding<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Minghui Wu, Chenxu Zhao, Anyang Su, Donglin Di, Tianyu Fu, Da An, Min He, Ya Gao, Meng Ma, Yan Kun, Ping Wang"
      },
      {
        "type": "paperTitle",
        "text": "459 <b>Compacter: A Lightweight Transformer for Image Restoration<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhijian Wu, Jun Li, Yang Hu, Dingjiang Huang"
      },
      {
        "type": "paperTitle",
        "text": "461 <b>GLGait: A Global-Local Temporal Receptive Field Network for Gait Recognition in the Wild<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Guozhen Peng, Yunhong Wang, Yuwei Zhao, Shaoxiong Zhang, Annan Li"
      },
      {
        "type": "paperTitle",
        "text": "464 <b>Learning to Transfer Heterogeneous Translucent Materials from a 2D Image to 3D Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiaogang Wang, Yuhang Cheng, Ziyang Fan, Kevin Xu"
      },
      {
        "type": "paperTitle",
        "text": "471 <b>Auto DragGAN: Editing the Generative Image Manifold in an Autoregressive Manner<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Pengxiang Cai, Zhiwei Liu, Guibo Zhu, Yunfang Niu, Jinqiao Wang"
      },
      {
        "type": "paperTitle",
        "text": "479 <b>Text-Region Matching for Multi-Label Image Recognition with Missing Labels<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Leilei Ma, Xie Hongxing, Lei Wang, Yanping Fu, Dengdi Sun, Haifeng Zhao"
      },
      {
        "type": "paperTitle",
        "text": "482 <b>Learning Optimal  Combination Patterns for Lightweight  Stereo Image Super-Resolution<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hu Gao, Jing Yang, Ying Zhang, Jingfan Yang, Bowen Ma, Depeng Dang"
      },
      {
        "type": "paperTitle",
        "text": "483 <b>Tracking-forced Referring Video Object Segmentation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ruxue Yan, wenya guo, XuBo Liu, Xumeng Liu, Ying Zhang, Xiaojie Yuan"
      },
      {
        "type": "paperTitle",
        "text": "487 <b>Calibration for Long-tailed Scene Graph Generation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "XuHan Zhu, Yifei Xing, Ruiping Wang, Yaowei Wang, xiangyuan lan"
      },
      {
        "type": "paperTitle",
        "text": "489 <b>Prior Metadata-Driven RAW Reconstruction: Eliminating the Need for Per-Image Metadata<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wencheng Han, Chen Zhang, Zhou Yang, Wentao Liu, Chen Qian, Cheng-zhong Xu, Jianbing Shen"
      },
      {
        "type": "paperTitle",
        "text": "492 <b>Visual-linguistic Cross-domain Feature Learning with Group Attention and Gamma-correct Gated Fusion for Extracting Commonsense Knowledge<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jialu ZHANG, Xinyi Wang, Chenglin Yao, Jianfeng Ren, Xudong Jiang"
      },
      {
        "type": "paperTitle",
        "text": "495 <b>Mesh-Centric Gaussian Splatting for Human Avatar Modelling with Real-time Dynamic Mesh Reconstruction<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ruiqi Zhang, Jie Chen"
      },
      {
        "type": "paperTitle",
        "text": "497 <b>Class Balance Matters to Active Class-Incremental Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zitong Huang, Ze Chen, Yuanze Li, Bowen Dong, Erjin Zhou, Yong Liu, Rick Siow Mong Goh, Chun-Mei Feng, Wangmeng Zuo"
      },
      {
        "type": "paperTitle",
        "text": "501 <b>Disentangling Identity Features from Interference Factors for Cloth-Changing Person Re-identification<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yubo Li, De Cheng, Chaowei Fang, Changzhe Jiao, Nannan Wang, Xinbo Gao"
      },
      {
        "type": "paperTitle",
        "text": "502 <b>Cover-separable Fixed Neural Network Steganography via Deep Generative Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "GuoBiao Li, Sheng Li, Zhenxing Qian, Xinpeng Zhang"
      },
      {
        "type": "paperTitle",
        "text": "505 <b>FlexIR: Towards Flexible and Manipulable Image Restoration<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhengwei Yin, Guixu Lin, Mengshun Hu, Hao Zhang, Yinqiang Zheng"
      },
      {
        "type": "paperTitle",
        "text": "513 <b>TAS: Personalized Text-guided Audio Spatialization<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhaojian Li, Bin Zhao, Yuan Yuan"
      },
      {
        "type": "paperTitle",
        "text": "531 <b>Weakly Supervised Gaussian Contrastive Grounding with Large Multimodal Models for Video Question Answering<b>"
      },
      {
        "type": "paperAuthor",
        "text": "wang haibo, Chenghang Lai, Sun Yixuan, Weifeng Ge"
      },
      {
        "type": "paperTitle",
        "text": "533 <b>FineCLIPER: Multi-modal Fine-grained CLIP for Dynamic Facial Expression Recognition with AdaptERs<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Haodong Chen, Haojian Huang, Junhao Dong, Mingzhe Zheng, Dian Shao"
      },
      {
        "type": "paperTitle",
        "text": "541 <b>ShiftMorph: A Fast and Robust Convolutional Neural Network for 3D Deformable Medical Image Registration<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Lijian Yang, Weisheng Li, Yucheng Shu, Jianxun Mi, Yuping Huang, Bin Xiao"
      },
      {
        "type": "paperTitle",
        "text": "549 <b>Visual-Semantic Decomposition and Partial Alignment for Document-based Zero-Shot Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiangyan Qu, Jing Yuan, Keke Gai, JiaMin Zhuang, Yuanmin Tang, Gang Xiong, Gaopeng Gou, Qi Wu"
      },
      {
        "type": "paperTitle",
        "text": "554 <b>MAG-Edit: Localized Image Editing in Complex Scenarios via  $\\underline{M}$ask-Based $\\underline{A}$ttention-Adjusted $\\underline{G}$uidance<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Qi Mao, Lan Chen, Yuchao Gu, Zhen Fang, Zheng Shou"
      },
      {
        "type": "paperTitle",
        "text": "556 <b>UniDense: Unleashing Diffusion Models with Meta-Routers for Universal Few-Shot Dense Prediction<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Lintao Dong, Wei Zhai, Zheng-Jun Zha"
      },
      {
        "type": "paperTitle",
        "text": "563 <b>Towards Low-latency Event-based Visual Recognition with Hybrid Step-wise Distillation Spiking Neural Networks<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xian Zhong, Shengwang Hu, Wenxuan Liu, Wenxin Huang, Jianhao Ding, Zhaofei Yu, Tiejun Huang"
      },
      {
        "type": "paperTitle",
        "text": "570 <b>VrdONE: One-stage Video Visual Relation Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xinjie Jiang, Chenxi Zheng, Xuemiao Xu, Bangzhen Liu, Weiying Zheng, Huaidong Zhang, Shengfeng He"
      },
      {
        "type": "paperTitle",
        "text": "571 <b>Efficiency in Focus: LayerNorm as a Catalyst for Fine-tuning Medical Visual Language Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiawei Chen, Dingkang Yang, Yue Jiang, Mingcheng Li, Jinjie Wei, Xiaolu Hou, Lihua Zhang"
      },
      {
        "type": "paperTitle",
        "text": "572 <b>Enhancing Robustness in Learning with Noisy Labels: An Asymmetric Co-Training Approach<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Mengmeng Sheng, Zeren Sun, Gensheng Pei, Tao Chen, Haonan Luo, Yazhou Yao"
      },
      {
        "type": "paperTitle",
        "text": "573 <b>Tunnel Try-on: Excavating Spatial-temporal Tunnels for High-quality Virtual Try-on in Videos<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhengze Xu, Mengting Chen, Zhao Wang, Linyu XING, Zhonghua Zhai, Nong Sang, Jinsong Lan, Shuai Xiao, Changxin Gao"
      },
      {
        "type": "paperTitle",
        "text": "574 <b>An Lightweight  Anchor-Based Incremental Framework to Multi-view Clustering<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Qian Qu, Xinhang Wan, Weixuan Liang, Jiyuan Liu, Yu Feng, Huiying Xu, Xinwang Liu, En Zhu"
      },
      {
        "type": "paperTitle",
        "text": "578 <b>Advancing Multimodal Large Language Models with Quantization-Aware Scale Learning for Efficient Adaptation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "JingJing Xie, Yuxin Zhang, Mingbao Lin, Liujuan Cao, Rongrong Ji"
      },
      {
        "type": "paperTitle",
        "text": "579 <b>MPT: Multi-grained Prompt Tuning for Text-Video Retrieval<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Haonan Zhang, Pengpeng Zeng, Lianli Gao, Jingkuan Song, Hengtao Shen"
      },
      {
        "type": "paperTitle",
        "text": "585 <b>Fooling 3D Face Recognition with One Single 2D Image<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shizong Yan, Shan Chang, Hongzi Zhu, Huixiang Wen, Luo Zhou"
      },
      {
        "type": "paperTitle",
        "text": "586 <b>3D-GRES: Generalized 3D Referring Expression Segmentation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Changli Wu, Yihang Liu, Yiwei Ma, Haowei Wang, Gen Luogen, Jiayi Ji, Henghui Ding, Xiaoshuai Sun, Rongrong Ji"
      },
      {
        "type": "paperTitle",
        "text": "587 <b>GeoFormer: Learning Point Cloud Completion with Tri-Plane Integrated Transformer<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jinpeng Yu, Binbin Huang, Yuxuan Zhang, Huaxia Li, Xu Tang, Shenghua Gao"
      },
      {
        "type": "paperTitle",
        "text": "588 <b>GIST: Improving Parameter Efficient Fine-Tuning via Knowledge Interaction<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiacheng Ruan, Jingsheng Gao, Mingye Xie, Suncheng Xiang, Zefang Yu, Ting Liu, yuzhuo fu, Xiaoye Qu"
      },
      {
        "type": "paperTitle",
        "text": "591 <b>MagicCartoon: 3D Pose and Shape Estimation for Bipedal Cartoon Characters<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yu-Pei Song, Yuan-Tong Liu, Xiao Wu, Qi He, Zhaoquan Yuan, Ao Luo"
      },
      {
        "type": "paperTitle",
        "text": "600 <b>ImageBind3D: Image as Binding Step for Controllable 3D Generation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhenqiang Li, Jie LI, Yangjie Cao, Jiayi Wang, Runfeng Lv"
      },
      {
        "type": "paperTitle",
        "text": "603 <b>Motion-aware Latent Diffusion Models for Video Frame Interpolation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhilin Huang, Yijie Yu, Ling Yang, Chujun Qin, Bing Zheng, Xiawu Zheng, Zikun Zhou, Yaowei Wang, Wenming Yang"
      },
      {
        "type": "paperTitle",
        "text": "604 <b>Enabling Synergistic Full-Body Control in Prompt-Based Co-Speech Motion Generation<b>"
      },
      {
        "type": "paperAuthor",
        "text": " Bohong-Chen, Yumeng Li, Yao-Xiang Ding, Tianjia Shao, Kun Zhou"
      },
      {
        "type": "paperTitle",
        "text": "611 <b>PrimeComposer: Faster Progressively Combined Diffusion for Image Composition with Attention Steering<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yibin Wang, Weizhong Zhang, Jianwei Zheng, Cheng Jin"
      },
      {
        "type": "paperTitle",
        "text": "618 <b>MagicFight: Personalized Martial Arts Combat Video Generation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiancheng Huang, Mingfu Yan, Songyan Chen, Yi Huang, Shifeng Chen"
      },
      {
        "type": "paperTitle",
        "text": "619 <b>Adaptive Selection based Referring Image Segmentation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Pengfei Yue, Jianghang Lin, Shengchuan Zhang, Jie Hu, Yilin Lu, Hongwei Niu, Haixin Ding, Yan Zhang, GUANNAN JIANG, Liujuan Cao, Rongrong Ji"
      },
      {
        "type": "paperTitle",
        "text": "621 <b>HarmonicNeRF: Geometry-Informed Synthetic View Augmentation for 3D Scene Reconstruction in Driving Scenarios<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiaochao Pan, Jiawei Yao, Hongrui Kou, Tong Wu, Canran Xiao"
      },
      {
        "type": "paperTitle",
        "text": "623 <b>GOI: Find 3D Gaussians of Interest with an Optimizable Open-vocabulary Semantic-space Hyperplane<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yansong Qu, Shaohui Dai, Xinyang Li, Jianghang Lin, Liujuan Cao, Shengchuan Zhang, Rongrong Ji"
      },
      {
        "type": "paperTitle",
        "text": "625 <b>LD-BFR: Vector-Quantization-Based Face Restoration Model with Latent Diffusion Enhancement<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuzhen Du, Teng Hu, Ran Yi, Lizhuang Ma"
      },
      {
        "type": "paperTitle",
        "text": "631 <b>SleepMG: Multimodal Generalizable Sleep Staging with Inter-modal Balance of Classification and Domain Discrimination<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shuo Ma, Yingwei Zhang, Zhang Qiqi, Yiqiang Chen, Wang Haoran, Ziyu Jia"
      },
      {
        "type": "paperTitle",
        "text": "636 <b>Fine-Grained Promote Learning for Face Anti-Spoofing<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xueli Hu, Huan Liu, Haocheng Yuan, Zhiyang Fu, Yizhi Luo, Ning Zhang, Hang Zou, Gan Jianwen, Yuan Zhang"
      },
      {
        "type": "paperTitle",
        "text": "638 <b>FM-CLIP: Flexible Modal CLIP for Face Anti-Spoofing<b>"
      },
      {
        "type": "paperAuthor",
        "text": "A Liu, Ma Hui, Junze Zheng, Haocheng Yuan, Xiaoyuan Yu, Yanyan Liang, Sergio Escalera, Jun Wan, Zhen Lei"
      },
      {
        "type": "paperTitle",
        "text": "639 <b>Generalizable Face Anti-spoofing via Style-conditional Prompt Token Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiabao Guo, Huan Liu, Yizhi Luo, Xueli Hu, Hang Zou, Yuan Zhang, Hui Liu, Bo Zhao"
      },
      {
        "type": "paperTitle",
        "text": "641 <b>3D Gaussian Editing with A Single Image<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Guan Luo, Tian-Xing Xu, Ying-Tian Liu, Xiaoxiong Fan, Fang-Lue Zhang, Song-Hai Zhang"
      },
      {
        "type": "paperTitle",
        "text": "643 <b>DAC: 2D-3D Retrieval with Noisy Labels via Divide-and-Conquer Alignment and Correction<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chaofan Gan, Yuanpeng Tu, Yuxi Li, Weiyao Lin"
      },
      {
        "type": "paperTitle",
        "text": "648 <b>PC$^2$: Pseudo-Classification Based Pseudo-Captioning for Noisy Correspondence Learning in Cross-Modal Retrieval<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yue Duan, Zhangxuan Gu, Zhenzhe Ying, Lei Qin, Changhua Meng, Yinghuan Shi"
      },
      {
        "type": "paperTitle",
        "text": "653 <b>UVMap-ID: A Controllable and Personalized UV Map Generative Model<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Weijie Wang, jichao zhang, Chang Liu, Xia Li, Xingqian Xu, Honghui Shi, Nicu Sebe, Bruno Lepri"
      },
      {
        "type": "paperTitle",
        "text": "657 <b>FreqMamba: Viewing Mamba from a Frequency Perspective for Image Deraining<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhen Zou, Hu Yu, Huang Jie, Feng Zhao"
      },
      {
        "type": "paperTitle",
        "text": "660 <b>VR-DiagNet: Medical Volumetric and Radiomic Diagnosis Networks with Interpretable Clinician-like Optimizing Visual Inspection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shouyu Chen, Tangwei Ye, Lai Zhong Yuan, Qi Zhang, KE LIU, Usman Naseem, Ke Sun, Nengjun Zhu, Liang Hu"
      },
      {
        "type": "paperTitle",
        "text": "665 <b>Generative Motion Stylization of Cross-structure Characters within Canonical Motion Space<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiaxu Zhang, Xin Chen, Gang Yu, Zhigang Tu"
      },
      {
        "type": "paperTitle",
        "text": "669 <b>Test-Time Training on Graphs with Large Language Models (LLMs)<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiaxin Zhang, Yiqi Wang, Xihong Yang, Siwei Wang, Yu Feng, Yu Shi, ruichao Ren, En Zhu, Xinwang Liu"
      },
      {
        "type": "paperTitle",
        "text": "674 <b>Effective optimization of root selection towards improved explanation of deep classifiers<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xin Zhang, Sheng-hua Zhong, Jianmin Jiang"
      },
      {
        "type": "paperTitle",
        "text": "677 <b>CoAst: Validation-Free Contribution Assessment for Federated Learning based on Cross-Round Valuation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hao Wu, Likun Zhang, Shucheng Li, Fengyuan Xu, Sheng Zhong"
      },
      {
        "type": "paperTitle",
        "text": "678 <b>Subjective-Aligned Dataset and Metric for Text-to-Video Quality Assessment<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Tengchuan Kou, Xiaohong Liu, zicheng zhang, Chunyi Li, Haoning Wu, Xiongkuo Min, Guangtao Zhai, Ning Liu"
      },
      {
        "type": "paperTitle",
        "text": "682 <b>Proactive Deepfake Detection via Training-Free Landmark Perceptual Watermarks<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Tianyi Wang, Mengxiao Huang, Harry Cheng, Xiao Zhang, Zhiqi Shen"
      },
      {
        "type": "paperTitle",
        "text": "692 <b>PSSD-Transformer: Powerful Sparse Spike-Driven Transformer for Image Semantic Segmentation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hongzhi Wang, Xiubo Liang, Tao Zhang, Gu Yue, Weidong Geng"
      },
      {
        "type": "paperTitle",
        "text": "693 <b>Video Anomaly Detection via Progressive Learning of Multiple Proxy Tasks<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Menghao Zhang, Jingyu Wang, Qi Qi, Pengfei Ren, haifeng sun, Zirui Zhuang, Wang HuaZheng, Lei Zhang, Jianxin Liao"
      },
      {
        "type": "paperTitle",
        "text": "696 <b>SI-BiViT: Binarizing Vision Transformers with Spatial Interaction<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Peng Yin, Xiaosu Zhu, Jingkuan Song, Lianli Gao, Hengtao Shen"
      },
      {
        "type": "paperTitle",
        "text": "697 <b>CoIn: A Lightweight and Effective Framework for Story Visualization and Continuation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Tao Ming, Bingkun BAO, Hao Tang, Yaowei Wang, Changsheng Xu"
      },
      {
        "type": "paperTitle",
        "text": "701 <b>SSL: A Self-similarity Loss for Improving Generative Image Super-resolution<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Du Chen, Zhengqiang ZHANG, Jie Liang, Lei Zhang"
      },
      {
        "type": "paperTitle",
        "text": "706 <b>Bridging Visual Affective Gap: Borrowing Textual Knowledge by Learning from Noisy Image-Text Pairs<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Daiqing Wu, Dongbao Yang, Yu Zhou, Can Ma"
      },
      {
        "type": "paperTitle",
        "text": "707 <b>JoReS-Diff: Joint Retinex and Semantic Priors in Diffusion Model for Low-light Image Enhancement<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuhui Wu, Guoqing Wang, Zhiwen Wang, Yang Yang, Tianyu Li, Malu Zhang, Chongyi Li, Hengtao Shen"
      },
      {
        "type": "paperTitle",
        "text": "710 <b>Focus, Distinguish, and Prompt: Unleashing CLIP for Efficient and Flexible Scene Text Retrieval<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Gangyan Zeng, Yuan Zhang, Jin Wei, Dongbao Yang, peng zhang, Yiwen Gao, Xugong Qin, Yu Zhou"
      },
      {
        "type": "paperTitle",
        "text": "716 <b>Breaking Modality Gap in RGBT Tracking: Coupled Knowledge Distillation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Andong Lu, Jiacong Zhao, Chenglong Li, yun Xiao, Bin Luo"
      },
      {
        "type": "paperTitle",
        "text": "719 <b>CartoonNet: Cartoon Parsing with Semantic Consistency and Structure Correlation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jian-Jun Qiao, Meng-Yu Duan, Xiao Wu, Yu-Pei Song"
      },
      {
        "type": "paperTitle",
        "text": "721 <b>APP: Adaptive Pose Pooling for 3D Human Pose Estimation from Videos<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jinyan Zhang, Mengyuan Liu, Hong Liu, Guoquan Wang, Wenhao Li"
      },
      {
        "type": "paperTitle",
        "text": "725 <b>ARTS: Semi-Analytical Regressor using Disentangled Skeletal Representations for Human Mesh Recovery from Videos<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Tao Tang, Hong Liu, Yingxuan You, Ti Wang, Wenhao Li"
      },
      {
        "type": "paperTitle",
        "text": "746 <b>Multi-modal Denoising Diffusion Pretraining for Whole-Slide Image Classification<b>"
      },
      {
        "type": "paperAuthor",
        "text": "WEI LOU, Guanbin Li, Xiang Wan, Haofeng Li"
      },
      {
        "type": "paperTitle",
        "text": "764 <b>MetaEnzyme: Meta Pan-Enzyme Learning for Task-Adaptive Redesign<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiangbin Zheng, Han Zhang, Qianqing Xu, An-Ping Zeng, Stan Li"
      },
      {
        "type": "paperTitle",
        "text": "768 <b>Prompting to Adapt Foundational Segmentation Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jie Hu, Jie Li, Yue Ma, Liujuan Cao, Songan Zhang, Wei Zhang, GUANNAN JIANG, Rongrong Ji"
      },
      {
        "type": "paperTitle",
        "text": "772 <b>Selective Vision-Language Subspace Projection for Few-shot CLIP<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xingyu Zhu, Beier Zhu, Yi Tan, Shuo Wang, Yanbin Hao, Hanwang Zhang"
      },
      {
        "type": "paperTitle",
        "text": "789 <b>Multimodal LLM Enhanced Cross-lingual Cross-modal Retrieval<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yabing Wang, Le Wang, Qiang Zhou, Zhibin Wang, Li Hao, Gang Hua, Wei Tang"
      },
      {
        "type": "paperTitle",
        "text": "790 <b>Explore Hybrid Modeling for Moving Infrared Small Target Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Mingjin Zhang, Shilong Liu, Yuanjun Ouyang, Jie Guo, Zhihong Tang, Yunsong Li"
      },
      {
        "type": "paperTitle",
        "text": "791 <b>LoFormer: Local Frequency Transformer for Image Deblurring<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xintian Mao, Jiansheng Wang, Xingran Xie, Qingli Li, Yan Wang"
      },
      {
        "type": "paperTitle",
        "text": "792 <b>MM-LDM: Multi-Modal Latent Diffusion Model for Sounding Video Generation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Mingzhen Sun, Weining Wang, Yanyuan Qiao, Jiahui Sun, Zihan Qin, Longteng Guo, Xinxin Zhu, Jing Liu"
      },
      {
        "type": "paperTitle",
        "text": "795 <b>Point Cloud Reconstruction Is Insufficient to Learn 3D Representations<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Weichen Xu, Jian Cao, Tianhao Fu, Ruilong Ren, Zicong Hu, Xixin Cao, Xing Zhang"
      },
      {
        "type": "paperTitle",
        "text": "799 <b>MiNet: Weakly-Supervised Camouflaged Object Detection through Mutual Interaction between Region and Edge Cues<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuzhen Niu, Lifen Yang, Rui Xu, Yuezhou Li, Yuzhong Chen"
      },
      {
        "type": "paperTitle",
        "text": "800 <b>Emphasizing Semantic Consistency of Salient Posture for Speech-Driven Gesture Generation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Fengqi Liu, Hexiang Wang, Jingyu Gong, Ran Yi, Qianyu Zhou, Xuequan Lu, Jiangbo Lu, Lizhuang Ma"
      },
      {
        "type": "paperTitle",
        "text": "801 <b>PRTGS: Precomputed Radiance Transfer of Gaussian Splats for Real-Time High-Quality Relighting<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yijia Guo, Yuanxi Bai, Hu Liwen, Guo Ziyi, Mianzhi Liu, Yu Cai, Tiejun Huang, Lei Ma"
      },
      {
        "type": "paperTitle",
        "text": "803 <b>Infusion: Preventing Customized Text-to-Image Diffusion from Overfitting<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zeng Weili, Yichao Yan, Qi Zhu, Zhuo Chen, Pengzhi Chu, Weiming Zhao, Xiaokang Yang"
      },
      {
        "type": "paperTitle",
        "text": "806 <b>Dynamic Mixed-Prototype Model for Incremental Deepfake Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Tian Jiahe, Cai Yu, Peng Chen, Zihao Xiao, Xi Wang, Jizhong Han, Yesheng Chai"
      },
      {
        "type": "paperTitle",
        "text": "807 <b>Towards Small Object Editing: A Benchmark Dataset and A Training-Free Approach<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Pan Qihe, Zhen Zhao, Zicheng Wang, Sifan Long, Yiming Wu, Wei Ji, Haoran Liang, Ronghua Liang"
      },
      {
        "type": "paperTitle",
        "text": "825 <b>Triple Alignment Strategies for Zero-shot Phrase Grounding under Weak Supervision<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Pengyue Lin, Ruifan Li, Yuzhe Ji, Zhihan Yu, Fangxiang Feng, Zhanyu Ma, Xiaojie Wang"
      },
      {
        "type": "paperTitle",
        "text": "834 <b>EGGen: Image Generation with Multi-entity Prior Learning through Entity Guidance<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhenhong Sun, Junyan Wang, Zhiyu Tan, Daoyi Dong, Hailan Ma, Li Hao, Dong Gong"
      },
      {
        "type": "paperTitle",
        "text": "841 <b>Stochastic Context Consistency Reasoning for Domain Adaptive Object Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yiming Cui, Liang Li, Jiehua Zhang, Chenggang Yan, Hongkui Wang, Shuai Wang, Jin Heng, Wu Li"
      },
      {
        "type": "paperTitle",
        "text": "852 <b>Diversified Semantic Distribution Matching for Dataset Distillation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hongcheng Li, Zhou Yucan, Xiaoyan Gu, Bo Li, Weiping Wang"
      },
      {
        "type": "paperTitle",
        "text": "853 <b>DINO is Also a Semantic Guider: Exploiting Class-aware Affinity for Weakly Supervised Semantic Segmentation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuanchen Wu, Xiaoqiang Li, Jide Li, Kequan Yang, Pinpin Zhu, Shaohua Zhang"
      },
      {
        "type": "paperTitle",
        "text": "858 <b>Natural Language Induced Adversarial Images<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiaopei Zhu, PEIYANG XU, Guanning Zeng, Yinpeng Dong, Xiaolin Hu"
      },
      {
        "type": "paperTitle",
        "text": "859 <b>NFT1000: A Cross-Modal Dataset For Non-Fungible Token Retrieval<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shuxun Wang, Yunfei Lei, Ziqi Zhang, Wei Liu, Haowei Liu, Li Yang, Bing Li, Wenjuan Li, Jin Gao, Weiming Hu"
      },
      {
        "type": "paperTitle",
        "text": "865 <b>A Novel State Space Model with Local Enhancement and State Sharing for Image Fusion<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zihan Cao, Xiao Wu, Liang-Jian Deng, Yu Zhong"
      },
      {
        "type": "paperTitle",
        "text": "870 <b>Learning Spectral-decomposited Tokens for Domain Generalized Semantic Segmentation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jingjun Yi, Qi Bi, Hao Zheng, Haolan Zhan, Wei Ji, YW Huang, Yuexiang Li, Yefeng Zheng"
      },
      {
        "type": "paperTitle",
        "text": "875 <b>Highly Efficient No-reference 4K Video Quality Assessment with Full-Pixel Covering Sampling and Training Strategy<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiaoheng Tan, Jiabin Zhang, Yuhui Quan, Jing Li, Yajing Wu, Zilin Bian"
      },
      {
        "type": "paperTitle",
        "text": "877 <b>FSVFG: Towards Immersive Full-Scene Volumetric Video Streaming with Adaptive Feature Grid<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Daheng Yin, Jianxin Shi, Miao Zhang, Zhaowu Huang, Jiangchuan Liu, Fang Dong"
      },
      {
        "type": "paperTitle",
        "text": "879 <b>Diversity Matters: User-Centric Multi-Interest Learning for Conversational Movie Recommendation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yongsen Zheng, Guohua Wang, Yang Liu, Liang Lin"
      },
      {
        "type": "paperTitle",
        "text": "887 <b>From Question to Exploration: Can Classic Test-Time Adaptation Strategies Be Effectively Applied in Semantic Segmentation?<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chang'an Yi, Haotian Chen, Yifan Zhang, Yonghui Xu, Yan Zhou, Lizhen Cui"
      },
      {
        "type": "paperTitle",
        "text": "888 <b>Imbalanced Multi-instance Multi-label Learning via Coding Ensemble and Adaptive Thresholds<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xinyue Zhang, Tingjin Luo,  liuyueying, Chenping Hou"
      },
      {
        "type": "paperTitle",
        "text": "892 <b>Non-uniform Timestep Sampling: Towards Faster Diffusion Model Training<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Tianyi Zheng, Cong Geng, Peng-Tao Jiang, Ben Wan, Hao Zhang, Jinwei Chen, Jia Wang, Bo Li"
      },
      {
        "type": "paperTitle",
        "text": "894 <b>Non-Overlapped Multi-View Weak-Label Learning Guided by Multiple Correlations<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Kaixiang Wang, Xiaojian Ding, Fan Yang"
      },
      {
        "type": "paperTitle",
        "text": "896 <b>MoBA: Mixture of Bidirectional Adapter for Multi-modal Sarcasm Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yifeng Xie, Zhihong Zhu, Xin Chen, Zhanpeng Chen, Zhiqi Huang"
      },
      {
        "type": "paperTitle",
        "text": "897 <b>View Gap Matters: Cross-view Topology and Information Decoupling for Multi-view Clustering<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Fangdi Wang, Siwei Wang, Jiaqi Jin, Zhibin Dong, Xihong Yang, Yu Feng, Xinzhong Zhu, Tianrui Liu, Xinwang Liu, En Zhu"
      },
      {
        "type": "paperTitle",
        "text": "898 <b>RainMamba: Enhanced Locality Learning with State Space Models for Video Deraining<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hongtao Wu, Yijun Yang, Weiming Wang, JINNI ZHOU, Lei Zhu"
      },
      {
        "type": "paperTitle",
        "text": "908 <b>Blind Face Video Restoration with Temporal Consistent Generative Prior and Degradation-Aware Prompt<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jingfan Tan, Hyunhee Park, Ying Zhang, Tao Wang, Kaihao Zhang, Xiangyu Kong, Pengwen Dai, Zikun Liu, Wenhan Luo"
      },
      {
        "type": "paperTitle",
        "text": "917 <b>SIRLUT: Simulated Infrared Fusion Guided Image-adaptive 3D Lookup Tables for Lightweight Image Enhancement<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Kaijiang li, Hao Li, Haining Li, Peisen Wang, Chunyi Guo, Wenfeng Jiang"
      },
      {
        "type": "paperTitle",
        "text": "919 <b>Towards High-resolution 3D Anomaly Detection via Group-Level Feature Contrastive Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hongze Zhu, Guoyang Xie, Chengbin Hou, Tao Dai, Can GAO, Jinbao Wang, Linlin Shen"
      },
      {
        "type": "paperTitle",
        "text": "920 <b>Large Point-to-Gaussian Model for Image-to-3D Generation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Long Lu, Huachen Gao, Tao Dai, Yaohua Zha, Zhi Hou, Junta Wu, Shu-Tao Xia"
      },
      {
        "type": "paperTitle",
        "text": "924 <b>FacialFlowNet: Advancing Facial Optical Flow Estimation with a Diverse Dataset and a Decomposed Model</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Lu Jianzhi, Ruian He, Shili Zhou, Weimin Tan, Bo Yan"
      },
      {
        "type": "paperTitle",
        "text": "927 <b>Data Generation Scheme for Thermal Modality with Edge-Guided Adversarial Conditional Diffusion Model</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Guoqing Zhu, Honghu Pan, Qiang Wang, Chao Tian, Chao Yang, Zhenyu He"
      },
      {
        "type": "paperTitle",
        "text": "930 <b>VoxelTrack: Exploring Multi-level Voxel Representation for 3D Point Cloud Object Tracking</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuxuan Lu, Jiahao Nie, Zhiwei He, Hongjie Gu, Xudong Lv"
      },
      {
        "type": "paperTitle",
        "text": "933 <b>Scalable Multi-Source Pre-training for Graph Neural Networks</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Mingkai Lin, Wenzhong Li, Xiaobin Hong, Sanglu Lu"
      },
      {
        "type": "paperTitle",
        "text": "935 <b>LOVD: Large Open Vocabulary Object detection</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shiyu Tang, Zhaofan Luo, Yifan Wang, Lijun Wang, Huchuan Lu, Weibo Su, Libo Liu"
      },
      {
        "type": "paperTitle",
        "text": "936 <b>SelM: Selective Mechanism based Audio-Visual Segmentation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiaxu Li, Songsong Yu, Yifan Wang, Lijun Wang, Huchuan Lu"
      },
      {
        "type": "paperTitle",
        "text": "939 <b>Unseen No More: Unlocking the Potential of CLIP for Generative Zero-shot HOI Detection</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yixin Guo, Yu Liu, Jianghao Li, weimin wang, Qi Jia"
      },
      {
        "type": "paperTitle",
        "text": "944 <b>PROMOTE: Prior-Guided Diffusion Model with Global-Local Contrastive Learning for Exemplar-Based Image Translation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Guojin Zhong, YIHU GUO, Jin Yuan, Qianjun Zhang, WEILI GUAN, Long Chen"
      },
      {
        "type": "paperTitle",
        "text": "948 <b>Alleviating the Equilibrium Challenge with Sample Virtual Labeling for Adversarial Domain Adaptation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenxu Shi, Bochuan Zheng"
      },
      {
        "type": "paperTitle",
        "text": "949 <b>CLiF-VQA: Enhancing Video Quality Assessment by Incorporating High-Level Semantic Information related to Human Feelings</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yachun Mi, Yan Shu, Yu Li, Chen Hui, Puchao Zhou, Shaohui Liu"
      },
      {
        "type": "paperTitle",
        "text": "952 <b>PEneo: Unifying Line Extraction, Line Grouping, and Entity Linking for End-to-end Document Pair Extraction</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zening Lin, Jiapeng Wang, Teng Li, Wenhui Liao, DAYI HUANG, Longfei Xiong, Lianwen Jin"
      },
      {
        "type": "paperTitle",
        "text": "954 <b>Uncertainty-Aware Pseudo-Labeling and Dual Graph Driven Network for Incomplete Multi-View Multi-Label Classification</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wulin Xie, Xiaohuan Lu, Liu Yadong, Jiang Long, Bob Zhang, Shuping Zhao, Jie Wen"
      },
      {
        "type": "paperTitle",
        "text": "956 <b>Exploring Stable Meta-optimization Patterns via Differentiable Reinforcement Learning for Few-shot Classification</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zheng Han, Xiaobin Zhu, Chun Yang, Hongyang Zhou, Jingyan Qin, Xu-cheng Yin"
      },
      {
        "type": "paperTitle",
        "text": "958 <b>TDSD: Text-Driven Scene-Decoupled Weakly Supervised Video Anomaly Detection</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shengyang Sun, Jiashen Hua, Junyi Feng, Dongxu Wei, Baisheng Lai, Xiaojin Gong"
      },
      {
        "type": "paperTitle",
        "text": "960 <b>Exposure Completing for Temporally Consistent Neural High Dynamic Range Video Rendering</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiahao Cui, Wei Jiang, Zhan Peng, Zhiyu Pan, Zhiguo Cao"
      },
      {
        "type": "paperTitle",
        "text": "963 <b>ReCorD: Reasoning and Correcting Diffusion for HOI Generation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jian-Yu Jiang-Lin, Kang-Yang Huang, Ling Lo, Yi-Ning Huang, Terence Lin, Jhih-Ciang Wu, Hong-Han Shuai, Wen-Huang Cheng"
      },
      {
        "type": "paperTitle",
        "text": "964 <b>MLP Embedded Inverse Tone Mapping</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Panjun Liu, Jiacheng Li, Lizhi Wang, Zheng-Jun Zha, Zhiwei Xiong"
      },
      {
        "type": "paperTitle",
        "text": "967 <b>Enhancing Unsupervised Visible-Infrared Person Re-Identification with Bidirectional-Consistency Gradual Matching</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiao Teng, Xingyu Shen, Kele Xu, Long Lan"
      },
      {
        "type": "paperTitle",
        "text": "976 <b>T2I-Scorer: Quantitative Evaluation on Text-to-Image Generation via Fine-Tuned Large Multi-Modal Models</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Haoning Wu, Xiele Wu, Chunyi Li, zicheng zhang, Chaofeng Chen, Xiaohong Liu, Guangtao Zhai, Weisi Lin"
      },
      {
        "type": "paperTitle",
        "text": "977 <b>Parameter-efficient is not Sufficient: Exploring Parameter, Memory, and Time Efficient Adapter Tuning for Dense Predictions</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Dongshuo Yin, Xueting Han, Bin Li, Hao Feng, Jing Bai"
      },
      {
        "type": "paperTitle",
        "text": "981 <b>LMM-PCQA: Assisting Point Cloud Quality Assessment with LMM</b>"
      },
      {
        "type": "paperAuthor",
        "text": "zicheng zhang, Haoning Wu, Yingjie Zhou, Chunyi Li, Wei Sun, Chaofeng Chen, Xiongkuo Min, Xiaohong Liu, Weisi Lin, Guangtao Zhai"
      },
      {
        "type": "paperTitle",
        "text": "992 <b>Not All Inputs Are Valid: Towards Open-Set Video Moment Retrieval using Language</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiang Fang, Wanlong Fang, Daizong Liu, Xiaoye Qu, Jianfeng Dong, Pan Zhou, Renfu Li, Zichuan Xu, Lixing Chen, Panpan Zheng, Yu Cheng"
      },
      {
        "type": "paperTitle",
        "text": "1000 <b>Anatomical Prior Guided Spatial Contrastive Learning for Few-Shot Medical Image Segmentation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wendong Huang, Jinwu Hu, Xiuli Bi, Bin Xiao"
      },
      {
        "type": "paperTitle",
        "text": "1009 <b>RoCo: Robust Cooperative Perception By Iterative Object Matching and Pose Adjustment</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhe Huang, Shuo Wang, Yongcai Wang, Wanting Li, Deying Li, Lei Wang"
      },
      {
        "type": "paperTitle",
        "text": "1010 <b>Unpaired Photo-realistic Image Deraining with Energy-informed Diffusion Model</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuanbo Wen, Tao Gao, Ting Chen"
      },
      {
        "type": "paperTitle",
        "text": "1016 <b>Towards Real-time Video Compressive Sensing on Mobile Devices</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Miao Cao, Lishun Wang, Huan Wang, Guoqing Wang, Xin Yuan"
      },
      {
        "type": "paperTitle",
        "text": "1017 <b>Generative Text Steganography with Large Language Model</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiaxuan Wu, Wu Zhengxian, Xue yiming, Juan Wen, Wanli Peng"
      },
      {
        "type": "paperTitle",
        "text": "1024 <b>Compression-Realized Deep Structural Network for Video Quality Enhancement</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hanchi Sun, Xiaohong Liu, XINYANG JIANG, Yifei Shen, Dongsheng Li, Xiongkuo Min, Guangtao Zhai"
      },
      {
        "type": "paperTitle",
        "text": "1032 <b>Dig into Detailed Structures: Key Context Encoding and Semantic-based Decoding for Point Cloud Completion</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hongye Hou, Xuehao Gao, Zhan Liu, Yang Yang"
      },
      {
        "type": "paperTitle",
        "text": "1036 <b>Deciphering Perceptual Quality in Colored Point Cloud: Prioritizing Geometry or Texture Distortion?</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xuemei Zhou, Irene Viola, Yunlu Chen, Jiahuan Pei, Pablo Cesar"
      },
      {
        "type": "paperTitle",
        "text": "1040 <b>Generalized Source-free Domain-adaptive Segmentation via Reliable Knowledge Propagation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Qi Zang, Shuang Wang, Dong Zhao, Yang HU, Dou Quan, Jinlong Li, Nicu Sebe, Zhun Zhong"
      },
      {
        "type": "paperTitle",
        "text": "1054 <b>Translating Motion to Notation: Hand Labanotation for Intuitive and Comprehensive Hand Movement Documentation</b>"
      },
      {
        "type": "paperAuthor",
        "text": " LiLing, WenRui Yang, Xinchun Yu, Junliang Xing, Xiao-Ping Zhang"
      },
      {
        "type": "paperTitle",
        "text": "1058 <b>Deblurring Neural Radiance Fields with Event-driven Bundle Adjustment</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yunshan Qi, Lin Zhu, Yifan Zhao, Nan Bao, Jia Li"
      },
      {
        "type": "paperTitle",
        "text": "1074 <b>CAPNet: Cartoon Animal Parsing with Spatial Learning and Structural Modeling</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jian-Jun Qiao, Meng-Yu Duan, Xiao Wu, Wei Li"
      },
      {
        "type": "paperTitle",
        "text": "1075 <b>Deep Instruction Tuning for Segment Anything Model</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiaorui Huang, Gen Luogen, Chaoyang Zhu, Bo Tong, Yiyi Zhou, Xiaoshuai Sun, Rongrong Ji"
      },
      {
        "type": "paperTitle",
        "text": "1076 <b>CalibRBEV: Multi-Camera Calibration via Reversed Bird's-eye-view Representations for Autonomous Driving</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenlong Liao, Sunyuan Qiang, Xianfei Li, Xiaolei Chen, Haoyu Wang, Yanyan Liang, Junchi Yan, Tao He, Pai Peng"
      },
      {
        "type": "paperTitle",
        "text": "1077 <b>RSNN: Recurrent Spiking Neural Networks for Dynamic Spatial-Temporal Information Processing</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Qi Xu, Xuanye Fang, Yaxin Li, Jiangrong Shen, De Ma, YI XU, Gang Pan"
      },
      {
        "type": "paperTitle",
        "text": "1091 <b>Q-Ground: Image Quality Grounding with Large Multi-modality Models</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chaofeng Chen, Yang Sensen, Haoning Wu, Liang Liao, zicheng zhang, Annan Wang, Wenxiu Sun, Qiong Yan, Weisi Lin"
      },
      {
        "type": "paperTitle",
        "text": "1093 <b>Hal-Eval: A Universal and Fine-grained Hallucination Evaluation Framework for Large Vision Language Models</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chao Jiang, Wei Ye, Mengfan Dong, Jia Hongrui, Haiyang Xu, Ming Yan, Ji Zhang, Shikun Zhang"
      },
      {
        "type": "paperTitle",
        "text": "1095 <b>It Takes Two: Accurate Gait Recognition in the Wild via Cross-granularity Alignment</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jinkai Zheng, Xinchen Liu, Boyue Zhang, Chenggang Yan, Jiyong Zhang, Wu Liu, Yongdong Zhang"
      },
      {
        "type": "paperTitle",
        "text": "1097 <b>MambaMOS: LiDAR-based 3D Moving Object Segmentation with Motion-aware State Space Model</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Kang Zeng, Shi Hao, Jiacheng Lin, Siyu Li, Jintao Cheng, Kaiwei Wang, Zhiyong Li, Kailun Yang"
      },
      {
        "type": "paperTitle",
        "text": "1101 <b>TagOOD: A Novel Approach to Out-of-Distribution Detection via Vision-Language Representations and Class Center Learning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jinglun Li, Xinyu Zhou, Kaixun Jiang, Lingyi Hong, Pinxue Guo, Zhaoyu Chen, Weifeng Ge, Wenqiang Zhang"
      },
      {
        "type": "paperTitle",
        "text": "1106 <b>Embracing Adaptation: An Effective Dynamic Defense Strategy Against Adversarial Examples</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shenglin Yin, kelu Yao, Zhen Xiao, Jieyi Long"
      },
      {
        "type": "paperTitle",
        "text": "1107 <b>X-Prompt: Multi-modal Visual Prompt for Video Object Segmentation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Pinxue Guo, Wanyun Li, Hao Huang, Lingyi Hong, Xinyu Zhou, Zhaoyu Chen, Jinglun Li, Kaixun Jiang, Wei Zhang, Wenqiang Zhang"
      },
      {
        "type": "paperTitle",
        "text": "1119 <b>CLIP2UDA: Making Frozen CLIP Reward Unsupervised Domain Adaptation in 3D Semantic Segmentation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "WU YAO, Mingwei Xing, Yachao Zhang, Yuan Xie, Yanyun Qu"
      },
      {
        "type": "paperTitle",
        "text": "1141 <b>Integrating Content-Semantics-World Knowledge to Detect Stress from Videos</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yang Ding, Yi Dai, Xin Wang, Ling Feng, Lei Cao, Huijun Zhang"
      },
      {
        "type": "paperTitle",
        "text": "1145 <b>Coarse-to-Fine Proposal Refinement Framework For Audio Temporal Forgery Detection and Localization</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wu Junyan, Wei Lu, Xiangyang Luo, Rui Yang, Qian Wang, Xiaochun Cao"
      },
      {
        "type": "paperTitle",
        "text": "1152 <b>QS-NeRV: Real-Time Quality-Scalable Decoding with Neural Representation for Videos</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chang Wu, Guancheng Quan, Gang He, Xin-Quan Lai, Yunsong Li, Wenxin Yu, Xianmeng Lin, Cheng Yang"
      },
      {
        "type": "paperTitle",
        "text": "1157 <b>Reproducing the Past: A Dataset for Benchmarking Inscription Restoration</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shipeng Zhu, hui xue, Na Nie, Chenjie Zhu, Haiyue Liu, Pengfei Fang"
      },
      {
        "type": "paperTitle",
        "text": "1160 <b>Devil is in Details: Locality-Aware 3D Abdominal CT Volume Generation for Organ Segmentation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuran Wang, Zhijing Wan, Yansheng Qiu, Zheng Wang"
      },
      {
        "type": "paperTitle",
        "text": "1161 <b>Hypergraph-guided Intra- and Inter-category Relation Modeling for Fine-grained Visual Recognition</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Lu Chen, Qiangchang Wang, Zhaohui Li, Yilong Yin"
      },
      {
        "type": "paperTitle",
        "text": "1162 <b>Reverse2Complete: Unpaired Multimodal Point Cloud Completion via Guided Diffusion</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenxiao Zhang, Hossein Rahmani, Xun Yang, Jun Liu"
      },
      {
        "type": "paperTitle",
        "text": "1163 <b>Overcoming the Pitfalls of Vision-Language Model for Image-Text Retrieval</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Feifei Zhang, Sijia Qu, Fan Shi, Changsheng Xu"
      },
      {
        "type": "paperTitle",
        "text": "1166 <b>Dynamic Prompting of Frozen Text-to-Image Diffusion Models for Panoptic Narrative Grounding</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hongyu Li, Tianrui Hui, Zihan Ding, Jing Zhang, Bin MA, Wei Xiaoming, Jizhong Han, Si Liu"
      },
      {
        "type": "paperTitle",
        "text": "1168 <b>A Multilevel Guidance-Exploration Network and Behavior-Scene Matching Method for Human Behavior Anomaly Detection</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yang Guoqing, Zhiming Luo, Jianzhe Gao, Yingxing Lai, Kun Yang, Yifan He, Shaozi Li"
      },
      {
        "type": "paperTitle",
        "text": "1170 <b>Bi-directional Task-Guided Network for Few-Shot Fine-Grained Image Classification</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhenxiang Ma, Zhen-Duo Chen, Lijun Zhao, Zi-Chao Zhang, Tai Zheng, Xin Luo, Xin-Shun Xu"
      },
      {
        "type": "paperTitle",
        "text": "1171 <b>Attentive Linguistic Tracking in Diffusion Models for Training-free Text-guided Image Editing</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Bingyan Liu, Chengyu Wang, Jun Huang, Kui Jia"
      },
      {
        "type": "paperTitle",
        "text": "1176 <b>Student-Oriented Teacher Knowledge Refinement for Knowledge Distillation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chaomin Shen, Huang Yaomin, HaoKun Zhu, Jinsong Fan, Guixu Zhang"
      },
      {
        "type": "paperTitle",
        "text": "1178 <b>GROOT: Generating Robust Watermark for Diffusion-Model-Based Audio Synthesis</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Weizhi Liu, Yue Li, Dongdong Lin, Hui Tian, Haizhou Li"
      },
      {
        "type": "paperTitle",
        "text": "1180 <b>ClickDiff: Click to Induce Semantic Contact Map for Controllable Grasp Generation with Diffusion Models</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Peiming Li, Ziyi Wang, Mengyuan Liu, Hong Liu, Chen Chen"
      },
      {
        "type": "paperTitle",
        "text": "1181 <b>Semantic-Aware and Quality-Aware Interaction Network for Blind Video Quality Assessment</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jianjun Xiang, Yuanjie Dang, Peng Chen, Ronghua Liang, Ruohong Huan, Nan Gao"
      },
      {
        "type": "paperTitle",
        "text": "1189 <b>Benchmarking In-the-wild Multimodal Disease Recognition and A Versatile Baseline</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Tianqi Wei, Zhi Chen, Zi Huang, Xin Yu"
      },
      {
        "type": "paperTitle",
        "text": "1193 <b>COMD: Training-free Video Motion Transfer With Camera-Object Motion Disentanglement</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Teng Hu, Jiangning Zhang, Ran Yi, Yating Wang, Jieyu Weng, Hongrui Huang, Yabiao Wang, Lizhuang Ma"
      },
      {
        "type": "paperTitle",
        "text": "1196 <b>KNN Transformer with Pyramid Prompts for Few-Shot Learning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenhao Li, Qiangchang Wang, peng zhao, Yilong Yin"
      },
      {
        "type": "paperTitle",
        "text": "1202 <b>GraphLearner: Graph Node Clustering with Fully Learnable Augmentation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xihong Yang, Erxue Min, KE LIANG, Yue Liu, Siwei Wang, sihang zhou, Huijun Wu, Xinwang Liu, En Zhu"
      },
      {
        "type": "paperTitle",
        "text": "1209 <b>Exploring Data Efficiency in Image Restoration: A Gaussian Denoising Case Study</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhengwei Yin, Mingze MA, Guixu Lin, Yinqiang Zheng"
      },
      {
        "type": "paperTitle",
        "text": "1213 <b>UrbanCross: Enhancing Satellite Image-Text Retrieval with Cross-Domain Adaptation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Siru Zhong, Xixuan Hao, Yibo Yan, Ying Zhang, Yangqiu Song, Yuxuan Liang"
      },
      {
        "type": "paperTitle",
        "text": "1214 <b>Introducing Common Null Space of Gradients for Gradient Projection Methods in Continual Learning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chengyi Yang,  mingdadong, Xiaoyue Zhang, JiaYin Qi, Aimin Zhou"
      },
      {
        "type": "paperTitle",
        "text": "1226 <b>Label Decoupling and Reconstruction: A Two-Stage Training Framework for Long-tailed Multi-label Medical Image Recognition</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jie Huang, Zhaomin Chen, Xiaoqin Zhang, Yisu Ge, Lusi Ye, Guodao Zhang, Huiling Chen"
      },
      {
        "type": "paperTitle",
        "text": "1230 <b>Masked Random Noise for Communication-Efficient Federated Learning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shiwei Li, Yingyi Cheng, Haozhao Wang, Xing Tang, Shijie Xu,  weihongluo, Yuhua Li, Dugang Liu, xiuqiang He, Ruixuan Li"
      },
      {
        "type": "paperTitle",
        "text": "1235 <b>Unleashing the Power of Generic Segmentation Model: A Simple Baseline for Infrared Small Target Detection</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Mingjin Zhang, Chi Zhang, Qiming Zhang, Yunsong Li, Xinbo Gao, Jing Zhang"
      },
      {
        "type": "paperTitle",
        "text": "1238 <b>TreeReward: Improve Diffusion Model via Tree-Structured Feedback Learning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiacheng Zhang, Jie Wu, Huafeng Kuang, Haiming Zhang, Ren Yuxi, Weifeng Chen, Manlin Zhang, Xuefeng Xiao, Rui Wang, Shilei Wen, Guanbin Li"
      },
      {
        "type": "paperTitle",
        "text": "1240 <b>TAVGBench: Benchmarking Text to Audible-Video Generation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "yuxin mao, Xuyang Shen, Jing Zhang, Zhen Qin, Jinxing Zhou, Mochu Xiang, Yiran Zhong, Yuchao Dai"
      },
      {
        "type": "paperTitle",
        "text": "1242 <b>Advancing Semantic Edge Detection through Cross-Modal Knowledge Learning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ruoxi Deng, Bin Yu, Jinxuan Lu, Caixia Zhou, Zhaomin Chen, Jie Hu"
      },
      {
        "type": "paperTitle",
        "text": "1245 <b>Cross-Task Knowledge Transfer for Semi-supervised Joint 3D Grounding and Captioning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yang Liu, Daizong Liu, Zongming Guo, Wei Hu"
      },
      {
        "type": "paperTitle",
        "text": "1247 <b>Mitigating Sample Selection Bias with Robust Domain Adaption in Multimedia Recommendation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiaye Lin, Qing Li, Guorui Xie, Zhongxu Guan, Yong Jiang, Ting Xu, Zhong Zhang, Peilin Zhao"
      },
      {
        "type": "paperTitle",
        "text": "1248 <b>Exploring the Robustness of Decision-Level Through Adversarial Attacks on LLM-Based Embodied Models</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Liu Shuyuan, Jiawei Chen, Shouwei Ruan, Hang Su, ZHAOXIA YIN"
      },
      {
        "type": "paperTitle",
        "text": "1254 <b>DenseTrack: Drone-based Crowd Tracking via Density-aware Motion-appearance Synergy</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yi Lei, Huilin Zhu, Jingling Yuan, Guangli Xiang, Xian Zhong, Shengfeng He"
      },
      {
        "type": "paperTitle",
        "text": "1260 <b>Sampling to Distill: Knowledge Transfer from Open-World Data</b>"
      },
      {
        "type": "paperAuthor",
        "text": "yu'zheng Wang, Zhaoyu Chen, Jie Zhang, Dingkang Yang, Zuhao Ge, Yang Liu, Liu Siao, Yunquan Sun, Wenqiang Zhang, Lizhe Qi"
      },
      {
        "type": "paperTitle",
        "text": "1261 <b>ConsistentAvatar: Learning to Diffuse Fully Consistent Talking Head Avatar with Temporal Guidance</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Haijie Yang, Zhenyu Zhang, Hao Tang, Jianjun Qian, Jian Yang"
      },
      {
        "type": "paperTitle",
        "text": "1262 <b>Towards High-performance Spiking Transformers from ANN to SNN Conversion</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zihan Huang, Xinyu Shi, Zecheng Hao, Bu Tong, Jianhao Ding, Zhaofei Yu, Tiejun Huang"
      },
      {
        "type": "paperTitle",
        "text": "1266 <b>Customizing Text-to-Image Generation with Inverted Interaction</b>"
      },
      {
        "type": "paperAuthor",
        "text": "mengmeng Ge, Xu Jia, Takashi Isobe, Xiaomin Li, Qinghe Wang, Jing Mu, Dong Zhou, liwang Amd, Huchuan Lu, Lu Tian, Ashish Sirasao, Emad Barsoum"
      },
      {
        "type": "paperTitle",
        "text": "1270 <b>Identity-Driven Multimedia Forgery Detection via Reference Assistance</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Junhao Xu, Jingjing Chen, Xue Song, Feng Han, Haijun Shan, Yu-Gang Jiang"
      },
      {
        "type": "paperTitle",
        "text": "1273 <b>Two in One Go: Single-stage Emotion Recognition with Decoupled Subject-context Transformer</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xinpeng Li, Teng Wang, Shuyi Mao, Jinbao Wang, Jian Zhao, Xiaojiang Peng, Feng Zheng, Xuelong Li"
      },
      {
        "type": "paperTitle",
        "text": "1277 <b>Learning Enriched Features via Selective State Spaces Model for Efficient Image Deblurring</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hu Gao, Bowen Ma, Ying Zhang, Jingfan Yang, Jing Yang, Depeng Dang"
      },
      {
        "type": "paperTitle",
        "text": "1278 <b>MambaGesture: Enhancing Co-Speech Gesture Generation with Mamba and Disentangled Multi-Modality Fusion</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chencan Fu, Yabiao Wang, Jiangning Zhang, Zhengkai Jiang, Xiaofeng Mao, JiaFu Wu, Weijian Cao, Chengjie Wang, Yanhao Ge, Yong Liu"
      },
      {
        "type": "paperTitle",
        "text": "1280 <b>Modality-Balanced Learning for Multimedia Recommendation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jinghao Zhang, Guofan Liu, Qiang Liu, Shu Wu, Liang Wang"
      },
      {
        "type": "paperTitle",
        "text": "1282 <b>Learning from Concealed Labels</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhongnian Li, Meng Wei, Peng Ying, Tongfeng Sun, Xinzheng Xu"
      },
      {
        "type": "paperTitle",
        "text": "1295 <b>P^2SAM: Probabilistically Prompted SAMs Are Efficient Segmentator for Ambiguous Medical Images</b>"
      },
      {
        "type": "paperAuthor",
        "text": " Yuzhihuang, Chenxin Li, ZiXu Lin, 恒宇 刘, haote xu, Yifan Liu, Yue Huang, Xinghao Ding, Xiaotong Tu, Yixuan Yuan"
      },
      {
        "type": "paperTitle",
        "text": "1299 <b>Video Bokeh Rendering: Make Casual Videography Cinematic</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yawen Luo, Min Shi, Liao Shen, Yachuan Huang, Zixuan Ye, Juewen Peng, Zhiguo Cao"
      },
      {
        "type": "paperTitle",
        "text": "1302 <b>Towards Video-based Activated Muscle Group Estimation in the Wild</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Kunyu Peng, David Schneider, Alina Roitberg, Kailun Yang, Jiaming Zhang, Chen Deng, Kaiyu Zhang, M. Sarfraz, Rainer Stiefelhagen"
      },
      {
        "type": "paperTitle",
        "text": "1303 <b>Understanding the Impact of AI-Generated Content on Social Media: The Pixiv Case</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yiluo Wei, Gareth Tyson"
      },
      {
        "type": "paperTitle",
        "text": "1309 <b>ADDG: An Adaptive Domain Generalization Framework for Cross-Plane MRI Segmentation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zibo Ma, Bo Zhang, Zheng Zhang, Wu Liu, Wufan Wang, Hui Gao, Wendong Wang"
      },
      {
        "type": "paperTitle",
        "text": "1333 <b>Virtual Agent Positioning Driven by Personal Characteristics</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jingjing Liu, Youyi Zheng, Kun Zhou"
      },
      {
        "type": "paperTitle",
        "text": "1337 <b>DiffTV: Identity-Preserved Thermal-to-Visible Face Translation via Feature Alignment and Dual-Stage Conditions</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jingyu Lin, Guiqin Zhao, Jing Xu, Guoli Wang, Zejin Wang, Antitza Dantcheva, Lan Du, Cunjian Chen"
      },
      {
        "type": "paperTitle",
        "text": "1338 <b>PhysReaction: Physically Plausible Real-Time Humanoid Reaction Synthesis via Forward Dynamics Guided 4D Imitation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yunze Liu, Changxi Chen, Chenjing Ding, Li Yi"
      },
      {
        "type": "paperTitle",
        "text": "1342 <b>DisenStudio: Customized Multi-subject Text-to-Video Generation with Disentangled Spatial Control</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hong Chen, Xin Wang, Yipeng Zhang, Yuwei Zhou, Zeyang Zhang, Siao Tang, Wenwu Zhu"
      },
      {
        "type": "paperTitle",
        "text": "1344 <b>MetaRepair: Learning to Repair Deep Neural Networks from Repairing Experiences</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yun Xing, Qing Guo, Xiaofeng Cao, Ivor W Tsang, Lei Ma"
      },
      {
        "type": "paperTitle",
        "text": "1348 <b>RSC-SNN: Exploring the Trade-off Between Adversarial Robustness and Accuracy in Spiking Neural Networks via Randomized Smoothing Coding</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Keming Wu, Man Yao, Yuhong Chou, Xuerui Qiu, Rui Yang, Bo XU, Guoqi Li"
      },
      {
        "type": "paperTitle",
        "text": "1354 <b>Event-Guided Rolling Shutter Correction with Time-Aware Cross-Attentions</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hefei Huang, Xu Jia, Xinyu Zhang, Shengming Li, Huchuan Lu"
      },
      {
        "type": "paperTitle",
        "text": "1357 <b>Sketch3D: Style-Consistent Guidance for Sketch-to-3D Generation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wangguandong Zheng, Haifeng Xia, Rui Chen, Libo Sun, Ming Shao, Si-Yu Xia, Zhengming Ding"
      },
      {
        "type": "paperTitle",
        "text": "1358 <b>SIA-OVD: Shape-Invariant Adapter for Bridging the Image-Region Gap in Open-Vocabulary Detection</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zishuo Wang, Wenhao Zhou, Jinglin Xu, Yuxin Peng"
      },
      {
        "type": "paperTitle",
        "text": "1360 <b>Hybrid Cost Volume for Memory-Efficient Optical Flow</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yang Zhao, Gangwei Xu, Gang Wu"
      },
      {
        "type": "paperTitle",
        "text": "1363 <b>A Sample-driven Selection Framework: Towards Graph Contrastive Networks with Reinforcement Learning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiangping Zheng, Wei Li, Bo Wu, Xigang Bao, Xuan Zhang, Wei Li, Xun Liang"
      },
      {
        "type": "paperTitle",
        "text": "1365 <b>Semi-supervised Camouflaged Object Detection from Noisy Data</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuanbin Fu, Jie Ying, Houlei Lv, Xiaojie Guo"
      },
      {
        "type": "paperTitle",
        "text": "1366 <b>Learning Unknowns from Unknowns: Diversified Negative Prototypes Generator for Few-shot Open-Set Recognition</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhenyu Zhang, Guangyao Chen, Yixiong Zou, Yuhua Li, Ruixuan Li"
      },
      {
        "type": "paperTitle",
        "text": "1367 <b>MICM: Rethinking Unsupervised Pretraining for Enhanced Few-shot Learning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhenyu Zhang, Guangyao Chen, Yixiong Zou, Zhimeng Huang, Yuhua Li, Ruixuan Li"
      },
      {
        "type": "paperTitle",
        "text": "1369 <b>VmambaSCI: Dynamic Deep Unfolding Network with Mamba for Compressive Spectral Imaging</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Mingjin Zhang, Longyi Li, Wenxuan SHI, Jie Guo, Yunsong Li, Xinbo Gao"
      },
      {
        "type": "paperTitle",
        "text": "1371 <b>AesExpert: Towards Multi-modality Foundation Model for Image Aesthetics Perception</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yipo Huang, Xiangfei Sheng, Zhichao Yang, Quan Yuan, Zhichao Duan, Pengfei Chen, Leida Li, Weisi Lin, Guangming Shi"
      },
      {
        "type": "paperTitle",
        "text": "1376 <b>Multi-Scale and Detail-Enhanced Segment Anything Model for Salient Object Detection</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shixuan Gao, Pingping Zhang, Tianyu Yan, Huchuan Lu"
      },
      {
        "type": "paperTitle",
        "text": "1380 <b>Dual-stream Feature Augmentation for Domain Generalization</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shanshan Wang,  ALuSi, Xun Yang, Ke Xu, Huibin Tan, Xingyi Zhang"
      },
      {
        "type": "paperTitle",
        "text": "1388 <b>Robust Multimodal Sentiment Analysis of Image-Text Pairs by Distribution-Based Feature Recovery and Fusion</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Daiqing Wu, Dongbao Yang, Yu Zhou, Can Ma"
      },
      {
        "type": "paperTitle",
        "text": "1390 <b>Reversing Structural Pattern Learning with Biologically Inspired Knowledge Distillation for Spiking Neural Networks</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Qi Xu, Yaxin Li, Xuanye Fang, Jiangrong Shen, Qiang Zhang, Gang Pan"
      },
      {
        "type": "paperTitle",
        "text": "1398 <b>Joint Homophily and Heterophily Relational Knowledge Distillation for Efficient and Compact 3D Object Detection</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shidi Chen, Lili Wei, Liqian Liang, Congyan Lang"
      },
      {
        "type": "paperTitle",
        "text": "1401 <b>Consistencies are All You Need for Semi-supervised Vision-Language Tracking</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiawei Ge, Jiuxin Cao, Xuelin Zhu, Xinyu Zhang, Chang Liu, Kun Wang, Bo Liu"
      },
      {
        "type": "paperTitle",
        "text": "1413 <b>Pick-and-Draw: Training-free Semantic Guidance for Text-to-Image Personalization</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Henglei Lv, Jiayu Xiao, Liang Li"
      },
      {
        "type": "paperTitle",
        "text": "1414 <b>Embodied Laser Attack: Leveraging Scene Priors to Achieve Agent-based Robust Non-contact Attacks</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yitong Sun, Yao Huang, Xingxing Wei"
      },
      {
        "type": "paperTitle",
        "text": "1416 <b>SimpliGuard: Robust Mesh Simplification In the Wild</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Peibin Chen, Xijin Zhang, Daniel Du"
      },
      {
        "type": "paperTitle",
        "text": "1418 <b>P-RAG: Progressive Retrieval Augmented Generation For Planning on Embodied Everyday Task</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Weiye Xu, Min Wang, Wengang Zhou, Houqiang Li"
      },
      {
        "type": "paperTitle",
        "text": "1420 <b>COCO-LC: Colorfulness Controllable Language-based Colorization</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yifan Li, Yuhang Bai, Shuai Yang, Jiaying Liu"
      },
      {
        "type": "paperTitle",
        "text": "1431 <b>CLIPCleaner: Cleaning Noisy Labels with CLIP</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chen Feng, Georgios Tzimiropoulos, Ioannis Patras"
      },
      {
        "type": "paperTitle",
        "text": "1432 <b>AutoM3L: An Automated Multimodal Machine Learning Framework with Large Language Models</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Daqin Luo, Chengjian Feng, yuxuan nong, Yiqing Shen"
      },
      {
        "type": "paperTitle",
        "text": "1437 <b>A Plug-and-Play Method for Rare Human-Object Interactions Detection by Bridging Domain Gap</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Lijun Zhang, Wei Suo, PENG WANG, Yanning Zhang"
      },
      {
        "type": "paperTitle",
        "text": "1440 <b>Multi-scale Twin-attention for 3D Instance Segmentation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Tran Dang Trung Duc, Byeongkeun Kang, Yeejin Lee"
      },
      {
        "type": "paperTitle",
        "text": "1441 <b>Visual-Language Collaborative Representation Network for Broad-Domain Few-Shot Image Classification</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Qianyu Guo, Jieji Ren, Haofen Wang, Tianxing Wu, Weifeng Ge, Wenqiang Zhang"
      },
      {
        "type": "paperTitle",
        "text": "1443 <b>SpeechEE: A Novel Benchmark for Speech Event Extraction</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Bin Wang, Meishan Zhang, Hao Fei, Yu Zhao, Bobo Li, Shengqiong Wu, Wei Ji, Min Zhang"
      },
      {
        "type": "paperTitle",
        "text": "1444 <b>Cluster-Phys: Facial Clues Clustering Towards Efficient Remote Physiological Measurement</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Qian Wei, Kun Li, Dan Guo, Bin Hu, Meng Wang"
      },
      {
        "type": "paperTitle",
        "text": "1445 <b>Rethinking the One-shot Object Detection: Cross-Domain Object Search</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yupeng Zhang, Shuqi Zheng, Ruize Han, Yuzhong Feng, Junhui Hou, Linqi Song, Wei Feng, Liang Wan"
      },
      {
        "type": "paperTitle",
        "text": "1447 <b>CRASH: Crash Recognition and Anticipation System Harnessing with Context-Aware and Temporal Focus Attentions</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Haicheng Liao, Haoyu Sun, Zhenning Li,  HuanmingShen, Chengyue Wang, KaHou Tam, Chunlin Tian, Li Li, Cheng-zhong Xu"
      },
      {
        "type": "paperTitle",
        "text": "1451 <b>T2VIndexer: A Generative Video Indexer for Efficient Text-Video Retrieval</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yili Li, Jing Yuan, Keke Gai, Bang Liu, Gang Xiong, Qi Wu"
      },
      {
        "type": "paperTitle",
        "text": "1456 <b>Animatable 3D Gaussian: Fast and High-Quality Reconstruction of Multiple Human Avatars</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yang Liu, Xiang Huang, Minghan Qin, Lin Qinwei, Haoqian Wang"
      },
      {
        "type": "paperTitle",
        "text": "1459 <b>RFFNet: Towards Robust and Flexible Fusion for Low-Light Image Denoising</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Qiang Wang, Yuning Cui, Yawen Li,  paulruan,  zhuben, Wenqi Ren"
      },
      {
        "type": "paperTitle",
        "text": "1463 <b>ZePo: Zero-Shot Portrait Stylization with Faster Sampling</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jin Liu, Huaibo Huang, Jie Cao, Ran He"
      },
      {
        "type": "paperTitle",
        "text": "1470 <b>Dual-Optimized Adaptive Graph Reconstruction for Multi-View Graph Clustering</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zichen Wen, Tianyi Wu, Yazhou Ren, Yawen Ling, Chenhang Cui, Xiaorong Pu, Lifang He"
      },
      {
        "type": "paperTitle",
        "text": "1474 <b>Domain-Conditioned Transformer for Fully Test-time Adaptation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yushun Tang, Shuoshuo Chen, Jiyuan Jia, Yi Zhang, Zhihai He"
      },
      {
        "type": "paperTitle",
        "text": "1478 <b>Ego3DT: Tracking Every 3D Object in Ego-centric Videos</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shengyu Hao, Wenhao Chai, Zhonghan Zhao, Meiqi Sun, Wendi Hu, Jieyang Zhou, Yixian Zhao, Qi Li, Yizhou Wang, Xi Li, Gaoang Wang"
      },
      {
        "type": "paperTitle",
        "text": "1481 <b>Towards Open-vocabulary HOI Detection with Calibrated Vision-language Models and Locality-aware Queries</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhenhao Yang, Xin Liu, Deqiang Ouyang, Guiduo Duan, Dongyang Zhang, Tao He, Yuan-Fang Li"
      },
      {
        "type": "paperTitle",
        "text": "1484 <b>Cons2Plan: Vector Floorplan Generation from Various Conditions via a Learning Framework based on Conditional Diffusion Models</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shibo Hong, Xuhong Zhang, Tianyu Du, Sheng Cheng, Xun Wang, Jianwei Yin"
      },
      {
        "type": "paperTitle",
        "text": "1489 <b>Gaussian Mutual Information Maximization for Efficient Graph Self-Supervised Learning: Bridging Contrastive-based to Decorrelation-based</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jinyong Wen"
      },
      {
        "type": "paperTitle",
        "text": "1490 <b>Fuse Your Latents: Video Editing with Multi-source Latent Diffusion Models</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Tianyi Lu, Xing Zhang, Jiaxi Gu, Hang Xu, Renjing Pei, Songcen Xu, Xingjun Ma, Zuxuan Wu"
      },
      {
        "type": "paperTitle",
        "text": "1492 <b>MDT-A2G: Exploring Masked Diffusion Transformers for Co-Speech Gesture Generation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiaofeng Mao, Zhengkai Jiang, Qilin Wang, Chencan Fu, Jiangning Zhang, JiaFu Wu, Yabiao Wang, Chengjie Wang, Wei Li, Mingmin Chi"
      },
      {
        "type": "paperTitle",
        "text": "1502 <b>FiLo: Zero-Shot Anomaly Detection by Fine-Grained Description and High-Quality Localization</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhaopeng Gu, Bingke Zhu, Guibo Zhu, Yingying Chen, Hao Li, Ming Tang, Jinqiao Wang"
      },
      {
        "type": "paperTitle",
        "text": "1503 <b>MoS<sup>2</sup>: Mixture of Scale and Shift Experts for Text-Only Video Captioning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Heng Jia, Yunqiu Xu, Linchao Zhu, Guang Chen, Yufei Wang, Yi Yang"
      },
      {
        "type": "paperTitle",
        "text": "1504 <b>Progressive Point Cloud Denoising with Cross-Stage Cross-Coder Adaptive Edge Graph Convolution Network</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wu Chen, Hehe Fan, Qiuping Jiang, Chao Huang, Yi Yang"
      },
      {
        "type": "paperTitle",
        "text": "1516 <b>Tangram-Splatting: Optimizing 3D Gaussian Splatting Through Tangram-inspired Shape Priors</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yi Wang, Ningze Zhong, Minglin Chen, Longguang Wang, Yulan Guo"
      },
      {
        "type": "paperTitle",
        "text": "1520 <b>Generating Action-conditioned Prompts for Open-vocabulary Video Action Recognition</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chengyou Jia, Minnan Luo, Xiaojun Chang, ZhuoHang Dang, Mingfei Han, Mengmeng Wang, Guang Dai, Sizhe Dang, Jingdong Wang"
      },
      {
        "type": "paperTitle",
        "text": "1521 <b>Magic Clothing: Controllable Garment-Driven Image Synthesis</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Weifeng Chen, Tao Gu, Yuhao Xu, Arlene Chen"
      },
      {
        "type": "paperTitle",
        "text": "1525 <b>When ControlNet Meets Inexplicit Masks: A Case Study of ControlNet on its Contour-following Ability</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenjie Xuan, Yufei Xu, Shanshan Zhao, Chaoyue Wang, Juhua Liu, Bo Du, Dacheng Tao"
      },
      {
        "type": "paperTitle",
        "text": "1526 <b>HandRefiner: Refining Malformed Hands in Generated Images by Diffusion-based Conditional Inpainting</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenquan Lu, Yufei Xu, Jing Zhang, Chaoyue Wang, Dacheng Tao"
      },
      {
        "type": "paperTitle",
        "text": "1530 <b>Causal Visual-semantic Correlation for Zero-shot Learning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shuhuang Chen, Dingjie Fu, Shiming Chen, shuo Ye, Hou Jin, Xinge You"
      },
      {
        "type": "paperTitle",
        "text": "1535 <b>Zenith: Real-time Identification of DASH Encrypted Video Traffic with Distortion</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Weitao Tang, Jianqiang Li, Meijie Du, Die Hu, Qingyun Liu"
      },
      {
        "type": "paperTitle",
        "text": "1536 <b>SAR-SLAM: Self-Attentive Rendering-based SLAM with Neural Point Cloud Encoding</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xudong Lv, Zhiwei He, Yuxiang Yang, Jiahao Nie, Jing Zhang"
      },
      {
        "type": "paperTitle",
        "text": "1545 <b>Detached and Interactive Multimodal Learning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yunfeng FAN, Wenchao Xu, Haozhao Wang, Junhong Liu, Song Guo"
      },
      {
        "type": "paperTitle",
        "text": "1547 <b>Channel-Spatial Support-Query Cross-Attention for Fine-Grained Few-Shot Image Classification</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shicheng Yang, Xiaoxu Li, Dongliang Chang, Zhanyu Ma, Jing-Hao Xue"
      },
      {
        "type": "paperTitle",
        "text": "1552 <b>Cross-View Mutual Learning for Semi-Supervised Medical Image Segmentation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Song Wu, Xiaoyu Wei, Xinyue Chen, Yazhou Ren, Jing He, Xiaorong Pu"
      },
      {
        "type": "paperTitle",
        "text": "1553 <b>On-the-fly Point Feature Representation for Point Clouds Analysis</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wang Jiangyi, Zhongyao Cheng, Na Zhao, Jun Cheng, Xulei Yang"
      },
      {
        "type": "paperTitle",
        "text": "1565 <b>GPD-VVTO: Preserving Garment Details in Video Virtual Try-On</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuanbin Wang, Weilun Dai, Chan Long, Huanyu Zhou, Aixi Zhang, Si Liu"
      },
      {
        "type": "paperTitle",
        "text": "1568 <b>Dr. CLIP: CLIP-Driven Universal Framework for Zero-Shot Sketch Image Retrieval</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xue Li, YU Jiong, Ziyang Li, Hongchun Lu, Ruifeng Yuan"
      },
      {
        "type": "paperTitle",
        "text": "1576 <b>Sentiment-Oriented Sarcasm Integration: Effective Enhancement of Video Sentiment Analysis with Sarcasm Assistance</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Junlin Fang, Wenya Wang, Guosheng Lin, Fengmao Lv"
      },
      {
        "type": "paperTitle",
        "text": "1595 <b>Make Privacy Renewable! Generating Privacy-Preserving Faces Supporting Cancelable Biometric Recognition</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Tao Wang, Yushu Zhang, Xiangli Xiao, Lin Yuan, Zhihua Xia, Jian Weng"
      },
      {
        "type": "paperTitle",
        "text": "1596 <b>PanoSent: A Panoptic Sextuple Extraction Benchmark for Multimodal Conversational Aspect-based Sentiment Analysis</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Meng Luo, Hao Fei, Bobo Li, Shengqiong Wu, Qian Liu, Soujanya Poria, Erik Cambria, Mong-Li Lee, Wynne Hsu"
      },
      {
        "type": "paperTitle",
        "text": "1600 <b>Event Traffic Forecasting with Sparse Multimodal Data</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiao Han, Zhenduo zhang, Yiling Wu, Xinfeng Zhang, Zhe Wu"
      },
      {
        "type": "paperTitle",
        "text": "1601 <b>Align2Concept: Language Guided Interpretable Image Recognition by Visual Prototype and Textual Concept Alignment</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiaqi Wang, Pichao WANG, Yi Feng, Huafeng Liu, Chang Gao, liping jing"
      },
      {
        "type": "paperTitle",
        "text": "1603 <b>Multimodal Unlearnable Examples: Protecting Data against Multimodal Contrastive Learning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xinwei Liu, Xiaojun Jia, Yuan Xun, Siyuan Liang, Xiaochun Cao"
      },
      {
        "type": "paperTitle",
        "text": "1610 <b>DreamLCM: Towards High Quality Text-to-3D Generation Via Latent Consistency Model</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yiming Zhong, Xiaolin Zhang, Yao Zhao, Yunchao Wei"
      },
      {
        "type": "paperTitle",
        "text": "1611 <b>Training-Free Feature Reconstruction with Sparse Optimization for Vision-Language Models</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yi Zhang, Ke Yu, Angelica Aviles-Rivero, Jiyuan Jia, Yushun Tang, Zhihai He"
      },
      {
        "type": "paperTitle",
        "text": "1613 <b>Towards Effective Data-Free Knowledge Distillation via Diverse Diffusion Augmentation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Muquan Li, Dongyang Zhang, Tao He, Xiurui Xie, Yuan-Fang Li, Ke Qin"
      },
      {
        "type": "paperTitle",
        "text": "1617 <b>Toward Robust Live Streaming over LEO Satellite Constellations: Measurement, Analysis, and Handover-Aware Adaptation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hao Fang, Haoyuan Zhao, Jianxin Shi, Miao Zhang, Guanzhen Wu, Yi Ching Chou, FENG WANG, Jiangchuan Liu"
      },
      {
        "type": "paperTitle",
        "text": "1620 <b>Advancing Generalized Deepfake Detector with Forgery Perception Guidance</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ruiyang Xia, Zhou Dawei, Decheng Liu, Lin Yuan, Shuodi Wang, Jie Li, Nannan Wang, Xinbo Gao"
      },
      {
        "type": "paperTitle",
        "text": "1633 <b>Restoring Real-World Degraded Events Improves Deblurring Quality</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yeqing Shen, Shang Li, Kun Song"
      },
      {
        "type": "paperTitle",
        "text": "1634 <b>One-shot-but-not-degraded Federated Learning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hui Zeng, Minrui Xu, Tongqing Zhou, Xinyi Wu, Jiawen Kang, Zhiping Cai, Dusit Niyato"
      },
      {
        "type": "paperTitle",
        "text": "1635 <b>LDCNet: Long-Distance Context Modeling for Large-Scale 3D Point Cloud Scene Semantic Segmentation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "守桐 骆, Zhengxing Sun, Yi Wang, Yunhan Sun, Chendi Zhu"
      },
      {
        "type": "paperTitle",
        "text": "1638 <b>Few-Shot Joint Multimodal Entity-Relation Extraction via Knowledge-Enhanced Cross-modal Prompt Model</b>"
      },
      {
        "type": "paperAuthor",
        "text": "li yuan, Yi Cai, Junsheng Huang"
      },
      {
        "type": "paperTitle",
        "text": "1641 <b>Addressing Imbalance for Class Incremental Learning in Medical Image Classification</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xuze Hao, Wenqian Ni, Xuhao Jiang, Weimin Tan, Bo Yan"
      },
      {
        "type": "paperTitle",
        "text": "1644 <b>Towards Practical Human Motion Prediction with LiDAR Point Clouds</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiao Han, Yiming Ren, Yichen Yao, YUJING SUN, Yuexin Ma"
      },
      {
        "type": "paperTitle",
        "text": "1649 <b>QPT-V2: Masked Image Modeling Advances Visual Scoring</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Qizhi Xie, Kun Yuan, Yunpeng Qu, Mingda Wu, Ming Sun, Chao Zhou, Jihong Zhu"
      },
      {
        "type": "paperTitle",
        "text": "1651 <b>Masked Snake Attention for Fundus Image Restoration with Vessel Preservation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiaohuan Ding, Gong Yangrui, Tianyi Shi, Zihang Huang, Gangwei Xu, Xin Yang"
      },
      {
        "type": "paperTitle",
        "text": "1656 <b>Multi-Granularity Hand Action Detection</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ting Zhe, Jing Zhang, Yongqian Li, Yong Luo, Han Hu, Dacheng Tao"
      },
      {
        "type": "paperTitle",
        "text": "1658 <b>Edit As You Wish: Video Caption Editing with Multi-grained User Control</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Linli Yao, Yuanmeng Zhang, Ziheng Wang, Xinglin Hou, Tiezheng Ge, Yuning Jiang, Xu Sun, Qin Jin"
      },
      {
        "type": "paperTitle",
        "text": "1663 <b>Decoder-Only LLMs are Better Controllers for Diffusion Models</b>"
      },
      {
        "type": "paperAuthor",
        "text": "ZiYi Dong, Yao Xiao, Pengxu Wei, Liang Lin"
      },
      {
        "type": "paperTitle",
        "text": "1667 <b>MultiColor: Image Colorization by Learning from Multiple Color Spaces</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiangcheng Du, Zhao Zhou, Xingjiao Wu, Yanlong Wang, Zhuoyao Wang, Yingbin Zheng, Cheng Jin"
      },
      {
        "type": "paperTitle",
        "text": "1669 <b>BCSCN: Reducing Domain Gap through Bézier Curve basis-based Sparse Coding Network for Single-Image Super-Resolution</b>"
      },
      {
        "type": "paperAuthor",
        "text": "wenhao Guo, Peng Lu, Xujun Peng, Zhao Zhaoran, Ji Qiu, XiangTao Dong"
      },
      {
        "type": "paperTitle",
        "text": "1675 <b>Seeing Text in the Dark: Algorithm and Benchmark</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chengpei Xu, Hao Fu, Long Ma, Wenjing Jia, Chengqi Zhang, Feng Xia, Xiaoyu Ai, Binghao Li, Wenjie Zhang"
      },
      {
        "type": "paperTitle",
        "text": "1677 <b>Equilibrated Diffusion: Frequency-aware Textual Embedding for Equilibrated Image Customization</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Liyuan Ma, Xueji Fang, Guo-Jun Qi"
      },
      {
        "type": "paperTitle",
        "text": "1686 <b>Chain of Visual Perception: Harnessing Multimodal Large Language Models for Zero-shot Camouflaged Object Detection</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Lv Tang, Peng-Tao Jiang, Zhihao Shen, Hao Zhang, Jinwei Chen, Bo Li"
      },
      {
        "type": "paperTitle",
        "text": "1688 <b>Reversed in Time: A Novel Temporal-Emphasized Benchmark for Cross-Modal Video-Text Retrieval</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yang Du, Yuqi Liu, Qin Jin"
      },
      {
        "type": "paperTitle",
        "text": "1691 <b>Graph Convolutional Semi-Supervised Cross-Modal Hashing</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiaobo Shen,  GaoyaoYu, YinFan Chen, Xichen Yang, Yuhui Zheng"
      },
      {
        "type": "paperTitle",
        "text": "1694 <b>Spatio-temporal Heterogeneous Federated Learning for Time Series Classification with Multi-view Orthogonal Training</b>"
      },
      {
        "type": "paperAuthor",
        "text": "CHENRUI WU, Haishuai Wang, Xiang Zhang, Zhen Fang, Jiajun Bu"
      },
      {
        "type": "paperTitle",
        "text": "1697 <b>DreamBooth++: Boosting Subject-Driven Generation via Region-Level References Packing</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Fan Zhongyi, Zixin Yin, Gang Li, Yibing Zhan, Heliang Zheng"
      },
      {
        "type": "paperTitle",
        "text": "1700 <b>Semi-supervised Visible-Infrared Person Re-identification via Modality Unification and Confidence Guidance</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiying Zheng, zhang yukang, Yang Lu, Hanzi Wang"
      },
      {
        "type": "paperTitle",
        "text": "1701 <b>Advancing Quantization Steps Estimation: A Two-Stream Network Approach for Enhancing Robustness</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Cheng Xin, Hao Wang, Jinwei Wang, Xiangyang Luo, Bin Ma"
      },
      {
        "type": "paperTitle",
        "text": "1702 <b>Boosting Non-causal Semantic Elimination: An Unconventional Harnessing of LVM for Open-World Deepfake Interpretation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhaoyang Li, Zhu Teng, Baopeng Zhang, Jianping Fan"
      },
      {
        "type": "paperTitle",
        "text": "1703 <b>Efficient Perceiving Local Details via Adaptive Spatial-Frequency Information Integration for Multi-focus Image Fusion</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jingjia Huang, Jingyan Tu, Ge Meng, Yingying Wang, Yuhang Dong, Xiaotong Tu, Xinghao Ding, Yue Huang"
      },
      {
        "type": "paperTitle",
        "text": "1706 <b>GS<sup>3</sup>LAM: Gaussian Semantic Splatting SLAM</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Linfei Li, Lin Zhang, Zhong Wang, Ying Shen"
      },
      {
        "type": "paperTitle",
        "text": "1710 <b>A General Framework to Boost 3D GS Initialization for Text-to-3D Generation by Lexical Richness</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Lutao Jiang, Hangyu Li, Lin Wang"
      },
      {
        "type": "paperTitle",
        "text": "1713 <b>Multimodal Low-light Image Enhancement with Depth Information</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhen Wang, Dongyuan Li, Guang Li, Ziqing Zhang, Renhe Jiang"
      },
      {
        "type": "paperTitle",
        "text": "1720 <b>ObjBlur: A Curriculum Learning Approach With Progressive Object-Level Blurring for Improved Layout-to-Image Generation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Stanislav Frolov, Brian Moser, Sebastian Palacio, Andreas Dengel"
      },
      {
        "type": "paperTitle",
        "text": "1722 <b>Prompt-Guided Image-Adaptive Neural Implicit Lookup Tables for Interpretable Image Enhancement</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Satoshi Kosugi"
      },
      {
        "type": "paperTitle",
        "text": "1734 <b>Efficient Single Image Super-Resolution with Entropy Attention and Receptive Field Augmentation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiaole Zhao, Linze Li, Chengxing Xie, XIAOMING ZHANG, Ting Jiang, Wenjie Lin, Shuaicheng Liu, Tianrui Li"
      },
      {
        "type": "paperTitle",
        "text": "1735 <b>Dual-Hybrid Attention Network for Specular Highlight Removal</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiaojiao Guo, Xuhang Chen, Shenghong Luo, Shuqiang Wang, Chi-Man Pun"
      },
      {
        "type": "paperTitle",
        "text": "1746 <b>Learning with Alignments: Tackling the Inter- and Intra-domain Shifts for Cross-multidomain Facial Expression Recognition</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuxiang Yang, Lu Wen, Xinyi Zeng, Yuanyuan Xu, Xi Wu, Jiliu Zhou, Yan Wang"
      },
      {
        "type": "paperTitle",
        "text": "1753 <b>Mitigating Social Biases in Text-to-Image Diffusion Models via Linguistic-Aligned Attention Guidance</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yue Jiang, Yueming Lyu, Ziwen He, Bo Peng, Jing Dong"
      },
      {
        "type": "paperTitle",
        "text": "1755 <b>Adaptive Query Selection for Camouflaged Instance Segmentation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Bo Dong, Pichao WANG, Hao Luo, Fan Wang"
      },
      {
        "type": "paperTitle",
        "text": "1757 <b>CREAM: Coarse-to-Fine Retrieval and Multi-modal Efficient Tuning for Document VQA</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jinxu Zhang, Yongqi Yu, Zhang Yu"
      },
      {
        "type": "paperTitle",
        "text": "1763 <b>An Inverse Partial Optimal Transport Framework for Music-guided Trailer Generation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yutong Wang, Sidan Zhu, Hongteng Xu, Dixin Luo"
      },
      {
        "type": "paperTitle",
        "text": "1776 <b>PerFRDiff: Personalised Weight Editing for Multiple Appropriate Facial Reaction Generation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hengde Zhu, Xiangyu Kong, Weicheng Xie, Xin Huang, Linlin Shen, Lu Liu, Hatice Gunes, Siyang Song"
      },
      {
        "type": "paperTitle",
        "text": "1780 <b>Product2IMG: Prompt-Free E-commerce Product Background Generation with Diffusion Model and Self-Improved LMM</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Tingfeng Cao, Junsheng Kong, Xue Zhao, Wenqing Yao, Junwei Ding, Jinhui Zhu, Jian Dong Zhang"
      },
      {
        "type": "paperTitle",
        "text": "1784 <b>MAGIC: Rethinking Dynamic Convolution Design for Medical Image Segmentation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shijie Li, Yunbin Tu, Qingyuan Xiang, Zheng Li"
      },
      {
        "type": "paperTitle",
        "text": "1785 <b>AdaFPP: Adapt-Focused Bi-Propagating Prototype Learning for Panoramic Activity Recognition</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Meiqi Cao, Rui Yan, Xiangbo Shu, Guangzhao Dai, Yazhou Yao, Guo-Sen Xie"
      },
      {
        "type": "paperTitle",
        "text": "1789 <b>Shape-Guided Clothing Warping for Virtual Try-On</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiaoyu Han, Shunyuan Zheng, Zonglin Li, Chenyang Wang, Xin Sun, Quanling Meng"
      },
      {
        "type": "paperTitle",
        "text": "1792 <b>Advancing 3D Object Grounding Beyond a Single 3D Scene</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wencan Huang, Daizong Liu, Wei Hu"
      },
      {
        "type": "paperTitle",
        "text": "1800 <b>Learning Realistic Sketching: A Dual-agent Reinforcement Learning Approach</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ji Qiu, Peng Lu, Xujun Peng, wenhao Guo, Zhao Zhaoran, XiangTao Dong"
      },
      {
        "type": "paperTitle",
        "text": "1805 <b>In-Context Learning for Zero-shot Medical Report Generation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "RUI Liu, Mingjie Li, Shen Zhao, Ling Chen, Xiaojun Chang, Lina Yao"
      },
      {
        "type": "paperTitle",
        "text": "1807 <b>LanEvil: Benchmarking the Robustness of Lane Detection to Environmental Illusions</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Tianyuan Zhang, Lu Wang, Hainan Li, Yisong Xiao, Siyuan Liang, Aishan Liu, Xianglong Liu, Dacheng Tao"
      },
      {
        "type": "paperTitle",
        "text": "1810 <b>DVF: Advancing Robust and Accurate Fine-Grained Image Retrieval with Retrieval Guidelines</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xin Jiang, Hao Tang, Rui Yan, Jinhui Tang, Zechao Li"
      },
      {
        "type": "paperTitle",
        "text": "1813 <b>MVPbev: Multi-view Perspective Image Generation from BEV with Test-time Controllability and Generalizability</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Buyu Liu, Kai Wang, Yansong Liu, Jun Bao, Tingting Han, Jun Yu"
      },
      {
        "type": "paperTitle",
        "text": "1815 <b>HOGDA: Boosting Semi-supervised Graph Domain Adaptation via High-Order Structure-Guided Adaptive Feature Alignment</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jun Dan, Liu Weiming, Mushui Liu, Chunfeng Xie, Shunjie Dong, Guofang Ma, Yanchao Tan, Jiazheng Xing"
      },
      {
        "type": "paperTitle",
        "text": "1821 <b>Towards Robust Physical-world Backdoor Attacks on Lane Detection</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xinwei Zhang, Aishan Liu, Tianyuan Zhang, Siyuan Liang, Xianglong Liu"
      },
      {
        "type": "paperTitle",
        "text": "1822 <b>Informative Point cloud Dataset Extraction for Classification via Gradient-based Points Moving</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenxiao Zhang, Ziqi Wang, Li Xu, Xun Yang, Jun Liu"
      },
      {
        "type": "paperTitle",
        "text": "1827 <b>Relational Diffusion Distillation For Efficient Image Generation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Weilun Feng, Chuanguang Yang, Zhulin An, Libo Huang, Boyu Diao, Fei Wang, Yongjun Xu"
      },
      {
        "type": "paperTitle",
        "text": "1828 <b>Private Gradient Estimation is Useful for Generative Modeling</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Bochao Liu, Pengju Wang, Weijia Guo, Yong Li, Liansheng Zhuang, Weiping Wang, Shiming Ge"
      },
      {
        "type": "paperTitle",
        "text": "1829 <b>Speech Reconstruction from Silent Lip and Tongue Articulation by Diffusion Models and Text-Guided Pseudo Target Generation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Rui-Chen Zheng, Yang Ai, Zhen-Hua Ling"
      },
      {
        "type": "paperTitle",
        "text": "1833 <b>FOCT: Few-shot Industrial Anomaly Detection with Foreground-aware Online Conditional Transport</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Long Tian, Hongyi Zhao, Ruiying Lu, Rongrong Wang, YuJie Wu, Liming Wang, Xiongpeng He, Xiyang Liu"
      },
      {
        "type": "paperTitle",
        "text": "1834 <b>A Novel Confidence Guided Training Method for Conditional GANs with Auxiliary Classifier</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Qi Chen, wenjie liu, Hu Ding"
      },
      {
        "type": "paperTitle",
        "text": "1837 <b>Generative Active Learning for Image Synthesis Personalization</b>"
      },
      {
        "type": "paperAuthor",
        "text": "xulu zhang, Wengyu ZHANG, Xiaoyong Wei, Jinlin Wu, Zhaoxiang Zhang, Zhen Lei, Qing Li"
      },
      {
        "type": "paperTitle",
        "text": "1859 <b>Explicit Granularity and Implicit Scale Correspondence Learning for Point-Supervised Video Moment Localization</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Kun Wang, Hao Liu, Lirong Jie, Zixu Li, Yupeng Hu, Liqiang Nie"
      },
      {
        "type": "paperTitle",
        "text": "1860 <b>GDR-GMA: Machine Unlearning via Direction-Rectified and Magnitude-Adjusted Gradients</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shen Lin, Xiaoyu Zhang, Willy Susilo, Xiaofeng Chen, Jun Liu"
      },
      {
        "type": "paperTitle",
        "text": "1861 <b>SemGIR: Semantic-Guided Image Regeneration based method for AI-generated Image Detection and Attribution</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiao Yu, Kejiang Chen, Kai Zeng, Han Fang, Zijin Yang, Xiuwei Shang, Yuang Qi, Weiming Zhang, Nenghai Yu"
      },
      {
        "type": "paperTitle",
        "text": "1863 <b>From Speaker to Dubber: Movie Dubbing with Prosody and Duration Consistency Learning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhedong Zhang, Liang Li, Gaoxiang Cong, Haibing YIN, Yuhan Gao, Chenggang Yan, Anton Hengel, Yuankai Qi"
      },
      {
        "type": "paperTitle",
        "text": "1870 <b>Adaptively Building a Video-language Model for Video Captioning and Retrieval without Massive Video Pretraining</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zihao Liu, Xiaoyu Wu, Shengjin Wang, Jiayao Qian"
      },
      {
        "type": "paperTitle",
        "text": "1886 <b>Break the Visual Perception: Adversarial Attacks Targeting Encoded Visual Tokens of Large Vision-Language Models</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yubo Wang, Chaohu Liu, qiuqu yan, Haoyu Cao, Deqiang Jiang, Linli Xu"
      },
      {
        "type": "paperTitle",
        "text": "1894 <b>FreePIH: Training-Free Painterly Image Harmonization with Diffusion Model</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ruibin Li, Jingcai Guo, Qihua Zhou, Song Guo"
      },
      {
        "type": "paperTitle",
        "text": "1895 <b>TGCA-PVT: Topic-Guided Context-Aware Pyramid Vision Transformer for Sticker Emotion Recognition</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jian Chen, Wei Wang, Yuzhu Hu, Junxin Chen, Han Liu, Xiping Hu"
      },
      {
        "type": "paperTitle",
        "text": "1897 <b>Fast Elastic-Net Multi-view Clustering: A Geometric Interpretation Perspective</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yalan Qin, Li Qian"
      },
      {
        "type": "paperTitle",
        "text": "1902 <b>AesStyler: Aesthetic Guided Universal Style Transfer</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ran Yi, haokun zhu, Teng Hu, Yu-Kun Lai, Paul Rosin"
      },
      {
        "type": "paperTitle",
        "text": "1908 <b>StarStream: Live Video Analytics over Space Networking</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Miao Zhang, Jiaxing Li, Haoyuan Zhao, Linfeng Shen, Jiangchuan Liu"
      },
      {
        "type": "paperTitle",
        "text": "1910 <b>TransLinkGuard: Safeguarding Transformer Models Against Model Stealing in Edge Deployment</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Qinfeng Li, Zhiqiang Shen, Zhenghan Qin, Yangfan Xie, Xuhong Zhang, Tianyu Du, Sheng Cheng, Xun Wang, Jianwei Yin"
      },
      {
        "type": "paperTitle",
        "text": "1917 <b>High Fidelity Aggregated Planar Prior Assisted PatchMatch Multi-View Stereo</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jie Liang, Rongjie Wang, Rui Peng, Zhe ZHANG, Kaiqiang Xiong, Ronggang Wang"
      },
      {
        "type": "paperTitle",
        "text": "1919 <b>Cluster-driven Personalized Federated Recommendation with Interest-aware Graph Convolution Network for Multimedia</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xingyuan Mao, Yuwen Liu, Lianyong Qi, Li Duan, Xiaolong Xu, Xuyun Zhang, Wanchun Dou, Amin Beheshti, Xiaokang Zhou"
      },
      {
        "type": "paperTitle",
        "text": "1921 <b>LDA-AQU: Adaptive Query-guided Upsampling via Local Deformable Attention</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zewen Du, Zhenjiang Hu, Guiyu Zhao, Ying Jin, Hongbin Ma"
      },
      {
        "type": "paperTitle",
        "text": "1924 <b>NovaChart: A Large-scale Dataset towards Chart Understanding and Generation of Multimodal Large Language Models</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Linmei Hu, Duokang Wang, Yiming Pan, Jifan Yu, Yingxia Shao, Chong Feng, Liqiang Nie"
      },
      {
        "type": "paperTitle",
        "text": "1929 <b>Probabilistic Distillation Transformer: Modelling Uncertainties for Visual Abductive Reasoning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wanru Xu, Zhenjiang Miao, Yi Tian, Yigang Cen, Lili Wan, Ma Xiaole"
      },
      {
        "type": "paperTitle",
        "text": "1932 <b>Neural Interaction Energy for Multi-Agent Trajectory Prediction</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Kaixin Shen, Ruijie Quan, Linchao Zhu, Jun Xiao, Yi Yang"
      },
      {
        "type": "paperTitle",
        "text": "1933 <b>Multi-Label Learning with Block Diagonal Labels</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Leqi Shen, Sicheng Zhao, Yifeng Zhang, Hui Chen, Jundong Zhou, pengzhang liu, Yongjun Bao, Guiguang Ding"
      },
      {
        "type": "paperTitle",
        "text": "1937 <b>Counterfactually Augmented Event Matching for De-biased Temporal Sentence Grounding</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xun Jiang, Zhuoyuan Wei, Shenshen Li, Xing Xu, Jingkuan Song, Hengtao Shen"
      },
      {
        "type": "paperTitle",
        "text": "1941 <b>Enhanced Experts with Uncertainty-Aware Routing for Multimodal Sentiment Analysis</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zixian Gao, Disen Hu, Xun Jiang, Huimin Lu, Hengtao Shen, Xing Xu"
      },
      {
        "type": "paperTitle",
        "text": "1942 <b>ANFluid: Animate Natural Fluid Photos base on Physics-Aware Simulation and Dual-Flow Texture Learning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiangcheng Zhai, Yingqi Jie, Xueguang Xie, Aimin Hao, Na Jiang, Yang Gao"
      },
      {
        "type": "paperTitle",
        "text": "1947 <b>Robust Pseudo-label Learning with Neighbor Relation for Unsupervised Visible-Infrared Person Re-Identification</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiangbo Yin, Jiangming Shi, Yachao Zhang, Yang Lu, zhizhong zhang, Yuan Xie, Yanyun Qu"
      },
      {
        "type": "paperTitle",
        "text": "1949 <b>AL-GTD: Deep Active Learning for Gaze Target Detection</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Francesco Tonini, Nicola Dall'Asen, Lorenzo Vaquero, Cigdem Beyan, Elisa Ricci"
      },
      {
        "type": "paperTitle",
        "text": "1954 <b>Advancing Prompt Learning through an External Layer</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Fangming Cui, Xun Yang, Chao Wu, Liang Xiao, Xinmei Tian"
      },
      {
        "type": "paperTitle",
        "text": "1955 <b>IBMEA: Exploring Variational Information Bottleneck for Multi-modal Entity Alignment</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Taoyu Su, Jiawei Sheng, Shicheng Wang, Xinghua Zhang, Hongbo Xu, Tingwen Liu"
      },
      {
        "type": "paperTitle",
        "text": "1959 <b>Training Spatial-Frequency Visual Prompts and Probabilistic Clusters for Accurate Black-Box Transfer Learning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wonwoo Cho, Kangyeol Kim, Saemee Choi, Jaegul Choo"
      },
      {
        "type": "paperTitle",
        "text": "1961 <b>Cloth-aware Augmentation for Cloth-generalized Person Re-identification</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Fangyi Liu, Mang Ye, Bo Du"
      },
      {
        "type": "paperTitle",
        "text": "1963 <b>Two Teachers Are Better Than One: Semi-supervised Elliptical Object Detection by Dual-Teacher Collaborative Guidance</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yu Liu, Longhan Feng, Qi Jia, Zezheng Liu, Zihuang Cao"
      },
      {
        "type": "paperTitle",
        "text": "1964 <b>ProFD: Prompt-Guided Feature Disentangling for Occluded Person Re-Identification</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Can Cui, Siteng Huang, Wenxuan Song, Pengxiang Ding, Zhang Min, Donglin Wang"
      },
      {
        "type": "paperTitle",
        "text": "1968 <b>ReToMe-VA: Recursive Token Merging for Video Diffusion-based Unrestricted Adversarial Attack</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ziyi Gao, Kai Chen, Zhipeng Wei, Tingshu Mou, Jingjing Chen, Zhiyu Tan, Li Hao, Yu-Gang Jiang"
      },
      {
        "type": "paperTitle",
        "text": "1969 <b>WeakSAM: Segment Anything Meets Weakly-supervised Instance-level Recognition</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Lianghui Zhu, Junwei Zhou, Yan Liu, Hao Xin, Wenyu Liu, Xinggang Wang"
      },
      {
        "type": "paperTitle",
        "text": "1975 <b>Frame Interpolation with Consecutive Brownian Bridge Diffusion</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zonglin Lyu, Ming Li, Jianbo Jiao, Chen Chen"
      },
      {
        "type": "paperTitle",
        "text": "1983 <b>Diffusion Domain Teacher: Diffusion Guided Domain Adaptive Object Detector</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Boyong He, Yuxiang Ji, Zhuoyue Tan, Liaoni Wu"
      },
      {
        "type": "paperTitle",
        "text": "1984 <b>Balancing Generalization and Robustness in Adversarial Training via Steering through Clean and Adversarial Gradient Directions</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Haoyu Tong, Xiaoyu Zhang, Jin Yulin, Jian Lou, Kai Wu, Xiaofeng Chen"
      },
      {
        "type": "paperTitle",
        "text": "1985 <b>Subjective and Objective Quality-of-Experience Assessment for 3D Talking Heads</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yingjie Zhou, zicheng zhang, Wei Sun, Xiaohong Liu, Xiongkuo Min, Guangtao Zhai"
      },
      {
        "type": "paperTitle",
        "text": "1989 <b>Multi-scale Change-Aware Transformer for Remote Sensing Image Change Detection</b>"
      },
      {
        "type": "paperAuthor",
        "text": "HUAN CHEN, Tingfa Xu, Zhenxiang Chen, Peifu Liu, Huiyan Bai, Jianan Li"
      },
      {
        "type": "paperTitle",
        "text": "1993 <b>Self-Adaptive Fine-grained Multi-modal Data Augmentation for Semi-supervised Multi-modal Coreference Resolution</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zheng Li, Boyu Chen, Hao Fei, Fei Li, Shengqiong Wu, Lizi Liao, Donghong Ji, Chong Teng"
      },
      {
        "type": "paperTitle",
        "text": "1996 <b>TVPR: Text-to-Video Person Retrieval and a New Benchmark</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhang Xu, Fan Ni, Guan-Nan Dong, Aichun Zhu, Jianhui Wu, Mingcheng Ni, Hui Liu"
      },
      {
        "type": "paperTitle",
        "text": "2006 <b>Learning from Distinction: Mitigating backdoors using a low-capacity model</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Haosen Sun, Yiming Li, Xixiang Lyu, Jing Ma"
      },
      {
        "type": "paperTitle",
        "text": "2007 <b>SFP: Spurious Feature-Targeted Pruning for Out-of-Distribution Generalization</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yingchun Wang, Jingcai Guo, Song Guo, LIU Yi, Jie ZHANG, Weizhan Zhang"
      },
      {
        "type": "paperTitle",
        "text": "2022 <b>Rate-aware Compression for NeRF-based Volumetric Video</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhiyu Zhang, Guo Lu, Huanxiong Liang, Zhengxue Cheng, Anni Tang, Li Song"
      },
      {
        "type": "paperTitle",
        "text": "2025 <b>TeRF: Text-driven and Region-aware Flexible Visible and Infrared Image Fusion</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hebaixu Wang, Hao Zhang, Xunpeng Yi, Xinyu Xiang, Leyuan Fang, Jiayi Ma"
      },
      {
        "type": "paperTitle",
        "text": "2027 <b>Semantics-Aware Image Aesthetics Assessment using Tag Matching and Contrastive Ranking</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhichao Yang, Leida Li, Pengfei Chen, Jinjian Wu, Weisheng Dong"
      },
      {
        "type": "paperTitle",
        "text": "2032 <b>Synergetic Prototype Learning Network for Unbiased Scene Graph Generation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ruonan Zhang, Ziwei Shang, Fengjuan Wang, Zhaoqilin Yang, Shan Cao, Yigang Cen, Gaoyun An"
      },
      {
        "type": "paperTitle",
        "text": "2034 <b>PEAN: A Diffusion-Based Prior-Enhanced Attention Network for Scene Text Image Super-Resolution</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zuoyan Zhao, hui xue, Pengfei Fang, Shipeng Zhu"
      },
      {
        "type": "paperTitle",
        "text": "2039 <b>Audio-Driven Identity Manipulation for Face Inpainting</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuqi Sun, Qing Lin, Weimin Tan, Bo Yan"
      },
      {
        "type": "paperTitle",
        "text": "2043 <b>Boosting Semi-supervised Crowd Counting with Scale-based Active Learning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shiwei Zhang, Wei Ke, Shuai Liu, Xiaopeng Hong, Tong Zhang"
      },
      {
        "type": "paperTitle",
        "text": "2044 <b>Navigating Weight Prediction with Diet Diary</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yinxuan Gui, Bin Zhu, Jingjing Chen, Chong Wah Ngo, Yu-Gang Jiang"
      },
      {
        "type": "paperTitle",
        "text": "2051 <b>Integrating Stickers into Multimodal Dialogue Summarization: A Novel Dataset and Approach for Enhancing Social Media Interaction</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuanchen Shi, Fang Kong"
      },
      {
        "type": "paperTitle",
        "text": "2053 <b>Linearly-evolved Transformer for Pan-sharpening</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Junming Hou, Zihan Cao, Naishan Zheng, Xuan Li, Xiaoyu Chen, Xinyang Liu, Cong Xiaofeng, Danfeng Hong, man zhou"
      },
      {
        "type": "paperTitle",
        "text": "2054 <b>DanceCamAnimator: Keyframe-Based Controllable 3D Dance Camera Synthesis</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zixuan Wang, Jiayi Li, Xiaoyu Qin, Shikun Sun, Songtao Zhou, Jia Jia, Jiebo Luo"
      },
      {
        "type": "paperTitle",
        "text": "2058 <b>DNTextSpotter: Arbitrary-Shaped Scene Text Spotting via Improved Denoising Training</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Qian Qiao, Yu Xie, Jun Gao, Tianxiang Wu, Shaoyao Huang, Jiaqing Fan, Ziqiang Cao, Zili Wang, Yue Zhang"
      },
      {
        "type": "paperTitle",
        "text": "2068 <b>PTSBench: A Comprehensive Post-Training Sparsity Benchmark Towards Algorithms and Models</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zining Wang, Jinyang Guo, Ruihao Gong, yang yong, Aishan Liu, Yushi Huang, Liu Jiaheng, Xianglong Liu"
      },
      {
        "type": "paperTitle",
        "text": "2070 <b>Enhancing Pre-trained ViTs for Downstream Task Adaptation: A Locality-Aware Prompt Learning Method</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shaokun Wang, Yifan Yu, Yuhang He, Yihong Gong"
      },
      {
        "type": "paperTitle",
        "text": "2072 <b>StylizedFacePoint: Facial Landmark Detection for Stylized Characters</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shengran Cheng, Chuhang Ma, Ye Pan"
      },
      {
        "type": "paperTitle",
        "text": "2080 <b>MAJL: A Model-Agnostic Joint Learning Framework for Music Source Separation and Pitch Estimation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Haojie Wei, Yuan Jun, Rui Zhang, Quanyu Dai, Yueguo Chen"
      },
      {
        "type": "paperTitle",
        "text": "2082 <b>rPPG-HiBa: Hierarchical Balanced Framework for Remote Physiological Measurement</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yin Wang, Hao Lu, Ying-Cong Chen, Li Kuang, Mengchu Zhou, Shuiguang Deng"
      },
      {
        "type": "paperTitle",
        "text": "2084 <b>Perceive before Respond: Improving Sticker Response Selection by Emotion Distillation and Hard Mining</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wuyou Xia, Shengzhe Liu, Qin Rong, Guoli Jia, Eunil Park, Jufeng Yang"
      },
      {
        "type": "paperTitle",
        "text": "2087 <b>UniL: Point Cloud Novelty Detection through Multimodal Pre-training</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuhan Wang, Mofei Song"
      },
      {
        "type": "paperTitle",
        "text": "2102 <b>One-shot In-context Part Segmentation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhenqi Dai, Ting Liu, Xingxing Zhang, Yunchao Wei, Yanning Zhang"
      },
      {
        "type": "paperTitle",
        "text": "2106 <b>3D Human Pose Estimation from Multiple Dynamic Views via Single-view Pretraining with Procrustes Alignment</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Renshu Gu, Jiajun Zhu, Yixuan Si, Fei Gao, Jiamin Xu, Gang Xu"
      },
      {
        "type": "paperTitle",
        "text": "2117 <b>CBNet: Cooperation-Based Weakly Supervised Polyp Detection</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiuquan Du, Jiajia Chen, Xuejun Zhang"
      },
      {
        "type": "paperTitle",
        "text": "2119 <b>Optical Flow-Guided 6DoF Object Pose Tracking with an Event Camera</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zibin Liu, Banglei Guan, Yang Shang, Shunkun Liang, Zhenbao Yu, Qifeng Yu"
      },
      {
        "type": "paperTitle",
        "text": "2131 <b>Self-Supervised Visual Preference Alignment</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ke Zhu, Liang Zhao, Zheng Ge, Xiangyu Zhang"
      },
      {
        "type": "paperTitle",
        "text": "2132 <b>Consistent123: One Image to Highly Consistent 3D Asset Using Case-Aware Diffusion Priors</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yukang Lin, Haonan Han, Chaoqun Gong, Zunnan Xu, Yachao Zhang, Xiu Li"
      },
      {
        "type": "paperTitle",
        "text": "2135 <b>Focus & Gating: A Multimodal Approach for Unveiling Relations in Noisy Social Media</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Liang He, Hongke Wang, Zhen Wu, Jianbing Zhang, Xinyu Dai, Jiajun Chen"
      },
      {
        "type": "paperTitle",
        "text": "2137 <b>SpikeGS: 3D Gaussian Splatting from Spike Streams with High-Speed Camera Motion</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiyuan Zhang, Kang Chen, Sy Chen, Yajing Zheng, Tiejun Huang, Zhaofei Yu"
      },
      {
        "type": "paperTitle",
        "text": "2139 <b>Reliable Attribute-missing Multi-view Clustering with Instance-level and feature-level Cooperative Imputation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Dayu Hu, Suyuan Liu, Jun Wang, Junpu Zhang, Siwei Wang, Xingchen Hu, Xinzhong Zhu, Chang Tang, Xinwang Liu"
      },
      {
        "type": "paperTitle",
        "text": "2145 <b>RefMask3D: Language-Guided Transformer for 3D Referring Segmentation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shuting He, Henghui Ding"
      },
      {
        "type": "paperTitle",
        "text": "2146 <b>Score-Based Image-to-Image Brownian Bridge</b>"
      },
      {
        "type": "paperAuthor",
        "text": "PEIYONG WANG, Bohan Xiao, Qisheng He, Carri Glide-Hurst, Ming Dong"
      },
      {
        "type": "paperTitle",
        "text": "2152 <b>Portrait Shadow Removal via Self-Exemplar Illumination Equalization</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Qian Huang, Cheng Xu, Guiqing Li, Wu Ziheng, Shengxin Liu, Shengfeng He"
      },
      {
        "type": "paperTitle",
        "text": "2153 <b>Training pansharpening networks at full resolution using degenerate invariance</b>"
      },
      {
        "type": "paperAuthor",
        "text": "YiChang Qu, Bing Li, Huang Jie, Feng Zhao"
      },
      {
        "type": "paperTitle",
        "text": "2154 <b>AerialGait: Bridging Aerial and Ground Views for Gait Recognition</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Aoqi Li, Saihui Hou, chenye wang, Qingyuan Cai, Yongzhen Huang"
      },
      {
        "type": "paperTitle",
        "text": "2155 <b>Generating Prompts in Latent Space for Rehearsal-free Continual Learning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chengyi Yang, WenTao Liu, Shisong Chen, JiaYin Qi, Aimin Zhou"
      },
      {
        "type": "paperTitle",
        "text": "2158 <b>Hearing the Moment with MetaEcho! From Physical to Virtual in Synchronized Sound Recording</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zheng WEI, Yuzheng Chen, Wai Tong, Xuan Zong, Huamin Qu, Xian Xu, LIK-HANG LEE"
      },
      {
        "type": "paperTitle",
        "text": "2165 <b>256 Metaverse Recording Dataset</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Patrick Steinert, Stefan Wagenpfeil, Ingo Frommholz, Matthias Hemmje"
      },
      {
        "type": "paperTitle",
        "text": "2166 <b>Learning to Handle Large Obstructions in Video Frame Interpolation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Libo Long, Xiao Hu, Jochen Lang"
      },
      {
        "type": "paperTitle",
        "text": "2167 <b>An In-depth Study of Bandwidth Allocation across Media Sources in Video Conferencing</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zejun Zhang, Xiao Zhu, Anlan Zhang, Feng Qian"
      },
      {
        "type": "paperTitle",
        "text": "2170 <b>HybridFlow: Infusing Continuity into Masked Codebook for Extreme Low-Bitrate Image Compression</b>"
      },
      {
        "type": "paperAuthor",
        "text": "LEI LU, Yanyue Xie, Wei Jiang, Wei Wang, Xue Lin, Yanzhi Wang"
      },
      {
        "type": "paperTitle",
        "text": "2173 <b>Frequency Guidance Matters: Skeletal Action Recognition by Frequency-Aware Mixed Transformer</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenhan Wu, Ce Zheng, Zihao Yang, Chen Chen, Srijan Das, Aidong Lu"
      },
      {
        "type": "paperTitle",
        "text": "2178 <b>AdaCoder: Adaptive Prompt Compression for Programmatic Visual Question Answering</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Mahiro Ukai, Shuhei Kurita, Atsushi Hashimoto, Yoshitaka Ushiku, Nakamasa Inoue"
      },
      {
        "type": "paperTitle",
        "text": "2186 <b>AesMamba: Universal Image Aesthetic Assessment with State Space Models</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Fei Gao, Yuhao Lin, Jiaqi Shi, Maoying Qiao, Nannan Wang"
      },
      {
        "type": "paperTitle",
        "text": "2193 <b>AutoGraph: Enabling Visual Context via Graph Alignment in Open Domain Multi-Modal Dialogue Generation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Deji Zhao, Donghong Han, Ye Yuan, Bo Ning, Li Mengxiang, Zhongjiang He, Shuangyong Song"
      },
      {
        "type": "paperTitle",
        "text": "2195 <b>Realistic Full-Body Motion Generation from Sparse Tracking with State Space Model</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Kun Dong, Jian Xue, Niu Zehai, Xing Lan, Ke Lu, Liu Qingyuan, Xiaoyu Qin"
      },
      {
        "type": "paperTitle",
        "text": "2196 <b>Semantic Alignment for Multimodal Large Language Models</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Tao Wu, Mengze Li, Jingyuan Chen, Wei Ji, Lin Wang, Jinyang Gao, Kun Kuang, Zhou Zhao, Fei Wu"
      },
      {
        "type": "paperTitle",
        "text": "2197 <b>Multi-Modality Co-Learning for Efficient Skeleton-based Action Recognition</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jinfu Liu, Chen Chen, Mengyuan Liu"
      },
      {
        "type": "paperTitle",
        "text": "2200 <b>Streamable Portrait Video Editing with Probabilistic Pixel Correspondence</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiaodi Li"
      },
      {
        "type": "paperTitle",
        "text": "2226 <b>Self-Supervised Emotion Representation Disentanglement for Speech-Preserving Facial Expression Manipulation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhihua Xu, Tianshui Chen, Zhijing Yang, Chunmei Qing, Yukai Shi, Liang Lin"
      },
      {
        "type": "paperTitle",
        "text": "2245 <b>UniGM: Unifying Multiple Pre-trained Graph Models via Adaptive Knowledge Aggregation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jintao Chen, Fan Wang, Shengye Pang, Siwei Tan, Mingshuai Chen, Tiancheng Zhao, Meng Xi, Jianwei Yin"
      },
      {
        "type": "paperTitle",
        "text": "2246 <b>Manipulable NeRF using Recursively Subdivided Tetrahedra</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zherui Qiu, Chenqu Ren, Kaiwen Song, Xiaoyi Zeng, Leyuan Yang, Juyong Zhang"
      },
      {
        "type": "paperTitle",
        "text": "2247 <b>DySarl: Dynamic Structure-Aware Representation Learning for Multimodal Knowledge Graph Reasoning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Kangzheng Liu, Feng Zhao, Yu Yang, Guandong Xu"
      },
      {
        "type": "paperTitle",
        "text": "2250 <b>Semantic-aware Representation Learning for Homography Estimation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuhan Liu, Qianxin Huang, Siqi Hui, Jingwen Fu, Sanping Zhou, Kangyi Wu, Pengna Li, Jinjun Wang"
      },
      {
        "type": "paperTitle",
        "text": "2252 <b>3D Question Answering for City Scene Understanding</b>"
      },
      {
        "type": "paperAuthor",
        "text": "penglei sun, Yaoxian Song, Xiang Liu, Xiaofei Yang, Qiang Wang, tiefeng li, Yang YANG, Xiaowen Chu"
      },
      {
        "type": "paperTitle",
        "text": "2257 <b>Serial section microscopy image inpainting guided by axial optical flow<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yiran Cheng, Bintao He, Renmin Han, Fa Zhang"
      },
      {
        "type": "paperTitle",
        "text": "2259 <b>Multi-Modal Inductive Framework for Text-Video Retrieval<b>"
      },
      {
        "type": "paperAuthor",
        "text": "qian li, Yucheng Zhou, Cheng Ji, Feihong Lu, Jianian Gong, Shangguang Wang, Jianxin Li"
      },
      {
        "type": "paperTitle",
        "text": "2264 <b>Collaborative Training of Tiny-Large Vision Language Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shichen Lu, Longteng Guo, Wenxuan Wang, Zijia Zhao, Tongtian Yue, Jing Liu, Si Liu"
      },
      {
        "type": "paperTitle",
        "text": "2265 <b>TiVA: Time-Aligned Video-to-Audio Generation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xihua Wang, Yuyue Wang, Yihan Wu, Ruihua Song, Xu Tan, Zehua Chen, Hongteng Xu, Guodong Sui"
      },
      {
        "type": "paperTitle",
        "text": "2270 <b>Progressive Local and Non-Local Interactive Networks with Deeply Discriminative Training for Image Deraining<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Cong Wang, Liyan Wang, Jie Mu, Chengjin Yu, Wei Wang"
      },
      {
        "type": "paperTitle",
        "text": "2272 <b>Beyond Direct Relationships: Exploring Multi-Order Label Pair Dependencies for Knowledge Distillation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jingchao Wang, Zhengnan Deng, Tongxu Lin, Wenyuan Li, Shaobin Ling, Junyu Lin"
      },
      {
        "type": "paperTitle",
        "text": "2275 <b>Learning Dual Enhanced Representation for Contrastive Multi-view Clustering<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Guoliang Zhou, Yangdong Ye,  TongjiChen, Shizhe Hu"
      },
      {
        "type": "paperTitle",
        "text": "2279 <b>Purified Distillation: Bridging Domain Shift and Category Gap in Incremental Object Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shilong Jia, Tingting WU, Yingying Fang, Tieyong Zeng, Guixu Zhang, Zhi Li"
      },
      {
        "type": "paperTitle",
        "text": "2280 <b>AdvQDet: Detecting Query-Based Adversarial Attacks with Adversarial Contrastive Prompt Tuning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xin Wang, Kai Chen, Xingjun Ma, Zhineng Chen, Jingjing Chen, Yu-Gang Jiang"
      },
      {
        "type": "paperTitle",
        "text": "2281 <b>BSBP-RWKV: Background Suppression with Boundary Preservation for Efficient Medical Image Segmentation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xudong Zhou, Tianxiang Chen"
      },
      {
        "type": "paperTitle",
        "text": "2283 <b>SATO: Stable Text-to-Motion Framework<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenshuo chen, Hongru Xiao, Erhang Zhang, Lijie Hu, Lei Wang, Mengyuan Liu, Chen Chen"
      },
      {
        "type": "paperTitle",
        "text": "2290 <b>SAT3D: Image-driven Semantic Attribute Transfer in 3D<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhijun Zhai, Zengmao Wang, XIAOXIAO LONG, Kaixuan Zhou, Bo Du"
      },
      {
        "type": "paperTitle",
        "text": "2292 <b>Seeing Beyond Classes: Zero-Shot Grounded Situation Recognition via Language Explainer<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiaming Lei, Lin Li, Chunping Wang, Jun Xiao, Long Chen"
      },
      {
        "type": "paperTitle",
        "text": "2294 <b>Cognition-Supervised Saliency Detection: Contrasting EEG Signals and Visual Stimuli<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jun Ma, Tuukka Ruotsalo"
      },
      {
        "type": "paperTitle",
        "text": "2295 <b>LaneCMKT: Boosting Monocular 3D Lane Detection with Cross-Modal Knowledge Transfer<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Runkai Zhao, Heng Wang, Weidong Cai"
      },
      {
        "type": "paperTitle",
        "text": "2307 <b>Free Lunch: Frame-level Contrastive Learning with Text Perceiver for Robust Scene Text Recognition in Lightweight Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hongjian Zhan, yangfu Li, Xiong Yu-Jie, Umapada Pal, Yue Lu"
      },
      {
        "type": "paperTitle",
        "text": "2308 <b>Freehand Sketch Generation from Mechanical Components<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhichao Liao, Fengyuan Piao, Di Huang, Xinghui Li, ma yue, Pingfa Feng, Heming Fang, Long ZENG"
      },
      {
        "type": "paperTitle",
        "text": "2312 <b>FIND: Fine-tuning Initial Noise Distribution with Policy Optimization for Diffusion Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Changgu Chen, Libing Yang, Xiaoyan Yang, Lianggangxu Chen, Gaoqi He, Changbo Wang, Yang Li"
      },
      {
        "type": "paperTitle",
        "text": "2316 <b>Contrastive Graph Distribution Alignment for Partially View-Aligned Clustering<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xibiao Wang, Hang Gao, Xindian WEI, Liang Peng, Rui Li, Cheng Liu, Si Wu, Hau-San Wong"
      },
      {
        "type": "paperTitle",
        "text": "2319 <b>Remembering is Not Applying: Interpretable Knowledge Tracing for Problem-solving Processes<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Tao Huang, Xinjia Ou,  Yanghuali, Shengze Hu, Jing Geng, Junjie Hu, Zhuoran Xu"
      },
      {
        "type": "paperTitle",
        "text": "2320 <b>QVD: Post-training Quantization for Video Diffusion Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shilong Tian, Hong Chen, Chengtao Lv, Yu Liu, Jinyang Guo, Xianglong Liu, Shengxi Li, Hao Yang, Tao Xie"
      },
      {
        "type": "paperTitle",
        "text": "2324 <b>Color4E: Event Demosaicing for Full-color Event Guided Image Deblurring<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yi Ma, Peiqi Duan, Yuchen Hong, Chu Zhou, Yu Zhang, Jimmy Ren, Boxin Shi"
      },
      {
        "type": "paperTitle",
        "text": "2325 <b>Exploring the Use of Abusive Generative AI Models on Civitai<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yiluo Wei, Yiming Zhu, Pan Hui, Gareth Tyson"
      },
      {
        "type": "paperTitle",
        "text": "2326 <b>CT2C-QA: Multimodal Question Answering over Chinese Text, Table and Chart<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Bowen Zhao., Tianhao Cheng, Yuejie Zhang, Ying Cheng, Rui Feng, Xiaobo Zhang"
      },
      {
        "type": "paperTitle",
        "text": "2328 <b>One-Shot Sequential Federated Learning for Non-IID Data by Enhancing Local Model Diversity<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Naibo Wang, yuchen deng, Wenjie Feng, Shichen Fan, Jianwei Yin, See-Kiong Ng"
      },
      {
        "type": "paperTitle",
        "text": "2339 <b>HmPEAR: A Dataset for Human Pose Estimation and Action Recognition<b>"
      },
      {
        "type": "paperAuthor",
        "text": "YiTai Lin, Zhijie Wei, Wanfa Zhang, XiPing Lin, Yudi Dai, Chenglu Wen, Siqi SHEN, Lan Xu, Cheng Wang"
      },
      {
        "type": "paperTitle",
        "text": "2340 <b>Learning in Order!   A Sequential Strategy to Learn Invariant Features for Multimodal Sentiment Analysis<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xianbing Zhao, Lizhen Qu, Tao Feng, Jianfei Cai, Buzhou Tang"
      },
      {
        "type": "paperTitle",
        "text": "2347 <b>IconDM: Text-Guided Icon Set Expansion Using Diffusion Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiawei Lin, Zhaoyun Jiang, Jiaqi Guo, Shizhao Sun, Ting Liu, Zijiang Yang, Jian-Guang Lou, Dongmei Zhang"
      },
      {
        "type": "paperTitle",
        "text": "2350 <b>QueryMatch: A Query-based Contrastive Learning Framework for Weakly Supervised Visual Grounding<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shengxin Chen, Gen Luogen, Yiyi Zhou, Xiaoshuai Sun, GUANNAN JIANG, Rongrong Ji"
      },
      {
        "type": "paperTitle",
        "text": "2351 <b>SpecGaussian with latent features: A high-quality modeling of the view-dependent appearance for 3D Gaussian Splatting<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhiru Wang, Shiyun Xie, Chengwei Pan, Guoping Wang"
      },
      {
        "type": "paperTitle",
        "text": "2352 <b>Generating Multimodal Metaphorical Features for Meme Understanding<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Bo Xu, Junzhe Zheng, Jiayuan He, Yuxuan Sun, Hongfei Lin, Liang Zhao, Feng Xia"
      },
      {
        "type": "paperTitle",
        "text": "2357 <b>Open-Vocabulary Video Scene Graph Generation via Union-aware Semantic Alignment<b>"
      },
      {
        "type": "paperAuthor",
        "text": "ZIyue Wu, Junyu Gao, Changsheng Xu"
      },
      {
        "type": "paperTitle",
        "text": "2360 <b>SOAP: Enhancing Spatio-Temporal Relation and Motion Information Capturing for Few-Shot Action Recognition<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenbo Huang, Jinghui Zhang, Xuwei Qian, Zhen Wu, Meng Wang, Lei Zhang"
      },
      {
        "type": "paperTitle",
        "text": "2362 <b>Unsupervised Image-to-Video Adaptation via Category-aware Flow Memory Bank and Realistic Video Generation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Kenan Huang, Junbao Zhuo, Shuhui Wang, Chi Su, Qingming Huang, Huimin Ma"
      },
      {
        "type": "paperTitle",
        "text": "2367 <b>DRMF: Degradation-Robust Multi-Modal Image Fusion via Composable Diffusion Prior<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Linfeng Tang, Yuxin Deng, Xunpeng Yi, Qinglong Yan, Yixuan Yuan, Jiayi Ma"
      },
      {
        "type": "paperTitle",
        "text": "2368 <b>Learning Context with Priors for 3D Interacting Hand-Object Pose Estimation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zengsheng Kuang, Changxing Ding, Huan Yao"
      },
      {
        "type": "paperTitle",
        "text": "2372 <b>CSO: Constraint-guided Space Optimization for Active Scene Mapping<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xuefeng Yin, Chenyang Zhu, Shanglai Qu, Yuqi Li, Kevin Xu, Baocai Yin, Xin Yang"
      },
      {
        "type": "paperTitle",
        "text": "2376 <b>Dual-Resolution Fusion Modeling for Unsupervised Cross-Resolution Person Re-Identification<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhiqi Pang, Lingling Zhao, Chunyu Wang"
      },
      {
        "type": "paperTitle",
        "text": "2377 <b>Decoupling Heterogeneous Features for Robust 3D Interacting Hand Poses Estimation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Huan Yao, Changxing Ding, Xuanda Xu, Zhifeng Lin"
      },
      {
        "type": "paperTitle",
        "text": "2378 <b>DiffGlue: Diffusion-Aided Image Feature Matching<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shihua Zhang, Jiayi Ma"
      },
      {
        "type": "paperTitle",
        "text": "2387 <b>Instance-Level Panoramic Audio-Visual Saliency Detection and Ranking<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ruohao Guo, Niu Dantong, Liao Qu, Yanyu Qi, Ji Shi, Wenzhen Yue, xing bowei, Taiyan Chen, Xianghua Ying"
      },
      {
        "type": "paperTitle",
        "text": "2398 <b>HiVG: Hierarchical Multimodal Fine-grained Modulation for Visual Grounding<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Linhui Xiao, Xiaoshan Yang, Fang Peng, Yaowei Wang, Changsheng Xu"
      },
      {
        "type": "paperTitle",
        "text": "2402 <b>EchoAudio: Efficient and High-Quality Text-to-Audio Generation with Minimal Inference Steps<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Huadai Liu, Rongjie Huang, Yang Liu, Hengyuan Cao, Jialei Wang, Xize Cheng, Siqi Zheng, Zhou Zhao"
      },
      {
        "type": "paperTitle",
        "text": "2405 <b>Towards Labeling-free Fine-grained Animal Pose Estimation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Dan Zeng, Yu Zhu, Shuiwang Li, Qijun Zhao, Qiaomu Shen, Bo Tang"
      },
      {
        "type": "paperTitle",
        "text": "2407 <b>Toward Explainable Physical Audiovisual Commonsense Reasoning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Daoming Zong, Chaoyue Ding, Kaitao Chen"
      },
      {
        "type": "paperTitle",
        "text": "2413 <b>Model X-ray : Detecting Backdoored Models via Decision Boundary<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yanghao Su, Jie Zhang, Ting Xu, Tianwei Zhang, Weiming Zhang, Nenghai Yu"
      },
      {
        "type": "paperTitle",
        "text": "2414 <b>DOPRA: Decoding Over-accumulation Penalization and Re-allocation in Specific Weighting Layer<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jinfeng Wei, Xiao Feng Zhang"
      },
      {
        "type": "paperTitle",
        "text": "2416 <b>Sustainable Self-evolution Adversarial Training<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenxuan Wang, Chenglei Wang, huihui Qi, Menghao Ye, Xuelin Qian, PENG WANG, Yanning Zhang"
      },
      {
        "type": "paperTitle",
        "text": "2418 <b>MetaDragonBoat: Exploring Paddling Techniques of Virtual Dragon Boating in a Metaverse Campus<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wei He, Xiang Li, Shengtian Xu, Yuzheng Chen, SIO CHAN IN DEVIN, Ge lin, LIK-HANG LEE"
      },
      {
        "type": "paperTitle",
        "text": "2425 <b>TALE: Training-free Cross-domain Image Composition via Adaptive Latent Manipulation and Energy-guided Optimization<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Kien Pham, Jingye Chen, Qifeng Chen"
      },
      {
        "type": "paperTitle",
        "text": "2440 <b>Swarical: An Integrated Hierarchical Approach to Localizing Flying Light Specks<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hamed Alimohammadzadeh, Shahram Ghandeharizadeh"
      },
      {
        "type": "paperTitle",
        "text": "2468 <b>Task-Adapter: Task-specific Adaptation of Image Models for Few-shot Action Recognition<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Congqi Cao, Yueran Zhang, Yating Yu, Qinyi Lv, Lingtong Min, Yanning Zhang"
      },
      {
        "type": "paperTitle",
        "text": "2472 <b>De-fine: Decomposing and Refining Visual Programs with Auto-Feedback<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Minghe Gao, Juncheng Li, Hao Fei, Liang Pang, Wei Ji, Guoming Wang, Zheqi Lv, Wenqiao Zhang, Siliang Tang, Yueting Zhuang"
      },
      {
        "type": "paperTitle",
        "text": "2473 <b>DMFourLLIE: Dual-Stage and Multi-Branch Fourier Network for Low-Light Image Enhancement<b>"
      },
      {
        "type": "paperAuthor",
        "text": " TongshunZhang, Pingping Liu, Ming Zhao, Haotian Lv"
      },
      {
        "type": "paperTitle",
        "text": "2477 <b>2M-AF: A Strong Multi-Modality Framework For Human Action Quality Assessment with Self-supervised Representation Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuning Ding, Sifan Zhang, Liu Shenglan, Jinrong Zhang, Wenyue Chen, Duan Haifei, bingcheng dong, Tao Sun"
      },
      {
        "type": "paperTitle",
        "text": "2478 <b>MMDRFuse: Distilled Mini-Model with Dynamic Refresh for Multi-Modality Image Fusion<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yanglin Deng, Tianyang Xu, Chunyang Cheng, Xiaojun Wu, Josef Kittler"
      },
      {
        "type": "paperTitle",
        "text": "2482 <b>GSLAMOT: A Tracklet and Query Graph-based Simultaneous Locating, Mapping, and Multiple Object Tracking System<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shuo Wang, Yongcai Wang, Zhimin Xu, Yongyu Guo, Wanting Li, Zhe Huang, xuewei Bai, Deying Li"
      },
      {
        "type": "paperTitle",
        "text": "2491 <b>Mitigating Social Hazards: Early Detection of Fake News via Diffusion-Guided Propagation Path Generation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Litian Zhang, Xiaoming Zhang, Chaozhuo Li, Ziyi Zhou, Liu Jiacheng, Feiran Huang, Xi Zhang"
      },
      {
        "type": "paperTitle",
        "text": "2504 <b>Efficient Face Super-Resolution via Wavelet-based Feature Enhancement Network<b>"
      },
      {
        "type": "paperAuthor",
        "text": "wenjie li, Heng Guo, Xuannan Liu, Kongming Liang, Jiani Hu, Zhanyu Ma, Jun Guo"
      },
      {
        "type": "paperTitle",
        "text": "2509 <b>FKA-Owl: Advancing Multimodal Fake News Detection through Knowledge-Augmented LVLMs<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xuannan Liu, PEI LI, Huaibo Huang, Zekun Li, Xing Cui,  jiahao.liang, lixiong Qin, Weihong Deng, Zhaofeng He"
      },
      {
        "type": "paperTitle",
        "text": "2516 <b>View-consistent Object Removal in Radiance Fields<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yiren Lu, Jing Ma, Yu Yin"
      },
      {
        "type": "paperTitle",
        "text": "2522 <b>Interactive Segmentation by Considering First-Click Intentional Ambiguity<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hu kangpeng, Sun Quansen, Hui Sun, Tao Wang"
      },
      {
        "type": "paperTitle",
        "text": "2526 <b>White-box Multimodal Jailbreaks Against Large Vision-Language Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "RUOFAN WANG, Xingjun Ma, Hanxu Zhou, Chuanjun Ji, Guangnan Ye, Yu-Gang Jiang"
      },
      {
        "type": "paperTitle",
        "text": "2528 <b>Towards Efficient and Diverse Generative Model for Unconditional Human Motion Synthesis<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hua Yu, Liu Weiming, Jiapeng Bai, Gui Xu, Yaqing Hou, Yew Ong, Qiang Zhang"
      },
      {
        "type": "paperTitle",
        "text": "2536 <b>FedBCGD: Communication-Efficient Accelerated Block Coordinate Gradient Descent for Federated Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Junkang Liu, Fanhua Shang, Yuanyuan Liu, Hongying Liu, Yuangang Li, YunXiang Gong"
      },
      {
        "type": "paperTitle",
        "text": "2538 <b>Multi-view Self-Supervised Contrastive Learning for Multivariate Time Series<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuhan Wu, Xiyu Meng, Yang He, Junru Zhang, haowen zhang, Yabo Dong, Dongming Lu"
      },
      {
        "type": "paperTitle",
        "text": "2544 <b>Backdoor Attacks on Bimodal Salient Object Detection with RGB-Thermal Data<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wen Yin, Bin Benjamin Zhu, Yulai Xie, Pan Zhou, Dan Feng"
      },
      {
        "type": "paperTitle",
        "text": "2547 <b>ArtSpeech: Adaptive Text-to-Speech Synthesis with Articulatory Representations<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhongxu Wang, Yujia Wang, Mingzhu Li, Hua Huang"
      },
      {
        "type": "paperTitle",
        "text": "2548 <b>Enhancing Underwater Images via Asymmetric Multi-Scale Invertible Networks<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuhui Quan, Xiaoheng Tan,  YanHuang, Yong Xu, Hui Ji"
      },
      {
        "type": "paperTitle",
        "text": "2553 <b>DFMVC: Deep Fair Multi-view Clustering<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Bowen Zhao, QIANQIAN WANG, ZHIQIANG TAO, Wei Feng, Quanxue Gao"
      },
      {
        "type": "paperTitle",
        "text": "2554 <b>What's the Real: A Novel Design Philosophy for Robust AI-Synthesized Voice Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xuan Hai, Xin Liu, Yuan Tan, Gang Liu, Song Li, Weina Niu, Rui Zhou, Xiaokang Zhou"
      },
      {
        "type": "paperTitle",
        "text": "2563 <b>Adversarial Example Quality Assessment: A Large-scale Dataset and Strong Baseline<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jia-Li Yin, Menghao chen, jin Han, Bo-Hao Chen, Ximeng Liu"
      },
      {
        "type": "paperTitle",
        "text": "2569 <b>A Picture Is Worth a Graph: A Blueprint Debate Paradigm for Multimodal Reasoning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Changmeng Zheng, DaYong Liang, Wengyu ZHANG, Xiaoyong Wei, Tat-seng Chua, Qing Li"
      },
      {
        "type": "paperTitle",
        "text": "2572 <b>RayFormer: Improving Query-Based Multi-Camera 3D Object Detection via Ray-Centric Strategies<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiaomeng Chu, Jiajun Deng, Guoliang You, Yifan Duan, Yao Li, Yanyong Zhang"
      },
      {
        "type": "paperTitle",
        "text": "2573 <b>Autogenic Language Embedding for Coherent Point Tracking<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Song Zikai, Ying Tang, Run Luo, Lintao Ma, Junqing Yu, Yi-Ping Phoebe Chen, Wei Yang"
      },
      {
        "type": "paperTitle",
        "text": "2583 <b>Frequency-Aware GAN for Imperceptible Transfer Attack on 3D Point Clouds<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiaowen Cai, Yunbo Tao, Daizong Liu, Pan Zhou, Xiaoye Qu, Jianfeng Dong, Keke Tang, Lichao Sun"
      },
      {
        "type": "paperTitle",
        "text": "2584 <b>Prior-free Balanced Replay: Uncertainty-guided Reservoir Sampling for Long-Tailed Continual Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Lei Liu, Li Liu, Yawen Cui"
      },
      {
        "type": "paperTitle",
        "text": "2585 <b>HPC: Hierarchical Progressive Coding Framework for Volumetric Video<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zihan Zheng, Houqiang Zhong, Qiang Hu, Xiaoyun Zhang, Li Song, Ya Zhang, Yanfeng Wang"
      },
      {
        "type": "paperTitle",
        "text": "2594 <b>SegTalker: Segmentation-based Talking Face Generation with Mask-guided Local Editing<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Lingyu Xiong, Xize Cheng, Jintao Tan, Xianjia Wu, Xiandong Li, Lei Zhu, Fei Ma, Minglei Li, Huang Xu, Zhihui Hu"
      },
      {
        "type": "paperTitle",
        "text": "2603 <b>AutoSFX: Automatic Sound Effect Generation for Videos<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yujia Wang, Zhongxu Wang, Hua Huang"
      },
      {
        "type": "paperTitle",
        "text": "2607 <b>Not All Pairs are Equal: Hierarchical Learning for Average-Precision-Oriented Video Retrieval<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yang Liu, Qianqian Xu, Peisong Wen, Siran Dai, Qingming Huang"
      },
      {
        "type": "paperTitle",
        "text": "2613 <b>CDEA: Context- and Detail-Enhanced Unsupervised Learning for Domain Adaptive Semantic Segmentation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shuyuan Wen, Bingrui Hu,  wenchaoli"
      },
      {
        "type": "paperTitle",
        "text": "2615 <b>Simple Yet Effective: Structure Guided Pre-trained Transformer for Multi-modal Knowledge Graph Reasoning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "KE LIANG, Lingyuan Meng, Yue Liu, Meng Liu, Wei Wei, Siwei Wang, Suyuan Liu, Wenxuan Tu, sihang zhou, Xinwang Liu"
      },
      {
        "type": "paperTitle",
        "text": "2618 <b>Predicting the Unseen: A Novel Dataset for Hidden Intention Localization in Pre-abnormal Analysis<b>"
      },
      {
        "type": "paperAuthor",
        "text": "ZeHao Qi, Ruixu Zhang, Xinyi Hu, Wenxuan Liu, Zheng Wang"
      },
      {
        "type": "paperTitle",
        "text": "2625 <b>SemNFT: A Semantically Enhanced Decentralized Middleware for Digital Asset Immortality<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Lehao Lin, Hong KANG, Xinyao Sun, Wei Cai"
      },
      {
        "type": "paperTitle",
        "text": "2635 <b>Prior Knowledge Integration via LLM Encoding and Pseudo Event Regulation for Video Moment Retrieval<b>"
      },
      {
        "type": "paperAuthor",
        "text": "JIANG Yiyang, Wengyu ZHANG, xulu zhang, Xiaoyong Wei, Chang Chen, Qing Li"
      },
      {
        "type": "paperTitle",
        "text": "2641 <b>F-3DGS: Factorized Coordinates and Representations for 3D Gaussian Splatting<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiangyu Sun, Joo Chan Lee, Daniel Rho, Jong Hwan Ko, Usman Ali, Eunbyung Park"
      },
      {
        "type": "paperTitle",
        "text": "2650 <b>Connectivity-based Cerebrovascular Segmentation in Time-of-Flight Magnetic Resonance Angiography<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zan Chen, Xiao Yu, Yuanjing Feng"
      },
      {
        "type": "paperTitle",
        "text": "2652 <b>HGOE: Hybrid External and Internal Graph Outlier Exposure for Graph Out-of-Distribution Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "He Junwei, Qianqian Xu, Yangbangyan Jiang, Zitai Wang, Yuchen Sun, Qingming Huang"
      },
      {
        "type": "paperTitle",
        "text": "2656 <b>Exploring Deeper! Segment Anything Model with Depth Perception for Camouflaged Object Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhenni Yu, Xiaoqin Zhang,  LiZhao, Yi Bin, Guobao Xiao"
      },
      {
        "type": "paperTitle",
        "text": "2660 <b>CodeSwap: Symmetrically Face Swapping Based on Prior Codebook<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiangyang Luo, Xin Zhang, Yifan Xie, Xinyi Tong, Weijiang Yu, Heng Chang, Fei Ma, Fei Richard Yu"
      },
      {
        "type": "paperTitle",
        "text": "2661 <b>VRDistill: Vote Refinement Distillation for Efficient Indoor 3D Object Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ze Yuan, Jinyang Guo, Dakai An, Junran Wu, He Zhu, Jianhao Li, Xueyuan Chen, Ke Xu, Liu Jiaheng"
      },
      {
        "type": "paperTitle",
        "text": "2665 <b>Cross-Class Domain Adaptive Semantic Segmentation with Visual Language Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenqi Ren, Ruihao Xia, Meng Zheng, Ziyan Wu, Yang Tang, Nicu Sebe"
      },
      {
        "type": "paperTitle",
        "text": "2666 <b>Adversarial Experts Model for Black-box Domain Adaptation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Siying Xiao, Mao Ye, Qichen He, Li Shuaifeng, Song Tang, Xiatian Zhu"
      },
      {
        "type": "paperTitle",
        "text": "2672 <b>RoSe: Rotation-Invariant Sequence-Aware Consensus for Robust Correspondence Pruning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yizhang Liu, Weiwei Zhou, Yanping Li, Shengjie Zhao"
      },
      {
        "type": "paperTitle",
        "text": "2680 <b>Regularized Contrastive Partial Multi-view Outlier Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yijia Wang, Qianqian Xu, Yangbangyan Jiang, Siran Dai, Qingming Huang"
      },
      {
        "type": "paperTitle",
        "text": "2682 <b>Regional Attention For Shadow Removal<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hengxing Liu, Mingjia Li, Xiaojie Guo"
      },
      {
        "type": "paperTitle",
        "text": "2690 <b>Illumination Distribution Prior for Low-light Image Enhancement<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chao Wang, Yang Zhou, Liangtian He, Lin Fenglai, Hongming Chen, Liang-Jian Deng"
      },
      {
        "type": "paperTitle",
        "text": "2691 <b>FARFusion V2: A Geometry-based Radar-Camera Fusion Method on the Ground for Roadside Far-Range 3D Object Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yao Li, Jiajun Deng, Yuxuan Xiao, Yingjie Wang, Xiaomeng Chu, Jianmin Ji, Yanyong Zhang"
      },
      {
        "type": "paperTitle",
        "text": "2702 <b>Achieving Resolution-Agnostic DNN-based Image Watermarking: A Novel Perspective of Implicit Neural Representation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuchen Wang, Xingyu Zhu, Guanhui Ye, Shiyao Zhang, Xuetao Wei"
      },
      {
        "type": "paperTitle",
        "text": "2703 <b>TrGa: Reconsidering the Application of Graph Neural Networks in Two-View Correspondence Pruning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Luanyuan Dai, Xiaoyu Du, Jinhui Tang"
      },
      {
        "type": "paperTitle",
        "text": "2710 <b>SMART: Self-Weighted Multimodal Fusion for Diagnostics of Neurodegenerative Disorders<b>"
      },
      {
        "type": "paperAuthor",
        "text": "qiuhui chen, Yi Hong"
      },
      {
        "type": "paperTitle",
        "text": "2715 <b>Embracing Domain Gradient Conflicts: Domain Generalization Using Domain Gradient Equilibrium<b>"
      },
      {
        "type": "paperAuthor",
        "text": "ZUYU ZHANG, YAN LI, BYUNGSEOK SHIN"
      },
      {
        "type": "paperTitle",
        "text": "2717 <b>Minerva: Enhancing Quantum Network Performance for High-Fidelity Multimedia Transmission<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Tingting Li, Ziming Zhao, Jianwei Yin"
      },
      {
        "type": "paperTitle",
        "text": "2718 <b>Mesh Denoising Using Filtering Coefficients Jointly Aware of Noise and Geometry<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xingtao Wang, Xianqi Zhang, Wenxue Cui, Ruiqin Xiong, Xiaopeng Fan, Debin Zhao"
      },
      {
        "type": "paperTitle",
        "text": "2733 <b>P-BiC: Ultra-High-Definition Image Demoireing via Patch Bilateral Compensation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zeyu Xiao, Zhihe Lu, Xinchao Wang"
      },
      {
        "type": "paperTitle",
        "text": "2736 <b>Advancing Multi-grained Alignment for Contrastive Language-Audio Pre-training<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yiming Li, Zhifang Guo, Xiangdong Wang, Hong Liu"
      },
      {
        "type": "paperTitle",
        "text": "2754 <b>Bridging the Gap: Sketch-Aware Interpolation Network for High-Quality Animation Sketch Inbetweening<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiaming Shen, Kun Hu, Wei Bao, Chang Chen, Zhiyong Wang"
      },
      {
        "type": "paperTitle",
        "text": "2755 <b>HeroMaker: Human-centric Video Editing with Motion Priors</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shiyu Liu, Zibo Zhao, Yihao Zhi, Yiqun Zhao, Binbin Huang, Shuo Wang, Ruoyu Wang, Michael Xuan, Zhengxin Li, Shenghua Gao"
      },
      {
        "type": "paperTitle",
        "text": "2767 <b>Attribute-driven Disentangled Representation Learning for Multimodal Recommendation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhenyang Li, Fan Liu, Yinwei Wei, Zhiyong Cheng, Liqiang Nie, Mohan Kankanhalli"
      },
      {
        "type": "paperTitle",
        "text": "2770 <b>From Covert Hiding To Visual Editing: Robust Generative Video Steganography</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Mao Xueying, Xiao Hu, Wanli Peng, Zhenliang Gan, Zhenxing Qian, Xinpeng Zhang, Sheng Li"
      },
      {
        "type": "paperTitle",
        "text": "2775 <b>Narrowing the Gap between Vision and Action in Navigation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yue Zhang, Parisa Kordjamshidi"
      },
      {
        "type": "paperTitle",
        "text": "2782 <b>U2UData: A Large-scale Cooperative Perception Dataset for Swarm UAVs Autonomous Flight</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Tongtong Feng, Xin Wang, Feilin Han, Leping Zhang, Wenwu Zhu"
      },
      {
        "type": "paperTitle",
        "text": "2786 <b>G-Refine: A General Refiner for Text-to-Image Generation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chunyi Li, Haoning Wu, Hongkun Hao, zicheng zhang, Tengchuan Kou, Chaofeng Chen, Xiaohong Liu, LEI BAI, Weisi Lin, Guangtao Zhai"
      },
      {
        "type": "paperTitle",
        "text": "2787 <b>TrafficMOT: A Challenging Dataset for Multi-Object Tracking in Complex Traffic Scenarios</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Lihao Liu, Yanqi Cheng, Zhongying Deng, Shujun Wang, Dongdong Chen, Xiaowei Hu, Pietro Lio, Carola-Bibiane Schönlieb, Angelica Aviles-Rivero"
      },
      {
        "type": "paperTitle",
        "text": "2799 <b>Multi-view X-ray Image Synthesis with Multiple Domain Disentanglement from CT Scans</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Lixing Tan, shuang Song, Kangneng Zhou, Duan Chengbo, Wang Lanying, Huayang Ren, Linlin Liu, Wei Zhang, Ruoxiu Xiao"
      },
      {
        "type": "paperTitle",
        "text": "2804 <b>Domain Shared and Specific Prompt Learning for Incremental Monocular Depth Estimation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhiwen Yang, Liang Li, Jiehua Zhang, Tingyu Wang, Yaoqi Sun, Chenggang Yan"
      },
      {
        "type": "paperTitle",
        "text": "2806 <b>GS$^2$-GNeSF: Geometry-Semantics Synergy for Generalizable Neural Semantic Fields</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chengshun Wang, Na Zhao"
      },
      {
        "type": "paperTitle",
        "text": "2811 <b>Generalized News Event Discovery via Dynamic Augmentation and Entropy Optimization</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zehang LIN, Jiayuan Xie, Zhenguo Yang, Yi Yu, Qing Li"
      },
      {
        "type": "paperTitle",
        "text": "2812 <b>Adaptive Hierarchical Aggregation for Federated Object Detection</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ruofan Jia, Weiying Xie, Jie Lei, Yunsong Li"
      },
      {
        "type": "paperTitle",
        "text": "2815 <b>S2TD-Face: Reconstruct a Detailed 3D Face with Controllable Texture from a Single Sketch</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zidu Wang, Xiangyu Zhu, Jiang Yu, Tianshuo Zhang, Zhen Lei"
      },
      {
        "type": "paperTitle",
        "text": "2823 <b>EPL-UFLSID: Efficient Pseudo Labels-Driven Underwater Forward-Looking Sonar Images Object Detection</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Cheng Shen, Liquan Shen, Mengyao Li, Meng Yu"
      },
      {
        "type": "paperTitle",
        "text": "2825 <b>Distilled Cross-Combination Transformer for Image Captioning with Dual Refined Visual Features</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Junbo Hu, Zhixin Li"
      },
      {
        "type": "paperTitle",
        "text": "2831 <b>One-Stage Fair Multi-View Spectral Clustering</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Rongwen Li, Haiyang Hu, Liang Du, Jiarong Chen, Bingbing Jiang, Peng Zhou"
      },
      {
        "type": "paperTitle",
        "text": "2834 <b>KEBR: Knowledge Enhanced Self-Supervised Balanced Representation for Multimodal Sentiment Analysis</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Aoqiang Zhu, Min Hu, Xiaohua Wang, Jiaoyun Yang, Yiming Tang, Fuji Ren"
      },
      {
        "type": "paperTitle",
        "text": "2836 <b>Incremental Learning via Robust Parameter Posterior Fusion</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenju Sun, Qingyong Li, Siyu Zhang, Wen Wang, Yangliao Geng"
      },
      {
        "type": "paperTitle",
        "text": "2842 <b>Prototypical Prompting for Text-to-image Person Re-identification</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shuanglin Yan, Jun Liu, Neng Dong, Liyan Zhang, Jinhui Tang"
      },
      {
        "type": "paperTitle",
        "text": "2843 <b>Gait Recognition in Large-scale Free Environment via Single LiDAR</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiao Han, Yiming Ren, Peishan Cong, YUJING SUN, Jingya Wang, Lan Xu, Yuexin Ma"
      },
      {
        "type": "paperTitle",
        "text": "2844 <b>OneChart: Purify the Chart Structural Extraction via One Auxiliary Token</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jinyue Chen, Lingyu Kong, Haoran Wei, Chenglong Liu, Zheng Ge, Liang Zhao, Jianjian Sun, chunrui han, Xiangyu Zhang"
      },
      {
        "type": "paperTitle",
        "text": "2846 <b>Digging into contrastive learning for robust depth estimation with diffusion models</b>"
      },
      {
        "type": "paperAuthor",
        "text": "JiYuan Wang, Chunyu Lin, Lang Nie, Kang Liao, Shuwei Shao, Yao Zhao"
      },
      {
        "type": "paperTitle",
        "text": "2847 <b>Learning Cross-Spectral Prior for Image Super-Resolution</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chenxi Ma, Weimin Tan, Shili Zhou, Bo Yan"
      },
      {
        "type": "paperTitle",
        "text": "2850 <b>Unveiling Structural Memorization: Structural Membership Inference Attack for Text-to-Image Diffusion Models</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Qiao Li, Xiaomeng Fu, Xi Wang, Jin Liu, Xingyu Gao, Jiao Dai, Jizhong Han"
      },
      {
        "type": "paperTitle",
        "text": "2857 <b>Category-Prompt Refined Feature Learning for Long-Tailed Multi-Label Image Classification</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiexuan Yan, Sheng Huang, Nankun Mu, Luwen Huangfu, Bo Liu"
      },
      {
        "type": "paperTitle",
        "text": "2861 <b>\"Special Relativity\" of Image Aesthetics Assessment: a Preliminary Empirical Perspective</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Rui Xie, Anlong Ming, Shuai He, Yi Xiao, Huadong Ma"
      },
      {
        "type": "paperTitle",
        "text": "2867 <b>Mamba3D: Enhancing Local Features for 3D Point Cloud Analysis via State Space Model</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xu Han, Yuan Tang, Zhaoxuan Wang, Xianzhi Li"
      },
      {
        "type": "paperTitle",
        "text": "2869 <b>Neural Boneprint: Person Identification from Bones using Generative Contrastive Deep Learning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chaoqun Niu, Dongdong Chen, Jizhe Zhou, Jian Wang, Xiang Luo, Quan-Hui Liu, YUAN LI, Jiancheng Lv"
      },
      {
        "type": "paperTitle",
        "text": "2873 <b>Attribute-Driven Multimodal Hierarchical Prompts for Image Aesthetic Quality Assessment</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hancheng Zhu, Ju Shi, Zhiwen Shao, Rui Yao, Yong Zhou, Jiaqi Zhao, Leida Li"
      },
      {
        "type": "paperTitle",
        "text": "2882 <b>PS-TTL: Prototype-based Soft-labels and Test-Time Learning for Few-shot Object Detection</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yingjie Gao, Yanan Zhang, Ziyue Huang, Nanqing Liu, Di Huang"
      },
      {
        "type": "paperTitle",
        "text": "2887 <b>Graph based Consistency Learning for Contrastive Multi-View Clustering</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Binbin Xu, Jun Yin, Nan Zhang"
      },
      {
        "type": "paperTitle",
        "text": "2900 <b>A Descriptive Basketball Highlight Dataset for Automatic Commentary Generation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Benhui Zhang, Junyu Gao, Yuan Yuan"
      },
      {
        "type": "paperTitle",
        "text": "2904 <b>Joint-Motion Mutual Learning for Pose Estimation in Video</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Sifan Wu, Haipeng Chen, Yifang Yin, Sihao Hu, Runyang Feng, Yingying Jiao, Ziqi Yang, Zhenguang Liu"
      },
      {
        "type": "paperTitle",
        "text": "2909 <b>Leveraging RGB-Pressure for Whole-body Human-to-Humanoid Motion Imitation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yi Lu, Shenghao Ren, Qiu Shen, Xun Cao"
      },
      {
        "type": "paperTitle",
        "text": "2919 <b>SymAttack: Symmetry-aware Imperceptible Adversarial Attacks on 3D Point Clouds</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Keke Tang, Zhensu Wang, Weilong Peng, Lujie Huang, Le Wang, Peican Zhu, Wenping Wang, Zhihong Tian"
      },
      {
        "type": "paperTitle",
        "text": "2920 <b>ListenFormer: Responsive Listening Head Generation with Non-autoregressive Transformers</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Miao Liu, Jing Wang, Xinyuan Qian, Haizhou Li"
      },
      {
        "type": "paperTitle",
        "text": "2921 <b>Task-Interaction-Free Multi-Task Learning with Efficient Hierarchical Feature Representation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Salaeidin Sirajidin, Bayram Bayramli, Yuxiang Lu, Yuwen Yang, Tamam Alsarhan, Hongtao Lu, Yue Ding"
      },
      {
        "type": "paperTitle",
        "text": "2924 <b>A Unified Understanding of Adversarial Vulnerability Regarding Unimodal Models and Vision-Language Pre-training Models</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Haonan Zheng, Xinyang Deng, Wen Jiang, Wenrui Li"
      },
      {
        "type": "paperTitle",
        "text": "2925 <b>InstantAS: Minimum Coverage Sampling for Arbitrary-Size Image Generation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Changshuo Wang, Mingzhe Yu, Lei Wu, Lei Meng, Xiang Li, Xiangxu Meng"
      },
      {
        "type": "paperTitle",
        "text": "2928 <b>Q-SNNs: Quantized Spiking Neural Networks</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenjie Wei, Yu Liang, Ammar Belatreche, Yichen Xiao, Honglin Cao, Zhenbang Ren, Guoqing Wang, Malu Zhang, Yang Yang"
      },
      {
        "type": "paperTitle",
        "text": "2935 <b>Sample-agnostic Adversarial Perturbation for Vision-Language Pre-training Models</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Haonan Zheng, Wen Jiang, Xinyang Deng, Wenrui Li"
      },
      {
        "type": "paperTitle",
        "text": "2946 <b>Controllable Music Loops Generation with MIDI and Text via Multi-Stage Cross Attention and Instrument-Aware Reinforcement Learning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Guan-Yuan Chen, Von-Wun Soo"
      },
      {
        "type": "paperTitle",
        "text": "2948 <b>Visual Question Answering Driven Eye Tracking Paradigm for Identifying Children with Autism Spectrum Disorder</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiansong Qi, Yaping Huang, Ying Zhang, Zhang Sihui, Mei Tian, Yi Tian, Fanchao Meng, Lin Guan, Tianyi Chang"
      },
      {
        "type": "paperTitle",
        "text": "2951 <b>Aspects are Anchors: Towards Multimodal Aspect-based Sentiment Analysis via Aspect-driven Alignment and Refinement</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhanpeng Chen, Zhihong Zhu, Wanshi Xu, Yunyan Zhang, Xian Wu, Yefeng Zheng"
      },
      {
        "type": "paperTitle",
        "text": "2959 <b>Do LLMs Understand Visual Anomalies? Uncovering LLM's Capabilities in Zero-shot Anomaly Detection</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiaqi Zhu, Shaofeng Cai, Fang Deng, WuJunran"
      },
      {
        "type": "paperTitle",
        "text": "2963 <b>ScaleTraversal: Creating Multi-Scale Biomedical Animation with Limited Hardware Resources</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Richen Liu, Hanshang Wang, Hailong Wang, Siru Chen, Chufan Lai, Ayush Kumar, Siming Chen"
      },
      {
        "type": "paperTitle",
        "text": "2969 <b>Language-Driven Interactive Shadow Detection</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hongqiu Wang, Wei Wang, haipeng zhou, Shaozhi Wu, Lei Zhu"
      },
      {
        "type": "paperTitle",
        "text": "2970 <b>Cross-view Contrastive Unification Guides Generative Pretraining for Molecular Property Prediction</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Junyu Lin, Yan Zheng, Xinyue Chen, Yazhou Ren, Xiaorong Pu, Jing He"
      },
      {
        "type": "paperTitle",
        "text": "2971 <b>Semantic-aware Next-Best-View for Multi-DoFs Mobile System in Search-and-Acquisition based Visual Perception</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiaotong Yu, Chang Chen"
      },
      {
        "type": "paperTitle",
        "text": "2980 <b>Engaging Live Video Comments Generation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ge Luo, Yuchen Ma, Manman Zhang, Junqiang Huang, Sheng Li, Zhenxing Qian, Xinpeng Zhang"
      },
      {
        "type": "paperTitle",
        "text": "2982 <b>Fine-Grained Side Information Guided Dual-Prompts for Zero-Shot Skeleton Action Recognition</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yang Chen, Jingcai Guo, Tian He, Xiaocheng Lu, Ling Wang"
      },
      {
        "type": "paperTitle",
        "text": "2983 <b>Revisiting Unsupervised Temporal Action Localization: The Primacy of High-Quality Actionness and Pseudolabels</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Han Jiang, Haoyu Tang, Ming Yan, Ji Zhang, Mingzhu Xu, Yupeng Hu, Jihua Zhu, Liqiang Nie"
      },
      {
        "type": "paperTitle",
        "text": "2984 <b>AniTalker: Animate Vivid and Diverse Talking Faces through Identity-Decoupled Facial Motion Encoding</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Tao Liu, Feilong.chen, Shuai Fan, Chenpeng Du, Qi Chen, Xie Chen, Kai Yu"
      },
      {
        "type": "paperTitle",
        "text": "2989 <b>LoMOE: Localized Multi-Object Editing via Multi-Diffusion</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Goirik Chakrabarty, Aditya Chandrasekar, Ramya Hebbalaguppe, Prathosh AP"
      },
      {
        "type": "paperTitle",
        "text": "2991 <b>Saliency-Guided Fine-Grained Temporal Mask Learning for Few-Shot Action Recognition</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shuo Zheng, Yuanjie Dang, Peng Chen, Ruohong Huan, Dongdong Zhao, Ronghua Liang"
      },
      {
        "type": "paperTitle",
        "text": "2992 <b>Divide and Conquer: Isolating Normal-Abnormal Attributes in Knowledge Graph-Enhanced Radiology Report Generation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiao Liang, Yanlei Zhang, Di Wang, Haodi Zhong, Ronghan Li, Quan Wang"
      },
      {
        "type": "paperTitle",
        "text": "2993 <b>Investigating Conceptual Blending of a Diffusion Model for Improving Nonword-to-Image Generation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chihaya Matsuhira, Marc A. Kastner, Takahiro Komamizu, Takatsugu Hirayama, Ichiro Ide"
      },
      {
        "type": "paperTitle",
        "text": "3005 <b>SimCEN: Simple Contrast-enhanced Network for CTR Prediction</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Honghao Li, Lei Sang, Yi Zhang, Yiwen Zhang"
      },
      {
        "type": "paperTitle",
        "text": "3007 <b>Resisting Over-Smoothing in Graph Neural Networks via Dual-Dimensional Decoupling</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wei Shen, Mang Ye, Wenke Huang"
      },
      {
        "type": "paperTitle",
        "text": "3009 <b>Revisiting Knowledge Tracing: A Simple and Powerful Model</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiaoxuan Shen, Feng Hua Yu, yaqi Liu, Ruxia Liang, Qian Wan, Kai Yang, Jianwen Sun"
      },
      {
        "type": "paperTitle",
        "text": "3011 <b>Cross-View Consistency Regularisation for Knowledge Distillation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Weijia Zhang, Dongnan Liu, Weidong Cai, Chao Ma"
      },
      {
        "type": "paperTitle",
        "text": "3012 <b>SOIL: Contrastive Second-Order Interest Learning for Multimodal Recommendation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hongzu Su, Jingjing Li, FENGLING LI, Ke Lu, Lei Zhu"
      },
      {
        "type": "paperTitle",
        "text": "3013 <b>OmniStitch: Depth-Aware Stitching Framework for Omnidirectional Vision with Multiple Cameras</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Soo-ho Kim, Soyeon Hong, Kyungsoo Park, Hyunsouk Cho, Kyung-Ah Sohn"
      },
      {
        "type": "paperTitle",
        "text": "3020 <b>PRISM: PRogressive dependency maxImization for Scale-invariant image Matching</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xudong Cai, Yongcai Wang, Lun Luo, Minhang Wang, Deying Li, Jintao Xu, Weihao Gu, Rui Ai"
      },
      {
        "type": "paperTitle",
        "text": "3022 <b>Not All Frequencies Are Created Equal: Towards a Dynamic Fusion of Frequencies in Time-Series Forecasting</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xingyu Zhang, Siyu Zhao, Zeen Song, Huijie Guo, Jianqi Zhang, Changwen Zheng, Wenwen Qiang"
      },
      {
        "type": "paperTitle",
        "text": "3032 <b>Prototype-Guided Dual-Transformer Reasoning for Video Individual Counting</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Rui Li, Yishu Liu, Huafeng Li, Jinxing Li, Guangming Lu"
      },
      {
        "type": "paperTitle",
        "text": "3039 <b>Uni-YOLO: Vision-Language Model-Guided YOLO for Robust and Fast Universal Detection in the Open World</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xudong Wang, Weihong Ren, Xi'ai Chen, Huijie Fan, Yandong Tang, Zhi Han"
      },
      {
        "type": "paperTitle",
        "text": "3040 <b>Towards Robustness Prompt Tuning with Fully Test-Time Adaptation for CLIP's Zero-Shot Generalization</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ran Wang, Hua Zuo, Zhen Fang, Jie Lu"
      },
      {
        "type": "paperTitle",
        "text": "3041 <b>Egocentric Vehicle Dense Video Captioning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "feiyu chen, Cong Xu, Qi Jia, Yihua Wang, Yuhan Liu, Zhang Haotian, Endong Wang"
      },
      {
        "type": "paperTitle",
        "text": "3046 <b>ERL-MR: Harnessing the Power of Euler Feature Representations for Balanced Multi-modal Learning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Weixiang Han, Chengjun Cai, Guo Yu, Jialiang Peng"
      },
      {
        "type": "paperTitle",
        "text": "3050 <b>PriFU: Capturing Task-Relevant Information Without Adversarial Learning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiuli Bi, Yang Hu, Bo Liu, Weisheng Li, Pamela Cosman, Bin Xiao"
      },
      {
        "type": "paperTitle",
        "text": "3053 <b>Sample Efficiency Matters: Training Multimodal Conversational Recommendation Systems in a Small Data Setting</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Haoyang Su, Wenzhe Du, Nguyen Cam-Tu, Wang Xiaoliang"
      },
      {
        "type": "paperTitle",
        "text": "3059 <b>Bilateral Adaptive Cross-Modal Fusion Prompt Learning for CLIP</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Qiang Wang, Ke Yan, Shouhong Ding"
      },
      {
        "type": "paperTitle",
        "text": "3067 <b>Caption-Aware Multimodal Relation Extraction with Mutual Information Maximization</b>"
      },
      {
        "type": "paperAuthor",
        "text": "zefan zhang, Weiqi Zhang, hui yan, bai tian"
      },
      {
        "type": "paperTitle",
        "text": "3068 <b>DERO: Diffusion-Model-Erasure Robust Watermarking</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Han Fang, Kejiang Chen, Yupeng Qiu, Zehua Ma, Weiming Zhang, Ee-Chien Chang"
      },
      {
        "type": "paperTitle",
        "text": "3082 <b>Efficient Training for Multilingual Visual Speech Recognition: Pre-training with Discretized Visual Speech Representation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Minsu Kim, Jeonghun Yeo, Se Jin Park, Hyeongseop Rha, Yong Ro"
      },
      {
        "type": "paperTitle",
        "text": "3083 <b>IF-Garments: Reconstructing Your Intersection-Free Multi-Layered Garments from Monocular Videos</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Mingyang Sun, Qipeng Yan, Zhuoer Liang, Dongliang Kou, Dingkang Yang, Ruisheng Yuan, Xiao Zhao, Mingcheng Li, Lihua Zhang"
      },
      {
        "type": "paperTitle",
        "text": "3086 <b>Scalable Multi-view Unsupervised Feature Selection with Structure Learning and Fusion</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chenglong Zhang, Xinyan Liang, Peng Zhou, Zhaolong Ling, Yingwei Zhang, Xingyu Wu, Weiguo Sheng, Bingbing Jiang"
      },
      {
        "type": "paperTitle",
        "text": "3089 <b>Rethinking the Effect of Uninformative Class Name in Prompt Learning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Fengmao Lv, Changru Nie, Jianyang Zhang, Guowu Yang, Guosheng Lin, Xiao Wu, Tianrui Li"
      },
      {
        "type": "paperTitle",
        "text": "3096 <b>ViewPCGC: View-Guided Learned Point Cloud Geometry Compression</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Huiming Zheng, Wei Gao, Zhuozhen Yu, Tiesong Zhao, Ge Li"
      },
      {
        "type": "paperTitle",
        "text": "3098 <b>Cefdet: Cognitive Effectiveness Network Based on Fuzzy Inference for Action Detection</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhe Luo, Weina Fu, Shuai Liu, Saeed Anwar, Muhammad Saqib, Sambit Bakshi, Khan Muhammad"
      },
      {
        "type": "paperTitle",
        "text": "3102 <b>PrimKD: Primary Modality Guided Multimodal Fusion for RGB-D Semantic Segmentation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhiwei Hao, Zhongyu Xiao, Yong Luo, Jianyuan Guo, Jing Wang, Li Shen, Han Hu"
      },
      {
        "type": "paperTitle",
        "text": "3104 <b>RefScale: Multi-temporal Assisted Image Rescaling in Repetitive Observation Scenarios</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhen Zhang, Jing Xiao, Liang Liao, Mi Wang"
      },
      {
        "type": "paperTitle",
        "text": "3108 <b>Expanded Convolutional Neural Network Based Look-Up Tables for High Efficient Single-Image Super-Resolution</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Kai Yin, Jie Shen"
      },
      {
        "type": "paperTitle",
        "text": "3113 <b>Visual Grounding with Multi-modal Conditional Adaptation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ruilin Yao, Shengwu Xiong, Yichen Zhao, Yi Rong"
      },
      {
        "type": "paperTitle",
        "text": "3118 <b>MiniGPT-3D: Efficiently Aligning 3D Point Clouds with Large Language Models using 2D Priors</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuan Tang, Xu Han, Xianzhi Li, Qiao Yu, yixue Hao, Long Hu, Min Chen"
      },
      {
        "type": "paperTitle",
        "text": "3125 <b>Towards Multi-view Consistent Graph Diffusion</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jielong Lu, Zhihao Wu, Zhaoliang Chen, Zhiling Cai, Shiping Wang"
      },
      {
        "type": "paperTitle",
        "text": "3131 <b>TUT4CRS: Time-aware User-preference Tracking for Conversational Recommendation System</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Dongxiao He, Jinghan Zhang, Xiaobao Wang, Meng Ge, Zhiyong Feng, Longbiao Wang, Xiaoke Ma"
      },
      {
        "type": "paperTitle",
        "text": "3136 <b>DualFed: Enjoying both Generalization and Personalization in Federated Learning via Hierachical Representations</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Guogang Zhu, Xuefeng Liu, Jianwei Niu, Shaojie Tang, Xinghao Wu, Jiayuan Zhang"
      },
      {
        "type": "paperTitle",
        "text": "3139 <b>RAVSS: Robust Audio-Visual Speech Separation in Multi-Speaker Scenarios with Missing Visual Cues</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Tianrui Pan, Jie Liu, Bohan Wang, Jie Tang, Gangshan Wu"
      },
      {
        "type": "paperTitle",
        "text": "3149 <b>PixelFade: Privacy-preserving Person Re-identification with Noise-guided Progressive Replacement</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Delong Zhang, Yi-Xing Peng, Xiao-Ming Wu, Ancong Wu, Wei-Shi Zheng"
      },
      {
        "type": "paperTitle",
        "text": "3150 <b>Causal-driven Large Language Models with Faithful Reasoning for Knowledge Question Answering</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiawei Wang, Da Cao, Shaofei Lu, Zhanchang Ma, Junbin Xiao, Tat-seng Chua"
      },
      {
        "type": "paperTitle",
        "text": "3151 <b>Low-rank Prompt Interaction for Continual Vision-Language Retrieval</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Weicai Yan, Ye Wang, Lin Wang, Zirun Guo, Zhou Zhao, Tao Jin"
      },
      {
        "type": "paperTitle",
        "text": "3159 <b>Geometry-Guided Diffusion Model with Masked Transformer for Robust Multi-View 3D Human Pose Estimation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xinyi Zhang, Qinpeng Cui, Qiqi Bao, Wenming Yang, Qingmin Liao"
      },
      {
        "type": "paperTitle",
        "text": "3160 <b>AVHash: Joint Audio-Visual Hashing for Video Retrieval</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuxiang Zhou, Zhe Sun, Rui Liu, Yong Chen, Dell Zhang"
      },
      {
        "type": "paperTitle",
        "text": "3162 <b>MGR-Dark: A Large Multimodal Video Dataset and RGB-IR benchmark for Gesture Recognition in Darkness</b>"
      },
      {
        "type": "paperAuthor",
        "text": "yuanyuan Shi, Yunan Li, Siyu Liang, Huizhou Chen, Qiguang Miao"
      },
      {
        "type": "paperTitle",
        "text": "3168 <b>STAR-VP: Improving Long-term Viewport Prediction in 360° Videos via Space-aligned and Time-varying Fusion</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Baoqi Gao, Daoxu Sheng, Lei Zhang, Qi Qi, Bo He, Zirui Zhuang, Jingyu Wang"
      },
      {
        "type": "paperTitle",
        "text": "3186 <b>AxiomVision: Accuracy-Guaranteed Adaptive Visual Model Selection for Perspective-Aware Video Analytics</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Dai Xiangxiang, Zeyu Zhang, Peng Yang, Yuedong Xu, Xutong Liu, John Lui"
      },
      {
        "type": "paperTitle",
        "text": "3187 <b>Eliminate Before Align: A Remote Sensing Image-Text Retrieval Framework with Keyword Explicit Reasoning</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhong Ji, Changxu Meng, Yan Zhang, Haoran Wang, Yanwei Pang, Jungong Han"
      },
      {
        "type": "paperTitle",
        "text": "3189 <b>VL-Reader: Vision and Language Reconstructor is an Effective Scene Text Recognizer</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Humen Zhong, ZhiBo Yang, Zhaohai Li, Peng Wang, Jun Tang, Wenqing Cheng, Cong Yao"
      },
      {
        "type": "paperTitle",
        "text": "3192 <b>Ada-iD: Active Domain Adaption for Intrusion Detection</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Fujun Han, Peng Ye, Shukai Duan, Lidan Wang"
      },
      {
        "type": "paperTitle",
        "text": "3194 <b>Automatic and Aligned Anchor Learning Strategy for Multi-View Clustering</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Huimin Ma, Siwei Wang, Shengju Yu, Suyuan Liu, Jun-Jie Huang, Huijun Wu, Xinwang Liu, En Zhu"
      },
      {
        "type": "paperTitle",
        "text": "3196 <b>A Simple and Provable Approach for Learning on Noisy Labeled Multi-modal Medical Images</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Nan Wang, Zonglin Di, Houlin He, Qingchao Jiang, Xiaoxiao Li"
      },
      {
        "type": "paperTitle",
        "text": "3199 <b>Multimodal Inplace Prompt Tuning for Open-set Object Detection</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Guilin Li, Mengdan Zhang, Xiawu Zheng, Peixian Chen, Zihan Wang, Yunhang Shen, Mingchen Zhuge, Chenglin Wu, Fei Chao, Ke Li, Xing Sun, Rongrong Ji"
      },
      {
        "type": "paperTitle",
        "text": "3200 <b>PlacidDreamer: Advancing Harmony in Text-to-3D Generation</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shuo Huang, Shikun Sun, Zixuan Wang, Xiaoyu Qin, xiongyanmin, zhangyuan, Pengfei Wan, Di ZHANG, Jia Jia"
      },
      {
        "type": "paperTitle",
        "text": "3205 <b>Hydrodynamics-Informed Neural Network for Simulating Dense Crowd Motion Patterns</b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yanshan Zhou, Pingrui Lai, Jiaqi Yu, Yingjie Xiong, Hua Yang"
      },
      {
        "type": "paperTitle",
        "text": "3206 <b>FedSLS: Exploring Federated Aggregation in Saliency Latent Space<b>"
      },
      {
        "type": "paperAuthor",
        "text": " HengyiWang, Weiying Xie, Ma Jitao,  DaixunLi, Yunsong Li"
      },
      {
        "type": "paperTitle",
        "text": "3209 <b>SafePaint: Anti-forensic Image Inpainting with Domain Adaptation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Dunyun Chen, Xin Liao, Xiaoshuai Wu, Shiwei Chen"
      },
      {
        "type": "paperTitle",
        "text": "3213 <b>Accurate and Lightweight Learning for Specific Domain Image-Text Retrieval<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Rui Yang, Shuang Wang, Jianwei Tao, Yingping Han, Qiaoling Lin, Yanhe Guo, Biao Hou, Licheng Jiao"
      },
      {
        "type": "paperTitle",
        "text": "3218 <b>R4D-planes: Remapping Planes For Novel View Synthesis and Self-Supervised Decoupling of Monocular Videos<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Junyuan Guo, Hao Tang, Teng Wang, Chao Wang"
      },
      {
        "type": "paperTitle",
        "text": "3225 <b>Multiple Kernel Clustering with Shifted Laplacian on Grassmann Manifold<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xi Wu, Chuang Huang, Xinliu Liu, Fei Zhou, Zhenwen Ren"
      },
      {
        "type": "paperTitle",
        "text": "3227 <b>CFDiffusion: Controllable Foreground Relighting in Image Compositing via Diffusion Model<b>"
      },
      {
        "type": "paperAuthor",
        "text": "ZiQi Yu, Jing Zhou, Zhongyun Bao, Gang Fu, Weilei He, Chao Liang, Chunxia Xiao"
      },
      {
        "type": "paperTitle",
        "text": "3233 <b>GOAL: Grounded text-to-image Synthesis with Joint Layout Alignment Tuning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yaqi Li, Han Fang, Zerun Feng, Kaijing Ma, Chao Ban, Xianghao Zang, LanXiang Zhou, Zhongjiang He, Jingyan Chen, Jiani Hu, Hao Sun, Huayu Zhang"
      },
      {
        "type": "paperTitle",
        "text": "3235 <b>IC-Mapper: Instance-Centric Spatio-Temporal Modeling for Online Vectorized Map Construction<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiangtong Zhu, Zhao Yang, Yinan Shi, Jianwu Fang, Jianru Xue"
      },
      {
        "type": "paperTitle",
        "text": "3239 <b>ColVO: Colonoscopic Visual Odometry Considering Geometric and Photometric Consistency<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ruyu Liu, Zhengzhe Liu, ZHANG HAOYU, Guodao Zhang, Jianhua Zhang, Bo Sun, Weiguo Sheng, Xiufeng Liu, Yaochu Jin"
      },
      {
        "type": "paperTitle",
        "text": "3240 <b>FLIP-80M: 80 Million Visual-Linguistic Pairs for Facial Language-Image Pre-Training<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yudong Li, Xianxu Hou, Zheng Dezhi, Linlin Shen, Zhe Zhao"
      },
      {
        "type": "paperTitle",
        "text": "3244 <b>HKDSME: Heterogeneous Knowledge Distillation for Semi-supervised Singing Melody Extraction Using Harmonic Supervision<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shuai Yu, Xiaoliang He, Ke Chen, Yi Yu"
      },
      {
        "type": "paperTitle",
        "text": "3246 <b>Edit3D: Elevating 3D Scene Editing with Attention-Driven Multi-Turn Interactivity<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Peng Zhou, Dunbo Cai, Yujian Du, Runqing Zhang, Bingbing Ni, Jie Qin, Ling Qian"
      },
      {
        "type": "paperTitle",
        "text": "3249 <b>RainyScape: Unsupervised Rainy Scene Reconstruction using Decoupled Neural Rendering<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xianqiang Lyu, Hui LIU, Junhui Hou"
      },
      {
        "type": "paperTitle",
        "text": "3254 <b>DP-RAE: A Dual-Phase Merging Reversible Adversarial Example for Image Privacy Protection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xia Du, Jiajie Zhu, Jizhe Zhou, Chi-Man Pun, Qizhen Xu, Xiaoyuan Liu"
      },
      {
        "type": "paperTitle",
        "text": "3266 <b>MB2C: Multimodal Bidirectional Cycle Consistency for Learning Robust Visual Neural Representations<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yayun Wei, Lei Cao, Hao Li, Yilin Dong"
      },
      {
        "type": "paperTitle",
        "text": "3275 <b>FusionOcc: Multi-Modal Fusion for 3D Occupancy Prediction<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shuo Zhang, Yupeng Zhai, Jilin Mei, Yu Hu"
      },
      {
        "type": "paperTitle",
        "text": "3283 <b>mPLUG-PaperOwl: Scientific Diagram Analysis with the Multimodal Large Language Model<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Anwen Hu, Yaya Shi, Haiyang Xu, Jiabo Ye, Qinghao Ye, Ming Yan, Chenliang Li, Qi Qian, Ji Zhang, Fei Huang"
      },
      {
        "type": "paperTitle",
        "text": "3286 <b>PathUp: Patch-wise Timestep Tracking for Multi-class Large Pathology Image Synthesising Diffusion Model<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jingxiong Li, Sunyi Zheng, Chenglu Zhu, Yuxuan Sun, Pingyi Chen, Zhongyi Shui, Yunlong Zhang, Honglin Li, Lin Yang"
      },
      {
        "type": "paperTitle",
        "text": "3287 <b>BrainRAM: Cross-Modality Retrieval-Augmented Image Reconstruction from Human Brain Activity<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Dian Xie, Peiang Zhao, Jiarui Zhang, Kangqi Wei, Xiaobao Ni, Jiong Xia"
      },
      {
        "type": "paperTitle",
        "text": "3298 <b>Building Trust in Decision with Conformalized Multi-view Deep Classification<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wei Liu, Yufei Chen, Xiaodong Yue"
      },
      {
        "type": "paperTitle",
        "text": "3312 <b>3D Reconstruction and Novel View Synthesis of Indoor Environments based on a Dual Neural Radiance Field<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhenyu Bao, Gb Liao, Zhongyuan Zhao, KANGLIN LIU, Qing Li, Guoping Qiu"
      },
      {
        "type": "paperTitle",
        "text": "3313 <b>Superpixel-based Efficient Sampling for Learning Neural Fields from Large Input<b>"
      },
      {
        "type": "paperAuthor",
        "text": "zhongwei xuan, Zunjie Zhu, Shuai Wang, Haibing YIN, Hongkui Wang, Ming Lu"
      },
      {
        "type": "paperTitle",
        "text": "3314 <b>Partial Multi-label Learning Based On Near-Far Neighborhood Label Enhancement And Nonlinear Guidance<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yu Chen, Yanan Wu, Na Han, Xiaozhao Fang, Bingzhi Chen, Jie Wen"
      },
      {
        "type": "paperTitle",
        "text": "3319 <b>ROI-Guided Point Cloud Geometry Compression Towards Human and Machine Vision<b>"
      },
      {
        "type": "paperAuthor",
        "text": "liang Xie, Wei Gao, Huiming Zheng, Ge Li"
      },
      {
        "type": "paperTitle",
        "text": "3321 <b>Heterogeneity-Aware Federated Deep Multi-View Clustering towards Diverse Feature Representations<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiaorui Jiang, Zhongyi Ma, Yulin Fu, Yong Liao, Pengyuan Zhou"
      },
      {
        "type": "paperTitle",
        "text": "3323 <b>MegaSurf: Scalable Large Scene Neural Surface Reconstruction<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yusen Wang, Kaixuan Zhou, Wenxiao Zhang, Chunxia Xiao"
      },
      {
        "type": "paperTitle",
        "text": "3326 <b>Learnable Negative Proposals Using Dual-Signed Cross-Entropy Loss for Weakly Supervised Video Moment Localization<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Sunoh Kim, Daeho Um, HyunJun Choi, Jin Choi"
      },
      {
        "type": "paperTitle",
        "text": "3327 <b>Lumos: Optimizing Live 360-degree Video Upstreaming via Spatial-Temporal Integrated Neural Enhancement<b>"
      },
      {
        "type": "paperAuthor",
        "text": " BeizhangGuo, Juntao Bao, Chai Baili, Di Wu, Miao Hu"
      },
      {
        "type": "paperTitle",
        "text": "3331 <b>Understanding and Tackling Scattering and Reflective Flare for Mobile Camera Systems<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Fengbo Lan, Chang Chen"
      },
      {
        "type": "paperTitle",
        "text": "3339 <b>Enhancing Transformer-based Semantic Matching for Few-shot Learning through Weakly Contrastive Pre-training<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wei Yang, Tengfei Huo, Zhiqiang Liu"
      },
      {
        "type": "paperTitle",
        "text": "3342 <b>Blind Video Bit-Depth Expansion<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Panjun Duan, Yang Zhao, Yuan Chen, Wei Jia, Zhao Zhang, Ronggang Wang"
      },
      {
        "type": "paperTitle",
        "text": "3347 <b>FodFoM: Fake Outlier Data by Foundation Models Creates Stronger Visual Out-of-Distribution Detector<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiankang Chen, Ling Deng, Zhiyong Gan, Wei-Shi Zheng, Ruixuan Wang"
      },
      {
        "type": "paperTitle",
        "text": "3348 <b>Domain-Agnostic Crowd Counting via Uncertainty-Guided Style Diversity Augmentation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Guanchen Ding, Lingbo Liu, Zhenzhong Chen, Chang Chen"
      },
      {
        "type": "paperTitle",
        "text": "3349 <b>GAN-based Symmetric Embedding Costs Adjustment for Enhancing Image Steganographic Security<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ye Miaoxin, Zhou Saixing, Weiqi Luo, Shunquan Tan, Jiwu Huang"
      },
      {
        "type": "paperTitle",
        "text": "3350 <b>Heterophilic Graph Invariant Learning for Out-of-Distribution of Fraud Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Lingfei Ren, Ruimin Hu, Zheng Wang, Yilin Xiao, Dengshi Li, Junhang Wu, Jinzhang Hu, Yilong Zang, Zijun Huang"
      },
      {
        "type": "paperTitle",
        "text": "3351 <b>A Medical Data-Effective Learning Benchmark for Highly Efficient Pre-training of Foundation Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenxuan Yang, Weimin Tan, Yuqi Sun, Bo Yan"
      },
      {
        "type": "paperTitle",
        "text": "3354 <b>HcaNet: Haze-concentration-aware Network for Real-scene Dehazing with Codebook Priors<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yi Liu, Jiachen Li, Yanchun Ma, Qing Xie, Yongjian Liu"
      },
      {
        "type": "paperTitle",
        "text": "3355 <b>ScanTD: 360° Scanpath Prediction based on Time-Series Diffusion<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yujia Wang, Fang-Lue Zhang, Neil A. Dodgson"
      },
      {
        "type": "paperTitle",
        "text": "3357 <b>PFFAA: Prototype-based Feature and Frequency Alteration Attack for Semantic Segmentation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhidong Yu, Zhenbo Shi, Xiaoman Liu, Wei Yang"
      },
      {
        "type": "paperTitle",
        "text": "3360 <b>MMDFND: Multi-modal Multi-Domain Fake News Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yu Tong, Weihai Lu, Zhe Zhao, LAI Song, Tong Shi"
      },
      {
        "type": "paperTitle",
        "text": "3363 <b>Rethinking the Implicit Optimization Paradigm with Dual Alignments for Referring Remote Sensing Image Segmentation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuwen Pan, Rui Sun, Yuan Wang, Tianzhu Zhang, Yongdong Zhang"
      },
      {
        "type": "paperTitle",
        "text": "3367 <b>FedCAFE: Federated Cross-Modal Hashing with Adaptive Feature Enhancement<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ting Fu, Yu-Wei Zhan, Chong-Yu Zhang, Xin Luo, Zhen-Duo Chen, Yongxin Wang, Xun Yang, Xin-Shun Xu"
      },
      {
        "type": "paperTitle",
        "text": "3370 <b>3DPCP-Net: A Lightweight Progressive 3D Correspondence Pruning Network for Accurate and Efficient Point Cloud Registration<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jingtao Wang, Zechao Li"
      },
      {
        "type": "paperTitle",
        "text": "3376 <b>Global Patch-wise Attention is Masterful Facilitator for Masked Image Modeling<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Gongli Xi, Ye Tian, Mengyu Yang, Lanshan Zhang, Xirong Que, Wendong Wang"
      },
      {
        "type": "paperTitle",
        "text": "3377 <b>Harmfully Manipulated Images Matter in Multimodal Misinformation Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Bing Wang, Shengsheng Wang, Changchun Li, Renchu Guan, Ximing Li"
      },
      {
        "type": "paperTitle",
        "text": "3384 <b>S$^2$-CSNet: Scale-Aware Scalable Sampling Network for Image Compressive Sensing<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chen Hui, Haiqi Zhu, Shuya Yan, Shaohui Liu, Feng Jiang, Debin Zhao"
      },
      {
        "type": "paperTitle",
        "text": "3392 <b>Part-level Reconstruction for Self-Supervised Category-level 6D Object Pose Estimation with Coarse-to-Fine Correspondence Optimization<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zerui Zhang, Jun Yu, Liangxian Cui, Qiang Ling,  TianyuLiu"
      },
      {
        "type": "paperTitle",
        "text": "3397 <b>Balanced Multi-Relational Graph Clustering<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhixiang Shen, Haolan He, zhao kang"
      },
      {
        "type": "paperTitle",
        "text": "3398 <b>When, Where, and What? A Benchmark for Accident Anticipation and Localization with Large Language Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Haicheng Liao, Yongkang Li, Zhenning Li, Chengyue Wang, Yanchen Guan, KaHou Tam, Chunlin Tian, Li Li, Cheng-zhong Xu"
      },
      {
        "type": "paperTitle",
        "text": "3415 <b>Making Large Language Models Perform Better in Knowledge Graph Completion<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yichi Zhang, Zhuo Chen, Lingbing Guo, yajing Xu, Wen Zhang, Huajun Chen"
      },
      {
        "type": "paperTitle",
        "text": "3418 <b>ECAvatar: 3D Avatar Facial Animation with Controllable Identity and Emotion<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Minjing Yu,  DelongPang, Ziwen Kang, Zhiyao Sun, Tian Lv, Jenny Sheng, Ran Yi, Yuhui Wen, Yong-jin Liu"
      },
      {
        "type": "paperTitle",
        "text": "3419 <b>Label Text-aided Hierarchical Semantics Mining for Panoramic Activity Recognition<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Tianshan Liu, Kin-man Lam, Bingkun BAO"
      },
      {
        "type": "paperTitle",
        "text": "3421 <b>Dig a Hole and Fill in Sand: Adversary and Hiding Decoupled Steganography<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Weixuan Tang,  HaoyuYang, Yuan Rao, Zhili Zhou, Fei Peng"
      },
      {
        "type": "paperTitle",
        "text": "3430 <b>Robust Variational Contrastive Learning for Partially View-unaligned Clustering<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Changhao He, Hongyuan Zhu, Peng Hu, Xi Peng"
      },
      {
        "type": "paperTitle",
        "text": "3439 <b>XMeCap: Meme Caption Generation with Sub-Image Adaptability<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuyan Chen, Songzhou Yan, Zhihong Zhu, Zhixu Li, Yanghua Xiao"
      },
      {
        "type": "paperTitle",
        "text": "3463 <b>Spatiotemporal Fine-grained Video Description for Short Videos<b>"
      },
      {
        "type": "paperAuthor",
        "text": "te yang, Jian Jia, Bo Wang, yanhua chengyan, Yan Li, dongze hao, Xipeng Cao, Quan Chen, Han Li, Peng Jiang, Xiangyu Zhu, Zhen Lei"
      },
      {
        "type": "paperTitle",
        "text": "3466 <b>Improved Weighted Tensor Schatten 𝑝-Norm for Fast Multi-view Graph Clustering<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hui Sun, Xingfeng Li, Sun Quansen, Min-Ling Zhang, Zhenwen Ren"
      },
      {
        "type": "paperTitle",
        "text": "3469 <b>Adaptive Instance-wise Multi-view Clustering<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shudong Huang, Hecheng Cai, Hao Dai, Wentao Feng, Jiancheng Lv"
      },
      {
        "type": "paperTitle",
        "text": "3472 <b>Consistency Guided Diffusion Model with Neural Syntax for Perceptual Image Compression<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Haowei Kuang, Yiyang Ma, Wenhan Yang, Zongming Guo, Jiaying Liu"
      },
      {
        "type": "paperTitle",
        "text": "3477 <b>TimeNeRF: Building Generalizable Neural Radiance Fields across Time from Few-Shot Input Views<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hsiang-Hui Hung, Huu-Phu Do, Yung-hui Li, Ching-Chun Huang"
      },
      {
        "type": "paperTitle",
        "text": "3481 <b>Few-shot Semantic Segmentation via Perceptual Attention and Spatial Control<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Guangchen Shi, Danhuai Zhao, Yirui Wu, Wei Zhu, Kang Zheng, Tong Lu"
      },
      {
        "type": "paperTitle",
        "text": "3483 <b>MIKO: Multimodal Intention Knowledge Distillation from Large Language Models for Social-Media Commonsense Discovery<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Feihong Lu, Weiqi Wang, Yangyifei Luo, Ziqin Zhu, Qingyun Sun, Baixuan Xu, Haochen SHI, Shiqi Gao, qian li, Yangqiu Song, Jianxin Li"
      },
      {
        "type": "paperTitle",
        "text": "3486 <b>Traj2Former: A Local Context-aware Snapshot and Sequential Dual Fusion Transformer for Trajectory Classification<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuan Xie, Yichen Zhang, Yifang Yin, SHENG ZHANG, Ying Zhang, Rajiv Shah, Roger Zimmermann, Guoqing Xiao"
      },
      {
        "type": "paperTitle",
        "text": "3487 <b>Multi-fineness Boundaries and the Shifted Ensemble-aware Encoding for Point Cloud Semantic Segmentation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ziming Wang, Boxiang Zhang, Ming Ma, Yue Wang, Taoli Du, Wenhui Li"
      },
      {
        "type": "paperTitle",
        "text": "3488 <b>Rainmer: Learning Multi-view Representations for Comprehensive Image Deraining and Beyond<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wu Ran, Peirong Ma, Zhiquan He, Lu Hong"
      },
      {
        "type": "paperTitle",
        "text": "3494 <b>Point-GCC: Universal Self-supervised 3D Scene Pre-training via Geometry-Color Contrast<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Guofan Fan, Zekun Qi, Wenkai Shi, Kaisheng Ma"
      },
      {
        "type": "paperTitle",
        "text": "3499 <b>Transferable Adversarial Facial Images for Privacy Protection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Minghui Li, jiangxiong wang, Hao Zhang, Ziqi Zhou, Shengshan Hu, pei Xiaobing"
      },
      {
        "type": "paperTitle",
        "text": "3501 <b>Audio Deepfake Detection with Self-Supervised XLS-R and SLS Classifier<b>"
      },
      {
        "type": "paperAuthor",
        "text": "qishan Zhang, Shuangbing Wen, Tao Hu"
      },
      {
        "type": "paperTitle",
        "text": "3502 <b>Fast and Scalable Incomplete Multi-View Clustering with Duality Optimal Graph Filtering<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Liang Du, Yukai Shi, Yan Chen, Peng Zhou, Yuhua Qian"
      },
      {
        "type": "paperTitle",
        "text": "3503 <b>Boosting Speech Recognition Robustness to Modality-Distortion with Contrast-Augmented Prompts<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Dongjie Fu, Xize Cheng, Xiaoda Yang, Wang Hanting, Zhou Zhao, Tao Jin"
      },
      {
        "type": "paperTitle",
        "text": "3507 <b>Contrastive Context-Speech Pretraining for Expressive Text-to-Speech Synthesis<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yujia Xiao, Xi Wang, Xu Tan, Lei He, Xinfa Zhu, sheng zhao, Tan Lee"
      },
      {
        "type": "paperTitle",
        "text": "3509 <b>CIRP: Cross-Item Relational Pre-training for Multimodal Product Bundling<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yunshan Ma, Yingzhi He, WENJUN ZHONG, Xiang Wang, Roger Zimmermann, Tat-seng Chua"
      },
      {
        "type": "paperTitle",
        "text": "3520 <b>Hierachical Multi-label Learning for Incremental Multilingual Text Recognition<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiaoqian Liu, Ming-Hui Liu, Zhen-Duo Chen, Xin Luo, Xin-Shun Xu"
      },
      {
        "type": "paperTitle",
        "text": "3521 <b>CoMO-NAS: Core-Structures-Guided Multi-Objective Neural Architecture Search for Multi-Modal Classification<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Pinhan Fu, Xinyan Liang, Yuhua Qian, Qian Guo, Zhifang Wei, Wen Li"
      },
      {
        "type": "paperTitle",
        "text": "3523 <b>Maximizing Feature Distribution Variance for Robust Neural Networks<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hao Yang, Min Wang, zhengfei Yu, Zhi Zeng, Mingrui Lao, Yun Zhou"
      },
      {
        "type": "paperTitle",
        "text": "3525 <b>VisHanfu: An Interactive System for the Promotion of Hanfu Knowledge via Cross-Shaped Flat Structure<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Minjing Yu, Lingzhi Zeng, Xinxin Du, Jenny Sheng, Qiantian Liao, Yong-jin Liu"
      },
      {
        "type": "paperTitle",
        "text": "3527 <b>Enhanced Screen Content Image Compression: A Synergistic Approach for Structural Fidelity and Text Integrity Preservation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Fangtao Zhou, xiaofeng huang, Peng Zhang, Meng Wang, Zhao Wang, Yang Zhou, Haibing YIN"
      },
      {
        "type": "paperTitle",
        "text": "3528 <b>Foreground Harmonization and Shadow Generation for  Composite Image<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jing Zhou, ZiQi Yu, Zhongyun Bao, Gang Fu, Weilei He, Chao Liang, Chunxia Xiao"
      },
      {
        "type": "paperTitle",
        "text": "3530 <b>Towards Photorealistic Video Colorization via Gated Color-Guided Image Diffusion Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiaxing Li, Hongbo Zhao, Yijun Wang, Jianxin Lin"
      },
      {
        "type": "paperTitle",
        "text": "3531 <b>Asymmetric Event-Guided Video Super-Resolution<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zeyu Xiao, Dachun Kai, Yueyi Zhang, Xiaoyan Sun, Zhiwei Xiong"
      },
      {
        "type": "paperTitle",
        "text": "3538 <b>HICEScore: A Hierarchical Metric for Image Captioning Evaluation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zequn Zeng, Jianqiao Sun, Hao Zhang, Tiansheng Wen, Yudi Su, Yan Xie, Zhengjue Wang, Bo Chen"
      },
      {
        "type": "paperTitle",
        "text": "3546 <b>DEITalk: Speech-Driven 3D Facial Animation with Dynamic Emotional Intensity Modeling<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shen Kang, Haifeng Xia, Guangxing Geng, GuangYue Geng, Si-Yu Xia, Zhengming Ding"
      },
      {
        "type": "paperTitle",
        "text": "3548 <b>MaskBEV: Towards A Unified Framework for BEV Detection and Map Segmentation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiao Zhao, XUKUN ZHANG, Dingkang Yang, Mingyang Sun, Mingcheng Li, Shunli Wang, Lihua Zhang"
      },
      {
        "type": "paperTitle",
        "text": "3554 <b>AbsGS: Recovering fine details in 3D Gaussian Splatting<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zongxin Ye, Wenyu Li, Sidun Liu, Peng Qiao, Yong Dou"
      },
      {
        "type": "paperTitle",
        "text": "3576 <b>Beyond the Known: Ambiguity-Aware Multi-view Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zihan Fang, Shide Du, Yuhong Chen, Shiping Wang"
      },
      {
        "type": "paperTitle",
        "text": "3577 <b>Information Diffusion Prediction with Graph Neural Ordinary Differential Equation Network<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ding Wang, Wei Zhou, Songiln Hu"
      },
      {
        "type": "paperTitle",
        "text": "3578 <b>Improving Interaction Comfort in Authoring Task in AR-HRI through Dynamic Dual-Layer Interaction Adjustment<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yunqiang Pei, Kaiyue Zhang, Hongrong yang, Yong Tao, Qihang Tang, Jialei Tang, Guoqing Wang, Zhitao Liu, Ning Xie, Peng Wang, Yang Yang, Hengtao Shen"
      },
      {
        "type": "paperTitle",
        "text": "3583 <b>Emotion Recognition in HMDs: A Multi-task Approach Using Physiological Signals and Occluded Faces<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yunqiang Pei, Jialei Tang, Qihang Tang, Mingfeng Zha, Dongyu Xie, Guoqing Wang, Zhitao Liu, Ning Xie, Peng Wang, Yang Yang, Hengtao Shen"
      },
      {
        "type": "paperTitle",
        "text": "3596 <b>MMHead: Towards Fine-grained Multi-modal 3D Facial Animation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Sijing Wu, Yunhao Li, Yichao Yan, Huiyu Duan, Ziwei Liu, Guangtao Zhai"
      },
      {
        "type": "paperTitle",
        "text": "3607 <b>Shapley Value-based Contrastive Alignment for  Multimodal Information Extraction<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wen Luo, Yu Xia, Shen Tianshu, Sujian Li"
      },
      {
        "type": "paperTitle",
        "text": "3610 <b>Enhancing Adaptive Deep Networks for Image Classification via Uncertainty-aware Decision Fusion<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xu Zhang, Zhipeng Xie, Haiyang Yu, Qitong Wang, Peng Wang, Wei Wang"
      },
      {
        "type": "paperTitle",
        "text": "3615 <b>Q-MoE: Connector for MLLMs with Text-Driven Routing<b>"
      },
      {
        "type": "paperAuthor",
        "text": " Hanziwang, Jiamin Ren, Yifeng Ding, Lei Ren, Huixing Jiang, Chen Wei, Fangxiang Feng, Xiaojie Wang"
      },
      {
        "type": "paperTitle",
        "text": "3623 <b>Exploring in Extremely Dark: Low-Light Video Enhancement with Real Events<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xicong Wang, Huiyuan Fu, Jiaxuan Wang, Xin Wang, Heng Zhang, Huadong Ma"
      },
      {
        "type": "paperTitle",
        "text": "3624 <b>Once-for-all: Efficient Visual Face Privacy Protection via Person-specific Veils<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zixuan Yang, Yushu Zhang, Tao Wang, Zhongyun Hua, Zhihua Xia, Jian Weng"
      },
      {
        "type": "paperTitle",
        "text": "3626 <b>Eglcr: Edge Structure Guidance and Scale Adaptive Attention for Iterative Stereo Matching<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhien Dai, Zhaohui Tang, Hu Zhang, Can Tian, Mingjun Pan, Yongfang Xie"
      },
      {
        "type": "paperTitle",
        "text": "3628 <b>Evolving Storytelling: Benchmarks and Methods for New Character Customization with Diffusion Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiyu Wang, Yufei Wang, Satoshi Tsutsui, Weisi Lin, Bihan Wen, Alex Kot"
      },
      {
        "type": "paperTitle",
        "text": "3629 <b>Scalable Super-Resolution Neural Operator<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Lei Han, Xuesong Zhang"
      },
      {
        "type": "paperTitle",
        "text": "3637 <b>MappingFormer: Learning cross-modal feature mapping for visible-to-infrared image translation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Haining Wang, Na Li, Huijie Zhao, Yan Wen, Yi Su, Yuqiang Fang"
      },
      {
        "type": "paperTitle",
        "text": "3643 <b>SimCLIP: Refining Image-Text Alignment with Simple Prompts for Zero-/Few-shot Anomaly Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "ChengHao Deng, haote xu, Xiaolu Chen, Haodi Xu, Xiaotong Tu, Xinghao Ding, Yue Huang"
      },
      {
        "type": "paperTitle",
        "text": "3647 <b>Medical Report Generation via Multimodal Spatio-Temporal Fusion<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xin Mei, Rui Mao, Xiaoyan Cai, libin yang, Erik Cambria"
      },
      {
        "type": "paperTitle",
        "text": "3650 <b>Real-time parameter evaluation of high-speed microfluidic droplets using continuous spike streams<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Bo Xiong, Changqing Su, Lin Zihan, Yanqin Chen, You Zhou, Zhen Cheng, Zhaofei Yu, Tiejun Huang"
      },
      {
        "type": "paperTitle",
        "text": "3656 <b>Arondight: Red Teaming Large Vision Language Models with Auto-generated Multi-modal Jailbreak Prompts<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yi Liu, Chengjun Cai, Xiaoli ZHANG, Xingliang YUAN, Cong Wang"
      },
      {
        "type": "paperTitle",
        "text": "3657 <b>FBSDiff: Plug-and-Play Frequency Band Substitution of Diffusion Features for Highly Controllable Text-Driven Image Translation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiang Gao, Jiaying Liu"
      },
      {
        "type": "paperTitle",
        "text": "3664 <b>LinkThief: Combining Generalized Structure Knowledge with Node Similarity for Link Stealing Attack against GNN<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuxing Zhang, Siyuan Meng, Chunchun Chen, Mengyao Peng, Hongyan Gu, Xinli Huang"
      },
      {
        "type": "paperTitle",
        "text": "3668 <b>HazeSpace2M: A Dataset for Haze Aware Single Image Dehazing<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Md Tanvir Islam, Nasir Rahim, Saeed Anwar, Muhammad Saqib, Sambit Bakshi, Khan Muhammad"
      },
      {
        "type": "paperTitle",
        "text": "3670 <b>A Coarse to Fine Detection Method for Prohibited Object in X-ray Images Based on Progressive Transformer Decoder<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chunjie Ma, Lina Du, Zan Gao, Li Zhuo, Meng Wang"
      },
      {
        "type": "paperTitle",
        "text": "3672 <b>Overcoming Spatial-Temporal Catastrophic Forgetting for Federated Class-Incremental Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hao Yu, Xin Yang,  xingao, Feng yihui, Hao Wang, Yan Kang, Tianrui Li"
      },
      {
        "type": "paperTitle",
        "text": "3678 <b>Enhancing Model Interpretability with Local Attribution over Global Exploration<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhu Zhiyu, Zhibo Jin, Jiayu Zhang, Huaming Chen"
      },
      {
        "type": "paperTitle",
        "text": "3679 <b>SyncTalklip: Highly Synchronized Lip-Readable Speaker Generation with Multi-Task Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiaoda Yang, Xize Cheng, Dongjie Fu, Minghui Fang, Jialung Zuo, Shengpeng Ji, Tao Jin, Zhou Zhao"
      },
      {
        "type": "paperTitle",
        "text": "3690 <b>Calibrating Prompt from History for Continual Vision-Language Retrieval and Grounding<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Tao Jin, Weicai Yan, Ye Wang, Sihang Cai,  Shuaiqifan, Zhou Zhao"
      },
      {
        "type": "paperTitle",
        "text": "3692 <b>Towards Multimodal-augmented Pre-trained Language Models via Self-balanced Expectation-Maximization Iteration<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xianwei Zhuang, Xuxin Cheng, Zhihong Zhu, Zhanpeng Chen, Hongxiang Li, Yuexian Zou"
      },
      {
        "type": "paperTitle",
        "text": "3695 <b>Multi-grained Correspondence Learning of Audio-language Models for Few-shot Audio Recognition<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shengwei Zhao, Xu Linhai, Yuying Liu, Shaoyi Du"
      },
      {
        "type": "paperTitle",
        "text": "3696 <b>Decoder Pretraining with only Text for Scene Text Recognition<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shuai Zhao, Yongkun Du, Zhineng Chen, Yu-Gang Jiang"
      },
      {
        "type": "paperTitle",
        "text": "3703 <b>DreamVTON: Customizing 3D Virtual Try-on  with Personalized Diffusion Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhenyu Xie, Haoye Dong, Yufei Gao, Zehua Ma, Xiaodan Liang"
      },
      {
        "type": "paperTitle",
        "text": "3705 <b>EGGesture: Entropy-Guided Vector Quantized Variational AutoEncoder for Co-Speech Gesture Generation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "yiyong xiao, Kai Shu, Haoyi Zhang,  BaoHuaYin, Wai Seng Cheang, Haoyang Wang, Jiechao Gao"
      },
      {
        "type": "paperTitle",
        "text": "3721 <b>InNeRF: Learning Interpretable Radiance Fields for Generalizable 3D Scene Representation and Rendering<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Dan Wang, Xinrui Cui"
      },
      {
        "type": "paperTitle",
        "text": "3727 <b>Zero-Shot Controllable Image-to-Video Animation via Motion Decomposition<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shoubin Yu, Zhiyuan Fang, Jian Zheng, Gunnar Sigurdsson, Vicente Ordonez, Robinson Piramuthu, Mohit Bansal"
      },
      {
        "type": "paperTitle",
        "text": "3737 <b>Mixed Prototype Correction for Causal Inference in Medical Image Classification<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yajie Zhang, Zhi-An Huang, Zhiliang Hong, Songsong Wu, Jibin Wu, KC Tan"
      },
      {
        "type": "paperTitle",
        "text": "3741 <b>CustomNet: Object Customization with Variable-Viewpoints in Text-to-Image Diffusion Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ziyang Yuan, Mingdeng Cao, Xintao Wang, Zhongang Qi, Chun Yuan, Ying Shan"
      },
      {
        "type": "paperTitle",
        "text": "3748 <b>Robust Prototype Completion for Incomplete Multi-view Clustering<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Honglin Yuan, Shiyun Lai, Xingfeng Li, Jian Dai, Yuan Sun, Zhenwen Ren"
      },
      {
        "type": "paperTitle",
        "text": "3752 <b>PAIR: Pre-denosing Augmented Image Retrieval Model for Defending Adversarial Patches<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ziyang Zhou, Pinghui Wang, Zi Liang, Ruofei Zhang, Haitao Bai"
      },
      {
        "type": "paperTitle",
        "text": "3757 <b>PercepLIE: A New Path to Perceptual Low-Light Image Enhancement<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Cong Wang, Chengjin Yu, Jie Mu, Wei Wang"
      },
      {
        "type": "paperTitle",
        "text": "3759 <b>Towards Flexible Evaluation for Generative Visual Question Answering<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Huishan Ji, Qingyi Si, zheng Lin, Weiping Wang"
      },
      {
        "type": "paperTitle",
        "text": "3770 <b>Adaptive Vision Transformer for Event-Based Human Pose Estimation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Nannan Yu, Tao Ma, Jiqing Zhang, Yuji Zhang, Qirui Bao, Xiaopeng Wei, Xin Yang"
      },
      {
        "type": "paperTitle",
        "text": "3772 <b>Dual Advancement of Representation Learning and Clustering for Sparse and Noisy Images<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenlin Li, Yucheng Xu, Xiaoqing Zheng, Suoya Han, Jun Wang, Xiaobo Sun"
      },
      {
        "type": "paperTitle",
        "text": "3780 <b>WisdoM: Improving Multimodal Sentiment Analysis by Fusing Contextual World Knowledge<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenbin Wang, LIANG DING, Li Shen, Yong Luo, Han Hu, Dacheng Tao"
      },
      {
        "type": "paperTitle",
        "text": "3785 <b>Dynamic Evidence Decoupling for Trusted Multi-view Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ying Liu, Lihong Liu, Cai Xu, Xiangyu Song, Ziyu Guan, Wei Zhao"
      },
      {
        "type": "paperTitle",
        "text": "3797 <b>SM4Depth: Seamless Monocular Metric Depth Estimation across Multiple Cameras and Scenes by One Mode<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yihao Liu, Feng Xue, Anlong Ming, Mingshuai Zhao, Huadong Ma, Nicu Sebe"
      },
      {
        "type": "paperTitle",
        "text": "3798 <b>REmoNet: Reducing Emotional Label Noise via Multi-regularized Self-supervision<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Weibang Jiang, Yu-Ting Lan, Bao-liang Lu"
      },
      {
        "type": "paperTitle",
        "text": "3804 <b>Hawkeye: Discovering and Grounding Implicit Anomalous Sentiment in Recon-videos via Scene-enhanced Video Large Language Model<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jianing Zhao, Jingjing Wang, Yujie Jin, Jiamin Luo, Guodong Zhou"
      },
      {
        "type": "paperTitle",
        "text": "3815 <b>SDePR: Fine-Grained Leaf Image Retrieval with Structural Deep Patch Representation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xin Chen, Bin Wang, jinzheng jiang, Kunkun Zhang, Yongsheng Gao"
      },
      {
        "type": "paperTitle",
        "text": "3821 <b>$E^{3}$Gen: Efficient, Expressive and Editable Avatars Generation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Weitian Zhang, Yichao Yan, Yunhui Liu, Xingdong Sheng, Xiaokang Yang"
      },
      {
        "type": "paperTitle",
        "text": "3823 <b>Thinking Temporal Automatic White Balance: Datasets, Models and Benchmarks<b>"
      },
      {
        "type": "paperAuthor",
        "text": "chunxiao Li, Shuyang Wang, Xuejing Kang, Anlong Ming"
      },
      {
        "type": "paperTitle",
        "text": "3826 <b>Importance-aware Shared Parameter Subspace Learning for Domain Incremental Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shiye Wang, Changsheng Li, Jialin Tang, Xing Gong, Ye Yuan, Guoren Wang"
      },
      {
        "type": "paperTitle",
        "text": "3831 <b>Multimodal-aware Multi-intention Learning for Recommendation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wei Yang, Qingchen Yang"
      },
      {
        "type": "paperTitle",
        "text": "3836 <b>ECFCON: Emotion Consequence Forecasting in Conversations<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xincheng Ju, Dong Zhang, Suyang Zhu, Junhui Li, Shoushan Li, Guodong Zhou"
      },
      {
        "type": "paperTitle",
        "text": "3843 <b>Deep Incomplete Multi-View Network Semi-Supervised Multi-Label Learning with Unbiased Loss<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Quanjiang LI, Tingjin Luo, Mingdie Jiang, Jiahui Liao, Zhangqi Jiang"
      },
      {
        "type": "paperTitle",
        "text": "3845 <b>Towards Effective Federated Graph Anomaly Detection via Self-boosted Knowledge Distillation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jinyu Cai, Yunhe Zhang, Zhoumin Lu, Wenzhong Guo, See-Kiong Ng"
      },
      {
        "type": "paperTitle",
        "text": "3856 <b>Multimodal Multi-turn Conversation Stance Detection: A Challenge Dataset and Effective Model<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Fuqiang Niu, Zebang Cheng, Xianghua Fu, Xiaojiang Peng, Genan Dai, Yin Chen, Hu Huang, Bowen Zhang"
      },
      {
        "type": "paperTitle",
        "text": "3857 <b>Multi-view Clustering Based on Deep Non-negative Tensor Factorization<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wei Feng, dongyuan wei, QIANQIAN WANG, Bo Dong, Quanxue Gao"
      },
      {
        "type": "paperTitle",
        "text": "3862 <b>Safe-SD: Safe and Traceable Stable Diffusion with Text Prompt Trigger for Invisible Generative Watermarking<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhiyuan Ma, Guoli Jia, Biqing Qi, Bowen Zhou"
      },
      {
        "type": "paperTitle",
        "text": "3877 <b>Adaptive Pruning of Channel Spatial Dependability in Convolutional Neural Networks<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Weiying Xie, Mei Yuan, Ma Jitao, Yunsong Li"
      },
      {
        "type": "paperTitle",
        "text": "3879 <b>Sparse Query Dense: Enhancing 3D Object Detection with Pseudo points<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Mo Yujian, Yan Wu, Junqiao Zhao, Hou zhenjie, weiquan Huang, Hu Yinghao, Jijun Wang, Jun Yan"
      },
      {
        "type": "paperTitle",
        "text": "3899 <b>Selection and Reconstruction of Key Locals: A Novel Specific Domain Image-Text Retrieval Method<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yu Liao, Xinfeng Zhang, Rui Yang, Jianwei Tao, Bai Liu, Zhipeng Hu, Shuang Wang, zeng zhao"
      },
      {
        "type": "paperTitle",
        "text": "3905 <b>Vaccine Misinformation Detection in X using Cooperative Multimodal Framework<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Usman Naseem, Adam Dunn, Matloob Khushi, Jinman Kim"
      },
      {
        "type": "paperTitle",
        "text": "3908 <b>Toward Timeliness-Enhanced Loss Recovery for Large-Scale Live Streaming<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Bo Wu, Tong Li, cheng luo, Xu Yan, FuYu Wang, Xinle Du, Ke Xu"
      },
      {
        "type": "paperTitle",
        "text": "3911 <b>Bridging the Modality Gap: Dimension Information Alignment and Sparse Spatial Constraint for Image-Text Matching<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiang Ma, Xuemei Li, Lexin Fang, Caiming Zhang"
      },
      {
        "type": "paperTitle",
        "text": "3920 <b>Agent Aggregator with Mask Denoise Mechanism for Histopathology Whole Slide Image Analysis<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xitong Ling, Minxi Ouyang, Yizhi Wang, Xinrui Chen, Renao Yan,  Hongbochu, Junru Cheng, Tian Guan, Xiaoping Liu, Sufang Tian, Yonghong He"
      },
      {
        "type": "paperTitle",
        "text": "3921 <b>AdapMTL: Adaptive Pruning Framework for Multitask Learning Model<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Mingcan Xiang, Steven Jiaxun Tang, Qizheng Yang, Hui Guan, Tongping Liu"
      },
      {
        "type": "paperTitle",
        "text": "3931 <b>FewVS: A Vision-Semantics Integration Framework for Few-Shot Image Classification<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhuoling Li, Yong Wang, Kaitong Li"
      },
      {
        "type": "paperTitle",
        "text": "3935 <b>Crossmodal Few-shot 3D Point Cloud Semantic Segmentation via View Synthesis<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ziyu Zhao, Pingping Cai, Canyu Zhang, Xiaoguang Li, Song Wang"
      },
      {
        "type": "paperTitle",
        "text": "3938 <b>3D Scene De-occlusion in Neural Radiance Fields: A Framework for Obstacle Removal and Realistic Inpainting<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yi LIU, Xinyi LI, Shuai Wenjing"
      },
      {
        "type": "paperTitle",
        "text": "3939 <b>MTSNet: Joint Feature Adaptation and Enhancement for Text-Guided Multi-view Martian Terrain Segmentation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yang Fang, Xuefeng Rao, Xinbo Gao, Weisheng Li, Zijian Min"
      },
      {
        "type": "paperTitle",
        "text": "3943 <b>MFRGN: Multi-scale Feature Representation Generalization Network For Ground-to-Aerial Geo-localization<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuntao Wang, Jinpu Zhang, Ruonan Wei, Wenbo Gao, Yuehuan Wang"
      },
      {
        "type": "paperTitle",
        "text": "3944 <b>HeadSetOff: Enabling Photorealistic Video Conferencing on Economical VR Headsets<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yili Jin, Duan Xize, Fangxin Wang, Xue Liu"
      },
      {
        "type": "paperTitle",
        "text": "3956 <b>New Job, New Gender? Measuring the Social Bias in Image Generation Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenxuan Wang, Haonan Bai, Jen-tse Huang, Yuxuan WAN, Youliang Yuan, Haoyi Qiu, Nanyun Peng, Michael Lyu"
      },
      {
        "type": "paperTitle",
        "text": "3959 <b>HighlightRemover: Spatially Valid Pixel Learning for Image Specular Highlight Removal<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ling Zhang, Yidong Ma, Zhi Jiang, Weilei He, Zhongyun Bao, Gang Fu, Wenju Xu, Chunxia Xiao"
      },
      {
        "type": "paperTitle",
        "text": "3985 <b>An Active Masked Attention Framework for Many-to-Many Cross-Domain Recommendations<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Feng Zhu, Xinxing Yang, Longfei Li, JUN ZHOU"
      },
      {
        "type": "paperTitle",
        "text": "3988 <b>HS-Surf: A Novel High-Frequency Surface Shell Radiance Field to Improve Large-Scale Scene Rendering<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiongming Qin, Fei LUO, Tuo Cao, Wenju Xu, Chunxia Xiao"
      },
      {
        "type": "paperTitle",
        "text": "3992 <b>A Progressive Skip Reasoning Fusion Method for Multi-Modal Classification<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Qian Guo, Xinyan Liang, Yuhua Qian, Zhihua Cui, Jie Wen"
      },
      {
        "type": "paperTitle",
        "text": "3997 <b>Conditional Diffusion Model for Open-ended Video Question Answering<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xinyue Liu, Jiahui Wan, Linlin Zong, Bo Xu"
      },
      {
        "type": "paperTitle",
        "text": "4003 <b>Unifying Spike Perception and Prediction: A Compact Spike Representation Model using Multi-scale Correlation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Kexiang Feng, Chuanmin Jia, Siwei Ma, Wen Gao"
      },
      {
        "type": "paperTitle",
        "text": "4006 <b>Uncovering Capabilities of Model Pruning in Graph Contrastive  Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xueyuan Chen, Shangzhe Li, Junran Wu"
      },
      {
        "type": "paperTitle",
        "text": "4008 <b>R2SFD: Improving Single Image Reflection Removal using Semantic Feature Dictionary<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Green Rosh, Pawan Prasad B H, LOKESH BOREGOWDA, Kaushik Mitra"
      },
      {
        "type": "paperTitle",
        "text": "4020 <b>QNCD: Quantization Noise Correction for Diffusion Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Huanpeng Chu, Wei Wu, Chengjie Zang, Kun Yuan"
      },
      {
        "type": "paperTitle",
        "text": "4021 <b>Monocular Human-Object Reconstruction in the Wild<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chaofan Huo, Ye Shi, Jingya Wang"
      },
      {
        "type": "paperTitle",
        "text": "4037 <b>Differential-Perceptive and Retrieval-Augmented MLLM for Change Captioning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xian Zhang, Haokun Wen, Jianlong Wu, Pengda Qin, Hui Xue', Liqiang Nie"
      },
      {
        "type": "paperTitle",
        "text": "4055 <b>Point Cloud Densification for 3D Gaussian Splatting from Sparse Input Views<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Kin-Chung Chan, Jun Xiao, Hana Lebeta Goshu, Kin-man Lam"
      },
      {
        "type": "paperTitle",
        "text": "4062 <b>FC-4DFS: Frequency-controlled Flexible 4D Facial Expression Synthesizing<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xin Lu, Chuanqing Zhuang, Zhengda Lu, Yiqun Wang, Jun Xiao"
      },
      {
        "type": "paperTitle",
        "text": "4065 <b>TS-ILM:Class Incremental Learning for Online Action Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Li Xiaochen, Jian Cheng, Ziying Xia, Zichong Chen, Junhao Shi, Zhicheng Dong, Nyima Tashi"
      },
      {
        "type": "paperTitle",
        "text": "4071 <b>FTF-ER: Feature-Topology Fusion-Based Experience Replay Method for Continual Graph Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Changqing Lin, Jinhui Pang, Xiaoshuai Hao, Rong Yin, Zixuan Wang, Zhihui Zhang, Jinglin He, HUANG TAI SHENG"
      },
      {
        "type": "paperTitle",
        "text": "4074 <b>See or Guess: Counterfactually Regularized Image Captioning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Qian Cao, Xu Chen, Ruihua Song, Xiting Wang, Xinting Huang, Yuchen Ren"
      },
      {
        "type": "paperTitle",
        "text": "4075 <b>Uni-DlLoRA: Style Fine-Tuning for Fashion Image Translation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "LIAO Fangjian, Xingxing Zou, Waikeung Wong"
      },
      {
        "type": "paperTitle",
        "text": "4078 <b>Designing Spatial Visualization and Interactions of Immersive Sankey Diagram in Virtual Reality<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yang Lu,  junxianli, Zhitong Cui, Jiapeng Hu, Yanna Lin, Shijian Luo"
      },
      {
        "type": "paperTitle",
        "text": "4091 <b>Bridging Fourier and Spatial-Spectral Domains for Hyperspectral Image Denoising<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiahua Xiao, Yang Liu, Shizhou Zhang, Xing Wei"
      },
      {
        "type": "paperTitle",
        "text": "4095 <b>CoTuning: A Large-Small Model Collaborating Distillation Framework for Better Model Generalization<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zimo Liu, Kangjun Liu, Mingyue Guo, Shiliang Zhang, Yaowei Wang"
      },
      {
        "type": "paperTitle",
        "text": "4096 <b>4D Gaussian Splatting with Scale-aware Residual Field and Adaptive Optimization for Real-time rendering of temporally complex dynamic scenes<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jinbo Yan, Rui Peng, Luyang Tang, Ronggang Wang"
      },
      {
        "type": "paperTitle",
        "text": "4102 <b>GPT4Video: A Unified Multimodal Large Language Model for lnstruction-Followed Understanding and Safety-Aware Generation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhanyu Wang, Longyue Wang, Zhen Zhao, Minghao Wu, Chenyang Lyu, Huayang Li, Deng Cai, Luping Zhou, Shuming Shi, Zhaopeng Tu"
      },
      {
        "type": "paperTitle",
        "text": "4103 <b>UniStyle: Unified Style Modeling for Speaking Style Captioning and Stylistic Speech Synthesis<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xinfa Zhu, Wenjie Tian, Xinsheng Wang, Lei He, Yujia Xiao, Xi Wang, Xu Tan, sheng zhao, Lei Xie"
      },
      {
        "type": "paperTitle",
        "text": "4112 <b>DiffHarmony++: Enhancing Image Harmonization with Harmony-VAE and Inverse Harmonization Model<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Pengfei Zhou, Fangxiang Feng, Guang Liu, Ruifan Li, Xiaojie Wang"
      },
      {
        "type": "paperTitle",
        "text": "4131 <b>AIGCs Confuse AI Too: Investigating and Explaining Synthetic Image-induced Hallucinations in Large Vision-Language Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yifei Gao Gao, Jiaqi Wang, Zhiyu Lin, Jitao Sang"
      },
      {
        "type": "paperTitle",
        "text": "4134 <b>CompGS: Efficient 3D Scene Representation via Compressed Gaussian Splatting<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiangrui Liu, Xinju Wu, Pingping Zhang, Shiqi Wang, Zhu Li, Sam Kwong"
      },
      {
        "type": "paperTitle",
        "text": "4139 <b>DQG: Database Question Generation for Exact Text-based Image Retrieval<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Rintaro Yanagi, Ren Togo, Takahiro Ogawa, Miki Haseyama"
      },
      {
        "type": "paperTitle",
        "text": "4146 <b>Segment Anything with Precise Interaction<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Liu Mengzhen, Mengyu Wang, Henghui Ding, Yilong Xu, Yao Zhao, Yunchao Wei"
      },
      {
        "type": "paperTitle",
        "text": "4152 <b>Large Multi-modality Model Assisted AI-Generated Image Quality Assessment<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Puyi Wang, Wei Sun, zicheng zhang, Jun Jia, Yanwei Jiang, Zhichao Zhang, Xiongkuo Min, Guangtao Zhai"
      },
      {
        "type": "paperTitle",
        "text": "4160 <b>Auto-ACD: A Large-scale Dataset for Audio-Language  Representation Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Luoyi Sun, Xuenan Xu, Mengyue Wu, Weidi Xie"
      },
      {
        "type": "paperTitle",
        "text": "4166 <b>UNER: A Unified Prediction Head for Named Entity Recognition in Visually-rich Documents<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yi Tu, Chong Zhang, Ya Guo, Huan Chen, Jinyang Tang, Huijia Zhu, Qi Zhang"
      },
      {
        "type": "paperTitle",
        "text": "4173 <b>DepthCloak: Projecting Optical Camouflage Patches for Erroneous Monocular Depth Estimation of Vehicles<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Huixiang Wen, Shan Chang, Shizong Yan, Jie Xu, Hongzi Zhu, Yanting Zhang, Bo Li"
      },
      {
        "type": "paperTitle",
        "text": "4177 <b>Cascaded Adversarial Attack: Simultaneously Fooling Rain Removal and Semantic Segmentation Networks<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhiwen Wang, Yuhui Wu, Zheng WANG, Jiwei Wei, Tianyu Li, Guoqing Wang, Yang Yang, Hengtao Shen"
      },
      {
        "type": "paperTitle",
        "text": "4178 <b>Diffusion Networks with Task-Specific Noise Control for Radiology Report Generation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuanhe Tian, Fei Xia, Yan Song"
      },
      {
        "type": "paperTitle",
        "text": "4186 <b>Latent Representation Reorganization for Face Privacy Protection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhenzhong Kuang, Jianan Lu, Chenhui Hong, Haobin Huang, Suguo Zhu, Xiaowei Zhao, Jun Yu, Jianping Fan"
      },
      {
        "type": "paperTitle",
        "text": "4196 <b>DCAFuse: Dual-Branch Diffusion-CNN Complementary Feature Aggregation Network for Multi-Modality Image Fusion<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xudong Lu, Yuqi Jiang, Haiwen Hong, Qi Sun, Cheng Zhuo"
      },
      {
        "type": "paperTitle",
        "text": "4202 <b>An Entailment Tree Generation Approach for Multimodal Multi-Hop Question Answering with Mixture-of-Experts and Iterative Feedback Mechanism<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Qing Zhang, Haocheng Lv, Jie Liu, Zhiyun Chen, JianYong Duan, Hao Wang, Li He, Xu Mingying"
      },
      {
        "type": "paperTitle",
        "text": "4209 <b>Federated Morozov Regularization for Shortcut Learning in Privacy Preserving Learning with Watermarked Image Data<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Tao Ling, Siping SHI, Hao Wang, Chuang Hu, Dan Wang"
      },
      {
        "type": "paperTitle",
        "text": "4213 <b>CP-Prompt: Composition-Based Cross-modal Prompting for Domain-Incremental Continual Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yu Feng, Zhen Tian, Yifan Zhu, Zongfu Han, Haoran Luo, Guangwei Zhang, Meina Song"
      },
      {
        "type": "paperTitle",
        "text": "4217 <b>LiDAR-NeRF: Novel LiDAR View Synthesis via Neural Radiance Fields<b>"
      },
      {
        "type": "paperAuthor",
        "text": "tao tang, Longfei Gao, Guangrun Wang, Yixing Lao, Peng Chen, Hengshuang Zhao, Dayang Hao, Xiaodan Liang, Mathieu Salzmann, Kaicheng Yu"
      },
      {
        "type": "paperTitle",
        "text": "4230 <b>Driving Scene Understanding with Traffic Scene-Assisted Topology Graph Transformer<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Fu Rong, Wenjin Peng, Meng Lan, Qian Zhang, Lefei Zhang"
      },
      {
        "type": "paperTitle",
        "text": "4244 <b>Reason-and-Execute Prompting: Enhancing MultiModal Large Language Models for Solving Geometry Questions<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiuliang Duan, Dating Tan, Liangda Fang, Yuyu Zhou, Chaobo He, Ziliang Chen, Wu lusheng, Guanliang Chen, Zhiguo Gong, Weiqi Luo, Quanlong Guan"
      },
      {
        "type": "paperTitle",
        "text": "4255 <b>Adaptive Multi-Modality Prompt Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zongqian Wu, Yujing Liu, mengmeng zhan, Ping Hu, Xiaofeng Zhu"
      },
      {
        "type": "paperTitle",
        "text": "4258 <b>Time-Frequency Domain Fusion Enhancement for Audio Super-Resolution<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ye Tian, Zhe Wang, Jianguo Sun, Liguo Zhang"
      },
      {
        "type": "paperTitle",
        "text": "4260 <b>Towards Emotion-enriched Text-to-Motion Generation via LLM-guided Limb-level Emotion Manipulating<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Tan Yu, Jingjing Wang, Jiawen Wang, Jiamin Luo, Guodong Zhou"
      },
      {
        "type": "paperTitle",
        "text": "4261 <b>WorldGPT: Empowering LLM as Multimodal World Model<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhiqi Ge, Hongzhe Huang, Mingze Zhou, Juncheng Li, Guoming Wang, Siliang Tang, Yueting Zhuang"
      },
      {
        "type": "paperTitle",
        "text": "4266 <b>PastNet: Introducing Physical Inductive Biases for Spatio-temporal Video Prediction<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hao Wu, Fan Xu, Chong Chen, Xian-Sheng Hua, Xiao Luo, Haixin Wang"
      },
      {
        "type": "paperTitle",
        "text": "4284 <b>FedDEO: Description-Enhanced One-Shot Federated Learning with Diffusion Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Mingzhao Yang, Shangchao Su, Bin Li, Xiangyang Xue"
      },
      {
        "type": "paperTitle",
        "text": "4290 <b>MDDR:Multi-modal Dual-Attention aggregation for Depression Recognition<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wei Zhang"
      },
      {
        "type": "paperTitle",
        "text": "4292 <b>CoPL:Parameter-Efficient Collaborative Prompt Learning for Audio-Visual Tasks<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yihan Zhao, Wei Xi, Cui Yuhang, Gairui Bai, Xinhui Liu, Jizhong Zhao"
      },
      {
        "type": "paperTitle",
        "text": "4297 <b>Semantic Distillation from Neighborhood for Composed Image Retrieval<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yifan Wang, Wuliang Huang, Lei Li, Chun Yuan"
      },
      {
        "type": "paperTitle",
        "text": "4299 <b>MultiDAN: Unsupervised, Multistage, Multisource and Multitarget Domain Adaptation for Semantic Segmentation of Remote Sensing Images<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuxiang Cai, Yongheng Shang, Jianwei Yin"
      },
      {
        "type": "paperTitle",
        "text": "4302 <b>Prompt2Poster: Automatically Artistic Chinese Poster Creation from Prompt Only<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shaodong wang, Yunyang Ge, Liuhan Chen, Haiyang Zhou, Qian Wang, Xinhua Cheng, Yuan LI"
      },
      {
        "type": "paperTitle",
        "text": "4311 <b>One-bit Semantic Hashing: Towards Resource-Efficient Hashing Model with Binary Neural Network<b>"
      },
      {
        "type": "paperAuthor",
        "text": "liyang he, Zhenya Huang, Chenglong Liu, Rui Li, Runze Wu, Qi Liu, Enhong Chen"
      },
      {
        "type": "paperTitle",
        "text": "4313 <b>Cross-Modal Coherence-Enhanced Feedback Prompting for News Captioning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ning Xu, Yifei Gao, Tingting Zhang, Hongshuo Tian, Anan Liu"
      },
      {
        "type": "paperTitle",
        "text": "4318 <b>Multi-Modal Diffusion Model for Recommendation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yangqin Jiang, Lianghao Xia, Wei Wei, Da Luo, Kangyi Lin, Chao Huang"
      },
      {
        "type": "paperTitle",
        "text": "4322 <b>AraLive: Automatic Reward Adaption for Learning-based Live Video Streaming<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Huanhuan Zhang, Liu zhuo, Haotian Li, Anfu Zhou, Chuanming Wang, Huadong Ma"
      },
      {
        "type": "paperTitle",
        "text": "4327 <b>Point Cloud Upsampling With Geometric Algebra Driven Inverse Heat Dissipation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wen-qiang Xu, Wenrui Dai, Ziyang Zheng, Chenglin Li, Junni Zou, Hongkai Xiong"
      },
      {
        "type": "paperTitle",
        "text": "4329 <b>Tracing Training Progress: Dynamic Influence Based Selection for Active Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Tianjiao Wan, Kele Xu, Long Lan, Zijian Gao, Davy Feng, Bo Ding, Huaimin Wang"
      },
      {
        "type": "paperTitle",
        "text": "4341 <b>Neighbor Does Matter: Global Positive-Negative Sampling for Vision-Language Pre-training<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Bin Huang, Feng He, Qi Wang, Hong Chen, Guohao Li, Zhifan Feng, Xin Wang, Wenwu Zhu"
      },
      {
        "type": "paperTitle",
        "text": "4342 <b>CACE-Net: Co-guidance Attention and Contrastive Enhancement for Effective Audio-Visual Event Localization<b>"
      },
      {
        "type": "paperAuthor",
        "text": "xiang he, xiangxi Liu, Yang Li, Dongcheng Zhao, Guobin Shen, Qingqun Kong, Xin Yang, Yi Zeng"
      },
      {
        "type": "paperTitle",
        "text": "4352 <b>VR-Mediated Cognitive Defusion: A Comparative Study for Managing Negative Thoughts<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Kento Shigyo, Yi-Fan Cao, Kentaro Takahira, Mingming Fan, Huamin Qu"
      },
      {
        "type": "paperTitle",
        "text": "4367 <b>LiteGfm: A Lightweight Self-supervised Monocular Depth Estimation Framework for Artifacts Reduction via Guided Image Filtering<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhilin He, Yawei Zhang, Jingchang Mu, Xiaoyue Gu, Tianhao Gu"
      },
      {
        "type": "paperTitle",
        "text": "4384 <b>FreeEnhance: Tuning-Free Image Enhancement via Content-Consistent Noising-and-Denoising Process<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yang Luo, Yiheng Zhang, Zhaofan Qiu, Ting Yao, Zhineng Chen, Yu-Gang Jiang, Tao Mei"
      },
      {
        "type": "paperTitle",
        "text": "4389 <b>Learning Geometry Consistent Neural Radiance Fields from Sparse and Unposed Views<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Qi Zhang, Chi Huang, Qian Zhang, Nan Li, Wei Feng"
      },
      {
        "type": "paperTitle",
        "text": "4397 <b>Peeling Back the Layers: Interpreting the Storytelling of ViT<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jingjie Zeng, Zhihao Yang, Qi Yang, Liang Yang, Hongfei Lin"
      },
      {
        "type": "paperTitle",
        "text": "4399 <b>Improving the Training of the GANs with Limited Data via Dual Adaptive Noise Injection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhaoyu Zhang, Yang Hua, Guanxiong Sun, Hui Wang, Seán McLoone"
      },
      {
        "type": "paperTitle",
        "text": "4420 <b>Cross-Modal Meta Consensus for Heterogeneous Federated Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Li Shuai, Fan Qi, Zixin Zhang, Changsheng Xu"
      },
      {
        "type": "paperTitle",
        "text": "4432 <b>Align-IQA: Aligning Image Quality Assessment Models with Diverse Human Preferences via Customizable Guidance<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Junfeng Yang,  JingFu, Zhen Zhang, Limei Liu, Qin Li, wei zhang, Wenzhi Cao"
      },
      {
        "type": "paperTitle",
        "text": "4436 <b>Perceptual-Distortion Balanced Image Super-Resolution is a Multi-Objective Optimization Problem<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Qiwen Zhu, Yanjie Wang, Shilv Cai, Liqun Chen, Jiahuan Zhou, Luxin Yan, Sheng Zhong, Xu Zou"
      },
      {
        "type": "paperTitle",
        "text": "4438 <b>Rethinking the Architecture Design for Efficient Generic Event Boundary Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ziwei Zheng, Zechuan Zhang, Yulin Wang, Shiji Song, Gao Huang, Le Yang"
      },
      {
        "type": "paperTitle",
        "text": "4442 <b>Let Me Finish My Sentence: Video Temporal Grounding with Holistic Text Understanding<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jongbin Woo, Hyeonggon Ryu, Youngjoon Jang, Jae Won Cho, Joon Chung"
      },
      {
        "type": "paperTitle",
        "text": "4451 <b>Multimodal Emotion Recognition Calibration in Conversations<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Geng Tu, Feng Xiong, Bin Liang, Hui Wang, Xi Zeng, Ruifeng Xu"
      },
      {
        "type": "paperTitle",
        "text": "4453 <b>MagicVFX: Visual Effects Synthesis in Just Minutes<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiaqi Guo, Lianli Gao, Junchen Zhu,  JiaxinZhang, Siyang Li, Jingkuan Song"
      },
      {
        "type": "paperTitle",
        "text": "4464 <b>3D Question Answering with Scene Graph Reasoning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zizhao Wu, Haohan Li, Gongyi Chen, Zhou Yu, Xiaoling Gu, Yigang Wang"
      },
      {
        "type": "paperTitle",
        "text": "4473 <b>Loc4Plan: Locating Before Planning for Outdoor Vision and Language Navigation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Huilin Tian, Jingke Meng, Wei-Shi Zheng, Yuanming Li, Junkai Yan, Yunong Zhang"
      },
      {
        "type": "paperTitle",
        "text": "4475 <b>DeepPointMap2: Accurate and Robust LiDAR-Visual SLAM with Neural Descriptors<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiaze Zhang, Ziheng Ding, Qi Jing, Ying Cheng, Wenchao Ding, Rui Feng"
      },
      {
        "type": "paperTitle",
        "text": "4478 <b>MDR: Multi-stage Decoupled Relational Knowledge Distillation with Adaptive Stage Selection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "JiaQi Wang, Lu Lu, Mingmin Chi, Jian Chen"
      },
      {
        "type": "paperTitle",
        "text": "4482 <b>MultiHateClip: A Multilingual Benchmark Dataset for Hateful Video Detection on YouTube and Bilibili<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Han Wang, Rui Yang Tan, Usman Naseem, Roy Ka-Wei Lee"
      },
      {
        "type": "paperTitle",
        "text": "4488 <b>Deconfounded Emotion Guidance Sticker Selection with Causal Inference<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiali Chen, Yi Cai, Ruohang Xu, Jiexin Wang, Jiayuan Xie, Qing Li"
      },
      {
        "type": "paperTitle",
        "text": "4489 <b>Task-Oriented Multi-Bitstream Optimization for Image Compression and Transmission via Optimal Transport<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Sa Yan, Nuowen Kan, Chenglin Li, Wenrui Dai, Junni Zou, Hongkai Xiong"
      },
      {
        "type": "paperTitle",
        "text": "4492 <b>Poisoning for Debiasing: Fair Recognition via Eliminating Bias Uncovered in Data Poisoning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yi Zhang, Zhefeng Wang, Rui Hu, Xinyu Duan, Yi ZHENG, Baoxing Huai, Jiarun Han, Jitao Sang"
      },
      {
        "type": "paperTitle",
        "text": "4493 <b>Virtual Visual-Guided Domain-Shadow Fusion via Modal Exchanging for Domain-Specific Multi-Modal Neural Machine Translation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhenyu Hou, Junjun Guo"
      },
      {
        "type": "paperTitle",
        "text": "4494 <b>GuidedNet: Semi-Supervised Multi-Organ Segmentation via Labeled Data Guide Unlabeled Data<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Haochen Zhao, Hui Meng, Deqian Yang, Xiexiao zheng, Xiaoze Wu,  QingfengLi, Jianwei Niu"
      },
      {
        "type": "paperTitle",
        "text": "4508 <b>GLoMo: Global-Local Modal Fusion for Multimodal Sentiment Analysis<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yan Zhuang, Yanru Zhang, Zheng Hu, Xiaoyue Zhang, Jiawen Deng, Fuji Ren"
      },
      {
        "type": "paperTitle",
        "text": "4512 <b>ReForm-Eval: Evaluating Large Vision Language Models via Unified Re-Formulation of Task-Oriented Benchmarks<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zejun Li, Ye Wang, Mengfei Du, Qingwen Liu, Binhao Wu, Jiwen Zhang, Chengxing Zhou, Zhihao Fan, Jie Fu, Jingjing Chen, zhongyu wei, Xuanjing Huang"
      },
      {
        "type": "paperTitle",
        "text": "4517 <b>GLATrack: Global and Local Awareness for Open-Vocabulary Multiple Object Tracking<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Guangyao Li, Yajun Jian, Yan Yan, Hanzi Wang"
      },
      {
        "type": "paperTitle",
        "text": "4519 <b>Towards Medical Vision-Language Contrastive Pre-training via Study-Oriented Semantic Exploration<b>"
      },
      {
        "type": "paperAuthor",
        "text": "LIU BO, LU ZEXIN, Yan Wang"
      },
      {
        "type": "paperTitle",
        "text": "4521 <b>D$^3$U-Net: Dual-Domain Collaborative Optimization Deep Unfolding Network for Image Compressive Sensing<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Kai Han, Jin Wang, Yunhui Shi, Nam Ling, Baocai Yin"
      },
      {
        "type": "paperTitle",
        "text": "4527 <b>Knowledge-Aware Artifact Image Synthesis with LLM-Enhanced Prompting and Multi-Source Supervision<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shengguang Wu, Zhenglun Chen, Qi Su"
      },
      {
        "type": "paperTitle",
        "text": "4530 <b>SAM-MIL: A Spatial Contextual Aware Multiple Instance Learning Approach for Whole Slide Image Classification<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Heng Fang, Sheng Huang, Wenhao Tang, Luwen Huangfu, Bo Liu"
      },
      {
        "type": "paperTitle",
        "text": "4543 <b>Universal Frequency Domain Perturbation for Single-Source Domain Generalization<b>"
      },
      {
        "type": "paperAuthor",
        "text": "liu chuang, Yichao Cao, Haogang Zhu, Xiu Su"
      },
      {
        "type": "paperTitle",
        "text": "4545 <b>Probabilistic Vision-Language Representation for Weakly Supervised Temporal Action Localization<b>"
      },
      {
        "type": "paperAuthor",
        "text": "GeunTaek Lim, Hyunwoo Kim, Joonsoo Kim, Yukyung Choi"
      },
      {
        "type": "paperTitle",
        "text": "4547 <b>Highly Transferable Diffusion-based Unrestricted Adversarial Attack on Pre-trained Vision-Language Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenzhuo Xu, Kai Chen, Ziyi Gao, Zhipeng Wei, Jingjing Chen, Yu-Gang Jiang"
      },
      {
        "type": "paperTitle",
        "text": "4548 <b>Convert and Speak: Zero-shot Accent Conversion with Minimum Supervision<b>"
      },
      {
        "type": "paperAuthor",
        "text": "zhijun jia, Huaying Xue, Xiulian Peng, Yan Lu"
      },
      {
        "type": "paperTitle",
        "text": "4555 <b>PD-Refiner: An Underlying Surface Inheritance Refiner with Adaptive Edge-Aware Supervision for Point Cloud Denoising<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chengwei Zhang, Xueyi Zhang, Yue Xianghu, Mingrui Lao, Tao Jiang, Jiawei Wang, Fubo Zhang, Longyong Chen"
      },
      {
        "type": "paperTitle",
        "text": "4557 <b>Domain Knowledge Enhanced Vision-Language Pretrained Model for Dynamic Facial Expression Recognition<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Liupeng Li, Yuhua Zheng, Shupeng Liu, Xiaoyin Xu, Taihao Li"
      },
      {
        "type": "paperTitle",
        "text": "4558 <b>UniQ: Unified Decoder with Task-specific Queries for Efficient Scene Graph Generation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xinyao Liao, Wei Wei, Dangyang Chen,  Yuanyuanfu"
      },
      {
        "type": "paperTitle",
        "text": "4561 <b>PASSION: Towards Effective Incomplete Multi-Modal Medical Image Segmentation with Imbalanced Missing Rates<b>"
      },
      {
        "type": "paperAuthor",
        "text": "JunJie Shi, Caozhi Shang, Zhaobin Sun, Li Yu, Xin Yang, Zengqiang Yan"
      },
      {
        "type": "paperTitle",
        "text": "4567 <b>A Principled Approach to Natural Language Watermarking<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhe Ji, Qiansiqi Hu, Yicheng Zheng, Liyao Xiang, Xinbing Wang"
      },
      {
        "type": "paperTitle",
        "text": "4574 <b>FedEvalFair: A Privacy-Preserving and Statistically Grounded Federated Fairness Evaluation Framework<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhongchi Wang, Hailong Sun, Zhengyang Zhao"
      },
      {
        "type": "paperTitle",
        "text": "4581 <b>FacialPulse: An Efficient RNN-based Depression Detection via Temporal Facial Landmarks<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ruiqi Wang, Jinyang Huang, Jie Zhang, Xin Liu, Xiang Zhang, Zhi Liu, Peng Zhao, Sigui Chen, Xiao Sun"
      },
      {
        "type": "paperTitle",
        "text": "4584 <b>Modeling Event-level Causal Representation for  Video Classification<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuqing Wang, Lei Meng, Haokai Ma, Yuqing Wang, Haibei HUANG, Xiangxu Meng"
      },
      {
        "type": "paperTitle",
        "text": "4585 <b>Document Registration: Towards Automated Labeling of Pixel-Level Alignment Between Warped-Flat Documents<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Weiguang Zhang, Qiufeng Wang, Kaizhu Huang, Xiaowei Huang, Fengjun Guo, Xiaomeng Gu"
      },
      {
        "type": "paperTitle",
        "text": "4588 <b>CAD Translator: An Effective Drive for Text to 3D Parametric Computer-Aided Design Generative Modeling<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xueyang Li, Yu Song, Yunzhong Lou, Xiangdong Zhou"
      },
      {
        "type": "paperTitle",
        "text": "4596 <b>MSFNet: Multi-Scale Fusion Network for Brain-Controlled Speaker Extraction<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Cunhang Fan, Jingjing Zhang, Hongyu Zhang, Wang Xiang, Jianhua Tao, Xinhui Li, Jiangyan Yi, Dianbo Sui, Zhao Lv"
      },
      {
        "type": "paperTitle",
        "text": "4604 <b>Trust Prophet or Not? Taking a Further Verification Step toward Accurate Scene Text Recognition<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Anna Zhu, Ke Xiao, Bo Zhou, Runmin Wang"
      },
      {
        "type": "paperTitle",
        "text": "4609 <b>Fact: Teaching MLLMs with Faithful, Concise and Transferable Rationales<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Minghe Gao, Shuang Chen, Liang Pang, Yuan Yao, Jisheng Dang, Wenqiao Zhang, Juncheng Li, Siliang Tang, Yueting Zhuang, Tat-seng Chua"
      },
      {
        "type": "paperTitle",
        "text": "4623 <b>Fine-grained Semantic Alignment with Transferred Person-SAM for Text-based Person Retrieval<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yihao Wang, Meng Yang, Rui Cao"
      },
      {
        "type": "paperTitle",
        "text": "4634 <b>GRFormer: Grouped Residual Self-Attention for Lightweight Single Image Super-Resolution<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuzhen Li, Zehang Deng, Yuxin Cao, Lihua Liu"
      },
      {
        "type": "paperTitle",
        "text": "4636 <b>Dual-view Pyramid Network for Video Frame Interpolation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yao Luo, Ming Yang, Jinhui Tang"
      },
      {
        "type": "paperTitle",
        "text": "4657 <b>Diffusion Posterior Proximal Sampling for Image Restoration<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hongjie Wu, Linchao He, Mingqin Zhang, Dongdong Chen, Kunming Luo, Mengting Luo, Jizhe Zhou, Hu Chen, Jiancheng Lv"
      },
      {
        "type": "paperTitle",
        "text": "4668 <b>Federated Fuzzy C-means with Schatten-p Norm Minimization<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wei Feng, Zhenwei Wu 武振威, QIANQIAN WANG, Bo Dong, Quanxue Gao"
      },
      {
        "type": "paperTitle",
        "text": "4670 <b>CMT: Co-training Mean-Teacher for Unsupervised Domain Adaptation on 3D Object Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shijie Chen, Junbao Zhuo, Xin Li, Haizhuang Liu, Rongquan Wang, Jiansheng Chen, Huimin Ma"
      },
      {
        "type": "paperTitle",
        "text": "4671 <b>WaveDN: A Wavelet-based Training-free Zero-shot Enhancement for Vision-Language Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiulin Li, Mengyu Yang, Ye Tian, Lanshan Zhang, Yongchun Lu, Jice Liu, Wendong Wang"
      },
      {
        "type": "paperTitle",
        "text": "4673 <b>Unsupervised Multi-view Pedestrian Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Mengyin Liu, Chao Zhu, Shiqi Ren, Xu-cheng Yin"
      },
      {
        "type": "paperTitle",
        "text": "4678 <b>Dual-stream Perception-driven Blind Quality Assessment for Stereoscopic Omnidirectional Images<b>"
      },
      {
        "type": "paperAuthor",
        "text": "ZHAOLIN WAN, Qiushuang Yang, Zhiyang Li, Xiaopeng Fan, Wangmeng Zuo, Debin Zhao"
      },
      {
        "type": "paperTitle",
        "text": "4685 <b>MVP-Net: Multi-View Depth Image Guided Cross-Modal Distillation Network for Point Cloud Upsampling<b>"
      },
      {
        "type": "paperAuthor",
        "text": "jiade chen, Jin Wang, Yunhui Shi, Nam Ling, Baocai Yin"
      },
      {
        "type": "paperTitle",
        "text": "4702 <b>Language-Guided Visual Prompt Compensation for Multi-Modal Remote Sensing Image Classification with Modality Absence<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ling Huang, Wenqian Dong, Song xiao, Jiahui Qu, Yuanbo Yang, Yunsong Li"
      },
      {
        "type": "paperTitle",
        "text": "4719 <b>Simplifying Cross-modal Interaction via Modality-Shared Features for RGBT Tracking<b>"
      },
      {
        "type": "paperAuthor",
        "text": "LiQiu Chen, Yuqing Huang, Hengyu li, Zikun Zhou, Zhenyu He"
      },
      {
        "type": "paperTitle",
        "text": "4729 <b>SparseInteraction: Sparse Semantic Guidance for Radar and Camera 3D Object Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shengyin Jiang, Shaoqing Xu,  lifang, Li Liu, Ziying Song, Yang Bo, Zhi-Xin Yang"
      },
      {
        "type": "paperTitle",
        "text": "4737 <b>CapS-Adapter: Caption-based MultiModal Adapter in Zero-Shot Classification<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Qijie Wang, Liu Guandu, Bin Wang"
      },
      {
        "type": "paperTitle",
        "text": "4742 <b>DragEntity:Trajectory Guided Video Generation using Entity and Positional Relationships<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wan Zhang, Sheng Tang, Jiawei Wei, Ruize Zhang, Juan Cao"
      },
      {
        "type": "paperTitle",
        "text": "4758 <b>Report-Concept Textual-Prompt Learning for Enhancing X-ray Diagnosis<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiongjun Zhao, Zheng-Yu Liu, Fen Liu, Guanting Li, Yutao Dou, Shaoliang Peng"
      },
      {
        "type": "paperTitle",
        "text": "4759 <b>Dual-head Genre-instance Transformer Network for Arbitrary Style Transfer<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Meichen Liu, Shuting He, Songnan Lin, Bihan Wen"
      },
      {
        "type": "paperTitle",
        "text": "4786 <b>WSEL: EEG feature selection with weighted self-expression learning for incomplete multi-dimensional emotion recognition<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xueyuan Xu, Li Zhuo, Jinxin Lu, Xia Wu"
      },
      {
        "type": "paperTitle",
        "text": "4788 <b>Efficient Dual-Confounding Eliminating for Weakly-supervised Temporal Action Localization<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Li Ao, Huijun Liu, Jinrong Sheng, Zhongming Chen, Yongxin Ge"
      },
      {
        "type": "paperTitle",
        "text": "4796 <b>MUSCAT: a Multimodal mUSic Collection for Automatic Transcription of real recordings and image scores<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Alejandro Galán-Cuenca, Jose J. Valero-Mas, Juan C. Martinez-Sevilla, Antonio Hidalgo-Centeno, Antonio Pertusa, Jorge Calvo-Zaragoza"
      },
      {
        "type": "paperTitle",
        "text": "4797 <b>Enhanced Tensorial Self-representation Subspace Learning for Incomplete Multi-view Clustering<b>"
      },
      {
        "type": "paperAuthor",
        "text": "hangjun Che, Xinyu Pu, Deqiang Ouyang, Beibei Li"
      },
      {
        "type": "paperTitle",
        "text": "4799 <b>Domain Generalization-Aware Uncertainty Introspective Learning for 3D Point Clouds Segmentation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Pei He, Licheng Jiao, Lingling Li, Xu Liu, Fang Liu, wenping ma, Shuyuan Yang, Ronghua Shang"
      },
      {
        "type": "paperTitle",
        "text": "4803 <b>A Lightweight Multi-domain Multi-attention Progressive Network for Single Image Deraining<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Junliu zhong, Li Zhiyi, Dan Xiang, Maotang Han, Changsheng Li, gan yanfen"
      },
      {
        "type": "paperTitle",
        "text": "4831 <b>Hunting Blemishes: Language-guided High-fidelity Face Retouching Transformer with Limited Paired Data<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Le Jiang,  YanHuang, lianxin xie, Wen Xue, Cheng Liu, Si Wu, Hau-San Wong"
      },
      {
        "type": "paperTitle",
        "text": "4857 <b>Partially Aligned Cross-modal Retrieval via  Optimal Transport-based Prototype Alignment Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Junsheng Wang, Tiantian Gong, Yan Yan"
      },
      {
        "type": "paperTitle",
        "text": "4860 <b>Siformer: Feature-isolated Transformer for Efficient Skeleton-based Sign Language Recognition<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Muxin Pu, Mei Kuan Lim, Chun Yong Chong"
      },
      {
        "type": "paperTitle",
        "text": "4862 <b>Correlation-Driven Multi-Modality Graph Decomposition for Cross-Subject Emotion Recognition<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wuliang Huang, Yiqiang Chen, Xinlong Jiang, Chenlong Gao, Qian Chen, Teng Zhang, 冰洁 闫, Yifan Wang, Jianrong Yang"
      },
      {
        "type": "paperTitle",
        "text": "4863 <b>Wave-Mamba: Wavelet State Space Model for Ultra-High-Definition Low-Light Image Enhancement<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenbin Zou, Hongxia Gao, Weipeng Yang, Tongtong Liu"
      },
      {
        "type": "paperTitle",
        "text": "4866 <b>Task-Conditional Adapter for Multi-Task Dense Prediction<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Fengze Jiang, Shuling Wang, Xiaojin Gong"
      },
      {
        "type": "paperTitle",
        "text": "4868 <b>PIMT: Physics-Based Interactive Motion Transition for Hybrid Character Animation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yanbin Deng, Zheng Li, Ning Xie, Wei Zhang"
      },
      {
        "type": "paperTitle",
        "text": "4871 <b>Open-Set Video-based Facial Expression Recognition with Human Expression-sensitive Prompting<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuanyuan Liu, Yuxuan Huang, Shuyang Liu, Yibing Zhan, Zijing Chen, Zhe Chen"
      },
      {
        "type": "paperTitle",
        "text": "4878 <b>LLaVA-Ultra: Large Chinese Language and Vision Assistant for Ultrasound<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xuechen Guo, Wenhao Chai, Shi-Yan Li, Gaoang Wang"
      },
      {
        "type": "paperTitle",
        "text": "4881 <b>Enhancing Multi-view Graph Neural Network with Cross-view Confluent Message Passing<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shuman Zhuang, Sujia Huang, wei huang, Yuhong Chen, Zhihao Wu, Ximeng Liu"
      },
      {
        "type": "paperTitle",
        "text": "4882 <b>Open-Vocabulary Audio-Visual Semantic Segmentation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ruohao Guo, Liao Qu, Niu Dantong, Yanyu Qi, Wenzhen Yue, Ji Shi, xing bowei, Xianghua Ying"
      },
      {
        "type": "paperTitle",
        "text": "4883 <b>Live On the Hump: Self Knowledge Distillation via Virtual Teacher-Students Mutual Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shuang Wang, Pengyi Hao, Fuli Wu, Cong Bai"
      },
      {
        "type": "paperTitle",
        "text": "4884 <b>Decoupling General and Personalized Knowledge in Federated Learning via Additive and Low-rank Decomposition<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xinghao Wu, Xuefeng Liu, Jianwei Niu, Haolin Wang, Shaojie Tang, Guogang Zhu, Hao Su"
      },
      {
        "type": "paperTitle",
        "text": "4891 <b>Can We Debiase Multimodal Large Language Models via Model Editing?<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zecheng Wang, Xinye Li, Zhanyue Qin, Chunshan Li, Zhiying Tu, Dianhui Chu, Dianbo Sui"
      },
      {
        "type": "paperTitle",
        "text": "4892 <b>Learning to Correction: Explainable Feedback Generation for Visual Commonsense Reasoning Distractor<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiali Chen, Xusen Hei, Yuqi Xue, Yuancheng Wei, Jiayuan Xie, Yi Cai, Qing Li"
      },
      {
        "type": "paperTitle",
        "text": "4894 <b>Cross-modal Observation Hypothesis Inference<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Mengze Li, Kairong Han, Jiahe Xu, Li Yueying, Wu Tao, Zhou Zhao, Jiaxu Miao, Shengyu Zhang, Jingyuan Chen"
      },
      {
        "type": "paperTitle",
        "text": "4896 <b>Evolution-aware VAriance (EVA) Coreset Selection for Medical Image Classification<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuxin Hong, Xiao Zhang, Xin Zhang, Joey Zhou"
      },
      {
        "type": "paperTitle",
        "text": "4901 <b>MM-Forecast: A Multimodal Approach to Temporal Event Forecasting with Large Language Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "HaoXuan Li, zhengmao Yang, Yunshan Ma, Yi Bin, Yang Yang, Tat-seng Chua"
      },
      {
        "type": "paperTitle",
        "text": "4902 <b>Estimating the Semantic Density of Visual Media<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Luca Rossetto, Cristina Sarasua, Abraham Bernstein"
      },
      {
        "type": "paperTitle",
        "text": "4906 <b>Seeing Beyond Words: Multimodal Aspect-Level Complaint Detection in Ecommerce Videos<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Rishikesh Devanathan, APOORVA SINGH, Anandan Sangeetha Poornash, Sriparna Saha"
      },
      {
        "type": "paperTitle",
        "text": "4910 <b>Semantic Aware Just Noticeable Differences for VVC compressed Text Screen Content Images<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Kaifang Yang, Xinrong Zhao, Yanchao Gong"
      },
      {
        "type": "paperTitle",
        "text": "4931 <b>Few-Shot Multimodal Explanation for Visual Question Answering<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Dizhan Xue, Shengsheng Qian, Changsheng Xu"
      },
      {
        "type": "paperTitle",
        "text": "4934 <b>Generative Multimodal Data Augmentation for Low-Resource Multimodal Named Entity Recognition<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ziyan Li, Jianfei Yu, Jia Yang, Wenya Wang, Li Yang, Rui Xia"
      },
      {
        "type": "paperTitle",
        "text": "4939 <b>DQ-Former: Querying Transformer with Dynamic Modality Priority for Cognitive-aligned Multimodal Emotion Recognition in Conversation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jing Ye, Xinpei Zhao"
      },
      {
        "type": "paperTitle",
        "text": "4942 <b>EMVCC: Enhanced Multi-View Contrastive Clustering for Hyperspectral Images<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Fulin Luo, Yi Liu, Xiuwen Gong, Zhixiong Nan, Tan Guo"
      },
      {
        "type": "paperTitle",
        "text": "4943 <b>Observe before Generate: Emotion-Cause aware Video Caption for Multimodal Emotion Cause Generation in Conversations<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Fanfan Wang, Heqing Ma, Xiangqing Shen, Jianfei Yu, Rui Xia"
      },
      {
        "type": "paperTitle",
        "text": "4946 <b>Utilizing Speaker Profiles for Impersonation Audio Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hao Gu, Jiangyan Yi, chenglong wang, Yong Ren, Jianhua Tao, Xinrui Yan, Yujie Chen, XiaoHui Zhang"
      },
      {
        "type": "paperTitle",
        "text": "4952 <b>Dual-path Collaborative Generation Network for Emotional Video Captioning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Cheng Ye, Weidong Chen, Jingyu Li, Lei Zhang, Zhendong Mao"
      },
      {
        "type": "paperTitle",
        "text": "4968 <b>Holistic-CAM: Ultra-lucid and Sanity Preserving Visual Interpretation in Holistic Stage of CNNs<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Pengxu Chen, Huazhong Liu, Jihong Ding, Jiawen Luo, Peng Tan, Laurence T. Yang"
      },
      {
        "type": "paperTitle",
        "text": "4971 <b>Multimodal Physiological Signals Representation Learning via Multiscale Contrasting for Depression Recognition<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Kai Shao, Rui Wang, yixue Hao, Long Hu, Min Chen, Hans Arno Jacobsen"
      },
      {
        "type": "paperTitle",
        "text": "4973 <b>Similarity Preserving Transformer Cross-Modal Hashing for Video-Text Retrieval<b>"
      },
      {
        "type": "paperAuthor",
        "text": " qianxinhuang, Siyao Peng, Xiaobo Shen, Yun-Hao Yuan, Shirui Pan"
      },
      {
        "type": "paperTitle",
        "text": "5001 <b>MMAL: Multi-Modal Analytic Learning for Exemplar-Free Audio-Visual Class Incremental Tasks<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yue Xianghu, Xueyi Zhang, Yiming Chen, Chengwei Zhang, Mingrui Lao, HUIPING ZHUANG, Xinyuan Qian, Haizhou Li"
      },
      {
        "type": "paperTitle",
        "text": "5013 <b>HyperTime: Hyperparameter Optimization for Combating Temporal Distribution Shifts<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shaokun Zhang, Yiran Wu, Zhonghua Zheng, Qingyun Wu, Chi Wang"
      },
      {
        "type": "paperTitle",
        "text": "5019 <b>DIG: Complex Layout Document Image Generation with Authentic-looking Text for Enhancing Layout Analysis<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Dehao Ying, Fengchang Yu, Haihua Chen, Wei Lu"
      },
      {
        "type": "paperTitle",
        "text": "5030 <b>Reliable Model Watermarking: Defending Against Theft without Compromising on Evasion<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hongyu Zhu, Sichu Liang, Wentao Hu, Li Fangqi, Ju Jia, Shi-Lin Wang"
      },
      {
        "type": "paperTitle",
        "text": "5035 <b>3D Priors-Guided Diffusion for Blind Face Restoration<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiaobin Lu, Xiaobin Hu, Jun Luo,  zhuben,  paulruan, Wenqi Ren"
      },
      {
        "type": "paperTitle",
        "text": "5044 <b>Reconstructing, Understanding, and Analyzing Relief Type Cultural Heritage from a Single Old Photo<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiao PAN, Liang Li, Hiroshi Yamaguchi, Kyoko Hasegawa, Fadjar Ibnu Thufail,  Brahmantara, Xiaojuan Ban, Satoshi Tanaka"
      },
      {
        "type": "paperTitle",
        "text": "5045 <b>Fractional Correspondence Framework in Detection Transformer<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Masoumeh Zareapoor, Pourya Shamsolmoali, Huiyu Zhou, Yue Lu, Salvador Garcia"
      },
      {
        "type": "paperTitle",
        "text": "5051 <b>Improving Open-World Classification with Disentangled Foreground and Background Features<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Choubo Ding, Guansong Pang"
      },
      {
        "type": "paperTitle",
        "text": "5055 <b>Laplacian Matrix Learning for Point Cloud Attribute Compression with Ternary Search-Based Adaptive Block Partition<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Changhao Peng, Wei Gao"
      },
      {
        "type": "paperTitle",
        "text": "5061 <b>In Situ 3D Scene Synthesis for Ubiquitous Embodied Interfaces<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Haiyan Jiang, Song Leiyu, dongdong weng, Zhe Sun, Li Huiying, Xiaonuo Dongye, Zhenliang Zhang"
      },
      {
        "type": "paperTitle",
        "text": "5069 <b>GRACE: GRadient-based Active Learning with Curriculum Enhancement for Multimodal Sentiment Analysis<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xinyu Li, Wenqing Ye, Yueyi Zhang, Xiaoyan Sun"
      },
      {
        "type": "paperTitle",
        "text": "5073 <b>EAGLE: Egocentric AGgregated Language-video Engine<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jing Bi, Yunlong Tang, Luchuan Song, Ali Vosoughi, Nguyen Nguyen, Chenliang Xu"
      },
      {
        "type": "paperTitle",
        "text": "5076 <b>IGSPAD: Inverting 3D Gaussian Splatting for Pose-agnostic Anomaly Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Bolin Jiang, Yuqiu Xie, Jiawei Li, Naiqi Li, Bin Chen, Shu-Tao Xia"
      },
      {
        "type": "paperTitle",
        "text": "5080 <b>PSM: Learning Probabilistic Embeddings for Multi-scale Zero-shot Soundscape Mapping<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Subash Khanal, Eric Xing, Srikumar Sastry, Aayush Dhakal, Zhexiao Xiong, Adeel Ahmad, Nathan Jacobs"
      },
      {
        "type": "paperTitle",
        "text": "5085 <b>Learning A Low-Level Vision Generalist via Visual Task Prompt<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiangyu Chen, Yihao Liu, Yuandong Pu, Wenlong Zhang, Jiantao Zhou, Yu Qiao, Chao Dong"
      },
      {
        "type": "paperTitle",
        "text": "5104 <b>Detecting Multimodal Situations with Insufficient Context and Abstaining from Baseless Predictions<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Junzhang Liu, Zhecan Wang, Hammad Ayyubi, Haoxuan You, Christopher Thomas, Rui Sun, Shih-Fu Chang, Kai-Wei Chang"
      },
      {
        "type": "paperTitle",
        "text": "5105 <b>InMu-Net: Advancing Multi-modal Intent Detection via Information Bottleneck and Multi-sensory Processing<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhihong Zhu, Xuxin Cheng, Zhaorun Chen, Yuyan Chen, Yunyan Zhang, Xian Wu, Yefeng Zheng, Bowen Xing"
      },
      {
        "type": "paperTitle",
        "text": "5120 <b>SSAT-Adapter: Enhancing Vision-Language Model Few-shot Learning with Auxiliary Tasks<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Bowen Chen, Yun Sing Koh, Gillian Dobbie"
      },
      {
        "type": "paperTitle",
        "text": "5129 <b>Modal-Enhanced Semantic Modeling for Fine-Grained 3D Human Motion Retrieval<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Haoyu Shi, huaiwen zhang"
      },
      {
        "type": "paperTitle",
        "text": "5131 <b>GaussianTalker: Real-Time Talking Head Synthesis with 3D Gaussian Splatting<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Kyusun Cho, JoungBin Lee, Heeji Yoon, Yeobin Hong, Jaehoon Ko, Sangjun Ahn, Seungryong Kim"
      },
      {
        "type": "paperTitle",
        "text": "5138 <b>Exploring Matching Rates: From Key Point Selection to Camera Relocalization<b>"
      },
      {
        "type": "paperAuthor",
        "text": "HU LIN, Chengjiang Long, Yifeng Fei, qianchen xia, Erwei Yin, Baocai Yin, Xin Yang"
      },
      {
        "type": "paperTitle",
        "text": "5158 <b>CREST: Cross-modal Resonance through Evidential Deep Learning for Enhanced Zero-ShoT Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Haojian Huang, Xiaozhennn Qiao, Zhuo Chen, Haodong Chen, Binyu Li, Zhe Sun, Mulin Chen, Xuelong Li"
      },
      {
        "type": "paperTitle",
        "text": "5161 <b>A Chinese Multimodal Social Video Dataset for Controversy Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": " TianjiaoXu, Aoxuan Chen, Yuxi Zhao, Jinfei Gao, Tian Gan"
      },
      {
        "type": "paperTitle",
        "text": "5167 <b>Temporal Enhancement for Video Affective Content Analysis<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xin Li, Shangfei Wang, Xuandong Huang"
      },
      {
        "type": "paperTitle",
        "text": "5176 <b>Hierarchical Debiasing and Noisy Correction for Cross-domain Video Tube Retrieval<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jingqiao Xiu, Mengze Li, Wei Ji, Jingyuan Chen, Hanbin Zhao, Shin'ichi Satoh, Roger Zimmermann"
      },
      {
        "type": "paperTitle",
        "text": "5177 <b>Multimodal Fusion via Hypergraph Autoencoder and Contrastive Learning for Emotion Recognition in Conversation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zijian Yi, Ziming Zhao, Zhishu Shen, Tiehua Zhang"
      },
      {
        "type": "paperTitle",
        "text": "5194 <b>Hi3D: Pursuing High-Resolution Image-to-3D Generation with Video Diffusion Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Haibo Yang, Yang Chen, Yingwei Pan, Ting Yao, Zhineng Chen, Chong Wah Ngo, Tao Mei"
      },
      {
        "type": "paperTitle",
        "text": "5197 <b>LDStega: Practical and Robust Generative Image Steganography based on  Latent Diffusion Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yinyin Peng, Yaofei Wang, Donghui Hu, Kejiang Chen, Xianjin Rong, Weiming Zhang"
      },
      {
        "type": "paperTitle",
        "text": "5203 <b>AlignCLIP: Align Multi Domains of Texts Input for CLIP models with Object-IoU Loss<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Lu Zhang, Ke Yan, Shouhong Ding"
      },
      {
        "type": "paperTitle",
        "text": "5209 <b>SkipVSR: Adaptive Patch Routing for Video Super-Resolution with Inter-Frame Mask<b>"
      },
      {
        "type": "paperAuthor",
        "text": "zekun Ai, Xiaotong Luo, Yanyun Qu, Yuan Xie"
      },
      {
        "type": "paperTitle",
        "text": "5210 <b>A Unimodal Valence-Arousal Driven Contrastive Learning Framework for Multimodal Multi-Label Emotion Recognition<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenjie Zheng, Jianfei Yu, Rui Xia"
      },
      {
        "type": "paperTitle",
        "text": "5214 <b>Deeply Fusing Semantics and Interactions for Item Representation Learning via Topology-driven Pre-training<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shiqin Liu, Chaozhuo Li, Xi Zhang, Minjun Zhao, yuanbo xu, Jiajun Bu"
      },
      {
        "type": "paperTitle",
        "text": "5216 <b>Navigating Beyond Instructions: Vision-and-Language Navigation in Obstructed Environments<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Haodong Hong, Sen Wang, Zi Huang, Qi Wu, Jiajun Liu"
      },
      {
        "type": "paperTitle",
        "text": "5247 <b>HMR-Adapter: A Lightweight Adapter with Dual-Path Cross Augmentation for Expressive Human Mesh Recovery<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenhao Shen, Wanqi Yin, Hao Wang, CHEN WEI, Zhongang Cai, Lei Yang, Guosheng Lin"
      },
      {
        "type": "paperTitle",
        "text": "5257 <b>ExpressiveSinger: Multilingual and Multi-Style Score-based Singing Voice Synthesis with Expressive Performance Control<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shuqi Dai, Ming-Yu Liu, Rafael Valle, Siddharth Gururani"
      },
      {
        "type": "paperTitle",
        "text": "5263 <b>HINER: Neural Representation for Hyperspectral Image<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Junqi Shi, Jiang Mingyi, Ming Lu, Tong Chen, Xun Cao, Zhan Ma"
      },
      {
        "type": "paperTitle",
        "text": "5271 <b>Control-Talker: A Rapid-Customization Talking Head Generation Method for Multi-Condition Control and High-Texture Enhancement<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yiding Li, Lingyun Yu, Li Wang, Hongtao Xie"
      },
      {
        "type": "paperTitle",
        "text": "5285 <b>EEG-MACS: Manifold Attention and Confidence Stratification for EEG-based Cross-Center Brain Disease Diagnosis under Unreliable Annotations<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhenxi Song, Ruihan Qin, Huixia Ren, Zhen Liang, Yi Guo, Min zhang, Zhiguo Zhang"
      },
      {
        "type": "paperTitle",
        "text": "5293 <b>Diverse consensuses paired with motion estimation-based multi-model fitting<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenyu Yin, Shuyuan Lin, Yang Lu, Hanzi Wang"
      },
      {
        "type": "paperTitle",
        "text": "5298 <b>AMG-Embedding: a Self-Supervised Embedding Approach for Audio Identification<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuhang Su, Wei Hu, Fan Zhang,  QimingXu"
      },
      {
        "type": "paperTitle",
        "text": "5299 <b>Ada2I: Enhancing Modality Balance for Multimodal Conversational Emotion Recognition<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Cam Van Nguyen, Son Le The, Tuan Mai, Duc-Trong Le"
      },
      {
        "type": "paperTitle",
        "text": "5305 <b>Semantic Editing Increment Benefits Zero-Shot Composed Image Retrieval<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhenyu Yang, Shengsheng Qian, Dizhan Xue, Jiahong Wu, Fan Yang, Weiming Dong, Changsheng Xu"
      },
      {
        "type": "paperTitle",
        "text": "5329 <b>Edge-assisted Real-time Dynamic 3D Point Cloud Rendering for Multi-party Mobile Virtual Reality<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ximing Wu, Kongyange Zhao, Teng Liang, Xu Chen"
      },
      {
        "type": "paperTitle",
        "text": "5331 <b>SCREEN: A Benchmark for Situated Conversational Recommendation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Dongding Lin, Jian Wang, Chak Tou Leong, Wenjie Li"
      },
      {
        "type": "paperTitle",
        "text": "5339 <b>Embedding an Ethical Mind:  Aligning Text-to-Image Synthesis via Lightweight Value Optimization<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xingqi Wang, Xiaoyuan Yi, Xin Xie, Jia Jia"
      },
      {
        "type": "paperTitle",
        "text": "5342 <b>RelScene: A Benchmark and baseline for Spatial Relations in text-driven 3D Scene Generation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "zhaoda ye, Xinhan Zheng, Yang Liu, Yuxin Peng"
      },
      {
        "type": "paperTitle",
        "text": "5353 <b>SATPose: Improving Monocular 3D Pose Estimation with Spatial-aware Ground Tactility<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Lishuang Zhan, Enting Ying,  JiabaoGan, Shihui Guo, BoYu Gao, Yipeng Qin"
      },
      {
        "type": "paperTitle",
        "text": "5359 <b>RDLNet: A Novel and Accurate Real-world Document Localization Method<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yaqiang Wu, Zhen Xu, Yong Duan, Yanlai Wu, Qinghua Zheng, Hui Li, Hu Xiaochen, Lianwen Jin"
      },
      {
        "type": "paperTitle",
        "text": "5360 <b>GalleryGPT: Analyzing Paintings with Large Multimodal Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yi Bin, WENHAO SHI, Yujuan Ding, Zhiqiang Hu, Zheng WANG, Yang Yang, See-Kiong Ng, Hengtao Shen"
      },
      {
        "type": "paperTitle",
        "text": "5362 <b>StableMoFusion: Towards Robust and Efficient Diffusion-based Motion Generation Framework<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yiheng Huang, Yang Hui, Chuanchen Luo, Yuxi Wang, Shibiao Xu, Zhaoxiang Zhang, Man Zhang, Junran Peng"
      },
      {
        "type": "paperTitle",
        "text": "5368 <b>Query Augmentation with Brain Signals<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ziyi Ye, Jingtao Zhan, Qingyao Ai, Yiqun LIU, Maarten Rijke, Christina Lioma, Tuukka Ruotsalo"
      },
      {
        "type": "paperTitle",
        "text": "5377 <b>Zero-Shot Character Identification and Speaker Prediction in Comics via Iterative Multimodal Fusion<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yingxuan Li, Ryota Hinami, Kiyoharu Aizawa, Yusuke Matsui"
      },
      {
        "type": "paperTitle",
        "text": "5385 <b>ResVG: Enhancing Relation and Semantic Understanding in Multiple Instances for Visual Grounding<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Minghang Zheng, Jiahua Zhang, Qingchao Chen, Yuxin Peng, Yang Liu"
      },
      {
        "type": "paperTitle",
        "text": "5387 <b>Bridging Gaps in Content and Knowledge for Multimodal Entity Linking<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Pengfei Luo, Tong Xu, Che Liu, Suojuan Zhang, Linli Xu, Minglei Li, Enhong Chen"
      },
      {
        "type": "paperTitle",
        "text": "5413 <b>DAFT-GAN: Dual Affine Transformation Generative Adversarial Network for Text-Guided Image Inpainting<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jihoon Lee, Yunhong Min, Hwidong Kim, Sangtae Ahn"
      },
      {
        "type": "paperTitle",
        "text": "5417 <b>Combating Visual Question Answering Hallucinations via Robust Multi-Space Co-Debias Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiawei Zhu, Yishu Liu, Huanjia Zhu, Hui Lin, Yuncheng Jiang, Zheng Zhang, Bingzhi Chen"
      },
      {
        "type": "paperTitle",
        "text": "5418 <b>Vi2ACT:Video-enhanced Cross-modal Co-learning with Representation Conditional Discriminator for Few-shot Human Activity Recognition<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Kang Xia, Wenzhong Li, Yimiao Shao, Sanglu Lu"
      },
      {
        "type": "paperTitle",
        "text": "5419 <b>CIEASR:Contextual Image-Enhanced Automatic Speech Recognition for Improved Homophone Discrimination<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Ziyi Wang, Yiming Rong, Jiang Deyang, Haoran Wu, Shiyu Zhou, Bo XU"
      },
      {
        "type": "paperTitle",
        "text": "5425 <b>Generalizing ISP Model by Unsupervised Raw-to-raw Mapping<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Dongyu Xie, Chaofan Qiao, Liang Lanyue, Zhiwen Wang, Tianyu Li, Qiao Liu, Chongyi Li, Guoqing Wang, Yang Yang"
      },
      {
        "type": "paperTitle",
        "text": "5441 <b>Dual-Branch Fusion with Style Modulation for Cross-Domain Few-Shot Semantic Segmentation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Qiuyu Kong, Jiangming Chen, Jiang Jie, Zanxi Ruan, KANG Lai"
      },
      {
        "type": "paperTitle",
        "text": "5447 <b>Inferring 3D Occupancy Fields through Implicit Reasoning on Silhouette Images<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Baorui Ma, Yu-Shen Liu, Matthias Zwicker, Zhizhong Han"
      },
      {
        "type": "paperTitle",
        "text": "5452 <b>Dual-Modeling Decouple Distillation for Unsupervised Anomaly Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xinyue Liu, Jianyuan Wang, Biao Leng, Shuo Zhang"
      },
      {
        "type": "paperTitle",
        "text": "5453 <b>LiteQUIC: Improving QoE of Video Streams by Reducing CPU Overhead of QUIC<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Pengqiang Bi, Yifei Zou, Mengbai Xiao, Dongxiao Yu,  yijunli,  zhixiong.liu,  qunxie"
      },
      {
        "type": "paperTitle",
        "text": "5483 <b>ReCoS: A Novel Benchmark for Cross-Modal Image-Text Retrieval in Complex Real-Life Scenarios<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xiaojun Chen, jimeng lou, Wenxi Huang, Ting Wan1058, Qin Zhang, Min Yang"
      },
      {
        "type": "paperTitle",
        "text": "5494 <b>FRADE: Forgery-aware Audio-distilled Multimodal Learning for Deepfake Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Fan Nie, Jiangqun Ni, Jian Zhang, Bin Zhang, Weizhe Zhang"
      },
      {
        "type": "paperTitle",
        "text": "5507 <b>Mitigating World Biases: A Multimodal Multi-View Debiasing Framework for Fake News Video Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhi Zeng, Minnan Luo, Xiangzheng Kong, Huan Liu, Hao Guo, Hao Yang, Zihan Ma, Xiang Zhao"
      },
      {
        "type": "paperTitle",
        "text": "5510 <b>SpeechCraft: A Fine-Grained Expressive Speech Dataset with Natural Language Description<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zeyu Jin, Jia Jia, Qixin Wang, Kehan Li, Shuoyi Zhou, Songtao Zhou, Xiaoyu Qin, Zhiyong Wu"
      },
      {
        "type": "paperTitle",
        "text": "5519 <b>GaussianTalker: Speaker-specific Talking Head Synthesis via 3D Gaussian Splatting<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Hongyun Yu, Zhan Qu, Qihang Yu, Jianchuan Chen, Zhonghua Jiang, Zhiwen Chen, Shengyu Zhang, Jimin Xu, Fei Wu, chengfei lv, Gang Yu"
      },
      {
        "type": "paperTitle",
        "text": "5526 <b>Stay Focused is All You Need for Adversarial Robustness<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Bingzhi Chen, Ruihan Liu, Yishu Liu, Xiaozhao Fang, Jiahui Pan, Guangming Lu, Zheng Zhang"
      },
      {
        "type": "paperTitle",
        "text": "5528 <b>Leveraging Weak Cross-Modal Guidance for Coherence Modelling via Iterative Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yi Bin, Junrong Liao, Yujuan Ding, HaoXuan Li, Yang Yang, See-Kiong Ng, Hengtao Shen"
      },
      {
        "type": "paperTitle",
        "text": "5530 <b>Scene Diffusion: Text-driven Scene Image Synthesis Conditioning on a Single 3D Model<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Xuan Han, Yihao Zhao, Mingyu You"
      },
      {
        "type": "paperTitle",
        "text": "5551 <b>Towards Trustworthy MetaShopping: Studying Manipulative Audiovisual Designs in Virtual-Physical Commercial Platforms<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Esmee Henrieke Anne de Haas, LIK-HANG LEE, Yiming Huang, Carlos BERMEJO FERNANDEZ, Pan Hui, Zijun Lin"
      },
      {
        "type": "paperTitle",
        "text": "5555 <b>VoxInstruct: Expressive Human Instruction-to-Speech Generation with Unified Multilingual Codec Language Modelling<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yixuan Zhou, Xiaoyu Qin, Zeyu Jin, Shuoyi Zhou, Shun Lei, Songtao Zhou, Zhiyong Wu, Jia Jia"
      },
      {
        "type": "paperTitle",
        "text": "5560 <b>Model-Based Non-Independent Distortion Cost Design for Effective JPEG Steganography<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuanfeng Pan, Wenkang Su, Jiangqun Ni, Qingliang Liu, Yulin Zhang, Donghua Jiang"
      },
      {
        "type": "paperTitle",
        "text": "5588 <b>Enhancing Images with Coupled Low-Resolution and Ultra-Dark Degradations: A Tri-level Learning Framework<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jiaxin Gao, Yaohua Liu"
      },
      {
        "type": "paperTitle",
        "text": "5592 <b>Leveraging Knowledge of Modality Experts for Incomplete Multimodal Learning<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Wenxin Xu, Hexin Jiang, xuefeng liang"
      },
      {
        "type": "paperTitle",
        "text": "5596 <b>Information Fusion with Knowledge Distillation for Fine-grained Remote Sensing Object Detection<b>"
      },
      {
        "type": "paperAuthor",
        "text": " Shengzhang, Xi Yang"
      },
      {
        "type": "paperTitle",
        "text": "5598 <b>Multi-modal Auto-regressive Modeling via Visual Tokens<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Tianshuo Peng, Zuchao Li, Lefei Zhang, hai zhao, Ping Wang, Bo Du"
      },
      {
        "type": "paperTitle",
        "text": "5611 <b>OSNeRF: On-demand Semantic Neural Radiance Fields for Fast and Robust 3D Object Reconstruction<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Rui Xu, Gaolei Li, Changze Li, Zhaohui Yang, Yuchen Liu, Mingzhe Chen"
      },
      {
        "type": "paperTitle",
        "text": "5612 <b>SCPSN: Spectral Clustering-based Pyramid Super-resolution Network  for Hyperspectral Images<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yong Yang, Aoqi Zhao, Shuying Huang, Xiaozheng Wang, Yajing Fan"
      },
      {
        "type": "paperTitle",
        "text": "5629 <b>Tango 2: Aligning Diffusion-based Text-to-Audio Generative Models through Direct Preference Optimization<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Navonil Majumder, Chia-Yu Hung, Deepanway Ghosal, Wei-Ning Hsu, Rada Mihalcea, Soujanya Poria"
      },
      {
        "type": "paperTitle",
        "text": "5633 <b>The Room: design and embodiment of spaces as social beings<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Federico Espositi, Andrea Bonarini"
      },
      {
        "type": "paperTitle",
        "text": "5636 <b>Dissecting Temporal Understanding in Text-to-Audio Retrieval<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Andreea-Maria Oncescu, Joao F. Henriques, A. Koepke"
      },
      {
        "type": "paperTitle",
        "text": "5640 <b>Towards Stricter Black-box Integrity Verification of Deep Neural Network Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Chaoxiang He, Bai Xiaofan, Xiaojing Ma, Bin Benjamin Zhu, Pingyi Hu, Jiayun Fu, Hai Jin, Dongmei Zhang"
      },
      {
        "type": "paperTitle",
        "text": "5646 <b>Tag Tree-Guided Multi-grained Alignment for Multi-Domain Short Video Recommendation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuting Zhang, Zhao Zhang, Yiqing Wu, Ying Sun, Fuzhen Zhuang, Wenhui Yu, Lantao Hu, Han Li, Kun Gai, Zhulin An, Yongjun Xu"
      },
      {
        "type": "paperTitle",
        "text": "5651 <b>Self-derived Knowledge Graph Contrastive Learning for Recommendation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Lei Shi, Jiapeng Yang, Pengtao Lv, Lu Yuan, Feifei Kou, Jia Luo, Xu Mingying"
      },
      {
        "type": "paperTitle",
        "text": "5663 <b>Gaussian Splatting With Neural Basis Extension<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhi Zhou, Junke Zhu, ZhangJin Huang"
      },
      {
        "type": "paperTitle",
        "text": "5666 <b>VoiceTuner: Self-Supervised Pre-training and Efficient Fine-tuning For Voice Generation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Rongjie Huang, Yongqi Wang, Hu Ruofan, Xiaoshan Xu, Zhiqing Hong, Dongchao Yang, Xize Cheng, Zehan Wang, Ziyue Jiang, Zhenhui Ye, Luping Liu, Siqi Zheng, Zhou Zhao"
      },
      {
        "type": "paperTitle",
        "text": "5668 <b>Multimodal Contextual Interactions of Entities: A Modality Circular Fusion Approach for Link Prediction<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jing Yang, ShunDong Yang, Yuan Gao, JieMing Yang, Laurence Yang"
      },
      {
        "type": "paperTitle",
        "text": "5676 <b>Generative Expressive Conversational Speech Synthesis<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Rui Liu, Yifan Hu, Yi Ren, Xiang Yin, Haizhou Li"
      },
      {
        "type": "paperTitle",
        "text": "5678 <b>MaskMentor: Unlocking the Potential of Masked Self-Teaching for Missing Modality RGB-D Semantic Segmentation<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Zhida Zhao, Jia Li, Lijun Wang, Yifan Wang, Huchuan Lu"
      },
      {
        "type": "paperTitle",
        "text": "5686 <b>Context-Aware Indoor Point Cloud Object Generation through User Instructions<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Luo Yiyang, Ke Lin, Chao Gu"
      },
      {
        "type": "paperTitle",
        "text": "5695 <b>Interpretable Matching of Optical-SAR Image via Dynamically Conditioned Diffusion Models<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Shuiping Gou, Xin Wang, Xinlin Wang, Yunzhi Chen"
      },
      {
        "type": "paperTitle",
        "text": "5703 <b>Boundary-Aware Periodicity-based Sparsification Strategy for Ultra-Long Time Series Forecasting<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yiying Bao, Hao Zhou, Peng Chao, Chenyang Xu, Shuo Shi, Kecheng Cai"
      },
      {
        "type": "paperTitle",
        "text": "5724 <b>Generalize to Fully Unseen Graphs: Learn Transferable Hyper-Relation Structures for Inductive Link Prediction<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Jing Yang,  XiaowenJiang, Yuan Gao, Laurence Yang, JieMing Yang"
      },
      {
        "type": "paperTitle",
        "text": "5730 <b>Aspect-Based Multimodal Mining: Unveiling Sentiments, Complaints, and Beyond in User-Generated Content<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Mamta Mamta, gopendra singh, Deepak Raju Kori, Asif Ekbal"
      },
      {
        "type": "paperTitle",
        "text": "5734 <b>Towards Distortion-Debiased  Blind Image Quality  Assessment<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Lize Zhou, Xiaoqi Wang, Jian Xiong, Xianzhong Long, Hao Gao"
      },
      {
        "type": "paperTitle",
        "text": "5739 <b>Decoding Urban Industrial Complexity: Enhancing Knowledge-Driven Insights via IndustryScopeGPT<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Siqi Wang, Chao Liang, Yunfan Gao, Liu Yang, Jing Li, Haofen Wang"
      },
      {
        "type": "paperTitle",
        "text": "5743 <b>RHKH: Relational Hypergraph Neural Network for Link Prediction on N-ary Knowledge Hypergraph<b>"
      },
      {
        "type": "paperAuthor",
        "text": "Yuzhuo Wang, He Junwei, Hongzhi Wang"
      }
		],
    }
  },
  mounted() {
    document.title = 'ACM Multimedia 2021 Accepted Papers'
  }
}

</script>

<style scoped>
/* This section declares no rules of its own; it only imports the stylesheet below. */
/* NOTE(review): confirm that the build applies `scoped` to rules brought in via @import. */
@import "../../css/responsive.css";
</style>