Papers are listed below. * denote joint first authors. 2024 How Good is my Video LMM? Complex Video Reasoning and Robustness Evaluation Suite for Video-LMMs Muhammad Uzair Khattak, Muhammad Ferjad Naeem, Jameel Hassan, Muzammal Naseer, Federico Tombari, Fahad Shahbaz Khan, and Salman Khan arXiv preprint arXiv:2405.03690, 2024 Bib PDF Code Website @article{khattak2024complex, author = {Khattak, Muhammad Uzair and Naeem, Muhammad Ferjad and Hassan, Jameel and Naseer, Muzammal and Tombari, Federico and Khan, Fahad Shahbaz and Khan, Salman}, title = {How Good is my Video LMM? Complex Video Reasoning and Robustness Evaluation Suite for Video-LMMs}, journal = {arXiv preprint arXiv:2405.03690}, year = {2024}, } Learning to Prompt with Text Only Supervision for Vision-Language Models Muhammad Uzair khattak, Muhammad Ferjad Naeem, Naseer Muzzamal, Luc Van Gool, and Federico Tombari arXiv:2401.02418, 2024 Bib PDF Code Website @article{Khattak2024ProText, title = {Learning to Prompt with Text Only Supervision for Vision-Language Models}, author = {khattak, Muhammad Uzair and Naeem, Muhammad Ferjad and Muzzamal, Naseer and Gool, Luc Van and Tombari, Federico}, journal = {arXiv:2401.02418}, year = {2024}, } 2023 Maple: Multi-modal prompt learning Muhammad Uzair Khattak, Hanoona Rasheed, Muhammad Maaz, Salman Khan, and Fahad Shahbaz Khan In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition, 2023 Bib PDF Code Website @inproceedings{khattak2023maple, title = {Maple: Multi-modal prompt learning}, author = {Khattak, Muhammad Uzair and Rasheed, Hanoona and Maaz, Muhammad and Khan, Salman and Khan, Fahad Shahbaz}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, pages = {19113--19122}, year = {2023}, } Align Your Prompts: Test-Time Prompting with Distribution Alignment for Zero-Shot Generalization Jameel Hassan, Hanan Gani, Noor Hussein, Muhammad Uzair Khattak+, Muzammal Naseer, Fahad Shahbaz Khan, and Salman Khan Advances in Neural Information Processing Systems, 2023 Bib PDF Code Website @article{hassan2023align, title = {Align Your Prompts: Test-Time Prompting with Distribution Alignment for Zero-Shot Generalization}, author = {Hassan, Jameel and Gani, Hanan and Hussein, Noor and Khattak+, Muhammad Uzair and Naseer, Muzammal and Khan, Fahad Shahbaz and Khan, Salman}, journal = {Advances in Neural Information Processing Systems}, year = {2023}, } Fine-tuned clip models are efficient video learners Hanoona Rasheed*, Muhammad Uzair Khattak*, Muhammad Maaz, Salman Khan, and Fahad Shahbaz Khan In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition, 2023 Bib PDF Code Website @inproceedings{rasheed2023fine, title = {Fine-tuned clip models are efficient video learners}, author = {Rasheed*, Hanoona and Khattak*, Muhammad Uzair and Maaz, Muhammad and Khan, Salman and Khan, Fahad Shahbaz}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, pages = {6545--6554}, year = {2023}, } Self-regulating Prompts: Foundational Model Adaptation without Forgetting Muhammad Uzair Khattak*, Syed Talal Wasim*, Muzammal Naseer, Salman Khan, Ming-Hsuan Yang, and Fahad Shahbaz Khan In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition, Oct 2023 Bib PDF Code Website @inproceedings{Khattak_2023_ICCV, title = {Self-regulating Prompts: Foundational Model Adaptation without Forgetting}, author = {Khattak*, Muhammad Uzair and Wasim*, Syed Talal and Naseer, Muzammal and Khan, Salman and Yang, Ming-Hsuan and Khan, Fahad Shahbaz}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, pages = {15190-15200}, year = {2023}, month = oct, } Video-FocalNets: Spatio-Temporal Focal Modulation for Video Action Recognition Syed Talal Wasim*, Muhammad Uzair Khattak*, Muzammal Naseer, Salman Khan, Mubarak Shah, and Fahad Shahbaz Khan In Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV), Oct 2023 Bib PDF Code Website @inproceedings{Wasim_2023_ICCV, author = {Wasim*, Syed Talal and Khattak*, Muhammad Uzair and Naseer, Muzammal and Khan, Salman and Shah, Mubarak and Khan, Fahad Shahbaz}, title = {Video-FocalNets: Spatio-Temporal Focal Modulation for Video Action Recognition}, booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, month = oct, year = {2023}, pages = {13778-13789}, } 2022 Bridging the gap between object and image-level representations for open-vocabulary detection Hanoona Bangalath*, Muhammad Maaz*, Muhammad Uzair Khattak, Salman H Khan, and Fahad Shahbaz Khan Advances in Neural Information Processing Systems, Oct 2022 Bib PDF Code Website @article{bangalath2022bridging, title = {Bridging the gap between object and image-level representations for open-vocabulary detection}, author = {Bangalath*, Hanoona and Maaz*, Muhammad and Khattak, Muhammad Uzair and Khan, Salman H and Shahbaz Khan, Fahad}, journal = {Advances in Neural Information Processing Systems}, volume = {35}, pages = {33781--33794}, year = {2022}, } Investigating and Improving Common Loop Closure Failures in Visual SLAM Saran Khaliq, Muhammad Latif Anjum, Wajahat Hussain, Muhammad Uzair Khattak, and Momen Rasool Autonomous Robots, Oct 2022 PDF