Publications | Muhammad Uzair Khattak

Papers are listed below. * denote joint first authors.

2024

UniMed-CLIP: Towards a Unified Image-Text Pretraining Paradigm for Diverse Medical Imaging Modalities

Muhammad Uzair Khattak*, Shahina Kunhimon*, Muzammal Naseer, Salman Khan, and Fahad Shahbaz Khan

arXiv preprint arXiv:2412.10372, 2024

@article{khattak2024unimed,
  author = {Khattak*, Muhammad Uzair and Kunhimon*, Shahina and Naseer, Muzammal and Khan, Salman and Khan, Fahad Shahbaz},
  title = {UniMed-CLIP: Towards a Unified Image-Text Pretraining Paradigm for Diverse Medical Imaging Modalities},
  journal = {arXiv preprint arXiv:2412.10372},
  year = {2024},
}

How Good is my Video LMM? Complex Video Reasoning and Robustness Evaluation Suite for Video-LMMs

Muhammad Uzair Khattak, Muhammad Ferjad Naeem, Jameel Hassan, Muzammal Naseer, Federico Tombari, Fahad Shahbaz Khan, and Salman Khan

arXiv preprint arXiv:2405.03690, 2024

Bib PDF Code Website

@article{khattak2024complex,
  author = {Khattak, Muhammad Uzair and Naeem, Muhammad Ferjad and Hassan, Jameel and Naseer, Muzammal and Tombari, Federico and Khan, Fahad Shahbaz and Khan, Salman},
  title = {How Good is my Video LMM? Complex Video Reasoning and Robustness Evaluation Suite for Video-LMMs},
  journal = {arXiv preprint arXiv:2405.03690},
  year = {2024},
}

Learning to Prompt with Text Only Supervision for Vision-Language Models

Muhammad Uzair khattak, Muhammad Ferjad Naeem, Naseer Muzzamal, Luc Van Gool, and Federico Tombari

arXiv:2401.02418, 2024

Bib PDF Code Website

@article{Khattak2024ProText,
  title = {Learning to Prompt with Text Only Supervision for Vision-Language Models},
  author = {khattak, Muhammad Uzair and Naeem, Muhammad Ferjad and Muzzamal, Naseer and Gool, Luc Van and Tombari, Federico},
  journal = {arXiv:2401.02418},
  year = {2024},
}

2023

Maple: Multi-modal prompt learning

Muhammad Uzair Khattak, Hanoona Rasheed, Muhammad Maaz, Salman Khan, and Fahad Shahbaz Khan

In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition, 2023

Bib PDF Code Website

@inproceedings{khattak2023maple,
  title = {Maple: Multi-modal prompt learning},
  author = {Khattak, Muhammad Uzair and Rasheed, Hanoona and Maaz, Muhammad and Khan, Salman and Khan, Fahad Shahbaz},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages = {19113--19122},
  year = {2023},
}

Align Your Prompts: Test-Time Prompting with Distribution Alignment for Zero-Shot Generalization

Jameel Hassan, Hanan Gani, Noor Hussein, Muhammad Uzair Khattak+, Muzammal Naseer, Fahad Shahbaz Khan, and Salman Khan

Advances in Neural Information Processing Systems, 2023

Bib PDF Code Website

@article{hassan2023align,
  title = {Align Your Prompts: Test-Time Prompting with Distribution Alignment for Zero-Shot Generalization},
  author = {Hassan, Jameel and Gani, Hanan and Hussein, Noor and Khattak+, Muhammad Uzair and Naseer, Muzammal and Khan, Fahad Shahbaz and Khan, Salman},
  journal = {Advances in Neural Information Processing Systems},
  year = {2023},
}

Fine-tuned clip models are efficient video learners

Hanoona Rasheed*, Muhammad Uzair Khattak*, Muhammad Maaz, Salman Khan, and Fahad Shahbaz Khan

In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition, 2023

Bib PDF Code Website

@inproceedings{rasheed2023fine,
  title = {Fine-tuned clip models are efficient video learners},
  author = {Rasheed*, Hanoona and Khattak*, Muhammad Uzair and Maaz, Muhammad and Khan, Salman and Khan, Fahad Shahbaz},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages = {6545--6554},
  year = {2023},
}

Self-regulating Prompts: Foundational Model Adaptation without Forgetting

Muhammad Uzair Khattak*, Syed Talal Wasim*, Muzammal Naseer, Salman Khan, Ming-Hsuan Yang, and Fahad Shahbaz Khan

In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition, Oct 2023

Bib PDF Code Website

@inproceedings{Khattak_2023_ICCV,
  title = {Self-regulating Prompts: Foundational Model Adaptation without Forgetting},
  author = {Khattak*, Muhammad Uzair and Wasim*, Syed Talal and Naseer, Muzammal and Khan, Salman and Yang, Ming-Hsuan and Khan, Fahad Shahbaz},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages = {15190-15200},
  year = {2023},
  month = oct,
}

Video-FocalNets: Spatio-Temporal Focal Modulation for Video Action Recognition

Syed Talal Wasim*, Muhammad Uzair Khattak*, Muzammal Naseer, Salman Khan, Mubarak Shah, and Fahad Shahbaz Khan

In Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV), Oct 2023

Bib PDF Code Website

@inproceedings{Wasim_2023_ICCV,
  author = {Wasim*, Syed Talal and Khattak*, Muhammad Uzair and Naseer, Muzammal and Khan, Salman and Shah, Mubarak and Khan, Fahad Shahbaz},
  title = {Video-FocalNets: Spatio-Temporal Focal Modulation for Video Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  month = oct,
  year = {2023},
  pages = {13778-13789},
}

2022

Bridging the gap between object and image-level representations for open-vocabulary detection

Hanoona Bangalath*, Muhammad Maaz*, Muhammad Uzair Khattak, Salman H Khan, and Fahad Shahbaz Khan

Advances in Neural Information Processing Systems, Oct 2022

Bib PDF Code Website

@article{bangalath2022bridging,
  title = {Bridging the gap between object and image-level representations for open-vocabulary detection},
  author = {Bangalath*, Hanoona and Maaz*, Muhammad and Khattak, Muhammad Uzair and Khan, Salman H and Shahbaz Khan, Fahad},
  journal = {Advances in Neural Information Processing Systems},
  volume = {35},
  pages = {33781--33794},
  year = {2022},
}

Investigating and Improving Common Loop Closure Failures in Visual SLAM

Saran Khaliq, Muhammad Latif Anjum, Wajahat Hussain, Muhammad Uzair Khattak, and Momen Rasool

Autonomous Robots, Oct 2022

PDF