% Conference paper on exploiting inter-layer expert affinity to accelerate
% Mixture-of-Experts model inference (IPDPS, May 2024).
% Given names are stored as initials because only initials were available
% in the original record; expand them if the full names are confirmed.
@inproceedings{yao2024exploiting,
  author    = {Yao, J. and Anthony, Q. and Shafi, A. and Subramoni, H. and Panda, D.},
  title     = {Exploiting Inter-Layer Expert Affinity for Accelerating Mixture-of-Experts Model Inference},
  booktitle = {38th {IEEE} International Parallel \& Distributed Processing Symposium},
  year      = {2024},
  month     = may,
  venue     = {San Francisco, California},
  url       = {http://nowlab.cse.ohio-state.edu/publications/},
}