% Conference paper (IPDPS 2024) on inter-layer expert affinity for MoE inference.
% Notes for maintainers:
%   - Author given names are stored as initials because only initials were
%     available when the entry was created; expand them when the full names
%     are confirmed.
%   - "venue" holds the conference city (biblatex field; classic styles ignore it).
%   - "source" is an ignored provenance field: the listing page the record was
%     taken from, not a link to the paper itself.
@inproceedings{yao2024exploiting,
  author    = {Yao, J. and Anthony, Q. and Shafi, A. and Subramoni, H. and Panda, D.},
  title     = {Exploiting Inter-Layer Expert Affinity for Accelerating Mixture-of-Experts Model Inference},
  booktitle = {38th {IEEE} International Parallel \& Distributed Processing Symposium},
  year      = {2024},
  month     = may,
  venue     = {San Francisco, California},
  source    = {http://nowlab.cse.ohio-state.edu/publications/},
}