% Conference paper entry. Names are in "Last, First" form joined by " and ".
% Given names are initials only because the originating listing had no more.
% "venue" holds the conference location (biblatex event field; ignored by
% classic BibTeX styles). "source" is a non-standard provenance field that
% styles ignore: it records the listing page this entry was taken from and
% is not a link to the paper itself.
@inproceedings{yao2025training,
  author    = {Yao, J. and Jacobs, S. and Tanaka, M. and Ruwase, O. and Subramoni, H. and Panda, D.},
  title     = {Training Ultra Long Context Language Model with Fully Pipelined Distributed Transformer},
  booktitle = {The Eighth Annual Conference on Machine Learning and Systems},
  year      = {2025},
  month     = may,
  venue     = {Santa Clara, California},
  source    = {http://nowlab.cse.ohio-state.edu/publications/},
}