% Singh et al., ICS '23 conference paper on hybrid tensor-expert-data
% parallelism for Mixture-of-Experts training.
% The DOI is stored bare in `doi` (styles add the resolver prefix); `url` is
% kept, pointing at the canonical doi.org resolver, for styles that only
% read `url`.
@inproceedings{singh:ics2023,
  author    = {Singh, Siddharth and Ruwase, Olatunji and Awan, Ammar Ahmad and Rajbhandari, Samyam and He, Yuxiong and Bhatele, Abhinav},
  title     = {A Hybrid Tensor-Expert-Data Parallelism Approach to Optimize Mixture-of-Experts Training},
  booktitle = {Proceedings of the International Conference on Supercomputing},
  series    = {ICS '23},
  year      = {2023},
  month     = jun,
  doi       = {10.1145/3577193.3593704},
  url       = {https://doi.org/10.1145/3577193.3593704},
}