by Tianchu Ji, Shraddhan Jain, Michael Ferdman, Peter Milder, H. Andrew Schwartz, Niranjan Balasubramanian
Reference:
On the Distribution, Sparsity, and Inference-time Quantization of Attention Values in Transformers. Tianchu Ji, Shraddhan Jain, Michael Ferdman, Peter Milder, H. Andrew Schwartz, Niranjan Balasubramanian. In Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021, Association for Computational Linguistics, 2021.
Bibtex Entry:
@inproceedings{ferdman-acl-findings-inference-time-quantization-of-attention-values-in-transformers,
  author    = {Ji, Tianchu and
               Jain, Shraddhan and
               Ferdman, Michael and
               Milder, Peter and
               Schwartz, H. Andrew and
               Balasubramanian, Niranjan},
  title     = {On the Distribution, Sparsity, and Inference-time Quantization of Attention Values in Transformers},
  booktitle = {Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021},
  month     = aug,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {4147--4157},
  pdf       = {https://compas.cs.stonybrook.edu/%7Emferdman/downloads.php/ACL21-FINDINGS_On_the_Distribution_Sparsity,_and_Inference-time_Quantization_ofAttention_Values_in_Transformers.pdf},
}