@inproceedings{fdi:010090487,
  author    = {Xu, Lei and Cuesta-Infante, Alfredo and Berti-Equille, Laure and Veeramachaneni, Kalyan},
  title     = {{R\&R}: Metric-Guided Adversarial Sentence Generation},
  booktitle = {Findings of the Association for Computational Linguistics: {AACL}-{IJCNLP} 2022},
  year      = {2022},
  pages     = {438--452},
  language  = {ENG},
  abstract  = {Adversarial examples are helpful for analyzing and improving the robustness of text classifiers. Generating high-quality adversarial examples is a challenging task as it requires generating fluent adversarial sentences that are semantically similar to the original sentences and preserve the original labels, while causing the classifier to misclassify them. Existing methods prioritize misclassification by maximizing each perturbation's effectiveness at misleading a text classifier; thus, the generated adversarial examples fall short in terms of fluency and similarity. In this paper, we propose a rewrite and rollback (R\&R) framework for adversarial attack. It improves the quality of adversarial examples by optimizing a critique score which combines the fluency, similarity, and misclassification metrics. R\&R generates high-quality adversarial examples by allowing exploration of perturbations that do not have immediate impact on the misclassification metric but can improve fluency and similarity metrics. We evaluate our method on 5 representative datasets and 3 classifier architectures. Our method outperforms current state-of-the-art in attack success rate by +16.2\%, +12.8\%, and +14.0\% on the classifiers respectively.},
  doi       = {10.48550/arXiv.2104.08453},
  url       = {https://www.documentation.ird.fr/hor/fdi:010090487},
}