OpenAttack
33 lines · 1.2 KB
'''
This example code illustrates adversarial attacks against a fine-tuned model from Transformers on a dataset from Datasets.
'''
import OpenAttack
import transformers
import datasets
8def dataset_mapping(x):9return {10"x": x["sentence"],11"y": 1 if x["label"] > 0.5 else 0,12}13
def main():
    """Run a PWWS adversarial attack against a BERT SST-2 classifier.

    Downloads the victim model and tokenizer from the Hugging Face Hub,
    wraps them for OpenAttack, attacks the first 20 SST training examples
    with PWWS, and prints an evaluation report with edit-distance and
    modification-rate metrics.
    """
    print("Load model")
    # Fine-tuned SST-2 checkpoint used as the victim model.
    model_name = "echarlaix/bert-base-uncased-sst2-acc91.1-d37-hybrid"
    tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
    model = transformers.AutoModelForSequenceClassification.from_pretrained(
        model_name, num_labels=2, output_hidden_states=False
    )
    # The word-embedding layer is needed by gradient/embedding-based components.
    victim = OpenAttack.classifiers.TransformersClassifier(
        model, tokenizer, model.bert.embeddings.word_embeddings
    )

    print("New Attacker")
    attacker = OpenAttack.attackers.PWWSAttacker()

    # Small slice of SST, remapped to the {"x", "y"} schema OpenAttack expects.
    dataset = datasets.load_dataset("sst", split="train[:20]").map(function=dataset_mapping)

    print("Start attack")
    attack_eval = OpenAttack.AttackEval(attacker, victim, metrics = [
        OpenAttack.metric.EditDistance(),
        OpenAttack.metric.ModificationRate()
    ])
    attack_eval.eval(dataset, visualize=True)
32if __name__ == "__main__":33main()34