# OpenAttack example — 51 lines · 1.6 KB
'''
This example code shows how to conduct adversarial attacks against a sentence pair classification (NLI) model
'''
import OpenAttack
import transformers
import datasets

class NLIWrapper(OpenAttack.classifiers.Classifier):
    """Adapts a single-sentence victim classifier to the NLI (sentence-pair) setting.

    The attacker perturbs only the premise; the hypothesis is taken from the
    current attack context and re-attached to every candidate sentence before
    it is scored by the wrapped model.
    """

    def __init__(self, model: OpenAttack.classifiers.Classifier):
        # The underlying classifier that actually scores sentence pairs.
        self.model = model

    def get_pred(self, input_):
        # Predicted label = argmax over the class-probability rows.
        return self.get_prob(input_).argmax(axis=1)

    def get_prob(self, input_):
        # Hypothesis of the example currently under attack, supplied by the
        # attack context (see dataset_mapping, which preserves "hypothesis").
        ref = self.context.input["hypothesis"]
        # RoBERTa-style pair encoding: premise </s></s> hypothesis.
        input_sents = [sent + "</s></s>" + ref for sent in input_]
        return self.model.get_prob(input_sents)
def dataset_mapping(x):
    """Map a GLUE/MNLI example to OpenAttack's expected schema.

    "x" is the text the attacker perturbs (the premise), "y" is the gold
    label, and "hypothesis" is kept so the victim wrapper can rebuild the
    sentence pair at scoring time.
    """
    return {
        "x": x["premise"],
        "y": x["label"],
        "hypothesis": x["hypothesis"]
    }
30
def main():
    """Run a PWWS attack against roberta-large-mnli on 20 MNLI training examples."""
    print("Load model")
    tokenizer = transformers.AutoTokenizer.from_pretrained("roberta-large-mnli")
    model = transformers.AutoModelForSequenceClassification.from_pretrained("roberta-large-mnli", output_hidden_states=False)
    # TransformersClassifier needs the input embedding matrix for gradient-free scoring hooks.
    victim = OpenAttack.classifiers.TransformersClassifier(model, tokenizer, model.roberta.embeddings.word_embeddings)
    # Wrap so each candidate premise is re-paired with the example's hypothesis.
    victim = NLIWrapper(victim)

    print("New Attacker")
    attacker = OpenAttack.attackers.PWWSAttacker()

    # Small slice keeps the demo fast; mapping renames columns to OpenAttack's schema.
    dataset = datasets.load_dataset("glue", "mnli", split="train[:20]").map(function=dataset_mapping)

    print("Start attack")
    attack_eval = OpenAttack.AttackEval(attacker, victim, metrics=[
        OpenAttack.metric.EditDistance(),
        OpenAttack.metric.ModificationRate()
    ])
    attack_eval.eval(dataset, visualize=True)
if __name__ == "__main__":
    main()