google-research

Форк
0
/
instructions_registry.py 
176 строк · 7.1 Кб
# coding=utf-8
# Copyright 2024 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Registry of all instructions."""
17
from instruction_following_eval import instructions
18

19
# Category prefixes for instruction ids. A full instruction id is built by
# concatenating one of these prefixes with an instruction name, e.g.
# _KEYWORD + "existence" == "keywords:existence".
_KEYWORD = "keywords:"

_LANGUAGE = "language:"

_LENGTH = "length_constraints:"

_CONTENT = "detectable_content:"

_FORMAT = "detectable_format:"

# NOTE(review): in the code visible here this prefix is only referenced by a
# commented-out registry entry ("constrained_start").
_MULTITURN = "multi-turn:"

_COMBINATION = "combination:"

_STARTEND = "startend:"

_CHANGE_CASES = "change_case:"

_PUNCTUATION = "punctuation:"
38

39
# Maps each enabled instruction id ("<category>:<name>") to the checker
# exposed for it by the `instructions` module. Entries that are commented out
# are disabled and therefore absent from the registry at runtime.
INSTRUCTION_DICT = {
    _KEYWORD + "existence": instructions.KeywordChecker,
    _KEYWORD + "frequency": instructions.KeywordFrequencyChecker,
    # TODO(jeffreyzhou): make a proper set of sentences to choose from
    # _KEYWORD + "key_sentences": instructions.KeySentenceChecker,
    _KEYWORD + "forbidden_words": instructions.ForbiddenWords,
    _KEYWORD + "letter_frequency": instructions.LetterFrequencyChecker,
    _LANGUAGE + "response_language": instructions.ResponseLanguageChecker,
    _LENGTH + "number_sentences": instructions.NumberOfSentences,
    _LENGTH + "number_paragraphs": instructions.ParagraphChecker,
    _LENGTH + "number_words": instructions.NumberOfWords,
    _LENGTH + "nth_paragraph_first_word": instructions.ParagraphFirstWordCheck,
    _CONTENT + "number_placeholders": instructions.PlaceholderChecker,
    _CONTENT + "postscript": instructions.PostscriptChecker,
    _FORMAT + "number_bullet_lists": instructions.BulletListChecker,
    # TODO(jeffreyzhou): Pre-create paragraph or use prompt to replace
    # _CONTENT + "rephrase_paragraph": instructions.RephraseParagraph,
    _FORMAT + "constrained_response": instructions.ConstrainedResponseChecker,
    _FORMAT + "number_highlighted_sections": (
        instructions.HighlightSectionChecker),
    _FORMAT + "multiple_sections": instructions.SectionChecker,
    # TODO(tianjianlu): Re-enable rephrasing with preprocessing the message.
    # _FORMAT + "rephrase": instructions.RephraseChecker,
    _FORMAT + "json_format": instructions.JsonFormat,
    _FORMAT + "title": instructions.TitleChecker,
    # TODO(tianjianlu): Re-enable with specific prompts.
    # _MULTITURN + "constrained_start": instructions.ConstrainedStartChecker,
    _COMBINATION + "two_responses": instructions.TwoResponsesChecker,
    _COMBINATION + "repeat_prompt": instructions.RepeatPromptThenAnswer,
    _STARTEND + "end_checker": instructions.EndChecker,
    _CHANGE_CASES + "capital_word_frequency": (
        instructions.CapitalWordFrequencyChecker),
    _CHANGE_CASES + "english_capital": (
        instructions.CapitalLettersEnglishChecker),
    _CHANGE_CASES + "english_lowercase": (
        instructions.LowercaseLettersEnglishChecker),
    _PUNCTUATION + "no_comma": instructions.CommaChecker,
    _STARTEND + "quotation": instructions.QuotationChecker,
}
78

79
# Maps each instruction id to the set of instruction ids it conflicts with.
# The relation written here is not necessarily symmetric (e.g. "english_capital"
# does not list "english_lowercase"); conflict_make() is what makes it so.
# Sets derived from INSTRUCTION_DICT are computed once, when this module is
# loaded, so they contain only the ids that are enabled in the registry.
INSTRUCTION_CONFLICTS = {
    _KEYWORD + "existence": {_KEYWORD + "existence"},
    _KEYWORD + "frequency": {_KEYWORD + "frequency"},
    # TODO(jeffreyzhou): make a proper set of sentences to choose from
    # _KEYWORD + "key_sentences": instructions.KeySentenceChecker,
    _KEYWORD + "forbidden_words": {_KEYWORD + "forbidden_words"},
    _KEYWORD + "letter_frequency": {_KEYWORD + "letter_frequency"},
    _LANGUAGE + "response_language": {
        _LANGUAGE + "response_language",
        _FORMAT + "multiple_sections",
        _KEYWORD + "existence",
        _KEYWORD + "frequency",
        _KEYWORD + "forbidden_words",
        _STARTEND + "end_checker",
        _CHANGE_CASES + "english_capital",
        _CHANGE_CASES + "english_lowercase",
    },
    _LENGTH + "number_sentences": {_LENGTH + "number_sentences"},
    _LENGTH + "number_paragraphs": {
        _LENGTH + "number_paragraphs",
        _LENGTH + "nth_paragraph_first_word",
        _LENGTH + "number_sentences",
    },
    _LENGTH + "number_words": {_LENGTH + "number_words"},
    _LENGTH + "nth_paragraph_first_word": {
        _LENGTH + "nth_paragraph_first_word",
        _LENGTH + "number_paragraphs",
    },
    _CONTENT + "number_placeholders": {_CONTENT + "number_placeholders"},
    _CONTENT + "postscript": {_CONTENT + "postscript"},
    _FORMAT + "number_bullet_lists": {_FORMAT + "number_bullet_lists"},
    # TODO(jeffreyzhou): Pre-create paragraph or use prompt to replace
    # _CONTENT + "rephrase_paragraph": instructions.RephraseParagraph,
    # Conflicts with every enabled instruction.
    _FORMAT + "constrained_response": set(INSTRUCTION_DICT),
    _FORMAT + "number_highlighted_sections": {
        _FORMAT + "number_highlighted_sections",
    },
    _FORMAT + "multiple_sections": {
        _FORMAT + "multiple_sections",
        _LANGUAGE + "response_language",
        _FORMAT + "number_highlighted_sections",
    },
    # TODO(tianjianlu): Re-enable rephrasing with preprocessing the message.
    # _FORMAT + "rephrase": instructions.RephraseChecker,
    # Conflicts with every enabled instruction except the ones subtracted.
    _FORMAT + "json_format": set(INSTRUCTION_DICT).difference({
        _KEYWORD + "forbidden_words",
        _KEYWORD + "existence",
    }),
    _FORMAT + "title": {_FORMAT + "title"},
    # TODO(tianjianlu): Re-enable with specific prompts.
    # _MULTITURN + "constrained_start": instructions.ConstrainedStartChecker,
    _COMBINATION + "two_responses": set(INSTRUCTION_DICT).difference({
        _KEYWORD + "forbidden_words",
        _KEYWORD + "existence",
        _LANGUAGE + "response_language",
        _FORMAT + "title",
        _PUNCTUATION + "no_comma",
    }),
    _COMBINATION + "repeat_prompt": set(INSTRUCTION_DICT).difference({
        _KEYWORD + "existence",
        _FORMAT + "title",
        _PUNCTUATION + "no_comma",
    }),
    _STARTEND + "end_checker": {_STARTEND + "end_checker"},
    _CHANGE_CASES + "capital_word_frequency": {
        _CHANGE_CASES + "capital_word_frequency",
        _CHANGE_CASES + "english_lowercase",
        _CHANGE_CASES + "english_capital",
    },
    _CHANGE_CASES + "english_capital": {_CHANGE_CASES + "english_capital"},
    _CHANGE_CASES + "english_lowercase": {
        _CHANGE_CASES + "english_lowercase",
        _CHANGE_CASES + "english_capital",
    },
    _PUNCTUATION + "no_comma": {_PUNCTUATION + "no_comma"},
    _STARTEND + "quotation": {_STARTEND + "quotation", _FORMAT + "title"},
}
159

160

161
def conflict_make(conflicts):
  """Makes sure if A conflicts with B, B will conflict with A.

  The dictionary is updated in place and also returned. If an instruction id
  appears only as a conflict target (it has no entry of its own), an entry is
  created for it instead of raising KeyError.

  Args:
    conflicts: Dictionary of potential conflicts where key is instruction id
      and value is set of instruction ids that it conflicts with.

  Returns:
    Revised version of the dictionary. All instructions conflict with
    themselves. If A conflicts with B, B will conflict with A.
  """
  # Iterate over snapshots: entries may be added to `conflicts` below, and a
  # dict or set must not change size while it is being iterated.
  for key in list(conflicts):
    for other in list(conflicts[key]):
      # A newly created entry starts out conflicting with itself.
      conflicts.setdefault(other, {other}).add(key)
    conflicts[key].add(key)
  return conflicts
177

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.