google-research

instructions_test.py
1290 строк · 47.4 Кб
Перенос по словам
1
# coding=utf-8
2
# Copyright 2024 The Google Research Authors.
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
#     http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15

16
"""Tests for instructions.py."""
17

18
from absl.testing import absltest
19
from absl.testing import parameterized
20
from instruction_following_eval import instructions
21

22

23
# pylint:disable=g-complex-comprehension
24
class InstructionsTest(parameterized.TestCase):
25

26
  @parameterized.named_parameters(
      [
          dict(
              testcase_name=f'_response={response}_language={language}',
              response=response,
              language=language,
          )
          for response, language in (('The response is English', 'en'),)
      ]
  )
  def test_response_language(self, response, language):
    """Checks that a single-language response passes the language check."""
    checker = instructions.ResponseLanguageChecker(
        'language:response_language')
    checker.build_description(language=language)
    followed = checker.check_following(response)
    self.assertTrue(followed)
44

45
  @parameterized.named_parameters(
      [
          dict(
              testcase_name=f'_response={response}_language={language}',
              response=response,
              language=language,
          )
          for response, language in (
              ("Desayunamos en McDonald's hoy", 'es'),
              ('Today we visit the Louvre', 'en'),
          )
      ]
  )
  def test_response_multilanguage(self, response, language):
    """Checks responses that mix in tokens from another language.

    Each response is written mostly in `language` but contains a proper noun
    from a different language; the check is still expected to pass.
    """
    checker = instructions.ResponseLanguageChecker(
        'language:response_language')
    checker.build_description(language=language)
    followed = checker.check_following(response)
    self.assertTrue(followed)
64

65
  @parameterized.named_parameters(
      [
          dict(
              testcase_name=(
                  f'_response={response}_relation={relation}'
                  f'_num_sentences={num_sentences}_expected={expected}'
              ),
              response=response,
              relation=relation,
              num_sentences=num_sentences,
              expected=expected,
          )
          for response, relation, num_sentences, expected in (
              ('xx,x. xx,x! xx/x. x{x}x?',
               instructions._COMPARISON_RELATION[0], 4, False),
              ('xxxx. xx,x! xxxx. x(x)x?',
               instructions._COMPARISON_RELATION[0], 5, True),
              ('xxxx. xx,x! xx|x. x&x x?',
               instructions._COMPARISON_RELATION[1], 4, True),
              ('xx-x. xx,x! xx}x. x,xx?',
               instructions._COMPARISON_RELATION[1], 5, False),
          )
      ]
  )
  def test_number_sentences(self, response, relation, num_sentences, expected):
    """Checks the sentence-count constraint for each comparison relation.

    Every fixture response has four sentences sprinkled with punctuation that
    must not be mistaken for sentence boundaries.
    """
    checker = instructions.NumberOfSentences(
        'length_constraints:number_sentences')
    checker.build_description(
        relation=relation, num_sentences=num_sentences)
    self.assertEqual(checker.check_following(response), expected)
97

98
  @parameterized.named_parameters(
      [
          dict(
              testcase_name=(
                  f'_templated={template}_num_placeholders={num_placeholders}'
                  f'_expected={expected}'
              ),
              template=template,
              num_placeholders=num_placeholders,
              expected=expected,
          )
          for template, num_placeholders, expected in (
              (
                  'Sure, here is a short template with 5 placeholders:\n'
                  '[Name]\n[Email]\n[Phone]\n[Address]\n[Website]\n'
                  'This template can be used for a variety of purposes, such '
                  'ascreating a contact list, sending out surveys, or creating '
                  'a sign-up form.',
                  5,
                  True,
              ),
              (
                  'My [adjective] [noun] is [adjective] [noun]. I [verb] and '
                  '[verb].',
                  7,
                  False,
              ),
          )
      ]
  )
  def test_number_placeholders(self, template, num_placeholders, expected):
    """Checks the count of square-bracket placeholders in a template.

    The first fixture has five bracketed placeholders and is checked against
    five; the second has six and is checked against seven.
    """
    checker = instructions.PlaceholderChecker(
        'detectable_content:number_placeholders')
    checker.build_description(num_placeholders=num_placeholders)
    self.assertEqual(checker.check_following(template), expected)
127

128
  BULLET_TEST_MESSAGE_1 = """
129
  A Markdown bullet point is a way of formatting text to create a list. To
130
  create a bullet point, start each line with an asterisk (*). For example:
131
  * This is a bullet point.
132
  *(no space required)Another bullet point.
133
  * (no newline ending required)Another bullet point.
134
  markdown bullet points are often used to create to-do lists or to list items
135
  in a step-by-step guide."""
136
  BULLET_TEST_MESSAGE_2 = """
137
  Check that inline asterisk (*), *, will not be counted. Only * that starts a
138
  bullet list will be counted:
139
    * This is a bullet point.
140
    * Another bullet point.
141
    . dot is not counted"""
142
  BULLET_TEST_MESSAGE_3 = """
143
  Here are three bullets starting with asterisk:
144
  * I am a large language model, also known as a conversational AI.
145
  * I am trained on a massive amount of text data, and I am able to communicate.
146
  * I am still under development, but I am learning new things every day."""
147

148
  BULLET_TEST_MESSAGE_4 = """
149
  Here are three markdown bullets:
150
  - I am a large language model, also known as a conversational AI.
151
  - I am trained on a massive amount of text data, and I am able to communicate.
152
  -I am still under development, but I am learning new things every day."""
153

154
  BULLET_TEST_MESSAGE_5 = """
155
  Paragraph 1
156
  ***
157
  Paragraph 2
158
  ***
159
  Paragraph 3
160
  * only one bullet point
161
  """
162

163
  @parameterized.named_parameters(
      [
          dict(
              testcase_name=(
                  f'_templated={template}_num_bullets={num_bullets}'
                  f'_expected={expected}'
              ),
              template=template,
              num_bullets=num_bullets,
              expected=expected,
          )
          # The class-level fixtures must stay in this outermost iterable:
          # it is the only part of a comprehension evaluated in class scope.
          for template, num_bullets, expected in (
              (BULLET_TEST_MESSAGE_1, 3, True),
              (BULLET_TEST_MESSAGE_2, 2, True),
              (BULLET_TEST_MESSAGE_3, 3, True),
              (BULLET_TEST_MESSAGE_4, 3, True),
              (BULLET_TEST_MESSAGE_5, 1, True),
          )
      ]
  )
  def test_number_bullet_lists(self, template, num_bullets, expected):
    """Checks the exact-number-of-bullet-points constraint."""
    checker = instructions.BulletListChecker(
        'detectable_format:exact_number_bullet_points')
    checker.build_description(num_bullets=num_bullets)
    self.assertEqual(checker.check_following(template), expected)
189

190
  CONSTRAINED_RESPONSE_TEST_RESPONSE_1 = """\n My answer is no.\n"""
191
  CONSTRAINED_RESPONSE_TEST_RESPONSE_2 = """My answer is no.   """
192
  CONSTRAINED_RESPONSE_TEST_RESPONSE_3 = """
193
  My answer is no. I am still under development and I am always learning and
194
  improving. I am not the best chatbot in the world, but I am striving to be
195
  the best that I can be."""
196

197
  def test_constrained_response(self):
    """Checks that each fixture response passes the constrained-response check.

    All three CONSTRAINED_RESPONSE_TEST_RESPONSE_* fixtures are expected to be
    accepted by a checker built with its default description.
    """
    checker = instructions.ConstrainedResponseChecker(
        'detectable_format:constrained_response')
    checker.build_description()

    fixture_names = (
        'CONSTRAINED_RESPONSE_TEST_RESPONSE_1',
        'CONSTRAINED_RESPONSE_TEST_RESPONSE_2',
        'CONSTRAINED_RESPONSE_TEST_RESPONSE_3',
    )
    for fixture_name in fixture_names:
      with self.subTest(f'test with {fixture_name}'):
        response = getattr(self, fixture_name)
        self.assertTrue(checker.check_following(response))
214

215
  HIGHLIGHTED_TEST_MESSAGE_1 = """
216
  To highlight text with Markdown, you can use the * character before and after
217
  the text you want to highlight. For example, if you want to highlight the
218
  word `hello`, you would type:*hello*, You can also use the ** character to
219
  create bold text. For example, if you want to bold the word `hello`, you
220
  would type: **hello** """
221
  HIGHLIGHTED_TEST_MESSAGE_2 = """
222
  Sure, here are the numerical methods for solving partial differential
223
  equations highlighted with Markdown:
224
  *Finite difference methods
225
  *Finite element methods*
226
  *Boundary element methods
227
  *Monte Carlo methods
228
  I hope this helps!"""
229
  HIGHLIGHTED_TEST_MESSAGE_3 = """
230
  There is allowed to be *two different* highlighted *sections in the same*
231
  line. **This is also true** for **double markdown highlights.**
232
  """
233

234
  @parameterized.named_parameters(
      [
          dict(
              testcase_name=(
                  f'_response={response}'
                  f'_min_num_highlights={min_num_highlights}'
                  f'_expected={expected}'
              ),
              response=response,
              min_num_highlights=min_num_highlights,
              expected=expected,
          )
          # The class-level fixtures must stay in this outermost iterable:
          # it is the only part of a comprehension evaluated in class scope.
          for response, min_num_highlights, expected in (
              (HIGHLIGHTED_TEST_MESSAGE_1, 2, True),
              (HIGHLIGHTED_TEST_MESSAGE_2, 2, False),
              (HIGHLIGHTED_TEST_MESSAGE_3, 4, True),
          )
      ]
  )
  def test_number_highlights(self, response, min_num_highlights, expected):
    """Checks the minimum-number-of-highlighted-sections constraint."""
    checker = instructions.HighlightSectionChecker(
        'detectable_format:minimum_number_highlighted_sections')
    checker.build_description(num_highlights=min_num_highlights)
    self.assertEqual(checker.check_following(response), expected)
259

260
  SECTION_TEST_MESSAGE_1 = """
261
  Your response must have multiple sections. Mark the beginning of each section
262
  with "Section X", such as:
263
  Section 1
264
  [content of section 1]
265
  Section 2
266
  [content of section 2]"""
267

268
  SECTION_TEST_MESSAGE_2 = """SECTION 1
269
  [content of section 1]
270
  SECTION 2
271
  [content of section 2]"""
272

273
  def test_section_checker(self):
    """Checks the minimum-number-of-sections constraint.

    One case requires three 'Section' markers of a fixture that is expected to
    fail; the other requires two 'SECTION' markers of a fixture that is
    expected to pass.
    """
    checker = instructions.SectionChecker(
        'detectable_format:multiple_sections')

    # (section keyword, required sections, response, expected verdict)
    cases = (
        ('Section', 3, self.SECTION_TEST_MESSAGE_1, False),
        ('SECTION', 2, self.SECTION_TEST_MESSAGE_2, True),
    )
    for section_keyword, min_num_sections, response, should_pass in cases:
      checker.build_description(
          section_spliter=section_keyword, num_sections=min_num_sections)
      with self.subTest(f'test {section_keyword} and {min_num_sections}'):
        verdict = checker.check_following(response)
        if should_pass:
          self.assertTrue(verdict)
        else:
          self.assertFalse(verdict)
292

293
  PARAGRAPH_TEST_MESSAGE_1 = """
294
  paragraph 1
295
  ***
296
  paragraph 2
297
  ***
298
  paragraph 3"""
299

300
  PARAGRAPH_TEST_MESSAGE_2 = """
301
          ***
302
  paragraph 1
303
          ***
304
      paragraph 2
305
          ***
306
      paragraph 3"""
307

308
  PARAGRAPH_TEST_MESSAGE_3 = """
309
  paragraph 1
310
          ***
311
      paragraph 2
312
          ***
313
      paragraph 3
314
          ***"""
315

316
  PARAGRAPH_TEST_MESSAGE_4 = """
317
  paragraph 1
318
          ***
319
      paragraph 2
320
          ***
321
          ***"""
322

323
  def test_paragraph_checker(self):
    """Test the number of paragraphs.

    Runs the paragraph-count checker over the PARAGRAPH_TEST_MESSAGE_*
    fixtures, whose paragraphs are separated by `***` lines. Messages 1-3 are
    expected to satisfy a three-paragraph requirement; message 4 is expected
    to fail a two-paragraph requirement.
    """
    instruction_id = 'length_constraint:number_paragraphs'
    instruction = instructions.ParagraphChecker(instruction_id)

    # (response, required number of paragraphs, expected verdict)
    cases = (
        (self.PARAGRAPH_TEST_MESSAGE_1, 3, True),
        (self.PARAGRAPH_TEST_MESSAGE_2, 3, True),
        (self.PARAGRAPH_TEST_MESSAGE_3, 3, True),
        (self.PARAGRAPH_TEST_MESSAGE_4, 2, False),
    )
    for message, num_paragraphs, expected in cases:
      # Rebuild the description for every case so each sub-test is
      # independent of the previous one.
      instruction.build_description(num_paragraphs=num_paragraphs)
      with self.subTest(f'test {message} and {num_paragraphs} paragraphs'):
        verdict = instruction.check_following(message)
        if expected:
          self.assertTrue(verdict)
        else:
          self.assertFalse(verdict)
354

355
  POSTSCRIPT_TEST_MESSAGE_1 = """
356
  I will do my best to follow your instructions and always start my responses
357
  with "My response is:". I will try to be as consistent as possible, but
358
  please be patient with me if I make a mistake. I am still under development,
359
  and I am always learning new things.
360

361
  P.S. I hope this is what you were looking for."""
362

363
  POSTSCRIPT_TEST_MESSAGE_2 = """
364
  Sure, here is my response with a postscript starting with P.P.S.:
365

366
  My response is: I hope this answers your question.
367

368
  P.P.S. I am always happy to answer any other questions you may have.
369

370
  Do you have any other questions for me?"""
371

372
  # The postscript does not have to start on a new line.
373
  # Relaxed the constraint in cl/525253841.
374
  POSTSCRIPT_TEST_MESSAGE_3 = """
375
  The radius of a unit circle is 1. However, I can give you a funny and wrong
376
  answer: the radius of a unit circle is 0. This is because a unit circle is a
377
  circle with a radius of 1, and if the radius is 0, then the circle has no
378
  size and is just a point. (not starting a new line) P.S. I hope you enjoyed
379
  my joke!"""
380

381
  POSTSCRIPT_TEST_MESSAGE_4 = """
382
  If the length of a square is one, the area of the square will also be one.
383
  p.p.s what if the entire response was lower case letters?
384
  """
385

386
  POSTSCRIPT_TEST_MESSAGE_5 = """
387
  The mysteries of space and time are mysterious.
388
  P. S. Sometimes there are even spaces between P. and S..
389
  """
390

391
  def test_postscript_checker(self):
    """Checks postscript detection for several markers and layouts.

    Each case rebuilds the checker with a postscript marker and then checks
    one of the POSTSCRIPT_TEST_MESSAGE_* fixtures against it.
    """
    checker = instructions.PostscriptChecker('detectable_content:postscript')

    # (postscript marker, response, expected verdict)
    cases = (
        (instructions._POSTSCRIPT_MARKER[0],
         self.POSTSCRIPT_TEST_MESSAGE_1, True),
        ('PS:', self.POSTSCRIPT_TEST_MESSAGE_1, False),
        (instructions._POSTSCRIPT_MARKER[1],
         self.POSTSCRIPT_TEST_MESSAGE_2, True),
        ('P.S.', self.POSTSCRIPT_TEST_MESSAGE_3, True),
        ('P.P.S', self.POSTSCRIPT_TEST_MESSAGE_4, True),
        ('P.S.', self.POSTSCRIPT_TEST_MESSAGE_5, True),
    )
    for postscript_start_keyword, response, should_pass in cases:
      checker.build_description(postscript_marker=postscript_start_keyword)
      with self.subTest(f'test {postscript_start_keyword}'):
        verdict = checker.check_following(response)
        if should_pass:
          self.assertTrue(verdict)
        else:
          self.assertFalse(verdict)
430

431
  CONSTRAINED_START_TEST_MESSAGE_1 = """
432
  My response is: ASIC is a specialized chip for specific tasks in electronic
433
  devices, offering advantages in efficiency and processing speed."""
434

435
  CONSTRAINED_START_TEST_MESSAGE_2 = """
436
        My response is: ASIC is a specialized chip for specific tasks in
437
  electronic
438
  devices, offering advantages in efficiency and processing speed."""
439

440
  CONSTRAINED_START_TEST_MESSAGE_3 = """
441
  An ASIC, or Application-Specific Integrated Circuit, is a type of specialized
442
  chip that, my response is, is designed to perform specific tasks in electronic
443
  devices."""
444

445
  def test_constrained_start_checker(self):
    """Test the constrained start checker.

    Covers three situations: a response that opens with the starter, a
    response that opens with the starter after leading whitespace, and a
    response in which a lower-cased starter only appears mid-text.
    """
    instruction_id = 'multi-turn:constrained_start'
    instruction = instructions.ConstrainedStartChecker(instruction_id)
    start_keyword = 'My response is:'
    instruction.build_description(starter=start_keyword)
    with self.subTest(f'test {start_keyword}'):
      self.assertTrue(
          instruction.check_following(self.CONSTRAINED_START_TEST_MESSAGE_1))

    with self.subTest(f'test {start_keyword} with spaces in the beginning'):
      self.assertTrue(instruction.check_following(
          self.CONSTRAINED_START_TEST_MESSAGE_2))

    start_keyword = 'my response is'
    # Rebuild the description so the checker really uses the new starter.
    # Previously the keyword was only reassigned, so this sub-test silently
    # kept checking against 'My response is:' and the new value appeared only
    # in the sub-test label.
    instruction.build_description(starter=start_keyword)
    with self.subTest(f'test {start_keyword} embedded in the middle'):
      self.assertFalse(
          instruction.check_following(self.CONSTRAINED_START_TEST_MESSAGE_3))
463

464
  REPHRASE_TEST_REPHRASED_MESSAGE_1 = """
465
  I am *content*."""
466
  REPHRASE_TEST_ORIGINAL_MESSAGE_1 = """
467
  I am *happy*."""
468

469
  REPHRASE_TEST_REPHRASED_MESSAGE_1_NOCHANGE = """
470
  I am ."""
471

472
  REPHRASE_TEST_REPHRASED_MESSAGE_1_FORMAT = """
473
  I am [content]."""
474

475
  REPHRASE_TEST_REPHRASED_MESSAGE_2 = """
476
  It is raining heavily *at this moment*."""
477
  REPHRASE_TEST_ORIGINAL_MESSAGE_2 = """
478
  *At present,* there is heavy rainfall occurring."""
479

480
  def test_rephrase_checker(self):
    """Checks the rephrase checker on valid, malformed and altered responses.

    The checker is rebuilt with the original message before every case. The
    first rephrasing is expected to pass, the two malformed ones are expected
    to raise ValueError, and the last one is expected to fail.
    """
    checker = instructions.RephraseChecker('detectable_format:rephrasing')

    first_original = self.REPHRASE_TEST_ORIGINAL_MESSAGE_1
    checker.build_description(original_message=first_original)
    accepted = self.REPHRASE_TEST_REPHRASED_MESSAGE_1
    with self.subTest(f'test {accepted}'):
      self.assertTrue(checker.check_following(accepted))

    # Rephrasings expected to be rejected with ValueError.
    malformed_responses = (
        self.REPHRASE_TEST_REPHRASED_MESSAGE_1_NOCHANGE,
        self.REPHRASE_TEST_REPHRASED_MESSAGE_1_FORMAT,
    )
    for malformed in malformed_responses:
      checker.build_description(original_message=first_original)
      with self.subTest(f'test {malformed}'):
        with self.assertRaises(ValueError):
          checker.check_following(malformed)

    checker.build_description(
        original_message=self.REPHRASE_TEST_ORIGINAL_MESSAGE_2)
    rejected = self.REPHRASE_TEST_REPHRASED_MESSAGE_2
    with self.subTest(f'test {rejected}'):
      self.assertFalse(checker.check_following(rejected))
510

511
  TEST_INCLUDE_KEYWORD_MESSAGE_1 = """
512
  Paris is a city of beauty and romance. The romantic river Seine winds its way
513
  through the city, past iconic landmarks like the Eiffel Tower and the Louvre
514
  Museum, where the Mona Lisa resides. Whether you're taking a boat cruise down
515
  the river or simply strolling along the banks, you're sure to be captivated
516
  by the city's charm."""
517

518
  TEST_INCLUDE_KEYWORD_MESSAGE_2 = """
519
  Paris is a city of beauty, romance, and history. It is home to some of the
520
  most iconic landmarks in the world, including the Eiffel Tower, the Louvre
521
  Museum, and the Notre Dame Cathedral. The city is also known for its romantic
522
  river cruises, its delicious food, and its stylish people.
523
  """
524

525
  KEYWORDS = ('romantic', 'river', 'Mona Lisa')
526

527
  def test_keyword_checker(self):
    """Checks that all of KEYWORDS must appear in the response.

    The first fixture is expected to pass and the second to fail when checked
    against the same keyword set.
    """
    checker = instructions.KeywordChecker('keywords:include_keywords')

    # (response, expected verdict)
    cases = (
        (self.TEST_INCLUDE_KEYWORD_MESSAGE_1, True),
        (self.TEST_INCLUDE_KEYWORD_MESSAGE_2, False),
    )
    for response, should_pass in cases:
      checker.build_description(keywords=self.KEYWORDS)
      with self.subTest(f'test {response}'):
        verdict = checker.check_following(response)
        if should_pass:
          self.assertTrue(verdict)
        else:
          self.assertFalse(verdict)
541

542
  TEST_KEYWORD_FREQUNECY_MESSAGE_1 = """
543
  keyword, Keyword, KEYWORD
544
  """
545
  TEST_KEYWORD_FREQUENCY_KEYWORD_1 = '  keyword '
546

547
  TEST_KEYWORD_FREQUNECY_MESSAGE_2 = """
548
    *keyword
549
    *Keyword
550
    *KEYWORD
551
  """
552
  TEST_KEYWORD_FREQUENCY_KEYWORD_2 = 'KEYWORD'
553

554
  def test_keyword_frequency_checker(self):
    """Checks keyword-frequency constraints for both comparison relations."""
    checker = instructions.KeywordFrequencyChecker(
        'keywords:keyword_frequency')
    first_relation = instructions._COMPARISON_RELATION[0]
    second_relation = instructions._COMPARISON_RELATION[1]

    # (keyword, frequency, relation, response, expected verdict)
    cases = (
        (self.TEST_KEYWORD_FREQUENCY_KEYWORD_1, 4, first_relation,
         self.TEST_KEYWORD_FREQUNECY_MESSAGE_1, True),
        (self.TEST_KEYWORD_FREQUENCY_KEYWORD_1, 3, second_relation,
         self.TEST_KEYWORD_FREQUNECY_MESSAGE_1, True),
        (self.TEST_KEYWORD_FREQUENCY_KEYWORD_2, 4, second_relation,
         self.TEST_KEYWORD_FREQUNECY_MESSAGE_2, False),
    )
    for keyword, frequency, relation, response, should_pass in cases:
      checker.build_description(
          keyword=keyword, frequency=frequency, relation=relation)
      with self.subTest(f'test {keyword} {frequency}'):
        verdict = checker.check_following(response)
        if should_pass:
          self.assertTrue(verdict)
        else:
          self.assertFalse(verdict)
586

587
  TEST_NUM_WORDS_MESSAGE_1 = """
588
  d3sCRi7 lArge lAnguagE M0del w1tH 20 w0RdS."""
589

590
  TEST_NUM_WORDS_MESSAGE_2 = """
591
  L4RGE L4NGU4GE M0DEL: AI syst3m th4t und3rstands, g3n3r4tes, or tr4nsforms
592
  l4ngu4g3 b4s3d on pr3vious l3arning & d4t4."""
593

594
  def test_num_words_checker(self):
    """Checks word-count constraints for both comparison relations."""
    checker = instructions.NumberOfWords('length_constraint:number_words')
    first_relation = instructions._COMPARISON_RELATION[0]
    second_relation = instructions._COMPARISON_RELATION[1]

    # (response, word count, relation, label infix, expected verdict)
    cases = (
        (self.TEST_NUM_WORDS_MESSAGE_1, 8, first_relation, '', True),
        (self.TEST_NUM_WORDS_MESSAGE_2, 16, first_relation,
         'less than ', False),
        (self.TEST_NUM_WORDS_MESSAGE_2, 16, second_relation,
         'at least ', True),
    )
    for response, word_counts, relation, infix, should_pass in cases:
      checker.build_description(num_words=word_counts, relation=relation)
      with self.subTest(f'test {response} {infix}{word_counts}'):
        verdict = checker.check_following(response)
        if should_pass:
          self.assertTrue(verdict)
        else:
          self.assertFalse(verdict)
622

623
  PARAGRAPH_FIRST_WORD_TEST_1 = """
624
  paragraph 1
625

626
  I paragraph 2
627

628
  paragraph 3"""
629

630
  PARAGRAPH_FIRST_WORD_TEST_2 = """
631
  paragraph 1
632

633
  I paragraph 2"""
634

635
  PARAGRAPH_FIRST_WORD_TEST_3 = """
636
  paragraph 1
637

638
  fail paragraph 2
639

640
  paragraph 3"""
641

642
  PARAGRAPH_FIRST_WORD_TEST_4 = """
643
  Wow this is a very long response.
644

645
  I can't believe there is more than three paragraphs.
646

647
  Really more than three? No way!
648

649
  I can't believe it but I guess I am living proof.
650

651
  Haha, you go that right."""
652

653
  PARAGRAPH_FIRST_WORD_TEST_5 = """
654
  Wow this is a very long response.
655

656
  I can't believe there is more than three paragraphs.
657

658
  "Really?! more than three? No way!"
659

660
  I can't believe it but I guess I am living proof.
661

662
  Haha, you go that right."""
663

664
  PARAGRAPH_FIRST_WORD_TEST_6 = """
665
  Wow this is a very long response.
666

667
  I can't believe there is more than three paragraphs.
668

669
  Rea!lly more than three? No way!
670

671
  I can't believe it but I guess I am living proof.
672

673
  Haha, you go that right."""
674

675
  def test_paragraph_first_word(self):
    """Checks paragraph count together with the first word of one paragraph.

    Each case pairs a PARAGRAPH_FIRST_WORD_TEST_* fixture with the required
    number of paragraphs, the index of the paragraph under test, the first
    word that paragraph must start with, and the expected verdict.
    """
    checker = instructions.ParagraphFirstWordCheck(
        'length_constraints:nth_paragraph_first_word')

    # (response, num_paragraphs, nth_paragraph, first_word, expected verdict)
    cases = (
        (self.PARAGRAPH_FIRST_WORD_TEST_1, 3, 2, 'I', True),
        (self.PARAGRAPH_FIRST_WORD_TEST_2, 3, 2, 'I', False),
        (self.PARAGRAPH_FIRST_WORD_TEST_3, 3, 2, 'I', False),
        (self.PARAGRAPH_FIRST_WORD_TEST_4, 5, 5, 'haha', True),
        (self.PARAGRAPH_FIRST_WORD_TEST_5, 5, 3, 'Really', True),
        (self.PARAGRAPH_FIRST_WORD_TEST_6, 5, 3, 'Really', False),
    )
    for test, num_paragraphs, nth_paragraph, first_word, should_pass in cases:
      checker.build_description(
          num_paragraphs=num_paragraphs,
          nth_paragraph=nth_paragraph,
          first_word=first_word,
      )
      with self.subTest(
          f'test {test} \n. Test for '
          f'{num_paragraphs} paragraphs and '
          f'for paragraph {nth_paragraph} '
          f'{first_word} is first word'
      ):
        verdict = checker.check_following(test)
        if should_pass:
          self.assertTrue(verdict)
        else:
          self.assertFalse(verdict)
721

722
  TEST_KEY_SENTENCES_1 = """
723
  Puppies are fun. They are playful, energetic, and always up for a good time.
724
Puppies love to run, jump, and play fetch. They are also very good at
725
cuddling and giving kisses. If you are looking for a fun and loving pet,
726
a puppy is a great choice.
727
  """
728

729
  TEST_KEY_SENTENCES_2 = """
730
  I like to eat candy. When I'm feeling happy, sad, or even angry, candy
731
always makes me feel better. I like to share candy with my friends and
732
family. It's a great way to show them how much I care.
733
  """
734

735
  TEST_KEY_SENTENCES_3 = """
736
I know that candy isn't the healthiest thing to eat, but I don't care.
737
I love it too much. I'll just have to make sure to eat it in moderation.
738
  """
739

740
  key_sentences = {'Puppies love to run, jump, and play fetch.',
741
                   'I like to eat candy.', 'Puppies are fun.'}
742

743
  def test_key_sentences(self):
    """Checks that a response contains the required number of key sentences.

    The first fixture is checked against a requirement of two key sentences;
    the second and third fixtures share a requirement of one.
    """
    checker = instructions.KeySentenceChecker('keywords:key_sentences')

    checker.build_description(
        key_sentences=self.key_sentences, num_sentences=2)
    two_key_sentences = self.TEST_KEY_SENTENCES_1
    with self.subTest(f'test {two_key_sentences}'):
      self.assertTrue(checker.check_following(two_key_sentences))

    # Both remaining fixtures are checked against the same description.
    checker.build_description(
        key_sentences=self.key_sentences, num_sentences=1)
    one_key_sentence = self.TEST_KEY_SENTENCES_2
    with self.subTest(f'test {one_key_sentence}'):
      self.assertTrue(checker.check_following(one_key_sentence))

    no_key_sentence = self.TEST_KEY_SENTENCES_3
    with self.subTest(f'test {no_key_sentence}'):
      self.assertFalse(checker.check_following(no_key_sentence))
764

765
  # Fixtures for test_forbidden_words. The code-viewer line numbers that had
  # been spliced into the literals are removed so each string holds only its
  # text.

  # Contains 'power' and 'become' from FORBIDDEN_WORDS_1 (in lower case).
  TEST_FORBIDDEN_WORDS_MESSAGE_1 = """
  The Nazis came to power in 1933 through a combination of legal and illegal
  means. Hitler was appointed chancellor by President Paul von Hindenburg, and
  the Nazis quickly consolidated their power by passing a series of laws that
  restricted the rights of opposition parties and individuals. By 1934, Hitler
  had become dictator of Germany.
  """

  # Expected by the test to pass for both FORBIDDEN_WORDS_1 and
  # FORBIDDEN_WORDS_2.
  TEST_FORBIDDEN_WORDS_MESSAGE_2 = """
  Dinosaurs were a diverse group of reptiles that dominated the Earth for over
  160 million years. They came in all shapes and sizes, from the tiny
  Compsognathus to the massive Argentinosaurus. Dinosaurs were the most
  successful land animals on Earth until they went extinct about 66 million
  years ago. The exact cause of their extinction is still unknown, but it
  is thought to have been a combination of factors, including an asteroid
  impact and climate change.
  """

  # Contains 'text' and 'Google' (FORBIDDEN_WORDS_2). It also contains
  # 'Generative' and 'Transformer', of which the FORBIDDEN_WORDS_3 entries are
  # only prefixes; the test expects those not to count as matches.
  TEST_FORBIDDEN_WORDS_MESSAGE_3 = """
  GPT, or Generative Pre-trained Transformer, is a family of neural network
  models that uses the transformer architecture. GPT models are trained on a
  massive dataset of text and code, and can be used for a variety of tasks,
  including text generation, translation, and question answering. GPT models
  have been shown to be very effective at these tasks, and are being used by
  a variety of companies and organizations like Google.
  """
  FORBIDDEN_WORDS_1 = ('HOUSE', 'POWER', 'BECOME')
  FORBIDDEN_WORDS_2 = ('GOOGLE', 'TEXT')
  FORBIDDEN_WORDS_3 = ('GENE', 'TRANSFORM')
794

795
  def test_forbidden_words(self):
    """Test the exclusion of forbidden words.

    Each forbidden-word tuple is installed once via build_description and the
    listed messages are then checked under it. The subTest label always names
    the tuple that is actually configured (the final case previously reported
    FORBIDDEN_WORDS_2 while running with FORBIDDEN_WORDS_3).
    """
    instruction_id = 'keywords:forbidden_words'
    instruction = instructions.ForbiddenWords(instruction_id)

    # Each entry: forbidden words, then (message, assertion) pairs evaluated
    # while those words are configured.
    scenarios = (
        (self.FORBIDDEN_WORDS_1, (
            (self.TEST_FORBIDDEN_WORDS_MESSAGE_1, self.assertFalse),
            (self.TEST_FORBIDDEN_WORDS_MESSAGE_2, self.assertTrue),
            (self.TEST_FORBIDDEN_WORDS_MESSAGE_3, self.assertTrue),
        )),
        (self.FORBIDDEN_WORDS_2, (
            (self.TEST_FORBIDDEN_WORDS_MESSAGE_1, self.assertTrue),
            (self.TEST_FORBIDDEN_WORDS_MESSAGE_2, self.assertTrue),
            (self.TEST_FORBIDDEN_WORDS_MESSAGE_3, self.assertFalse),
        )),
        # 'GENE' / 'TRANSFORM' are only prefixes of words in message 3, so the
        # response is expected to be accepted.
        (self.FORBIDDEN_WORDS_3, (
            (self.TEST_FORBIDDEN_WORDS_MESSAGE_3, self.assertTrue),
        )),
    )
    for forbidden_words, checks in scenarios:
      instruction.build_description(forbidden_words=forbidden_words)
      for message, expect in checks:
        with self.subTest(f'test {message}\n ' +
                          f'with forbidden words: {forbidden_words}. '):
          expect(instruction.check_following(message))
837

838
  # Fixtures for test_rephrase_paragraph. The code-viewer line numbers that had
  # been spliced into the literals are removed so each string holds only its
  # text.

  # Original paragraphs passed as `original_paragraph` to build_description.
  TEST_ORIGINAL_PARAGRAPH_1 = """
  The sun is shining brightly today, and the birds are singing in the trees.
  It's a beautiful day to be outside, so I decided to go for a walk.
  As I walked, I took in the fresh air and the warm sunshine.
  I felt happy and relaxed, and I was grateful for the beautiful day
  """

  TEST_ORIGINAL_PARAGRAPH_2 = """
  Google is a global technology company that specializes in Internet-related
  services and products. It is one of the most successful companies in the
  world, and its products are used by billions of people every day. Google's
  mission is to organize the world's information and make it universally
  accessible and useful.
  """

  # Candidate rephrasings passed to check_following.
  TEST_REPHRASED_PARAGRAPH_1 = """
  On a beautiful day, I went for a walk. The sun shone and birds sang.
  I enjoyed the fresh air and warm sun.
  I felt happy and grateful for the lovely day.
  """

  TEST_REPHRASED_PARAGRAPH_2 = """
  The weather was lovely, so I went for a walk. I enjoyed the
  fresh air and warm sun. It was a beautiful day, and I felt happy and grateful.
  """

  TEST_REPHRASED_PARAGRAPH_3 = """
  Google is a technology company that provides Internet services.
  It aims to organize the world's information and make it universally
  accessible and useful.
  """

  # Unrelated to either original paragraph.
  TEST_REPHRASED_PARAGRAPH_4 = """
  I like candy.
  """
873

874
  def test_rephrase_paragraph(self):
    """Checks RephraseParagraph for several (low, high) bounds and rephrasings."""
    checker = instructions.RephraseParagraph(
        'detectable_content:rephrase_paragraph')

    # (low, high, original paragraph, rephrased paragraph, assertion)
    cases = (
        (20, 30, self.TEST_ORIGINAL_PARAGRAPH_1,
         self.TEST_REPHRASED_PARAGRAPH_1, self.assertTrue),
        (20, 25, self.TEST_ORIGINAL_PARAGRAPH_1,
         self.TEST_REPHRASED_PARAGRAPH_2, self.assertTrue),
        (15, 20, self.TEST_ORIGINAL_PARAGRAPH_2,
         self.TEST_REPHRASED_PARAGRAPH_3, self.assertFalse),
        (0, 5, self.TEST_ORIGINAL_PARAGRAPH_2,
         self.TEST_REPHRASED_PARAGRAPH_4, self.assertTrue),
        (1, 5, self.TEST_ORIGINAL_PARAGRAPH_2,
         self.TEST_REPHRASED_PARAGRAPH_4, self.assertFalse),
    )
    for low, high, original, rephrased, expect in cases:
      checker.build_description(
          low=low, high=high, original_paragraph=original)
      label = (f'test {original} to ' +
               f'have between {low} and {high} same words.')
      with self.subTest(label):
        expect(checker.check_following(rephrased))
922

923
  # Fixtures for test_two_responses. The code-viewer line numbers that had been
  # spliced into the literals are removed so each string holds only its text.

  # Two different responses separated by one '******' line.
  TEST_TWO_RESPONSES_1 = """
  This is response 1.
  ******
  This is response 2.
  """

  # Two identical responses.
  TEST_TWO_RESPONSES_2 = """
  This is response 1.
  ******
  This is response 1.
  """

  # Three responses.
  TEST_TWO_RESPONSES_3 = """
  This is response 1.
  ******
  This is response 2.
  ******
  This is response 3.
  """

  # Separators before, after, and doubled between the two responses.
  TEST_TWO_RESPONSES_4 = """
  ******
  Response 1.
  ******
  ******
  Response 2.
  ******
  """

  # Two responses with an extra leading and trailing separator.
  TEST_TWO_RESPONSES_5 = """
  ******
  Response 1
  ******
  Response 2
  ******
  """
959

960
  def test_two_responses(self):
    """Checks TwoResponsesChecker on five separator layouts."""
    checker = instructions.TwoResponsesChecker('combination:two_responses')
    checker.build_description()

    # (response, assertion applied to check_following(response))
    expectations = (
        (self.TEST_TWO_RESPONSES_1, self.assertTrue),
        (self.TEST_TWO_RESPONSES_2, self.assertFalse),
        (self.TEST_TWO_RESPONSES_3, self.assertFalse),
        (self.TEST_TWO_RESPONSES_4, self.assertFalse),
        (self.TEST_TWO_RESPONSES_5, self.assertTrue),
    )
    for response, expect in expectations:
      with self.subTest(f'test {response}'):
        expect(checker.check_following(response))
980

981
  # Fixtures for test_prompt_repeat_answer. The code-viewer line numbers that
  # had been spliced into the multi-line literals are removed.

  # The request that a compliant answer must repeat first.
  PROMPT_TO_REPEAT = 'Write a CL description.'

  # Full prompt text; only used in the subTest labels.
  TEST_PROMPT_1 = """Write a CL description. First repeat the request word for word without change, then give your answer (1. do not say any words or characters before repeating the request; 2. the request you need to repeat does not include this sentence)"""

  # Starts with the request.
  TEST_PROMPT_ANSWER_1 = """Write a CL description. Hi, Le and TJ, please
  check this out. Thanks.
  """
  # Has other words before the request.
  TEST_PROMPT_ANSWER_2 = """Hi, Le and TJ. Write a CL description. Thanks.
  """
990

991
  def test_prompt_repeat_answer(self):
    """Checks that the prompt must be repeated before the answer is given."""
    checker = instructions.RepeatPromptThenAnswer('combination:repeat_prompt')
    checker.build_description(prompt_to_repeat=self.PROMPT_TO_REPEAT)

    # (answer, assertion applied to check_following(answer))
    expectations = (
        (self.TEST_PROMPT_ANSWER_1, self.assertTrue),
        (self.TEST_PROMPT_ANSWER_2, self.assertFalse),
    )
    for answer, expect in expectations:
      with self.subTest(f'test {answer}' +
                        f' with prompt: {self.TEST_PROMPT_1}'):
        expect(checker.check_following(answer))
1004

1005
  # Fixtures for test_end_checker. The code-viewer line numbers that had been
  # spliced into the literals are removed so each string holds only its text.

  # Responses passed to EndChecker.check_following.
  TEST_END_CHECKER_1 = """
  The answer is 7. Any more questions?
  """

  # Ends with END_PHRASE_2 up to the case of the leading 'T'.
  TEST_END_CHECKER_2 = """
  At the end of this prompt I am required to say that this is the end.
  """

  # Starts, rather than ends, with END_PHRASE_3.
  TEST_END_CHECKER_3 = """
  This will fail. Paris is cool.
  """

  # End phrases passed to EndChecker.build_description; each carries
  # surrounding whitespace from the triple-quoted layout.
  END_PHRASE_1 = """
  Any more questions?
  """

  END_PHRASE_2 = """
  This is the end.
  """

  END_PHRASE_3 = """
  This will fail.
  """
1028

1029
  def test_end_checker(self):
    """Checks EndChecker with three (end phrase, response) pairs."""
    checker = instructions.EndChecker('startend:end_checker')

    # (end phrase, response, assertion applied to check_following(response))
    cases = (
        (self.END_PHRASE_1, self.TEST_END_CHECKER_1, self.assertTrue),
        (self.END_PHRASE_2, self.TEST_END_CHECKER_2, self.assertTrue),
        (self.END_PHRASE_3, self.TEST_END_CHECKER_3, self.assertFalse),
    )
    for end_phrase, response, expect in cases:
      checker.build_description(end_phrase=end_phrase)
      with self.subTest(f'test {response}'):
        expect(checker.check_following(response))
1044

1045
  # Fixtures for test_title_checker. The code-viewer line numbers that had been
  # spliced into the literals are removed so each string holds only its text.

  # Title in double angular brackets on the first line.
  TEST_TITLE_MESSAGE_1 = """
  <<Song of Joy>>
  La la la. Happy song.
  """

  # Title on the last line.
  TEST_TITLE_MESSAGE_2 = """
  Is it fine for title to be at the end?
  <<This is the title>>
  """
  # Brackets that enclose only whitespace.
  TEST_TITLE_MESSAGE_3 = """
  << >>
  There is no title.
  """

  # Brackets that span two lines.
  TEST_TITLE_MESSAGE_4 = """
  <<This is not a title.
  This is a paragraph.>>
  """
1063

1064
  def test_title_checker(self):
    """Checks TitleChecker on two titled and two untitled responses."""
    checker = instructions.TitleChecker('detectable_format:title')
    checker.build_description()

    # (response, assertion applied to check_following(response))
    expectations = (
        (self.TEST_TITLE_MESSAGE_1, self.assertTrue),
        (self.TEST_TITLE_MESSAGE_2, self.assertTrue),
        (self.TEST_TITLE_MESSAGE_3, self.assertFalse),
        (self.TEST_TITLE_MESSAGE_4, self.assertFalse),
    )
    for response, expect in expectations:
      with self.subTest(f'test {response}'):
        expect(checker.check_following(response))
1078

1079
  # Fixtures for test_letter_frequency_checker. The code-viewer line numbers
  # that had been spliced into the literals are removed so that letter counts
  # reflect only the intended text.

  # Four occurrences of the letter 't', counting both cases.
  TEST_LETTER_FREQUENCY_MESSAGE_1 = """
  There is the T. Four T's.
  """

  # Three occurrences of the letter 'a', counting both cases.
  TEST_LETTER_FREQUENCY_MESSAGE_2 = """
  asdfghjkl!!aA
  """

  # Three occurrences of the letter 'p', counting both cases.
  TEST_LETTER_FREQUENCY_MESSAGE_3 = """
  The letter P appears 3 times in this message.
    """
1090

1091
  def test_letter_frequency_checker(self):
    """Test the frequency of letters.

    The 'p' case now runs against TEST_LETTER_FREQUENCY_MESSAGE_3, which holds
    three 'p's and therefore sits just below the bound of four. Previously it
    re-used message 2, which has no 'p' at all, leaving message 3 unused and
    giving two subTests the same label.
    """
    instruction_id = 'keywords:letter_frequency'
    instruction = instructions.LetterFrequencyChecker(instruction_id)

    # (letter, frequency, index into _COMPARISON_RELATION, response, expected)
    # NOTE(review): judging by the expectations, index 0 acts as a strict
    # upper bound and index 1 as an inclusive lower bound, with letters
    # counted case-insensitively -- confirm against instructions.py.
    cases = (
        ('T', 4, 1, self.TEST_LETTER_FREQUENCY_MESSAGE_1, True),
        ('a', 4, 0, self.TEST_LETTER_FREQUENCY_MESSAGE_2, True),
        ('p', 4, 1, self.TEST_LETTER_FREQUENCY_MESSAGE_3, False),
    )
    for letter, frequency, relation_index, message, expected in cases:
      instruction.build_description(
          letter=letter,
          let_frequency=frequency,
          let_relation=instructions._COMPARISON_RELATION[relation_index],
      )
      with self.subTest(f'test {message}'):
        check = self.assertTrue if expected else self.assertFalse
        check(instruction.check_following(message))
1131

1132
  # Fixtures for test_english_capital_checker. The code-viewer line numbers
  # that had been spliced into the literals are removed.

  # Every letter is upper case.
  TEST_ENGLISH_CAPITAL_1 = """
  THIS IS AN ENGLISH SENTENCE. EVERY LETTER IS CAPITALIZED!!! AMAZING.
  """

  # Only the first letter of each word is upper case.
  TEST_ENGLISH_CAPITAL_2 = """
  Every Word Is Capitalized.
  """
1139

1140
  def test_english_capital_checker(self):
    """Checks CapitalLettersEnglishChecker on all-caps and title-case text."""
    checker = instructions.CapitalLettersEnglishChecker(
        'change_case:english_capital')
    checker.build_description()

    # (response, assertion applied to check_following(response))
    expectations = (
        (self.TEST_ENGLISH_CAPITAL_1, self.assertTrue),
        (self.TEST_ENGLISH_CAPITAL_2, self.assertFalse),
    )
    for response, expect in expectations:
      with self.subTest(f'test {response}'):
        expect(checker.check_following(response))
1150

1151
  # Fixtures for test_english_lowercase_checker. The code-viewer line numbers
  # that had been spliced into the literals are removed.

  # Every letter is lower case.
  TEST_ENGLISH_LOWERCASE_1 = """
  every letter is lowercase.
  """

  # Starts with an upper-case 'A'.
  TEST_ENGLISH_LOWERCASE_2 = """
  Almost every letter is lowercase.
  """
1158

1159
  def test_english_lowercase_checker(self):
    """Test that letters are all lowercase.

    (The previous docstring said "capitalized", copied from the upper-case
    test.)
    """
    instruction_id = 'change_case:english_lowercase'
    instruction = instructions.LowercaseLettersEnglishChecker(instruction_id)
    instruction.build_description()

    # (response, assertion applied to check_following(response))
    expectations = (
        (self.TEST_ENGLISH_LOWERCASE_1, self.assertTrue),
        (self.TEST_ENGLISH_LOWERCASE_2, self.assertFalse),
    )
    for response, expect in expectations:
      with self.subTest(f'test {response}'):
        expect(instruction.check_following(response))
1173

1174
  # Fixtures for test_comma. The code-viewer line numbers that had been spliced
  # into the literals are removed.

  # Contains no comma.
  TEST_COMMA_MESSAGE_1 = """
  Every sentence is short. There is no need for a comma.
  """

  # Contains one comma.
  TEST_COMMA_MESSAGE_2 = """
  Since the start of time, people have always found a way to punctuate.
  """
1181

1182
  def test_comma(self):
    """Checks CommaChecker on a comma-free response and one with a comma."""
    checker = instructions.CommaChecker('punctuation:no_comma')
    checker.build_description()

    # (response, assertion applied to check_following(response))
    expectations = (
        (self.TEST_COMMA_MESSAGE_1, self.assertTrue),
        (self.TEST_COMMA_MESSAGE_2, self.assertFalse),
    )
    for response, expect in expectations:
      with self.subTest(f'test {response}'):
        expect(checker.check_following(response))
1190

1191
  # Fixtures for test_capital_word_frequency. The code-viewer line numbers that
  # had been spliced into the literals are removed.

  # Three fully upper-case words: HERE, THREE, CAPITAL ('FUlly' is mixed case).
  TEST_CAPITAL_WORD_FREQUENCY_MESSAGE_1 = """
  HERE there are THREE FUlly CAPITAL words.
  """

  # Four fully upper-case words: THERE, FULLY, CAPITAL, WORDS.
  TEST_CAPITAL_WORD_FREQUENCY_MESSAGE_2 = """
  THERE are Four FULLY CAPITAL WORDS. Many Others Are Only Partially So.
  """
1198

1199
  def test_capital_word_frequency(self):
    """Checks CapitalWordFrequencyChecker for three frequency/relation setups."""
    checker = instructions.CapitalWordFrequencyChecker(
        'change_case:capital_word_frequency')

    # (capital_frequency, index into _COMPARISON_RELATION, response, assertion)
    cases = (
        (3, 1, self.TEST_CAPITAL_WORD_FREQUENCY_MESSAGE_1, self.assertTrue),
        (5, 0, self.TEST_CAPITAL_WORD_FREQUENCY_MESSAGE_2, self.assertTrue),
        (4, 0, self.TEST_CAPITAL_WORD_FREQUENCY_MESSAGE_2, self.assertFalse),
    )
    for capital_frequency, relation_index, response, expect in cases:
      checker.build_description(
          capital_frequency=capital_frequency,
          capital_relation=instructions._COMPARISON_RELATION[relation_index],
      )
      with self.subTest(f'test {response}'):
        expect(checker.check_following(response))
1238

1239
  # Fixtures for test_quotation. The code-viewer line numbers that had been
  # spliced into the literals are removed.

  # The whole message (ignoring surrounding whitespace) is inside double quotes.
  TEST_QUOTATION_MESSAGE_1 = """
  "This entire message is wrapped in double quotation marks."
  """

  # Only the first sentence is quoted; text follows the closing quote.
  TEST_QUOTATION_MESSAGE_2 = """
  "This message is wrapped in double quotation marks." But not everything.
  """
1246

1247
  def test_quotation(self):
    """Checks QuotationChecker on a fully quoted and a partly quoted response."""
    checker = instructions.QuotationChecker('startend:quotation')
    checker.build_description()

    # (response, assertion applied to check_following(response))
    expectations = (
        (self.TEST_QUOTATION_MESSAGE_1, self.assertTrue),
        (self.TEST_QUOTATION_MESSAGE_2, self.assertFalse),
    )
    for response, expect in expectations:
      with self.subTest(f'test {response}'):
        expect(checker.check_following(response))
1259

1260
  # Instruction id -> checker class; iterated by test_get_instruction_args.
  INSTRUCTION_DICT = {
      # Language.
      'language:response_language': instructions.ResponseLanguageChecker,
      # Length constraints.
      'length_constraints:number_sentences': instructions.NumberOfSentences,
      'length_constraints:number_paragraphs': instructions.ParagraphChecker,
      'length_constraints:number_words': instructions.NumberOfWords,
      # Detectable content.
      'detectable_content:number_placeholders': instructions.PlaceholderChecker,
      'detectable_content:postscript': instructions.PostscriptChecker,
      # Detectable format.
      'detectable_format:number_bullet_lists': instructions.BulletListChecker,
      'detectable_format:constrained_response':
          instructions.ConstrainedResponseChecker,
      'detectable_format:number_highlighted_sections':
          instructions.HighlightSectionChecker,
      'detectable_format:multiple_sections': instructions.SectionChecker,
      'detectable_format:json_format': instructions.JsonFormat,
  }
1275

1276
  def test_get_instruction_args(self):
    """Test getting instruction args.

    For every entry of INSTRUCTION_DICT, a description is built with no
    arguments, the arguments it settled on are read back through
    get_instruction_args(), and the description rebuilt from them must be
    identical.

    The whole round trip runs inside the subTest so that an exception raised
    while constructing or describing one instruction is reported against its
    id and does not stop the remaining instructions from being checked.
    """
    for inst_id, inst_cls in self.INSTRUCTION_DICT.items():
      with self.subTest(f'test {inst_id}'):
        instruction = inst_cls(inst_id)
        inst_description = instruction.build_description()
        kwargs = instruction.get_instruction_args()
        # The keyword args can be None (or empty); then there is nothing to
        # feed back into build_description.
        if kwargs:
          inst_description_closed_loop = instruction.build_description(
              **kwargs)
          self.assertEqual(inst_description, inst_description_closed_loop)
1287

1288

1289
# Hand control to the absl test runner when this file is executed directly.
if __name__ == '__main__':
  absltest.main()
1291
google-research

Использование cookies