google-research
1290 строк · 47.4 Кб
1# coding=utf-8
2# Copyright 2024 The Google Research Authors.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16"""Tests for instructions.py."""
17
18from absl.testing import absltest
19from absl.testing import parameterized
20from instruction_following_eval import instructions
21
22
23# pylint:disable=g-complex-comprehension
24class InstructionsTest(parameterized.TestCase):
25
@parameterized.named_parameters(
    [
        dict(
            testcase_name=f'_response={text}_language={lang}',
            response=text,
            language=lang,
        )
        for text, lang in (('The response is English', 'en'),)
    ]
)
def test_response_language(self, response, language):
  """Verifies that a single-language response passes the language check.

  Args:
    response: Response text handed to the checker.
    language: Language value the checker is built with.
  """
  checker = instructions.ResponseLanguageChecker('language:response_language')
  checker.build_description(language=language)
  followed = checker.check_following(response)
  self.assertTrue(followed)
44
@parameterized.named_parameters(
    [
        dict(
            testcase_name=f'_response={text}_language={lang}',
            response=text,
            language=lang,
        )
        for text, lang in (
            ("Desayunamos en McDonald's hoy", 'es'),
            ('Today we visit the Louvre', 'en'),
        )
    ]
)
def test_response_multilanguage(self, response, language):
  """Verifies the language check on responses mixing in foreign tokens.

  Args:
    response: Response text that contains tokens from another language.
    language: Language value the checker is built with.
  """
  checker = instructions.ResponseLanguageChecker('language:response_language')
  checker.build_description(language=language)
  followed = checker.check_following(response)
  self.assertTrue(followed)
64
@parameterized.named_parameters(
    [
        dict(
            testcase_name=(
                f'_response={text}_relation={comparison}'
                f'_num_sentences={count}_expected={outcome}'
            ),
            response=text,
            relation=comparison,
            num_sentences=count,
            expected=outcome,
        )
        for text, comparison, count, outcome in (
            ('xx,x. xx,x! xx/x. x{x}x?',
             instructions._COMPARISON_RELATION[0], 4, False),
            ('xxxx. xx,x! xxxx. x(x)x?',
             instructions._COMPARISON_RELATION[0], 5, True),
            ('xxxx. xx,x! xx|x. x&x x?',
             instructions._COMPARISON_RELATION[1], 4, True),
            ('xx-x. xx,x! xx}x. x,xx?',
             instructions._COMPARISON_RELATION[1], 5, False),
        )
    ]
)
def test_number_sentences(self, response, relation, num_sentences, expected):
  """Verifies the sentence-count checker against a comparison relation.

  Args:
    response: Response text whose sentences are counted.
    relation: Entry of `instructions._COMPARISON_RELATION` to compare with.
    num_sentences: Sentence-count threshold given to the checker.
    expected: Result the checker is expected to return.
  """
  checker = instructions.NumberOfSentences(
      'length_constraints:number_sentences')
  checker.build_description(num_sentences=num_sentences, relation=relation)
  self.assertEqual(checker.check_following(response), expected)
97
@parameterized.named_parameters(
    [
        dict(
            testcase_name=(
                f'_templated={text}_num_placeholders={count}'
                f'_expected={outcome}'
            ),
            template=text,
            num_placeholders=count,
            expected=outcome,
        )
        for text, count, outcome in (
            (
                'Sure, here is a short template with 5 placeholders:\n'
                '[Name]\n[Email]\n[Phone]\n[Address]\n[Website]\n'
                'This template can be used for a variety of purposes, such '
                'ascreating a contact list, sending out surveys, or creating '
                'a sign-up form.',
                5,
                True,
            ),
            (
                'My [adjective] [noun] is [adjective] [noun]. I [verb] and '
                '[verb].',
                7,
                False,
            ),
        )
    ]
)
def test_number_placeholders(self, template, num_placeholders, expected):
  """Verifies the placeholder-count checker on bracketed templates.

  Args:
    template: Response text containing `[...]` placeholders.
    num_placeholders: Placeholder count given to the checker.
    expected: Result the checker is expected to return.
  """
  checker = instructions.PlaceholderChecker(
      'detectable_content:number_placeholders')
  checker.build_description(num_placeholders=num_placeholders)
  self.assertEqual(checker.check_following(template), expected)
127
# Responses for test_number_bullet_lists. That test expects the checker to
# accept bullet counts of 3, 2, 3, 3 and 1 for messages 1-5 respectively.
# Message 1: asterisk bullets with and without a space after the asterisk.
BULLET_TEST_MESSAGE_1 = """
A Markdown bullet point is a way of formatting text to create a list. To
create a bullet point, start each line with an asterisk (*). For example:
* This is a bullet point.
*(no space required)Another bullet point.
* (no newline ending required)Another bullet point.
markdown bullet points are often used to create to-do lists or to list items
in a step-by-step guide."""
# Message 2: inline asterisks and a leading dot alongside two real bullets.
BULLET_TEST_MESSAGE_2 = """
Check that inline asterisk (*), *, will not be counted. Only * that starts a
bullet list will be counted:
* This is a bullet point.
* Another bullet point.
. dot is not counted"""
# Message 3: three asterisk bullets.
BULLET_TEST_MESSAGE_3 = """
Here are three bullets starting with asterisk:
* I am a large language model, also known as a conversational AI.
* I am trained on a massive amount of text data, and I am able to communicate.
* I am still under development, but I am learning new things every day."""

# Message 4: three hyphen bullets, the last without a space after the hyphen.
BULLET_TEST_MESSAGE_4 = """
Here are three markdown bullets:
- I am a large language model, also known as a conversational AI.
- I am trained on a massive amount of text data, and I am able to communicate.
-I am still under development, but I am learning new things every day."""

# Message 5: `***` separator lines plus a single real bullet.
BULLET_TEST_MESSAGE_5 = """
Paragraph 1
***
Paragraph 2
***
Paragraph 3
* only one bullet point
"""
162
@parameterized.named_parameters(
    [
        dict(
            testcase_name=(
                f'_templated={text}_num_bullets={count}_expected={outcome}'
            ),
            template=text,
            num_bullets=count,
            expected=outcome,
        )
        for text, count, outcome in (
            (BULLET_TEST_MESSAGE_1, 3, True),
            (BULLET_TEST_MESSAGE_2, 2, True),
            (BULLET_TEST_MESSAGE_3, 3, True),
            (BULLET_TEST_MESSAGE_4, 3, True),
            (BULLET_TEST_MESSAGE_5, 1, True),
        )
    ]
)
def test_number_bullet_lists(self, template, num_bullets, expected):
  """Verifies the bullet-point count checker.

  Args:
    template: Response text containing markdown bullet points.
    num_bullets: Bullet count given to the checker.
    expected: Result the checker is expected to return.
  """
  checker = instructions.BulletListChecker(
      'detectable_format:exact_number_bullet_points')
  checker.build_description(num_bullets=num_bullets)
  self.assertEqual(checker.check_following(template), expected)
189
# Responses for test_constrained_response. Each contains the sentence
# "My answer is no." (surrounded by whitespace, followed by a trailing space,
# or followed by extra text) and the test expects all three to be accepted.
CONSTRAINED_RESPONSE_TEST_RESPONSE_1 = """\n My answer is no.\n"""
CONSTRAINED_RESPONSE_TEST_RESPONSE_2 = """My answer is no. """
CONSTRAINED_RESPONSE_TEST_RESPONSE_3 = """
My answer is no. I am still under development and I am always learning and
improving. I am not the best chatbot in the world, but I am striving to be
the best that I can be."""
196
def test_constrained_response(self):
  """Verifies that all three constrained-response fixtures are accepted."""
  checker = instructions.ConstrainedResponseChecker(
      'detectable_format:constrained_response')
  checker.build_description()

  for suffix in ('1', '2', '3'):
    fixture_name = f'CONSTRAINED_RESPONSE_TEST_RESPONSE_{suffix}'
    with self.subTest(f'test with {fixture_name}'):
      self.assertTrue(checker.check_following(getattr(self, fixture_name)))
214
# Responses for test_number_highlights. That test expects message 1 to meet a
# minimum of 2 highlighted sections, message 2 to miss a minimum of 2, and
# message 3 to meet a minimum of 4.
HIGHLIGHTED_TEST_MESSAGE_1 = """
To highlight text with Markdown, you can use the * character before and after
the text you want to highlight. For example, if you want to highlight the
word `hello`, you would type:*hello*, You can also use the ** character to
create bold text. For example, if you want to bold the word `hello`, you
would type: **hello** """
# Mostly unclosed asterisks; only one line is wrapped on both sides.
HIGHLIGHTED_TEST_MESSAGE_2 = """
Sure, here are the numerical methods for solving partial differential
equations highlighted with Markdown:
*Finite difference methods
*Finite element methods*
*Boundary element methods
*Monte Carlo methods
I hope this helps!"""
# Several single- and double-asterisk highlights sharing the same lines.
HIGHLIGHTED_TEST_MESSAGE_3 = """
There is allowed to be *two different* highlighted *sections in the same*
line. **This is also true** for **double markdown highlights.**
"""
233
@parameterized.named_parameters(
    [
        dict(
            testcase_name=(
                f'_response={text}_min_num_highlights={minimum}'
                f'_expected={outcome}'
            ),
            response=text,
            min_num_highlights=minimum,
            expected=outcome,
        )
        for text, minimum, outcome in (
            (HIGHLIGHTED_TEST_MESSAGE_1, 2, True),
            (HIGHLIGHTED_TEST_MESSAGE_2, 2, False),
            (HIGHLIGHTED_TEST_MESSAGE_3, 4, True),
        )
    ]
)
def test_number_highlights(self, response, min_num_highlights, expected):
  """Verifies the minimum-highlighted-sections checker.

  Args:
    response: Response text containing markdown highlights.
    min_num_highlights: Minimum highlight count given to the checker.
    expected: Result the checker is expected to return.
  """
  checker = instructions.HighlightSectionChecker(
      'detectable_format:minimum_number_highlighted_sections')
  checker.build_description(num_highlights=min_num_highlights)
  self.assertEqual(checker.check_following(response), expected)
259
# Responses for test_section_checker. Message 1 is checked with the keyword
# 'Section' and a minimum of 3 sections (expected to fail); message 2 is
# checked with the keyword 'SECTION' and a minimum of 2 (expected to pass).
SECTION_TEST_MESSAGE_1 = """
Your response must have multiple sections. Mark the beginning of each section
with "Section X", such as:
Section 1
[content of section 1]
Section 2
[content of section 2]"""

SECTION_TEST_MESSAGE_2 = """SECTION 1
[content of section 1]
SECTION 2
[content of section 2]"""
272
def test_section_checker(self):
  """Verifies the section-count checker for two section keywords."""
  checker = instructions.SectionChecker('detectable_format:multiple_sections')
  # (section keyword, minimum sections, response, whether it should pass)
  scenarios = (
      ('Section', 3, self.SECTION_TEST_MESSAGE_1, False),
      ('SECTION', 2, self.SECTION_TEST_MESSAGE_2, True),
  )
  for keyword, minimum, response, should_pass in scenarios:
    checker.build_description(section_spliter=keyword, num_sections=minimum)
    with self.subTest(f'test {keyword} and {minimum}'):
      verdict = checker.check_following(response)
      if should_pass:
        self.assertTrue(verdict)
      else:
        self.assertFalse(verdict)
292
# Responses for test_paragraph_checker; paragraphs are delimited by `***`
# lines. Messages 1-3 are expected to pass with 3 paragraphs (separator only
# between, also leading, also trailing); message 4 is expected to fail with 2.
PARAGRAPH_TEST_MESSAGE_1 = """
paragraph 1
***
paragraph 2
***
paragraph 3"""

PARAGRAPH_TEST_MESSAGE_2 = """
***
paragraph 1
***
paragraph 2
***
paragraph 3"""

PARAGRAPH_TEST_MESSAGE_3 = """
paragraph 1
***
paragraph 2
***
paragraph 3
***"""

# Two consecutive separators at the end, i.e. nothing between them.
PARAGRAPH_TEST_MESSAGE_4 = """
paragraph 1
***
paragraph 2
***
***"""
322
def test_paragraph_checker(self):
  """Verifies the paragraph-count checker on `***`-separated responses."""
  checker = instructions.ParagraphChecker(
      'length_constraint:number_paragraphs')
  # (response, paragraph count given to the checker, whether it should pass)
  scenarios = (
      (self.PARAGRAPH_TEST_MESSAGE_1, 3, True),
      (self.PARAGRAPH_TEST_MESSAGE_2, 3, True),
      (self.PARAGRAPH_TEST_MESSAGE_3, 3, True),
      (self.PARAGRAPH_TEST_MESSAGE_4, 2, False),
  )
  for response, count, should_pass in scenarios:
    checker.build_description(num_paragraphs=count)
    with self.subTest(f'test {response} and {count} paragraphs'):
      verdict = checker.check_following(response)
      if should_pass:
        self.assertTrue(verdict)
      else:
        self.assertFalse(verdict)
354
# Responses for test_postscript_checker.
# Message 1: a "P.S." postscript on its own final line.
POSTSCRIPT_TEST_MESSAGE_1 = """
I will do my best to follow your instructions and always start my responses
with "My response is:". I will try to be as consistent as possible, but
please be patient with me if I make a mistake. I am still under development,
and I am always learning new things.

P.S. I hope this is what you were looking for."""

# Message 2: a "P.P.S." postscript followed by more text.
POSTSCRIPT_TEST_MESSAGE_2 = """
Sure, here is my response with a postscript starting with P.P.S.:

My response is: I hope this answers your question.

P.P.S. I am always happy to answer any other questions you may have.

Do you have any other questions for me?"""

# The postscript does not have to start on a new line; that constraint was
# relaxed in cl/525253841.
POSTSCRIPT_TEST_MESSAGE_3 = """
The radius of a unit circle is 1. However, I can give you a funny and wrong
answer: the radius of a unit circle is 0. This is because a unit circle is a
circle with a radius of 1, and if the radius is 0, then the circle has no
size and is just a point. (not starting a new line) P.S. I hope you enjoyed
my joke!"""

# Message 4: a lower-case "p.p.s" marker.
POSTSCRIPT_TEST_MESSAGE_4 = """
If the length of a square is one, the area of the square will also be one.
p.p.s what if the entire response was lower case letters?
"""

# Message 5: a marker written with a space, "P. S.".
POSTSCRIPT_TEST_MESSAGE_5 = """
The mysteries of space and time are mysterious.
P. S. Sometimes there are even spaces between P. and S..
"""
390
def test_postscript_checker(self):
  """Verifies the postscript checker for several postscript markers."""
  checker = instructions.PostscriptChecker('detectable_content:postscript')
  # (postscript marker, response, whether the response should pass)
  scenarios = (
      (instructions._POSTSCRIPT_MARKER[0], self.POSTSCRIPT_TEST_MESSAGE_1,
       True),
      ('PS:', self.POSTSCRIPT_TEST_MESSAGE_1, False),
      (instructions._POSTSCRIPT_MARKER[1], self.POSTSCRIPT_TEST_MESSAGE_2,
       True),
      ('P.S.', self.POSTSCRIPT_TEST_MESSAGE_3, True),
      ('P.P.S', self.POSTSCRIPT_TEST_MESSAGE_4, True),
      ('P.S.', self.POSTSCRIPT_TEST_MESSAGE_5, True),
  )
  for marker, response, should_pass in scenarios:
    checker.build_description(postscript_marker=marker)
    with self.subTest(f'test {marker}'):
      verdict = checker.check_following(response)
      if should_pass:
        self.assertTrue(verdict)
      else:
        self.assertFalse(verdict)
430
# Responses for test_constrained_start_checker. Messages 1 and 2 begin with
# "My response is:" and are expected to pass; message 3 only contains
# "my response is" in the middle of the text and is expected to fail.
CONSTRAINED_START_TEST_MESSAGE_1 = """
My response is: ASIC is a specialized chip for specific tasks in electronic
devices, offering advantages in efficiency and processing speed."""

CONSTRAINED_START_TEST_MESSAGE_2 = """
My response is: ASIC is a specialized chip for specific tasks in
electronic
devices, offering advantages in efficiency and processing speed."""

CONSTRAINED_START_TEST_MESSAGE_3 = """
An ASIC, or Application-Specific Integrated Circuit, is a type of specialized
chip that, my response is, is designed to perform specific tasks in electronic
devices."""
444
def test_constrained_start_checker(self):
  """Test the constrained start checker.

  Covers a response that starts with the keyword, one flagged as having
  spaces in the beginning, and one where the keyword only appears in the
  middle of the text.
  """
  instruction_id = 'multi-turn:constrained_start'
  instruction = instructions.ConstrainedStartChecker(instruction_id)
  start_keyword = 'My response is:'
  instruction.build_description(starter=start_keyword)
  with self.subTest(f'test {start_keyword}'):
    self.assertTrue(
        instruction.check_following(self.CONSTRAINED_START_TEST_MESSAGE_1))

  with self.subTest(f'test {start_keyword} with spaces in the beginning'):
    self.assertTrue(instruction.check_following(
        self.CONSTRAINED_START_TEST_MESSAGE_2))

  start_keyword = 'my response is'
  # Rebuild the description so the checker really uses the keyword named in
  # the subtest label; previously the new keyword only changed the label and
  # the check still ran against 'My response is:'.
  instruction.build_description(starter=start_keyword)
  with self.subTest(f'test {start_keyword} embedded in the middle'):
    self.assertFalse(
        instruction.check_following(self.CONSTRAINED_START_TEST_MESSAGE_3))
463
# Fixtures for test_rephrase_checker. Pair 1 (original vs. rephrased) is
# expected to pass: only the text between the asterisks differs.
REPHRASE_TEST_REPHRASED_MESSAGE_1 = """
I am *content*."""
REPHRASE_TEST_ORIGINAL_MESSAGE_1 = """
I am *happy*."""

# No asterisk-delimited section at all; the test expects a ValueError.
REPHRASE_TEST_REPHRASED_MESSAGE_1_NOCHANGE = """
I am ."""

# Square brackets instead of asterisks; the test expects a ValueError.
REPHRASE_TEST_REPHRASED_MESSAGE_1_FORMAT = """
I am [content]."""

# Pair 2 is expected to fail: the text outside the asterisks differs too.
REPHRASE_TEST_REPHRASED_MESSAGE_2 = """
It is raining heavily *at this moment*."""
REPHRASE_TEST_ORIGINAL_MESSAGE_2 = """
*At present,* there is heavy rainfall occurring."""
479
def test_rephrase_checker(self):
  """Verifies the rephrase checker, including its ValueError cases."""
  checker = instructions.RephraseChecker('detectable_format:rephrasing')
  # (original message, rephrased message, outcome); the outcome is either
  # the expected truthiness or ValueError when the check must raise.
  scenarios = (
      (self.REPHRASE_TEST_ORIGINAL_MESSAGE_1,
       self.REPHRASE_TEST_REPHRASED_MESSAGE_1, True),
      (self.REPHRASE_TEST_ORIGINAL_MESSAGE_1,
       self.REPHRASE_TEST_REPHRASED_MESSAGE_1_NOCHANGE, ValueError),
      (self.REPHRASE_TEST_ORIGINAL_MESSAGE_1,
       self.REPHRASE_TEST_REPHRASED_MESSAGE_1_FORMAT, ValueError),
      (self.REPHRASE_TEST_ORIGINAL_MESSAGE_2,
       self.REPHRASE_TEST_REPHRASED_MESSAGE_2, False),
  )
  for original, rephrased, outcome in scenarios:
    checker.build_description(original_message=original)
    with self.subTest(f'test {rephrased}'):
      if outcome is ValueError:
        with self.assertRaises(ValueError):
          checker.check_following(rephrased)
      elif outcome:
        self.assertTrue(checker.check_following(rephrased))
      else:
        self.assertFalse(checker.check_following(rephrased))
510
# Fixtures for test_keyword_checker. Message 1 contains every entry of
# KEYWORDS and is expected to pass; message 2 does not mention "Mona Lisa"
# and is expected to fail.
TEST_INCLUDE_KEYWORD_MESSAGE_1 = """
Paris is a city of beauty and romance. The romantic river Seine winds its way
through the city, past iconic landmarks like the Eiffel Tower and the Louvre
Museum, where the Mona Lisa resides. Whether you're taking a boat cruise down
the river or simply strolling along the banks, you're sure to be captivated
by the city's charm."""

TEST_INCLUDE_KEYWORD_MESSAGE_2 = """
Paris is a city of beauty, romance, and history. It is home to some of the
most iconic landmarks in the world, including the Eiffel Tower, the Louvre
Museum, and the Notre Dame Cathedral. The city is also known for its romantic
river cruises, its delicious food, and its stylish people.
"""

# Keywords passed to the checker via build_description(keywords=...).
KEYWORDS = ('romantic', 'river', 'Mona Lisa')
526
def test_keyword_checker(self):
  """Verifies that the keyword checker requires the listed keywords."""
  checker = instructions.KeywordChecker('keywords:include_keywords')
  # (response, whether the response should pass)
  scenarios = (
      (self.TEST_INCLUDE_KEYWORD_MESSAGE_1, True),
      (self.TEST_INCLUDE_KEYWORD_MESSAGE_2, False),
  )
  for response, should_pass in scenarios:
    checker.build_description(keywords=self.KEYWORDS)
    with self.subTest(f'test {response}'):
      verdict = checker.check_following(response)
      if should_pass:
        self.assertTrue(verdict)
      else:
        self.assertFalse(verdict)
541
# Fixtures for test_keyword_frequency_checker. The "FREQUNECY" spelling in the
# message names is what the test method references, so it is kept as is.
# Message 1 holds the keyword three times in different letter cases.
TEST_KEYWORD_FREQUNECY_MESSAGE_1 = """
keyword, Keyword, KEYWORD
"""
# Keyword 1 is deliberately padded with spaces.
TEST_KEYWORD_FREQUENCY_KEYWORD_1 = ' keyword '

# Message 2 holds the keyword three times, each directly after an asterisk.
TEST_KEYWORD_FREQUNECY_MESSAGE_2 = """
*keyword
*Keyword
*KEYWORD
"""
TEST_KEYWORD_FREQUENCY_KEYWORD_2 = 'KEYWORD'
553
def test_keyword_frequency_checker(self):
  """Verifies the keyword-frequency checker for both comparison relations."""
  checker = instructions.KeywordFrequencyChecker('keywords:keyword_frequency')
  # (keyword, frequency, relation, response, whether it should pass)
  scenarios = (
      (self.TEST_KEYWORD_FREQUENCY_KEYWORD_1, 4,
       instructions._COMPARISON_RELATION[0],
       self.TEST_KEYWORD_FREQUNECY_MESSAGE_1, True),
      (self.TEST_KEYWORD_FREQUENCY_KEYWORD_1, 3,
       instructions._COMPARISON_RELATION[1],
       self.TEST_KEYWORD_FREQUNECY_MESSAGE_1, True),
      (self.TEST_KEYWORD_FREQUENCY_KEYWORD_2, 4,
       instructions._COMPARISON_RELATION[1],
       self.TEST_KEYWORD_FREQUNECY_MESSAGE_2, False),
  )
  for keyword, count, comparison, response, should_pass in scenarios:
    checker.build_description(
        keyword=keyword, frequency=count, relation=comparison)
    with self.subTest(f'test {keyword} {count}'):
      verdict = checker.check_following(response)
      if should_pass:
        self.assertTrue(verdict)
      else:
        self.assertFalse(verdict)
586
# Responses for test_num_words_checker; the words mix letters and digits.
# Message 1 is checked against a threshold of 8 words.
TEST_NUM_WORDS_MESSAGE_1 = """
d3sCRi7 lArge lAnguagE M0del w1tH 20 w0RdS."""

# Message 2 is checked against a threshold of 16 words under both relations.
TEST_NUM_WORDS_MESSAGE_2 = """
L4RGE L4NGU4GE M0DEL: AI syst3m th4t und3rstands, g3n3r4tes, or tr4nsforms
l4ngu4g3 b4s3d on pr3vious l3arning & d4t4."""
593
def test_num_words_checker(self):
  """Verifies the word-count checker for both comparison relations."""
  checker = instructions.NumberOfWords('length_constraint:number_words')
  # (response, threshold, relation, label qualifier, whether it should pass)
  scenarios = (
      (self.TEST_NUM_WORDS_MESSAGE_1, 8,
       instructions._COMPARISON_RELATION[0], '', True),
      (self.TEST_NUM_WORDS_MESSAGE_2, 16,
       instructions._COMPARISON_RELATION[0], 'less than ', False),
      (self.TEST_NUM_WORDS_MESSAGE_2, 16,
       instructions._COMPARISON_RELATION[1], 'at least ', True),
  )
  for response, threshold, comparison, qualifier, should_pass in scenarios:
    checker.build_description(relation=comparison, num_words=threshold)
    with self.subTest(f'test {response} {qualifier}{threshold}'):
      verdict = checker.check_following(response)
      if should_pass:
        self.assertTrue(verdict)
      else:
        self.assertFalse(verdict)
622
# Responses for test_paragraph_first_word; paragraphs are separated by blank
# lines. Tests 1-3 are checked for 3 paragraphs with the 2nd starting with
# 'I': test 1 is expected to pass, tests 2 and 3 to fail.
PARAGRAPH_FIRST_WORD_TEST_1 = """
paragraph 1

I paragraph 2

paragraph 3"""

# Only two paragraphs.
PARAGRAPH_FIRST_WORD_TEST_2 = """
paragraph 1

I paragraph 2"""

# Second paragraph starts with 'fail' rather than 'I'.
PARAGRAPH_FIRST_WORD_TEST_3 = """
paragraph 1

fail paragraph 2

paragraph 3"""

# Checked for 5 paragraphs with the 5th starting with 'haha' (expected pass;
# the response itself writes "Haha," with a capital and a comma).
PARAGRAPH_FIRST_WORD_TEST_4 = """
Wow this is a very long response.

I can't believe there is more than three paragraphs.

Really more than three? No way!

I can't believe it but I guess I am living proof.

Haha, you go that right."""

# Tests 5 and 6 are checked for 5 paragraphs with the 3rd starting with
# 'Really'. Test 5 wraps that word in quotes and punctuation (expected pass).
PARAGRAPH_FIRST_WORD_TEST_5 = """
Wow this is a very long response.

I can't believe there is more than three paragraphs.

"Really?! more than three? No way!"

I can't believe it but I guess I am living proof.

Haha, you go that right."""

# Test 6 breaks the word itself ("Rea!lly") and is expected to fail.
PARAGRAPH_FIRST_WORD_TEST_6 = """
Wow this is a very long response.

I can't believe there is more than three paragraphs.

Rea!lly more than three? No way!

I can't believe it but I guess I am living proof.

Haha, you go that right."""
674
def test_paragraph_first_word(self):
  """Verifies paragraph count together with the nth paragraph's first word."""
  checker = instructions.ParagraphFirstWordCheck(
      'length_constraints:nth_paragraph_first_word')
  # (response, num_paragraphs, nth_paragraph, first_word, should it pass)
  scenarios = (
      (self.PARAGRAPH_FIRST_WORD_TEST_1, 3, 2, 'I', True),
      (self.PARAGRAPH_FIRST_WORD_TEST_2, 3, 2, 'I', False),
      (self.PARAGRAPH_FIRST_WORD_TEST_3, 3, 2, 'I', False),
      (self.PARAGRAPH_FIRST_WORD_TEST_4, 5, 5, 'haha', True),
      (self.PARAGRAPH_FIRST_WORD_TEST_5, 5, 3, 'Really', True),
      (self.PARAGRAPH_FIRST_WORD_TEST_6, 5, 3, 'Really', False),
  )
  for response, total, position, word, should_pass in scenarios:
    checker.build_description(
        num_paragraphs=total, nth_paragraph=position, first_word=word)
    label = (
        f'test {response} \n. Test for {total} paragraphs and '
        f'for paragraph {position} {word} is first word'
    )
    with self.subTest(label):
      if should_pass:
        self.assertTrue(checker.check_following(response))
      else:
        self.assertFalse(checker.check_following(response))
721
# Fixtures for test_key_sentences. Response 1 contains two of the key
# sentences, response 2 contains one, and response 3 contains none.
TEST_KEY_SENTENCES_1 = """
Puppies are fun. They are playful, energetic, and always up for a good time.
Puppies love to run, jump, and play fetch. They are also very good at
cuddling and giving kisses. If you are looking for a fun and loving pet,
a puppy is a great choice.
"""

TEST_KEY_SENTENCES_2 = """
I like to eat candy. When I'm feeling happy, sad, or even angry, candy
always makes me feel better. I like to share candy with my friends and
family. It's a great way to show them how much I care.
"""

TEST_KEY_SENTENCES_3 = """
I know that candy isn't the healthiest thing to eat, but I don't care.
I love it too much. I'll just have to make sure to eat it in moderation.
"""

# Key sentences passed to the checker via build_description(key_sentences=...).
key_sentences = {'Puppies love to run, jump, and play fetch.',
                 'I like to eat candy.', 'Puppies are fun.'}
742
def test_key_sentences(self):
  """Verifies that the checker counts key sentences found in the response."""
  checker = instructions.KeySentenceChecker('keywords:key_sentences')

  # First configuration: two key sentences are required.
  checker.build_description(
      num_sentences=2, key_sentences=self.key_sentences)
  with self.subTest(f'test {self.TEST_KEY_SENTENCES_1}'):
    self.assertTrue(checker.check_following(self.TEST_KEY_SENTENCES_1))

  # Second configuration, built once and shared by the last two subtests:
  # one key sentence is required.
  checker.build_description(
      num_sentences=1, key_sentences=self.key_sentences)
  for response, should_pass in (
      (self.TEST_KEY_SENTENCES_2, True),
      (self.TEST_KEY_SENTENCES_3, False),
  ):
    with self.subTest(f'test {response}'):
      if should_pass:
        self.assertTrue(checker.check_following(response))
      else:
        self.assertFalse(checker.check_following(response))
764
# Fixtures for test_forbidden_words.
# Message 1 contains "power" and "become".
TEST_FORBIDDEN_WORDS_MESSAGE_1 = """
The Nazis came to power in 1933 through a combination of legal and illegal
means. Hitler was appointed chancellor by President Paul von Hindenburg, and
the Nazis quickly consolidated their power by passing a series of laws that
restricted the rights of opposition parties and individuals. By 1934, Hitler
had become dictator of Germany.
"""

# Message 2 contains none of the words from any of the three lists below.
TEST_FORBIDDEN_WORDS_MESSAGE_2 = """
Dinosaurs were a diverse group of reptiles that dominated the Earth for over
160 million years. They came in all shapes and sizes, from the tiny
Compsognathus to the massive Argentinosaurus. Dinosaurs were the most
successful land animals on Earth until they went extinct about 66 million
years ago. The exact cause of their extinction is still unknown, but it
is thought to have been a combination of factors, including an asteroid
impact and climate change.
"""

# Message 3 contains "text" and "Google"; "gene" and "transform" occur only
# as parts of longer words ("Generative", "Transformer").
TEST_FORBIDDEN_WORDS_MESSAGE_3 = """
GPT, or Generative Pre-trained Transformer, is a family of neural network
models that uses the transformer architecture. GPT models are trained on a
massive dataset of text and code, and can be used for a variety of tasks,
including text generation, translation, and question answering. GPT models
have been shown to be very effective at these tasks, and are being used by
a variety of companies and organizations like Google.
"""
# Forbidden-word lists; all entries are upper case while the messages are not.
FORBIDDEN_WORDS_1 = ('HOUSE', 'POWER', 'BECOME')
FORBIDDEN_WORDS_2 = ('GOOGLE', 'TEXT')
FORBIDDEN_WORDS_3 = ('GENE', 'TRANSFORM')
794
def test_forbidden_words(self):
  """Test the exclusion of forbidden words.

  The checker is built once per forbidden-word list and then run against
  every response paired with that list, in order.
  """
  instruction_id = 'keywords:forbidden_words'
  instruction = instructions.ForbiddenWords(instruction_id)

  # (forbidden words, ((response, whether the response should pass), ...))
  scenarios = (
      (self.FORBIDDEN_WORDS_1, (
          (self.TEST_FORBIDDEN_WORDS_MESSAGE_1, False),
          (self.TEST_FORBIDDEN_WORDS_MESSAGE_2, True),
          (self.TEST_FORBIDDEN_WORDS_MESSAGE_3, True),
      )),
      (self.FORBIDDEN_WORDS_2, (
          (self.TEST_FORBIDDEN_WORDS_MESSAGE_1, True),
          (self.TEST_FORBIDDEN_WORDS_MESSAGE_2, True),
          (self.TEST_FORBIDDEN_WORDS_MESSAGE_3, False),
      )),
      (self.FORBIDDEN_WORDS_3, (
          (self.TEST_FORBIDDEN_WORDS_MESSAGE_3, True),
      )),
  )
  for forbidden_words, cases in scenarios:
    instruction.build_description(forbidden_words=forbidden_words)
    for message, should_pass in cases:
      # The label is derived from the list the checker was built with, so it
      # can no longer name a different list than the one under test (the
      # FORBIDDEN_WORDS_3 subtest used to be labelled with FORBIDDEN_WORDS_2).
      with self.subTest(f'test {message}\n ' +
                        f'with forbidden words: {forbidden_words}. '):
        if should_pass:
          self.assertTrue(instruction.check_following(message))
        else:
          self.assertFalse(instruction.check_following(message))
837
# Fixtures for test_rephrase_paragraph: two original paragraphs followed by
# candidate rewrites that the test compares against them.
TEST_ORIGINAL_PARAGRAPH_1 = """
The sun is shining brightly today, and the birds are singing in the trees.
It's a beautiful day to be outside, so I decided to go for a walk.
As I walked, I took in the fresh air and the warm sunshine.
I felt happy and relaxed, and I was grateful for the beautiful day
"""

TEST_ORIGINAL_PARAGRAPH_2 = """
Google is a global technology company that specializes in Internet-related
services and products. It is one of the most successful companies in the
world, and its products are used by billions of people every day. Google's
mission is to organize the world's information and make it universally
accessible and useful.
"""

# Rewrites 1 and 2 are checked against original paragraph 1.
TEST_REPHRASED_PARAGRAPH_1 = """
On a beautiful day, I went for a walk. The sun shone and birds sang.
I enjoyed the fresh air and warm sun.
I felt happy and grateful for the lovely day.
"""

TEST_REPHRASED_PARAGRAPH_2 = """
The weather was lovely, so I went for a walk. I enjoyed the
fresh air and warm sun. It was a beautiful day, and I felt happy and grateful.
"""

# Rewrites 3 and 4 are checked against original paragraph 2.
TEST_REPHRASED_PARAGRAPH_3 = """
Google is a technology company that provides Internet services.
It aims to organize the world's information and make it universally
accessible and useful.
"""

TEST_REPHRASED_PARAGRAPH_4 = """
I like candy.
"""
873
def test_rephrase_paragraph(self):
  """Checks RephraseParagraph for several low/high bounds.

  Every case rebuilds the instruction with a `low`/`high` pair and an
  original paragraph, then verifies the verdict for one rephrased paragraph.
  """
  checker = instructions.RephraseParagraph(
      'detectable_content:rephrase_paragraph')

  # (low, high, original paragraph, rephrased paragraph, expected verdict)
  cases = (
      (20, 30, self.TEST_ORIGINAL_PARAGRAPH_1,
       self.TEST_REPHRASED_PARAGRAPH_1, True),
      (20, 25, self.TEST_ORIGINAL_PARAGRAPH_1,
       self.TEST_REPHRASED_PARAGRAPH_2, True),
      (15, 20, self.TEST_ORIGINAL_PARAGRAPH_2,
       self.TEST_REPHRASED_PARAGRAPH_3, False),
      (0, 5, self.TEST_ORIGINAL_PARAGRAPH_2,
       self.TEST_REPHRASED_PARAGRAPH_4, True),
      (1, 5, self.TEST_ORIGINAL_PARAGRAPH_2,
       self.TEST_REPHRASED_PARAGRAPH_4, False),
  )

  for low, high, original, rephrased, expected in cases:
    # The description is rebuilt outside the subtest, as in each original
    # step, so a failure here aborts the whole test.
    checker.build_description(
        low=low, high=high, original_paragraph=original)
    verify = self.assertTrue if expected else self.assertFalse
    with self.subTest(
        f'test {original} to have between {low} and {high} same words.'):
      verify(checker.check_following(rephrased))
922
# Fixtures for test_two_responses. The `******` lines appear to act as the
# separator between responses -- confirm against TwoResponsesChecker.

# Two different responses with one separator; expected to pass.
TEST_TWO_RESPONSES_1 = """
This is response 1.
******
This is response 2.
"""

# Two identical responses with one separator; expected to fail.
TEST_TWO_RESPONSES_2 = """
This is response 1.
******
This is response 1.
"""

# Three responses with two separators; expected to fail.
TEST_TWO_RESPONSES_3 = """
This is response 1.
******
This is response 2.
******
This is response 3.
"""

# Each response wrapped in its own pair of `******` lines; expected to fail.
TEST_TWO_RESPONSES_4 = """
******
Response 1.
******
******
Response 2.
******
"""

# Two responses with leading, middle and trailing `******` lines; expected to
# pass.
TEST_TWO_RESPONSES_5 = """
******
Response 1
******
Response 2
******
"""
959
def test_two_responses(self):
  """Checks which fixture layouts TwoResponsesChecker accepts."""
  checker = instructions.TwoResponsesChecker('combination:two_responses')
  checker.build_description()

  # (response text, whether the checker is expected to accept it)
  expectations = (
      (self.TEST_TWO_RESPONSES_1, True),
      (self.TEST_TWO_RESPONSES_2, False),
      (self.TEST_TWO_RESPONSES_3, False),
      (self.TEST_TWO_RESPONSES_4, False),
      (self.TEST_TWO_RESPONSES_5, True),
  )

  for response, accepted in expectations:
    with self.subTest(f'test {response}'):
      verdict = checker.check_following(response)
      if accepted:
        self.assertTrue(verdict)
      else:
        self.assertFalse(verdict)
980
# Fixtures for test_prompt_repeat_answer.

# Passed to build_description() as `prompt_to_repeat`.
PROMPT_TO_REPEAT = 'Write a CL description.'

# Full prompt text; only used in the subtest labels.
TEST_PROMPT_1 = """Write a CL description. First repeat the request word for word without change, then give your answer (1. do not say any words or characters before repeating the request; 2. the request you need to repeat does not include this sentence)"""

# Begins with PROMPT_TO_REPEAT; expected to pass.
TEST_PROMPT_ANSWER_1 = """Write a CL description. Hi, Le and TJ, please
check this out. Thanks.
"""
# Contains PROMPT_TO_REPEAT only after other words; expected to fail.
TEST_PROMPT_ANSWER_2 = """Hi, Le and TJ. Write a CL description. Thanks.
"""
990
def test_prompt_repeat_answer(self):
  """Checks that the prompt is repeated first and then answered.

  An answer that begins with the prompt is expected to pass; an answer that
  only contains the prompt after other words is expected to fail.
  """
  checker = instructions.RepeatPromptThenAnswer('combination:repeat_prompt')
  checker.build_description(prompt_to_repeat=self.PROMPT_TO_REPEAT)

  # The full prompt only appears in the subtest labels.
  label_suffix = f' with prompt: {self.TEST_PROMPT_1}'

  good_answer = self.TEST_PROMPT_ANSWER_1
  with self.subTest(f'test {good_answer}' + label_suffix):
    self.assertTrue(checker.check_following(good_answer))

  bad_answer = self.TEST_PROMPT_ANSWER_2
  with self.subTest(f'test {bad_answer}' + label_suffix):
    self.assertFalse(checker.check_following(bad_answer))
1004
# Fixtures for test_end_checker. TEST_END_CHECKER_n is checked against
# END_PHRASE_n; every literal starts and ends with a newline.

# Ends with END_PHRASE_1; expected to pass.
TEST_END_CHECKER_1 = """
The answer is 7. Any more questions?
"""

# Ends with END_PHRASE_2 apart from the capital "T"; expected to pass.
TEST_END_CHECKER_2 = """
At the end of this prompt I am required to say that this is the end.
"""

# Starts with END_PHRASE_3 but does not end with it; expected to fail.
TEST_END_CHECKER_3 = """
This will fail. Paris is cool.
"""

END_PHRASE_1 = """
Any more questions?
"""

END_PHRASE_2 = """
This is the end.
"""

END_PHRASE_3 = """
This will fail.
"""
1028
def test_end_checker(self):
  """Checks EndChecker against three end-phrase/response pairs."""
  checker = instructions.EndChecker('startend:end_checker')

  # (end phrase, response, whether the response is expected to pass)
  cases = (
      (self.END_PHRASE_1, self.TEST_END_CHECKER_1, True),
      (self.END_PHRASE_2, self.TEST_END_CHECKER_2, True),
      (self.END_PHRASE_3, self.TEST_END_CHECKER_3, False),
  )

  for end_phrase, response, expected in cases:
    checker.build_description(end_phrase=end_phrase)
    verify = self.assertTrue if expected else self.assertFalse
    with self.subTest(f'test {response}'):
      verify(checker.check_following(response))
1044
# Fixtures for test_title_checker.

# `<<...>>` title on the first line; expected to pass.
TEST_TITLE_MESSAGE_1 = """
<<Song of Joy>>
La la la. Happy song.
"""

# `<<...>>` title on the last line; expected to pass.
TEST_TITLE_MESSAGE_2 = """
Is it fine for title to be at the end?
<<This is the title>>
"""
# Brackets containing only a space; expected to fail.
TEST_TITLE_MESSAGE_3 = """
<< >>
There is no title.
"""

# Brackets spanning two lines; expected to fail.
TEST_TITLE_MESSAGE_4 = """
<<This is not a title.
This is a paragraph.>>
"""
1063
def test_title_checker(self):
  """Checks which fixture messages TitleChecker treats as titled."""
  checker = instructions.TitleChecker('detectable_format:title')
  checker.build_description()

  titled = (self.TEST_TITLE_MESSAGE_1, self.TEST_TITLE_MESSAGE_2)
  untitled = (self.TEST_TITLE_MESSAGE_3, self.TEST_TITLE_MESSAGE_4)

  for message in titled:
    with self.subTest(f'test {message}'):
      self.assertTrue(checker.check_following(message))

  for message in untitled:
    with self.subTest(f'test {message}'):
      self.assertFalse(checker.check_following(message))
1078
# Fixtures for test_letter_frequency_checker.

# Contains the letter t four times when case is ignored.
TEST_LETTER_FREQUENCY_MESSAGE_1 = """
There is the T. Four T's.
"""

# Contains the letter a three times when case is ignored.
TEST_LETTER_FREQUENCY_MESSAGE_2 = """
asdfghjkl!!aA
"""

# Contains the letter p three times when case is ignored.
TEST_LETTER_FREQUENCY_MESSAGE_3 = """
The letter P appears 3 times in this message.
"""
1090
def test_letter_frequency_checker(self):
  """Test the frequency of letters.

  Each case rebuilds the checker with a letter, a frequency and a comparison
  relation, then verifies the verdict for one fixture message.
  """
  instruction_id = 'keywords:letter_frequency'
  instruction = instructions.LetterFrequencyChecker(instruction_id)

  # NOTE(review): _COMPARISON_RELATION[0] / [1] are presumably the
  # "fewer than" / "at least" relations -- confirm in instructions.py.
  relation_0 = instructions._COMPARISON_RELATION[0]
  relation_1 = instructions._COMPARISON_RELATION[1]

  # (letter, frequency, relation, message, expected verdict)
  cases = (
      ('T', 4, relation_1, self.TEST_LETTER_FREQUENCY_MESSAGE_1, True),
      ('a', 4, relation_0, self.TEST_LETTER_FREQUENCY_MESSAGE_2, True),
      # The 'p' case belongs with MESSAGE_3, the fixture written about the
      # letter P; it previously reused MESSAGE_2 and left MESSAGE_3 unused.
      ('p', 4, relation_1, self.TEST_LETTER_FREQUENCY_MESSAGE_3, False),
  )

  for letter, frequency, relation, message, expected in cases:
    instruction.build_description(
        letter=letter,
        let_frequency=frequency,
        let_relation=relation,
    )
    verify = self.assertTrue if expected else self.assertFalse
    with self.subTest(f'test {message}'):
      verify(instruction.check_following(message))
1131
# Fixtures for test_english_capital_checker.

# Every letter is upper case; expected to pass.
TEST_ENGLISH_CAPITAL_1 = """
THIS IS AN ENGLISH SENTENCE. EVERY LETTER IS CAPITALIZED!!! AMAZING.
"""

# Only the first letter of each word is upper case; expected to fail.
TEST_ENGLISH_CAPITAL_2 = """
Every Word Is Capitalized.
"""
1139
def test_english_capital_checker(self):
  """Checks that only an all-upper-case message passes the checker."""
  checker = instructions.CapitalLettersEnglishChecker(
      'change_case:english_capital')
  checker.build_description()

  all_caps = self.TEST_ENGLISH_CAPITAL_1
  with self.subTest(f'test {all_caps}'):
    self.assertTrue(checker.check_following(all_caps))

  title_case = self.TEST_ENGLISH_CAPITAL_2
  with self.subTest(f'test {title_case}'):
    self.assertFalse(checker.check_following(title_case))
1150
# Fixtures for test_english_lowercase_checker.

# Every letter is lower case; expected to pass.
TEST_ENGLISH_LOWERCASE_1 = """
every letter is lowercase.
"""

# Starts with an upper-case "A"; expected to fail.
TEST_ENGLISH_LOWERCASE_2 = """
Almost every letter is lowercase.
"""
1158
def test_english_lowercase_checker(self):
  """Test that letters are all lowercase.

  A fully lower-case message is expected to pass; a message containing an
  upper-case letter is expected to fail.
  """
  instruction_id = 'change_case:english_lowercase'
  instruction = instructions.LowercaseLettersEnglishChecker(instruction_id)
  instruction.build_description()

  with self.subTest(f'test {self.TEST_ENGLISH_LOWERCASE_1}'):
    self.assertTrue(
        instruction.check_following(self.TEST_ENGLISH_LOWERCASE_1)
    )

  with self.subTest(f'test {self.TEST_ENGLISH_LOWERCASE_2}'):
    self.assertFalse(
        instruction.check_following(self.TEST_ENGLISH_LOWERCASE_2)
    )
1173
# Fixtures for test_comma.

# Contains no comma; expected to pass.
TEST_COMMA_MESSAGE_1 = """
Every sentence is short. There is no need for a comma.
"""

# Contains one comma; expected to fail.
TEST_COMMA_MESSAGE_2 = """
Since the start of time, people have always found a way to punctuate.
"""
1181
def test_comma(self):
  """Checks that CommaChecker accepts only the comma-free message."""
  checker = instructions.CommaChecker('punctuation:no_comma')
  checker.build_description()

  comma_free = self.TEST_COMMA_MESSAGE_1
  with self.subTest(f'test {comma_free}'):
    self.assertTrue(checker.check_following(comma_free))

  with_comma = self.TEST_COMMA_MESSAGE_2
  with self.subTest(f'test {with_comma}'):
    self.assertFalse(checker.check_following(with_comma))
1190
# Fixtures for test_capital_word_frequency.

# Three fully upper-case words (HERE, THREE, CAPITAL); "FUlly" is mixed case.
TEST_CAPITAL_WORD_FREQUENCY_MESSAGE_1 = """
HERE there are THREE FUlly CAPITAL words.
"""

# Four fully upper-case words (THERE, FULLY, CAPITAL, WORDS).
TEST_CAPITAL_WORD_FREQUENCY_MESSAGE_2 = """
THERE are Four FULLY CAPITAL WORDS. Many Others Are Only Partially So.
"""
1198
def test_capital_word_frequency(self):
  """Checks CapitalWordFrequencyChecker for three frequency/relation pairs."""
  checker = instructions.CapitalWordFrequencyChecker(
      'change_case:capital_word_frequency')

  first_message = self.TEST_CAPITAL_WORD_FREQUENCY_MESSAGE_1
  second_message = self.TEST_CAPITAL_WORD_FREQUENCY_MESSAGE_2

  # (frequency, index into _COMPARISON_RELATION, message, expected verdict)
  cases = (
      (3, 1, first_message, True),
      (5, 0, second_message, True),
      (4, 0, second_message, False),
  )

  for capital_frequency, relation_index, message, expected in cases:
    checker.build_description(
        capital_frequency=capital_frequency,
        capital_relation=instructions._COMPARISON_RELATION[relation_index],
    )
    with self.subTest(f'test {message}'):
      verdict = checker.check_following(message)
      if expected:
        self.assertTrue(verdict)
      else:
        self.assertFalse(verdict)
1238
# Fixtures for test_quotation.

# The whole sentence sits inside double quotation marks; expected to pass.
TEST_QUOTATION_MESSAGE_1 = """
"This entire message is wrapped in double quotation marks."
"""

# Text continues after the closing quotation mark; expected to fail.
TEST_QUOTATION_MESSAGE_2 = """
"This message is wrapped in double quotation marks." But not everything.
"""
1246
def test_quotation(self):
  """Checks that only the fully quoted message passes QuotationChecker."""
  checker = instructions.QuotationChecker('startend:quotation')
  checker.build_description()

  # (message, whether the checker is expected to accept it)
  expectations = (
      (self.TEST_QUOTATION_MESSAGE_1, True),
      (self.TEST_QUOTATION_MESSAGE_2, False),
  )

  for message, accepted in expectations:
    verify = self.assertTrue if accepted else self.assertFalse
    with self.subTest(f'test {message}'):
      verify(checker.check_following(message))
1259
# Maps instruction ids to the checker classes exercised by
# test_get_instruction_args. Each class is instantiated with its id and then
# has build_description() called without arguments.
INSTRUCTION_DICT = {
    'language:response_language': instructions.ResponseLanguageChecker,
    'length_constraints:number_sentences': instructions.NumberOfSentences,
    'length_constraints:number_paragraphs': instructions.ParagraphChecker,
    'length_constraints:number_words': instructions.NumberOfWords,
    'detectable_content:number_placeholders': instructions.PlaceholderChecker,
    'detectable_content:postscript': instructions.PostscriptChecker,
    'detectable_format:number_bullet_lists': instructions.BulletListChecker,
    'detectable_format:constrained_response': (
        instructions.ConstrainedResponseChecker),
    'detectable_format:number_highlighted_sections': (
        instructions.HighlightSectionChecker),
    'detectable_format:multiple_sections': instructions.SectionChecker,
    'detectable_format:json_format': instructions.JsonFormat,
}
1275
def test_get_instruction_args(self):
  """Checks that get_instruction_args() reproduces the same description.

  For each class in INSTRUCTION_DICT a description is built with no
  arguments, the args are read back, and rebuilding from those args must
  give an equal description.
  """
  for inst_id, checker_cls in self.INSTRUCTION_DICT.items():
    checker = checker_cls(inst_id)
    first_description = checker.build_description()
    recovered_args = checker.get_instruction_args()
    # The keyword args can be None (or empty); nothing to rebuild then.
    if not recovered_args:
      continue
    rebuilt_description = checker.build_description(**recovered_args)
    with self.subTest(f'test {inst_id}'):
      self.assertEqual(first_description, rebuilt_description)
1287
1288
# Run the tests through absltest when this file is executed as a script.
if __name__ == '__main__':
  absltest.main()
1291