# google-research
# 123 lines · 4.2 KB
# coding=utf-8
# Copyright 2024 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
15
"""Tests for the instructions utility library."""
17
from absl.testing import absltest
from absl.testing import parameterized
from instruction_following_eval import instructions_util
21
22
class InstructionsUtilTest(parameterized.TestCase):
  """Unit tests for the helpers exposed by `instructions_util`.

  Exercises `count_words`, `count_sentences`, `split_into_sentences` and
  `generate_keywords`. Fixtures are stored as class attributes so that each
  test method only has to pair an input with its expected result.
  """

  # Word-count fixtures: each one is a `(text, expected_num_words)` pair.
  TEST_WORD_COUNT_CASE_1 = ("word1, word2, word3, word4.", 4)

  TEST_WORD_COUNT_CASE_2 = (
      """
Bard can you tell me which is the best optimization method for the
transition from an hydro-thermal system to an hydro-renewables system""",
      24)

  # The expected count of 6 treats "Hyphenated-word" as two separate words.
  TEST_WORD_COUNT_CASE_3 = (
      """
Hyphenated-word has two word counts.
""", 6)

  def test_word_count(self):
    """Checks `count_words` against every `TEST_WORD_COUNT_CASE_*` fixture."""
    cases = (
        self.TEST_WORD_COUNT_CASE_1,
        self.TEST_WORD_COUNT_CASE_2,
        self.TEST_WORD_COUNT_CASE_3,
    )
    for text, expected_num_words in cases:
      # One sub-test per fixture, labelled with the input text, so a failing
      # fixture is reported without hiding the outcome of the others.
      with self.subTest(text):
        actual_num_words = instructions_util.count_words(text)
        self.assertEqual(expected_num_words, actual_num_words)

  @parameterized.named_parameters(
      [
          {  # pylint: disable=g-complex-comprehension
              "testcase_name": (
                  f"_response={response}_num_sentences={num_sentences}"
              ),
              "response": response,
              "num_sentences": num_sentences,
          }
          for response, num_sentences in [
              ("xx,x. xx,x! xx/x. x{x}x? x.", 5),
              ("xx,x! xxxx. x(x)x?", 3),
              ("xxxx. xx,x! xx|x. x&x x?", 4),
              ("xx-x]xx,x! x{x}xx,x.", 2),
          ]
      ]
  )
  def test_count_sentences(self, response, num_sentences):
    """Checks that `count_sentences` reports the expected sentence count.

    Args:
      response: Text handed to `count_sentences`.
      num_sentences: Number of sentences the call is expected to return.
    """
    actual_num_sentences = instructions_util.count_sentences(response)
    self.assertEqual(num_sentences, actual_num_sentences)

  # Sentence-splitting fixtures: `TEST_SENTENCE_SPLIT_<n>` is the input and
  # `EXPECTED_SENTENCE_SPLIT_<n>` is the list of sentences expected back.
  TEST_SENTENCE_SPLIT_1 = """
Google is a technology company. It was founded in 1998 by Larry Page
and Sergey Brin. Google's mission is to organize the world's information
and make it universally accessible and useful.
"""

  TEST_SENTENCE_SPLIT_2 = """
The U.S.A has many Ph.D. students. They will often haven a .com website
sharing the research that they have done.
"""

  # Line breaks in the input are expected to come back as single spaces.
  EXPECTED_SENTENCE_SPLIT_1 = [
      "Google is a technology company.",
      "It was founded in 1998 by Larry Page and Sergey Brin.",
      (
          "Google's mission is to organize the world's information and make it"
          " universally accessible and useful."
      ),
  ]

  # The periods in "U.S.A", "Ph.D." and ".com" are expected not to end a
  # sentence, so this input yields exactly two sentences.
  EXPECTED_SENTENCE_SPLIT_2 = [
      "The U.S.A has many Ph.D. students.",
      (
          "They will often haven a .com website sharing the research that they"
          " have done."
      ),
  ]

  def test_sentence_splitter(self):
    """Checks `split_into_sentences` on both sentence-split fixtures."""
    cases = (
        (self.TEST_SENTENCE_SPLIT_1, self.EXPECTED_SENTENCE_SPLIT_1),
        (self.TEST_SENTENCE_SPLIT_2, self.EXPECTED_SENTENCE_SPLIT_2),
    )
    for text, expected_sentences in cases:
      # Isolate the fixtures so a failure on the first one still lets the
      # second one run and report.
      with self.subTest(text=text):
        actual_sentences = instructions_util.split_into_sentences(text)
        self.assertEqual(expected_sentences, actual_sentences)

  def test_generate_keywords(self):
    """Checks that `generate_keywords` returns as many items as requested."""
    num_keywords = 10
    self.assertLen(instructions_util.generate_keywords(num_keywords), num_keywords)
120
121
if __name__ == "__main__":
  # Hand control to absl's test runner when the file is executed directly.
  absltest.main()
124