google-research

Форк
0
/
instructions_util_test.py 
123 строки · 4.2 Кб
1
# coding=utf-8
2
# Copyright 2024 The Google Research Authors.
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
#     http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15

16
"""Test for utility library of instructions."""
17

18
from absl.testing import absltest
19
from absl.testing import parameterized
20
from instruction_following_eval import instructions_util
21

22

23
class InstructionsUtilTest(parameterized.TestCase):
  """Tests for the helpers exposed by `instructions_util`.

  Covers `count_words`, `count_sentences`, `split_into_sentences` and
  `generate_keywords`. Fixtures are kept as class attributes next to the
  tests that consume them.
  """

  # Each word-count case is a `(text, expected_num_words)` pair.
  TEST_WORD_COUNT_CASE_1 = ("word1, word2, word3, word4.", 4)

  TEST_WORD_COUNT_CASE_2 = (
      """
      Bard can you tell me which is the best optimization method for the
      transition from an hydro-thermal system to an hydro-renewables system""",
      24)

  # The expected count of 6 means the hyphenated token contributes two words.
  TEST_WORD_COUNT_CASE_3 = (
      """
      Hyphenated-word has two word counts.
      """, 6)

  def test_word_count(self):
    """Checks `count_words` against every word-count fixture.

    Each fixture runs in its own sub-test (labelled with the input text) so
    that one failing case does not hide the others.
    """
    cases = (
        self.TEST_WORD_COUNT_CASE_1,
        self.TEST_WORD_COUNT_CASE_2,
        self.TEST_WORD_COUNT_CASE_3,
    )
    for text, expected_num_words in cases:
      with self.subTest(text):
        actual_num_words = instructions_util.count_words(text)
        self.assertEqual(expected_num_words, actual_num_words)

  @parameterized.named_parameters(
      [
          {  # pylint: disable=g-complex-comprehension
              "testcase_name": (
                  f"_response={response}_num_sentences={num_sentences}"
              ),
              "response": response,
              "num_sentences": num_sentences,
          }
          for response, num_sentences in [
              ("xx,x. xx,x! xx/x. x{x}x? x.", 5),
              ("xx,x! xxxx. x(x)x?", 3),
              ("xxxx. xx,x! xx|x. x&x x?", 4),
              ("xx-x]xx,x! x{x}xx,x.", 2),
          ]
      ]
  )
  def test_count_sentences(self, response, num_sentences):
    """Checks that `count_sentences` returns the expected sentence count.

    Args:
      response: Input text handed to `count_sentences`.
      num_sentences: Number of sentences expected for `response`.
    """
    actual_num_sentences = instructions_util.count_sentences(response)
    self.assertEqual(num_sentences, actual_num_sentences)

  # Inputs for the sentence splitter. The continuation lines intentionally
  # start at column 0: the whitespace is part of the string under test.
  TEST_SENTENCE_SPLIT_1 = """
  Google is a technology company. It was founded in 1998 by Larry Page
and Sergey Brin. Google's mission is to organize the world's information
and make it universally accessible and useful.
  """

  # Contains periods that do not end a sentence ("U.S.A", "Ph.D.", ".com").
  # NOTE: "haven" appears in both this input and the expected output below;
  # change both together if it is ever corrected.
  TEST_SENTENCE_SPLIT_2 = """
  The U.S.A has many Ph.D. students. They will often haven a .com website
sharing the research that they have done.
  """

  EXPECTED_SENTENCE_SPLIT_1 = [
      "Google is a technology company.",
      "It was founded in 1998 by Larry Page and Sergey Brin.",
      (
          "Google's mission is to organize the world's information and make it"
          " universally accessible and useful."
      ),
  ]

  EXPECTED_SENTENCE_SPLIT_2 = [
      "The U.S.A has many Ph.D. students.",
      (
          "They will often haven a .com website sharing the research that they"
          " have done."
      ),
  ]

  def test_sentence_splitter(self):
    """Checks `split_into_sentences` against both sentence-split fixtures.

    Each fixture is verified in its own sub-test so that a failure on the
    first one does not prevent the second from being reported.
    """
    cases = (
        ("split_1", self.TEST_SENTENCE_SPLIT_1, self.EXPECTED_SENTENCE_SPLIT_1),
        ("split_2", self.TEST_SENTENCE_SPLIT_2, self.EXPECTED_SENTENCE_SPLIT_2),
    )
    for label, text, expected_sentences in cases:
      with self.subTest(label):
        actual_sentences = instructions_util.split_into_sentences(text)
        self.assertEqual(expected_sentences, actual_sentences)

  def test_generate_keywords(self):
    """Checks that `generate_keywords(10)` yields exactly 10 items."""
    self.assertLen(instructions_util.generate_keywords(10), 10)
120

121

122
if __name__ == "__main__":
  # Hand control to absltest's entry point when the file is run as a script.
  absltest.main()
124

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.