google-research

Форк
0
/
inference_example.ipynb 
2247 строк · 228.5 Кб
1
{
2
  "cells": [
3
    {
4
      "cell_type": "markdown",
5
      "metadata": {
6
        "id": "jPI3U6mnP6x_"
7
      },
8
      "source": [
9
        "Copyright 2023 Google LLC\n",
10
        "\n",
11
        "Licensed under the Apache License, Version 2.0 (the \"License\");\n",
12
        "you may not use this file except in compliance with the License.\n",
13
        "You may obtain a copy of the License at\n",
14
        "\n",
15
        "    https://www.apache.org/licenses/LICENSE-2.0\n",
16
        "\n",
17
        "Unless required by applicable law or agreed to in writing, software\n",
18
        "distributed under the License is distributed on an \"AS IS\" BASIS,\n",
19
        "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
20
        "See the License for the specific language governing permissions and\n",
21
        "limitations under the License."
22
      ]
23
    },
24
    {
25
      "cell_type": "markdown",
26
      "metadata": {
27
        "id": "5qw7dxETZUkg"
28
      },
29
      "source": [
30
        "# Colab for running inference with MADLAD models\n",
31
        "\n",
32
        "Adapted from the [T5X inference tutorial](https://colab.research.google.com/github/google-research/t5x/blob/main/t5x/notebooks/inference.ipynb#scrollTo=f9_BPXG_QgJs).\n",
33
        "\n",
34
        "*Special thanks to Juarez Bochi*\n",
35
        "\n",
36
        "\n"
37
      ]
38
    },
39
    {
40
      "cell_type": "markdown",
41
      "metadata": {
42
        "id": "bqZYp90PIa1t"
43
      },
44
      "source": [
45
        "## Install packages"
46
      ]
47
    },
48
    {
49
      "cell_type": "code",
50
      "execution_count": null,
51
      "metadata": {
52
        "colab": {
53
          "base_uri": "https://localhost:8080/"
54
        },
55
        "id": "M23bwrwVVYtL",
56
        "outputId": "8305bdb7-ba6d-457a-e026-5849fc5286a3"
57
      },
58
      "outputs": [
59
        {
60
          "name": "stdout",
61
          "output_type": "stream",
62
          "text": [
63
            "Cloning into 't5x'...\n",
64
            "remote: Enumerating objects: 6284, done.\u001b[K\n",
65
            "remote: Counting objects: 100% (158/158), done.\u001b[K\n",
66
            "remote: Compressing objects: 100% (90/90), done.\u001b[K\n",
67
            "remote: Total 6284 (delta 82), reused 103 (delta 66), pack-reused 6126\u001b[K\n",
68
            "Receiving objects: 100% (6284/6284), 9.38 MiB | 3.58 MiB/s, done.\n",
69
            "Resolving deltas: 100% (4461/4461), done.\n"
70
          ]
71
        }
72
      ],
73
      "source": [
74
        "!git clone https://github.com/google-research/t5x.git"
75
      ]
76
    },
77
    {
78
      "cell_type": "code",
79
      "execution_count": null,
80
      "metadata": {
81
        "colab": {
82
          "base_uri": "https://localhost:8080/"
83
        },
84
        "id": "p10CTfDTcqEb",
85
        "outputId": "1a102ea2-88e3-4f33-d194-f44e957e9b9e"
86
      },
87
      "outputs": [
88
        {
89
          "name": "stdout",
90
          "output_type": "stream",
91
          "text": [
92
            "\u001b[33mWARNING: Skipping jax as it is not installed.\u001b[0m\u001b[33m\n",
93
            "\u001b[0m\u001b[33mWARNING: Skipping jaxlib as it is not installed.\u001b[0m\u001b[33m\n",
94
            "\u001b[0mCollecting jax[cpu]\n",
95
            "  Using cached jax-0.4.20-py3-none-any.whl.metadata (23 kB)\n",
96
            "Collecting ml-dtypes\u003e=0.2.0 (from jax[cpu])\n",
97
            "  Downloading ml_dtypes-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)\n",
98
            "Collecting numpy\u003e=1.22 (from jax[cpu])\n",
99
            "  Downloading numpy-1.26.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)\n",
100
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.2/61.2 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
101
            "\u001b[?25hCollecting opt-einsum (from jax[cpu])\n",
102
            "  Using cached opt_einsum-3.3.0-py3-none-any.whl (65 kB)\n",
103
            "Collecting scipy\u003e=1.9 (from jax[cpu])\n",
104
            "  Downloading scipy-1.11.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)\n",
105
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.4/60.4 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
106
            "\u001b[?25hCollecting jaxlib==0.4.20 (from jax[cpu])\n",
107
            "  Downloading jaxlib-0.4.20-cp310-cp310-manylinux2014_x86_64.whl.metadata (2.1 kB)\n",
108
            "Downloading jaxlib-0.4.20-cp310-cp310-manylinux2014_x86_64.whl (85.8 MB)\n",
109
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m85.8/85.8 MB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
110
            "\u001b[?25hDownloading ml_dtypes-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (206 kB)\n",
111
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m206.7/206.7 kB\u001b[0m \u001b[31m24.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
112
            "\u001b[?25hDownloading numpy-1.26.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.2 MB)\n",
113
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m18.2/18.2 MB\u001b[0m \u001b[31m83.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
114
            "\u001b[?25hDownloading scipy-1.11.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (36.4 MB)\n",
115
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m36.4/36.4 MB\u001b[0m \u001b[31m62.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
116
            "\u001b[?25hUsing cached jax-0.4.20-py3-none-any.whl (1.7 MB)\n",
117
            "Installing collected packages: numpy, scipy, opt-einsum, ml-dtypes, jaxlib, jax\n",
118
            "Successfully installed jax-0.4.20 jaxlib-0.4.20 ml-dtypes-0.3.1 numpy-1.26.1 opt-einsum-3.3.0 scipy-1.11.3\n",
119
            "Processing /usr/local/google/home/biaojiaxing/work/tmp/jupyter-server/t5x\n",
120
            "  Preparing metadata (setup.py) ... \u001b[?25l-\b \bdone\n",
121
            "\u001b[?25hCollecting airio@ git+https://github.com/google/airio#egg=airio (from t5x==0.0.0)\n",
122
            "  Cloning https://github.com/google/airio to /tmp/pip-install-aawkrx82/airio_b13fee94cd1449d09f54f56a8c0c3345\n",
123
            "  Running command git clone --filter=blob:none --quiet https://github.com/google/airio /tmp/pip-install-aawkrx82/airio_b13fee94cd1449d09f54f56a8c0c3345\n",
124
            "  Resolved https://github.com/google/airio to commit c33b6824c0c7948d4a399f0969e12b3acf9de024\n",
125
            "  Preparing metadata (setup.py) ... \u001b[?25l-\b \bdone\n",
126
            "\u001b[?25hCollecting clu@ git+https://github.com/google/CommonLoopUtils#egg=clu (from t5x==0.0.0)\n",
127
            "  Cloning https://github.com/google/CommonLoopUtils to /tmp/pip-install-aawkrx82/clu_18b484b192664da2896e30b530b92fe3\n",
128
            "  Running command git clone --filter=blob:none --quiet https://github.com/google/CommonLoopUtils /tmp/pip-install-aawkrx82/clu_18b484b192664da2896e30b530b92fe3\n",
129
            "  Resolved https://github.com/google/CommonLoopUtils to commit be91a3100e06ca16265e5e823e7a146f05b09e57\n",
130
            "  Preparing metadata (setup.py) ... \u001b[?25l-\b \bdone\n",
131
            "\u001b[?25hCollecting flax@ git+https://github.com/google/flax#egg=flax (from t5x==0.0.0)\n",
132
            "  Cloning https://github.com/google/flax to /tmp/pip-install-aawkrx82/flax_341f5a9a72c34b469a334d919ffeb0c3\n",
133
            "  Running command git clone --filter=blob:none --quiet https://github.com/google/flax /tmp/pip-install-aawkrx82/flax_341f5a9a72c34b469a334d919ffeb0c3\n",
134
            "  Resolved https://github.com/google/flax to commit 0b126b83d6b7442904e26db498a8b0b578385379\n",
135
            "  Installing build dependencies ... \u001b[?25l-\b \b\\\b \b|\b \b/\b \bdone\n",
136
            "\u001b[?25h  Getting requirements to build wheel ... \u001b[?25l-\b \b\\\b \bdone\n",
137
            "\u001b[?25h  Installing backend dependencies ... \u001b[?25l-\b \b\\\b \bdone\n",
138
            "\u001b[?25h  Preparing metadata (pyproject.toml) ... \u001b[?25l-\b \b\\\b \bdone\n",
139
            "\u001b[?25hCollecting jestimator@ git+https://github.com/google-research/jestimator#egg=jestimator (from t5x==0.0.0)\n",
140
            "  Cloning https://github.com/google-research/jestimator to /tmp/pip-install-aawkrx82/jestimator_99b4cbfb258e4c27ac2681e73c25db42\n",
141
            "  Running command git clone --filter=blob:none --quiet https://github.com/google-research/jestimator /tmp/pip-install-aawkrx82/jestimator_99b4cbfb258e4c27ac2681e73c25db42\n",
142
            "  Resolved https://github.com/google-research/jestimator to commit fa143d93e337ca8ab77c4510baf21ae52af24ab2\n",
143
            "  Installing build dependencies ... \u001b[?25l-\b \b\\\b \bdone\n",
144
            "\u001b[?25h  Getting requirements to build wheel ... \u001b[?25ldone\n",
145
            "\u001b[?25h  Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n",
146
            "\u001b[?25hCollecting optax@ git+https://github.com/deepmind/optax#egg=optax (from t5x==0.0.0)\n",
147
            "  Cloning https://github.com/deepmind/optax to /tmp/pip-install-aawkrx82/optax_bf4949a07cf24bc29b2e9f9937cf284d\n",
148
            "  Running command git clone --filter=blob:none --quiet https://github.com/deepmind/optax /tmp/pip-install-aawkrx82/optax_bf4949a07cf24bc29b2e9f9937cf284d\n",
149
            "  Resolved https://github.com/deepmind/optax to commit 6e7b3a4f78068852609a15fa55de3a20335c5254\n",
150
            "  Installing build dependencies ... \u001b[?25l-\b \b\\\b \bdone\n",
151
            "\u001b[?25h  Getting requirements to build wheel ... \u001b[?25ldone\n",
152
            "\u001b[?25h  Preparing metadata (pyproject.toml) ... \u001b[?25l-\b \bdone\n",
153
            "\u001b[?25hCollecting seqio@ git+https://github.com/google/seqio#egg=seqio (from t5x==0.0.0)\n",
154
            "  Cloning https://github.com/google/seqio to /tmp/pip-install-aawkrx82/seqio_0b004bb655f944bc9774e08b9b34f95b\n",
155
            "  Running command git clone --filter=blob:none --quiet https://github.com/google/seqio /tmp/pip-install-aawkrx82/seqio_0b004bb655f944bc9774e08b9b34f95b\n",
156
            "  Resolved https://github.com/google/seqio to commit c6f6dded4f878a1724ec17887c2465bee398f82d\n",
157
            "  Preparing metadata (setup.py) ... \u001b[?25l-\b \bdone\n",
158
            "\u001b[?25hCollecting absl-py (from t5x==0.0.0)\n",
159
            "  Using cached absl_py-2.0.0-py3-none-any.whl.metadata (2.3 kB)\n",
160
            "Collecting cached_property (from t5x==0.0.0)\n",
161
            "  Using cached cached_property-1.5.2-py2.py3-none-any.whl (7.6 kB)\n",
162
            "Collecting fiddle\u003e=0.2.5 (from t5x==0.0.0)\n",
163
            "  Using cached fiddle-0.2.11-py3-none-any.whl.metadata (2.2 kB)\n",
164
            "Collecting gin-config (from t5x==0.0.0)\n",
165
            "  Using cached gin_config-0.5.0-py3-none-any.whl (61 kB)\n",
166
            "Requirement already satisfied: jax\u003e=0.4.16 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from t5x==0.0.0) (0.4.20)\n",
167
            "Requirement already satisfied: jaxlib\u003e=0.4.16 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from t5x==0.0.0) (0.4.20)\n",
168
            "Requirement already satisfied: numpy in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from t5x==0.0.0) (1.26.1)\n",
169
            "Collecting orbax-checkpoint (from t5x==0.0.0)\n",
170
            "  Using cached orbax_checkpoint-0.4.1-py3-none-any.whl.metadata (1.7 kB)\n",
171
            "Collecting tensorflow-cpu (from t5x==0.0.0)\n",
172
            "  Downloading tensorflow_cpu-2.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)\n",
173
            "Collecting tensorstore\u003e=0.1.20 (from t5x==0.0.0)\n",
174
            "  Downloading tensorstore-0.1.47-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.9 kB)\n",
175
            "Collecting protobuf==3.20.3 (from t5x==0.0.0)\n",
176
            "  Downloading protobuf-3.20.3-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)\n",
177
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m19.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
178
            "\u001b[?25hCollecting graphviz (from fiddle\u003e=0.2.5-\u003et5x==0.0.0)\n",
179
            "  Downloading graphviz-0.20.1-py3-none-any.whl (47 kB)\n",
180
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.0/47.0 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
181
            "\u001b[?25hCollecting libcst (from fiddle\u003e=0.2.5-\u003et5x==0.0.0)\n",
182
            "  Downloading libcst-1.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (17 kB)\n",
183
            "Requirement already satisfied: typing-extensions in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from fiddle\u003e=0.2.5-\u003et5x==0.0.0) (4.8.0)\n",
184
            "Requirement already satisfied: ml-dtypes\u003e=0.2.0 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from jax\u003e=0.4.16-\u003et5x==0.0.0) (0.3.1)\n",
185
            "Requirement already satisfied: opt-einsum in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from jax\u003e=0.4.16-\u003et5x==0.0.0) (3.3.0)\n",
186
            "Requirement already satisfied: scipy\u003e=1.9 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from jax\u003e=0.4.16-\u003et5x==0.0.0) (1.11.3)\n",
187
            "Collecting grain-nightly (from airio@ git+https://github.com/google/airio#egg=airio-\u003et5x==0.0.0)\n",
188
            "  Downloading grain_nightly-0.0.2-py3-none-any.whl.metadata (13 kB)\n",
189
            "Collecting tfds-nightly==4.9.2.dev202308090034 (from airio@ git+https://github.com/google/airio#egg=airio-\u003et5x==0.0.0)\n",
190
            "  Downloading tfds_nightly-4.9.2.dev202308090034-py3-none-any.whl.metadata (9.3 kB)\n",
191
            "Collecting array-record (from tfds-nightly==4.9.2.dev202308090034-\u003eairio@ git+https://github.com/google/airio#egg=airio-\u003et5x==0.0.0)\n",
192
            "  Downloading array_record-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (503 bytes)\n",
193
            "Collecting click (from tfds-nightly==4.9.2.dev202308090034-\u003eairio@ git+https://github.com/google/airio#egg=airio-\u003et5x==0.0.0)\n",
194
            "  Downloading click-8.1.7-py3-none-any.whl.metadata (3.0 kB)\n",
195
            "Collecting dm-tree (from tfds-nightly==4.9.2.dev202308090034-\u003eairio@ git+https://github.com/google/airio#egg=airio-\u003et5x==0.0.0)\n",
196
            "  Downloading dm_tree-0.1.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (152 kB)\n",
197
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m152.8/152.8 kB\u001b[0m \u001b[31m18.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
198
            "\u001b[?25hCollecting etils\u003e=0.9.0 (from etils[enp,epath,etree]\u003e=0.9.0-\u003etfds-nightly==4.9.2.dev202308090034-\u003eairio@ git+https://github.com/google/airio#egg=airio-\u003et5x==0.0.0)\n",
199
            "  Downloading etils-1.5.2-py3-none-any.whl.metadata (6.3 kB)\n",
200
            "Collecting promise (from tfds-nightly==4.9.2.dev202308090034-\u003eairio@ git+https://github.com/google/airio#egg=airio-\u003et5x==0.0.0)\n",
201
            "  Downloading promise-2.3.tar.gz (19 kB)\n",
202
            "  Preparing metadata (setup.py) ... \u001b[?25l-\b \bdone\n",
203
            "\u001b[?25hRequirement already satisfied: psutil in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tfds-nightly==4.9.2.dev202308090034-\u003eairio@ git+https://github.com/google/airio#egg=airio-\u003et5x==0.0.0) (5.9.6)\n",
204
            "Requirement already satisfied: requests\u003e=2.19.0 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tfds-nightly==4.9.2.dev202308090034-\u003eairio@ git+https://github.com/google/airio#egg=airio-\u003et5x==0.0.0) (2.31.0)\n",
205
            "Collecting tensorflow-metadata (from tfds-nightly==4.9.2.dev202308090034-\u003eairio@ git+https://github.com/google/airio#egg=airio-\u003et5x==0.0.0)\n",
206
            "  Downloading tensorflow_metadata-1.14.0-py3-none-any.whl.metadata (2.1 kB)\n",
207
            "Collecting termcolor (from tfds-nightly==4.9.2.dev202308090034-\u003eairio@ git+https://github.com/google/airio#egg=airio-\u003et5x==0.0.0)\n",
208
            "  Downloading termcolor-2.3.0-py3-none-any.whl (6.9 kB)\n",
209
            "Collecting toml (from tfds-nightly==4.9.2.dev202308090034-\u003eairio@ git+https://github.com/google/airio#egg=airio-\u003et5x==0.0.0)\n",
210
            "  Downloading toml-0.10.2-py2.py3-none-any.whl (16 kB)\n",
211
            "Collecting tqdm (from tfds-nightly==4.9.2.dev202308090034-\u003eairio@ git+https://github.com/google/airio#egg=airio-\u003et5x==0.0.0)\n",
212
            "  Downloading tqdm-4.66.1-py3-none-any.whl.metadata (57 kB)\n",
213
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.6/57.6 kB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
214
            "\u001b[?25hCollecting wrapt (from tfds-nightly==4.9.2.dev202308090034-\u003eairio@ git+https://github.com/google/airio#egg=airio-\u003et5x==0.0.0)\n",
215
            "  Downloading wrapt-1.15.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (78 kB)\n",
216
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.4/78.4 kB\u001b[0m \u001b[31m11.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
217
            "\u001b[?25hCollecting ml_collections (from clu@ git+https://github.com/google/CommonLoopUtils#egg=clu-\u003et5x==0.0.0)\n",
218
            "  Downloading ml_collections-0.1.1.tar.gz (77 kB)\n",
219
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
220
            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l-\b \bdone\n",
221
            "\u001b[?25hRequirement already satisfied: packaging in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from clu@ git+https://github.com/google/CommonLoopUtils#egg=clu-\u003et5x==0.0.0) (23.2)\n",
222
            "Collecting msgpack (from flax@ git+https://github.com/google/flax#egg=flax-\u003et5x==0.0.0)\n",
223
            "  Downloading msgpack-1.0.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)\n",
224
            "Collecting rich\u003e=11.1 (from flax@ git+https://github.com/google/flax#egg=flax-\u003et5x==0.0.0)\n",
225
            "  Downloading rich-13.6.0-py3-none-any.whl.metadata (18 kB)\n",
226
            "Requirement already satisfied: PyYAML\u003e=5.4.1 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from flax@ git+https://github.com/google/flax#egg=flax-\u003et5x==0.0.0) (6.0.1)\n",
227
            "Collecting chex\u003e=0.1.5 (from optax@ git+https://github.com/deepmind/optax#egg=optax-\u003et5x==0.0.0)\n",
228
            "  Downloading chex-0.1.84-py3-none-any.whl.metadata (17 kB)\n",
229
            "Requirement already satisfied: nest_asyncio in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from orbax-checkpoint-\u003et5x==0.0.0) (1.5.8)\n",
230
            "Collecting editdistance (from seqio@ git+https://github.com/google/seqio#egg=seqio-\u003et5x==0.0.0)\n",
231
            "  Downloading editdistance-0.6.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (282 kB)\n",
232
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m282.6/282.6 kB\u001b[0m \u001b[31m28.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
233
            "\u001b[?25hCollecting pyglove (from seqio@ git+https://github.com/google/seqio#egg=seqio-\u003et5x==0.0.0)\n",
234
            "  Downloading pyglove-0.4.3-py3-none-any.whl.metadata (6.7 kB)\n",
235
            "Collecting sentencepiece (from seqio@ git+https://github.com/google/seqio#egg=seqio-\u003et5x==0.0.0)\n",
236
            "  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
237
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m65.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
238
            "\u001b[?25hCollecting tensorflow-text (from seqio@ git+https://github.com/google/seqio#egg=seqio-\u003et5x==0.0.0)\n",
239
            "  Downloading tensorflow_text-2.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.9 kB)\n",
240
            "Collecting astunparse\u003e=1.6.0 (from tensorflow-cpu-\u003et5x==0.0.0)\n",
241
            "  Downloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)\n",
242
            "Collecting flatbuffers\u003e=23.5.26 (from tensorflow-cpu-\u003et5x==0.0.0)\n",
243
            "  Downloading flatbuffers-23.5.26-py2.py3-none-any.whl.metadata (850 bytes)\n",
244
            "Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,\u003e=0.2.1 (from tensorflow-cpu-\u003et5x==0.0.0)\n",
245
            "  Downloading gast-0.5.4-py3-none-any.whl (19 kB)\n",
246
            "Collecting google-pasta\u003e=0.1.1 (from tensorflow-cpu-\u003et5x==0.0.0)\n",
247
            "  Downloading google_pasta-0.2.0-py3-none-any.whl (57 kB)\n",
248
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.5/57.5 kB\u001b[0m \u001b[31m8.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
249
            "\u001b[?25hCollecting h5py\u003e=2.9.0 (from tensorflow-cpu-\u003et5x==0.0.0)\n",
250
            "  Downloading h5py-3.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.5 kB)\n",
251
            "Collecting libclang\u003e=13.0.0 (from tensorflow-cpu-\u003et5x==0.0.0)\n",
252
            "  Downloading libclang-16.0.6-py2.py3-none-manylinux2010_x86_64.whl.metadata (5.2 kB)\n",
253
            "INFO: pip is looking at multiple versions of tensorflow-cpu to determine which version is compatible with other requirements. This could take a while.\n",
254
            "Collecting tensorflow-cpu (from t5x==0.0.0)\n",
255
            "  Downloading tensorflow_cpu-2.13.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)\n",
256
            "Collecting gast\u003c=0.4.0,\u003e=0.2.1 (from tensorflow-cpu-\u003et5x==0.0.0)\n",
257
            "  Downloading gast-0.4.0-py3-none-any.whl (9.8 kB)\n",
258
            "Collecting grpcio\u003c2.0,\u003e=1.24.3 (from tensorflow-cpu-\u003et5x==0.0.0)\n",
259
            "  Downloading grpcio-1.59.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.0 kB)\n",
260
            "Collecting keras\u003c2.14,\u003e=2.13.1 (from tensorflow-cpu-\u003et5x==0.0.0)\n",
261
            "  Downloading keras-2.13.1-py3-none-any.whl.metadata (2.4 kB)\n",
262
            "Collecting numpy (from t5x==0.0.0)\n",
263
            "  Downloading numpy-1.24.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)\n",
264
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.3/17.3 MB\u001b[0m \u001b[31m90.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
265
            "\u001b[?25hRequirement already satisfied: setuptools in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tensorflow-cpu-\u003et5x==0.0.0) (68.0.0)\n",
266
            "Requirement already satisfied: six\u003e=1.12.0 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tensorflow-cpu-\u003et5x==0.0.0) (1.16.0)\n",
267
            "Collecting tensorboard\u003c2.14,\u003e=2.13 (from tensorflow-cpu-\u003et5x==0.0.0)\n",
268
            "  Downloading tensorboard-2.13.0-py3-none-any.whl (5.6 MB)\n",
269
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m96.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
270
            "\u001b[?25hCollecting tensorflow-estimator\u003c2.14,\u003e=2.13.0 (from tensorflow-cpu-\u003et5x==0.0.0)\n",
271
            "  Downloading tensorflow_estimator-2.13.0-py2.py3-none-any.whl.metadata (1.3 kB)\n",
272
            "Collecting typing-extensions (from fiddle\u003e=0.2.5-\u003et5x==0.0.0)\n",
273
            "  Downloading typing_extensions-4.5.0-py3-none-any.whl (27 kB)\n",
274
            "Collecting tensorflow-io-gcs-filesystem\u003e=0.23.1 (from tensorflow-cpu-\u003et5x==0.0.0)\n",
275
            "  Downloading tensorflow_io_gcs_filesystem-0.34.0-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (14 kB)\n",
276
            "Requirement already satisfied: wheel\u003c1.0,\u003e=0.23.0 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from astunparse\u003e=1.6.0-\u003etensorflow-cpu-\u003et5x==0.0.0) (0.41.2)\n",
277
            "Collecting toolz\u003e=0.9.0 (from chex\u003e=0.1.5-\u003eoptax@ git+https://github.com/deepmind/optax#egg=optax-\u003et5x==0.0.0)\n",
278
            "  Downloading toolz-0.12.0-py3-none-any.whl (55 kB)\n",
279
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.8/55.8 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
280
            "\u001b[?25hCollecting markdown-it-py\u003e=2.2.0 (from rich\u003e=11.1-\u003eflax@ git+https://github.com/google/flax#egg=flax-\u003et5x==0.0.0)\n",
281
            "  Downloading markdown_it_py-3.0.0-py3-none-any.whl.metadata (6.9 kB)\n",
282
            "Requirement already satisfied: pygments\u003c3.0.0,\u003e=2.13.0 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from rich\u003e=11.1-\u003eflax@ git+https://github.com/google/flax#egg=flax-\u003et5x==0.0.0) (2.16.1)\n",
283
            "Collecting google-auth\u003c3,\u003e=1.6.3 (from tensorboard\u003c2.14,\u003e=2.13-\u003etensorflow-cpu-\u003et5x==0.0.0)\n",
284
            "  Downloading google_auth-2.23.4-py2.py3-none-any.whl.metadata (4.7 kB)\n",
285
            "Collecting google-auth-oauthlib\u003c1.1,\u003e=0.5 (from tensorboard\u003c2.14,\u003e=2.13-\u003etensorflow-cpu-\u003et5x==0.0.0)\n",
286
            "  Downloading google_auth_oauthlib-1.0.0-py2.py3-none-any.whl (18 kB)\n",
287
            "Collecting markdown\u003e=2.6.8 (from tensorboard\u003c2.14,\u003e=2.13-\u003etensorflow-cpu-\u003et5x==0.0.0)\n",
288
            "  Downloading Markdown-3.5.1-py3-none-any.whl.metadata (7.1 kB)\n",
289
            "Collecting tensorboard-data-server\u003c0.8.0,\u003e=0.7.0 (from tensorboard\u003c2.14,\u003e=2.13-\u003etensorflow-cpu-\u003et5x==0.0.0)\n",
290
            "  Downloading tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl.metadata (1.1 kB)\n",
291
            "Collecting werkzeug\u003e=1.0.1 (from tensorboard\u003c2.14,\u003e=2.13-\u003etensorflow-cpu-\u003et5x==0.0.0)\n",
292
            "  Downloading werkzeug-3.0.1-py3-none-any.whl.metadata (4.1 kB)\n",
293
            "Collecting fsspec (from etils[epath,epy]-\u003eorbax-checkpoint-\u003et5x==0.0.0)\n",
294
            "  Downloading fsspec-2023.10.0-py3-none-any.whl.metadata (6.8 kB)\n",
295
            "Collecting importlib_resources (from etils[epath,epy]-\u003eorbax-checkpoint-\u003et5x==0.0.0)\n",
296
            "  Downloading importlib_resources-6.1.0-py3-none-any.whl.metadata (4.1 kB)\n",
297
            "Collecting zipp (from etils[epath,epy]-\u003eorbax-checkpoint-\u003et5x==0.0.0)\n",
298
            "  Using cached zipp-3.17.0-py3-none-any.whl.metadata (3.7 kB)\n",
299
            "Collecting cloudpickle (from grain-nightly-\u003eairio@ git+https://github.com/google/airio#egg=airio-\u003et5x==0.0.0)\n",
300
            "  Downloading cloudpickle-3.0.0-py3-none-any.whl.metadata (7.0 kB)\n",
301
            "Collecting jaxtyping (from grain-nightly-\u003eairio@ git+https://github.com/google/airio#egg=airio-\u003et5x==0.0.0)\n",
302
            "  Downloading jaxtyping-0.2.23-py3-none-any.whl.metadata (6.4 kB)\n",
303
            "Collecting more-itertools\u003e=9.1.0 (from grain-nightly-\u003eairio@ git+https://github.com/google/airio#egg=airio-\u003et5x==0.0.0)\n",
304
            "  Downloading more_itertools-10.1.0-py3-none-any.whl.metadata (33 kB)\n",
305
            "Collecting typing-inspect\u003e=0.4.0 (from libcst-\u003efiddle\u003e=0.2.5-\u003et5x==0.0.0)\n",
306
            "  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)\n",
307
            "Collecting contextlib2 (from ml_collections-\u003eclu@ git+https://github.com/google/CommonLoopUtils#egg=clu-\u003et5x==0.0.0)\n",
308
            "  Downloading contextlib2-21.6.0-py2.py3-none-any.whl (13 kB)\n",
309
            "Collecting docstring-parser\u003e=0.12 (from pyglove-\u003eseqio@ git+https://github.com/google/seqio#egg=seqio-\u003et5x==0.0.0)\n",
310
            "  Downloading docstring_parser-0.15-py3-none-any.whl (36 kB)\n",
311
            "Collecting tensorflow-hub\u003e=0.13.0 (from tensorflow-text-\u003eseqio@ git+https://github.com/google/seqio#egg=seqio-\u003et5x==0.0.0)\n",
312
            "  Downloading tensorflow_hub-0.15.0-py2.py3-none-any.whl.metadata (1.3 kB)\n",
313
            "Collecting tensorflow\u003c2.15,\u003e=2.14.0 (from tensorflow-text-\u003eseqio@ git+https://github.com/google/seqio#egg=seqio-\u003et5x==0.0.0)\n",
314
            "  Downloading tensorflow-2.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)\n",
315
            "Collecting cachetools\u003c6.0,\u003e=2.0.0 (from google-auth\u003c3,\u003e=1.6.3-\u003etensorboard\u003c2.14,\u003e=2.13-\u003etensorflow-cpu-\u003et5x==0.0.0)\n",
316
            "  Downloading cachetools-5.3.2-py3-none-any.whl.metadata (5.2 kB)\n",
317
            "Collecting pyasn1-modules\u003e=0.2.1 (from google-auth\u003c3,\u003e=1.6.3-\u003etensorboard\u003c2.14,\u003e=2.13-\u003etensorflow-cpu-\u003et5x==0.0.0)\n",
318
            "  Downloading pyasn1_modules-0.3.0-py2.py3-none-any.whl (181 kB)\n",
319
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m181.3/181.3 kB\u001b[0m \u001b[31m21.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
320
            "\u001b[?25hCollecting rsa\u003c5,\u003e=3.1.4 (from google-auth\u003c3,\u003e=1.6.3-\u003etensorboard\u003c2.14,\u003e=2.13-\u003etensorflow-cpu-\u003et5x==0.0.0)\n",
321
            "  Downloading rsa-4.9-py3-none-any.whl (34 kB)\n",
322
            "Collecting requests-oauthlib\u003e=0.7.0 (from google-auth-oauthlib\u003c1.1,\u003e=0.5-\u003etensorboard\u003c2.14,\u003e=2.13-\u003etensorflow-cpu-\u003et5x==0.0.0)\n",
323
            "  Downloading requests_oauthlib-1.3.1-py2.py3-none-any.whl (23 kB)\n",
324
            "Collecting mdurl~=0.1 (from markdown-it-py\u003e=2.2.0-\u003erich\u003e=11.1-\u003eflax@ git+https://github.com/google/flax#egg=flax-\u003et5x==0.0.0)\n",
325
            "  Downloading mdurl-0.1.2-py3-none-any.whl (10.0 kB)\n",
326
            "Requirement already satisfied: charset-normalizer\u003c4,\u003e=2 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from requests\u003e=2.19.0-\u003etfds-nightly==4.9.2.dev202308090034-\u003eairio@ git+https://github.com/google/airio#egg=airio-\u003et5x==0.0.0) (3.3.2)\n",
327
            "Requirement already satisfied: idna\u003c4,\u003e=2.5 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from requests\u003e=2.19.0-\u003etfds-nightly==4.9.2.dev202308090034-\u003eairio@ git+https://github.com/google/airio#egg=airio-\u003et5x==0.0.0) (3.4)\n",
328
            "Requirement already satisfied: urllib3\u003c3,\u003e=1.21.1 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from requests\u003e=2.19.0-\u003etfds-nightly==4.9.2.dev202308090034-\u003eairio@ git+https://github.com/google/airio#egg=airio-\u003et5x==0.0.0) (2.0.7)\n",
329
            "Requirement already satisfied: certifi\u003e=2017.4.17 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from requests\u003e=2.19.0-\u003etfds-nightly==4.9.2.dev202308090034-\u003eairio@ git+https://github.com/google/airio#egg=airio-\u003et5x==0.0.0) (2023.7.22)\n",
330
            "INFO: pip is looking at multiple versions of tensorflow to determine which version is compatible with other requirements. This could take a while.\n",
331
            "Collecting tensorflow-text (from seqio@ git+https://github.com/google/seqio#egg=seqio-\u003et5x==0.0.0)\n",
332
            "  Downloading tensorflow_text-2.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.0 kB)\n",
333
            "Collecting tensorflow\u003c2.14,\u003e=2.13.0 (from tensorflow-text-\u003eseqio@ git+https://github.com/google/seqio#egg=seqio-\u003et5x==0.0.0)\n",
334
            "  Downloading tensorflow-2.13.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)\n",
335
            "Collecting mypy-extensions\u003e=0.3.0 (from typing-inspect\u003e=0.4.0-\u003elibcst-\u003efiddle\u003e=0.2.5-\u003et5x==0.0.0)\n",
336
            "  Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n",
337
            "Requirement already satisfied: MarkupSafe\u003e=2.1.1 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from werkzeug\u003e=1.0.1-\u003etensorboard\u003c2.14,\u003e=2.13-\u003etensorflow-cpu-\u003et5x==0.0.0) (2.1.3)\n",
338
            "Collecting typeguard\u003c3,\u003e=2.13.3 (from jaxtyping-\u003egrain-nightly-\u003eairio@ git+https://github.com/google/airio#egg=airio-\u003et5x==0.0.0)\n",
339
            "  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)\n",
340
            "Collecting absl-py (from t5x==0.0.0)\n",
341
            "  Downloading absl_py-1.4.0-py3-none-any.whl (126 kB)\n",
342
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m126.5/126.5 kB\u001b[0m \u001b[31m18.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
343
            "\u001b[?25hCollecting googleapis-common-protos\u003c2,\u003e=1.52.0 (from tensorflow-metadata-\u003etfds-nightly==4.9.2.dev202308090034-\u003eairio@ git+https://github.com/google/airio#egg=airio-\u003et5x==0.0.0)\n",
344
            "  Downloading googleapis_common_protos-1.61.0-py2.py3-none-any.whl.metadata (1.5 kB)\n",
345
            "Collecting pyasn1\u003c0.6.0,\u003e=0.4.6 (from pyasn1-modules\u003e=0.2.1-\u003egoogle-auth\u003c3,\u003e=1.6.3-\u003etensorboard\u003c2.14,\u003e=2.13-\u003etensorflow-cpu-\u003et5x==0.0.0)\n",
346
            "  Downloading pyasn1-0.5.0-py2.py3-none-any.whl (83 kB)\n",
347
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.9/83.9 kB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
348
            "\u001b[?25hCollecting oauthlib\u003e=3.0.0 (from requests-oauthlib\u003e=0.7.0-\u003egoogle-auth-oauthlib\u003c1.1,\u003e=0.5-\u003etensorboard\u003c2.14,\u003e=2.13-\u003etensorflow-cpu-\u003et5x==0.0.0)\n",
349
            "  Downloading oauthlib-3.2.2-py3-none-any.whl (151 kB)\n",
350
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m151.7/151.7 kB\u001b[0m \u001b[31m20.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
351
            "\u001b[?25hDownloading fiddle-0.2.11-py3-none-any.whl (396 kB)\n",
352
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m396.4/396.4 kB\u001b[0m \u001b[31m35.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
353
            "\u001b[?25hDownloading tensorstore-0.1.47-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.7 MB)\n",
354
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.7/13.7 MB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
355
            "\u001b[?25hDownloading tfds_nightly-4.9.2.dev202308090034-py3-none-any.whl (5.0 MB)\n",
356
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m22.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
357
            "\u001b[?25hDownloading orbax_checkpoint-0.4.1-py3-none-any.whl (113 kB)\n",
358
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m113.5/113.5 kB\u001b[0m \u001b[31m14.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
359
            "\u001b[?25hDownloading tensorflow_cpu-2.13.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (186.6 MB)\n",
360
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m186.6/186.6 MB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
361
            "\u001b[?25hDownloading chex-0.1.84-py3-none-any.whl (94 kB)\n",
362
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m94.8/94.8 kB\u001b[0m \u001b[31m12.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
363
            "\u001b[?25hDownloading flatbuffers-23.5.26-py2.py3-none-any.whl (26 kB)\n",
364
            "Downloading grpcio-1.59.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.3 MB)\n",
365
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.3/5.3 MB\u001b[0m \u001b[31m92.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
366
            "\u001b[?25hDownloading h5py-3.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.8 MB)\n",
367
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.8/4.8 MB\u001b[0m \u001b[31m94.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
368
            "\u001b[?25hDownloading keras-2.13.1-py3-none-any.whl (1.7 MB)\n",
369
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m76.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
370
            "\u001b[?25hDownloading libclang-16.0.6-py2.py3-none-manylinux2010_x86_64.whl (22.9 MB)\n",
371
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m22.9/22.9 MB\u001b[0m \u001b[31m79.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
372
            "\u001b[?25hDownloading rich-13.6.0-py3-none-any.whl (239 kB)\n",
373
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m239.8/239.8 kB\u001b[0m \u001b[31m27.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
374
            "\u001b[?25hDownloading tensorflow_estimator-2.13.0-py2.py3-none-any.whl (440 kB)\n",
375
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.8/440.8 kB\u001b[0m \u001b[31m40.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
376
            "\u001b[?25hDownloading tensorflow_io_gcs_filesystem-0.34.0-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (2.4 MB)\n",
377
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m81.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
378
            "\u001b[?25hDownloading grain_nightly-0.0.2-py3-none-any.whl (353 kB)\n",
379
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m353.1/353.1 kB\u001b[0m \u001b[31m36.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
380
            "\u001b[?25hDownloading libcst-1.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)\n",
381
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.2/3.2 MB\u001b[0m \u001b[31m88.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
382
            "\u001b[?25hDownloading msgpack-1.0.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (530 kB)\n",
383
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m530.8/530.8 kB\u001b[0m \u001b[31m45.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
384
            "\u001b[?25hDownloading pyglove-0.4.3-py3-none-any.whl (568 kB)\n",
385
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m568.5/568.5 kB\u001b[0m \u001b[31m47.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
386
            "\u001b[?25hDownloading tensorflow_text-2.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.5 MB)\n",
387
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.5/6.5 MB\u001b[0m \u001b[31m10.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
388
            "\u001b[?25hDownloading tensorflow-2.13.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (479.7 MB)\n",
389
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m479.7/479.7 MB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
390
            "\u001b[?25hDownloading etils-1.5.2-py3-none-any.whl (140 kB)\n",
391
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m140.6/140.6 kB\u001b[0m \u001b[31m15.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
392
            "\u001b[?25hDownloading google_auth-2.23.4-py2.py3-none-any.whl (183 kB)\n",
393
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m183.3/183.3 kB\u001b[0m \u001b[31m24.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
394
            "\u001b[?25hDownloading Markdown-3.5.1-py3-none-any.whl (102 kB)\n",
395
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m102.2/102.2 kB\u001b[0m \u001b[31m14.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
396
            "\u001b[?25hDownloading markdown_it_py-3.0.0-py3-none-any.whl (87 kB)\n",
397
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.5/87.5 kB\u001b[0m \u001b[31m11.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
398
            "\u001b[?25hDownloading more_itertools-10.1.0-py3-none-any.whl (55 kB)\n",
399
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.8/55.8 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
400
            "\u001b[?25hDownloading tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl (6.6 MB)\n",
401
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.6/6.6 MB\u001b[0m \u001b[31m90.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
402
            "\u001b[?25hDownloading tensorflow_hub-0.15.0-py2.py3-none-any.whl (85 kB)\n",
403
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m85.4/85.4 kB\u001b[0m \u001b[31m12.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
404
            "\u001b[?25hDownloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n",
405
            "Downloading werkzeug-3.0.1-py3-none-any.whl (226 kB)\n",
406
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m226.7/226.7 kB\u001b[0m \u001b[31m26.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
407
            "\u001b[?25hDownloading array_record-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)\n",
408
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.0/3.0 MB\u001b[0m \u001b[31m83.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
409
            "\u001b[?25hDownloading click-8.1.7-py3-none-any.whl (97 kB)\n",
410
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m97.9/97.9 kB\u001b[0m \u001b[31m13.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
411
            "\u001b[?25hDownloading cloudpickle-3.0.0-py3-none-any.whl (20 kB)\n",
412
            "Downloading fsspec-2023.10.0-py3-none-any.whl (166 kB)\n",
413
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m166.4/166.4 kB\u001b[0m \u001b[31m22.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
414
            "\u001b[?25hDownloading importlib_resources-6.1.0-py3-none-any.whl (33 kB)\n",
415
            "Downloading jaxtyping-0.2.23-py3-none-any.whl (29 kB)\n",
416
            "Downloading tensorflow_metadata-1.14.0-py3-none-any.whl (28 kB)\n",
417
            "Downloading tqdm-4.66.1-py3-none-any.whl (78 kB)\n",
418
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.3/78.3 kB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
419
            "\u001b[?25hUsing cached zipp-3.17.0-py3-none-any.whl (7.4 kB)\n",
420
            "Downloading cachetools-5.3.2-py3-none-any.whl (9.3 kB)\n",
421
            "Downloading googleapis_common_protos-1.61.0-py2.py3-none-any.whl (230 kB)\n",
422
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m230.9/230.9 kB\u001b[0m \u001b[31m28.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
423
            "\u001b[?25hBuilding wheels for collected packages: t5x, airio, clu, flax, jestimator, optax, seqio, ml_collections, promise\n",
424
            "  Building wheel for t5x (setup.py) ... \u001b[?25l-\b \b\\\b \b|\b \b/\b \bdone\n",
425
            "\u001b[?25h  Created wheel for t5x: filename=t5x-0.0.0-py3-none-any.whl size=537539 sha256=a82724c7878f5136e8e4390f96556c73f7777e6d206c3ade1e3dfdd0d0b7c6d4\n",
426
            "  Stored in directory: /tmp/pip-ephem-wheel-cache-3xvz7uo5/wheels/8c/5e/f7/a203ceeee7df469882dcaab822728183bf86a6843d58295968\n",
427
            "  Building wheel for airio (setup.py) ... \u001b[?25l-\b \bdone\n",
428
            "\u001b[?25h  Created wheel for airio: filename=airio-0.0.1-py3-none-any.whl size=52299 sha256=413138cda2cfa0681b48459c8a645f2b4ef5bc82f617924219280656bb832595\n",
429
            "  Stored in directory: /tmp/pip-ephem-wheel-cache-3xvz7uo5/wheels/78/97/3e/060f3045a4aedeb17b240430126be515c0fc6d55808195c2e4\n",
430
            "  Building wheel for clu (setup.py) ... \u001b[?25l-\b \bdone\n",
431
            "\u001b[?25h  Created wheel for clu: filename=clu-0.0.11-py3-none-any.whl size=100847 sha256=28d7a44bc2933dcd8f9d8e4f903f160789737b5c006a95cf434dc38f5574e1ec\n",
432
            "  Stored in directory: /tmp/pip-ephem-wheel-cache-3xvz7uo5/wheels/39/c1/24/1c54cf87b5c57f73c428681b03e228d5187b900bbd1c33b90f\n",
433
            "  Building wheel for flax (pyproject.toml) ... \u001b[?25l-\b \b\\\b \bdone\n",
434
            "\u001b[?25h  Created wheel for flax: filename=flax-0.8.0-py3-none-any.whl size=244220 sha256=49d9eb924adc532907f7a5f6955df3564dfe5ab89274bd9dcbb6650d330a92da\n",
435
            "  Stored in directory: /tmp/pip-ephem-wheel-cache-3xvz7uo5/wheels/c2/b5/6c/e10d1bcdb3a30e0ffa048cf3a43762bb6f93c8262a27dd9a5d\n",
436
            "  Building wheel for jestimator (pyproject.toml) ... \u001b[?25ldone\n",
437
            "\u001b[?25h  Created wheel for jestimator: filename=jestimator-0.3.3-py3-none-any.whl size=2114058 sha256=4b93b8450e6d2bc76667fc810df11f133ef8765c386729c64334bcdabd5557f6\n",
438
            "  Stored in directory: /tmp/pip-ephem-wheel-cache-3xvz7uo5/wheels/0e/85/c0/a42e70edeb24559d5bb400f24e4f2220f99ad02c1ef7ceaac6\n",
439
            "  Building wheel for optax (pyproject.toml) ... \u001b[?25l-\b \bdone\n",
440
            "\u001b[?25h  Created wheel for optax: filename=optax-0.1.8.dev0-py3-none-any.whl size=175556 sha256=23566c0d7afbb0c0f4a1f88a44263c38f7417b6703e81ae5b1f3f80f6c4d5538\n",
441
            "  Stored in directory: /tmp/pip-ephem-wheel-cache-3xvz7uo5/wheels/23/21/3e/1d698479dc8dc59adfe13d4a2983e18cb2b95a251bcfde9718\n",
442
            "  Building wheel for seqio (setup.py) ... \u001b[?25l-\b \b\\\b \bdone\n",
443
            "\u001b[?25h  Created wheel for seqio: filename=seqio-0.0.17-py3-none-any.whl size=352935 sha256=7cc8f34359925a4d4c65f6501ab91f041d9fa3aa320b0d98fa31daf550030c03\n",
444
            "  Stored in directory: /tmp/pip-ephem-wheel-cache-3xvz7uo5/wheels/f3/82/f4/d9062a177e9f3274845c1f495e749c4f943f963ab5ca5836c0\n",
445
            "  Building wheel for ml_collections (setup.py) ... \u001b[?25l-\b \bdone\n",
446
            "\u001b[?25h  Created wheel for ml_collections: filename=ml_collections-0.1.1-py3-none-any.whl size=94506 sha256=f99fea0a15a36f2d472475918a47103facdeba9627e3d705c7ccd08114fe06f5\n",
447
            "  Stored in directory: /usr/local/google/home/biaojiaxing/.cache/pip/wheels/7b/89/c9/a9b87790789e94aadcfc393c283e3ecd5ab916aed0a31be8fe\n",
448
            "  Building wheel for promise (setup.py) ... \u001b[?25l-\b \bdone\n",
449
            "\u001b[?25h  Created wheel for promise: filename=promise-2.3-py3-none-any.whl size=21484 sha256=312b2567b50454fae5a98851a903de0752a082a9ce52967dc676137b9df19a36\n",
450
            "  Stored in directory: /usr/local/google/home/biaojiaxing/.cache/pip/wheels/54/4e/28/3ed0e1c8a752867445bab994d2340724928aa3ab059c57c8db\n",
451
            "Successfully built t5x airio clu flax jestimator optax seqio ml_collections promise\n",
452
            "Installing collected packages: sentencepiece, libclang, gin-config, flatbuffers, dm-tree, cached_property, zipp, wrapt, werkzeug, typing-extensions, typeguard, tqdm, toolz, toml, termcolor, tensorflow-io-gcs-filesystem, tensorflow-estimator, tensorboard-data-server, pyasn1, protobuf, promise, oauthlib, numpy, mypy-extensions, msgpack, more-itertools, mdurl, markdown, keras, importlib_resources, grpcio, graphviz, google-pasta, gast, fsspec, etils, editdistance, docstring-parser, contextlib2, cloudpickle, click, cachetools, astunparse, absl-py, typing-inspect, tensorflow-hub, rsa, requests-oauthlib, pyglove, pyasn1-modules, ml_collections, markdown-it-py, jaxtyping, h5py, googleapis-common-protos, tensorstore, tensorflow-metadata, rich, libcst, google-auth, orbax-checkpoint, google-auth-oauthlib, fiddle, chex, array-record, tensorboard, optax, grain-nightly, tfds-nightly, tensorflow-cpu, tensorflow, flax, tensorflow-text, jestimator, clu, seqio, airio, t5x\n",
453
            "  Attempting uninstall: typing-extensions\n",
454
            "    Found existing installation: typing_extensions 4.8.0\n",
455
            "    Uninstalling typing_extensions-4.8.0:\n",
456
            "      Successfully uninstalled typing_extensions-4.8.0\n",
457
            "  Attempting uninstall: numpy\n",
458
            "    Found existing installation: numpy 1.26.1\n",
459
            "    Uninstalling numpy-1.26.1:\n",
460
            "      Successfully uninstalled numpy-1.26.1\n",
461
            "Successfully installed absl-py-1.4.0 airio-0.0.1 array-record-0.5.0 astunparse-1.6.3 cached_property-1.5.2 cachetools-5.3.2 chex-0.1.84 click-8.1.7 cloudpickle-3.0.0 clu-0.0.11 contextlib2-21.6.0 dm-tree-0.1.8 docstring-parser-0.15 editdistance-0.6.2 etils-1.5.2 fiddle-0.2.11 flatbuffers-23.5.26 flax-0.8.0 fsspec-2023.10.0 gast-0.4.0 gin-config-0.5.0 google-auth-2.23.4 google-auth-oauthlib-1.0.0 google-pasta-0.2.0 googleapis-common-protos-1.61.0 grain-nightly-0.0.2 graphviz-0.20.1 grpcio-1.59.2 h5py-3.10.0 importlib_resources-6.1.0 jaxtyping-0.2.23 jestimator-0.3.3 keras-2.13.1 libclang-16.0.6 libcst-1.1.0 markdown-3.5.1 markdown-it-py-3.0.0 mdurl-0.1.2 ml_collections-0.1.1 more-itertools-10.1.0 msgpack-1.0.7 mypy-extensions-1.0.0 numpy-1.24.3 oauthlib-3.2.2 optax-0.1.8.dev0 orbax-checkpoint-0.4.1 promise-2.3 protobuf-3.20.3 pyasn1-0.5.0 pyasn1-modules-0.3.0 pyglove-0.4.3 requests-oauthlib-1.3.1 rich-13.6.0 rsa-4.9 sentencepiece-0.1.99 seqio-0.0.17 t5x-0.0.0 tensorboard-2.13.0 tensorboard-data-server-0.7.2 tensorflow-2.13.1 tensorflow-cpu-2.13.1 tensorflow-estimator-2.13.0 tensorflow-hub-0.15.0 tensorflow-io-gcs-filesystem-0.34.0 tensorflow-metadata-1.14.0 tensorflow-text-2.13.0 tensorstore-0.1.47 termcolor-2.3.0 tfds-nightly-4.9.2.dev202308090034 toml-0.10.2 toolz-0.12.0 tqdm-4.66.1 typeguard-2.13.3 typing-extensions-4.5.0 typing-inspect-0.9.0 werkzeug-3.0.1 wrapt-1.15.0 zipp-3.17.0\n"
462
          ]
463
        }
464
      ],
465
      "source": [
466
        "cpu = True  # True\n",
467
        "if cpu:\n",
468
        "  !python -m pip uninstall -y jax jaxlib\n",
469
        "  !python -m pip install jax[cpu]\n",
470
        "  !cd t5x \u0026\u0026 pip install .\n",
471
        "else:\n",
472
        "  !cd t5x \u0026\u0026 python3 -m pip install -e '.[tpu]' -f https://storage.googleapis.com/jax-releases/libtpu_releases.html"
473
      ]
474
    },
475
    {
476
      "cell_type": "code",
477
      "execution_count": null,
478
      "metadata": {
479
        "colab": {
480
          "base_uri": "https://localhost:8080/"
481
        },
482
        "id": "UaiG9mn_Q3w9",
483
        "outputId": "857d9e65-0bee-42c5-ff1e-e60555d6a36d"
484
      },
485
      "outputs": [
486
        {
487
          "name": "stdout",
488
          "output_type": "stream",
489
          "text": [
490
            "Requirement already satisfied: seqio in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (0.0.17)\n",
491
            "Collecting seqio\n",
492
            "  Using cached seqio-0.0.18-py3-none-any.whl.metadata (51 kB)\n",
493
            "Collecting t5\n",
494
            "  Using cached t5-0.9.4-py2.py3-none-any.whl (164 kB)\n",
495
            "Collecting numpy==1.24.1\n",
496
            "  Downloading numpy-1.24.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)\n",
497
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.3/17.3 MB\u001b[0m \u001b[31m21.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
498
            "\u001b[?25hCollecting tensorstore==0.1.35\n",
499
            "  Downloading tensorstore-0.1.35-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.9 MB)\n",
500
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.9/10.9 MB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
501
            "\u001b[?25hRequirement already satisfied: absl-py in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from seqio) (1.4.0)\n",
502
            "Requirement already satisfied: clu in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from seqio) (0.0.11)\n",
503
            "Requirement already satisfied: editdistance in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from seqio) (0.6.2)\n",
504
            "Requirement already satisfied: jax in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from seqio) (0.4.20)\n",
505
            "Requirement already satisfied: jaxlib in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from seqio) (0.4.20)\n",
506
            "Requirement already satisfied: packaging in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from seqio) (23.2)\n",
507
            "Requirement already satisfied: pyglove in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from seqio) (0.4.3)\n",
508
            "Requirement already satisfied: sentencepiece in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from seqio) (0.1.99)\n",
509
            "Requirement already satisfied: tensorflow-text in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from seqio) (2.13.0)\n",
510
            "Requirement already satisfied: tfds-nightly in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from seqio) (4.9.2.dev202308090034)\n",
511
            "Requirement already satisfied: protobuf\u003c=3.20.3 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from seqio) (3.20.3)\n",
512
            "Requirement already satisfied: babel in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from t5) (2.13.1)\n",
513
            "Collecting immutabledict (from t5)\n",
514
            "  Downloading immutabledict-3.0.0-py3-none-any.whl.metadata (3.1 kB)\n",
515
            "Requirement already satisfied: gin-config in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from t5) (0.5.0)\n",
516
            "Collecting mesh-tensorflow\u003e=0.1.13 (from mesh-tensorflow[transformer]\u003e=0.1.13-\u003et5)\n",
517
            "  Downloading mesh_tensorflow-0.1.21-py3-none-any.whl (385 kB)\n",
518
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m385.2/385.2 kB\u001b[0m \u001b[31m34.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
519
            "\u001b[?25hCollecting nltk (from t5)\n",
520
            "  Downloading nltk-3.8.1-py3-none-any.whl (1.5 MB)\n",
521
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m72.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
522
            "\u001b[?25hCollecting pandas (from t5)\n",
523
            "  Downloading pandas-2.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)\n",
524
            "Collecting rouge-score\u003e=0.1.2 (from t5)\n",
525
            "  Downloading rouge_score-0.1.2.tar.gz (17 kB)\n",
526
            "  Preparing metadata (setup.py) ... \u001b[?25l-\b \bdone\n",
527
            "\u001b[?25hCollecting sacrebleu (from t5)\n",
528
            "  Downloading sacrebleu-2.3.1-py3-none-any.whl (118 kB)\n",
529
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m118.9/118.9 kB\u001b[0m \u001b[31m15.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
530
            "\u001b[?25hCollecting scikit-learn (from t5)\n",
531
            "  Downloading scikit_learn-1.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)\n",
532
            "Requirement already satisfied: scipy in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from t5) (1.11.3)\n",
533
            "Collecting seqio-nightly (from t5)\n",
534
            "  Downloading seqio_nightly-0.0.17.dev20231102-py3-none-any.whl.metadata (51 kB)\n",
535
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.9/51.9 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
536
            "\u001b[?25hRequirement already satisfied: six\u003e=1.14 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from t5) (1.16.0)\n",
537
            "Collecting transformers\u003e=2.7.0 (from t5)\n",
538
            "  Downloading transformers-4.35.0-py3-none-any.whl.metadata (123 kB)\n",
539
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m123.1/123.1 kB\u001b[0m \u001b[31m15.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
540
            "\u001b[?25hCollecting future (from mesh-tensorflow\u003e=0.1.13-\u003emesh-tensorflow[transformer]\u003e=0.1.13-\u003et5)\n",
541
            "  Downloading future-0.18.3.tar.gz (840 kB)\n",
542
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m840.9/840.9 kB\u001b[0m \u001b[31m57.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
543
            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l-\b \b\\\b \bdone\n",
544
            "\u001b[?25hCollecting tensorflow-datasets (from mesh-tensorflow[transformer]\u003e=0.1.13-\u003et5)\n",
545
            "  Downloading tensorflow_datasets-4.9.3-py3-none-any.whl.metadata (9.3 kB)\n",
546
            "Collecting filelock (from transformers\u003e=2.7.0-\u003et5)\n",
547
            "  Downloading filelock-3.13.1-py3-none-any.whl.metadata (2.8 kB)\n",
548
            "Collecting huggingface-hub\u003c1.0,\u003e=0.16.4 (from transformers\u003e=2.7.0-\u003et5)\n",
549
            "  Downloading huggingface_hub-0.18.0-py3-none-any.whl.metadata (13 kB)\n",
550
            "Requirement already satisfied: pyyaml\u003e=5.1 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from transformers\u003e=2.7.0-\u003et5) (6.0.1)\n",
551
            "Collecting regex!=2019.12.17 (from transformers\u003e=2.7.0-\u003et5)\n",
552
            "  Downloading regex-2023.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)\n",
553
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.9/40.9 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
554
            "\u001b[?25hRequirement already satisfied: requests in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from transformers\u003e=2.7.0-\u003et5) (2.31.0)\n",
555
            "Collecting tokenizers\u003c0.15,\u003e=0.14 (from transformers\u003e=2.7.0-\u003et5)\n",
556
            "  Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n",
557
            "Collecting safetensors\u003e=0.3.1 (from transformers\u003e=2.7.0-\u003et5)\n",
558
            "  Downloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)\n",
559
            "Requirement already satisfied: tqdm\u003e=4.27 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from transformers\u003e=2.7.0-\u003et5) (4.66.1)\n",
560
            "Requirement already satisfied: etils[epath] in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from clu-\u003eseqio) (1.5.2)\n",
561
            "Requirement already satisfied: flax in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from clu-\u003eseqio) (0.8.0)\n",
562
            "Requirement already satisfied: ml-collections in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from clu-\u003eseqio) (0.1.1)\n",
563
            "Requirement already satisfied: typing-extensions in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from clu-\u003eseqio) (4.5.0)\n",
564
            "Requirement already satisfied: wrapt in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from clu-\u003eseqio) (1.15.0)\n",
565
            "Requirement already satisfied: ml-dtypes\u003e=0.2.0 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from jax-\u003eseqio) (0.3.1)\n",
566
            "Requirement already satisfied: opt-einsum in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from jax-\u003eseqio) (3.3.0)\n",
567
            "Requirement already satisfied: click in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from nltk-\u003et5) (8.1.7)\n",
568
            "Collecting joblib (from nltk-\u003et5)\n",
569
            "  Downloading joblib-1.3.2-py3-none-any.whl.metadata (5.4 kB)\n",
570
            "Requirement already satisfied: python-dateutil\u003e=2.8.2 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from pandas-\u003et5) (2.8.2)\n",
571
            "Collecting pytz\u003e=2020.1 (from pandas-\u003et5)\n",
572
            "  Downloading pytz-2023.3.post1-py2.py3-none-any.whl.metadata (22 kB)\n",
573
            "Collecting tzdata\u003e=2022.1 (from pandas-\u003et5)\n",
574
            "  Downloading tzdata-2023.3-py2.py3-none-any.whl (341 kB)\n",
575
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.8/341.8 kB\u001b[0m \u001b[31m33.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
576
            "\u001b[?25hRequirement already satisfied: docstring-parser\u003e=0.12 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from pyglove-\u003eseqio) (0.15)\n",
577
            "Collecting portalocker (from sacrebleu-\u003et5)\n",
578
            "  Downloading portalocker-2.8.2-py3-none-any.whl.metadata (8.5 kB)\n",
579
            "Collecting tabulate\u003e=0.8.9 (from sacrebleu-\u003et5)\n",
580
            "  Downloading tabulate-0.9.0-py3-none-any.whl (35 kB)\n",
581
            "Collecting colorama (from sacrebleu-\u003et5)\n",
582
            "  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n",
583
            "Collecting lxml (from sacrebleu-\u003et5)\n",
584
            "  Downloading lxml-4.9.3-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.8 kB)\n",
585
            "Collecting threadpoolctl\u003e=2.0.0 (from scikit-learn-\u003et5)\n",
586
            "  Downloading threadpoolctl-3.2.0-py3-none-any.whl.metadata (10.0 kB)\n",
587
            "Requirement already satisfied: array-record in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tfds-nightly-\u003eseqio) (0.5.0)\n",
588
            "Requirement already satisfied: dm-tree in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tfds-nightly-\u003eseqio) (0.1.8)\n",
589
            "Requirement already satisfied: promise in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tfds-nightly-\u003eseqio) (2.3)\n",
590
            "Requirement already satisfied: psutil in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tfds-nightly-\u003eseqio) (5.9.6)\n",
591
            "Requirement already satisfied: tensorflow-metadata in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tfds-nightly-\u003eseqio) (1.14.0)\n",
592
            "Requirement already satisfied: termcolor in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tfds-nightly-\u003eseqio) (2.3.0)\n",
593
            "Requirement already satisfied: toml in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tfds-nightly-\u003eseqio) (0.10.2)\n",
594
            "Requirement already satisfied: tensorflow-hub\u003e=0.8.0 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tensorflow-text-\u003eseqio) (0.15.0)\n",
595
            "Requirement already satisfied: tensorflow\u003c2.14,\u003e=2.13.0 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tensorflow-text-\u003eseqio) (2.13.1)\n",
596
            "Requirement already satisfied: fsspec in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from etils[enp,epath,etree]\u003e=0.9.0-\u003etfds-nightly-\u003eseqio) (2023.10.0)\n",
597
            "Requirement already satisfied: importlib_resources in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from etils[enp,epath,etree]\u003e=0.9.0-\u003etfds-nightly-\u003eseqio) (6.1.0)\n",
598
            "Requirement already satisfied: zipp in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from etils[enp,epath,etree]\u003e=0.9.0-\u003etfds-nightly-\u003eseqio) (3.17.0)\n",
599
            "Requirement already satisfied: charset-normalizer\u003c4,\u003e=2 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from requests-\u003etransformers\u003e=2.7.0-\u003et5) (3.3.2)\n",
600
            "Requirement already satisfied: idna\u003c4,\u003e=2.5 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from requests-\u003etransformers\u003e=2.7.0-\u003et5) (3.4)\n",
601
            "Requirement already satisfied: urllib3\u003c3,\u003e=1.21.1 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from requests-\u003etransformers\u003e=2.7.0-\u003et5) (2.0.7)\n",
602
            "Requirement already satisfied: certifi\u003e=2017.4.17 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from requests-\u003etransformers\u003e=2.7.0-\u003et5) (2023.7.22)\n",
603
            "Requirement already satisfied: astunparse\u003e=1.6.0 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (1.6.3)\n",
604
            "Requirement already satisfied: flatbuffers\u003e=23.1.21 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (23.5.26)\n",
605
            "Requirement already satisfied: gast\u003c=0.4.0,\u003e=0.2.1 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (0.4.0)\n",
606
            "Requirement already satisfied: google-pasta\u003e=0.1.1 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (0.2.0)\n",
607
            "Requirement already satisfied: grpcio\u003c2.0,\u003e=1.24.3 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (1.59.2)\n",
608
            "Requirement already satisfied: h5py\u003e=2.9.0 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (3.10.0)\n",
609
            "Requirement already satisfied: keras\u003c2.14,\u003e=2.13.1 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (2.13.1)\n",
610
            "Requirement already satisfied: libclang\u003e=13.0.0 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (16.0.6)\n",
611
            "Requirement already satisfied: setuptools in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (68.0.0)\n",
612
            "Requirement already satisfied: tensorboard\u003c2.14,\u003e=2.13 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (2.13.0)\n",
613
            "Requirement already satisfied: tensorflow-estimator\u003c2.14,\u003e=2.13.0 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (2.13.0)\n",
614
            "Requirement already satisfied: tensorflow-io-gcs-filesystem\u003e=0.23.1 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (0.34.0)\n",
615
            "Collecting huggingface-hub\u003c1.0,\u003e=0.16.4 (from transformers\u003e=2.7.0-\u003et5)\n",
616
            "  Downloading huggingface_hub-0.17.3-py3-none-any.whl.metadata (13 kB)\n",
617
            "Requirement already satisfied: msgpack in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from flax-\u003eclu-\u003eseqio) (1.0.7)\n",
618
            "Requirement already satisfied: optax in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from flax-\u003eclu-\u003eseqio) (0.1.8.dev0)\n",
619
            "Requirement already satisfied: orbax-checkpoint in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from flax-\u003eclu-\u003eseqio) (0.4.1)\n",
620
            "Requirement already satisfied: rich\u003e=11.1 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from flax-\u003eclu-\u003eseqio) (13.6.0)\n",
621
            "Requirement already satisfied: contextlib2 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from ml-collections-\u003eclu-\u003eseqio) (21.6.0)\n",
622
            "Requirement already satisfied: googleapis-common-protos\u003c2,\u003e=1.52.0 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tensorflow-metadata-\u003etfds-nightly-\u003eseqio) (1.61.0)\n",
623
            "Requirement already satisfied: wheel\u003c1.0,\u003e=0.23.0 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from astunparse\u003e=1.6.0-\u003etensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (0.41.2)\n",
624
            "Requirement already satisfied: markdown-it-py\u003e=2.2.0 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from rich\u003e=11.1-\u003eflax-\u003eclu-\u003eseqio) (3.0.0)\n",
625
            "Requirement already satisfied: pygments\u003c3.0.0,\u003e=2.13.0 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from rich\u003e=11.1-\u003eflax-\u003eclu-\u003eseqio) (2.16.1)\n",
626
            "Requirement already satisfied: google-auth\u003c3,\u003e=1.6.3 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tensorboard\u003c2.14,\u003e=2.13-\u003etensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (2.23.4)\n",
627
            "Requirement already satisfied: google-auth-oauthlib\u003c1.1,\u003e=0.5 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tensorboard\u003c2.14,\u003e=2.13-\u003etensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (1.0.0)\n",
628
            "Requirement already satisfied: markdown\u003e=2.6.8 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tensorboard\u003c2.14,\u003e=2.13-\u003etensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (3.5.1)\n",
629
            "Requirement already satisfied: tensorboard-data-server\u003c0.8.0,\u003e=0.7.0 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tensorboard\u003c2.14,\u003e=2.13-\u003etensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (0.7.2)\n",
630
            "Requirement already satisfied: werkzeug\u003e=1.0.1 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from tensorboard\u003c2.14,\u003e=2.13-\u003etensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (3.0.1)\n",
631
            "Requirement already satisfied: chex\u003e=0.1.5 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from optax-\u003eflax-\u003eclu-\u003eseqio) (0.1.84)\n",
632
            "Requirement already satisfied: nest_asyncio in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from orbax-checkpoint-\u003eflax-\u003eclu-\u003eseqio) (1.5.8)\n",
633
            "Requirement already satisfied: toolz\u003e=0.9.0 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from chex\u003e=0.1.5-\u003eoptax-\u003eflax-\u003eclu-\u003eseqio) (0.12.0)\n",
634
            "Requirement already satisfied: cachetools\u003c6.0,\u003e=2.0.0 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from google-auth\u003c3,\u003e=1.6.3-\u003etensorboard\u003c2.14,\u003e=2.13-\u003etensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (5.3.2)\n",
635
            "Requirement already satisfied: pyasn1-modules\u003e=0.2.1 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from google-auth\u003c3,\u003e=1.6.3-\u003etensorboard\u003c2.14,\u003e=2.13-\u003etensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (0.3.0)\n",
636
            "Requirement already satisfied: rsa\u003c5,\u003e=3.1.4 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from google-auth\u003c3,\u003e=1.6.3-\u003etensorboard\u003c2.14,\u003e=2.13-\u003etensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (4.9)\n",
637
            "Requirement already satisfied: requests-oauthlib\u003e=0.7.0 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from google-auth-oauthlib\u003c1.1,\u003e=0.5-\u003etensorboard\u003c2.14,\u003e=2.13-\u003etensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (1.3.1)\n",
638
            "Requirement already satisfied: mdurl~=0.1 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from markdown-it-py\u003e=2.2.0-\u003erich\u003e=11.1-\u003eflax-\u003eclu-\u003eseqio) (0.1.2)\n",
639
            "Requirement already satisfied: MarkupSafe\u003e=2.1.1 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from werkzeug\u003e=1.0.1-\u003etensorboard\u003c2.14,\u003e=2.13-\u003etensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (2.1.3)\n",
640
            "Requirement already satisfied: pyasn1\u003c0.6.0,\u003e=0.4.6 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from pyasn1-modules\u003e=0.2.1-\u003egoogle-auth\u003c3,\u003e=1.6.3-\u003etensorboard\u003c2.14,\u003e=2.13-\u003etensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (0.5.0)\n",
641
            "Requirement already satisfied: oauthlib\u003e=3.0.0 in /usr/local/google/home/biaojiaxing/work/conda/miniconda3/envs/jupyter/lib/python3.10/site-packages (from requests-oauthlib\u003e=0.7.0-\u003egoogle-auth-oauthlib\u003c1.1,\u003e=0.5-\u003etensorboard\u003c2.14,\u003e=2.13-\u003etensorflow\u003c2.14,\u003e=2.13.0-\u003etensorflow-text-\u003eseqio) (3.2.2)\n",
642
            "Downloading seqio-0.0.18-py3-none-any.whl (351 kB)\n",
643
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m351.7/351.7 kB\u001b[0m \u001b[31m29.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
644
            "\u001b[?25hDownloading transformers-4.35.0-py3-none-any.whl (7.9 MB)\n",
645
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.9/7.9 MB\u001b[0m \u001b[31m98.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
646
            "\u001b[?25hDownloading immutabledict-3.0.0-py3-none-any.whl (4.0 kB)\n",
647
            "Downloading pandas-2.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.3 MB)\n",
648
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.3/12.3 MB\u001b[0m \u001b[31m96.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
649
            "\u001b[?25hDownloading scikit_learn-1.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.8 MB)\n",
650
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.8/10.8 MB\u001b[0m \u001b[31m97.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
651
            "\u001b[?25hDownloading seqio_nightly-0.0.17.dev20231102-py3-none-any.whl (353 kB)\n",
652
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m353.2/353.2 kB\u001b[0m \u001b[31m31.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
653
            "\u001b[?25hDownloading joblib-1.3.2-py3-none-any.whl (302 kB)\n",
654
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.2/302.2 kB\u001b[0m \u001b[31m31.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
655
            "\u001b[?25hDownloading pytz-2023.3.post1-py2.py3-none-any.whl (502 kB)\n",
656
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m502.5/502.5 kB\u001b[0m \u001b[31m42.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
657
            "\u001b[?25hDownloading regex-2023.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (773 kB)\n",
658
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m773.9/773.9 kB\u001b[0m \u001b[31m50.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
659
            "\u001b[?25hDownloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
660
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m65.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
661
            "\u001b[?25hDownloading threadpoolctl-3.2.0-py3-none-any.whl (15 kB)\n",
662
            "Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)\n",
663
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.8/3.8 MB\u001b[0m \u001b[31m89.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
664
            "\u001b[?25hDownloading huggingface_hub-0.17.3-py3-none-any.whl (295 kB)\n",
665
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m295.0/295.0 kB\u001b[0m \u001b[31m31.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
666
            "\u001b[?25hDownloading filelock-3.13.1-py3-none-any.whl (11 kB)\n",
667
            "Downloading lxml-4.9.3-cp310-cp310-manylinux_2_28_x86_64.whl (7.9 MB)\n",
668
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.9/7.9 MB\u001b[0m \u001b[31m85.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
669
            "\u001b[?25hDownloading portalocker-2.8.2-py3-none-any.whl (17 kB)\n",
670
            "Downloading tensorflow_datasets-4.9.3-py3-none-any.whl (5.0 MB)\n",
671
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m85.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
672
            "\u001b[?25hBuilding wheels for collected packages: rouge-score, future\n",
673
            "  Building wheel for rouge-score (setup.py) ... \u001b[?25l-\b \b\\\b \bdone\n",
674
            "\u001b[?25h  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24932 sha256=7331c4f0a659e367c01e6e22b69e97a1a887bcf5b4a9870c53dd133310e6c6f4\n",
675
            "  Stored in directory: /usr/local/google/home/biaojiaxing/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4\n",
676
            "  Building wheel for future (setup.py) ... \u001b[?25l-\b \b\\\b \b|\b \b/\b \bdone\n",
677
            "\u001b[?25h  Created wheel for future: filename=future-0.18.3-py3-none-any.whl size=492024 sha256=5c505da69eaf6dcf83f58940fa113a6a0ebea3783d010a60c76b531b319b388d\n",
678
            "  Stored in directory: /usr/local/google/home/biaojiaxing/.cache/pip/wheels/5e/a9/47/f118e66afd12240e4662752cc22cefae5d97275623aa8ef57d\n",
679
            "Successfully built rouge-score future\n",
680
            "Installing collected packages: pytz, tzdata, threadpoolctl, tabulate, safetensors, regex, portalocker, numpy, lxml, joblib, immutabledict, future, filelock, colorama, tensorstore, sacrebleu, pandas, nltk, mesh-tensorflow, huggingface-hub, tokenizers, scikit-learn, rouge-score, transformers, tensorflow-datasets, seqio-nightly, seqio, t5\n",
681
            "  Attempting uninstall: numpy\n",
682
            "    Found existing installation: numpy 1.24.3\n",
683
            "    Uninstalling numpy-1.24.3:\n",
684
            "      Successfully uninstalled numpy-1.24.3\n",
685
            "  Attempting uninstall: tensorstore\n",
686
            "    Found existing installation: tensorstore 0.1.47\n",
687
            "    Uninstalling tensorstore-0.1.47:\n",
688
            "      Successfully uninstalled tensorstore-0.1.47\n",
689
            "  Attempting uninstall: seqio\n",
690
            "    Found existing installation: seqio 0.0.17\n",
691
            "    Uninstalling seqio-0.0.17:\n",
692
            "      Successfully uninstalled seqio-0.0.17\n",
693
            "Successfully installed colorama-0.4.6 filelock-3.13.1 future-0.18.3 huggingface-hub-0.17.3 immutabledict-3.0.0 joblib-1.3.2 lxml-4.9.3 mesh-tensorflow-0.1.21 nltk-3.8.1 numpy-1.24.1 pandas-2.1.2 portalocker-2.8.2 pytz-2023.3.post1 regex-2023.10.3 rouge-score-0.1.2 sacrebleu-2.3.1 safetensors-0.4.0 scikit-learn-1.3.2 seqio-0.0.18 seqio-nightly-0.0.17.dev20231102 t5-0.9.4 tabulate-0.9.0 tensorflow-datasets-4.9.3 tensorstore-0.1.35 threadpoolctl-3.2.0 tokenizers-0.14.1 transformers-4.35.0 tzdata-2023.3\n"
694
          ]
695
        }
696
      ],
697
      "source": [
698
        "# Install/upgrade seqio and t5; numpy and tensorstore are pinned to exact versions.\n",
        "# %pip (instead of !pip) installs into the environment of the running kernel.\n",
        "%pip install --upgrade seqio t5 numpy==1.24.1 tensorstore==0.1.35"
699
      ]
700
    },
701
    {
702
      "cell_type": "code",
703
      "execution_count": null,
704
      "metadata": {
705
        "colab": {
706
          "base_uri": "https://localhost:8080/"
707
        },
708
        "id": "KbAhp0joefth",
709
        "outputId": "16d0defc-9196-495c-cfcf-8e9dfbdf0e4f"
710
      },
711
      "outputs": [
712
        {
713
          "name": "stdout",
714
          "output_type": "stream",
715
          "text": [
716
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/3b-mt.gin...\r\n",
717
            "/ [0 files][    0.0 B/  3.5 GiB]                                                \rCopying gs://madlad-400-checkpoints/checkpoints/3b-mt/checkpoint...\r\n",
718
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.decoder_norm.scale.v/0...\n",
719
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_0.pre_cross_attention_layer_norm.scale.v/0...\n",
720
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_0.pre_mlp_layer_norm.scale.v/0...\n",
721
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_0.pre_self_attention_layer_norm.scale.v/0...\n",
722
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_1.pre_cross_attention_layer_norm.scale.v/0...\n",
723
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_1.pre_mlp_layer_norm.scale.v/0...\n",
724
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_1.pre_self_attention_layer_norm.scale.v/0...\n",
725
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_10.pre_cross_attention_layer_norm.scale.v/0...\n",
726
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_10.pre_mlp_layer_norm.scale.v/0...\n",
727
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_10.pre_self_attention_layer_norm.scale.v/0...\n",
728
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_11.pre_cross_attention_layer_norm.scale.v/0...\n",
729
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_11.pre_mlp_layer_norm.scale.v/0...\n",
730
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_11.pre_self_attention_layer_norm.scale.v/0...\n",
731
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_12.pre_cross_attention_layer_norm.scale.v/0...\n",
732
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_12.pre_mlp_layer_norm.scale.v/0...\n",
733
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_12.pre_self_attention_layer_norm.scale.v/0...\n",
734
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_13.pre_cross_attention_layer_norm.scale.v/0...\n",
735
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_13.pre_mlp_layer_norm.scale.v/0...\n",
736
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_13.pre_self_attention_layer_norm.scale.v/0...\n",
737
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_14.pre_cross_attention_layer_norm.scale.v/0...\n",
738
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_14.pre_mlp_layer_norm.scale.v/0...\n",
739
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_14.pre_self_attention_layer_norm.scale.v/0...\n",
740
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_15.pre_cross_attention_layer_norm.scale.v/0...\n",
741
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_15.pre_mlp_layer_norm.scale.v/0...\n",
742
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_20.pre_cross_attention_layer_norm.scale.v/0...\n",
743
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_16.pre_cross_attention_layer_norm.scale.v/0...\n",
744
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_16.pre_mlp_layer_norm.scale.v/0...\n",
745
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_15.pre_self_attention_layer_norm.scale.v/0...\n",
746
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_28.pre_self_attention_layer_norm.scale.v/0...\n",
747
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_16.pre_self_attention_layer_norm.scale.v/0...\n",
748
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_17.pre_cross_attention_layer_norm.scale.v/0...\n",
749
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_17.pre_mlp_layer_norm.scale.v/0...\n",
750
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_17.pre_self_attention_layer_norm.scale.v/0...\n",
751
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_18.pre_mlp_layer_norm.scale.v/0...\n",
752
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_18.pre_cross_attention_layer_norm.scale.v/0...\n",
753
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_18.pre_self_attention_layer_norm.scale.v/0...\n",
754
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_5.pre_cross_attention_layer_norm.scale.v/0...\n",
755
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_19.pre_mlp_layer_norm.scale.v/0...\n",
756
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_19.pre_self_attention_layer_norm.scale.v/0...\n",
757
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_19.pre_cross_attention_layer_norm.scale.v/0...\n",
758
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_2.pre_cross_attention_layer_norm.scale.v/0...\n",
759
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_2.pre_mlp_layer_norm.scale.v/0...\n",
760
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_20.pre_mlp_layer_norm.scale.v/0...\n",
761
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_2.pre_self_attention_layer_norm.scale.v/0...\n",
762
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_21.pre_cross_attention_layer_norm.scale.v/0...\n",
763
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_21.pre_mlp_layer_norm.scale.v/0...\n",
764
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_21.pre_self_attention_layer_norm.scale.v/0...\n",
765
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_5.pre_mlp_layer_norm.scale.v/0...\n",
766
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_22.pre_cross_attention_layer_norm.scale.v/0...\n",
767
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_22.pre_mlp_layer_norm.scale.v/0...\n",
768
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_22.pre_self_attention_layer_norm.scale.v/0...\n",
769
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_23.pre_cross_attention_layer_norm.scale.v/0...\n",
770
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_23.pre_mlp_layer_norm.scale.v/0...\n",
771
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_20.pre_self_attention_layer_norm.scale.v/0...\n",
772
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_24.pre_self_attention_layer_norm.scale.v/0...\n",
773
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_25.pre_cross_attention_layer_norm.scale.v/0...\n",
774
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_25.pre_self_attention_layer_norm.scale.v/0...\n",
775
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_25.pre_mlp_layer_norm.scale.v/0...\n",
776
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_26.pre_mlp_layer_norm.scale.v/0...\n",
777
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_24.pre_mlp_layer_norm.scale.v/0...\n",
778
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_23.pre_self_attention_layer_norm.scale.v/0...\n",
779
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_26.pre_cross_attention_layer_norm.scale.v/0...\n",
780
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_24.pre_cross_attention_layer_norm.scale.v/0...\n",
781
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_27.pre_cross_attention_layer_norm.scale.v/0...\n",
782
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_27.pre_mlp_layer_norm.scale.v/0...\n",
783
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_26.pre_self_attention_layer_norm.scale.v/0...\n",
784
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_28.pre_cross_attention_layer_norm.scale.v/0...\n",
785
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_5.pre_self_attention_layer_norm.scale.v/0...\n",
786
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_28.pre_mlp_layer_norm.scale.v/0...\n",
787
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_27.pre_self_attention_layer_norm.scale.v/0...\n",
788
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_29.pre_self_attention_layer_norm.scale.v/0...\n",
789
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_29.pre_cross_attention_layer_norm.scale.v/0...\n",
790
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_29.pre_mlp_layer_norm.scale.v/0...\n",
791
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_3.pre_cross_attention_layer_norm.scale.v/0...\n",
792
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_3.pre_mlp_layer_norm.scale.v/0...\n",
793
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_3.pre_self_attention_layer_norm.scale.v/0...\n",
794
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_30.pre_self_attention_layer_norm.scale.v/0...\n",
795
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_30.pre_cross_attention_layer_norm.scale.v/0...\n",
796
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_31.pre_cross_attention_layer_norm.scale.v/0...\n",
797
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_30.pre_mlp_layer_norm.scale.v/0...\n",
798
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/0...\n",
799
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_4.pre_mlp_layer_norm.scale.v/0...\n",
800
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_4.pre_self_attention_layer_norm.scale.v/0...\n",
801
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_6.pre_cross_attention_layer_norm.scale.v/0...\n",
802
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_31.pre_mlp_layer_norm.scale.v/0...\n",
803
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_31.pre_self_attention_layer_norm.scale.v/0...\n",
804
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_6.pre_mlp_layer_norm.scale.v/0...\n",
805
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_6.pre_self_attention_layer_norm.scale.v/0...\n",
806
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_7.pre_cross_attention_layer_norm.scale.v/0...\n",
807
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_7.pre_mlp_layer_norm.scale.v/0...\n",
808
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_7.pre_self_attention_layer_norm.scale.v/0...\n",
809
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_8.pre_cross_attention_layer_norm.scale.v/0...\n",
810
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_8.pre_mlp_layer_norm.scale.v/0...\n",
811
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_8.pre_self_attention_layer_norm.scale.v/0...\n",
812
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_9.pre_cross_attention_layer_norm.scale.v/0...\n",
813
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_9.pre_mlp_layer_norm.scale.v/0...\n",
814
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.relpos_bias.rel_embedding.v/0.0...\n",
815
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.decoder.layers_9.pre_self_attention_layer_norm.scale.v/0...\n",
816
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.encoder_norm.scale.v/0...\n",
817
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/0...\n",
818
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_0.pre_mlp_layer_norm.scale.v/0...\n",
819
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_1.pre_mlp_layer_norm.scale.v/0...\n",
820
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_1.pre_attention_layer_norm.scale.v/0...\n",
821
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_10.pre_attention_layer_norm.scale.v/0...\n",
822
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_10.pre_mlp_layer_norm.scale.v/0...\n",
823
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_11.pre_attention_layer_norm.scale.v/0...\n",
824
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_11.pre_mlp_layer_norm.scale.v/0...\n",
825
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_12.pre_attention_layer_norm.scale.v/0...\n",
826
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_12.pre_mlp_layer_norm.scale.v/0...\n",
827
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_13.pre_attention_layer_norm.scale.v/0...\n",
828
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_13.pre_mlp_layer_norm.scale.v/0...\n",
829
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_14.pre_attention_layer_norm.scale.v/0...\n",
830
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_14.pre_mlp_layer_norm.scale.v/0...\n",
831
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_15.pre_attention_layer_norm.scale.v/0...\n",
832
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_15.pre_mlp_layer_norm.scale.v/0...\n",
833
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_16.pre_attention_layer_norm.scale.v/0...\n",
834
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_17.pre_attention_layer_norm.scale.v/0...\n",
835
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_16.pre_mlp_layer_norm.scale.v/0...\n",
836
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_18.pre_mlp_layer_norm.scale.v/0...\n",
837
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_17.pre_mlp_layer_norm.scale.v/0...\n",
838
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_19.pre_attention_layer_norm.scale.v/0...\n",
839
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_18.pre_attention_layer_norm.scale.v/0...\n",
840
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_19.pre_mlp_layer_norm.scale.v/0...\n",
841
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_2.pre_attention_layer_norm.scale.v/0...\n",
842
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_2.pre_mlp_layer_norm.scale.v/0...\n",
843
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_20.pre_attention_layer_norm.scale.v/0...\n",
844
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_20.pre_mlp_layer_norm.scale.v/0...\n",
845
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_21.pre_attention_layer_norm.scale.v/0...\n",
846
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_21.pre_mlp_layer_norm.scale.v/0...\n",
847
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_22.pre_attention_layer_norm.scale.v/0...\n",
848
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_22.pre_mlp_layer_norm.scale.v/0...\n",
849
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_23.pre_attention_layer_norm.scale.v/0...\n",
850
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_23.pre_mlp_layer_norm.scale.v/0...\n",
851
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_24.pre_attention_layer_norm.scale.v/0...\n",
852
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_24.pre_mlp_layer_norm.scale.v/0...\n",
853
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_25.pre_mlp_layer_norm.scale.v/0...\n",
854
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_25.pre_attention_layer_norm.scale.v/0...\n",
855
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_26.pre_attention_layer_norm.scale.v/0...\n",
856
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_27.pre_attention_layer_norm.scale.v/0...\n",
857
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_26.pre_mlp_layer_norm.scale.v/0...\n",
858
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_27.pre_mlp_layer_norm.scale.v/0...\n",
859
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_28.pre_attention_layer_norm.scale.v/0...\n",
860
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_28.pre_mlp_layer_norm.scale.v/0...\n",
861
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_29.pre_attention_layer_norm.scale.v/0...\n",
862
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_29.pre_mlp_layer_norm.scale.v/0...\n",
863
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_3.pre_attention_layer_norm.scale.v/0...\n",
864
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_3.pre_mlp_layer_norm.scale.v/0...\n",
865
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_30.pre_attention_layer_norm.scale.v/0...\n",
866
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_30.pre_mlp_layer_norm.scale.v/0...\n",
867
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_31.pre_attention_layer_norm.scale.v/0...\n",
868
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_31.pre_mlp_layer_norm.scale.v/0...\n",
869
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_4.pre_attention_layer_norm.scale.v/0...\n",
870
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_4.pre_mlp_layer_norm.scale.v/0...\n",
871
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_5.pre_attention_layer_norm.scale.v/0...\n",
872
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_5.pre_mlp_layer_norm.scale.v/0...\n",
873
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_6.pre_attention_layer_norm.scale.v/0...\n",
874
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_6.pre_mlp_layer_norm.scale.v/0...\n",
875
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_7.pre_attention_layer_norm.scale.v/0...\n",
876
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_7.pre_mlp_layer_norm.scale.v/0...\n",
877
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_8.pre_attention_layer_norm.scale.v/0...\n",
878
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_8.pre_mlp_layer_norm.scale.v/0...\n",
879
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_9.pre_attention_layer_norm.scale.v/0...\n",
880
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.layers_9.pre_mlp_layer_norm.scale.v/0...\n",
881
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/state.param_states.encoder.relpos_bias.rel_embedding.v/0.0...\n",
882
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.decoder_norm.scale/0...\n",
883
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_0.encoder_decoder_attention.key.kernel/0.0...\n",
884
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_0.encoder_decoder_attention.out.kernel/0.0...\n",
885
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_0.encoder_decoder_attention.query.kernel/0.0...\n",
886
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_0.encoder_decoder_attention.value.kernel/0.0...\n",
887
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_0.mlp.wi_0.kernel/0.0...\n",
888
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_0.mlp.wi_1.kernel/0.0...\n",
889
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_0.mlp.wo.kernel/0.0...\n",
890
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_0.pre_cross_attention_layer_norm.scale/0...\n",
891
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_0.pre_mlp_layer_norm.scale/0...\n",
892
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_0.pre_self_attention_layer_norm.scale/0...\n",
893
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_0.self_attention.key.kernel/0.0...\n",
894
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_0.self_attention.out.kernel/0.0...\n",
895
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_0.self_attention.query.kernel/0.0...\n",
896
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_1.encoder_decoder_attention.key.kernel/0.0...\n",
897
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_0.self_attention.value.kernel/0.0...\n",
898
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_1.encoder_decoder_attention.out.kernel/0.0...\n",
899
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_1.encoder_decoder_attention.query.kernel/0.0...\n",
900
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_1.encoder_decoder_attention.value.kernel/0.0...\n",
901
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_1.mlp.wi_0.kernel/0.0...\n",
902
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_1.mlp.wi_1.kernel/0.0...\n",
903
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_1.mlp.wo.kernel/0.0...\n",
904
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_1.pre_cross_attention_layer_norm.scale/0...\n",
905
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_1.pre_mlp_layer_norm.scale/0...\n",
906
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_1.pre_self_attention_layer_norm.scale/0...\n",
907
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_1.self_attention.key.kernel/0.0...\n",
908
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_1.self_attention.query.kernel/0.0...\n",
909
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_1.self_attention.out.kernel/0.0...\n",
910
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_1.self_attention.value.kernel/0.0...\n",
911
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_10.encoder_decoder_attention.key.kernel/0.0...\n",
912
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_10.encoder_decoder_attention.out.kernel/0.0...\n",
913
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_10.encoder_decoder_attention.query.kernel/0.0...\n",
914
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_10.encoder_decoder_attention.value.kernel/0.0...\n",
915
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_10.mlp.wi_0.kernel/0.0...\n",
916
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_10.mlp.wi_1.kernel/0.0...\n",
917
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_10.mlp.wo.kernel/0.0...\n",
918
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_10.pre_cross_attention_layer_norm.scale/0...\n",
919
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_10.pre_mlp_layer_norm.scale/0...\n",
920
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_10.pre_self_attention_layer_norm.scale/0...\n",
921
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_10.self_attention.key.kernel/0.0...\n",
922
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_10.self_attention.out.kernel/0.0...\n",
923
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_10.self_attention.query.kernel/0.0...\n",
924
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_10.self_attention.value.kernel/0.0...\n",
925
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_11.encoder_decoder_attention.key.kernel/0.0...\n",
926
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_11.encoder_decoder_attention.out.kernel/0.0...\n",
927
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_11.encoder_decoder_attention.query.kernel/0.0...\n",
928
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_11.encoder_decoder_attention.value.kernel/0.0...\n",
929
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_11.mlp.wi_0.kernel/0.0...\n",
930
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_11.mlp.wi_1.kernel/0.0...\n",
931
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_11.pre_cross_attention_layer_norm.scale/0...\n",
932
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_11.mlp.wo.kernel/0.0...\n",
933
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_11.pre_mlp_layer_norm.scale/0...\n",
934
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_11.pre_self_attention_layer_norm.scale/0...\n",
935
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_11.self_attention.key.kernel/0.0...\n",
936
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_11.self_attention.out.kernel/0.0...\n",
937
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_11.self_attention.query.kernel/0.0...\n",
938
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_11.self_attention.value.kernel/0.0...\n",
939
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_12.encoder_decoder_attention.key.kernel/0.0...\n",
940
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_12.encoder_decoder_attention.out.kernel/0.0...\n",
941
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_12.encoder_decoder_attention.query.kernel/0.0...\n",
942
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_12.encoder_decoder_attention.value.kernel/0.0...\n",
943
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_12.mlp.wi_0.kernel/0.0...\n",
944
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_12.mlp.wi_1.kernel/0.0...\n",
945
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_12.mlp.wo.kernel/0.0...\n",
946
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_12.pre_cross_attention_layer_norm.scale/0...\n",
947
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_12.pre_mlp_layer_norm.scale/0...\n",
948
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_12.pre_self_attention_layer_norm.scale/0...\n",
949
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_12.self_attention.key.kernel/0.0...\n",
950
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_12.self_attention.out.kernel/0.0...\n",
951
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_12.self_attention.query.kernel/0.0...\n",
952
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_12.self_attention.value.kernel/0.0...\n",
953
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_13.encoder_decoder_attention.key.kernel/0.0...\n",
954
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_13.encoder_decoder_attention.out.kernel/0.0...\n",
955
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_13.encoder_decoder_attention.query.kernel/0.0...\n",
956
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_13.encoder_decoder_attention.value.kernel/0.0...\n",
957
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_13.mlp.wi_0.kernel/0.0...\n",
958
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_13.mlp.wi_1.kernel/0.0...\n",
959
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_13.mlp.wo.kernel/0.0...\n",
960
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_13.pre_cross_attention_layer_norm.scale/0...\n",
961
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_13.pre_mlp_layer_norm.scale/0...\n",
962
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_13.pre_self_attention_layer_norm.scale/0...\n",
963
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_13.self_attention.key.kernel/0.0...\n",
964
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_13.self_attention.out.kernel/0.0...\n",
965
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_13.self_attention.query.kernel/0.0...\n",
966
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_13.self_attention.value.kernel/0.0...\n",
967
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_14.encoder_decoder_attention.key.kernel/0.0...\n",
968
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_14.encoder_decoder_attention.out.kernel/0.0...\n",
969
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_14.encoder_decoder_attention.query.kernel/0.0...\n",
970
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_14.encoder_decoder_attention.value.kernel/0.0...\n",
971
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_14.mlp.wi_0.kernel/0.0...\n",
972
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_14.mlp.wi_1.kernel/0.0...\n",
973
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_14.mlp.wo.kernel/0.0...\n",
974
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_14.pre_cross_attention_layer_norm.scale/0...\n",
975
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_14.pre_mlp_layer_norm.scale/0...\n",
976
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_14.pre_self_attention_layer_norm.scale/0...\n",
977
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_14.self_attention.key.kernel/0.0...\n",
978
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_15.mlp.wi_0.kernel/0.0...\n",
979
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_14.self_attention.out.kernel/0.0...\n",
980
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_14.self_attention.query.kernel/0.0...\n",
981
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_14.self_attention.value.kernel/0.0...\n",
982
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_15.encoder_decoder_attention.key.kernel/0.0...\n",
983
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_15.encoder_decoder_attention.out.kernel/0.0...\n",
984
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_15.encoder_decoder_attention.query.kernel/0.0...\n",
985
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_15.encoder_decoder_attention.value.kernel/0.0...\n",
986
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_15.mlp.wi_1.kernel/0.0...\n",
987
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_15.mlp.wo.kernel/0.0...\n",
988
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_15.pre_cross_attention_layer_norm.scale/0...\n",
989
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_15.pre_mlp_layer_norm.scale/0...\n",
990
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_15.pre_self_attention_layer_norm.scale/0...\n",
991
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_15.self_attention.out.kernel/0.0...\n",
992
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_15.self_attention.key.kernel/0.0...\n",
993
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_15.self_attention.query.kernel/0.0...\n",
994
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_15.self_attention.value.kernel/0.0...\n",
995
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_16.encoder_decoder_attention.out.kernel/0.0...\n",
996
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_16.encoder_decoder_attention.key.kernel/0.0...\n",
997
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_16.encoder_decoder_attention.query.kernel/0.0...\n",
998
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_16.encoder_decoder_attention.value.kernel/0.0...\n",
999
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_16.mlp.wi_0.kernel/0.0...\n",
1000
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_16.mlp.wi_1.kernel/0.0...\n",
1001
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_16.mlp.wo.kernel/0.0...\n",
1002
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_16.pre_cross_attention_layer_norm.scale/0...\n",
1003
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_16.pre_mlp_layer_norm.scale/0...\n",
1004
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_16.pre_self_attention_layer_norm.scale/0...\n",
1005
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_16.self_attention.key.kernel/0.0...\n",
1006
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_16.self_attention.out.kernel/0.0...\n",
1007
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_16.self_attention.query.kernel/0.0...\n",
1008
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_16.self_attention.value.kernel/0.0...\n",
1009
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_17.encoder_decoder_attention.out.kernel/0.0...\n",
1010
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_17.encoder_decoder_attention.key.kernel/0.0...\n",
1011
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_17.encoder_decoder_attention.query.kernel/0.0...\n",
1012
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_17.encoder_decoder_attention.value.kernel/0.0...\n",
1013
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_17.mlp.wi_0.kernel/0.0...\n",
1014
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_17.mlp.wi_1.kernel/0.0...\n",
1015
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_17.mlp.wo.kernel/0.0...\n",
1016
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_17.pre_cross_attention_layer_norm.scale/0...\n",
1017
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_17.pre_mlp_layer_norm.scale/0...\n",
1018
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_17.pre_self_attention_layer_norm.scale/0...\n",
1019
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_17.self_attention.key.kernel/0.0...\n",
1020
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_17.self_attention.out.kernel/0.0...\n",
1021
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_17.self_attention.query.kernel/0.0...\n",
1022
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_17.self_attention.value.kernel/0.0...\n",
1023
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_18.encoder_decoder_attention.key.kernel/0.0...\n",
1024
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_18.encoder_decoder_attention.out.kernel/0.0...\n",
1025
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_18.encoder_decoder_attention.query.kernel/0.0...\n",
1026
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_18.encoder_decoder_attention.value.kernel/0.0...\n",
1027
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_18.mlp.wi_0.kernel/0.0...\n",
1028
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_18.mlp.wi_1.kernel/0.0...\n",
1029
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_18.mlp.wo.kernel/0.0...\n",
1030
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_18.pre_cross_attention_layer_norm.scale/0...\n",
1031
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_18.pre_mlp_layer_norm.scale/0...\n",
1032
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_18.pre_self_attention_layer_norm.scale/0...\n",
1033
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_18.self_attention.key.kernel/0.0...\n",
1034
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_18.self_attention.out.kernel/0.0...\n",
1035
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_18.self_attention.query.kernel/0.0...\n",
1036
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_18.self_attention.value.kernel/0.0...\n",
1037
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_19.encoder_decoder_attention.key.kernel/0.0...\n",
1038
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_19.encoder_decoder_attention.out.kernel/0.0...\n",
1039
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_19.encoder_decoder_attention.query.kernel/0.0...\n",
1040
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_19.encoder_decoder_attention.value.kernel/0.0...\n",
1041
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_19.mlp.wi_0.kernel/0.0...\n",
1042
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_19.mlp.wi_1.kernel/0.0...\n",
1043
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_19.mlp.wo.kernel/0.0...\n",
1044
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_19.pre_cross_attention_layer_norm.scale/0...\n",
1045
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_19.pre_mlp_layer_norm.scale/0...\n",
1046
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_19.pre_self_attention_layer_norm.scale/0...\n",
1047
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_19.self_attention.key.kernel/0.0...\n",
1048
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_19.self_attention.out.kernel/0.0...\n",
1049
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_19.self_attention.query.kernel/0.0...\n",
1050
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_19.self_attention.value.kernel/0.0...\n",
1051
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_2.encoder_decoder_attention.key.kernel/0.0...\n",
1052
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_2.encoder_decoder_attention.out.kernel/0.0...\n",
1053
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_2.encoder_decoder_attention.query.kernel/0.0...\n",
1054
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_2.encoder_decoder_attention.value.kernel/0.0...\n",
1055
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_2.mlp.wi_0.kernel/0.0...\n",
1056
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_2.mlp.wi_1.kernel/0.0...\n",
1057
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_2.mlp.wo.kernel/0.0...\n",
1058
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_2.pre_cross_attention_layer_norm.scale/0...\n",
1059
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_2.pre_mlp_layer_norm.scale/0...\n",
1060
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_2.pre_self_attention_layer_norm.scale/0...\n",
1061
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_2.self_attention.key.kernel/0.0...\n",
1062
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_2.self_attention.out.kernel/0.0...\n",
1063
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_2.self_attention.query.kernel/0.0...\n",
1064
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_2.self_attention.value.kernel/0.0...\n",
1065
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_20.encoder_decoder_attention.key.kernel/0.0...\n",
1066
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_20.encoder_decoder_attention.out.kernel/0.0...\n",
1067
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_20.encoder_decoder_attention.query.kernel/0.0...\n",
1068
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_20.encoder_decoder_attention.value.kernel/0.0...\n",
1069
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_20.mlp.wi_0.kernel/0.0...\n",
1070
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_20.mlp.wi_1.kernel/0.0...\n",
1071
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_20.mlp.wo.kernel/0.0...\n",
1072
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_20.pre_cross_attention_layer_norm.scale/0...\n",
1073
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_20.pre_mlp_layer_norm.scale/0...\n",
1074
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_20.pre_self_attention_layer_norm.scale/0...\n",
1075
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_20.self_attention.key.kernel/0.0...\n",
1076
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_20.self_attention.out.kernel/0.0...\n",
1077
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_20.self_attention.query.kernel/0.0...\n",
1078
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_21.encoder_decoder_attention.key.kernel/0.0...\n",
1079
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_20.self_attention.value.kernel/0.0...\n",
1080
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_21.encoder_decoder_attention.out.kernel/0.0...\n",
1081
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_21.encoder_decoder_attention.query.kernel/0.0...\n",
1082
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_21.encoder_decoder_attention.value.kernel/0.0...\n",
1083
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_21.mlp.wi_0.kernel/0.0...\n",
1084
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_21.mlp.wi_1.kernel/0.0...\n",
1085
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_21.mlp.wo.kernel/0.0...\n",
1086
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_21.pre_cross_attention_layer_norm.scale/0...\n",
1087
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_21.pre_mlp_layer_norm.scale/0...\n",
1088
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_21.pre_self_attention_layer_norm.scale/0...\n",
1089
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_21.self_attention.key.kernel/0.0...\n",
1090
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_21.self_attention.out.kernel/0.0...\n",
1091
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_21.self_attention.query.kernel/0.0...\n",
1092
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_21.self_attention.value.kernel/0.0...\n",
1093
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_22.encoder_decoder_attention.key.kernel/0.0...\n",
1094
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_22.encoder_decoder_attention.out.kernel/0.0...\n",
1095
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_22.encoder_decoder_attention.query.kernel/0.0...\n",
1096
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_22.encoder_decoder_attention.value.kernel/0.0...\n",
1097
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_22.mlp.wi_0.kernel/0.0...\n",
1098
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_22.mlp.wi_1.kernel/0.0...\n",
1099
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_22.mlp.wo.kernel/0.0...\n",
1100
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_22.pre_cross_attention_layer_norm.scale/0...\n",
1101
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_22.pre_mlp_layer_norm.scale/0...\n",
1102
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_22.pre_self_attention_layer_norm.scale/0...\n",
1103
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_22.self_attention.key.kernel/0.0...\n",
1104
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_22.self_attention.out.kernel/0.0...\n",
1105
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_22.self_attention.query.kernel/0.0...\n",
1106
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_22.self_attention.value.kernel/0.0...\n",
1107
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_23.encoder_decoder_attention.key.kernel/0.0...\n",
1108
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_23.encoder_decoder_attention.out.kernel/0.0...\n",
1109
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_23.encoder_decoder_attention.query.kernel/0.0...\n",
1110
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_23.encoder_decoder_attention.value.kernel/0.0...\n",
1111
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_23.mlp.wi_0.kernel/0.0...\n",
1112
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_23.mlp.wi_1.kernel/0.0...\n",
1113
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_23.mlp.wo.kernel/0.0...\n",
1114
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_23.pre_cross_attention_layer_norm.scale/0...\n",
1115
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_23.pre_mlp_layer_norm.scale/0...\n",
1116
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_23.pre_self_attention_layer_norm.scale/0...\n",
1117
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_23.self_attention.key.kernel/0.0...\n",
1118
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_23.self_attention.out.kernel/0.0...\n",
1119
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_23.self_attention.query.kernel/0.0...\n",
1120
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_23.self_attention.value.kernel/0.0...\n",
1121
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_24.encoder_decoder_attention.key.kernel/0.0...\n",
1122
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_24.encoder_decoder_attention.out.kernel/0.0...\n",
1123
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_24.encoder_decoder_attention.query.kernel/0.0...\n",
1124
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_24.encoder_decoder_attention.value.kernel/0.0...\n",
1125
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_24.mlp.wi_0.kernel/0.0...\n",
1126
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_24.mlp.wi_1.kernel/0.0...\n",
1127
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_24.mlp.wo.kernel/0.0...\n",
1128
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_24.pre_cross_attention_layer_norm.scale/0...\n",
1129
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_24.pre_mlp_layer_norm.scale/0...\n",
1130
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_24.pre_self_attention_layer_norm.scale/0...\n",
1131
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_24.self_attention.key.kernel/0.0...\n",
1132
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_24.self_attention.out.kernel/0.0...\n",
1133
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_24.self_attention.query.kernel/0.0...\n",
1134
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_24.self_attention.value.kernel/0.0...\n",
1135
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_25.encoder_decoder_attention.key.kernel/0.0...\n",
1136
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_25.encoder_decoder_attention.out.kernel/0.0...\n",
1137
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_25.encoder_decoder_attention.query.kernel/0.0...\n",
1138
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_25.encoder_decoder_attention.value.kernel/0.0...\n",
1139
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_25.mlp.wi_0.kernel/0.0...\n",
1140
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_25.mlp.wi_1.kernel/0.0...\n",
1141
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_25.mlp.wo.kernel/0.0...\n",
1142
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_25.pre_cross_attention_layer_norm.scale/0...\n",
1143
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_25.pre_mlp_layer_norm.scale/0...\n",
1144
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_25.pre_self_attention_layer_norm.scale/0...\n",
1145
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_25.self_attention.key.kernel/0.0...\n",
1146
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_25.self_attention.out.kernel/0.0...\n",
1147
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_25.self_attention.query.kernel/0.0...\n",
1148
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_25.self_attention.value.kernel/0.0...\n",
1149
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_26.encoder_decoder_attention.key.kernel/0.0...\n",
1150
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_26.encoder_decoder_attention.out.kernel/0.0...\n",
1151
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_26.encoder_decoder_attention.query.kernel/0.0...\n",
1152
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_26.encoder_decoder_attention.value.kernel/0.0...\n",
1153
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_26.mlp.wi_0.kernel/0.0...\n",
1154
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_26.mlp.wi_1.kernel/0.0...\n",
1155
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_26.mlp.wo.kernel/0.0...\n",
1156
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_26.pre_cross_attention_layer_norm.scale/0...\n",
1157
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_26.pre_mlp_layer_norm.scale/0...\n",
1158
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_26.pre_self_attention_layer_norm.scale/0...\n",
1159
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_26.self_attention.key.kernel/0.0...\n",
1160
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_26.self_attention.out.kernel/0.0...\n",
1161
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_26.self_attention.query.kernel/0.0...\n",
1162
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_26.self_attention.value.kernel/0.0...\n",
1163
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_27.encoder_decoder_attention.key.kernel/0.0...\n",
1164
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_27.encoder_decoder_attention.out.kernel/0.0...\n",
1165
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_27.encoder_decoder_attention.query.kernel/0.0...\n",
1166
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_27.encoder_decoder_attention.value.kernel/0.0...\n",
1167
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_27.mlp.wi_0.kernel/0.0...\n",
1168
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_27.mlp.wi_1.kernel/0.0...\n",
1169
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_27.mlp.wo.kernel/0.0...\n",
1170
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_27.pre_cross_attention_layer_norm.scale/0...\n",
1171
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_27.pre_mlp_layer_norm.scale/0...\n",
1172
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_27.pre_self_attention_layer_norm.scale/0...\n",
1173
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_28.encoder_decoder_attention.value.kernel/0.0...\n",
1174
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_27.self_attention.key.kernel/0.0...\n",
1175
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_27.self_attention.out.kernel/0.0...\n",
1176
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_27.self_attention.query.kernel/0.0...\n",
1177
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_27.self_attention.value.kernel/0.0...\n",
1178
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_28.encoder_decoder_attention.key.kernel/0.0...\n",
1179
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_28.encoder_decoder_attention.out.kernel/0.0...\n",
1180
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_28.encoder_decoder_attention.query.kernel/0.0...\n",
1181
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_28.mlp.wi_0.kernel/0.0...\n",
1182
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_28.mlp.wi_1.kernel/0.0...\n",
1183
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_28.mlp.wo.kernel/0.0...\n",
1184
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_28.pre_cross_attention_layer_norm.scale/0...\n",
1185
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_28.pre_mlp_layer_norm.scale/0...\n",
1186
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_28.pre_self_attention_layer_norm.scale/0...\n",
1187
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_28.self_attention.key.kernel/0.0...\n",
1188
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_28.self_attention.out.kernel/0.0...\n",
1189
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_28.self_attention.query.kernel/0.0...\n",
1190
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_28.self_attention.value.kernel/0.0...\n",
1191
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_29.encoder_decoder_attention.key.kernel/0.0...\n",
1192
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_29.encoder_decoder_attention.out.kernel/0.0...\n",
1193
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_29.encoder_decoder_attention.query.kernel/0.0...\n",
1194
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_29.encoder_decoder_attention.value.kernel/0.0...\n",
1195
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_29.mlp.wi_0.kernel/0.0...\n",
1196
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_29.mlp.wi_1.kernel/0.0...\n",
1197
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_29.mlp.wo.kernel/0.0...\n",
1198
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_29.pre_cross_attention_layer_norm.scale/0...\n",
1199
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_29.pre_mlp_layer_norm.scale/0...\n",
1200
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_29.pre_self_attention_layer_norm.scale/0...\n",
1201
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_29.self_attention.key.kernel/0.0...\n",
1202
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_29.self_attention.out.kernel/0.0...\n",
1203
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_29.self_attention.query.kernel/0.0...\n",
1204
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_29.self_attention.value.kernel/0.0...\n",
1205
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_3.encoder_decoder_attention.key.kernel/0.0...\n",
1206
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_3.encoder_decoder_attention.out.kernel/0.0...\n",
1207
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_3.encoder_decoder_attention.query.kernel/0.0...\n",
1208
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_3.encoder_decoder_attention.value.kernel/0.0...\n",
1209
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_3.mlp.wi_0.kernel/0.0...\n",
1210
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_3.mlp.wi_1.kernel/0.0...\n",
1211
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_3.mlp.wo.kernel/0.0...\n",
1212
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_3.pre_cross_attention_layer_norm.scale/0...\n",
1213
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_3.pre_mlp_layer_norm.scale/0...\n",
1214
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_3.pre_self_attention_layer_norm.scale/0...\n",
1215
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_3.self_attention.key.kernel/0.0...\n",
1216
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_3.self_attention.out.kernel/0.0...\n",
1217
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_3.self_attention.query.kernel/0.0...\n",
1218
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_3.self_attention.value.kernel/0.0...\n",
1219
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_30.encoder_decoder_attention.key.kernel/0.0...\n",
1220
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_30.encoder_decoder_attention.out.kernel/0.0...\n",
1221
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_30.encoder_decoder_attention.query.kernel/0.0...\n",
1222
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_30.encoder_decoder_attention.value.kernel/0.0...\n",
1223
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_30.mlp.wi_1.kernel/0.0...\n",
1224
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_30.mlp.wi_0.kernel/0.0...\n",
1225
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_30.mlp.wo.kernel/0.0...\n",
1226
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_30.pre_cross_attention_layer_norm.scale/0...\n",
1227
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_30.pre_mlp_layer_norm.scale/0...\n",
1228
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_30.pre_self_attention_layer_norm.scale/0...\n",
1229
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_30.self_attention.key.kernel/0.0...\n",
1230
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_30.self_attention.out.kernel/0.0...\n",
1231
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_30.self_attention.query.kernel/0.0...\n",
1232
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_30.self_attention.value.kernel/0.0...\n",
1233
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_31.encoder_decoder_attention.key.kernel/0.0...\n",
1234
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_31.encoder_decoder_attention.out.kernel/0.0...\n",
1235
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_31.encoder_decoder_attention.query.kernel/0.0...\n",
1236
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_31.encoder_decoder_attention.value.kernel/0.0...\n",
1237
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_31.mlp.wi_0.kernel/0.0...\n",
1238
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_31.mlp.wi_1.kernel/0.0...\n",
1239
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_31.mlp.wo.kernel/0.0...\n",
1240
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_31.pre_cross_attention_layer_norm.scale/0...\n",
1241
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_31.pre_mlp_layer_norm.scale/0...\n",
1242
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_31.pre_self_attention_layer_norm.scale/0...\n",
1243
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_31.self_attention.key.kernel/0.0...\n",
1244
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_31.self_attention.out.kernel/0.0...\n",
1245
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_31.self_attention.query.kernel/0.0...\n",
1246
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_31.self_attention.value.kernel/0.0...\n",
1247
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_4.encoder_decoder_attention.key.kernel/0.0...\n",
1248
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_4.encoder_decoder_attention.out.kernel/0.0...\n",
1249
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_4.encoder_decoder_attention.query.kernel/0.0...\n",
1250
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_4.encoder_decoder_attention.value.kernel/0.0...\n",
1251
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_4.mlp.wi_0.kernel/0.0...\n",
1252
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_4.mlp.wi_1.kernel/0.0...\n",
1253
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_4.mlp.wo.kernel/0.0...\n",
1254
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_4.pre_cross_attention_layer_norm.scale/0...\n",
1255
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_4.pre_mlp_layer_norm.scale/0...\n",
1256
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_4.pre_self_attention_layer_norm.scale/0...\n",
1257
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_4.self_attention.key.kernel/0.0...\n",
1258
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_4.self_attention.out.kernel/0.0...\n",
1259
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_4.self_attention.query.kernel/0.0...\n",
1260
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_4.self_attention.value.kernel/0.0...\n",
1261
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_5.encoder_decoder_attention.key.kernel/0.0...\n",
1262
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_5.encoder_decoder_attention.out.kernel/0.0...\n",
1263
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_5.encoder_decoder_attention.query.kernel/0.0...\n",
1264
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_5.encoder_decoder_attention.value.kernel/0.0...\n",
1265
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_5.mlp.wi_0.kernel/0.0...\n",
1266
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_5.mlp.wi_1.kernel/0.0...\n",
1267
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_5.mlp.wo.kernel/0.0...\n",
1268
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_5.pre_cross_attention_layer_norm.scale/0...\n",
1269
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_5.pre_mlp_layer_norm.scale/0...\n",
1270
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_5.pre_self_attention_layer_norm.scale/0...\n",
1271
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_5.self_attention.key.kernel/0.0...\n",
1272
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_5.self_attention.out.kernel/0.0...\n",
1273
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_5.self_attention.query.kernel/0.0...\n",
1274
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_5.self_attention.value.kernel/0.0...\n",
1275
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_6.encoder_decoder_attention.key.kernel/0.0...\n",
1276
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_6.encoder_decoder_attention.out.kernel/0.0...\n",
1277
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_6.encoder_decoder_attention.query.kernel/0.0...\n",
1278
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_6.encoder_decoder_attention.value.kernel/0.0...\n",
1279
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_6.mlp.wi_0.kernel/0.0...\n",
1280
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_6.mlp.wi_1.kernel/0.0...\n",
1281
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_6.mlp.wo.kernel/0.0...\n",
1282
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_6.pre_cross_attention_layer_norm.scale/0...\n",
1283
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_6.pre_mlp_layer_norm.scale/0...\n",
1284
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_6.pre_self_attention_layer_norm.scale/0...\n",
1285
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_6.self_attention.key.kernel/0.0...\n",
1286
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_6.self_attention.out.kernel/0.0...\n",
1287
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_6.self_attention.query.kernel/0.0...\n",
1288
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_6.self_attention.value.kernel/0.0...\n",
1289
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_7.encoder_decoder_attention.key.kernel/0.0...\n",
1290
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_7.encoder_decoder_attention.out.kernel/0.0...\n",
1291
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_7.encoder_decoder_attention.query.kernel/0.0...\n",
1292
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_7.encoder_decoder_attention.value.kernel/0.0...\n",
1293
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_7.mlp.wi_0.kernel/0.0...\n",
1294
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_7.mlp.wi_1.kernel/0.0...\n",
1295
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_7.mlp.wo.kernel/0.0...\n",
1296
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_7.pre_cross_attention_layer_norm.scale/0...\n",
1297
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_7.pre_mlp_layer_norm.scale/0...\n",
1298
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_7.pre_self_attention_layer_norm.scale/0...\n",
1299
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_7.self_attention.key.kernel/0.0...\n",
1300
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_7.self_attention.out.kernel/0.0...\n",
1301
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_7.self_attention.query.kernel/0.0...\n",
1302
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_7.self_attention.value.kernel/0.0...\n",
1303
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_8.encoder_decoder_attention.key.kernel/0.0...\n",
1304
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_8.encoder_decoder_attention.out.kernel/0.0...\n",
1305
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_8.encoder_decoder_attention.query.kernel/0.0...\n",
1306
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_8.encoder_decoder_attention.value.kernel/0.0...\n",
1307
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_8.mlp.wi_0.kernel/0.0...\n",
1308
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_8.mlp.wi_1.kernel/0.0...\n",
1309
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_8.mlp.wo.kernel/0.0...\n",
1310
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_8.pre_cross_attention_layer_norm.scale/0...\n",
1311
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_8.pre_mlp_layer_norm.scale/0...\n",
1312
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_8.pre_self_attention_layer_norm.scale/0...\n",
1313
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_8.self_attention.key.kernel/0.0...\n",
1314
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_8.self_attention.out.kernel/0.0...\n",
1315
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_8.self_attention.query.kernel/0.0...\n",
1316
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_8.self_attention.value.kernel/0.0...\n",
1317
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_9.encoder_decoder_attention.key.kernel/0.0...\n",
1318
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_9.encoder_decoder_attention.out.kernel/0.0...\n",
1319
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_9.encoder_decoder_attention.query.kernel/0.0...\n",
1320
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_9.encoder_decoder_attention.value.kernel/0.0...\n",
1321
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_9.mlp.wi_0.kernel/0.0...\n",
1322
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_9.mlp.wi_1.kernel/0.0...\n",
1323
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_9.mlp.wo.kernel/0.0...\n",
1324
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_9.pre_cross_attention_layer_norm.scale/0...\n",
1325
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_9.pre_mlp_layer_norm.scale/0...\n",
1326
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_9.pre_self_attention_layer_norm.scale/0...\n",
1327
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_9.self_attention.key.kernel/0.0...\n",
1328
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_9.self_attention.out.kernel/0.0...\n",
1329
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_9.self_attention.query.kernel/0.0...\n",
1330
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.layers_9.self_attention.value.kernel/0.0...\n",
1331
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.logits_dense.kernel/0.0...\n",
1332
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.logits_dense.kernel/0.1...\n",
1333
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.logits_dense.kernel/0.11...\n",
1334
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.logits_dense.kernel/0.10...\n",
1335
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.logits_dense.kernel/0.12...\n",
1336
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.logits_dense.kernel/0.14...\n",
1337
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.logits_dense.kernel/0.13...\n",
1338
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.logits_dense.kernel/0.15...\n",
1339
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.logits_dense.kernel/0.2...\n",
1340
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.logits_dense.kernel/0.3...\n",
1341
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.logits_dense.kernel/0.4...\n",
1342
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.logits_dense.kernel/0.6...\n",
1343
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.logits_dense.kernel/0.5...\n",
1344
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.logits_dense.kernel/0.7...\n",
1345
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.logits_dense.kernel/0.8...\n",
1346
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.logits_dense.kernel/0.9...\n",
1347
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.decoder.relpos_bias.rel_embedding/0.0...\n",
1348
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.encoder_norm.scale/0...\n",
1349
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_0.attention.key.kernel/0.0...\n",
1350
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_0.attention.out.kernel/0.0...\n",
1351
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_0.attention.query.kernel/0.0...\n",
1352
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_0.attention.value.kernel/0.0...\n",
1353
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_0.mlp.wi_0.kernel/0.0...\n",
1354
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_0.mlp.wi_1.kernel/0.0...\n",
1355
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_0.mlp.wo.kernel/0.0...\n",
1356
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_0.pre_attention_layer_norm.scale/0...\n",
1357
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_0.pre_mlp_layer_norm.scale/0...\n",
1358
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_1.attention.key.kernel/0.0...\n",
1359
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_1.attention.query.kernel/0.0...\n",
1360
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_1.attention.out.kernel/0.0...\n",
1361
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_1.attention.value.kernel/0.0...\n",
1362
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_1.mlp.wi_0.kernel/0.0...\n",
1363
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_1.mlp.wi_1.kernel/0.0...\n",
1364
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_1.mlp.wo.kernel/0.0...\n",
1365
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_1.pre_attention_layer_norm.scale/0...\n",
1366
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_1.pre_mlp_layer_norm.scale/0...\n",
1367
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_10.attention.key.kernel/0.0...\n",
1368
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_10.attention.out.kernel/0.0...\n",
1369
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_10.attention.query.kernel/0.0...\n",
1370
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_10.attention.value.kernel/0.0...\n",
1371
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_10.mlp.wi_0.kernel/0.0...\n",
1372
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_10.mlp.wi_1.kernel/0.0...\n",
1373
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_10.mlp.wo.kernel/0.0...\n",
1374
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_10.pre_attention_layer_norm.scale/0...\n",
1375
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_10.pre_mlp_layer_norm.scale/0...\n",
1376
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_11.attention.key.kernel/0.0...\n",
1377
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_11.attention.out.kernel/0.0...\n",
1378
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_11.attention.query.kernel/0.0...\n",
1379
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_11.attention.value.kernel/0.0...\n",
1380
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_11.mlp.wi_0.kernel/0.0...\n",
1381
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_11.mlp.wi_1.kernel/0.0...\n",
1382
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_11.mlp.wo.kernel/0.0...\n",
1383
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_11.pre_attention_layer_norm.scale/0...\n",
1384
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_11.pre_mlp_layer_norm.scale/0...\n",
1385
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_13.attention.out.kernel/0.0...\n",
1386
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_12.attention.out.kernel/0.0...\n",
1387
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_12.attention.query.kernel/0.0...\n",
1388
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_12.attention.value.kernel/0.0...\n",
1389
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_12.mlp.wi_0.kernel/0.0...\n",
1390
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_12.mlp.wi_1.kernel/0.0...\n",
1391
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_12.mlp.wo.kernel/0.0...\n",
1392
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_12.pre_attention_layer_norm.scale/0...\n",
1393
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_12.pre_mlp_layer_norm.scale/0...\n",
1394
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_13.attention.key.kernel/0.0...\n",
1395
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_12.attention.key.kernel/0.0...\n",
1396
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_13.attention.query.kernel/0.0...\n",
1397
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_13.attention.value.kernel/0.0...\n",
1398
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_13.mlp.wi_0.kernel/0.0...\n",
1399
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_13.mlp.wi_1.kernel/0.0...\n",
1400
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_13.mlp.wo.kernel/0.0...\n",
1401
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_13.pre_attention_layer_norm.scale/0...\n",
1402
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_13.pre_mlp_layer_norm.scale/0...\n",
1403
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_14.attention.key.kernel/0.0...\n",
1404
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_14.attention.out.kernel/0.0...\n",
1405
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_14.attention.query.kernel/0.0...\n",
1406
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_14.attention.value.kernel/0.0...\n",
1407
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_14.mlp.wi_0.kernel/0.0...\n",
1408
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_14.mlp.wi_1.kernel/0.0...\n",
1409
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_14.mlp.wo.kernel/0.0...\n",
1410
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_14.pre_attention_layer_norm.scale/0...\n",
1411
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_14.pre_mlp_layer_norm.scale/0...\n",
1412
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_15.attention.key.kernel/0.0...\n",
1413
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_15.attention.out.kernel/0.0...\n",
1414
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_15.attention.query.kernel/0.0...\n",
1415
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_15.attention.value.kernel/0.0...\n",
1416
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_15.mlp.wi_0.kernel/0.0...\n",
1417
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_15.mlp.wi_1.kernel/0.0...\n",
1418
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_15.pre_attention_layer_norm.scale/0...\n",
1419
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_15.mlp.wo.kernel/0.0...\n",
1420
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_15.pre_mlp_layer_norm.scale/0...\n",
1421
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_16.attention.out.kernel/0.0...\n",
1422
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_16.attention.key.kernel/0.0...\n",
1423
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_16.attention.query.kernel/0.0...\n",
1424
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_16.attention.value.kernel/0.0...\n",
1425
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_16.mlp.wi_0.kernel/0.0...\n",
1426
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_16.mlp.wi_1.kernel/0.0...\n",
1427
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_16.mlp.wo.kernel/0.0...\n",
1428
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_16.pre_attention_layer_norm.scale/0...\n",
1429
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_16.pre_mlp_layer_norm.scale/0...\n",
1430
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_17.attention.key.kernel/0.0...\n",
1431
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_17.attention.out.kernel/0.0...\n",
1432
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_17.attention.query.kernel/0.0...\n",
1433
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_17.attention.value.kernel/0.0...\n",
1434
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_17.mlp.wi_0.kernel/0.0...\n",
1435
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_17.mlp.wi_1.kernel/0.0...\n",
1436
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_17.mlp.wo.kernel/0.0...\n",
1437
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_17.pre_attention_layer_norm.scale/0...\n",
1438
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_17.pre_mlp_layer_norm.scale/0...\n",
1439
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_18.attention.key.kernel/0.0...\n",
1440
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_18.attention.out.kernel/0.0...\n",
1441
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_18.attention.query.kernel/0.0...\n",
1442
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_18.attention.value.kernel/0.0...\n",
1443
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_18.mlp.wi_0.kernel/0.0...\n",
1444
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_18.mlp.wi_1.kernel/0.0...\n",
1445
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_18.mlp.wo.kernel/0.0...\n",
1446
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_18.pre_attention_layer_norm.scale/0...\n",
1447
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_18.pre_mlp_layer_norm.scale/0...\n",
1448
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_19.attention.key.kernel/0.0...\n",
1449
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_19.attention.out.kernel/0.0...\n",
1450
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_19.attention.query.kernel/0.0...\n",
1451
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_19.attention.value.kernel/0.0...\n",
1452
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_19.mlp.wi_0.kernel/0.0...\n",
1453
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_19.mlp.wi_1.kernel/0.0...\n",
1454
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_19.mlp.wo.kernel/0.0...\n",
1455
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_19.pre_attention_layer_norm.scale/0...\n",
1456
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_19.pre_mlp_layer_norm.scale/0...\n",
1457
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_2.attention.key.kernel/0.0...\n",
1458
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_2.attention.out.kernel/0.0...\n",
1459
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_2.attention.query.kernel/0.0...\n",
1460
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_2.attention.value.kernel/0.0...\n",
1461
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_2.mlp.wi_0.kernel/0.0...\n",
1462
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_2.mlp.wi_1.kernel/0.0...\n",
1463
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_2.mlp.wo.kernel/0.0...\n",
1464
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_2.pre_attention_layer_norm.scale/0...\n",
1465
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_2.pre_mlp_layer_norm.scale/0...\n",
1466
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_20.attention.key.kernel/0.0...\n",
1467
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_20.attention.out.kernel/0.0...\n",
1468
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_20.attention.query.kernel/0.0...\n",
1469
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_20.attention.value.kernel/0.0...\n",
1470
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_20.mlp.wi_0.kernel/0.0...\n",
1471
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_20.mlp.wi_1.kernel/0.0...\n",
1472
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_20.mlp.wo.kernel/0.0...\n",
1473
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_20.pre_attention_layer_norm.scale/0...\n",
1474
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_20.pre_mlp_layer_norm.scale/0...\n",
1475
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_21.attention.key.kernel/0.0...\n",
1476
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_21.attention.out.kernel/0.0...\n",
1477
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_21.attention.query.kernel/0.0...\n",
1478
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_21.attention.value.kernel/0.0...\n",
1479
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_21.mlp.wi_0.kernel/0.0...\n",
1480
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_21.mlp.wi_1.kernel/0.0...\n",
1481
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_21.mlp.wo.kernel/0.0...\n",
1482
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_21.pre_attention_layer_norm.scale/0...\n",
1483
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_21.pre_mlp_layer_norm.scale/0...\n",
1484
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_22.attention.key.kernel/0.0...\n",
1485
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_22.attention.out.kernel/0.0...\n",
1486
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_22.attention.query.kernel/0.0...\n",
1487
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_22.attention.value.kernel/0.0...\n",
1488
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_22.mlp.wi_0.kernel/0.0...\n",
1489
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_22.mlp.wi_1.kernel/0.0...\n",
1490
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_22.mlp.wo.kernel/0.0...\n",
1491
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_22.pre_attention_layer_norm.scale/0...\n",
1492
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_22.pre_mlp_layer_norm.scale/0...\n",
1493
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_23.attention.key.kernel/0.0...\n",
1494
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_23.attention.out.kernel/0.0...\n",
1495
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_23.attention.query.kernel/0.0...\n",
1496
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_23.attention.value.kernel/0.0...\n",
1497
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_23.mlp.wi_0.kernel/0.0...\n",
1498
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_23.mlp.wo.kernel/0.0...\n",
1499
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_23.mlp.wi_1.kernel/0.0...\n",
1500
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_23.pre_attention_layer_norm.scale/0...\n",
1501
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_23.pre_mlp_layer_norm.scale/0...\n",
1502
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_24.attention.key.kernel/0.0...\n",
1503
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_24.attention.out.kernel/0.0...\n",
1504
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_24.attention.query.kernel/0.0...\n",
1505
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_24.attention.value.kernel/0.0...\n",
1506
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_24.mlp.wi_0.kernel/0.0...\n",
1507
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_24.mlp.wi_1.kernel/0.0...\n",
1508
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_24.mlp.wo.kernel/0.0...\n",
1509
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_24.pre_attention_layer_norm.scale/0...\n",
1510
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_24.pre_mlp_layer_norm.scale/0...\n",
1511
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_25.attention.key.kernel/0.0...\n",
1512
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_25.attention.out.kernel/0.0...\n",
1513
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_25.attention.query.kernel/0.0...\n",
1514
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_25.attention.value.kernel/0.0...\n",
1515
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_25.mlp.wi_0.kernel/0.0...\n",
1516
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_25.mlp.wi_1.kernel/0.0...\n",
1517
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_25.mlp.wo.kernel/0.0...\n",
1518
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_25.pre_attention_layer_norm.scale/0...\n",
1519
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_25.pre_mlp_layer_norm.scale/0...\n",
1520
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_26.attention.key.kernel/0.0...\n",
1521
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_26.attention.out.kernel/0.0...\n",
1522
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_26.attention.query.kernel/0.0...\n",
1523
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_26.attention.value.kernel/0.0...\n",
1524
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_26.mlp.wi_0.kernel/0.0...\n",
1525
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_26.mlp.wi_1.kernel/0.0...\n",
1526
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_26.mlp.wo.kernel/0.0...\n",
1527
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_26.pre_attention_layer_norm.scale/0...\n",
1528
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_26.pre_mlp_layer_norm.scale/0...\n",
1529
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_27.attention.key.kernel/0.0...\n",
1530
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_27.attention.out.kernel/0.0...\n",
1531
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_27.attention.query.kernel/0.0...\n",
1532
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_27.attention.value.kernel/0.0...\n",
1533
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_27.mlp.wi_0.kernel/0.0...\n",
1534
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_27.mlp.wi_1.kernel/0.0...\n",
1535
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_27.mlp.wo.kernel/0.0...\n",
1536
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_27.pre_attention_layer_norm.scale/0...\n",
1537
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_27.pre_mlp_layer_norm.scale/0...\n",
1538
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_28.attention.key.kernel/0.0...\n",
1539
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_28.attention.out.kernel/0.0...\n",
1540
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_28.attention.query.kernel/0.0...\n",
1541
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_28.attention.value.kernel/0.0...\n",
1542
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_28.mlp.wi_0.kernel/0.0...\n",
1543
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_28.mlp.wi_1.kernel/0.0...\n",
1544
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_28.mlp.wo.kernel/0.0...\n",
1545
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_28.pre_attention_layer_norm.scale/0...\n",
1546
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_28.pre_mlp_layer_norm.scale/0...\n",
1547
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_29.attention.key.kernel/0.0...\n",
1548
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_29.attention.out.kernel/0.0...\n",
1549
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_29.attention.query.kernel/0.0...\n",
1550
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_29.attention.value.kernel/0.0...\n",
1551
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_29.mlp.wi_0.kernel/0.0...\n",
1552
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_29.mlp.wi_1.kernel/0.0...\n",
1553
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_29.mlp.wo.kernel/0.0...\n",
1554
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_29.pre_attention_layer_norm.scale/0...\n",
1555
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_29.pre_mlp_layer_norm.scale/0...\n",
1556
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_3.attention.key.kernel/0.0...\n",
1557
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_3.attention.out.kernel/0.0...\n",
1558
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_3.attention.query.kernel/0.0...\n",
1559
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_3.attention.value.kernel/0.0...\n",
1560
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_3.mlp.wi_0.kernel/0.0...\n",
1561
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_3.mlp.wi_1.kernel/0.0...\n",
1562
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_3.mlp.wo.kernel/0.0...\n",
1563
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_3.pre_attention_layer_norm.scale/0...\n",
1564
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_3.pre_mlp_layer_norm.scale/0...\n",
1565
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_30.attention.key.kernel/0.0...\n",
1566
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_30.attention.out.kernel/0.0...\n",
1567
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_30.attention.query.kernel/0.0...\n",
1568
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_30.attention.value.kernel/0.0...\n",
1569
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_30.mlp.wi_0.kernel/0.0...\n",
1570
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_30.mlp.wi_1.kernel/0.0...\n",
1571
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_30.mlp.wo.kernel/0.0...\n",
1572
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_30.pre_attention_layer_norm.scale/0...\n",
1573
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_30.pre_mlp_layer_norm.scale/0...\n",
1574
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_31.attention.key.kernel/0.0...\n",
1575
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_31.attention.out.kernel/0.0...\n",
1576
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_31.attention.query.kernel/0.0...\n",
1577
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_31.attention.value.kernel/0.0...\n",
1578
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_31.mlp.wi_0.kernel/0.0...\n",
1579
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_31.mlp.wi_1.kernel/0.0...\n",
1580
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_31.mlp.wo.kernel/0.0...\n",
1581
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_31.pre_attention_layer_norm.scale/0...\n",
1582
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_31.pre_mlp_layer_norm.scale/0...\n",
1583
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_4.attention.key.kernel/0.0...\n",
1584
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_4.attention.out.kernel/0.0...\n",
1585
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_4.attention.query.kernel/0.0...\n",
1586
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_4.attention.value.kernel/0.0...\n",
1587
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_4.mlp.wi_0.kernel/0.0...\n",
1588
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_4.mlp.wi_1.kernel/0.0...\n",
1589
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_4.mlp.wo.kernel/0.0...\n",
1590
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_4.pre_attention_layer_norm.scale/0...\n",
1591
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_4.pre_mlp_layer_norm.scale/0...\n",
1592
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_5.attention.key.kernel/0.0...\n",
1593
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_5.attention.out.kernel/0.0...\n",
1594
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_5.attention.query.kernel/0.0...\n",
1595
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_5.attention.value.kernel/0.0...\n",
1596
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_5.mlp.wi_0.kernel/0.0...\n",
1597
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_5.mlp.wi_1.kernel/0.0...\n",
1598
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_5.mlp.wo.kernel/0.0...\n",
1599
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_5.pre_attention_layer_norm.scale/0...\n",
1600
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_5.pre_mlp_layer_norm.scale/0...\n",
1601
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_6.attention.key.kernel/0.0...\n",
1602
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_6.attention.out.kernel/0.0...\n",
1603
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_6.attention.query.kernel/0.0...\n",
1604
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_6.attention.value.kernel/0.0...\n",
1605
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_6.mlp.wi_0.kernel/0.0...\n",
1606
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_6.mlp.wi_1.kernel/0.0...\n",
1607
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_6.mlp.wo.kernel/0.0...\n",
1608
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_6.pre_attention_layer_norm.scale/0...\n",
1609
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_6.pre_mlp_layer_norm.scale/0...\n",
1610
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_7.attention.key.kernel/0.0...\n",
1611
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_7.attention.out.kernel/0.0...\n",
1612
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_7.attention.query.kernel/0.0...\n",
1613
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_7.attention.value.kernel/0.0...\n",
1614
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_7.mlp.wi_0.kernel/0.0...\n",
1615
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_7.mlp.wi_1.kernel/0.0...\n",
1616
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_7.mlp.wo.kernel/0.0...\n",
1617
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_7.pre_attention_layer_norm.scale/0...\n",
1618
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_7.pre_mlp_layer_norm.scale/0...\n",
1619
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_8.attention.key.kernel/0.0...\n",
1620
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_8.attention.out.kernel/0.0...\n",
1621
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_8.attention.query.kernel/0.0...\n",
1622
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_8.attention.value.kernel/0.0...\n",
1623
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_8.mlp.wi_0.kernel/0.0...\n",
1624
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_8.mlp.wi_1.kernel/0.0...\n",
1625
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_8.mlp.wo.kernel/0.0...\n",
1626
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_8.pre_mlp_layer_norm.scale/0...\n",
1627
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_8.pre_attention_layer_norm.scale/0...\n",
1628
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_9.attention.key.kernel/0.0...\n",
1629
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_9.attention.out.kernel/0.0...\n",
1630
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_9.attention.query.kernel/0.0...\n",
1631
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_9.attention.value.kernel/0.0...\n",
1632
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_9.mlp.wi_0.kernel/0.0...\n",
1633
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_9.mlp.wi_1.kernel/0.0...\n",
1634
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_9.mlp.wo.kernel/0.0...\n",
1635
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_9.pre_attention_layer_norm.scale/0...\n",
1636
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.layers_9.pre_mlp_layer_norm.scale/0...\n",
1637
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.encoder.relpos_bias.rel_embedding/0.0...\n",
1638
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.token_embedder.embedding/0.0...\n",
1639
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.token_embedder.embedding/1.0...\n",
1640
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.token_embedder.embedding/11.0...\n",
1641
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.token_embedder.embedding/12.0...\n",
1642
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.token_embedder.embedding/10.0...\n",
1643
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.token_embedder.embedding/14.0...\n",
1644
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.token_embedder.embedding/13.0...\n",
1645
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.token_embedder.embedding/15.0...\n",
1646
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.token_embedder.embedding/2.0...\n",
1647
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.token_embedder.embedding/3.0...\n",
1648
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.token_embedder.embedding/4.0...\n",
1649
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.token_embedder.embedding/5.0...\n",
1650
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.token_embedder.embedding/6.0...\n",
1651
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.token_embedder.embedding/7.0...\n",
1652
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.token_embedder.embedding/8.0...\n",
1653
            "Copying gs://madlad-400-checkpoints/checkpoints/3b-mt/target.token_embedder.embedding/9.0...\n"
1654
          ]
1655
        }
1656
      ],
1657
      "source": [
1658
        "\n",
1659
        "# Madlad checkpoint\n",
1660
        "!gsutil -m cp -r gs://madlad-400-checkpoints/checkpoints/3b-mt ."
1661
      ]
1662
    },
1663
    {
1664
      "cell_type": "markdown",
1665
      "metadata": {
1666
        "id": "IQgbbapGqHC1"
1667
      },
1668
      "source": [
1669
        "## Rebuild .zarray files"
1670
      ]
1671
    },
1672
    {
1673
      "cell_type": "code",
1674
      "execution_count": null,
1675
      "metadata": {
1676
        "id": "vXs21LMReqyc"
1677
      },
1678
      "outputs": [],
1679
      "source": [
1680
        "import json\n",
1681
        "import os\n",
1682
        "import orbax.checkpoint\n",
1683
        "\n",
1684
        "orbax_checkpointer = orbax.checkpoint.PyTreeCheckpointer()\n",
1685
        "model = orbax_checkpointer.restore(\"3b-mt\")\n",
1686
        "\n",
1687
        "def create_jarr_files(d):\n",
1688
        "  if not isinstance(d, dict):\n",
1689
        "    return\n",
1690
        "  if \"kvstore\" in d:\n",
1691
        "    path = os.path.join('3b-mt', d[\"kvstore\"][\"path\"], \".zarray\")\n",
1692
        "    with open(path, \"w\") as f:\n",
1693
        "      data = d[\"metadata\"].copy()\n",
1694
        "      data[\"zarr_format\"] = 2\n",
1695
        "      assert d[\"dtype\"] == \"float32\"\n",
1696
        "      data[\"dtype\"] = \"\u003cf4\"\n",
1697
        "      data[\"dimension_separator\"] = \".\"\n",
1698
        "      data[\"fill_value\"] = None\n",
1699
        "      data[\"filters\"] = None\n",
1700
        "      data[\"order\"] = \"C\"\n",
1701
        "      f.write(json.dumps(data))\n",
1702
        "  for v in d.values():\n",
1703
        "    create_jarr_files(v)\n",
1704
        "\n",
1705
        "create_jarr_files(model)"
1706
      ]
1707
    },
1708
    {
1709
      "cell_type": "markdown",
1710
      "metadata": {
1711
        "id": "nZJbWZcfkyxI"
1712
      },
1713
      "source": [
1714
        "## Set-Up"
1715
      ]
1716
    },
1717
    {
1718
      "cell_type": "markdown",
1719
      "metadata": {
1720
        "id": "8my9yhSRi6GG"
1721
      },
1722
      "source": [
1723
        "Note: If you are using the public Colab (not the Pro version), you are likely to run out of memory. Please consider using its `Connect to a local runtime` option by following the [setup guide](https://github.com/google-research/t5x/blob/main/t5x/notebooks/README.md)."
1724
      ]
1725
    },
1726
    {
1727
      "cell_type": "code",
1728
      "execution_count": null,
1729
      "metadata": {
1730
        "id": "jIGSIHzD7YPO"
1731
      },
1732
      "outputs": [],
1733
      "source": [
1734
        "\n",
1735
        "import functools\n",
1736
        "\n",
1737
        "import jax\n",
1738
        "from jax.experimental import multihost_utils\n",
1739
        "import numpy as np\n",
1740
        "import os\n",
1741
        "import re\n",
1742
        "import seqio\n",
1743
        "import tensorflow as tf\n",
1744
        "\n",
1745
        "from t5x.examples.t5 import network\n",
1746
        "import t5x\n",
1747
        "from t5x import models\n",
1748
        "from t5x import partitioning\n",
1749
        "from t5x import trainer as trainer_lib\n",
1750
        "from t5x import utils\n",
1751
        "from t5x.infer import _extract_tokens_and_aux_values\n",
1752
        "from t5x.interactive_model import get_dataset_from_natural_text_examples"
1753
      ]
1754
    },
1755
    {
1756
      "cell_type": "markdown",
1757
      "metadata": {
1758
        "id": "S5Lb-Z1fkF5a"
1759
      },
1760
      "source": [
1761
        "### Model Network"
1762
      ]
1763
    },
1764
    {
1765
      "cell_type": "code",
1766
      "execution_count": null,
1767
      "metadata": {
1768
        "id": "Ne8U8qoWkX_r"
1769
      },
1770
      "outputs": [],
1771
      "source": [
1772
        "# t5 network\n",
1773
        "t5_config = network.T5Config(\n",
1774
        "    vocab_size=256000,\n",
1775
        "    dtype='bfloat16',\n",
1776
        "    emb_dim=1024,\n",
1777
        "    num_heads=16,\n",
1778
        "    num_encoder_layers=32,\n",
1779
        "    num_decoder_layers=32,\n",
1780
        "    head_dim=128,\n",
1781
        "    mlp_dim=8192,\n",
1782
        "    mlp_activations=('gelu', 'linear'),\n",
1783
        "    dropout_rate=0.0,\n",
1784
        "    logits_via_embedding=False)\n",
1785
        "module = network.Transformer(config=t5_config)\n",
1786
        "\n",
1787
        "# vocabulary\n",
1788
        "vocabulary = seqio.SentencePieceVocabulary(\n",
1789
        "    'gs://madlad-400-checkpoints/vocabulary/256k_vocab/spm.model')\n",
1790
        "\n",
1791
        "# model\n",
1792
        "model = t5x.models.EncoderDecoderModel(\n",
1793
        "    module=module,\n",
1794
        "    input_vocabulary=vocabulary,\n",
1795
        "    output_vocabulary=vocabulary,\n",
1796
        "    optimizer_def=t5x.adafactor.Adafactor(decay_rate=0.8, step_offset=0))\n",
1797
        "\n",
1798
        "# downloaded checkpoint path\n",
1799
        "checkpoint_path='3b-mt'\n",
1800
        "\n",
1801
        "# Misc\n",
1802
        "dtype='bfloat16'\n",
1803
        "restore_mode='specific'\n",
1804
        "# Define a partitioner.\n",
1805
        "partitioner=partitioning.PjitPartitioner(num_partitions=1)\n",
1806
        "# Define additional, miscellaneous constructor arguments.\n",
1807
        "batch_size=8\n",
1808
        "task_feature_lengths = {'inputs': 64, 'targets': 64}\n",
1809
        "output_dir='/tmp/output_dir'\n",
1810
        "input_shapes = {\n",
1811
        "    'encoder_input_tokens': np.array([8, 38]),\n",
1812
        "    'decoder_target_tokens': np.array([8, 18]),\n",
1813
        "    'decoder_input_tokens': np.array([8, 18]),\n",
1814
        "    'decoder_loss_weights': np.array([8, 18])\n",
1815
        "}"
1816
      ]
1817
    },
1818
    {
1819
      "cell_type": "markdown",
1820
      "metadata": {
1821
        "id": "EYwdg-fFTU8Q"
1822
      },
1823
      "source": [
1824
        "### Checkpoint Restore"
1825
      ]
1826
    },
1827
    {
1828
      "cell_type": "code",
1829
      "execution_count": null,
1830
      "metadata": {
1831
        "id": "YmGTJBAcTpMR"
1832
      },
1833
      "outputs": [],
1834
      "source": [
1835
        "# 1.) Configure the Output Directory\n",
1836
        "output_dir = re.sub(r\"(?\u003c!gs:)([\\/]{2,})\", \"/\", output_dir)\n",
1837
        "if not os.path.exists(output_dir):\n",
1838
        "  os.mkdir(output_dir)\n",
1839
        "\n",
1840
        "# 2.) Initialize RNGs\n",
1841
        "init_random_seed = 42\n",
1842
        "random_seed = multihost_utils.broadcast_one_to_all(np.int32(init_random_seed))\n",
1843
        "utils.set_hardware_rng_ops()\n",
1844
        "rng = jax.random.PRNGKey(random_seed)\n",
1845
        "init_rng, trainer_rng = jax.random.split(rng, 2)\n",
1846
        "\n",
1847
        "# 3.) Validate the Partitioner\n",
1848
        "if partitioner._model_parallel_submesh:\n",
1849
        "  num_partitions = np.prod(partitioner._model_parallel_submesh)\n",
1850
        "else:\n",
1851
        "  num_partitions = partitioner._num_partitions\n",
1852
        "if jax.device_count() % num_partitions != 0:\n",
1853
        "  raise ValueError(\n",
1854
        "    \"The number of devices available must be a multiple of the number of\",\n",
1855
        "    f\" partitions. There are {jax.device_count()} devices available, but\",\n",
1856
        "    f\" the number of partitions is set to {num_partitions}. Please\",\n",
1857
        "    \" provide a different number of partitions.\")\n",
1858
        "\n",
1859
        "# 4.) Create a Checkpoint Manager\n",
1860
        "# a.) Define CheckpointCfg wrappers.\n",
1861
        "save_checkpoint_cfg = utils.SaveCheckpointConfig(\n",
1862
        "        dtype=dtype,\n",
1863
        "        keep=5, # The number of checkpoints to keep in the output_dir.\n",
1864
        "        save_dataset=False)\n",
1865
        "restore_checkpoint_cfg = utils.RestoreCheckpointConfig(\n",
1866
        "        dtype=dtype,\n",
1867
        "        mode=restore_mode,\n",
1868
        "        path=checkpoint_path)\n",
1869
        "\n",
1870
        "# b.) Define a train state initializer, which will help us get information about the\n",
1871
        "# TrainState shape.\n",
1872
        "train_state_initializer = utils.TrainStateInitializer(\n",
1873
        "        optimizer_def=model.optimizer_def,\n",
1874
        "        init_fn=model.get_initial_variables,\n",
1875
        "        input_shapes=input_shapes,\n",
1876
        "        input_types=None,\n",
1877
        "        partitioner=partitioner)\n",
1878
        "\n",
1879
        "# c.) Define the checkpoint manager.\n",
1880
        "checkpoint_manager = utils.LegacyCheckpointManager(\n",
1881
        "        save_cfg=save_checkpoint_cfg,\n",
1882
        "        restore_cfg=restore_checkpoint_cfg,\n",
1883
        "        train_state_shape=train_state_initializer.global_train_state_shape,\n",
1884
        "        partitioner=partitioner,\n",
1885
        "        ds_iter=None,\n",
1886
        "        model_dir=output_dir)\n",
1887
        "\n",
1888
        "### 5.) Restore the Model from a Checkpoint, or Initialize from Scratch ###\n",
1889
        "def get_state(rng):\n",
1890
        "  return train_state_initializer.from_scratch(rng).state_dict()\n",
1891
        "\n",
1892
        "# a.) Try to restore a model from a checkpoint.\n",
1893
        "train_state = checkpoint_manager.restore(\n",
1894
        "  [restore_checkpoint_cfg.path],\n",
1895
        "  restore_checkpoint_cfg,\n",
1896
        "  utils.get_fallback_state(restore_checkpoint_cfg, get_state, init_rng)\n",
1897
        ")\n",
1898
        "\n",
1899
        "# b.) If no checkpoint to restore, init from scratch.\n",
1900
        "if train_state is None:\n",
1901
        "  train_state = train_state_initializer.from_scratch(init_rng)"
1902
      ]
1903
    },
1904
    {
1905
      "cell_type": "markdown",
1906
      "metadata": {
1907
        "id": "ib9aOi2xaCKQ"
1908
      },
1909
      "source": [
1910
        "## Do the inference\n",
1911
        "\n",
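1912
        "Some of the supported languages (those with parallel data), listed by language code. The examples below select the target language by prefixing the input with a `\u003c2xx\u003e` token, e.g. `\u003c2es\u003e` for Spanish.\n",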
1913
        "\n",
1914
        "ace\n",
1915
        "ace_Arab\n",
1916
        "af\n",
1917
        "am\n",
1918
        "an\n",
1919
        "ar\n",
1920
        "ary\n",
1921
        "arz\n",
1922
        "as\n",
1923
        "az\n",
1924
        "ba\n",
1925
        "ban\n",
1926
        "bar\n",
1927
        "be\n",
1928
        "bg\n",
1929
        "bho\n",
1930
        "bjn\n",
1931
        "bjn_Arab\n",
1932
        "bm\n",
1933
        "bn\n",
1934
        "br\n",
1935
        "bs\n",
1936
        "bug\n",
1937
        "ca\n",
1938
        "ceb\n",
1939
        "crh_Latn\n",
1940
        "cs\n",
1941
        "cy\n",
1942
        "da\n",
1943
        "de\n",
1944
        "din\n",
1945
        "dv\n",
1946
        "dz\n",
1947
        "el\n",
1948
        "en\n",
1949
        "en_xx_simple\n",
1950
        "eo\n",
1951
        "es\n",
1952
        "et\n",
1953
        "eu\n",
1954
        "fa\n",
1955
        "fi\n",
1956
        "fil\n",
1957
        "fo\n",
1958
        "fr\n",
1959
        "fr_CA\n",
1960
        "fr_ca\n",
1961
        "fur\n",
1962
        "fuv\n",
1963
        "fy\n",
1964
        "ga\n",
1965
        "gd\n",
1966
        "gl\n",
1967
        "gn\n",
1968
        "gu\n",
1969
        "ha\n",
1970
        "he\n",
1971
        "hi\n",
1972
        "hne\n",
1973
        "hr\n",
1974
        "hu\n",
1975
        "hy\n",
1976
        "id\n",
1977
        "ig\n",
1978
        "io\n",
1979
        "is\n",
1980
        "it\n",
1981
        "iu\n",
1982
        "ja\n",
1983
        "jv\n",
1984
        "ka\n",
1985
        "kk\n",
1986
        "km\n",
1987
        "kn\n",
1988
        "ko\n",
1989
        "kr\n",
1990
        "kr_Arab\n",
1991
        "ks\n",
1992
        "ks_Deva\n",
1993
        "ku\n",
1994
        "ky\n",
1995
        "la\n",
1996
        "lb\n",
1997
        "li\n",
1998
        "lij\n",
1999
        "lmo\n",
2000
        "lt\n",
2001
        "ltg\n",
2002
        "lv\n",
2003
        "mag\n",
2004
        "mg\n",
2005
        "mi\n",
2006
        "mk\n",
2007
        "ml\n",
2008
        "mn\n",
2009
        "mni\n",
2010
        "mr\n",
2011
        "ms\n",
2012
        "mt\n",
2013
        "mwl\n",
2014
        "my\n",
2015
        "nb\n",
2016
        "nds\n",
2017
        "nds_NL\n",
2018
        "nds_nl\n",
2019
        "ne\n",
2020
        "nl\n",
2021
        "nn\n",
2022
        "no\n",
2023
        "nus\n",
2024
        "oc\n",
2025
        "or\n",
2026
        "pa\n",
2027
        "pl\n",
2028
        "prs\n",
2029
        "ps\n",
2030
        "pt\n",
2031
        "pt_br\n",
2032
        "ro\n",
2033
        "ru\n",
2034
        "rw\n",
2035
        "sc\n",
2036
        "scn\n",
2037
        "sd\n",
2038
        "se\n",
2039
        "sh\n",
2040
        "shn\n",
2041
        "si\n",
2042
        "simple\n",
2043
        "sk\n",
2044
        "sl\n",
2045
        "so\n",
2046
        "sq\n",
2047
        "sr\n",
2048
        "sv\n",
2049
        "sw\n",
2050
        "szl\n",
2051
        "ta\n",
2052
        "taq\n",
2053
        "taq_Tfng\n",
2054
        "te\n",
2055
        "tg\n",
2056
        "th\n",
2057
        "tk\n",
2058
        "tl\n",
2059
        "tr\n",
2060
        "tt\n",
2061
        "tzm\n",
2062
        "ug\n",
2063
        "uk\n",
2064
        "ur\n",
2065
        "uz\n",
2066
        "vec\n",
2067
        "vi\n",
2068
        "wa\n",
2069
        "wuu\n",
2070
        "xh\n",
2071
        "yi\n",
2072
        "yo\n",
2073
        "zh\n",
2074
        "zh_Hant\n",
2075
        "zh_cn\n",
2076
        "zh_tw\n",
2077
        "zu"
2078
      ]
2079
    },
2080
    {
2081
      "cell_type": "code",
2082
      "execution_count": null,
2083
      "metadata": {
2084
        "id": "IuFiyFqi5wCi"
2085
      },
2086
      "outputs": [],
2087
      "source": []
2088
    },
2089
    {
2090
      "cell_type": "code",
2091
      "execution_count": null,
2092
      "metadata": {
2093
        "colab": {
2094
          "base_uri": "https://localhost:8080/"
2095
        },
2096
        "id": "yhhR0yDcAn7w",
2097
        "outputId": "1af679bf-d887-4c6b-f587-f11b32d0562b"
2098
      },
2099
      "outputs": [
2100
        {
2101
          "name": "stderr",
2102
          "output_type": "stream",
2103
          "text": [
2104
            "WARNING:absl:T5 library uses PAD_ID=0, which is different from the sentencepiece vocabulary, which defines pad_id=-1\n"
2105
          ]
2106
        }
2107
      ],
2108
      "source": [
2109
        "examples = [\n",
2110
        "    '\u003c2zh\u003e Now both sides have renewed ambitions and have cleared the way for the visit with a series of gestures, experts say.',\n",
2111
        "    '\u003c2es\u003e Now both sides have renewed ambitions and have cleared the way for the visit with a series of gestures, experts say.',\n",
2112
        "    '\u003c2de\u003e Now both sides have renewed ambitions and have cleared the way for the visit with a series of gestures, experts say.',\n",
2113
        "    '\u003c2en\u003e 11月1日起,四项电动车国家标准正式实施。这些标准对电动车的设计制造、安全性能、电池系统和标识等方面都提出了明确规定,涉及电动车的车把、锂电池、总线设计、电子控制系统等。',\n",
2114
        "    '\u003c2en\u003e Ahora, un estudio de una universidad de Arizona ha revelado que usar redes luminosas puede resolver este problema.',\n",
2115
        "]\n",
2116
        "\n",
2117
        "# tokenization\n",
2118
        "output_features = {\n",
2119
        "    \"inputs\":\n",
2120
        "        seqio.Feature(\n",
2121
        "            vocabulary=model.input_vocabulary, add_eos=True),\n",
2122
        "    \"targets\":\n",
2123
        "        seqio.Feature(\n",
2124
        "            vocabulary=model.output_vocabulary, add_eos=True)\n",
2125
        "}\n",
2126
        "\n",
2127
        "# dataset, preprocessing and feature conversion\n",
2128
        "dataset = get_dataset_from_natural_text_examples(\n",
2129
        "    examples,\n",
2130
        "    preprocessors=[\n",
2131
        "        seqio.preprocessors.tokenize,\n",
2132
        "        seqio.preprocessors.append_eos\n",
2133
        "    ],\n",
2134
        "    task_feature_lengths=task_feature_lengths,\n",
2135
        "    features=output_features)\n",
2136
        "feature_converter = model.FEATURE_CONVERTER_CLS(pack=False)\n",
2137
        "model_dataset = feature_converter(\n",
2138
        "    dataset, task_feature_lengths=task_feature_lengths)\n",
2139
        "\n",
2140
        "# inference function\n",
2141
        "infer_fn = functools.partial(\n",
2142
        "  utils.get_infer_fn(\n",
2143
        "    infer_step=model.predict_batch_with_aux,\n",
2144
        "    batch_size=batch_size,\n",
2145
        "    train_state_axes=train_state_initializer.train_state_axes,\n",
2146
        "    partitioner=partitioner),\n",
2147
        "  train_state=train_state)"
2148
      ]
2149
    },
2150
    {
2151
      "cell_type": "code",
2152
      "execution_count": null,
2153
      "metadata": {
2154
        "colab": {
2155
          "base_uri": "https://localhost:8080/"
2156
        },
2157
        "id": "3l_Ihma2vI8O",
2158
        "outputId": "d7ab27d1-1c77-412c-c3f0-02a9ed3e16d3"
2159
      },
2160
      "outputs": [
2161
        {
2162
          "name": "stderr",
2163
          "output_type": "stream",
2164
          "text": [
2165
            "normalizer.cc(51) LOG(INFO) precompiled_charsmap is empty. use identity normalization.\n",
2166
            "normalizer.cc(51) LOG(INFO) precompiled_charsmap is empty. use identity normalization.\n"
2167
          ]
2168
        },
2169
        {
2170
          "name": "stdout",
2171
          "output_type": "stream",
2172
          "text": [
2173
            "Source: \u003c2zh\u003e Now both sides have renewed ambitions and have cleared the way for the visit with a series of gestures, experts say.\n",
2174
            "Translation: 专家们认为,双方都重新提出了雄心壮志,并通过一系列姿态为访问扫清了道路。\n",
2175
            "Score: -5.694250106811523\n",
2176
            "\n",
2177
            "Source: \u003c2es\u003e Now both sides have renewed ambitions and have cleared the way for the visit with a series of gestures, experts say.\n",
2178
            "Translation: Ahora ambas partes han renovado sus ambiciones y han despejado el camino para la visita con una serie de gestos, dicen los expertos.\n",
2179
            "Score: -2.3198225498199463\n",
2180
            "\n",
2181
            "Source: \u003c2de\u003e Now both sides have renewed ambitions and have cleared the way for the visit with a series of gestures, experts say.\n",
2182
            "Translation: Jetzt haben beide Seiten ihre Ambitionen erneuert und den Weg für den Besuch mit einer Reihe von Gesten frei gemacht, sagen Experten.\n",
2183
            "Score: -3.43707537651062\n",
2184
            "\n",
2185
            "Source: \u003c2en\u003e 11月1日起,四项电动车国家标准正式实施。这些标准对电动车的设计制造、安全性能、电池系统和标识等方面都提出了明确规定,涉及电动车的车\n",
2186
            "Translation: As of 1 November, four national standards for electric vehicles have been officially implemented.These standards provide clear regulations for the design and manufacture of electric vehicles, safety performance, battery systems and marking, and for vehicles involving electric vehicles.\n",
2187
            "Score: -6.792211532592773\n",
2188
            "\n",
2189
            "Source: \u003c2en\u003e Ahora, un estudio de una universidad de Arizona ha revelado que usar redes luminosas puede resolver este problema.\n",
2190
            "Translation: Now, a study from an Arizona university has revealed that using luminous nets can solve this problem.\n",
2191
            "Score: -2.857039213180542\n",
2192
            "\n"
2193
          ]
2194
        },
2195
        {
2196
          "name": "stderr",
2197
          "output_type": "stream",
2198
          "text": [
2199
            "normalizer.cc(51) LOG(INFO) precompiled_charsmap is empty. use identity normalization.\n"
2200
          ]
2201
        }
2202
      ],
2203
      "source": [
2204
        "# Run inference and get the predictions\n",
2205
        "predictions, aux_values = _extract_tokens_and_aux_values(\n",
2206
        "    infer_fn(model_dataset.enumerate()))\n",
2207
        "\n",
2208
        "inferences = []\n",
2209
        "for idx, inputs in model_dataset.enumerate().as_numpy_iterator():\n",
2210
        "  prediction = np.asarray(predictions[idx]).astype(np.int32)\n",
2211
        "  source = np.asarray(inputs['encoder_input_tokens']).astype(np.int32)\n",
2212
        "\n",
2213
        "  # Remove padding values\n",
2214
        "  source = source[source != 0]\n",
2215
        "  prediction = prediction[prediction != 0]\n",
2216
        "\n",
2217
        "  # Decode into string\n",
2218
        "  prediction = output_features[\"targets\"].vocabulary.decode_tf(\n",
2219
        "      tf.constant(prediction)).numpy()\n",
2220
        "  source = output_features[\"inputs\"].vocabulary.decode_tf(\n",
2221
        "      tf.constant(source)).numpy()\n",
2222
        "  inferences.append((source, prediction, aux_values['scores'][idx]))\n",
2223
        "\n",
2224
        "for src, hyp, score in inferences:\n",
2225
        "  print(f\"Source: {src.decode('utf-8')}\")\n",
2226
        "  print(f\"Translation: {hyp.decode('utf-8')}\")\n",
2227
        "  print(f\"Score: {score}\")\n",
2228
        "  print()\n"
2229
      ]
2230
    }
2231
  ],
2232
  "metadata": {
2233
    "colab": {
2234
      "machine_shape": "hm",
2235
      "provenance": []
2236
    },
2237
    "kernelspec": {
2238
      "display_name": "Python 3",
2239
      "name": "python3"
2240
    },
2241
    "language_info": {
2242
      "name": "python"
2243
    }
2244
  },
2245
  "nbformat": 4,
2246
  "nbformat_minor": 0
2247
}
2248

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.