gpt-neox
/
Dockerfile
92 строки · 4.0 Кб
1# Copyright (c) 2024, EleutherAI
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
FROM nvcr.io/nvidia/pytorch:24.02-py3

# Build-time only: keeps apt/debconf from prompting during `docker build`.
# Declared as ARG (not ENV) so the setting is visible to the RUN steps below
# but is not baked into the environment of containers started from the image.
ARG DEBIAN_FRONTEND=noninteractive
18
# Image metadata (OCI annotation keys).
# NOTE: there must be no whitespace around `=`. `LABEL key = "value"` is parsed
# in the legacy `LABEL key value` form, so the stored value would start with a
# stray `=` instead of being just the quoted string.
LABEL org.opencontainers.image.version="2.0" \
      org.opencontainers.image.authors="contact@eleuther.ai" \
      org.opencontainers.image.source="https://www.github.com/eleutherai/gpt-neox" \
      org.opencontainers.image.licenses="Apache-2.0" \
      org.opencontainers.image.base.name="nvcr.io/nvidia/pytorch:24.02-py3"
25
#### System packages (uses the default Python 3 shipped with the base image)
# One package per line, sorted, so additions/removals diff cleanly; the
# duplicated `pdsh` and `htop` entries were dropped.
# The apt package lists are removed in the same layer that downloads them so
# they do not end up in the image, and pip is told not to keep its download
# cache for the same reason.
RUN apt-get update -y && \
    apt-get install -y \
        autotools-dev \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        g++ \
        gcc \
        git \
        htop \
        ibutils \
        ibverbs-utils \
        iftop \
        infiniband-diags \
        iotop \
        iputils-ping \
        less \
        libcupti-dev \
        libmlx4-1 \
        libpython3-dev \
        nano \
        net-tools \
        nfs-common \
        pdsh \
        perftest \
        python3-dev \
        python3-pip \
        rdma-core \
        rdmacm-utils \
        rsync \
        software-properties-common \
        ssh \
        sudo \
        tmux \
        unzip \
        vim \
        wget \
        zstd && \
    rm -rf /var/lib/apt/lists/* && \
    update-alternatives --install /usr/bin/python python /usr/bin/python3 1 && \
    update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 && \
    python -m pip install --no-cache-dir --upgrade pip && \
    python -m pip install --no-cache-dir gpustat
38
### SSH
# - `mkdir -p` so the step does not fail if /var/run/sshd already exists.
# - pam_loginuid is switched from required to optional to prevent the user
#   being kicked off after login.
# - sshd is pointed at per-user .ssh/authorized_keys and password
#   authentication is enabled.
# - The sudo.conf line works around a sudo bug:
#   https://github.com/sudo-project/sudo/issues/42
RUN mkdir -p /var/run/sshd && \
    sed -i 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' /etc/pam.d/sshd && \
    echo 'AuthorizedKeysFile .ssh/authorized_keys' >> /etc/ssh/sshd_config && \
    echo 'PasswordAuthentication yes' >> /etc/ssh/sshd_config && \
    echo "Set disable_coredump false" >> /etc/sudo.conf

# Expose SSH port (documents the port; it still has to be published at run time)
EXPOSE 22
50
# MPI locations for the image environment. They are needed here for anything
# compiled in later build steps; the same values are also written to the
# user's bashrc further down.
# Executables: put the MPI bin directory ahead of the inherited PATH.
ENV PATH=/usr/local/mpi/bin:${PATH}
# Shared libraries: /usr/local/lib and the MPI lib directories come first.
ENV LD_LIBRARY_PATH=/usr/local/lib:/usr/local/mpi/lib:/usr/local/mpi/lib64:${LD_LIBRARY_PATH}
54
# Create a wrapper for OpenMPI to allow running as root by default.
# The original binary is renamed to mpirun.real and a small bash script takes
# its place, forwarding all arguments. The script `exec`s the real binary so
# no intermediate shell stays alive between the caller and mpirun (signals and
# the exit status then belong to mpirun itself).
RUN mv /usr/local/mpi/bin/mpirun /usr/local/mpi/bin/mpirun.real && \
    echo '#!/bin/bash' > /usr/local/mpi/bin/mpirun && \
    echo 'exec mpirun.real --allow-run-as-root --prefix /usr/local/mpi "$@"' >> /usr/local/mpi/bin/mpirun && \
    chmod a+x /usr/local/mpi/bin/mpirun
60
#### User account
# Adds the `mchorse` user (UID 1000, bash as login shell, home directory
# created), puts it in the `sudo` group and appends a sudoers rule that lets
# it run any command without a password.
RUN useradd -m -u 1000 -s /bin/bash mchorse \
    && usermod -a -G sudo mchorse \
    && echo "mchorse ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
65
## SSH config and bashrc
# Creates ~/.ssh (with host key checking disabled for every host) and /job,
# then appends the pdsh/PATH/LD_LIBRARY_PATH exports to the user's bashrc.
# This step runs as root, so ~/.ssh is handed over to mchorse afterwards;
# otherwise the user could not write to the directory (e.g. to add
# authorized_keys or record known hosts).
# NOTE(review): the bashrc exports repeat the MPI paths set via ENV,
# presumably for SSH login shells that do not inherit the image ENV; confirm.
RUN mkdir -p /home/mchorse/.ssh /job && \
    echo 'Host *' > /home/mchorse/.ssh/config && \
    echo ' StrictHostKeyChecking no' >> /home/mchorse/.ssh/config && \
    chown -R mchorse: /home/mchorse/.ssh && \
    echo 'export PDSH_RCMD_TYPE=ssh' >> /home/mchorse/.bashrc && \
    echo 'export PATH=/home/mchorse/.local/bin:$PATH' >> /home/mchorse/.bashrc && \
    echo 'export PATH=/usr/local/mpi/bin:$PATH' >> /home/mchorse/.bashrc && \
    echo 'export LD_LIBRARY_PATH=/usr/local/lib:/usr/local/mpi/lib:/usr/local/mpi/lib64:$LD_LIBRARY_PATH' >> /home/mchorse/.bashrc
74
#### Python packages
# The requirement files are copied into the current working directory (no
# WORKDIR has been set yet, so this is whatever the base image defines).
# Every install goes through `python -m pip` so the same interpreter is used
# throughout, and passes --no-cache-dir so pip's download cache is not stored
# in the image layers. The optional requirement sets stay in separate RUN
# steps so each one is cached independently.
COPY requirements/* ./
RUN python -m pip install --no-cache-dir -r requirements.txt && \
    python -m pip install --no-cache-dir -r requirements-onebitadam.txt
RUN python -m pip install --no-cache-dir -r requirements-sparseattention.txt
RUN python -m pip install --no-cache-dir -r requirements-flashattention.txt
RUN python -m pip install --no-cache-dir -r requirements-wandb.txt
# Quoted so the shell can never glob-expand the `*` in the version specifier.
RUN python -m pip install --no-cache-dir "protobuf==3.20.*"
82
# Fused kernels: switch into the target directory (WORKDIR creates it), copy
# the kernel sources from the build context into it, then run the setup
# script's install command there.
WORKDIR /megatron/fused_kernels
COPY megatron/fused_kernels/ ./
RUN python setup.py install
86
# Make sure /tmp exists and is writable by every user. Mode 1777 (rather than
# 0777) keeps the sticky bit, the conventional setting for /tmp, so users
# cannot delete or rename each other's temporary files.
RUN mkdir -p /tmp && chmod 1777 /tmp

#### SWITCH TO mchorse USER
# Everything from here on, including containers started from the image,
# runs as the unprivileged user from its home directory.
USER mchorse
WORKDIR /home/mchorse
93