colossalai
51 строка · 1.8 Кб
FROM hpcaitech/cuda-conda:11.3

# Image metadata, using the OCI annotation keys.
# Labels are written as key="value" with no spaces around `=`. With spaces,
# Docker falls back to the legacy `LABEL key value` syntax and the stored
# value begins with a literal `= `.
# The license is given as an SPDX identifier, and the base image reference
# omits `library/`, which is the namespace reserved for official images.
LABEL org.opencontainers.image.source="https://github.com/hpcaitech/ColossalAI" \
      org.opencontainers.image.licenses="Apache-2.0" \
      org.opencontainers.image.base.name="docker.io/hpcaitech/cuda-conda:11.3"
7
# Enable passwordless SSH for the build user.
# Writes a client config that forwards the agent and disables strict host key
# checking for every host, generates an RSA key pair with an empty passphrase,
# and authorizes that key for logins to this same account.
# `mkdir -p` tolerates a base image that already ships ~/.ssh, and the chmod
# calls make the permissions explicit instead of relying on the build umask.
# NOTE(review): the key pair is created at build time, so every container
# started from this image shares the same private key. Confirm the image is
# only used on trusted networks.
RUN mkdir -p ~/.ssh && \
    chmod 700 ~/.ssh && \
    printf "Host * \n ForwardAgent yes\nHost *\n StrictHostKeyChecking no" > ~/.ssh/config && \
    ssh-keygen -t rsa -N "" -f ~/.ssh/id_rsa && \
    cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys && \
    chmod 600 ~/.ssh/config ~/.ssh/authorized_keys
13
# RDMA support: InfiniBand/verbs user-space libraries and diagnostic tools.
# Packages are listed one per line in alphabetical order; the apt lists are
# removed in the same layer that downloaded them.
RUN apt-get update \
    && apt-get install -y \
        ibverbs-providers \
        infiniband-diags \
        libibumad3 \
        libibverbs1 \
        libnl-3-200 \
        libnl-route-3-200 \
        librdmacm1 \
        perftest \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
19
# Install pinned PyTorch, torchvision and torchaudio together with
# cudatoolkit 11.3 from the `pytorch` conda channel.
# `conda clean` runs in the same layer so downloaded tarballs and index
# caches are not kept in the image.
RUN conda install -y pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch && \
    conda clean --all --yes
22
# Ninja build tool (Debian/Ubuntu package `ninja-build`), installed without
# recommended extras; apt caches are dropped in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ninja-build \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
28
# Build and install NVIDIA Apex from source at revision 91fcaa, passing the
# --cpp_ext, --cuda_ext and --fast_layer_norm build options.
# `packaging` is installed first, also with --no-cache-dir so that no pip
# cache is written into the layer.
RUN git clone https://github.com/NVIDIA/apex && \
    cd apex && \
    git checkout 91fcaa && \
    pip install --no-cache-dir packaging && \
    pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--fast_layer_norm" ./
35
# Install ColossalAI from source.
# VERSION is the branch or tag to clone (default: main); override it with
# `docker build --build-arg VERSION=<ref>`. The expansion is quoted so an
# unusual ref name cannot be word-split or glob-expanded by the shell.
# NOTE(review): BUILD_EXT=1 is exported to the pip build; presumably it turns
# on compilation of the native extensions. Confirm against the project's
# setup.py.
ARG VERSION=main
RUN git clone -b "${VERSION}" https://github.com/hpcaitech/ColossalAI.git \
    && cd ./ColossalAI \
    && BUILD_EXT=1 pip install -v --no-cache-dir .
41
# Titans package from PyPI (unpinned), installed without keeping a pip cache
RUN pip install \
    --no-cache-dir \
    titans
44
# Install TensorNVMe from source.
# Build prerequisites: cmake (from conda) and libaio-dev (from apt).
# Uses `apt-get`, the stable scripting interface, instead of `apt`. The conda
# cache, apt lists and pip cache are all dropped in this same layer so they
# do not add to the image size.
RUN conda install -y cmake && \
    conda clean --all --yes && \
    apt-get update && \
    apt-get install -y --no-install-recommends libaio-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* && \
    git clone https://github.com/hpcaitech/TensorNVMe.git && \
    cd TensorNVMe && \
    pip install --no-cache-dir -r requirements.txt && \
    pip install -v --no-cache-dir .
52