unstructured
/
Dockerfile
41 строка · 1.3 Кб
1# syntax=docker/dockerfile:experimental
# Stage "base": digest-pinned base image plus the unprivileged user and the
# environment variables shared by every stage built on top of it.
FROM quay.io/unstructured-io/base-images:rocky9.2-9@sha256:73d8492452f086144d4b92b7931aa04719f085c74d16cae81e8826ef873729c9 AS base

# NOTE(crag): NB_USER ARG for mybinder.org compat:
# https://mybinder.readthedocs.io/en/latest/tutorials/dockerfile.html
ARG NB_USER=notebook-user
ARG NB_UID=1000
# No default on purpose: the pip version to pin is passed with --build-arg.
ARG PIP_VERSION

# Set up environment.
# HOME gets its own ENV instruction because the following instructions expand
# ${HOME}; a variable cannot be referenced in the instruction that defines it.
ENV HOME=/home/${NB_USER}
ENV PYTHONPATH="${PYTHONPATH}:${HOME}"
# Per-user script directory of the account created below (the previous value,
# /home/usr/.local/bin, did not correspond to any user defined in this file).
ENV PATH="${HOME}/.local/bin:${PATH}"

# Create the group and the user in a single layer; NB_UID is used for both the
# uid and the gid.
RUN groupadd --gid "${NB_UID}" "${NB_USER}" && \
    useradd --uid "${NB_UID}" --gid "${NB_UID}" "${NB_USER}"
WORKDIR ${HOME}
18
# Stage "deps": install the pinned pip and every Python requirements file,
# then download the NLTK data sets used at runtime.
FROM base AS deps
# Copy the requirements files and install Unstructured's dependencies
COPY requirements requirements

# The "Development Tools" group is installed, used and removed inside the same
# layer so the toolchain does not persist in the image.
# Requirements files are installed one by one through a small shell loop:
# with a plain `find -exec ... ';'` the exit status of the executed command is
# ignored by find, so a failed `pip install` would not stop the build. The
# `|| exit 1` together with the `+` terminator propagates the failure.
# The same interpreter (python3.10) is used for every step so packages land in
# the site-packages that the later `python3.10 -c` commands import from.
RUN python3.10 -m pip install --no-cache-dir "pip==${PIP_VERSION}" && \
    dnf -y groupinstall "Development Tools" && \
    find requirements/ -type f -name "*.txt" -exec sh -c \
      'for req in "$@"; do python3.10 -m pip install --no-cache-dir -r "$req" || exit 1; done' \
      sh '{}' + && \
    dnf -y groupremove "Development Tools" && \
    dnf clean all

# Fetch the NLTK resources at build time so they are already in the image.
RUN python3.10 -c "import nltk; nltk.download('punkt')" && \
    python3.10 -c "import nltk; nltk.download('averaged_perceptron_tagger')"
31
# Stage "code": add the example documents and the library sources on top of
# the installed dependencies and finish the image as the unprivileged user.
FROM deps AS code

# Everything from here on (build steps and the container's default process)
# runs as the non-root user.
USER ${NB_USER}

# Destinations are relative to the current WORKDIR. No --chown is given, so
# the copied files keep Docker's default root ownership.
COPY example-docs example-docs
COPY unstructured unstructured

# Call the package's initialize() hook once at build time.
# NOTE(review): presumably this pre-loads or downloads models so the first
# run is fast — confirm against unstructured.partition.model_init.
RUN python3.10 -c "from unstructured.partition.model_init import initialize; initialize()"

# Default to an interactive shell (exec form).
CMD ["/bin/bash"]
42