-
Notifications
You must be signed in to change notification settings - Fork 129
Expand file tree
/
Copy pathDockerfile
More file actions
41 lines (29 loc) · 2.05 KB
/
Dockerfile
File metadata and controls
41 lines (29 loc) · 2.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# Base: Databricks' minimal container-runtime image.
# NOTE(review): "experimental" is a floating tag — pin a specific tag or digest
# for reproducible builds; confirm with the databricksruntime publishing cadence.
FROM databricksruntime/minimal:experimental

# Tool-chain version pins, overridable at build time via --build-arg.
ARG python_version="3.12"
ARG pip_version="25.0.1"
ARG setuptools_version="74.0.0"
ARG wheel_version="0.45.1"
ARG virtualenv_version="20.29.3"
# Installs python and virtualenv for Spark and Notebooks.
# curl -fsSL: -f makes curl exit non-zero on HTTP errors instead of saving the
# server's error page as get-pip.py and failing later with a confusing error.
# NOTE: the apt package lists are intentionally NOT removed in this layer — a
# later layer runs `apt-get install` without its own `apt-get update` and
# relies on these lists still being present.
RUN apt-get update \
  && apt-get install -y --no-install-recommends \
       curl \
       python${python_version} \
       python${python_version}-dev \
       software-properties-common \
  && curl -fsSL https://bootstrap.pypa.io/get-pip.py -o get-pip.py \
  && /usr/bin/python${python_version} get-pip.py --break-system-packages pip==${pip_version} setuptools==${setuptools_version} wheel==${wheel_version} \
  && rm get-pip.py
# Install a pinned virtualenv and prepare it for offline environment creation:
#  - sed: flips virtualenv's PERIODIC_UPDATE_ON_BY_DEFAULT constant to False in
#    its installed source, so env creation never checks the network for newer
#    seed packages (important on clusters without outbound internet access)
#  - pip download: stages the pinned pip wheel into virtualenv_support/, which
#    is where `virtualenv --no-download` looks for seed wheels
# NOTE(review): the sed pattern is tied to virtualenv's internal layout at
# ${virtualenv_version} — re-verify the path/constant when bumping the pin.
RUN /usr/local/bin/pip${python_version} install --break-system-packages --no-cache-dir virtualenv==${virtualenv_version} \
&& sed -i -r 's/^(PERIODIC_UPDATE_ON_BY_DEFAULT) = True$/\1 = False/' /usr/local/lib/python${python_version}/dist-packages/virtualenv/seed/embed/base_embed.py \
&& /usr/local/bin/pip${python_version} download pip==${pip_version} --dest \
/usr/local/lib/python${python_version}/dist-packages/virtualenv_support/
# Create /databricks/python3 — the default environment Spark and notebooks use.
# --no-download seeds pip from the wheel staged in virtualenv_support/ above
# (no network needed); --no-setuptools keeps the env lean; system site-packages
# remain importable from inside the env.
RUN virtualenv \
--no-download \
--no-setuptools \
--system-site-packages \
--python=python${python_version} \
/databricks/python3
# These python libraries are used by Databricks notebooks and the Python REPL
# You do not need to install pyspark - it is injected when the cluster is launched
# Versions are intended to reflect latest DBR LTS: https://docs.databricks.com/en/release-notes/runtime/15.4lts.html#system-environment
COPY requirements.txt /databricks/.
# Build deps for native wheels (libpq-dev presumably for a postgres client lib
# in requirements.txt — confirm). Run `apt-get update` in the SAME layer:
# installing from lists fetched in an earlier layer is the classic stale-cache
# bug (hadolint DL3009) and breaks once the mirror state moves on. Lists are
# removed afterwards since no later layer uses apt.
RUN apt-get update \
  && apt-get install -y --no-install-recommends \
       build-essential \
       libpq-dev \
  && rm -rf /var/lib/apt/lists/*
# --no-deps: requirements.txt is treated as a fully resolved pin list, so
# transitive resolution is deliberately disabled. --no-cache-dir avoids baking
# pip's download cache into the layer (DL3042).
RUN /databricks/python3/bin/pip install --no-deps --no-cache-dir -r /databricks/requirements.txt
# Specifies where Spark will look for the python process — points at the
# /databricks/python3 virtualenv created above so executors and the driver
# use the same interpreter and package set.
ENV PYSPARK_PYTHON=/databricks/python3/bin/python3
# Separate virtualenv for the Python LSP server (presumably backing notebook
# code-completion/diagnostics — confirm), isolated from the user-facing
# /databricks/python3 env so LSP pins can't conflict with user packages.
RUN virtualenv --python=python${python_version} --system-site-packages /databricks/python-lsp --no-download --no-setuptools
COPY python-lsp-requirements.txt /databricks/.
# --no-deps: the requirements file is expected to be a fully pinned,
# pre-resolved list, mirroring the main environment's install above.
RUN /databricks/python-lsp/bin/pip install --no-deps -r /databricks/python-lsp-requirements.txt