diff options
Diffstat (limited to '.devops/llama-cli-cuda.Dockerfile')
| -rw-r--r-- | .devops/llama-cli-cuda.Dockerfile | 35 |
1 file changed, 35 insertions, 0 deletions
# syntax=docker/dockerfile:1
# Multi-stage build for the llama-cli binary with CUDA support.
# Stage 1 (build): compiles llama-cli against the CUDA dev toolchain.
# Stage 2 (runtime): minimal CUDA runtime image carrying only the binary.

ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build (all CUDA architectures).
ARG CUDA_DOCKER_ARCH=all

# Install build deps; skip recommends and drop apt lists in the same layer
# so the (cached) build-stage layers stay small (hadolint DL3015/DL3009).
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        git \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

COPY . .

# Set nvcc architecture (consumed by the project Makefile)
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable the CUDA backend in the Makefile
ENV LLAMA_CUDA=1

RUN make -j$(nproc) llama-cli

FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

# libgomp1: OpenMP runtime required by the compiled binary.
# Clean apt lists in the same layer to keep the runtime image minimal.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

COPY --from=build /app/llama-cli /llama-cli

# NOTE(review): image runs as root; adding a non-root USER would be safer but
# may break existing volume-mount permission expectations — confirm before changing.
ENTRYPOINT [ "/llama-cli" ]
