forked from qdrant/vector-db-benchmark
-
Notifications
You must be signed in to change notification settings - Fork 11
Expand file tree
/
Copy pathDockerfile
More file actions
113 lines (87 loc) · 2.91 KB
/
Dockerfile
File metadata and controls
113 lines (87 loc) · 2.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# Multi-stage Dockerfile for vector-db-benchmark
# Stage 1: Build environment
# Installs Poetry, the locked dependencies and the project itself into the
# system interpreter (virtualenv creation is disabled), then records Git
# metadata in /code/build_info.txt.
FROM python:3.10-slim AS builder

# Environment variables for Python, pip and Poetry.
# PIP_NO_CACHE_DIR=1 disables pip's download cache so it never lands in a layer.
ENV PYTHONFAULTHANDLER=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONHASHSEED=random \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=on \
    PIP_DEFAULT_TIMEOUT=100 \
    POETRY_VERSION=1.5.1

# Install system dependencies (build toolchain, git, wget).
# --no-install-recommends keeps optional extras out of the layer; the apt lists
# are removed in the same RUN so they do not persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        git \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Install Poetry at the pinned version
RUN pip install "poetry==$POETRY_VERSION"

# Set working directory
WORKDIR /code

# Copy only the dependency manifests (and the README) first, so the dependency
# layers below are rebuilt only when these files change, not on every source edit.
COPY poetry.lock pyproject.toml README.md /code/

# Install the locked non-dev dependencies into the system interpreter.
# `--only main` is the replacement Poetry names for the deprecated `--no-dev`;
# `--no-root` defers installing the project itself until its sources are copied.
RUN poetry config virtualenvs.create false \
    && poetry install --only main --no-root --no-interaction --no-ansi

# Install additional dependencies
# NOTE(review): boto3 is unpinned, so the resolved version can differ between builds.
RUN pip install "boto3"

# Copy the full source tree
COPY . /code

# Install the project itself; its dependencies are already present.
RUN poetry install --only-root --no-interaction --no-ansi

# Build arguments for Git metadata.
# Declared here rather than at the top of the stage because every RUN after an
# ARG sees its value: a new GIT_SHA would otherwise invalidate the cached
# dependency layers above.
ARG GIT_SHA
ARG GIT_DIRTY

# Store Git information.
# When the build args are empty, fall back to querying git in /code:
# GIT_SHA becomes "unknown" if `git rev-parse` fails, and GIT_DIRTY is the
# number of lines `git diff` prints (0 when it prints nothing or fails).
RUN if [ -z "$GIT_SHA" ]; then \
        GIT_SHA=$(git rev-parse HEAD 2>/dev/null || echo "unknown"); \
    fi && \
    if [ -z "$GIT_DIRTY" ]; then \
        GIT_DIRTY=$(git diff --no-ext-diff 2>/dev/null | wc -l); \
    fi && \
    echo "Built with GIT_SHA=${GIT_SHA}, GIT_DIRTY=${GIT_DIRTY}" > /code/build_info.txt
# Stage 2: Runtime environment
# Starts from a clean slim image and copies the installed Python packages,
# console scripts and application code out of the builder stage.
FROM python:3.10-slim

# Environment variables for Python
ENV PYTHONFAULTHANDLER=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONHASHSEED=random

# Install runtime dependencies.
# --no-install-recommends keeps optional extras out of the image; the apt lists
# are removed in the same RUN so they do not persist in the layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /code

# Copy Python environment from builder (installed packages and console scripts)
COPY --from=builder /usr/local/lib/python3.10/site-packages /usr/local/lib/python3.10/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Copy application code
COPY --from=builder /code /code

# Create directories with proper permissions.
# The order matters: the recursive 755 on /code also covers results/ and
# datasets/, so the 777 on those two directories has to be applied last.
RUN mkdir -p /code/results /code/datasets && \
    chmod -R 755 /code && \
    chmod -R 777 /code/results /code/datasets

# Create entrypoint script to handle user permissions.
# printf writes one script line per argument, so the result does not depend on
# how the shell's `echo` treats "\n". Every argument starts with a quote so the
# Dockerfile parser cannot mistake a script line for a Dockerfile comment.
RUN printf '%s\n' \
        '#!/bin/bash' \
        '# Handle user permissions for volume mounts' \
        '# Ensure results directory is writable' \
        'mkdir -p /code/results' \
        'chmod 777 /code/results' \
        'exec "$@"' \
        > /code/entrypoint.sh && \
    chmod +x /code/entrypoint.sh

# Health check.
# Only verifies that the Python interpreter can start and exit cleanly.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import sys; sys.exit(0)" || exit 1

# Expose common ports (for documentation purposes)
EXPOSE 6379 6380

# Set entrypoint: the wrapper prepares /code/results, then exec's the command
ENTRYPOINT ["/code/entrypoint.sh"]

# Default command (show help)
CMD ["vector-db-benchmark", "--help"]