# Image for the Theodolite data discovery scanner.
#
# Build steps, in order:
#   1. create an unprivileged account that owns /app,
#   2. install the scanner package with its "all" extra from the copied source,
#   3. bake the spaCy en_core_web_sm model into the image,
#   4. drop root and run the `theodolite-scan` CLI as the entrypoint.
FROM python:3.12-slim

# Legacy keys are kept for existing tooling; the org.opencontainers.* keys
# carry the same values under the OCI-standard names.
LABEL maintainer="Theodolite <support@theodolite.io>" \
      description="Theodolite data discovery scanner for cloud storage" \
      org.opencontainers.image.authors="Theodolite <support@theodolite.io>" \
      org.opencontainers.image.description="Theodolite data discovery scanner for cloud storage"

# Prevent Python from writing .pyc files and from buffering stdout/stderr,
# so log output appears immediately in `docker logs`.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# Dedicated non-root account with a fixed numeric UID/GID so orchestrators can
# verify it (e.g. Kubernetes runAsNonRoot). /app is created up front and owned
# by that account so the scanner can still write into its working directory.
# --no-log-init avoids useradd creating large sparse lastlog/faillog entries.
RUN groupadd --gid 10001 scanner \
    && useradd --uid 10001 --gid scanner --no-log-init \
         --home-dir /app --no-create-home --shell /usr/sbin/nologin scanner \
    && install -d -o scanner -g scanner /app

WORKDIR /app

# Install the scanner with all provider dependencies. This runs as root so the
# packages land in the system site-packages; the copied sources stay root-owned.
COPY pyproject.toml .
COPY theodolite_scanner/ theodolite_scanner/
RUN pip install --no-cache-dir ".[all]"

# Pre-download the spaCy model so the first scan doesn't have to.
# `spacy download` installs the model through pip and forwards extra arguments
# to it; --no-cache-dir keeps pip's download cache out of this layer.
RUN python -m spacy download en_core_web_sm --no-cache-dir

# Everything below (and the running container) executes without root privileges.
USER scanner

# Scanner entrypoint — all config via CLI flags or env vars
ENTRYPOINT ["theodolite-scan"]
