Docling/.github/workflows/checks.yml

on:
  workflow_call:
    inputs:
      push_coverage:
          type: boolean
          description: "If true, the coverage results are pushed to codecov.io."
          default: true
    secrets:
      CODECOV_TOKEN:
        required: false

env:
  HF_HUB_DOWNLOAD_TIMEOUT: "60"
  HF_HUB_ETAG_TIMEOUT: "60"
  UV_FROZEN: "1"

jobs:
  run-checks:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
    steps:
      - uses: actions/checkout@v4
      - name: Install tesseract and ffmpeg
        run: sudo apt-get update && sudo apt-get install -y ffmpeg tesseract-ocr tesseract-ocr-eng tesseract-ocr-fra tesseract-ocr-deu tesseract-ocr-spa tesseract-ocr-script-latn libleptonica-dev libtesseract-dev pkg-config
      - name: Set TESSDATA_PREFIX
        run: |
          echo "TESSDATA_PREFIX=$(dpkg -L tesseract-ocr-eng | grep tessdata$)" >> "$GITHUB_ENV"
      - name: Cache Hugging Face models
        uses: actions/cache@v4
        with:
          path: ~/.cache/huggingface
          key: huggingface-cache-py${{ matrix.python-version }}
      - name: Install uv and set the python version
        uses: astral-sh/setup-uv@v5
        with:
          python-version: ${{ matrix.python-version }}
          enable-cache: true
      - name: pre-commit cache key
        run: echo "PY=$(python -VV | sha256sum | cut -d' ' -f1)" >> "$GITHUB_ENV"
      - uses: actions/cache@v4
        with:
          path: ~/.cache/pre-commit
          key: pre-commit|${{ env.PY }}|${{ hashFiles('.pre-commit-config.yaml') }}
      - name: Install dependencies
        run: uv sync --frozen --all-extras
      - name: Check style and run tests
        run: pre-commit run --all-files
      - name: Testing
        run: |
          uv run --no-sync pytest -v --cov=docling --cov-report=xml tests
      - name: Upload coverage to Codecov
        if: inputs.push_coverage
        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: ./coverage.xml
      - name: Run examples
        run: |
          for file in docs/examples/*.py; do
            # Skip batch_convert.py
            if [[ "$(basename "$file")" =~ ^(batch_convert|compare_vlm_models|minimal|minimal_vlm_pipeline|export_multimodal|custom_convert|develop_picture_enrichment|rapidocr_with_custom_models|offline_convert|pictures_description|pictures_description_api|vlm_pipeline_api_model).py ]]; then
                echo "Skipping $file"
                continue
            fi

            echo "Running example $file"
            uv run --no-sync python "$file" || exit 1
          done

  build-package:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.12']
    steps:
      - uses: actions/checkout@v4
      - name: Install uv and set the python version
        uses: astral-sh/setup-uv@v5
        with:
          python-version: ${{ matrix.python-version }}
          enable-cache: true
      - name: Install dependencies
        run: uv sync --all-extras
      - name: Build package
        run: uv build
      - name: Check content of wheel
        run: unzip -l dist/*.whl
      - name: Store the distribution packages
        uses: actions/upload-artifact@v4
        with:
          name: python-package-distributions
          path: dist/

  test-package:
    needs:
      - build-package
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.12']
    steps:
      - name: Download all the dists
        uses: actions/download-artifact@v4
        with:
          name: python-package-distributions
          path: dist/
      - name: Install uv and set the python version
        uses: astral-sh/setup-uv@v5
        with:
          python-version: ${{ matrix.python-version }}
          enable-cache: true
      - name: Install package
        run: uv pip install dist/*.whl
      - name: Run docling
        run: docling --help