Docling/.github/workflows/checks.yml

on:
  workflow_call:
    inputs:
      push_coverage:
          type: boolean
          description: "If true, the coverage results are pushed to codecov.io."
          default: true
    secrets:
      CODECOV_TOKEN:
        required: false

env:
  HF_HUB_DOWNLOAD_TIMEOUT: "60"
  HF_HUB_ETAG_TIMEOUT: "60"

jobs:
  run-checks:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
    steps:
      - uses: actions/checkout@v4
      - name: Install tesseract
        run: sudo apt-get update && sudo apt-get install -y tesseract-ocr tesseract-ocr-eng tesseract-ocr-fra tesseract-ocr-deu tesseract-ocr-spa tesseract-ocr-script-latn libleptonica-dev libtesseract-dev pkg-config
      - name: Set TESSDATA_PREFIX
        run: |
          echo "TESSDATA_PREFIX=$(dpkg -L tesseract-ocr-eng | grep tessdata$)" >> "$GITHUB_ENV"
      - name: Cache Hugging Face models
        uses: actions/cache@v4
        with:
          path: ~/.cache/huggingface
          key: huggingface-cache-py${{ matrix.python-version }}
      - uses: ./.github/actions/setup-poetry
        with:
          python-version: ${{ matrix.python-version }}
      - name: Run styling check
        run: poetry run pre-commit run --all-files
      - name: Install with poetry
        run: poetry install --all-extras
      - name: Testing
        run: |
          poetry run pytest -v --cov=docling --cov-report=xml tests
      - name: Upload coverage to Codecov
        if: inputs.push_coverage
        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: ./coverage.xml
      - name: Run examples
        run: |
          for file in docs/examples/*.py; do
            # Skip batch_convert.py
            if [[ "$(basename "$file")" =~ ^(batch_convert|compare_vlm_models|minimal|minimal_vlm_pipeline|export_multimodal|custom_convert|develop_picture_enrichment|rapidocr_with_custom_models|offline_convert|pictures_description|pictures_description_api|vlm_pipeline_api_model).py ]]; then
                echo "Skipping $file"
                continue
            fi

            echo "Running example $file"
            poetry run python "$file" || exit 1
          done
      - name: Build with poetry
        run: poetry build