import importlib
import json
import logging
import time
import warnings
from enum import Enum
from pathlib import Path
from typing import Annotated, Iterable, List, Optional

import typer
from pydantic import AnyUrl

from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
from docling.datamodel.base_models import ConversionStatus, PipelineOptions
from docling.datamodel.document import ConversionResult, DocumentConversionInput
from docling.document_converter import DocumentConverter

# Silence noisy third-party warnings: UserWarning raised from modules whose
# name matches the regex "pydantic|torch", and FutureWarning raised from
# modules whose name matches "easyocr".
warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch")
warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr")

# Module-level logger, named after this module.
_log = logging.getLogger(__name__)
from rich.console import Console

# Rich console bound to stderr; used for user-facing error messages.
err_console = Console(stderr=True)


# Typer application object; the CLI command in this file is registered on it.
app = typer.Typer(
    name="Docling",
    no_args_is_help=True,  # invoking the program without arguments prints the help
    add_completion=False,  # do not add Typer's shell-completion options
    pretty_exceptions_enable=False,  # keep plain Python tracebacks
)


def version_callback(value: bool):
    """Print the installed versions of Docling and its companion packages.

    Intended as an eager option callback: when the flag is set, the versions
    are printed to stdout and the program exits before any other processing.

    Args:
        value: Truthy when the version flag was passed; falsy (``False`` or
            ``None``) otherwise, in which case nothing happens.

    Raises:
        typer.Exit: Always, after printing, when ``value`` is truthy.
        importlib.metadata.PackageNotFoundError: If one of the queried
            distributions is not installed.
    """
    if not value:
        return

    # A bare `import importlib` does not guarantee that the `metadata`
    # submodule is loaded; import it explicitly so the lookup does not depend
    # on some other package having imported it first.
    from importlib import metadata

    # (label shown to the user, distribution name queried)
    packages = (
        ("Docling", "docling"),
        ("Docling Core", "docling-core"),
        ("Docling IBM Models", "docling-ibm-models"),
        ("Docling Parse", "docling-parse"),
    )

    # Resolve every version before printing anything, so a missing package
    # fails without leaving partial output behind.
    resolved = [(label, metadata.version(dist)) for label, dist in packages]
    for label, found_version in resolved:
        print(f"{label} version: {found_version}")
    raise typer.Exit()


# Enum of the PDF backend options selectable from the command line.
class Backend(str, Enum):
    """PDF backend choices.

    The ``str`` mixin makes each member compare equal to its plain string
    value (e.g. ``Backend.DOCLING == "docling"``).
    """

    # Selects the pypdfium2-based backend (PyPdfiumDocumentBackend).
    PYPDFIUM2 = "pypdfium2"
    # Selects the docling-parse-based backend (DoclingParseDocumentBackend).
    DOCLING = "docling"


def export_documents(
    conv_results: Iterable[ConversionResult],
    output_dir: Path,
    export_json: bool,
    export_md: bool,
    export_txt: bool,
    export_doctags: bool,
):
    """Write every successfully converted document to ``output_dir``.

    For each result whose status is ``ConversionStatus.SUCCESS`` one file per
    enabled format is written, named ``<input file stem>.<suffix>``. Results
    with any other status are logged as warnings and counted as failures.
    A summary line with the totals is logged at the end.

    Note that output names are derived from the input file stem only, so two
    inputs sharing a stem write to the same output files.

    Args:
        conv_results: Conversion results to export; iterated exactly once.
        output_dir: Directory receiving the files. It is not created here.
        export_json: Write ``.json`` (``render_as_dict()`` serialized as JSON).
        export_md: Write ``.md`` (``render_as_markdown()``).
        export_txt: Write ``.txt`` (``render_as_text()``).
        export_doctags: Write ``.doctags`` (``render_as_doctags()``).
    """
    # (enabled, file suffix, label used in the log message, renderer),
    # listed in the order the files are written.
    exporters = (
        (export_json, "json", "JSON", lambda res: json.dumps(res.render_as_dict())),
        (export_txt, "txt", "Text", lambda res: res.render_as_text()),
        (export_md, "md", "Markdown", lambda res: res.render_as_markdown()),
        (export_doctags, "doctags", "Doc Tags", lambda res: res.render_as_doctags()),
    )

    success_count = 0
    failure_count = 0

    for conv_res in conv_results:
        if conv_res.status != ConversionStatus.SUCCESS:
            _log.warning(f"Document {conv_res.input.file} failed to convert.")
            failure_count += 1
            continue

        success_count += 1
        doc_filename = conv_res.input.file.stem

        for enabled, suffix, label, render in exporters:
            if not enabled:
                continue
            fname = output_dir / f"{doc_filename}.{suffix}"
            # Pin UTF-8: the platform default encoding may be unable to
            # represent the document text (e.g. cp1252 on Windows).
            with fname.open("w", encoding="utf-8") as fp:
                _log.info(f"writing {label} output to {fname}")
                fp.write(render(conv_res))

    _log.info(
        f"Processed {success_count + failure_count} docs, of which {failure_count} failed"
    )


def _collect_pdf_paths(input_sources: Iterable[Path]) -> List[Path]:
    """Expand the CLI sources into a flat list of document paths.

    Directories are searched recursively for files whose suffix is ``.pdf``
    in any letter case; each file is listed once, in sorted order. Any other
    existing path is taken as-is.

    Raises:
        typer.Abort: If a source does not exist (after printing an error to
            stderr).
    """
    doc_paths: List[Path] = []
    for source in input_sources:
        if not source.exists():
            err_console.print(
                f"[red]Error: The input file {source} does not exist.[/red]"
            )
            raise typer.Abort()
        if source.is_dir():
            # A single case-insensitive suffix check avoids the duplicates
            # that separate "*.pdf" and "*.PDF" globs produce where glob
            # matching is case-insensitive, and also catches mixed-case
            # suffixes such as ".Pdf".
            doc_paths.extend(
                sorted(
                    path
                    for path in source.rglob("*")
                    if path.is_file() and path.suffix.lower() == ".pdf"
                )
            )
        else:
            doc_paths.append(source)
    return doc_paths


def _build_converter(backend: Backend, ocr: bool) -> DocumentConverter:
    """Create the DocumentConverter for the chosen backend / OCR combination."""
    use_pypdfium = backend == Backend.PYPDFIUM2

    pipeline_options = PipelineOptions()
    # Honour the OCR flag for every backend.
    pipeline_options.do_ocr = ocr
    pipeline_options.do_table_structure = True
    # Cell matching is switched off only for PyPdfium without OCR and stays
    # on for every other combination.
    pipeline_options.table_structure_options.do_cell_matching = (
        ocr or not use_pypdfium
    )

    return DocumentConverter(
        pipeline_options=pipeline_options,
        pdf_backend=(
            PyPdfiumDocumentBackend if use_pypdfium else DoclingParseDocumentBackend
        ),
    )


@app.command(no_args_is_help=True)
def convert(
    input_sources: Annotated[
        List[Path],
        typer.Argument(
            ...,
            metavar="source",
            help="PDF files to convert. Directories are also accepted.",
        ),
    ],
    export_json: Annotated[
        bool,
        typer.Option(
            ..., "--json/--no-json", help="If enabled the document is exported as JSON."
        ),
    ] = False,
    export_md: Annotated[
        bool,
        typer.Option(
            ..., "--md/--no-md", help="If enabled the document is exported as Markdown."
        ),
    ] = True,
    export_txt: Annotated[
        bool,
        typer.Option(
            ..., "--txt/--no-txt", help="If enabled the document is exported as Text."
        ),
    ] = False,
    export_doctags: Annotated[
        bool,
        typer.Option(
            ...,
            "--doctags/--no-doctags",
            help="If enabled the document is exported as Doc Tags.",
        ),
    ] = False,
    ocr: Annotated[
        bool,
        typer.Option(
            ..., help="If enabled, the bitmap content will be processed using OCR."
        ),
    ] = True,
    backend: Annotated[
        Backend, typer.Option(..., help="The PDF backend to use.")
    ] = Backend.DOCLING,
    output: Annotated[
        Path, typer.Option(..., help="Output directory where results are saved.")
    ] = Path("."),
    version: Annotated[
        Optional[bool],
        typer.Option(
            "--version",
            callback=version_callback,
            is_eager=True,
            help="Show version information.",
        ),
    ] = None,
):
    # CLI entry point: convert the given PDF sources and export the results.
    #
    # Deliberately documented with comments rather than a docstring: Typer
    # uses a command function's docstring as its help text, so adding one
    # would change the program's --help output.
    #
    # Steps:
    #   1. Expand `input_sources` (files and directories) into document paths;
    #      abort if a source does not exist.
    #   2. Build a DocumentConverter for the requested backend / OCR setting.
    #   3. Convert, create `output` if needed, and export every enabled format.
    #   4. Log the elapsed wall-clock time (covers conversion and export).
    #
    # `version` is consumed by its eager callback and is not used here.
    logging.basicConfig(level=logging.INFO)

    input_doc_paths = _collect_pdf_paths(input_sources)
    doc_converter = _build_converter(backend, ocr)

    # Define input files
    conversion_input = DocumentConversionInput.from_paths(input_doc_paths)

    start_time = time.time()

    conv_results = doc_converter.convert(conversion_input)

    output.mkdir(parents=True, exist_ok=True)
    export_documents(
        conv_results,
        output_dir=output,
        export_json=export_json,
        export_md=export_md,
        export_txt=export_txt,
        export_doctags=export_doctags,
    )

    elapsed = time.time() - start_time

    _log.info(f"All documents were converted in {elapsed:.2f} seconds.")


# Run the Typer application when this file is executed as a script.
if __name__ == "__main__":
    app()