add dolphin inference by tensorrt-llm

2025-06-30 19:41:03 +08:00
parent ce591d9136
commit c247e5e1f3
14 changed files with 2472 additions and 0 deletions
--- a/deployment/tensorrt_llm/run_dolphin.sh
+++ b/deployment/tensorrt_llm/run_dolphin.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+# Runs four Dolphin TensorRT-LLM demo inferences: one page image, then formula, paragraph and table element images.
+set -euxo pipefail  # abort on errors, unset variables and failed pipelines; -x keeps the command tracing
+cd -- "$(dirname -- "${BASH_SOURCE[0]}")"  # so run_dolphin.py, tmp/ and ../../demo resolve from the script's directory, whatever the caller's cwd
+export MODEL_NAME="Dolphin"
+python run_dolphin.py \
+    --batch_size 1 \
+    --hf_model_dir "tmp/hf_models/${MODEL_NAME}" \
+    --visual_engine_dir "tmp/trt_engines/${MODEL_NAME}/vision_encoder" \
+    --llm_engine_dir "tmp/trt_engines/${MODEL_NAME}/1-gpu/bfloat16" \
+    --max_new_tokens 4096 \
+    --repetition_penalty 1.0 \
+    --input_text "Parse the reading order of this document." \
+    --image_path "../../demo/page_imgs/page_1.jpeg"
+
+# Element image: block formula.
+python run_dolphin.py \
+    --batch_size 1 \
+    --hf_model_dir "tmp/hf_models/${MODEL_NAME}" \
+    --visual_engine_dir "tmp/trt_engines/${MODEL_NAME}/vision_encoder" \
+    --llm_engine_dir "tmp/trt_engines/${MODEL_NAME}/1-gpu/bfloat16" \
+    --max_new_tokens 4096 \
+    --repetition_penalty 1.0 \
+    --input_text "Read text in the image." \
+    --image_path "../../demo/element_imgs/block_formula.jpeg"
+
+# Element image: paragraph.
+python run_dolphin.py \
+    --batch_size 1 \
+    --hf_model_dir "tmp/hf_models/${MODEL_NAME}" \
+    --visual_engine_dir "tmp/trt_engines/${MODEL_NAME}/vision_encoder" \
+    --llm_engine_dir "tmp/trt_engines/${MODEL_NAME}/1-gpu/bfloat16" \
+    --max_new_tokens 4096 \
+    --repetition_penalty 1.0 \
+    --input_text "Read text in the image." \
+    --image_path "../../demo/element_imgs/para_1.jpg"
+
+# Element image: table.
+python run_dolphin.py \
+    --batch_size 1 \
+    --hf_model_dir "tmp/hf_models/${MODEL_NAME}" \
+    --visual_engine_dir "tmp/trt_engines/${MODEL_NAME}/vision_encoder" \
+    --llm_engine_dir "tmp/trt_engines/${MODEL_NAME}/1-gpu/bfloat16" \
+    --max_new_tokens 4096 \
+    --repetition_penalty 1.0 \
+    --input_text "Parse the table in the image." \
+    --image_path "../../demo/element_imgs/table_1.jpeg"