update doc
This commit is contained in:
parent
f4a6c495a6
commit
0705bc12ce
@ -5,7 +5,9 @@
|
|||||||
## ✅ Introduction
|
## ✅ Introduction
|
||||||
The Dolphin model employs a **Swin Encoder + MBart Decoder** architecture. In the HuggingFace Transformers [Config](https://huggingface.co/ByteDance/Dolphin/blob/main/config.json),
|
The Dolphin model employs a **Swin Encoder + MBart Decoder** architecture. In the HuggingFace Transformers [Config](https://huggingface.co/ByteDance/Dolphin/blob/main/config.json),
|
||||||
its architectures field is specified as "VisionEncoderDecoderModel". Dolphin, Nougat, and Donut share the same model architecture. TensorRT-LLM already supports the Nougat model.
|
its architectures field is specified as "VisionEncoderDecoderModel". Dolphin, Nougat, and Donut share the same model architecture. TensorRT-LLM already supports the Nougat model.
|
||||||
Following Nougat's conversion script, we have successfully implemented Dolphin on TensorRT-LLM. Note: input_ids MUST be of int32 type, otherwise TensorRT-LLM will produce incorrect results.
|
Following Nougat's conversion script, we have successfully implemented Dolphin on TensorRT-LLM.
|
||||||
|
|
||||||
|
**Note:** `prompt_ids` MUST be of **int32** type; otherwise, TensorRT-LLM will produce incorrect results.
|
||||||
|
|
||||||
## 🛠️ Installation
|
## 🛠️ Installation
|
||||||
> We have only tested with TensorRT-LLM 0.18.1 on Linux.
|
> We have only tested with TensorRT-LLM 0.18.1 on Linux.
|
||||||
|
@ -115,8 +115,9 @@ class DolphinRunner(MultimodalModelRunner):
|
|||||||
prompts = [f"<s>{text.strip()} <Answer/>" for text in input_texts]
|
prompts = [f"<s>{text.strip()} <Answer/>" for text in input_texts]
|
||||||
images = self.processor(input_images, return_tensors="pt")['pixel_values'].to("cuda")
|
images = self.processor(input_images, return_tensors="pt")['pixel_values'].to("cuda")
|
||||||
prompt_ids = self.tokenizer(prompts, add_special_tokens=False, return_tensors="pt").input_ids.to("cuda")
|
prompt_ids = self.tokenizer(prompts, add_special_tokens=False, return_tensors="pt").input_ids.to("cuda")
|
||||||
prompt_ids = prompt_ids.to(
|
|
||||||
torch.int32) # Important! If the type of prompt_ids is not int32, the output will be wrong.
|
# 🚨🚨🚨 Important! If the type of prompt_ids is not int32, the output will be wrong. 🚨🚨🚨
|
||||||
|
prompt_ids = prompt_ids.to(torch.int32)
|
||||||
|
|
||||||
logger.info("---------------------------------------------------------")
|
logger.info("---------------------------------------------------------")
|
||||||
logger.info(f"images size: {images.size()}")
|
logger.info(f"images size: {images.size()}")
|
||||||
|
Loading…
Reference in New Issue
Block a user