From eb4acafb4ab150dcd279c00d5c2fec3628d92b16 Mon Sep 17 00:00:00 2001
From: Leo Zhang <1095412419@qq.com>
Date: Tue, 5 Nov 2024 10:51:19 +0800
Subject: [PATCH] Update builder.py

add_special_tokens for phi 2
---
 imp_llava/model/builder.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/imp_llava/model/builder.py b/imp_llava/model/builder.py
index f1ec77e..407605e 100644
--- a/imp_llava/model/builder.py
+++ b/imp_llava/model/builder.py
@@ -58,6 +58,8 @@ def load_pretrained_model(model_path, model_base, model_name, load_8bit=False, l
         assert 'imp' in model_name.lower(), 'The model name must contain `imp`'
         tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False, trust_remote_code=True)
         if 'phi-2' in model_name.lower() or 'phi2' in model_name.lower():
+            logger.info('Tokenizer config: set `<|endoftext|>` as `eos_token`.')
+            tokenizer.add_special_tokens({'eos_token': '<|endoftext|>'})
             lora_cfg_pretrained = ImpConfig.from_pretrained(model_path)
             model = ImpForCausalLM.from_pretrained(model_base, config=lora_cfg_pretrained, **kwargs)
         elif 'qwen1.5' in model_name.lower():
@@ -137,6 +139,8 @@ def load_from_hf(repo_id, filename, subfolder=None):
 
         if 'phi2' in model_name.lower() or 'phi-2' in model_name.lower():
             tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+            logger.info('Tokenizer config: set `<|endoftext|>` as `eos_token`.')
+            tokenizer.add_special_tokens({'eos_token': '<|endoftext|>'})
             model = ImpForCausalLM.from_pretrained(model_path, **kwargs)
             logger.info('Model is loaded...')
         elif 'qwen1.5' in model_name.lower():