Ошибка OSError: Unable to load vocabulary from file. Please check that the provided vocabulary is accessible and not corrupted. Помогите исправить
!pip install torch sentencepiece
accelerate==0.18.0
bitsandbytes==0.37.2
git+https://github.com/huggingface/transformers.git@15641892985b1d77acc74c9065c332cd7c3f7d7f
git+https://github.com/huggingface/peft.git@c22a57420cc539b547beb7e40cd0712c9f56910a
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
MODEL_NAME = "IlyaGusev/saiga_7b_lora"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
config = PeftConfig.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
config.base_model_name_or_path,
load_in_8bit=True,
device_map="auto",
torch_dtype=torch.float16
)
model = PeftModel.from_pretrained(
model,
MODEL_NAME,
torch_dtype=torch.float16
)
model.eval()
После этого выдает ошибку:
OSError Traceback (most recent call last)
File ~\AppData\Roaming\Python\Python311\site-packages\transformers\tokenization_utils_base.py:1965, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, use_auth_token, cache_dir, local_files_only, _commit_hash, *init_inputs, **kwargs)
1964 try:
-> 1965 tokenizer = cls(*init_inputs, **init_kwargs)
1966 except OSError:
File ~\AppData\Roaming\Python\Python311\site-packages\transformers\models\llama\tokenization_llama.py:96, in LlamaTokenizer.__init__(self, vocab_file, unk_token, bos_token, eos_token, pad_token, sp_model_kwargs, add_bos_token, add_eos_token, clean_up_tokenization_spaces, **kwargs)
95 self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
---> 96 self.sp_model.Load(vocab_file)
File ~\AppData\Roaming\Python\Python311\site-packages\sentencepiece\__init__.py:961, in SentencePieceProcessor.Load(self, model_file, model_proto)
960 return self.LoadFromSerializedProto(model_proto)
--> 961 return self.LoadFromFile(model_file)
File ~\AppData\Roaming\Python\Python311\site-packages\sentencepiece\__init__.py:316, in SentencePieceProcessor.LoadFromFile(self, arg)
315 def LoadFromFile(self, arg):
--> 316 return _sentencepiece.SentencePieceProcessor_LoadFromFile(self, arg)
OSError: Not found: "C:\Users\Нейронка/.cache\huggingface\hub\models--IlyaGusev--saiga_7b_lora\snapshots\ea333322229f2a5d16dd06b5cc1b882dbc06124f\tokenizer.model": No such file or directory Error #2
During handling of the above exception, another exception occurred:
OSError Traceback (most recent call last)
Cell In[39], line 7
3 from transformers import AutoModelForCausalLM, AutoTokenizer
5 MODEL_NAME = "IlyaGusev/saiga_7b_lora"
----> 7 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
8 config = PeftConfig.from_pretrained(MODEL_NAME)
9 model = AutoModelForCausalLM.from_pretrained(
10 config.base_model_name_or_path,
11 load_in_8bit=True,
12 device_map="auto",
13 torch_dtype=torch.float16
14 )
File ~\AppData\Roaming\Python\Python311\site-packages\transformers\models\auto\tokenization_auto.py:694, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
690 if tokenizer_class is None:
691 raise ValueError(
692 f"Tokenizer class {tokenizer_class_candidate} does not exist or is not currently imported."
693 )
--> 694 return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
696 # Otherwise we have to be creative.
697 # if model is an encoder decoder, the encoder tokenizer class is used by default
698 if isinstance(config, EncoderDecoderConfig):
File ~\AppData\Roaming\Python\Python311\site-packages\transformers\tokenization_utils_base.py:1811, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, *init_inputs, **kwargs)
1808 else:
1809 logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 1811 return cls._from_pretrained(
1812 resolved_vocab_files,
1813 pretrained_model_name_or_path,
1814 init_configuration,
1815 *init_inputs,
1816 use_auth_token=use_auth_token,
1817 cache_dir=cache_dir,
1818 local_files_only=local_files_only,
1819 _commit_hash=commit_hash,
1820 **kwargs,
1821 )
File ~\AppData\Roaming\Python\Python311\site-packages\transformers\tokenization_utils_base.py:1967, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, use_auth_token, cache_dir, local_files_only, _commit_hash, *init_inputs, **kwargs)
1965 tokenizer = cls(*init_inputs, **init_kwargs)
1966 except OSError:
-> 1967 raise OSError(
1968 "Unable to load vocabulary from file. "
1969 "Please check that the provided vocabulary is accessible and not corrupted."
1970 )
1972 # Save inputs and kwargs for saving and re-loading with ``save_pretrained``
1973 # Removed: Now done at the base class level
1974 # tokenizer.init_inputs = init_inputs
1975 # tokenizer.init_kwargs = init_kwargs
1976
1977 # If there is a complementary special token map, load it
1978 special_tokens_map_file = resolved_vocab_files.pop("special_tokens_map_file", None)
OSError: Unable to load vocabulary from file. Please check that the provided vocabulary is accessible and not corrupted.