diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
index 6a0d9a9ba..f7b6d97b1 100755
--- a/convert_hf_to_gguf_update.py
+++ b/convert_hf_to_gguf_update.py
@@ -7,7 +7,6 @@
 import pathlib
 import re
 import requests
-import sys
 import json
 import shutil
 import argparse
@@ -69,8 +68,7 @@ args = parser.parse_args()
 hf_token = args.hf_token if args.hf_token is not None else hf_token

 if hf_token is None:
-    logger.error("HF token is required. Please provide it as an argument or set it in ~/.cache/huggingface/token")
-    sys.exit(1)
+    logger.warning("HF token not found. You can provide it as an argument or set it in ~/.cache/huggingface/token")

 # TODO: this string has to exercise as much pre-tokenizer functionality as possible
 # will be updated with time - contributions welcome
@@ -151,7 +149,7 @@ pre_computed_hashes = [


 def download_file_with_auth(url, token, save_path):
-    headers = {"Authorization": f"Bearer {token}"}
+    headers = {"Authorization": f"Bearer {token}"} if token else None
     response = sess.get(url, headers=headers)
     response.raise_for_status()
     os.makedirs(os.path.dirname(save_path), exist_ok=True)
@@ -250,10 +248,9 @@ for model in [*pre_computed_hashes, *all_models]:
     else:
         # otherwise, compute the hash of the tokenizer

-        # Skip if the tokenizer folder does not exist or there are other download issues previously
-        if not os.path.exists(f"models/tokenizers/{name}"):
-            logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
-            continue
+        # Fail if the tokenizer folder with config does not exist or there are other download issues previously
+        if not os.path.isfile(f"models/tokenizers/{name}/tokenizer_config.json"):
+            raise OSError(f"Config for tokenizer {name} not found. The model may not exist or is not accessible with the provided token.")

         try:
             logger.info(f"Loading tokenizer from {f'models/tokenizers/{name}'}...")
@@ -261,9 +258,8 @@
                 tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}", use_fast=False)
             else:
                 tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
-        except OSError as e:
-            logger.error(f"Error loading tokenizer for model {name}. The model may not exist or is not accessible with the provided token. Error: {e}")
-            continue  # Skip to the next model if the tokenizer can't be loaded
+        except Exception as e:
+            raise OSError(f"Error loading tokenizer for model {name}.") from e

         chktok = tokenizer.encode(CHK_TXT)
         chkhsh = sha256(str(chktok).encode()).hexdigest()
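
Note (reviewer sketch, not part of the patch): the key behavioral change in download_file_with_auth is that headers becomes None when no token is available, and requests treats headers=None the same as passing no extra headers, so public repos still download anonymously. Below is a minimal standalone sketch of that pattern; the URL, save path, and the use of requests.get in place of the script's sess session are illustrative assumptions, not code from the patch.

import os

import requests


def download_file_with_auth(url, token, save_path):
    # Attach an Authorization header only when a token is actually available;
    # requests treats headers=None exactly like passing no headers at all.
    headers = {"Authorization": f"Bearer {token}"} if token else None
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    with open(save_path, "wb") as f:
        f.write(response.content)


# Anonymous download of a public file still works with token=None
# (hypothetical URL and path, for illustration only):
download_file_with_auth(
    "https://huggingface.co/gpt2/resolve/main/tokenizer_config.json",
    None,
    "models/tokenizers/gpt2/tokenizer_config.json",
)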