I just found out that with version 0.0.4a2 the same code no longer works and produces the error below. I also tried downloading the BERT model locally and pointing to the local folder, but I still get the same kind of error:
Traceback (most recent call last):
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/huggingface_hub/utils/_errors.py", line 286, in hf_raise_for_status
response.raise_for_status()
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/requests/models.py", line 1021, in raise_for_status
raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 401 Client Error: Unauthorized for url: https://huggingface.co/None/resolve/main/config.json
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/transformers/utils/hub.py", line 389, in cached_file
resolved_file = hf_hub_download(
^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 118, in _inner_fn
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1368, in hf_hub_download
raise head_call_error
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1238, in hf_hub_download
metadata = get_hf_file_metadata(
^^^^^^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 118, in _inner_fn
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1631, in get_hf_file_metadata
r = _request_wrapper(
^^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 385, in _request_wrapper
response = _request_wrapper(
^^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 409, in _request_wrapper
hf_raise_for_status(response)
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/huggingface_hub/utils/_errors.py", line 323, in hf_raise_for_status
raise RepositoryNotFoundError(message, response) from e
huggingface_hub.utils._errors.RepositoryNotFoundError: 401 Client Error. (Request ID: Root=1-65a26540-4ffa46f312262c9956872e4f;02ebfce3-0189-4426-9f49-9cc393b0cfde)
Repository Not Found for url: https://huggingface.co/None/resolve/main/config.json.
Please make sure you specified the correct `repo_id` and `repo_type`.
If you are trying to access a private or gated repo, make sure you are authenticated.
Invalid username or password.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/main.py", line 180, in <module>
prepare_training_data_out_of_synthetic_dataset(colbert_training_query_docs_path,colbert_model_path,latest_model_name,local_pretrained_model,language,colbert_training_triplets_path)
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/synthethic_retrieval_dataset_generation.py", line 270, in prepare_training_data_out_of_synthetic_dataset
trainer = RAGTrainer(model_name=latest_model_name, pretrained_model_name=pretrained_model_name, language_code=language_code)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/ragatouille/RAGTrainer.py", line 45, in __init__
self.model = ColBERT(
^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/ragatouille/models/colbert.py", line 62, in __init__
self.inference_ckpt = Checkpoint(self.checkpoint, colbert_config=self.config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/colbert/modeling/checkpoint.py", line 24, in __init__
self.query_tokenizer = QueryTokenizer(self.colbert_config, verbose=self.verbose)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/colbert/modeling/tokenization/query_tokenization.py", line 12, in __init__
HF_ColBERT = class_factory(config.checkpoint)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/colbert/modeling/hf_colbert.py", line 59, in class_factory
loadedConfig = AutoConfig.from_pretrained(name_or_path)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/transformers/models/auto/configuration_auto.py", line 1082, in from_pretrained
config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/transformers/configuration_utils.py", line 644, in get_config_dict
config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/transformers/configuration_utils.py", line 699, in _get_config_dict
resolved_config_file = cached_file(
^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/transformers/utils/hub.py", line 410, in cached_file
raise EnvironmentError(
OSError: None is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`
Exception ignored in: <function ColBERT.__del__ at 0x7f124c2a96c0>
Traceback (most recent call last):
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/ragatouille/models/colbert.py", line 463, in __del__
AttributeError: 'ColBERT' object has no attribute 'run_context'
(venv) tm16@ThewindsHPZBookFury:~/Work/00_RandomCoding/e2e_retrieval_pipeline$ /home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/bin/python /home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/main.py
/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/langchain/llms/__init__.py:548: LangChainDeprecationWarning: Importing LLMs from langchain is deprecated. Importing from langchain will no longer be supported as of langchain==0.2.0. Please import from langchain-community instead:
`from langchain_community.llms import OpenAI`.
To install langchain-community run `pip install -U langchain-community`.
warnings.warn(
/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/langchain_core/_api/deprecation.py:115: LangChainDeprecationWarning: The class `OpenAI` was deprecated in LangChain 0.1.0 and will be removed in 0.2.0. Use langchain_openai.OpenAI instead.
warn_deprecated(
bert base
bert base
Some weights of HF_ColBERT were not initialized from the model checkpoint at deepset/gbert-large and are newly initialized: ['linear.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Traceback (most recent call last):
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/huggingface_hub/utils/_errors.py", line 286, in hf_raise_for_status
response.raise_for_status()
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/requests/models.py", line 1021, in raise_for_status
raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 401 Client Error: Unauthorized for url: https://huggingface.co/None/resolve/main/config.json
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/transformers/utils/hub.py", line 389, in cached_file
resolved_file = hf_hub_download(
^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 118, in _inner_fn
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1368, in hf_hub_download
raise head_call_error
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1238, in hf_hub_download
metadata = get_hf_file_metadata(
^^^^^^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 118, in _inner_fn
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1631, in get_hf_file_metadata
r = _request_wrapper(
^^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 385, in _request_wrapper
response = _request_wrapper(
^^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 409, in _request_wrapper
hf_raise_for_status(response)
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/huggingface_hub/utils/_errors.py", line 323, in hf_raise_for_status
raise RepositoryNotFoundError(message, response) from e
huggingface_hub.utils._errors.RepositoryNotFoundError: 401 Client Error. (Request ID: Root=1-65a26710-578e59d841d16b9b394a8895;a8aa5928-f89b-4ff8-9528-8c535da29b00)
Repository Not Found for url: https://huggingface.co/None/resolve/main/config.json.
Please make sure you specified the correct `repo_id` and `repo_type`.
If you are trying to access a private or gated repo, make sure you are authenticated.
Invalid username or password.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/main.py", line 178, in <module>
trainer = RAGTrainer(model_name="HBOColbert", pretrained_model_name="deepset/gbert-large", language_code="de")
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/ragatouille/RAGTrainer.py", line 45, in __init__
self.model = ColBERT(
^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/ragatouille/models/colbert.py", line 62, in __init__
self.inference_ckpt = Checkpoint(self.checkpoint, colbert_config=self.config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/colbert/modeling/checkpoint.py", line 24, in __init__
self.query_tokenizer = QueryTokenizer(self.colbert_config, verbose=self.verbose)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/colbert/modeling/tokenization/query_tokenization.py", line 12, in __init__
HF_ColBERT = class_factory(config.checkpoint)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/colbert/modeling/hf_colbert.py", line 59, in class_factory
loadedConfig = AutoConfig.from_pretrained(name_or_path)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/transformers/models/auto/configuration_auto.py", line 1082, in from_pretrained
config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/transformers/configuration_utils.py", line 644, in get_config_dict
config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/transformers/configuration_utils.py", line 699, in _get_config_dict
resolved_config_file = cached_file(
^^^^^^^^^^^^
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/transformers/utils/hub.py", line 410, in cached_file
raise EnvironmentError(
OSError: None is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`
Exception ignored in: <function ColBERT.__del__ at 0x7f523c515800>
Traceback (most recent call last):
File "/home/tm16/Work/00_RandomCoding/e2e_retrieval_pipeline/venv/lib/python3.11/site-packages/ragatouille/models/colbert.py", line 463, in __del__
AttributeError: 'ColBERT' object has no attribute 'run_context'