refactor(processor): enforce config_filename requirement for HF Hub loading (#1860)

- Updated DataProcessorPipeline to require an explicit config_filename when loading from the Hugging Face Hub; omitting it now raises a ValueError telling the caller to specify the config_filename parameter.
- Simplified local path checks and improved error handling for invalid paths.
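- Adjusted tests to reflect the new requirement and to cover error handling for non-existent local paths, non-existent Hub repos (with and without config_filename), and local directories that contain no .json configuration files.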
This commit is contained in:
Adil Zouitine
2025-09-04 10:31:18 +02:00
committed by GitHub
parent a6dbb65917
commit 793ad86fc9
2 changed files with 48 additions and 58 deletions

View File

@@ -1714,16 +1714,26 @@ def test_override_with_device_strings():
def test_from_pretrained_nonexistent_path():
"""Test error handling when loading from non-existent sources."""
from huggingface_hub.errors import HfHubHTTPError, HFValidationError
from huggingface_hub.errors import HfHubHTTPError
# Test with an invalid repo ID (too many slashes) - caught by HF validation
with pytest.raises(HFValidationError):
# Test with an invalid local path - should raise FileNotFoundError
with pytest.raises(FileNotFoundError):
DataProcessorPipeline.from_pretrained("/path/that/does/not/exist")
# Test with a non-existent but valid Hub repo format
with pytest.raises((FileNotFoundError, HfHubHTTPError)):
# Test with a Hub repo format that would be a local path (too many slashes)
with pytest.raises(FileNotFoundError):
DataProcessorPipeline.from_pretrained("user/repo/extra/path")
# Test with a non-existent but valid Hub repo format (now requires config_filename)
with pytest.raises(ValueError, match="you must specify the config_filename parameter"):
DataProcessorPipeline.from_pretrained("nonexistent-user/nonexistent-repo")
# Test with a non-existent Hub repo when config_filename is provided
with pytest.raises((FileNotFoundError, HfHubHTTPError)):
DataProcessorPipeline.from_pretrained(
"nonexistent-user/nonexistent-repo", config_filename="processor.json"
)
# Test with a local directory that exists but has no config files
with tempfile.TemporaryDirectory() as tmp_dir:
with pytest.raises(FileNotFoundError, match="No .json configuration files found"):