add: configurable number of workers (`num_workers`) for merging datasets

This commit is contained in:
Francesco Capuano
2025-11-06 00:41:12 +00:00
parent a4aa316470
commit bb5676ee5a

View File

@@ -103,6 +103,7 @@ class SplitConfig:
class MergeConfig:
type: str = "merge"
repo_ids: list[str] | None = None
num_workers: int | None = None
@dataclass
@@ -215,6 +216,7 @@ def handle_merge(cfg: EditDatasetConfig) -> None:
datasets,
output_repo_id=cfg.repo_id,
output_dir=output_dir,
num_workers=cfg.operation.num_workers,
)
logging.info(f"Merged dataset saved to {output_dir}")