Files
2023-03-02 11:17:26 +08:00

48 lines
1.4 KiB
Python

# Copyright (c) Alibaba, Inc. and its affiliates.
from abc import ABC, abstractmethod
from typing import Any, List, Tuple, Union
class TaskDataset(ABC):
"""The task dataset base class for all the task specific dataset processors.
"""
def __init__(self,
datasets: Union[Any, List[Any]],
mode,
preprocessor=None,
**kwargs):
super().__init__()
self.mode = mode
self.preprocessor = preprocessor
self._inner_dataset = self.prepare_dataset(datasets)
@abstractmethod
def prepare_dataset(self, datasets: Union[Any, List[Any]]) -> Any:
"""Prepare a dataset.
User can process the input datasets in a whole dataset perspective.
This method also helps to merge several datasets to one.
Args:
datasets: The original dataset(s)
Returns: A single dataset, which may be created after merging.
"""
pass
@abstractmethod
def prepare_sample(self, data):
"""Preprocess the data fetched from the inner_dataset.
If the preprocessor is None, the original data will be returned, else the preprocessor will be called.
User can override this method to implement custom logics.
Args:
data: The data fetched from the dataset.
Returns: The processed data.
"""
pass