Module datatap.api.entities

The datatap.api.entities submodule contains several entities that provide a user-friendly abstraction for the dataTap API.

Expand source code
"""
The `datatap.api.entities` submodule contains several entities
that provide a user-friendly abstraction for the dataTap API.
"""

from .api import Api

from .user import User
from .database import Database
from .dataset import Dataset
from .repository import Repository, Tag, Split

__all__ = [
    "Api",
    "User",
    "Database",
    "Dataset",
    "Repository",
    "Tag",
    "Split",
]

Sub-modules

datatap.api.entities.api
datatap.api.entities.database
datatap.api.entities.dataset
datatap.api.entities.repository
datatap.api.entities.user

Classes

class Api (api_key: Optional[str] = None, uri: Optional[str] = None)

The Api object is the primary method of interacting with the dataTap API.

The Api constructor takes two optional arguments.

The first, api_key, should be the current user's personal API key. In order to encourage good secret practices, this class will use the value found in the DATATAP_API_KEY environment variable if no key is passed in. Consider using environment variables or another secret manager for your API keys.

The second argument is uri. This should only be used if you would like to target a different API server than the default. For instance, if you are using a proxy to reach the API, you can use the uri argument to point toward your proxy.

This object encapsulates most of the logic for interacting with the API. For instance, to get a list of all repositories that a user has access to, you can run

from datatap import Api

api = Api()
print([
    repository
    for database in api.get_database_list()
    for repository in database.get_repository_list()
])

For more details on the functionality provided by the Api object, take a look at its documentation.

Expand source code
class Api:
    """
    The `Api` object is the primary method of interacting with the dataTap API.

    The `Api` constructor takes two optional arguments.

    The first, `api_key`, should be the current user's personal API key. In
    order to encourage good secret practices, this class will fall back to the
    value found in `DATATAP_API_KEY` if no key is passed in. Consider using
    environment variables or another secret manager for your API keys.

    The second argument is `uri`. This should only be used if you would like
    to target a different API server than the default. For instance, if you
    are using a proxy to reach the API, you can use the `uri` argument to
    point toward your proxy.

    This object encapsulates most of the logic for interacting with the API.
    For instance, to get a list of all repositories that a user has access
    to, you can run

    ```py
    from datatap import Api

    api = Api()
    print([
        repository
        for database in api.get_database_list()
        for repository in database.get_repository_list()
    ])
    ```

    For more details on the functionality provided by the Api object, take
    a look at its documentation.
    """
    def __init__(self, api_key: Optional[str] = None, uri: Optional[str] = None):
        # All methods of this class issue their requests through `endpoints`.
        self.endpoints = ApiEndpoints(api_key, uri)

    def get_current_user(self) -> User:
        """
        Returns the current logged-in user.
        """
        return User.from_json(self.endpoints, self.endpoints.user.current())

    def get_database_list(self) -> List[Database]:
        """
        Returns a list of all databases that the current user has access to.
        """
        return [
            Database.from_json(self.endpoints, json_db)
            for json_db in self.endpoints.database.list()
        ]

    def get_default_database(self) -> Database:
        """
        Returns the default database for the user (this defaults to the public
        database).

        Raises:
            Exception: if the current user's `default_database` is `None`.
        """

        # TODO(zwade): Have a way of specifying a per-user default
        current_user = self.get_current_user()
        if current_user.default_database is None:
            raise Exception("Trying to find the default database, but none is specified")

        # `default_database` holds a UID, so resolve it to a full `Database`.
        return self.get_database_by_uid(current_user.default_database)

    def get_database_by_uid(self, uid: str) -> Database:
        """
        Queries a database by its UID and returns it.
        """
        return Database.from_json(self.endpoints, self.endpoints.database.query_by_uid(uid))


    @overload
    def get_database_by_name(self, name: str, allow_multiple: Literal[True]) -> List[Database]: ...
    @overload
    def get_database_by_name(self, name: str, allow_multiple: Literal[False] = False) -> Database: ...
    def get_database_by_name(self, name: str, allow_multiple: bool = False) -> Union[Database, List[Database]]:
        """
        Queries a database by its name and returns it. If `allow_multiple` is true, it will return
        a list of databases.

        When `allow_multiple` is false, the matches are passed to `assert_one` and its
        result is returned (presumably this fails unless exactly one database matches;
        confirm against `assert_one`).
        """
        database_list = [
            Database.from_json(self.endpoints, database)
            for database in self.endpoints.database.query_by_name(name)
        ]

        if allow_multiple:
            return database_list
        else:
            return assert_one(database_list)

Methods

def get_current_user(self) ‑> User

Returns the current logged-in user.

Expand source code
def get_current_user(self) -> User:
    """
    Fetches the currently logged-in user and wraps it in a `User`.
    """
    user_json = self.endpoints.user.current()
    return User.from_json(self.endpoints, user_json)
def get_database_by_name(self, name: str, allow_multiple: bool = False) ‑> Union[Database, List[Database]]

Queries a database by its name and returns it. If allow_multiple is true, it will return a list of databases.

Expand source code
def get_database_by_name(self, name: str, allow_multiple: bool = False) -> Union[Database, List[Database]]:
    """
    Looks up databases by name.

    Every match is returned as a list when `allow_multiple` is true. Otherwise
    the matches are handed to `assert_one` and its result is returned.
    """
    matches: List[Database] = []
    for json_db in self.endpoints.database.query_by_name(name):
        matches.append(Database.from_json(self.endpoints, json_db))

    return matches if allow_multiple else assert_one(matches)
def get_database_by_uid(self, uid: str) ‑> Database

Queries a database by its UID and returns it.

Expand source code
def get_database_by_uid(self, uid: str) -> Database:
    """
    Looks up the database with the given UID and wraps the result in a `Database`.
    """
    database_json = self.endpoints.database.query_by_uid(uid)
    return Database.from_json(self.endpoints, database_json)
def get_database_list(self) ‑> List[Database]

Returns a list of all databases that the current user has access to.

Expand source code
def get_database_list(self) -> List[Database]:
    """
    Lists every database that the current user has access to.
    """
    databases: List[Database] = []
    for json_db in self.endpoints.database.list():
        databases.append(Database.from_json(self.endpoints, json_db))
    return databases
def get_default_database(self) ‑> Database

Returns the default database for the user (this defaults to the public database).

Expand source code
def get_default_database(self) -> Database:
    """
    Resolves the current user's default database (this defaults to the public
    database).

    Raises:
        Exception: if the current user's `default_database` is `None`.
    """

    # TODO(zwade): Have a way of specifying a per-user default
    default_uid = self.get_current_user().default_database
    if default_uid is not None:
        # The user only stores a UID; resolve it to a full `Database`.
        return self.get_database_by_uid(default_uid)

    raise Exception("Trying to find the default database, but none is specified")
class Database (endpoints: ApiEndpoints, uid: str, *, name: str, connection_options: JsonDatabaseOptions)

Represents a database. This database could either be the public database, or a user's private database that they have connected to the dataTap platform.

This class provides utilities for viewing and updating the database's configuration, as well as inspecting its contents.

Expand source code
class Database:
    """
    Represents a database. This database could either be the public database,
    or a user's private database that they have connected to the dataTap
    platform.

    This class provides utilities for viewing the database's configuration,
    as well as inspecting its contents.
    """
    _endpoints: ApiEndpoints

    uid: str
    """
    The UID of this database.
    """

    name: str
    """
    The name of this database.
    """

    connection_options: JsonDatabaseOptions
    """
    How this database is configured. Sensitive details, such as database
    credentials, are omitted.
    """

    @staticmethod
    def from_json(endpoints: ApiEndpoints, json: JsonDatabase) -> Database:
        """
        Creates a `Database` from a `JsonDatabase`.

        Reads the `uid`, `name`, and `connectionOptions` keys of `json`.
        """
        return Database(
            endpoints,
            uid = json["uid"],
            name = json["name"],
            connection_options = json["connectionOptions"]
        )

    def __init__(self, endpoints: ApiEndpoints, uid: str, *, name: str, connection_options: JsonDatabaseOptions):
        self._endpoints = endpoints
        self.uid = uid
        self.name = name
        self.connection_options = connection_options

    def get_repository_list(self) -> List[Repository]:
        """
        Returns a list of all `Repository`s that are stored in this database.
        """
        return [
            Repository.from_json(self._endpoints, self.uid, repository_json)
            for repository_json in self._endpoints.repository.list(self.uid)
        ]


    @overload
    def get_repository(self, slug: str) -> Repository: ...
    @overload
    def get_repository(self, namespace: str, name: str) -> Repository: ...
    def get_repository(self, *args: str) -> Repository:
        """
        Queries a `Repository` by its namespace and name, or via its slug (namespace/name).

        Raises:
            ValueError: if the slug is not of the form `namespace/name`, or if
                the number of arguments is neither one nor two.
        """
        if len(args) == 1:
            slug_parts = args[0].split("/")
            # Validate up front so callers get a readable message rather than
            # an opaque tuple-unpacking error.
            if len(slug_parts) != 2:
                raise ValueError(f"Invalid repository slug {args[0]!r}: expected 'namespace/name'")
            namespace, name = slug_parts
        elif len(args) == 2:
            namespace, name = args
        else:
            raise ValueError(
                f"get_repository() takes a slug or a namespace and a name, but {len(args)} arguments were given"
            )

        return Repository.from_json(self._endpoints, self.uid, self._endpoints.repository.query(self.uid, namespace, name))

    @overload
    def get_dataset(self, slug: str) -> Dataset: ...
    @overload
    def get_dataset(self, namespace: str, name: str, tag: str) -> Dataset: ...
    def get_dataset(self, *args: str) -> Dataset:
        """
        Queries a `Dataset` by its namespace, name, and tag, or via its slug (namespace/name:tag).

        Raises:
            ValueError: if the slug is not of the form `namespace/name:tag`, or
                if the number of arguments is neither one nor three.
        """
        if len(args) == 1:
            slug_parts = args[0].split(":")
            if len(slug_parts) != 2:
                raise ValueError(f"Invalid dataset slug {args[0]!r}: expected 'namespace/name:tag'")
            repo_slug, tag = slug_parts
            repo = self.get_repository(repo_slug)
        elif len(args) == 3:
            namespace, name, tag = args
            repo = self.get_repository(namespace, name)
        else:
            raise ValueError(
                f"get_dataset() takes a slug or a namespace, a name, and a tag, but {len(args)} arguments were given"
            )

        return repo.get_dataset(tag)

    def __repr__(self):
        return basic_repr("Database", self.uid, name = self.name)

Class variables

var connection_options : JsonDatabaseOptionsDirect

How this database is configured. Sensitive details, such as database credentials, are omitted.

var name : str

The name of this database.

var uid : str

The UID of this database.

Static methods

def from_json(endpoints: ApiEndpoints, json: JsonDatabase) ‑> Database

Creates a Database from a JsonDatabase.

Expand source code
@staticmethod
def from_json(endpoints: ApiEndpoints, json: JsonDatabase) -> Database:
    """
    Builds a `Database` out of the `uid`, `name`, and `connectionOptions`
    fields of a `JsonDatabase`.
    """
    uid = json["uid"]
    name = json["name"]
    options = json["connectionOptions"]
    return Database(endpoints, uid = uid, name = name, connection_options = options)

Methods

def get_dataset(self, *args: str) ‑> Dataset

Queries a Dataset by its namespace, name, and tag, or via its slug (namespace/name:tag).

Expand source code
def get_dataset(self, *args: str) -> Dataset:
    """
    Queries a `Dataset` by its namespace, name, and tag, or via its slug (namespace/name:tag).

    Raises:
        ValueError: if the slug is not of the form `namespace/name:tag`, or
            if the number of arguments is neither one nor three.
    """
    if len(args) == 1:
        slug_parts = args[0].split(":")
        # Validate up front so callers get a readable message rather than
        # an opaque tuple-unpacking error.
        if len(slug_parts) != 2:
            raise ValueError(f"Invalid dataset slug {args[0]!r}: expected 'namespace/name:tag'")
        repo_slug, tag = slug_parts
        repo = self.get_repository(repo_slug)
    elif len(args) == 3:
        namespace, name, tag = args
        repo = self.get_repository(namespace, name)
    else:
        raise ValueError(
            f"get_dataset() takes a slug or a namespace, a name, and a tag, but {len(args)} arguments were given"
        )

    return repo.get_dataset(tag)
def get_repository(self, *args: str) ‑> Repository

Queries a Repository by its namespace and name, or via its slug (namespace/name).

Expand source code
def get_repository(self, *args: str) -> Repository:
    """
    Queries a `Repository` by its namespace and name, or via its slug (namespace/name).

    Raises:
        ValueError: if the slug is not of the form `namespace/name`, or if
            the number of arguments is neither one nor two.
    """
    if len(args) == 1:
        slug_parts = args[0].split("/")
        # Validate up front so callers get a readable message rather than
        # an opaque tuple-unpacking error.
        if len(slug_parts) != 2:
            raise ValueError(f"Invalid repository slug {args[0]!r}: expected 'namespace/name'")
        namespace, name = slug_parts
    elif len(args) == 2:
        namespace, name = args
    else:
        raise ValueError(
            f"get_repository() takes a slug or a namespace and a name, but {len(args)} arguments were given"
        )

    return Repository.from_json(self._endpoints, self.uid, self._endpoints.repository.query(self.uid, namespace, name))
def get_repository_list(self) ‑> List[Repository]

Returns a list of all Repositorys that are stored in this database.

Expand source code
def get_repository_list(self) -> List[Repository]:
    """
    Lists every `Repository` stored in this database.
    """
    repositories: List[Repository] = []
    for repository_json in self._endpoints.repository.list(self.uid):
        repositories.append(Repository.from_json(self._endpoints, self.uid, repository_json))
    return repositories
class Dataset (endpoints: ApiEndpoints, uid: str, *, database: str, repository: DatasetRepository, splits: List[str], template: ImageAnnotationTemplate)

Represents a concrete version of a dataset. Critically, Datasets cannot be changed once they're created.

For reproducible training, ensure that you store the specific Dataset used during training.

Expand source code
class Dataset:
    """
    A concrete version of a dataset. Critically, a `Dataset` cannot be changed
    once it has been created.

    For reproducible training, make sure to record the specific `Dataset` that
    was used during training.
    """
    _endpoints: ApiEndpoints

    uid: str
    """
    The UID of this `Dataset`.
    """

    database: str
    """
    The UID of the database in which this dataset lives.
    """

    repository: DatasetRepository
    """
    The repository this dataset belongs to.
    """

    splits: List[str]
    """
    A list of all the splits that this dataset has. By default, this will be
    `["training", "validation"]`.
    """

    template: ImageAnnotationTemplate
    """
    The `ImageAnnotationTemplate` that all annotations in this dataset version adhere to.
    """

    def __init__(
        self,
        endpoints: ApiEndpoints,
        uid: str,
        *,
        database: str,
        repository: DatasetRepository,
        splits: List[str],
        template: ImageAnnotationTemplate
    ):
        self._endpoints = endpoints
        self.uid, self.database = uid, database
        self.repository = repository
        self.splits = splits
        self.template = template

    @staticmethod
    def from_json(endpoints: ApiEndpoints, json: JsonDataset) -> Dataset:
        """
        Builds a new `Dataset` from a `JsonDataset`.

        The `repository` and `template` entries are converted through
        `DatasetRepository.from_json` and `ImageAnnotationTemplate.from_json`;
        the `uid`, `database`, and `splits` entries are used as-is.
        """
        uid = json["uid"]
        database_uid = json["database"]
        repository = DatasetRepository.from_json(json["repository"])
        splits = json["splits"]
        template = ImageAnnotationTemplate.from_json(json["template"])
        return Dataset(
            endpoints,
            uid = uid,
            database = database_uid,
            repository = repository,
            splits = splits,
            template = template,
        )

    @overload
    def stream_split(self, split: str) -> Generator[ImageAnnotation, None, None]: ...
    @overload
    def stream_split(self, split: str, chunk: int, nchunks: int) -> Generator[ImageAnnotation, None, None]: ...
    def stream_split(self, split: str, chunk: int = 0, nchunks: int = 1) -> Generator[ImageAnnotation, None, None]:
        """
        Streams one split of this dataset from the database, yielding each entry as an
        `ImageAnnotation` that adheres to this dataset version's annotation template.

        When `chunk` and `nchunks` are left at their defaults the full split is streamed.
        Otherwise the split is broken into `nchunks` pieces and only the piece identified
        by `chunk` is streamed.
        """
        droplets = self._endpoints.dataset.stream_split(
            database_uid = self.database,
            namespace = self.repository.namespace,
            name = self.repository.name,
            uid = self.uid,
            split = split,
            chunk = chunk,
            nchunks = nchunks,
        )
        for raw_annotation in droplets:
            yield ImageAnnotation.from_json(raw_annotation)

    def get_stable_identifier(self) -> str:
        """
        Returns an identifier of the form `namespace/name:uid` built from this
        dataset's repository and UID.
        """
        repo = self.repository
        return f"{repo.namespace}/{repo.name}:{self.uid}"

    def __repr__(self) -> str:
        identifier = self.get_stable_identifier()
        return basic_repr("Dataset", identifier, database = self.database, splits = self.splits)

Class variables

var database : str

The UID of the database in which this dataset lives.

var repository : DatasetRepository

The repository this dataset belongs to.

var splits : List[str]

A list of all the splits that this dataset has. By default, this will be ["training", "validation"].

var template : ImageAnnotationTemplate

The ImageAnnotationTemplate that all annotations in this dataset version adhere to.

var uid : str

The UID of this Dataset.

Static methods

def from_json(endpoints: ApiEndpoints, json: JsonDataset) ‑> Dataset

Creates a new Dataset from a JsonDataset.

Expand source code
@staticmethod
def from_json(endpoints: ApiEndpoints, json: JsonDataset) -> Dataset:
    """
    Builds a new `Dataset` from a `JsonDataset`.

    The `repository` and `template` entries are converted through
    `DatasetRepository.from_json` and `ImageAnnotationTemplate.from_json`;
    the `uid`, `database`, and `splits` entries are used as-is.
    """
    uid = json["uid"]
    database_uid = json["database"]
    repository = DatasetRepository.from_json(json["repository"])
    splits = json["splits"]
    template = ImageAnnotationTemplate.from_json(json["template"])
    return Dataset(
        endpoints,
        uid = uid,
        database = database_uid,
        repository = repository,
        splits = splits,
        template = template,
    )

Methods

def get_stable_identifier(self) ‑> str
Expand source code
def get_stable_identifier(self) -> str:
    """
    Returns an identifier of the form `namespace/name:uid` built from this
    dataset's repository and UID.
    """
    repo = self.repository
    return f"{repo.namespace}/{repo.name}:{self.uid}"
def stream_split(self, split: str, chunk: int = 0, nchunks: int = 1) ‑> Generator[ImageAnnotation, None, None]

Streams a specific split of this dataset from the database. All yielded annotations will be of type ImageAnnotation and adhere to this dataset version's annotation template.

If chunk and nchunks are omitted, then the full split will be streamed. Otherwise, the split will be broken into nchunks pieces, and only the chunk identified by chunk will be streamed.

Expand source code
def stream_split(self, split: str, chunk: int = 0, nchunks: int = 1) -> Generator[ImageAnnotation, None, None]:
    """
    Streams one split of this dataset from the database, yielding each entry as an
    `ImageAnnotation` that adheres to this dataset version's annotation template.

    When `chunk` and `nchunks` are left at their defaults the full split is streamed.
    Otherwise the split is broken into `nchunks` pieces and only the piece identified
    by `chunk` is streamed.
    """
    droplets = self._endpoints.dataset.stream_split(
        database_uid = self.database,
        namespace = self.repository.namespace,
        name = self.repository.name,
        uid = self.uid,
        split = split,
        chunk = chunk,
        nchunks = nchunks,
    )
    for raw_annotation in droplets:
        yield ImageAnnotation.from_json(raw_annotation)
class Repository (endpoints: ApiEndpoints, database: str, *, name: str, namespace: str, tags: Sequence[Tag])

Represents a repository that contains one or more datasets.

Expand source code
class Repository:
    """
    Represents a repository that contains one or more datasets.
    """
    _endpoints: ApiEndpoints
    _database: str

    name: str
    """
    The name of this repository.
    """

    namespace: str
    """
    The namespace of this repository.
    """

    tags: Sequence[Tag]
    """
    The tags available for this repository.
    """

    @staticmethod
    def from_json(endpoints: ApiEndpoints, database: str, json: JsonRepository) -> Repository:
        """
        Creates a `Repository` from a `JsonRepository`.

        `database` is stored on the repository and later passed as the first
        argument of the query issued by `get_dataset`. Each entry of
        `json["tags"]` is converted with `Tag.from_json`.
        """
        return Repository(
            endpoints,
            database,
            name = json["name"],
            namespace = json["namespace"],
            tags = [Tag.from_json(tag) for tag in json["tags"]],
        )

    def __init__(self, endpoints: ApiEndpoints, database: str, *, name: str, namespace: str, tags: Sequence[Tag]):
        self._endpoints = endpoints
        self._database = database
        self.name = name
        self.namespace = namespace
        self.tags = tags

    def get_dataset(self, tag: str) -> Dataset:
        """
        Fetches dataset by its tag (or UID).
        """
        return Dataset.from_json(
            self._endpoints,
            self._endpoints.dataset.query(self._database, self.namespace, self.name, tag)
        )

    def __repr__(self) -> str:
        # Only the tag slugs are shown, not the full `Tag` objects.
        return basic_repr("Repository", name = self.name, namespace = self.namespace, tags = [tag.tag for tag in self.tags])

Class variables

var name : str

The name of this repository.

var namespace : str

The namespace of this repository.

var tags : Sequence[Tag]

The tags available for this repository.

Static methods

def from_json(endpoints: ApiEndpoints, database: str, json: JsonRepository) ‑> Repository

Creates a Repository from a JsonRepository.

Expand source code
@staticmethod
def from_json(endpoints: ApiEndpoints, database: str, json: JsonRepository) -> Repository:
    """
    Creates a `Repository` from a `JsonRepository`.

    `database` is passed through to the `Repository` constructor unchanged.
    Each entry of `json["tags"]` is converted with `Tag.from_json`.
    """
    return Repository(
        endpoints,
        database,
        name = json["name"],
        namespace = json["namespace"],
        tags = [Tag.from_json(tag) for tag in json["tags"]],
    )

Methods

def get_dataset(self, tag: str) ‑> Dataset

Fetches dataset by its tag (or UID).

Expand source code
def get_dataset(self, tag: str) -> Dataset:
    """
    Looks up a dataset of this repository by its tag (or UID) and wraps the
    result in a `Dataset`.
    """
    dataset_json = self._endpoints.dataset.query(self._database, self.namespace, self.name, tag)
    return Dataset.from_json(self._endpoints, dataset_json)
class Split (split: str, annotation_count: int)

Represents the splits available for a given dataset.

Expand source code
class Split:
    """
    Describes a split available for a given dataset, together with its size.
    """

    split: str
    """
    The kind of the split (e.g., "training" or "validation").
    """

    annotation_count: int
    """
    How many annotations are available in this split.
    """

    def __init__(self, split: str, annotation_count: int):
        self.split, self.annotation_count = split, annotation_count

    @staticmethod
    def from_json(json: JsonSplit) -> Split:
        """
        Builds a `Split` from the `split` and `annotationCount` fields of a
        `JsonSplit`.
        """
        kind = json["split"]
        count = json["annotationCount"]
        return Split(kind, count)

    def __repr__(self) -> str:
        extra = {"annotation_count": self.annotation_count}
        return basic_repr("Split", self.split, **extra)

Class variables

var annotation_count : int

The number of annotations available in this split.

var split : str

The kind of the split (e.g., "training" or "validation").

Static methods

def from_json(json: JsonSplit) ‑> Split

Creates a Split from a JsonSplit

Expand source code
@staticmethod
def from_json(json: JsonSplit) -> Split:
    """
    Builds a `Split` from the `split` and `annotationCount` fields of a
    `JsonSplit`.
    """
    kind = json["split"]
    count = json["annotationCount"]
    return Split(kind, count)
class Tag (tag: str, dataset: str, updated_at: datetime, splits: Sequence[Split])

Represents a single tag that may be accessed in this repository.

Expand source code
class Tag:
    """
    A single tag through which a dataset of a repository may be accessed.
    """

    tag: str
    """
    A slug representing this tag (such as "latest").
    """

    dataset: str
    """
    The uid of the dataset to which this tag points.
    """

    updated_at: datetime
    """
    When this tag was most recently updated.
    """

    splits: Sequence[Split]
    """
    A list of splits available on this tag.
    """

    def __init__(self, tag: str, dataset: str, updated_at: datetime, splits: Sequence[Split]):
        self.tag, self.dataset = tag, dataset
        self.updated_at = updated_at
        self.splits = splits

    @staticmethod
    def from_json(json: JsonTag) -> Tag:
        """
        Builds a `Tag` from a `JsonTag`.

        `updatedAt` is divided by 1000 before being handed to
        `datetime.fromtimestamp`, i.e. it is treated as a timestamp in
        milliseconds; no timezone is passed, so the resulting datetime is
        naive. Each entry of `splits` is converted with `Split.from_json`.
        """
        slug = json["tag"]
        dataset_uid = json["dataset"]
        updated_at = datetime.fromtimestamp(json["updatedAt"] / 1000)
        splits = [Split.from_json(split_json) for split_json in json["splits"]]
        return Tag(slug, dataset_uid, updated_at, splits)

    def __repr__(self) -> str:
        extra = {"dataset": self.dataset, "splits": self.splits}
        return basic_repr("Tag", self.tag, **extra)

Class variables

var dataset : str

The uid of the dataset to which this tag points.

var splits : Sequence[Split]

A list of splits available on this tag.

var tag : str

A slug representing this tag (such as "latest").

var updated_at : datetime.datetime

When this tag was most recently updated.

Static methods

def from_json(json: JsonTag) ‑> Tag

Creates a Tag from a JsonTag.

Expand source code
@staticmethod
def from_json(json: JsonTag) -> Tag:
    """
    Builds a `Tag` from a `JsonTag`.

    `updatedAt` is divided by 1000 before being handed to
    `datetime.fromtimestamp`, i.e. it is treated as a timestamp in
    milliseconds. Each entry of `splits` is converted with `Split.from_json`.
    """
    slug = json["tag"]
    dataset_uid = json["dataset"]
    updated_at = datetime.fromtimestamp(json["updatedAt"] / 1000)
    splits = [Split.from_json(split_json) for split_json in json["splits"]]
    return Tag(slug, dataset_uid, updated_at, splits)
class User (endpoints: ApiEndpoints, uid: str, *, username: str, email: str, default_database: Optional[str])

Represents a user account in the dataTap platform.

Expand source code
class User:
    """
    A user account on the dataTap platform.
    """

    _endpoints: ApiEndpoints

    uid: str
    """
    The user's UID.
    """

    username: str
    """
    The user's username.
    """

    email: str
    """
    The user's email address.
    """

    default_database: Optional[str]
    """
    The user's default database, or `None` when no default is set.
    """

    def __init__(self, endpoints: ApiEndpoints, uid: str, *, username: str, email: str, default_database: Optional[str]):
        self._endpoints = endpoints
        self.uid = uid
        self.username, self.email = username, email
        self.default_database = default_database

    @staticmethod
    def from_json(endpoints: ApiEndpoints, json: JsonUser) -> User:
        """
        Builds a `User` from the `uid`, `username`, `email`, and
        `defaultDatabase` fields of a `JsonUser`.
        """
        uid = json["uid"]
        username = json["username"]
        email = json["email"]
        default_database = json["defaultDatabase"]
        return User(endpoints, uid, username = username, email = email, default_database = default_database)

    def __repr__(self) -> str:
        # The default database is intentionally left out, as in the field list below.
        extra = {"username": self.username, "email": self.email}
        return basic_repr("User", self.uid, **extra)

Class variables

var default_database : Optional[str]

The user's default database

var email : str

The user's email address.

var uid : str

The user's UID.

var username : str

The user's username.

Static methods

def from_json(endpoints: ApiEndpoints, json: JsonUser) ‑> User

Creates a User from a JsonUser.

Expand source code
@staticmethod
def from_json(endpoints: ApiEndpoints, json: JsonUser) -> User:
    """
    Builds a `User` from the `uid`, `username`, `email`, and
    `defaultDatabase` fields of a `JsonUser`.
    """
    uid = json["uid"]
    username = json["username"]
    email = json["email"]
    default_database = json["defaultDatabase"]
    return User(endpoints, uid, username = username, email = email, default_database = default_database)