Module datatap.api.entities
The datatap.api.entities
submodule contains several entities
that provide a user-friendly abstraction for the dataTap API.
Expand source code
"""
The `datatap.api.entities` submodule contains several entities
that provide a user-friendly abstraction for the dataTap API.
"""
from .api import Api
from .user import User
from .database import Database
from .dataset import AnyDataset, Dataset
from .repository import Repository, Tag, Split
__all__ = [
"Api",
"User",
"Database",
"AnyDataset",
"Dataset",
"Repository",
"Tag",
"Split",
]
Sub-modules
datatap.api.entities.api
datatap.api.entities.database
datatap.api.entities.dataset
datatap.api.entities.repository
datatap.api.entities.user
Classes
class Api (api_key: Optional[str] = None, uri: Optional[str] = None)
-
The
Api
object is the primary method of interacting with the dataTap API. The
Api
constructor takes two optional arguments. The first,
api_key
, should be the current user's personal API key. In order to encourage good secret practices, this class will use the value found in the DATATAP_API_KEY
environment variable if no key is passed in. Consider using environment variables or another secret manager for your API keys. The second argument is
uri
. This should only be used if you would like to target a different API server than the default. For instance, if you are using a proxy to reach the API, you can use the uri
argument to point toward your proxy. This object encapsulates most of the logic for interacting with the API. For instance, to get a list of all repositories that a user has access to, you can run
from datatap import Api

api = Api()
print([
    repository
    for database in api.get_database_list()
    for repository in database.get_repository_list()
])
For more details on the functionality provided by the Api object, take a look at its documentation.
Expand source code
class Api:
    """
    The `Api` object is the main entry point for talking to the dataTap API.

    Both constructor arguments are optional.

    `api_key` should be the current user's personal API key. To encourage good
    secret practices, the value found in the `DATATAP_API_KEY` environment variable
    is used when no key is passed in. Consider using environment variables or
    another secret manager for your API keys.

    `uri` should only be supplied when targeting a different API server than the
    default, for instance when a proxy sits between you and the API.

    Most interaction with the API starts from this object. For example, to list
    every repository the current user has access to:

    ```py
    from datatap import Api

    api = Api()
    print([
        repository
        for database in api.get_database_list()
        for repository in database.get_repository_list()
    ])
    ```
    """

    def __init__(self, api_key: Optional[str] = None, uri: Optional[str] = None):
        # Every request made through this object goes via this endpoint collection.
        self.endpoints = ApiEndpoints(api_key, uri)

    def get_current_user(self) -> User:
        """
        Fetches the currently logged-in user and wraps it in a `User`.
        """
        user_json = self.endpoints.user.current()
        return User.from_json(self.endpoints, user_json)

    def get_database_list(self) -> List[Database]:
        """
        Lists every database the current user has access to.
        """
        databases: List[Database] = []
        for json_db in self.endpoints.database.list():
            databases.append(Database.from_json(self.endpoints, json_db))
        return databases

    def get_default_database(self) -> Database:
        """
        Fetches the database referenced by the current user's `default_database`
        (documented upstream as defaulting to the public database).

        Raises an `Exception` when the user has no default database set.
        """

        # TODO(zwade): Have a way of specifying a per-user default
        default_uid = self.get_current_user().default_database
        if default_uid is not None:
            return self.get_database_by_uid(default_uid)

        raise Exception("Trying to find the default database, but none is specified")

    def get_database_by_uid(self, uid: str) -> Database:
        """
        Looks up a single database by its UID.
        """
        database_json = self.endpoints.database.query_by_uid(uid)
        return Database.from_json(self.endpoints, database_json)

    @overload
    def get_database_by_name(self, name: str, allow_multiple: Literal[True]) -> List[Database]: ...
    @overload
    def get_database_by_name(self, name: str, allow_multiple: Literal[False] = False) -> Database: ...
    def get_database_by_name(self, name: str, allow_multiple: bool = False) -> Union[Database, List[Database]]:
        """
        Looks up databases by name.

        With `allow_multiple` set, every match is returned as a list. Otherwise
        the matches are handed to `assert_one` and its result is returned.
        """
        matches = [
            Database.from_json(self.endpoints, entry)
            for entry in self.endpoints.database.query_by_name(name)
        ]

        return matches if allow_multiple else assert_one(matches)
Methods
def get_current_user(self) ‑> User
-
Returns the current logged-in user.
Expand source code
def get_current_user(self) -> User:
    """
    Fetches the currently logged-in user and wraps it in a `User`.
    """
    user_json = self.endpoints.user.current()
    return User.from_json(self.endpoints, user_json)
def get_database_by_name(self, name: str, allow_multiple: bool = False) ‑> Union[Database, List[Database]]
-
Queries a database by its name and returns it. If
allow_multiple
is true, it will return a list of databases.
Expand source code
def get_database_by_name(self, name: str, allow_multiple: bool = False) -> Union[Database, List[Database]]:
    """
    Looks up databases by name.

    With `allow_multiple` set, every match is returned as a list. Otherwise
    the matches are handed to `assert_one` and its result is returned.
    """
    matches = [
        Database.from_json(self.endpoints, entry)
        for entry in self.endpoints.database.query_by_name(name)
    ]

    return matches if allow_multiple else assert_one(matches)
def get_database_by_uid(self, uid: str) ‑> Database
-
Queries a database by its UID and returns it.
Expand source code
def get_database_by_uid(self, uid: str) -> Database:
    """
    Looks up a single database by its UID.
    """
    database_json = self.endpoints.database.query_by_uid(uid)
    return Database.from_json(self.endpoints, database_json)
def get_database_list(self) ‑> List[Database]
-
Returns a list of all databases that the current user has access to.
Expand source code
def get_database_list(self) -> List[Database]:
    """
    Lists every database the current user has access to.
    """
    databases: List[Database] = []
    for json_db in self.endpoints.database.list():
        databases.append(Database.from_json(self.endpoints, json_db))
    return databases
def get_default_database(self) ‑> Database
-
Returns the default database for the user (this defaults to the public database).
Expand source code
def get_default_database(self) -> Database:
    """
    Fetches the database referenced by the current user's `default_database`
    (documented upstream as defaulting to the public database).

    Raises an `Exception` when the user has no default database set.
    """

    # TODO(zwade): Have a way of specifying a per-user default
    default_uid = self.get_current_user().default_database
    if default_uid is not None:
        return self.get_database_by_uid(default_uid)

    raise Exception("Trying to find the default database, but none is specified")
class Database (endpoints: ApiEndpoints, uid: str, *, name: str, connection_options: JsonDatabaseOptions)
-
Represents a database. This database could either be the public database, or a user's private database that they have connected to the dataTap platform.
This class provides utilities for viewing and updating the database's configuration, as well as inspecting its contents.
Expand source code
class Database:
    """
    Represents a database. This database could either be the public database, or a user's private database that they
    have connected to the dataTap platform.

    This class exposes the database's configuration and provides helpers for looking up the repositories and
    datasets stored in it.
    """

    _endpoints: ApiEndpoints

    uid: str
    """
    The UID of this database.
    """

    name: str
    """
    The name of this database.
    """

    connection_options: JsonDatabaseOptions
    """
    How this database is configured. Sensitive details, such as database
    credentials, are omitted.
    """

    @staticmethod
    def from_json(endpoints: ApiEndpoints, json: JsonDatabase) -> Database:
        """
        Creates a `Database` from a `JsonDatabase`.
        """
        return Database(
            endpoints,
            uid = json["uid"],
            name = json["name"],
            connection_options = json["connectionOptions"]
        )

    def __init__(self, endpoints: ApiEndpoints, uid: str, *, name: str, connection_options: JsonDatabaseOptions):
        self._endpoints = endpoints
        self.uid = uid
        self.name = name
        self.connection_options = connection_options

    def get_repository_list(self) -> List[Repository]:
        """
        Returns a list of all `Repository`s that are stored in this database.
        """
        return [
            Repository.from_json(self._endpoints, self.uid, repository_json)
            for repository_json in self._endpoints.repository.list(self.uid)
        ]

    @overload
    def get_repository(self, slug: str) -> Repository: ...
    @overload
    def get_repository(self, namespace: str, name: str) -> Repository: ...
    def get_repository(self, *args: str, **kwargs: Any) -> Repository:
        """
        Queries a `Repository` by its namespace and name, or via its slug
        (namespace/name).

        Raises a `ValueError` if keyword arguments are passed, if the number of
        positional arguments is neither one nor two, or if the slug is not of
        the form `namespace/name`.
        """
        if kwargs:
            raise ValueError("get_repository is positional-only")

        if len(args) == 1:
            # Validate explicitly so a malformed slug produces a readable error
            # instead of a tuple-unpacking failure.
            parts = args[0].split("/")
            if len(parts) != 2:
                raise ValueError(f"Expected a repository slug of the form 'namespace/name', got {args[0]!r}")
            namespace, name = parts
        elif len(args) == 2:
            namespace, name = args
        else:
            raise ValueError(
                f"get_repository expects a slug, or a namespace and a name, but got {len(args)} arguments"
            )

        return Repository.from_json(
            self._endpoints,
            self.uid,
            self._endpoints.repository.query(self.uid, namespace, name)
        )

    @overload
    def get_dataset(self, slug: str) -> AnyDataset: ...
    @overload
    def get_dataset(self, namespace: str, name: str, tag: str) -> AnyDataset: ...
    def get_dataset(self, *args: str, **kwargs: Any) -> AnyDataset:
        """
        Queries a `Dataset` by its namespace, name, and tag, or via its slug
        (namespace/name:tag).

        Raises a `ValueError` if keyword arguments are passed, if the number of
        positional arguments is neither one nor three, or if the slug is not of
        the form `namespace/name:tag`.
        """
        if kwargs:
            raise ValueError("get_dataset is positional-only")

        if len(args) == 1:
            # Split off the tag first; the remainder is a repository slug that
            # `get_repository` validates itself.
            parts = args[0].split(":")
            if len(parts) != 2:
                raise ValueError(f"Expected a dataset slug of the form 'namespace/name:tag', got {args[0]!r}")
            repo_slug, tag = parts
            repo = self.get_repository(repo_slug)
        elif len(args) == 3:
            namespace, name, tag = args
            repo = self.get_repository(namespace, name)
        else:
            raise ValueError(
                f"get_dataset expects a slug, or a namespace, a name and a tag, but got {len(args)} arguments"
            )

        return repo.get_dataset(tag)

    def __repr__(self):
        return basic_repr("Database", self.uid, name = self.name)
Class variables
var connection_options : JsonDatabaseOptionsDirect
-
How this database is configured. Sensitive details, such as database credentials, are omitted.
var name : str
-
The name of this database.
var uid : str
-
The UID of this database.
Static methods
def from_json(endpoints: ApiEndpoints, json: JsonDatabase) ‑> Database
-
Creates a
Database
from a JsonDatabase
.
Expand source code
@staticmethod
def from_json(endpoints: ApiEndpoints, json: JsonDatabase) -> Database:
    """
    Builds a `Database` from its `JsonDatabase` representation.
    """
    uid = json["uid"]
    name = json["name"]
    connection_options = json["connectionOptions"]
    return Database(endpoints, uid = uid, name = name, connection_options = connection_options)
Methods
def get_dataset(self, *args: str, **kwargs: Any) ‑> Union[Dataset[ImageAnnotationTemplate], Dataset[VideoAnnotationTemplate]]
-
Queries a
Dataset
by its namespace, name, and tag, or via its slug (namespace/name:tag).
Expand source code
def get_dataset(self, *args: str, **kwargs: Any) -> AnyDataset:
    """
    Queries a `Dataset` by its namespace, name, and tag, or via its slug
    (namespace/name:tag).

    Raises a `ValueError` if keyword arguments are passed, if the number of
    positional arguments is neither one nor three, or if the slug is not of
    the form `namespace/name:tag`.
    """
    if kwargs:
        # Previously this reported "get_repository", which pointed at the wrong method.
        raise ValueError("get_dataset is positional-only")

    if len(args) == 1:
        # Split off the tag first; the remainder is a repository slug that is
        # passed on to `get_repository`.
        parts = args[0].split(":")
        if len(parts) != 2:
            raise ValueError(f"Expected a dataset slug of the form 'namespace/name:tag', got {args[0]!r}")
        repo_slug, tag = parts
        repo = self.get_repository(repo_slug)
    elif len(args) == 3:
        namespace, name, tag = args
        repo = self.get_repository(namespace, name)
    else:
        raise ValueError(
            f"get_dataset expects a slug, or a namespace, a name and a tag, but got {len(args)} arguments"
        )

    return repo.get_dataset(tag)
def get_repository(self, *args: str, **kwargs: Any) ‑> Repository
-
Queries a
Repository
by its namespace and name, or via its slug (namespace/name).
Expand source code
def get_repository(self, *args: str, **kwargs: Any) -> Repository:
    """
    Queries a `Repository` by its namespace and name, or via its slug
    (namespace/name).

    Raises a `ValueError` if keyword arguments are passed, if the number of
    positional arguments is neither one nor two, or if the slug is not of
    the form `namespace/name`.
    """
    if kwargs:
        raise ValueError("get_repository is positional-only")

    if len(args) == 1:
        # Validate explicitly so a malformed slug produces a readable error
        # instead of a tuple-unpacking failure.
        parts = args[0].split("/")
        if len(parts) != 2:
            raise ValueError(f"Expected a repository slug of the form 'namespace/name', got {args[0]!r}")
        namespace, name = parts
    elif len(args) == 2:
        namespace, name = args
    else:
        raise ValueError(
            f"get_repository expects a slug, or a namespace and a name, but got {len(args)} arguments"
        )

    return Repository.from_json(
        self._endpoints,
        self.uid,
        self._endpoints.repository.query(self.uid, namespace, name)
    )
def get_repository_list(self) ‑> List[Repository]
-
Returns a list of all
Repository
s that are stored in this database.
Expand source code
def get_repository_list(self) -> List[Repository]:
    """
    Lists every `Repository` stored in this database.
    """
    repositories: List[Repository] = []
    for repository_json in self._endpoints.repository.list(self.uid):
        repositories.append(Repository.from_json(self._endpoints, self.uid, repository_json))
    return repositories
class Dataset (endpoints: ApiEndpoints, uid: str, *, database: str, repository: DatasetRepository, splits: List[str], template: Union[ImageAnnotationTemplate, VideoAnnotationTemplate])
-
Represents a concrete version of a dataset. Critically,
Dataset
s cannot be changed once they're created. For reproducible training, ensure that you store the specific
Dataset
used during training.
Expand source code
class Dataset(Generic[T]):
    """
    Represents a concrete version of a dataset. Critically, `Dataset`s cannot be changed once they're created.

    For reproducible training, ensure that you store the specific `Dataset` used during training.
    """

    _endpoints: ApiEndpoints

    uid: str
    """
    The UID of this `Dataset`.
    """

    database: str
    """
    The UID of the database in which this dataset lives.
    """

    repository: DatasetRepository
    """
    The repository this dataset belongs to.
    """

    splits: List[str]
    """
    The names of the splits this dataset has, e.g. `["training", "validation"]`.
    """

    template: T
    """
    The template that all annotations in this dataset version adhere to.
    """

    @staticmethod
    def from_json(endpoints: ApiEndpoints, json: JsonDataset) -> AnyDataset:
        """
        Builds a `Dataset` from its `JsonDataset` representation.

        The template's `"kind"` field selects between an image and a video
        annotation template; any other kind raises a `ValueError`.
        """
        template_json = json["template"]
        kind = template_json["kind"]

        template: Union[ImageAnnotationTemplate, VideoAnnotationTemplate]
        if kind == "ImageAnnotationTemplate":
            template = ImageAnnotationTemplate.from_json(template_json)
        elif kind == "VideoAnnotationTemplate":
            template = VideoAnnotationTemplate.from_json(template_json)
        else:
            raise ValueError(f"Unknown template kind: {kind}")

        return Dataset(
            endpoints,
            uid = json["uid"],
            database = json["database"],
            repository = DatasetRepository.from_json(json["repository"]),
            splits = json["splits"],
            template = template
        )

    def __init__(
        self,
        endpoints: ApiEndpoints,
        uid: str,
        *,
        database: str,
        repository: DatasetRepository,
        splits: List[str],
        template: Union[ImageAnnotationTemplate, VideoAnnotationTemplate]
    ):
        self._endpoints = endpoints
        self.uid = uid
        self.database = database
        self.repository = repository
        self.splits = splits
        self.template = template

    @overload
    def stream_split(
        self: Dataset[ImageAnnotationTemplate],
        split: str
    ) -> Generator[ImageAnnotation, None, None]: ...
    @overload
    def stream_split(
        self: Dataset[ImageAnnotationTemplate],
        split: str,
        chunk: int,
        nchunks: int
    ) -> Generator[ImageAnnotation, None, None]: ...
    @overload
    def stream_split(
        self: Dataset[VideoAnnotationTemplate],
        split: str
    ) -> Generator[VideoAnnotation, None, None]: ...
    @overload
    def stream_split(
        self: Dataset[VideoAnnotationTemplate],
        split: str,
        chunk: int,
        nchunks: int
    ) -> Generator[VideoAnnotation, None, None]: ...
    def stream_split(
        self,
        split: str,
        chunk: int = 0,
        nchunks: int = 1
    ) -> Generator[Union[ImageAnnotation, VideoAnnotation], None, None]:
        """
        Streams one split of this dataset from the database, yielding annotations
        that match this dataset's annotation template.

        When `chunk` and `nchunks` are left at their defaults the full split is
        streamed. Otherwise the split is broken into `nchunks` pieces and only
        the piece identified by `chunk` is streamed.

        Raises a `ValueError` while streaming if the template is neither an
        image nor a video annotation template.
        """
        droplets = self._endpoints.dataset.stream_split(
            database_uid = self.database,
            namespace = self.repository.namespace,
            name = self.repository.name,
            uid = self.uid,
            split = split,
            chunk = chunk,
            nchunks = nchunks,
        )

        for raw_annotation in droplets:
            if isinstance(self.template, ImageAnnotationTemplate):
                yield ImageAnnotation.from_json(raw_annotation)
                continue
            if isinstance(self.template, VideoAnnotationTemplate): # type: ignore - isinstance is excessive
                yield VideoAnnotation.from_json(raw_annotation)
                continue
            raise ValueError(f"Unknown template kind: {type(self.template)}")

    def get_stable_identifier(self) -> str:
        """
        Returns this dataset's identifier in the form `namespace/name:uid`.
        """
        repo = self.repository
        return "{}/{}:{}".format(repo.namespace, repo.name, self.uid)

    def __repr__(self) -> str:
        identifier = self.get_stable_identifier()
        return basic_repr("Dataset", identifier, database = self.database, splits = self.splits)
Ancestors
- typing.Generic
Class variables
var database : str
-
The UID of the database in which this dataset lives.
var repository : DatasetRepository
-
The repository this dataset belongs to.
var splits : List[str]
-
A list of all the splits that this dataset has. By default, this will be
["training", "validation"]
.
var template : ~T
-
The template that all annotations in this dataset version adhere to.
var uid : str
-
The UID of this
Dataset
.
Static methods
def from_json(endpoints: ApiEndpoints, json: JsonDataset) ‑> Union[Dataset[ImageAnnotationTemplate], Dataset[VideoAnnotationTemplate]]
-
Creates a new
Dataset
from a JsonDataset
.
Expand source code
@staticmethod
def from_json(endpoints: ApiEndpoints, json: JsonDataset) -> AnyDataset:
    """
    Builds a `Dataset` from its `JsonDataset` representation.

    The template's `"kind"` field selects between an image and a video
    annotation template; any other kind raises a `ValueError`.
    """
    template_json = json["template"]
    kind = template_json["kind"]

    template: Union[ImageAnnotationTemplate, VideoAnnotationTemplate]
    if kind == "ImageAnnotationTemplate":
        template = ImageAnnotationTemplate.from_json(template_json)
    elif kind == "VideoAnnotationTemplate":
        template = VideoAnnotationTemplate.from_json(template_json)
    else:
        raise ValueError(f"Unknown template kind: {kind}")

    return Dataset(
        endpoints,
        uid = json["uid"],
        database = json["database"],
        repository = DatasetRepository.from_json(json["repository"]),
        splits = json["splits"],
        template = template
    )
Methods
def get_stable_identifier(self) ‑> str
-
Expand source code
def get_stable_identifier(self) -> str:
    """
    Returns this dataset's identifier in the form `namespace/name:uid`.
    """
    repo = self.repository
    return "{}/{}:{}".format(repo.namespace, repo.name, self.uid)
def stream_split(self, split: str, chunk: int = 0, nchunks: int = 1) ‑> Generator[Union[ImageAnnotation, VideoAnnotation], None, None]
-
Streams a specific split of this dataset from the database. All yielded annotations will adhere to this dataset's annotation template.
If
chunk
and nchunks
are omitted, then the full split will be streamed. Otherwise, the split will be broken into nchunks
pieces, and only the chunk identified by chunk
will be streamed.
Expand source code
def stream_split(
    self,
    split: str,
    chunk: int = 0,
    nchunks: int = 1
) -> Generator[Union[ImageAnnotation, VideoAnnotation], None, None]:
    """
    Streams one split of this dataset from the database, yielding annotations
    that match this dataset's annotation template.

    When `chunk` and `nchunks` are left at their defaults the full split is
    streamed. Otherwise the split is broken into `nchunks` pieces and only
    the piece identified by `chunk` is streamed.

    Raises a `ValueError` while streaming if the template is neither an
    image nor a video annotation template.
    """
    droplets = self._endpoints.dataset.stream_split(
        database_uid = self.database,
        namespace = self.repository.namespace,
        name = self.repository.name,
        uid = self.uid,
        split = split,
        chunk = chunk,
        nchunks = nchunks,
    )

    for raw_annotation in droplets:
        if isinstance(self.template, ImageAnnotationTemplate):
            yield ImageAnnotation.from_json(raw_annotation)
            continue
        if isinstance(self.template, VideoAnnotationTemplate): # type: ignore - isinstance is excessive
            yield VideoAnnotation.from_json(raw_annotation)
            continue
        raise ValueError(f"Unknown template kind: {type(self.template)}")
class Repository (endpoints: ApiEndpoints, database: str, *, name: str, namespace: str, tags: Sequence[Tag])
-
Represents a repository that contains one or more datasets.
Expand source code
class Repository:
    """
    Represents a repository that contains one or more datasets.
    """

    _endpoints: ApiEndpoints
    _database: str

    name: str
    """
    The name of this repository.
    """

    namespace: str
    """
    The namespace of this repository.
    """

    tags: Sequence[Tag]
    """
    The tags available for this repository.
    """

    @staticmethod
    def from_json(endpoints: ApiEndpoints, database: str, json: JsonRepository) -> Repository:
        """
        Creates a `Repository` from a `JsonRepository`.
        """
        name = json["name"]
        namespace = json["namespace"]
        tags = []
        for tag_json in json["tags"]:
            tags.append(Tag.from_json(tag_json))

        return Repository(endpoints, database, name = name, namespace = namespace, tags = tags)

    def __init__(self, endpoints: ApiEndpoints, database: str, *, name: str, namespace: str, tags: Sequence[Tag]):
        self._endpoints = endpoints
        self._database = database
        self.name = name
        self.namespace = namespace
        self.tags = tags

    def get_dataset(self, tag: str) -> AnyDataset:
        """
        Fetches the dataset in this repository identified by the given tag (or UID).
        """
        dataset_json = self._endpoints.dataset.query(self._database, self.namespace, self.name, tag)
        return Dataset.from_json(self._endpoints, dataset_json)

    def __repr__(self) -> str:
        tag_names = [entry.tag for entry in self.tags]
        return basic_repr("Repository", name = self.name, namespace = self.namespace, tags = tag_names)
Class variables
var name : str
-
The name of this repository.
var namespace : str
-
The namespace of this repository.
var tags : Sequence[Tag]
-
The tags available for this repository.
Static methods
def from_json(endpoints: ApiEndpoints, database: str, json: JsonRepository) ‑> Repository
-
Creates a
Repository
from a JsonRepository
.
Expand source code
@staticmethod
def from_json(endpoints: ApiEndpoints, database: str, json: JsonRepository) -> Repository:
    """
    Creates a `Repository` from a `JsonRepository`.
    """
    name = json["name"]
    namespace = json["namespace"]
    tags = []
    for tag_json in json["tags"]:
        tags.append(Tag.from_json(tag_json))

    return Repository(endpoints, database, name = name, namespace = namespace, tags = tags)
Methods
def get_dataset(self, tag: str) ‑> Union[Dataset[ImageAnnotationTemplate], Dataset[VideoAnnotationTemplate]]
-
Fetches dataset by its tag (or UID).
Expand source code
def get_dataset(self, tag: str) -> AnyDataset:
    """
    Fetches the dataset in this repository identified by the given tag (or UID).
    """
    dataset_json = self._endpoints.dataset.query(self._database, self.namespace, self.name, tag)
    return Dataset.from_json(self._endpoints, dataset_json)
class Split (split: str, annotation_count: int)
-
Represents the splits available for a given dataset.
Expand source code
class Split:
    """
    Represents the splits available for a given dataset.
    """

    split: str
    """
    The kind of the split (e.g., "training" or "validation").
    """

    annotation_count: int
    """
    The number of annotations available in this split.
    """

    @staticmethod
    def from_json(json: JsonSplit) -> Split:
        """
        Builds a `Split` from its `JsonSplit` representation.
        """
        kind = json["split"]
        count = json["annotationCount"]
        return Split(kind, count)

    def __init__(self, split: str, annotation_count: int):
        self.split, self.annotation_count = split, annotation_count

    def __repr__(self) -> str:
        return basic_repr(
            "Split",
            self.split,
            annotation_count = self.annotation_count
        )
Class variables
var annotation_count : int
-
The number of annotations available in this split.
var split : str
-
The kind of the split (e.g, "training" or "validation").
Static methods
def from_json(json: JsonSplit) ‑> Split
-
Creates a
Split
from a JsonSplit
Expand source code
@staticmethod
def from_json(json: JsonSplit) -> Split:
    """
    Builds a `Split` from its `JsonSplit` representation.
    """
    kind = json["split"]
    count = json["annotationCount"]
    return Split(kind, count)
class Tag (tag: str, dataset: str, updated_at: datetime, splits: Sequence[Split])
-
Represents a single tag that may be accessed in this repository.
Expand source code
class Tag:
    """
    Represents a single tag that may be accessed in this repository.
    """

    tag: str
    """
    A slug representing this tag (such as "latest").
    """

    dataset: str
    """
    The uid of the dataset to which this tag points.
    """

    updated_at: datetime
    """
    When this tag was most recently updated.
    """

    splits: Sequence[Split]
    """
    A list of splits available on this tag.
    """

    @staticmethod
    def from_json(json: JsonTag) -> Tag:
        """
        Builds a `Tag` from its `JsonTag` representation.

        `updatedAt` is divided by 1000 before being handed to
        `datetime.fromtimestamp`, i.e. it is treated as a millisecond timestamp.
        """
        slug = json["tag"]
        dataset_uid = json["dataset"]
        updated_at = datetime.fromtimestamp(json["updatedAt"] / 1000)
        splits = []
        for split_json in json["splits"]:
            splits.append(Split.from_json(split_json))

        return Tag(slug, dataset_uid, updated_at, splits)

    def __init__(self, tag: str, dataset: str, updated_at: datetime, splits: Sequence[Split]):
        self.tag = tag
        self.dataset = dataset
        self.updated_at = updated_at
        self.splits = splits

    def __repr__(self) -> str:
        return basic_repr(
            "Tag",
            self.tag,
            dataset = self.dataset,
            splits = self.splits
        )
Class variables
var dataset : str
-
The uid of the dataset to which this tag points.
var splits : Sequence[Split]
-
A list of splits available on this tag.
var tag : str
-
A slug representing this tag (such as "latest").
var updated_at : datetime.datetime
-
When this tag was most recently updated.
Static methods
def from_json(json: JsonTag) ‑> Tag
-
Creates a
Tag
from a JsonTag
.
Expand source code
@staticmethod
def from_json(json: JsonTag) -> Tag:
    """
    Builds a `Tag` from its `JsonTag` representation.

    `updatedAt` is divided by 1000 before being handed to
    `datetime.fromtimestamp`, i.e. it is treated as a millisecond timestamp.
    """
    slug = json["tag"]
    dataset_uid = json["dataset"]
    updated_at = datetime.fromtimestamp(json["updatedAt"] / 1000)
    splits = []
    for split_json in json["splits"]:
        splits.append(Split.from_json(split_json))

    return Tag(slug, dataset_uid, updated_at, splits)
class User (endpoints: ApiEndpoints, uid: str, *, username: str, email: str, default_database: Optional[str])
-
Represents a user account in the dataTap platform.
Expand source code
class User:
    """
    Represents a user account in the dataTap platform.
    """

    _endpoints: ApiEndpoints

    uid: str
    """
    The user's UID.
    """

    username: str
    """
    The user's username.
    """

    email: str
    """
    The user's email address.
    """

    default_database: Optional[str]
    """
    The UID of the user's default database, or `None` if there is none.
    """

    @staticmethod
    def from_json(endpoints: ApiEndpoints, json: JsonUser) -> User:
        """
        Builds a `User` from its `JsonUser` representation.
        """
        uid = json["uid"]
        username = json["username"]
        email = json["email"]
        default_database = json["defaultDatabase"]
        return User(endpoints, uid, username = username, email = email, default_database = default_database)

    def __init__(
        self,
        endpoints: ApiEndpoints,
        uid: str,
        *,
        username: str,
        email: str,
        default_database: Optional[str]
    ):
        self._endpoints = endpoints
        self.uid = uid
        self.username = username
        self.email = email
        self.default_database = default_database

    def __repr__(self) -> str:
        return basic_repr(
            "User",
            self.uid,
            username = self.username,
            email = self.email
        )
Class variables
var default_database : Optional[str]
-
The user's default database
var email : str
-
The user's email address.
var uid : str
-
The user's UID.
var username : str
-
The user's username.
Static methods
def from_json(endpoints: ApiEndpoints, json: JsonUser) ‑> User
-
Creates a
User
from a JsonUser
.
Expand source code
@staticmethod
def from_json(endpoints: ApiEndpoints, json: JsonUser) -> User:
    """
    Builds a `User` from its `JsonUser` representation.
    """
    uid = json["uid"]
    username = json["username"]
    email = json["email"]
    default_database = json["defaultDatabase"]
    return User(endpoints, uid, username = username, email = email, default_database = default_database)