fused.api
whoami
def whoami()
Returns information on the currently logged-in user.
delete
def delete(path: str,
max_deletion_depth: Union[int, Literal["unlimited"]] = 2) -> bool
Delete the files at the path.
Arguments:
path
- Directory or file to delete, like fd://my-old-table/
max_deletion_depth
- If set (defaults to 2), the maximum depth the operation will recurse to. This helps avoid accidentally deleting more data than intended. Pass "unlimited" for unlimited.
Examples:
fused.api.delete("fd://bucket-name/deprecated_table/")
list
def list(path: str, *, details: bool = False)
List the files in the path.
Arguments:
path
- Parent directory URL, like fd://bucket-name/
Keyword Arguments:
details
- If True, return additional metadata about each record.
Returns:
A list of paths as URLs, or as metadata objects.
Examples:
fused.api.list("fd://bucket-name/")
get
def get(path: str) -> bytes
Download the contents at the path to memory.
Arguments:
path
- URL to a file, like fd://bucket-name/file.parquet
Returns:
Bytes of the file.
Examples:
fused.api.get("fd://bucket-name/file.parquet")
download
def download(path: str, local_path: Union[str, Path]) -> None
Download the contents at the path to disk.
Arguments:
path
- URL to a file, like fd://bucket-name/file.parquet
local_path
- Path to a local file.
upload
def upload(local_path: Union[str, Path, bytes, BinaryIO],
remote_path: str) -> None
Upload local file to S3.
Arguments:
local_path
- Either a path to a local file (str, Path) or the contents to upload. Any string will be treated as a Path; if you wish to upload the contents of the string, first encode it: s.encode("utf-8")
remote_path
- URL to upload to, like fd://new-file.txt
Examples:
To upload a local JSON file to your Fused-managed S3 bucket:
fused.api.upload("my_file.json", "fd://my_bucket/my_file.json")
sign_url
def sign_url(path: str) -> str
Create a signed URL to access the path. This function may not check that the file represented by the path exists.
Arguments:
path
- URL to a file, like fd://bucket-name/file.parquet
Returns:
HTTPS URL to access the file using signed access.
Examples:
fused.api.sign_url("fd://bucket-name/table_directory/file.parquet")
sign_url_prefix
def sign_url_prefix(path: str) -> Dict[str, str]
Create signed URLs to access all blobs under the path.
Arguments:
path
- URL to a prefix, like fd://bucket-name/some_directory/
Returns:
Dictionary mapping from blob store key to signed HTTPS URL.
Examples:
fused.api.sign_url_prefix("fd://bucket-name/table_directory/")
get_udfs
def get_udfs(n: int = 10,
*,
skip: int = 0,
per_request: int = 25,
max_requests: Optional[int] = None,
by: Literal["name", "id", "slug"] = "name",
whose: Literal["self", "public"] = "self")
Fetches a list of UDFs.
Arguments:
n
- The total number of UDFs to fetch. Defaults to 10.
skip
- The number of UDFs to skip before starting to collect the result set. Defaults to 0.
per_request
- The number of UDFs to fetch in each API request. Defaults to 25.
max_requests
- The maximum number of API requests to make.
by
- The attribute by which to sort the UDFs. Can be "name", "id", or "slug". Defaults to "name".
whose
- Specifies whose UDFs to fetch. Can be "self" for the user's own UDFs or "public" for UDFs available publicly. Defaults to "self".
Returns:
A list of UDFs.
Examples:
Fetch UDFs under the user account:
fused.api.get_udfs()
job_get_logs
def job_get_logs(job: CoerceableToJobId,
since_ms: Optional[int] = None) -> List[Any]
Fetch logs for a job.
Arguments:
job
- the identifier of a job or a RunResponse object.
since_ms
- Timestamp, in milliseconds since epoch, to get logs for. Defaults to None for all logs.
Returns:
Log messages for the given job.
job_print_logs
def job_print_logs(job: CoerceableToJobId,
since_ms: Optional[int] = None,
file: Optional[IO] = None) -> None
Fetch and print logs for a job.
Arguments:
job
- the identifier of a job or a RunResponse object.
since_ms
- Timestamp, in milliseconds since epoch, to get logs for. Defaults to None for all logs.
file
- Where to print logs to. Defaults to sys.stdout.
job_tail_logs
def job_tail_logs(job: CoerceableToJobId,
refresh_seconds: float = 1,
sample_logs: bool = True,
timeout: Optional[float] = None,
get_logs_retries: int = 1)
Continuously print logs for a job.
Arguments:
job
- the identifier of a job or a RunResponse object.
refresh_seconds
- how frequently, in seconds, to check for new logs. Defaults to 1.
sample_logs
- if true, print out only a sample of logs. Defaults to True.
timeout
- if not None, how long to continue tailing logs for. Defaults to None for indefinite.
get_logs_retries
- Number of additional retries for log requests. Defaults to 1.
job_get_status
def job_get_status(job: CoerceableToJobId) -> RunResponse
Fetch the status of a running job.
Arguments:
job
- the identifier of a job or a RunResponse object.
Returns:
The status of the given job.
job_cancel
def job_cancel(job: CoerceableToJobId) -> RunResponse
Cancel an existing job.
Arguments:
job
- the identifier of a job or a RunResponse object.
Returns:
A new job object.
job_get_exec_time
def job_get_exec_time(job: CoerceableToJobId) -> timedelta
Determine the execution time of this job, using the logs.
Returns:
Time the job took. If the job is in progress, time from first to last log message is returned.
job_wait_for_job
def job_wait_for_job(job: CoerceableToJobId,
poll_interval_seconds: float = 5,
timeout: Optional[float] = None) -> RunResponse
Block the Python kernel until this job has finished.
Arguments:
poll_interval_seconds
- How often (in seconds) to poll for status updates. Defaults to 5.
timeout
- The length of time in seconds to wait for the job. Defaults to None.
Raises:
TimeoutError
- if waiting for the job timed out.
Returns:
The status of the given job.
FusedAPI
class FusedAPI()
API for running jobs in the Fused service.
__init__
def __init__(*,
base_url: Optional[str] = None,
set_global_api: bool = True,
credentials_needed: bool = True)
Create a FusedAPI instance.
Arguments:
base_url
- The Fused instance to send requests to. Defaults to https://www.fused.io/server/v1.
set_global_api
- Set this as the global API object. Defaults to True.
credentials_needed
- If True, automatically attempt to log in. Defaults to True.
create_udf_access_token
def create_udf_access_token(udf_email_or_name_or_id: Optional[str] = None,
udf_name: Optional[str] = None,
*,
udf_email: Optional[str] = None,
udf_id: Optional[str] = None,
client_id: Union[str, Ellipsis, None] = ...,
cache: bool = True,
metadata_json: Optional[Dict[str, Any]] = None,
enabled: bool = True) -> UdfAccessToken
Create a token for running a UDF. Anyone with the token can run the UDF with any parameters they choose. The UDF will run under your environment.
The token does not allow running any other UDF on your account.
Arguments:
udf_email_or_name_or_id
- A UDF ID, email address (for use with udf_name), or UDF name.
udf_name
- The name of the UDF to create the token for.
Keyword Arguments:
udf_email
- The email of the user owning the UDF, or, if udf_name is None, the name of the UDF.
udf_id
- The backend ID of the UDF to create the token for.
client_id
- If specified, overrides which realtime environment to run the UDF under.
cache
- If True, UDF tiles will be cached.
metadata_json
- Additional metadata to serve as part of the tiles metadata.json.
enabled
- If True, the token can be used.
upload
def upload(path: str, data: Union[bytes, BinaryIO]) -> None
Upload a binary blob to a cloud location.
start_job
def start_job(config: Union[JobConfig, JobStepConfig],
*,
instance_type: Optional[WHITELISTED_INSTANCE_TYPES] = None,
region: Optional[str] = None,
disk_size_gb: Optional[int] = None,
additional_env: Optional[Sequence[str]] = (
"FUSED_CREDENTIAL_PROVIDER=ec2", ),
image_name: Optional[str] = None) -> RunResponse
Execute an operation.
Arguments:
config
- the configuration object to run in the job.
Keyword Arguments:
instance_type
- The AWS EC2 instance type to use for the job. Acceptable strings are m5.large, m5.xlarge, m5.2xlarge, m5.4xlarge, m5.8xlarge, m5.12xlarge, m5.16xlarge, r5.large, r5.xlarge, r5.2xlarge, r5.4xlarge, r5.8xlarge, r5.12xlarge, or r5.16xlarge. Defaults to None.
region
- The AWS region in which to run. Defaults to None.
disk_size_gb
- The disk size to specify for the job. Defaults to None.
additional_env
- Any additional environment variables to be passed into the job, each in the form KEY=value. Defaults to ("FUSED_CREDENTIAL_PROVIDER=ec2",).
image_name
- Custom image name to run. Defaults to None for default image.
get_jobs
def get_jobs(n: int = 5,
*,
skip: int = 0,
per_request: int = 25,
max_requests: Optional[int] = 1) -> Jobs
Get jobs history with get_jobs.
Arguments:
n
- The number of jobs to fetch. Defaults to 5.
Keyword Arguments:
skip
- Where in the job history to begin. Defaults to 0, which retrieves the most recent job.
per_request
- Number of jobs per request to fetch. Defaults to 25.
max_requests
- Maximum number of requests to make. May be None to fetch all jobs. Defaults to 1.
Returns:
The job history.
get_status
def get_status(job: CoerceableToJobId) -> RunResponse
Fetch the status of a running job.
Arguments:
job
- the identifier of a job or a RunResponse object.
Returns:
The status of the job.
get_logs
def get_logs(job: CoerceableToJobId,
since_ms: Optional[int] = None) -> List[Any]
Fetch logs for a job.
Arguments:
job
- the identifier of a job or a RunResponse object.
since_ms
- Timestamp, in milliseconds since epoch, to get logs for. Defaults to None for all logs.
Returns:
Log messages for the given job.
tail_logs
def tail_logs(job: CoerceableToJobId,
refresh_seconds: float = 1,
sample_logs: bool = True,
timeout: Optional[float] = None,
get_logs_retries: int = 1)
Continuously print logs for a job.
Arguments:
job
- the identifier of a job or a RunResponse object.
refresh_seconds
- how frequently, in seconds, to check for new logs. Defaults to 1.
sample_logs
- if true, print out only a sample of logs. Defaults to True.
timeout
- if not None, how long to continue tailing logs for. Defaults to None for indefinite.
get_logs_retries
- Number of additional retries for log requests. Defaults to 1.
wait_for_job
def wait_for_job(job: CoerceableToJobId,
poll_interval_seconds: float = 5,
timeout: Optional[float] = None) -> RunResponse
Block the Python kernel until the given job has finished.
Arguments:
job
- the identifier of a job or a RunResponse object.
poll_interval_seconds
- How often (in seconds) to poll for status updates. Defaults to 5.
timeout
- The length of time in seconds to wait for the job. Defaults to None.
Raises:
TimeoutError
- if waiting for the job timed out.
Returns:
The status of the given job.
cancel_job
def cancel_job(job: CoerceableToJobId) -> RunResponse
Cancel an existing job.
Arguments:
job
- the identifier of a job or a RunResponse object.
Returns:
A new job object.
auth_token
def auth_token() -> str
Returns the current user's Fused environment (team) auth token.