fused.api

`whoami`

def whoami()

Returns information on the currently logged-in user.

`delete`

def delete(path: str,
           max_deletion_depth: Union[int, Literal["unlimited"]] = 2) -> bool

Delete the files at the path.

Arguments:

path - Directory or file to delete, like fd://my-old-table/
max_deletion_depth - The maximum depth the operation will recurse to (defaults to 2). This helps avoid accidentally deleting more data than intended. Pass "unlimited" to remove the depth limit.

Examples:

fused.api.delete("fd://bucket-name/deprecated_table/")

`list`

def list(path: str, *, details: bool = False)

List the files in the path.

Arguments:

path - Parent directory URL, like fd://bucket-name/

Keyword Arguments:

details - If True, return additional metadata about each record.

Returns:

A list of paths as URLs, or as metadata objects.

Examples:

fused.api.list("fd://bucket-name/")

`get`

def get(path: str) -> bytes

Download the contents at the path to memory.

Arguments:

path - URL to a file, like fd://bucket-name/file.parquet

Returns:

Bytes of the file.

Examples:

fused.api.get("fd://bucket-name/file.parquet")

`download`

def download(path: str, local_path: Union[str, Path]) -> None

Download the contents at the path to disk.

Arguments:

path - URL to a file, like fd://bucket-name/file.parquet
local_path - Path to a local file.

`upload`

def upload(local_path: Union[str, Path, bytes, BinaryIO],
           remote_path: str) -> None

Upload local file to S3.

Arguments:

local_path - Either a path to a local file (str, Path) or the contents to upload. Any string will be treated as a path; to upload the contents of a string, first encode it: s.encode("utf-8")
remote_path - URL to upload to, like fd://new-file.txt

Examples:

To upload a local JSON file to your Fused-managed S3 bucket:

fused.api.upload("my_file.json", "fd://my_bucket/my_file.json")

`sign_url`

def sign_url(path: str) -> str

Create a signed URL to access the path. This function may not check that the file represented by the path exists.

Arguments:

path - URL to a file, like fd://bucket-name/file.parquet

Returns:

HTTPS URL to access the file using signed access.

Examples:

fused.api.sign_url("fd://bucket-name/table_directory/file.parquet")

`sign_url_prefix`

def sign_url_prefix(path: str) -> Dict[str, str]

Create signed URLs to access all blobs under the path.

Arguments:

path - URL to a prefix, like fd://bucket-name/some_directory/

Returns:

Dictionary mapping from blob store key to signed HTTPS URL.

Examples:

fused.api.sign_url_prefix("fd://bucket-name/table_directory/")

`get_udfs`

def get_udfs(n: int = 10,
             *,
             skip: int = 0,
             per_request: int = 25,
             max_requests: Optional[int] = None,
             by: Literal["name", "id", "slug"] = "name",
             whose: Literal["self", "public"] = "self")

Fetches a list of UDFs.

Arguments:

n - The total number of UDFs to fetch. Defaults to 10.
skip - The number of UDFs to skip before starting to collect the result set. Defaults to 0.
per_request - The number of UDFs to fetch in each API request. Defaults to 25.
max_requests - The maximum number of API requests to make.
by - The attribute by which to sort the UDFs. Can be "name", "id", or "slug". Defaults to "name".
whose - Specifies whose UDFs to fetch. Can be "self" for the user's own UDFs or "public" for UDFs available publicly. Defaults to "self".

Returns:

A list of UDFs.

Examples:

Fetch UDFs under the user account:

fused.api.get_udfs()

`job_get_logs`

def job_get_logs(job: CoerceableToJobId,
                 since_ms: Optional[int] = None) -> List[Any]

Fetch logs for a job.

Arguments:

job - the identifier of a job or a RunResponse object.
since_ms - Timestamp, in milliseconds since epoch, to get logs for. Defaults to None for all logs.

Returns:

Log messages for the given job.

`job_print_logs`

def job_print_logs(job: CoerceableToJobId,
                   since_ms: Optional[int] = None,
                   file: Optional[IO] = None) -> None

Fetch and print logs for a job.

Arguments:

job - the identifier of a job or a RunResponse object.
since_ms - Timestamp, in milliseconds since epoch, to get logs for. Defaults to None for all logs.
file - Where to print logs to. Defaults to sys.stdout.

`job_tail_logs`

def job_tail_logs(job: CoerceableToJobId,
                  refresh_seconds: float = 1,
                  sample_logs: bool = True,
                  timeout: Optional[float] = None,
                  get_logs_retries: int = 1)

Continuously print logs for a job.

Arguments:

job - the identifier of a job or a RunResponse object.
refresh_seconds - how frequently, in seconds, to check for new logs. Defaults to 1.
sample_logs - if true, print out only a sample of logs. Defaults to True.
timeout - if not None, how long to continue tailing logs for. Defaults to None for indefinite.
get_logs_retries - Number of additional retries for log requests. Defaults to 1.

`job_get_status`

def job_get_status(job: CoerceableToJobId) -> RunResponse

Fetch the status of a running job.

Arguments:

job - the identifier of a job or a RunResponse object.

Returns:

The status of the given job.

`job_cancel`

def job_cancel(job: CoerceableToJobId) -> RunResponse

Cancel an existing job.

Arguments:

job - the identifier of a job or a RunResponse object.

Returns:

A new job object.

`job_get_exec_time`

def job_get_exec_time(job: CoerceableToJobId) -> timedelta

Determine the execution time of this job, using the logs.

Returns:

Time the job took. If the job is in progress, time from first to last log message is returned.

`job_wait_for_job`

def job_wait_for_job(job: CoerceableToJobId,
                     poll_interval_seconds: float = 5,
                     timeout: Optional[float] = None) -> RunResponse

Block the Python kernel until this job has finished.

Arguments:

job - the identifier of a job or a RunResponse object.
poll_interval_seconds - How often (in seconds) to poll for status updates. Defaults to 5.
timeout - The length of time in seconds to wait for the job. Defaults to None.

Raises:

TimeoutError - if waiting for the job timed out.

Returns:

The status of the given job.

`FusedAPI`

class FusedAPI()

API for running jobs in the Fused service.

`__init__`

def __init__(*,
             base_url: Optional[str] = None,
             set_global_api: bool = True,
             credentials_needed: bool = True)

Create a FusedAPI instance.

Arguments:

base_url - The Fused instance to send requests to. Defaults to https://www.fused.io/server/v1.
set_global_api - Set this as the global API object. Defaults to True.
credentials_needed - If True, automatically attempt to log in. Defaults to True.

`create_udf_access_token`

def create_udf_access_token(udf_email_or_name_or_id: Optional[str] = None,
                            udf_name: Optional[str] = None,
                            *,
                            udf_email: Optional[str] = None,
                            udf_id: Optional[str] = None,
                            client_id: Union[str, Ellipsis, None] = ...,
                            cache: bool = True,
                            metadata_json: Optional[Dict[str, Any]] = None,
                            enabled: bool = True) -> UdfAccessToken

Create a token for running a UDF. Anyone with the token can run the UDF with any parameters they choose. The UDF will run under your environment.

The token does not allow running any other UDF on your account.

Arguments:

udf_email_or_name_or_id - A UDF ID, email address (for use with udf_name), or UDF name.
udf_name - The name of the UDF to create the token for.

Keyword Arguments:

udf_email - The email of the user owning the UDF, or, if udf_name is None, the name of the UDF.
udf_id - The backend ID of the UDF to create the token for.
client_id - If specified, overrides which realtime environment to run the UDF under.
cache - If True, UDF tiles will be cached.
metadata_json - Additional metadata to serve as part of the tiles metadata.json.
enabled - If True, the token can be used.

`upload`

def upload(path: str, data: Union[bytes, BinaryIO]) -> None

Upload a binary blob to a cloud location.

`start_job`

def start_job(config: Union[JobConfig, JobStepConfig],
              *,
              instance_type: Optional[WHITELISTED_INSTANCE_TYPES] = None,
              region: Optional[str] = None,
              disk_size_gb: Optional[int] = None,
              additional_env: Optional[Sequence[str]] = (
                  "FUSED_CREDENTIAL_PROVIDER=ec2", ),
              image_name: Optional[str] = None) -> RunResponse

Execute an operation.

Arguments:

config - the configuration object to run in the job.

Keyword Arguments:

instance_type - The AWS EC2 instance type to use for the job. Acceptable strings are m5.large, m5.xlarge, m5.2xlarge, m5.4xlarge, m5.8xlarge, m5.12xlarge, m5.16xlarge, r5.large, r5.xlarge, r5.2xlarge, r5.4xlarge, r5.8xlarge, r5.12xlarge, or r5.16xlarge. Defaults to None.
region - The AWS region in which to run. Defaults to None.
disk_size_gb - The disk size to specify for the job. Defaults to None.
additional_env - Any additional environment variables to be passed into the job, each in the form KEY=value. Defaults to ("FUSED_CREDENTIAL_PROVIDER=ec2",).
image_name - Custom image name to run. Defaults to None for default image.

`get_jobs`

def get_jobs(n: int = 5,
             *,
             skip: int = 0,
             per_request: int = 25,
             max_requests: Optional[int] = 1) -> Jobs

Get the job history.

Arguments:

n - The number of jobs to fetch. Defaults to 5.

Keyword Arguments:

skip - Where in the job history to begin. Defaults to 0, which retrieves the most recent job.
per_request - Number of jobs per request to fetch. Defaults to 25.
max_requests - Maximum number of requests to make. May be None to fetch all jobs. Defaults to 1.

Returns:

The job history.

`get_status`

def get_status(job: CoerceableToJobId) -> RunResponse

Fetch the status of a running job.

Arguments:

job - the identifier of a job or a RunResponse object.

Returns:

The status of the job.

`get_logs`

def get_logs(job: CoerceableToJobId,
             since_ms: Optional[int] = None) -> List[Any]

Fetch logs for a job.

Arguments:

job - the identifier of a job or a RunResponse object.
since_ms - Timestamp, in milliseconds since epoch, to get logs for. Defaults to None for all logs.

Returns:

Log messages for the given job.

`tail_logs`

def tail_logs(job: CoerceableToJobId,
              refresh_seconds: float = 1,
              sample_logs: bool = True,
              timeout: Optional[float] = None,
              get_logs_retries: int = 1)

Continuously print logs for a job.

Arguments:

job - the identifier of a job or a RunResponse object.
refresh_seconds - how frequently, in seconds, to check for new logs. Defaults to 1.
sample_logs - if true, print out only a sample of logs. Defaults to True.
timeout - if not None, how long to continue tailing logs for. Defaults to None for indefinite.
get_logs_retries - Number of additional retries for log requests. Defaults to 1.

`wait_for_job`

def wait_for_job(job: CoerceableToJobId,
                 poll_interval_seconds: float = 5,
                 timeout: Optional[float] = None) -> RunResponse

Block the Python kernel until the given job has finished.

Arguments:

job - the identifier of a job or a RunResponse object.
poll_interval_seconds - How often (in seconds) to poll for status updates. Defaults to 5.
timeout - The length of time in seconds to wait for the job. Defaults to None.

Raises:

TimeoutError - if waiting for the job timed out.

Returns:

The status of the given job.

`cancel_job`

def cancel_job(job: CoerceableToJobId) -> RunResponse

Cancel an existing job.

Arguments:

job - the identifier of a job or a RunResponse object.

Returns:

A new job object.

`auth_token`

def auth_token() -> str

Returns the current user's Fused environment (team) auth token.

whoami​

delete​

list​

get​

download​

upload​

sign_url​

sign_url_prefix​

get_udfs​

job_get_logs​

job_print_logs​

job_tail_logs​

job_get_status​

job_cancel​

job_get_exec_time​

job_wait_for_job​

FusedAPI​

__init__​

create_udf_access_token​

upload​

start_job​

get_jobs​

get_status​

get_logs​

tail_logs​

wait_for_job​

cancel_job​

auth_token​

`whoami`

`delete`

`list`

`get`

`download`

`upload`

`sign_url`

`sign_url_prefix`

`get_udfs`

`job_get_logs`

`job_print_logs`

`job_tail_logs`

`job_get_status`

`job_cancel`

`job_get_exec_time`

`job_wait_for_job`

`FusedAPI`

`__init__`

`create_udf_access_token`

`upload`

`start_job`

`get_jobs`

`get_status`

`get_logs`

`tail_logs`

`wait_for_job`

`cancel_job`

`auth_token`