Skip to main content

fused.api

whoami

def whoami()

Returns information on the currently logged-in user.


delete

def delete(path: str,
max_deletion_depth: Union[int, Literal["unlimited"]] = 2) -> bool

Delete the files at the path.

Arguments:

  • path - Directory or file to delete, like fd://my-old-table/
  • max_deletion_depth - If set (defaults to 2), the maximum depth the operation will recurse to. This helps avoid accidentally deleting more data than intended. Pass "unlimited" for unlimited.

Examples:

    fused.delete("fd://bucket-name/deprecated_table/")

list

def list(path: str, *, details: bool = False)

List the files in the path.

Arguments:

  • path - Parent directory URL, like fd://bucket-name/

Keyword Arguments:

  • details - If True, return additional metadata about each record.

Returns:

A list of paths as URLs, or as metadata objects.

Examples:

    fused.list("fd://bucket-name/")

get

def get(path: str) -> bytes

Download the contents at the path to memory.

Arguments:

  • path - URL to a file, like fd://bucket-name/file.parquet

Returns:

Bytes of the file.

Examples:

fused.get("fd://bucket-name/file.parquet")

download

def download(path: str, local_path: Union[str, Path]) -> None

Download the contents at the path to disk.

Arguments:

  • path - URL to a file, like fd://bucket-name/file.parquet
  • local_path - Path to a local file.

upload

def upload(local_path: Union[str, Path, bytes, BinaryIO],
remote_path: str) -> None

Upload a local file, or in-memory contents, to S3.

Arguments:

  • local_path - Either a path to a local file (str, Path) or the contents to upload. Any string will be treated as a Path; if you wish to upload the contents of the string, first encode it: s.encode("utf-8")
  • remote_path - URL to upload to, like fd://new-file.txt

Examples:

To upload a local JSON file to your Fused-managed S3 bucket:

fused.upload("my_file.json", "fd://my_bucket/my_file.json")

sign_url

def sign_url(path: str) -> str

Create a signed URL to access the path. This function may not check that the file represented by the path exists.

Arguments:

  • path - URL to a file, like fd://bucket-name/file.parquet

Returns:

HTTPS URL to access the file using signed access.

Examples:

    fused.sign_url("fd://bucket-name/table_directory/file.parquet")

sign_url_prefix

def sign_url_prefix(path: str) -> Dict[str, str]

Create signed URLs to access all blobs under the path.

Arguments:

  • path - URL to a prefix, like fd://bucket-name/some_directory/

Returns:

Dictionary mapping from blob store key to signed HTTPS URL.

Examples:

fused.sign_url_prefix("fd://bucket-name/table_directory/")

get_udfs

def get_udfs(n: int = 10,
*,
skip: int = 0,
per_request: int = 25,
max_requests: Optional[int] = None,
by: Literal["name", "id", "slug"] = "name",
whose: Literal["self", "public"] = "self")

Fetches a list of UDFs.

Arguments:

  • n - The total number of UDFs to fetch. Defaults to 10.
  • skip - The number of UDFs to skip before starting to collect the result set. Defaults to 0.
  • per_request - The number of UDFs to fetch in each API request. Defaults to 25.
  • max_requests - The maximum number of API requests to make.
  • by - The attribute by which to sort the UDFs. Can be "name", "id", or "slug". Defaults to "name".
  • whose - Specifies whose UDFs to fetch. Can be "self" for the user's own UDFs or "public" for UDFs available publicly. Defaults to "self".

Returns:

A list of UDFs.

Examples:

Fetch UDFs under the user account:

    fused.get_udfs()

job_get_logs

def job_get_logs(job: CoerceableToJobId,
since_ms: Optional[int] = None) -> List[Any]

Fetch logs for a job.

Arguments:

  • job - the identifier of a job or a RunResponse object.
  • since_ms - Timestamp, in milliseconds since epoch, to get logs for. Defaults to None for all logs.

Returns:

Log messages for the given job.


job_print_logs

def job_print_logs(job: CoerceableToJobId,
since_ms: Optional[int] = None,
file: Optional[IO] = None) -> None

Fetch and print logs for a job.

Arguments:

  • job - the identifier of a job or a RunResponse object.
  • since_ms - Timestamp, in milliseconds since epoch, to get logs for. Defaults to None for all logs.
  • file - Where to print logs to. Defaults to sys.stdout.

job_tail_logs

def job_tail_logs(job: CoerceableToJobId,
refresh_seconds: float = 1,
sample_logs: bool = True,
timeout: Optional[float] = None,
get_logs_retries: int = 1)

Continuously print logs for a job.

Arguments:

  • job - the identifier of a job or a RunResponse object.
  • refresh_seconds - how frequently, in seconds, to check for new logs. Defaults to 1.
  • sample_logs - if true, print out only a sample of logs. Defaults to True.
  • timeout - if not None, how long to continue tailing logs for. Defaults to None for indefinite.
  • get_logs_retries - Number of additional retries for log requests. Defaults to 1.

job_get_status

def job_get_status(job: CoerceableToJobId) -> RunResponse

Fetch the status of a running job.

Arguments:

  • job - the identifier of a job or a RunResponse object.

Returns:

The status of the given job.


job_cancel

def job_cancel(job: CoerceableToJobId) -> RunResponse

Cancel an existing job.

Arguments:

  • job - the identifier of a job or a RunResponse object.

Returns:

A new job object.


job_get_exec_time

def job_get_exec_time(job: CoerceableToJobId) -> timedelta

Determine the execution time of this job, using the logs.

Returns:

Time the job took. If the job is in progress, time from first to last log message is returned.


job_wait_for_job

def job_wait_for_job(job: CoerceableToJobId,
poll_interval_seconds: float = 5,
timeout: Optional[float] = None) -> RunResponse

Block the Python kernel until this job has finished.

Arguments:

  • job - the identifier of a job or a RunResponse object.
  • poll_interval_seconds - How often (in seconds) to poll for status updates. Defaults to 5.
  • timeout - The length of time in seconds to wait for the job. Defaults to None.

Raises:

  • TimeoutError - if waiting for the job timed out.

Returns:

The status of the given job.


FusedAPI

class FusedAPI()

API for running jobs in the Fused service.

__init__

def __init__(*,
base_url: Optional[str] = None,
set_global_api: bool = True,
credentials_needed: bool = True)

Create a FusedAPI instance.

Arguments:

  • base_url - The Fused instance to send requests to. Defaults to https://www.fused.io/server/v1.
  • set_global_api - Set this as the global API object. Defaults to True.
  • credentials_needed - If True, automatically attempt to log in. Defaults to True.

create_udf_access_token

def create_udf_access_token(udf_email_or_name_or_id: Optional[str] = None,
udf_name: Optional[str] = None,
*,
udf_email: Optional[str] = None,
udf_id: Optional[str] = None,
client_id: Union[str, Ellipsis, None] = ...,
cache: bool = True,
metadata_json: Optional[Dict[str, Any]] = None,
enabled: bool = True) -> UdfAccessToken

Create a token for running a UDF. Anyone with the token can run the UDF with any parameters they choose. The UDF will run under your environment.

The token does not allow running any other UDF on your account.

Arguments:

  • udf_email_or_name_or_id - A UDF ID, email address (for use with udf_name), or UDF name.
  • udf_name - The name of the UDF to create the token for.

Keyword Arguments:

  • udf_email - The email of the user owning the UDF, or, if udf_name is None, the name of the UDF.
  • udf_id - The backend ID of the UDF to create the token for.
  • client_id - If specified, overrides which realtime environment to run the UDF under.
  • cache - If True, UDF tiles will be cached.
  • metadata_json - Additional metadata to serve as part of the tiles metadata.json.
  • enabled - If True, the token can be used.

upload

def upload(path: str, data: Union[bytes, BinaryIO]) -> None

Upload a binary blob to a cloud location.


start_job

def start_job(config: Union[JobConfig, JobStepConfig],
*,
instance_type: Optional[WHITELISTED_INSTANCE_TYPES] = None,
region: Optional[str] = None,
disk_size_gb: Optional[int] = None,
additional_env: Optional[Sequence[str]] = (
"FUSED_CREDENTIAL_PROVIDER=ec2", ),
image_name: Optional[str] = None) -> RunResponse

Execute an operation.

Arguments:

  • config - the configuration object to run in the job.

Keyword Arguments:

  • instance_type - The AWS EC2 instance type to use for the job. Acceptable strings are m5.large, m5.xlarge, m5.2xlarge, m5.4xlarge, r5.large, r5.xlarge, r5.2xlarge, r5.4xlarge. Defaults to None.
  • region - The AWS region in which to run. Defaults to None.
  • disk_size_gb - The disk size to specify for the job. Defaults to None.
  • additional_env - Any additional environment variables to be passed into the job, each in the form KEY=value. Defaults to ("FUSED_CREDENTIAL_PROVIDER=ec2",).
  • image_name - Custom image name to run. Defaults to None for default image.

get_jobs

def get_jobs(n: int = 5,
*,
skip: int = 0,
per_request: int = 25,
max_requests: Optional[int] = 1) -> Jobs

Get the job history.

Arguments:

  • n - The number of jobs to fetch. Defaults to 5.

Keyword Arguments:

  • skip - Where in the job history to begin. Defaults to 0, which retrieves the most recent job.
  • per_request - Number of jobs per request to fetch. Defaults to 25.
  • max_requests - Maximum number of requests to make. May be None to fetch all jobs. Defaults to 1.

Returns:

The job history.


get_status

def get_status(job: CoerceableToJobId) -> RunResponse

Fetch the status of a running job.

Arguments:

  • job - the identifier of a job or a RunResponse object.

Returns:

The status of the job.


get_logs

def get_logs(job: CoerceableToJobId,
since_ms: Optional[int] = None) -> List[Any]

Fetch logs for a job.

Arguments:

  • job - the identifier of a job or a RunResponse object.
  • since_ms - Timestamp, in milliseconds since epoch, to get logs for. Defaults to None for all logs.

Returns:

Log messages for the given job.


tail_logs

def tail_logs(job: CoerceableToJobId,
refresh_seconds: float = 1,
sample_logs: bool = True,
timeout: Optional[float] = None,
get_logs_retries: int = 1)

Continuously print logs for a job.

Arguments:

  • job - the identifier of a job or a RunResponse object.
  • refresh_seconds - how frequently, in seconds, to check for new logs. Defaults to 1.
  • sample_logs - if true, print out only a sample of logs. Defaults to True.
  • timeout - if not None, how long to continue tailing logs for. Defaults to None for indefinite.
  • get_logs_retries - Number of additional retries for log requests. Defaults to 1.

wait_for_job

def wait_for_job(job: CoerceableToJobId,
poll_interval_seconds: float = 5,
timeout: Optional[float] = None) -> RunResponse

Block the Python kernel until the given job has finished.

Arguments:

  • job - the identifier of a job or a RunResponse object.
  • poll_interval_seconds - How often (in seconds) to poll for status updates. Defaults to 5.
  • timeout - The length of time in seconds to wait for the job. Defaults to None.

Raises:

  • TimeoutError - if waiting for the job timed out.

Returns:

The status of the given job.


cancel_job

def cancel_job(job: CoerceableToJobId) -> RunResponse

Cancel an existing job.

Arguments:

  • job - the identifier of a job or a RunResponse object.

Returns:

A new job object.


auth_token

def auth_token() -> str

Returns the current user's Fused environment (team) auth token.