Read Data
Common examples for reading geospatial data in Fused.
Python Packages
geopandas
@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/subway_stations.geojson"):
    """Read the file at ``path`` with GeoPandas and return the result.

    Args:
        path: Location of the file handed to ``geopandas.read_file``.
    """
    import geopandas as gpd

    stations = gpd.read_file(path)
    return stations
shapely
@fused.udf
def udf():
    """Build a three-row GeoDataFrame from hand-written shapely geometries.

    Returns:
        A GeoDataFrame (``crs=4326``) holding two points and one polygon,
        with a ``type`` column labelling each row.
    """
    import geopandas as gpd
    from shapely.geometry import Point, Polygon

    # Geometry order must line up with the labels below: point, point, polygon.
    shapes = [
        Point(-122.4, 37.8),
        Point(-122.3, 37.7),
        Polygon([(-122.5, 37.7), (-122.3, 37.7), (-122.3, 37.9), (-122.5, 37.9)]),
    ]
    labels = {'type': ['point', 'point', 'polygon']}
    return gpd.GeoDataFrame(labels, geometry=shapes, crs=4326)
duckdb
@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/housing_2024.parquet"):
    """Query the file at ``path`` with DuckDB and return a sample of rows.

    Args:
        path: Location of the file DuckDB should scan.

    Returns:
        A pandas DataFrame with at most 1000 rows whose ``latitude`` column
        is not NULL.
    """
    import duckdb

    # ``path`` is spliced into the SQL text as a string literal, so double any
    # single quote to keep a stray quote from terminating the literal early.
    quoted_path = path.replace("'", "''")
    query = f"""
        SELECT *
        FROM '{quoted_path}'
        WHERE latitude IS NOT NULL
        LIMIT 1000
    """

    # The context manager closes the connection; ``.df()`` materialises the
    # result before that happens.
    with duckdb.connect() as conn:
        return conn.execute(query).df()
rioxarray
@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/elevation.tif"):
    """Read a raster with rioxarray and return its first 1000 rows as a table.

    Args:
        path: Location of the raster handed to ``rioxarray.open_rasterio``.

    Returns:
        The first 1000 rows of the raster converted to a DataFrame, with the
        index levels moved into ordinary columns.
    """
    import rioxarray as rxr

    # Read raster data with rioxarray
    raster = rxr.open_rasterio(path)

    # ``DataArray.to_dataframe()`` raises ValueError for an unnamed array, so
    # give it a column name when none is set. ``getattr`` with a non-None
    # default leaves objects without a ``name`` attribute untouched.
    if getattr(raster, "name", "") is None:
        raster = raster.rename("value")

    # Convert to DataFrame for display
    df = raster.to_dataframe().reset_index()
    return df.head(1000)
xarray
@fused.udf
def udf():
    """Download a NetCDF file, open it with xarray and return 1000 rows.

    Returns:
        The first 1000 rows of the dataset converted to a DataFrame, with the
        index levels moved into ordinary columns.
    """
    import xarray as xr

    # Fetch the file with ``fused.download`` first; xarray is then given the
    # path it returns rather than the s3:// URL.
    path = fused.download(
        's3://fused-sample/demo_data/2025_01_01_ERA5_surface.nc',
        '2025_01_01_ERA5_surface.nc',
    )

    # Convert inside the ``with`` block so the data is read before the
    # dataset's file handle is closed.
    with xr.open_dataset(path) as ds:
        df = ds.to_dataframe().reset_index()
    return df.head(1000)
Table Formats (Vector)
GeoJSON (.geojson, .json)
@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/table/US_states.geojson"):
    """Read the GeoJSON file at ``path`` with GeoPandas and return the result.

    Args:
        path: Location of the file handed to ``geopandas.read_file``.
    """
    import geopandas as gpd

    states = gpd.read_file(path)
    return states
Shapefile (.shp + .shx, .dbf, .prj)
@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/table/US_states_shapefile.shp"):
    """Read the Shapefile at ``path`` with GeoPandas and return the result.

    Args:
        path: Location of the ``.shp`` file handed to ``geopandas.read_file``.
    """
    import geopandas as gpd

    states = gpd.read_file(path)
    return states
GeoPackage (.gpkg)
@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/table/US_states_geopackage.gpkg"):
    """Read the GeoPackage at ``path`` with GeoPandas and return the result.

    Args:
        path: Location of the file handed to ``geopandas.read_file``.
    """
    import geopandas as gpd

    states = gpd.read_file(path)
    return states
KML/KMZ (.kml, .kmz)
@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/table/US_states.kml"):
    """Read the KML file at ``path`` with GeoPandas and return the result.

    Args:
        path: Location of the file handed to ``geopandas.read_file``.
    """
    import geopandas as gpd

    states = gpd.read_file(path)
    return states
Parquet (.parquet)
@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/buildings.parquet"):
    """Read the Parquet file at ``path`` with GeoPandas and return the result.

    Args:
        path: Location of the file handed to ``geopandas.read_parquet``.
    """
    import geopandas as gpd

    buildings = gpd.read_parquet(path)
    return buildings
CSV with coordinates (.csv)
@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/table/subway_stations.csv"):
    """Read a CSV with coordinate columns and return it as a GeoDataFrame.

    Args:
        path: Location of the CSV file. It must contain ``longitude`` and
            ``latitude`` columns.

    Returns:
        A GeoDataFrame (``crs=4326``) with the CSV columns plus a point
        geometry built from ``longitude`` / ``latitude``.
    """
    import geopandas as gpd
    import pandas as pd

    # Read CSV
    df = pd.read_csv(path)

    # Convert to GeoDataFrame; points_from_xy builds the geometries directly,
    # so no shapely import is needed here.
    geometry = gpd.points_from_xy(df.longitude, df.latitude)
    return gpd.GeoDataFrame(df, geometry=geometry, crs=4326)
Excel (.xlsx)
@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/table/subway_stations.xlsx"):
    """Read an Excel sheet and attach point geometry when coordinates exist.

    Args:
        path: Location of the Excel file handed to ``pandas.read_excel``.

    Returns:
        A GeoDataFrame (``crs=4326``) when the sheet has both ``longitude``
        and ``latitude`` columns; otherwise the plain DataFrame as read.
    """
    import geopandas as gpd
    import pandas as pd

    # Read Excel file
    df = pd.read_excel(path)

    # Without both coordinate columns there is nothing to build geometry from.
    if 'longitude' not in df.columns or 'latitude' not in df.columns:
        return df

    # points_from_xy builds the geometries directly, so no shapely import is
    # needed here.
    geometry = gpd.points_from_xy(df.longitude, df.latitude)
    return gpd.GeoDataFrame(df, geometry=geometry, crs=4326)
Array Formats (Raster)
GeoTIFF (.tif, .tiff)
@fused.udf
def udf(
    path: str = 's3://fused-sample/demo_data/satellite_imagery/wildfires.tiff'
):
    """Open a raster with rasterio and return its data together with its bounds.

    Args:
        path: Location of the raster handed to ``rasterio.open``.

    Returns:
        A ``(data, bounds)`` tuple: the result of ``src.read()`` and the
        dataset's ``bounds`` attribute.
    """
    import rasterio

    # Both values are pulled out while the dataset is still open.
    with rasterio.open(path) as dataset:
        pixels, extent = dataset.read(), dataset.bounds
    return pixels, extent
NetCDF (.nc)
@fused.udf
def udf():
    """Download a NetCDF file, open it with xarray and return 1000 rows.

    Returns:
        The first 1000 rows of the dataset converted to a DataFrame, with the
        index levels moved into ordinary columns.
    """
    import xarray as xr

    # Fetch the file with ``fused.download`` first; xarray is then given the
    # path it returns rather than the s3:// URL.
    path = fused.download('s3://fused-sample/demo_data/climate_data.nc', 'climate_data.nc')

    # Convert inside the ``with`` block so the data is read before the
    # dataset's file handle is closed.
    with xr.open_dataset(path) as ds:
        df = ds.to_dataframe().reset_index()
    return df.head(1000)
STAC Catalog
Earth on AWS
@fused.udf
def udf(
    bounds: fused.types.Bounds = [-77.083, 38.804, -76.969, 38.983],
):
    """Load the ``cop-dem-glo-30`` collection for ``bounds`` from Earth Search.

    Args:
        bounds: Bounding box used both for the STAC search and for the load.

    Returns:
        A ``(data, bounds)`` tuple: the ``data`` variable of the loaded
        dataset (cast to ``int``) and the bounds that were passed in.
    """
    import odc.stac
    import pystac_client

    # This example reads from S3 without signing requests, so it does not
    # need the planetary_computer package.
    odc.stac.configure_s3_access(aws_unsigned=True)
    catalog = pystac_client.Client.open("https://earth-search.aws.element84.com/v1")

    # Loading Elevation model
    items = catalog.search(
        collections=["cop-dem-glo-30"],
        bbox=bounds,
    ).item_collection()
    xarray_dataset = odc.stac.load(
        items,
        crs="EPSG:3857",
        bands=["data"],
        resolution=150,
        bbox=bounds,
    ).astype(int)
    return xarray_dataset["data"], bounds
Microsoft Planetary Computer
@fused.udf
def udf(
    bounds: fused.types.Bounds = [-122.463,37.755,-122.376,37.803],
):
    """Load the ``cop-dem-glo-30`` collection for ``bounds`` from Planetary Computer.

    Args:
        bounds: Bounding box used both for the STAC search and for the load.

    Returns:
        A ``(data, bounds)`` tuple: the ``data`` variable of the loaded
        dataset (cast to ``int``) and the bounds that were passed in.
    """
    import odc.stac
    import planetary_computer
    import pystac_client

    # The client is opened with planetary_computer.sign_inplace as its modifier.
    stac_client = pystac_client.Client.open(
        "https://planetarycomputer.microsoft.com/api/stac/v1",
        modifier=planetary_computer.sign_inplace,
    )

    # Find the elevation-model items that intersect the requested bounds.
    search = stac_client.search(collections=["cop-dem-glo-30"], bbox=bounds)
    dem_items = search.item_collection()

    # Load only the "data" band, then cast to int.
    loaded = odc.stac.load(
        dem_items,
        crs="EPSG:3857",
        bands=["data"],
        resolution=150,
        bbox=bounds,
    )
    elevation = loaded.astype(int)
    return elevation["data"], bounds