Skip to main content

Reading Geospatial Data

Common examples for reading geospatial data in Fused.

Python Packages

geopandas

@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/subway_stations.geojson"):
    """Read the vector file at ``path`` with geopandas and return the result."""
    import geopandas as gpd

    gdf = gpd.read_file(path)
    return gdf

shapely

@fused.udf
def udf():
    """Build a small GeoDataFrame from hand-written shapely geometries.

    The frame holds two points followed by one polygon, labelled in a
    ``type`` column, and is tagged with CRS 4326.
    """
    import geopandas as gpd
    from shapely.geometry import Point, Polygon

    # Two point geometries given as (x, y) pairs.
    point_geoms = [Point(-122.4, 37.8), Point(-122.3, 37.7)]

    # One four-cornered polygon.
    ring = [(-122.5, 37.7), (-122.3, 37.7), (-122.3, 37.9), (-122.5, 37.9)]
    area = Polygon(ring)

    # One label per geometry, in the same order as the geometry list.
    labels = ['point'] * len(point_geoms) + ['polygon']
    geometries = [*point_geoms, area]

    return gpd.GeoDataFrame({'type': labels}, geometry=geometries, crs=4326)

rioxarray

@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/elevation.tif"):
    """Open a raster with rioxarray and return its first 1000 cells as a table.

    Args:
        path: Location of the raster file to open.

    Returns:
        The first 1000 rows of the raster flattened into a DataFrame, with
        the index levels turned into regular columns.
    """
    import rioxarray as rxr

    # Read raster data with rioxarray
    raster = rxr.open_rasterio(path)

    # DataArray.to_dataframe() raises ValueError when the array has no name,
    # so fall back to a generic column name if the raster carries none.
    # NOTE(review): plain GeoTIFFs are expected to open unnamed - confirm.
    column = raster.name if raster.name is not None else "value"

    # Convert to DataFrame for display
    df = raster.to_dataframe(name=column).reset_index()

    return df.head(1000)

xarray

@fused.udf
def udf():
    """Fetch the sample ERA5 NetCDF file and return its first 1000 rows.

    The file is downloaded with ``fused.download`` before xarray opens it;
    the dataset is then flattened into a DataFrame.
    """
    import xarray as xr

    # Download first so xarray reads the NetCDF file from the mount disk.
    local_path = fused.download(
        's3://fused-sample/demo_data/2025_01_01_ERA5_surface.nc',
        '2025_01_01_ERA5_surface.nc',
    )
    dataset = xr.open_dataset(local_path)

    # Flatten to a table and expose the index levels as columns.
    table = dataset.to_dataframe()
    table = table.reset_index()

    return table.head(1000)

Vector Formats

GeoJSON (.geojson, .json)

@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/table/US_states.geojson"):
    """Read the GeoJSON file at ``path`` with geopandas and return the result."""
    import geopandas as gpd

    states = gpd.read_file(path)
    return states

Shapefile (.shp + .shx, .dbf, .prj)

@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/table/US_states_shapefile.shp"):
    """Read the shapefile at ``path`` with geopandas and return the result."""
    import geopandas as gpd

    states = gpd.read_file(path)
    return states

GeoPackage (.gpkg)

@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/table/US_states_geopackage.gpkg"):
    """Read the GeoPackage at ``path`` with geopandas and return the result."""
    import geopandas as gpd

    states = gpd.read_file(path)
    return states

KML/KMZ (.kml, .kmz)

@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/table/US_states.kml"):
    """Read the KML file at ``path`` with geopandas and return the result."""
    import geopandas as gpd

    states = gpd.read_file(path)
    return states

GeoParquet (.parquet)

@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/buildings.parquet"):
    """Read the GeoParquet file at ``path`` with geopandas and return the result."""
    import geopandas as gpd

    buildings = gpd.read_parquet(path)
    return buildings

CSV with coordinates (.csv)

@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/table/subway_stations.csv"):
    """Read a CSV with coordinate columns and return it as a GeoDataFrame.

    Args:
        path: Location of the CSV file. It must contain ``longitude`` and
            ``latitude`` columns, which become the point geometry.

    Returns:
        A GeoDataFrame holding every CSV column plus a point geometry,
        tagged with CRS 4326.
    """
    import pandas as pd
    import geopandas as gpd

    # Read CSV
    df = pd.read_csv(path)

    # Convert to GeoDataFrame; points_from_xy builds the points directly, so
    # no shapely import is needed here.
    gdf = gpd.GeoDataFrame(
        df,
        geometry=gpd.points_from_xy(df.longitude, df.latitude),
        crs=4326,
    )

    return gdf

Excel with coordinates (.xlsx)

@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/table/subway_stations.xlsx"):
    """Read an Excel sheet and attach point geometry when coordinates exist.

    Args:
        path: Location of the Excel file.

    Returns:
        A GeoDataFrame with point geometry (CRS 4326) when the sheet has both
        ``longitude`` and ``latitude`` columns; otherwise the plain DataFrame
        as read from the file.
    """
    import pandas as pd
    import geopandas as gpd

    # Read Excel file
    df = pd.read_excel(path)

    # Without both coordinate columns there is nothing to build geometry
    # from, so hand back the table unchanged.
    if 'longitude' not in df.columns or 'latitude' not in df.columns:
        return df

    # Convert to GeoDataFrame; points_from_xy builds the points directly, so
    # no shapely import is needed here.
    return gpd.GeoDataFrame(
        df,
        geometry=gpd.points_from_xy(df.longitude, df.latitude),
        crs=4326,
    )

Raster Formats

GeoTIFF (.tif, .tiff)

@fused.udf
def udf(
    path: str = 's3://fused-sample/demo_data/satellite_imagery/wildfires.tiff'
):
    """Read the raster at ``path`` with rasterio.

    Returns the pixel data from ``read()`` together with the dataset bounds,
    as a ``(data, bounds)`` pair.
    """
    import rasterio

    # Pull out everything that is needed while the dataset is still open.
    with rasterio.open(path) as dataset:
        pixels, extent = dataset.read(), dataset.bounds

    return pixels, extent

NetCDF (.nc)

@fused.udf
def udf():
    """Fetch the sample climate NetCDF file and return its first 1000 rows.

    The file is downloaded with ``fused.download`` before xarray opens it;
    the dataset is then flattened into a DataFrame.
    """
    import xarray as xr

    # Download first so xarray reads the NetCDF file from the mount disk.
    local_path = fused.download(
        's3://fused-sample/demo_data/climate_data.nc',
        'climate_data.nc',
    )

    # Open the dataset, flatten it to a table and expose the index as columns.
    dataset = xr.open_dataset(local_path)
    table = dataset.to_dataframe().reset_index()

    return table.head(1000)
STAC Catalogs

For working with STAC catalogs (Earth on AWS, Microsoft Planetary Computer), see STAC.