Reading Geospatial Data
Common examples for reading geospatial data in Fused.
Python Packages
geopandas
@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/subway_stations.geojson"):
    """Read the vector file at ``path`` with geopandas and return the result.

    Args:
        path: Location of the file handed to ``geopandas.read_file``.

    Returns:
        Whatever ``geopandas.read_file`` produces for ``path``.
    """
    import geopandas as gpd

    gdf = gpd.read_file(path)
    return gdf
shapely
@fused.udf
def udf():
    """Build a three-row GeoDataFrame from hand-written shapely geometries.

    Returns:
        A GeoDataFrame with a ``type`` column labelling two points and one
        polygon, with the CRS set to 4326.
    """
    import geopandas as gpd
    from shapely.geometry import Point, Polygon

    # Geometry order must line up with the labels below: point, point, polygon.
    first_point = Point(-122.4, 37.8)
    second_point = Point(-122.3, 37.7)
    box = Polygon([(-122.5, 37.7), (-122.3, 37.7), (-122.3, 37.9), (-122.5, 37.9)])
    shapes = [first_point, second_point, box]
    labels = ['point', 'point', 'polygon']

    return gpd.GeoDataFrame({'type': labels}, geometry=shapes, crs=4326)
rioxarray
@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/elevation.tif"):
    """Open a raster with rioxarray and return its first 1000 rows as a table.

    Args:
        path: Location of the raster handed to ``rioxarray.open_rasterio``.

    Returns:
        The first 1000 rows of the raster flattened into a DataFrame, with
        the former index levels turned into ordinary columns.
    """
    import rioxarray as rxr

    # Read raster data with rioxarray
    raster = rxr.open_rasterio(path)

    # DataArray.to_dataframe() raises ValueError for an array without a name,
    # which is what a plain GeoTIFF usually yields. Supply a fallback column
    # name only in that case, so named arrays (and objects without a ``name``
    # attribute at all) are converted exactly as before.
    is_unnamed = getattr(raster, "name", "") is None
    if is_unnamed:
        table = raster.to_dataframe(name="value")
    else:
        table = raster.to_dataframe()

    # Convert to a flat DataFrame for display; cap the preview at 1000 rows.
    df = table.reset_index()
    return df.head(1000)
xarray
@fused.udf
def udf():
    """Download the sample ERA5 NetCDF file and return its first 1000 rows.

    Returns:
        The first 1000 rows of the dataset converted to a DataFrame, with the
        index reset to ordinary columns.
    """
    import xarray as xr

    # The file is fetched with fused.download first and opened from the
    # returned path rather than straight from the s3:// URL.
    remote_url = 's3://fused-sample/demo_data/2025_01_01_ERA5_surface.nc'
    local_name = '2025_01_01_ERA5_surface.nc'
    local_path = fused.download(remote_url, local_name)

    dataset = xr.open_dataset(local_path)

    # Flatten to a table and keep only a preview.
    table = dataset.to_dataframe()
    return table.reset_index().head(1000)
Vector Formats
GeoJSON (.geojson, .json)
@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/table/US_states.geojson"):
    """Read a GeoJSON file with geopandas.

    Args:
        path: Location of the GeoJSON file handed to ``geopandas.read_file``.

    Returns:
        Whatever ``geopandas.read_file`` produces for ``path``.
    """
    import geopandas as gpd

    gdf = gpd.read_file(path)
    return gdf
Shapefile (.shp + .shx, .dbf, .prj)
@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/table/US_states_shapefile.shp"):
    """Read a Shapefile with geopandas.

    Args:
        path: Location of the ``.shp`` file handed to ``geopandas.read_file``.

    Returns:
        Whatever ``geopandas.read_file`` produces for ``path``.
    """
    import geopandas as gpd

    gdf = gpd.read_file(path)
    return gdf
GeoPackage (.gpkg)
@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/table/US_states_geopackage.gpkg"):
    """Read a GeoPackage file with geopandas.

    Args:
        path: Location of the ``.gpkg`` file handed to ``geopandas.read_file``.

    Returns:
        Whatever ``geopandas.read_file`` produces for ``path``.
    """
    import geopandas as gpd

    gdf = gpd.read_file(path)
    return gdf
KML/KMZ (.kml, .kmz)
@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/table/US_states.kml"):
    """Read a KML file with geopandas.

    Args:
        path: Location of the KML file handed to ``geopandas.read_file``.

    Returns:
        Whatever ``geopandas.read_file`` produces for ``path``.
    """
    import geopandas as gpd

    gdf = gpd.read_file(path)
    return gdf
GeoParquet (.parquet)
@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/buildings.parquet"):
    """Read a GeoParquet file with geopandas.

    Args:
        path: Location of the Parquet file handed to
            ``geopandas.read_parquet``.

    Returns:
        Whatever ``geopandas.read_parquet`` produces for ``path``.
    """
    import geopandas as gpd

    gdf = gpd.read_parquet(path)
    return gdf
CSV with coordinates (.csv)
@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/table/subway_stations.csv"):
    """Read a CSV with coordinate columns and return it as a GeoDataFrame.

    Args:
        path: Location of the CSV file handed to ``pandas.read_csv``. The
            table must contain ``longitude`` and ``latitude`` columns.

    Returns:
        A GeoDataFrame holding every CSV column plus a point geometry built
        from ``longitude``/``latitude``, with the CRS set to 4326.
    """
    import pandas as pd
    import geopandas as gpd

    # Read CSV
    df = pd.read_csv(path)

    # points_from_xy builds the point geometries directly from the two
    # columns, so no shapely import is needed here.
    geometry = gpd.points_from_xy(df.longitude, df.latitude)

    # Convert to GeoDataFrame
    return gpd.GeoDataFrame(df, geometry=geometry, crs=4326)
Excel with coordinates (.xlsx)
@fused.udf
def udf(path: str = "s3://fused-sample/demo_data/table/subway_stations.xlsx"):
    """Read an Excel sheet and attach point geometry when coordinates exist.

    Args:
        path: Location of the workbook handed to ``pandas.read_excel``.

    Returns:
        A GeoDataFrame with point geometry (CRS 4326) when the sheet has both
        ``longitude`` and ``latitude`` columns; otherwise the plain DataFrame
        as read.
    """
    import pandas as pd
    import geopandas as gpd

    # Read Excel file
    df = pd.read_excel(path)

    # Without both coordinate columns there is nothing to build geometry
    # from, so hand back the table unchanged.
    if 'longitude' not in df.columns or 'latitude' not in df.columns:
        return df

    # points_from_xy builds the point geometries directly from the two
    # columns, so no shapely import is needed here.
    geometry = gpd.points_from_xy(df.longitude, df.latitude)
    return gpd.GeoDataFrame(df, geometry=geometry, crs=4326)
Raster Formats
GeoTIFF (.tif, .tiff)
@fused.udf
def udf(
    path: str = 's3://fused-sample/demo_data/satellite_imagery/wildfires.tiff'
):
    """Read a GeoTIFF with rasterio and return its pixels and bounds.

    Args:
        path: Location of the raster handed to ``rasterio.open``.

    Returns:
        A ``(data, bounds)`` pair: the result of ``read()`` on the opened
        dataset and the dataset's ``bounds`` attribute.
    """
    import rasterio

    # Pull both values out while the dataset is open; the context manager
    # closes it before the function returns.
    with rasterio.open(path) as dataset:
        pixels = dataset.read()
        extent = dataset.bounds
    return pixels, extent
NetCDF (.nc)
@fused.udf
def udf():
    """Download the sample climate NetCDF file and return its first 1000 rows.

    Returns:
        The first 1000 rows of the dataset converted to a DataFrame, with the
        index reset to ordinary columns.
    """
    import xarray as xr

    # The file is fetched with fused.download first and opened from the
    # returned path rather than straight from the s3:// URL.
    remote_url = 's3://fused-sample/demo_data/climate_data.nc'
    local_path = fused.download(remote_url, 'climate_data.nc')

    # Open NetCDF dataset
    dataset = xr.open_dataset(local_path)

    table = dataset.to_dataframe().reset_index()
    return table.head(1000)
STAC Catalogs
To work with STAC catalogs (e.g., Earth on AWS, Microsoft Planetary Computer), see STAC.