diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2e41465 --- /dev/null +++ b/.gitignore @@ -0,0 +1,33 @@ +# Python +__pycache__/ +*.py[cod] +*.pyo +*.pyd +*.so + +# Distribution / packaging +*.egg +*.egg-info/ +dist/ +build/ +.eggs/ + +# Virtual environments +.venv/ +venv/ +env/ + +# Testing +.pytest_cache/ +.coverage +htmlcov/ + +# Editors +.vscode/ +.idea/ +*.swp +*~ + +# OS +.DS_Store +Thumbs.db diff --git a/README.md b/README.md index 9ea4e6e..305918e 100644 --- a/README.md +++ b/README.md @@ -1 +1,87 @@ -# Opera_tools \ No newline at end of file +# Opera_tools + +Python tools for working with [OPERA](https://www.jpl.nasa.gov/go/opera) SAR/InSAR product files. + +## Features + +* **Spatial subsetting** – Clip OPERA product files to a geographic bounding box. + * Supports **GeoTIFF** (`.tif`, `.tiff`) products (RTC, DSWx, …) + * Supports **HDF5** (`.h5`, `.hdf5`, `.nc`) products (CSLC, DISP-S1, …) + +## Installation + +```bash +pip install -r requirements.txt +pip install -e . +``` + +Or directly from GitHub: + +```bash +pip install git+https://github.com/geodesymiami/Opera_tools.git +``` + +## Quick start + +### Command-line + +```bash +# Subset one or more GeoTIFF files to a bounding box +opera_subset --bbox -118.5 33.5 -117.5 34.5 product.tif + +# Subset multiple files and write results to a directory +opera_subset --bbox -118.5 33.5 -117.5 34.5 -o ./subset/ *.tif *.h5 + +# Custom output suffix +opera_subset --bbox -118.5 33.5 -117.5 34.5 --suffix _cropped product.h5 +``` + +Positional and optional arguments: + +``` +usage: opera_subset [-h] --bbox MIN_LON MIN_LAT MAX_LON MAX_LAT + [-o DIR] [--suffix SUFFIX] + FILE [FILE ...] + +positional arguments: + FILE Input OPERA product file(s) (.tif, .tiff, .h5, .hdf5, .nc). + +optional arguments: + --bbox MIN_LON MIN_LAT MAX_LON MAX_LAT + Geographic bounding box in WGS 84 degrees. + -o DIR, --outdir DIR Output directory (default: current directory). + --suffix SUFFIX Suffix appended to each output filename (default: _subset). +``` + +### Python API + +```python +from opera_tools import subset_opera_file, subset_opera_files + +# Subset a single file +success = subset_opera_file( + "OPERA_L2_RTC-S1_T064-135524-IW1_20231101T015038Z_20231101T101117Z_S1A_30_v1.0.tif", + "output_subset.tif", + bbox=(-118.5, 33.5, -117.5, 34.5), # (min_lon, min_lat, max_lon, max_lat) +) + +# Subset a batch of files +written = subset_opera_files( + input_files=["file1.tif", "file2.h5"], + output_dir="./subset/", + bbox=(-118.5, 33.5, -117.5, 34.5), +) +``` + +## Dependencies + +* [numpy](https://numpy.org/) +* [rasterio](https://rasterio.readthedocs.io/) – GeoTIFF subsetting +* [h5py](https://www.h5py.org/) – HDF5 subsetting + +## Running tests + +```bash +pip install pytest +pytest tests/ +``` diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..d2aed0b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +numpy +rasterio +h5py diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..d19bbcd --- /dev/null +++ b/setup.py @@ -0,0 +1,30 @@ +from setuptools import setup, find_packages + +setup( + name="opera-tools", + version="0.1.0", + description="Tools for working with OPERA SAR/InSAR product files", + long_description=open("README.md").read(), + long_description_content_type="text/markdown", + author="geodesymiami", + url="https://github.com/geodesymiami/Opera_tools", + package_dir={"": "src"}, + packages=find_packages(where="src"), + python_requires=">=3.8", + install_requires=[ + "numpy", + "rasterio", + "h5py", + ], + entry_points={ + "console_scripts": [ + "opera_subset=opera_tools.subset:main", + ], + }, + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Topic :: Scientific/Engineering :: GIS", + "Topic :: Scientific/Engineering :: Image Processing", + ], +) diff --git a/src/opera_tools/__init__.py b/src/opera_tools/__init__.py new file mode 100644 index 0000000..585803e --- /dev/null +++ b/src/opera_tools/__init__.py @@ -0,0 +1,5 @@ +"""opera_tools: tools for working with OPERA SAR/InSAR product files.""" + +from .subset import subset_opera_file, subset_opera_files + +__all__ = ["subset_opera_file", "subset_opera_files"] diff --git a/src/opera_tools/subset.py b/src/opera_tools/subset.py new file mode 100644 index 0000000..beca94f --- /dev/null +++ b/src/opera_tools/subset.py @@ -0,0 +1,471 @@ +""" +Tool for subsetting OPERA product files to a geographic bounding box. + +Supports GeoTIFF (.tif) and HDF5 (.h5) OPERA product files. + +Usage (command-line): + opera_subset --bbox \\ + [-o ] [ ...] + +Example: + opera_subset --bbox -118.5 33.5 -117.5 34.5 -o ./subset/ *.tif +""" + +import argparse +import os +import sys + +import numpy as np + +try: + import rasterio + from rasterio.windows import from_bounds + from rasterio.transform import array_bounds + _RASTERIO_AVAILABLE = True +except ImportError: + _RASTERIO_AVAILABLE = False + +try: + import h5py + _H5PY_AVAILABLE = True +except ImportError: + _H5PY_AVAILABLE = False + + +def _check_bbox(bbox): + """Validate a bounding box tuple (min_lon, min_lat, max_lon, max_lat).""" + if len(bbox) != 4: + raise ValueError("bbox must have exactly 4 elements: (min_lon, min_lat, max_lon, max_lat)") + min_lon, min_lat, max_lon, max_lat = bbox + if min_lon >= max_lon: + raise ValueError(f"min_lon ({min_lon}) must be less than max_lon ({max_lon})") + if min_lat >= max_lat: + raise ValueError(f"min_lat ({min_lat}) must be less than max_lat ({max_lat})") + if min_lon < -180 or max_lon > 360: + raise ValueError(f"Longitude values must be in the range [-180, 360]") + if min_lat < -90 or max_lat > 90: + raise ValueError(f"Latitude values must be in the range [-90, 90]") + + +def subset_geotiff(input_file, output_file, bbox): + """ + Subset a GeoTIFF file to the given bounding box. + + Parameters + ---------- + input_file : str + Path to the input GeoTIFF file. + output_file : str + Path to the output GeoTIFF file. + bbox : tuple of float + Bounding box as (min_lon, min_lat, max_lon, max_lat) in geographic + coordinates (EPSG:4326 / WGS 84 degrees). + + Returns + ------- + bool + True if the subset was written successfully, False if the bounding box + does not overlap the file extent. + + Raises + ------ + ImportError + If rasterio is not installed. + FileNotFoundError + If the input file does not exist. + ValueError + If the bounding box is invalid. + """ + if not _RASTERIO_AVAILABLE: + raise ImportError( + "rasterio is required for GeoTIFF subsetting. " + "Install it with: pip install rasterio" + ) + if not os.path.isfile(input_file): + raise FileNotFoundError(f"Input file not found: {input_file}") + _check_bbox(bbox) + + min_lon, min_lat, max_lon, max_lat = bbox + + with rasterio.open(input_file) as src: + # Reproject bbox to file CRS if necessary + from rasterio.crs import CRS + wgs84 = CRS.from_epsg(4326) + file_crs = src.crs + + left, bottom, right, top = min_lon, min_lat, max_lon, max_lat + + if file_crs is not None and not file_crs.equals(wgs84): + try: + from rasterio.warp import transform_bounds + left, bottom, right, top = transform_bounds( + wgs84, file_crs, min_lon, min_lat, max_lon, max_lat + ) + except Exception as exc: + raise RuntimeError( + f"Could not reproject bounding box to file CRS: {exc}" + ) from exc + + # Compute the pixel window that covers the bounding box + window = from_bounds(left, bottom, right, top, src.transform) + file_window = rasterio.windows.Window(0, 0, src.width, src.height) + + try: + window = window.intersection(file_window) + except rasterio.errors.WindowError: + # Bounding box does not overlap the file extent + return False + + if window.width <= 0 or window.height <= 0: + return False + + # Read the data in the window + data = src.read(window=window) + transform = src.window_transform(window) + + # Prepare output metadata + meta = src.meta.copy() + meta.update({ + "height": data.shape[1], + "width": data.shape[2], + "transform": transform, + }) + + os.makedirs(os.path.dirname(os.path.abspath(output_file)), exist_ok=True) + with rasterio.open(output_file, "w", **meta) as dst: + dst.write(data) + + return True + + +def subset_hdf5(input_file, output_file, bbox): + """ + Subset an HDF5 OPERA file to the given bounding box. + + The function looks for 2-D latitude and longitude datasets (or coordinate + arrays stored in the file) to determine which rows and columns fall within + the requested bounding box. If no geographic coordinate information can be + found, a ``RuntimeError`` is raised. + + Parameters + ---------- + input_file : str + Path to the input HDF5 file. + output_file : str + Path to the output HDF5 file. + bbox : tuple of float + Bounding box as (min_lon, min_lat, max_lon, max_lat) in geographic + coordinates (degrees). + + Returns + ------- + bool + True if the subset was written successfully, False if the bounding box + does not overlap the file extent. + + Raises + ------ + ImportError + If h5py is not installed. + FileNotFoundError + If the input file does not exist. + ValueError + If the bounding box is invalid. + RuntimeError + If no geographic coordinate information can be found in the file. + """ + if not _H5PY_AVAILABLE: + raise ImportError( + "h5py is required for HDF5 subsetting. " + "Install it with: pip install h5py" + ) + if not os.path.isfile(input_file): + raise FileNotFoundError(f"Input file not found: {input_file}") + _check_bbox(bbox) + + min_lon, min_lat, max_lon, max_lat = bbox + + with h5py.File(input_file, "r") as src: + # Locate latitude/longitude datasets + lat_data, lon_data = _find_latlon_datasets(src) + if lat_data is None or lon_data is None: + raise RuntimeError( + f"Cannot find latitude/longitude information in {input_file}. " + "Ensure the file contains datasets named 'latitude'/'longitude' " + "or similar geographic coordinate datasets." + ) + + lat = np.asarray(lat_data) + lon = np.asarray(lon_data) + + # Support 1-D coordinate vectors + if lat.ndim == 1 and lon.ndim == 1: + lat_mask = (lat >= min_lat) & (lat <= max_lat) + lon_mask = (lon >= min_lon) & (lon <= max_lon) + row_idx = np.where(lat_mask)[0] + col_idx = np.where(lon_mask)[0] + elif lat.ndim == 2 and lon.ndim == 2: + mask = ( + (lat >= min_lat) & (lat <= max_lat) & + (lon >= min_lon) & (lon <= max_lon) + ) + rows, cols = np.where(mask) + if rows.size == 0: + return False + row_idx = np.arange(rows.min(), rows.max() + 1) + col_idx = np.arange(cols.min(), cols.max() + 1) + else: + raise RuntimeError( + "Latitude/longitude datasets have unsupported dimensions " + f"(lat.ndim={lat.ndim}, lon.ndim={lon.ndim})." + ) + + if row_idx.size == 0 or col_idx.size == 0: + return False + + row_start, row_stop = int(row_idx[0]), int(row_idx[-1]) + 1 + col_start, col_stop = int(col_idx[0]), int(col_idx[-1]) + 1 + + os.makedirs(os.path.dirname(os.path.abspath(output_file)), exist_ok=True) + with h5py.File(output_file, "w") as dst: + _copy_hdf5_subset(src, dst, row_start, row_stop, col_start, col_stop) + + return True + + +# --------------------------------------------------------------------------- +# HDF5 helpers +# --------------------------------------------------------------------------- + +_LAT_NAMES = {"latitude", "lat", "/latitude", "/lat", "y", "y_coords"} +_LON_NAMES = {"longitude", "lon", "/longitude", "/lon", "x", "x_coords"} + + +def _find_latlon_datasets(h5file): + """Return (lat_dataset, lon_dataset) from an open HDF5 file, or (None, None).""" + lat_ds = None + lon_ds = None + + def _visitor(name, obj): + nonlocal lat_ds, lon_ds + if isinstance(obj, h5py.Dataset): + lower = name.lower().split("/")[-1] + if lower in _LAT_NAMES and lat_ds is None: + lat_ds = obj + if lower in _LON_NAMES and lon_ds is None: + lon_ds = obj + + h5file.visititems(_visitor) + return lat_ds, lon_ds + + +def _copy_hdf5_subset(src, dst, row_start, row_stop, col_start, col_stop): + """Recursively copy a spatial subset of datasets from src to dst.""" + # Copy root attributes + for key, val in src.attrs.items(): + try: + dst.attrs[key] = val + except Exception: + pass + + def _copy_item(name, obj): + if isinstance(obj, h5py.Group): + grp = dst.require_group(name) + for key, val in obj.attrs.items(): + try: + grp.attrs[key] = val + except Exception: + pass + elif isinstance(obj, h5py.Dataset): + data = obj[()] + lower = name.lower().split("/")[-1] + + if data.ndim == 2: + data = data[row_start:row_stop, col_start:col_stop] + elif data.ndim == 3: + data = data[:, row_start:row_stop, col_start:col_stop] + elif data.ndim == 1: + # 1-D coordinate array: subset rows or columns + if lower in _LAT_NAMES: + data = data[row_start:row_stop] + elif lower in _LON_NAMES: + data = data[col_start:col_stop] + # Other 1-D datasets are copied as-is + + ds = dst.require_dataset( + name, + shape=data.shape, + dtype=data.dtype, + data=data, + ) + for key, val in obj.attrs.items(): + try: + ds.attrs[key] = val + except Exception: + pass + + src.visititems(_copy_item) + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + +def subset_opera_file(input_file, output_file, bbox): + """ + Subset a single OPERA product file to the given bounding box. + + The file format is detected automatically from the file extension: + * ``.tif`` / ``.tiff`` → GeoTIFF (requires rasterio) + * ``.h5`` / ``.hdf5`` / ``.nc`` → HDF5 (requires h5py) + + Parameters + ---------- + input_file : str + Path to the input OPERA product file. + output_file : str + Path to the output (subsetted) file. + bbox : tuple of float + Bounding box as ``(min_lon, min_lat, max_lon, max_lat)`` in WGS 84 + geographic coordinates (degrees). + + Returns + ------- + bool + ``True`` if the output file was written, ``False`` if the bounding + box does not overlap the file's spatial extent. + + Raises + ------ + ValueError + If the file extension is not recognised or the bounding box is invalid. + """ + ext = os.path.splitext(input_file)[-1].lower() + if ext in {".tif", ".tiff"}: + return subset_geotiff(input_file, output_file, bbox) + elif ext in {".h5", ".hdf5", ".nc"}: + return subset_hdf5(input_file, output_file, bbox) + else: + raise ValueError( + f"Unsupported file extension '{ext}'. " + "Supported formats: .tif, .tiff, .h5, .hdf5, .nc" + ) + + +def subset_opera_files(input_files, output_dir, bbox, suffix="_subset"): + """ + Subset multiple OPERA product files to the given bounding box. + + Parameters + ---------- + input_files : list of str + Paths to the input OPERA product files. + output_dir : str + Directory where subsetted files will be written. + bbox : tuple of float + Bounding box as ``(min_lon, min_lat, max_lon, max_lat)`` in WGS 84 + geographic coordinates (degrees). + suffix : str, optional + Suffix appended to the base filename (before the extension) for the + output files. Default is ``'_subset'``. + + Returns + ------- + list of str + Paths of output files that were successfully written. + """ + os.makedirs(output_dir, exist_ok=True) + written = [] + for input_file in input_files: + base = os.path.basename(input_file) + name, ext = os.path.splitext(base) + output_file = os.path.join(output_dir, f"{name}{suffix}{ext}") + success = subset_opera_file(input_file, output_file, bbox) + if success: + written.append(output_file) + else: + print( + f"WARNING: bounding box does not overlap '{input_file}'; skipped.", + file=sys.stderr, + ) + return written + + +# --------------------------------------------------------------------------- +# Command-line interface +# --------------------------------------------------------------------------- + +def _build_parser(): + parser = argparse.ArgumentParser( + prog="opera_subset", + description=( + "Subset OPERA product files (GeoTIFF or HDF5) to a geographic " + "bounding box." + ), + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Subset a single GeoTIFF file + opera_subset --bbox -118.5 33.5 -117.5 34.5 product.tif + + # Subset multiple files, writing results to ./subset/ + opera_subset --bbox -118.5 33.5 -117.5 34.5 -o ./subset/ *.tif *.h5 + + # Use a custom suffix for output filenames + opera_subset --bbox -118.5 33.5 -117.5 34.5 --suffix _cropped product.tif +""", + ) + parser.add_argument( + "files", + nargs="+", + metavar="FILE", + help="Input OPERA product file(s) (.tif, .tiff, .h5, .hdf5, .nc).", + ) + parser.add_argument( + "--bbox", + nargs=4, + type=float, + metavar=("MIN_LON", "MIN_LAT", "MAX_LON", "MAX_LAT"), + required=True, + help=( + "Geographic bounding box in WGS 84 degrees: " + "MIN_LON MIN_LAT MAX_LON MAX_LAT." + ), + ) + parser.add_argument( + "-o", "--outdir", + default=".", + metavar="DIR", + help="Output directory for subsetted files (default: current directory).", + ) + parser.add_argument( + "--suffix", + default="_subset", + metavar="SUFFIX", + help=( + "Suffix appended to the base filename for each output file " + "(default: '_subset')." + ), + ) + return parser + + +def main(argv=None): + """Entry point for the ``opera_subset`` command-line tool.""" + parser = _build_parser() + args = parser.parse_args(argv) + + bbox = tuple(args.bbox) + written = subset_opera_files(args.files, args.outdir, bbox, suffix=args.suffix) + + if written: + print(f"Wrote {len(written)} file(s):") + for path in written: + print(f" {path}") + else: + print("No output files were written (bounding box may not overlap any input files).") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/test_subset.py b/tests/test_subset.py new file mode 100644 index 0000000..1423c3e --- /dev/null +++ b/tests/test_subset.py @@ -0,0 +1,366 @@ +"""Tests for opera_tools.subset.""" + +import os +import tempfile + +import numpy as np +import pytest + + +# --------------------------------------------------------------------------- +# Helpers for creating small synthetic test files +# --------------------------------------------------------------------------- + +def _make_geotiff(path, lon_min=-118.5, lat_min=33.5, lon_max=-117.5, lat_max=34.5, + width=100, height=100, nodata=-9999): + """Create a minimal single-band GeoTIFF covering the given extent.""" + rasterio = pytest.importorskip("rasterio") + from rasterio.transform import from_bounds + from rasterio.crs import CRS + + transform = from_bounds(lon_min, lat_min, lon_max, lat_max, width, height) + data = np.arange(width * height, dtype=np.float32).reshape(1, height, width) + + with rasterio.open( + path, + "w", + driver="GTiff", + height=height, + width=width, + count=1, + dtype=data.dtype, + crs=CRS.from_epsg(4326), + transform=transform, + nodata=nodata, + ) as dst: + dst.write(data) + + +def _make_hdf5(path, lat_min=33.5, lat_max=34.5, lon_min=-118.5, lon_max=-117.5, + nlat=100, nlon=100): + """Create a minimal HDF5 file with 1-D lat/lon coordinate arrays.""" + h5py = pytest.importorskip("h5py") + + lat = np.linspace(lat_max, lat_min, nlat) # descending (north-to-south) + lon = np.linspace(lon_min, lon_max, nlon) + data = np.random.rand(nlat, nlon).astype(np.float32) + + with h5py.File(path, "w") as f: + f.create_dataset("latitude", data=lat) + f.create_dataset("longitude", data=lon) + f.create_dataset("data", data=data) + f.attrs["description"] = "synthetic test file" + + +# --------------------------------------------------------------------------- +# Tests for _check_bbox +# --------------------------------------------------------------------------- + +class TestCheckBbox: + def test_valid_bbox(self): + from opera_tools.subset import _check_bbox + _check_bbox((-118.5, 33.5, -117.5, 34.5)) # should not raise + + def test_wrong_length(self): + from opera_tools.subset import _check_bbox + with pytest.raises(ValueError, match="exactly 4"): + _check_bbox((-118.5, 33.5, -117.5)) + + def test_min_lon_ge_max_lon(self): + from opera_tools.subset import _check_bbox + with pytest.raises(ValueError, match="min_lon"): + _check_bbox((-117.5, 33.5, -118.5, 34.5)) + + def test_min_lat_ge_max_lat(self): + from opera_tools.subset import _check_bbox + with pytest.raises(ValueError, match="min_lat"): + _check_bbox((-118.5, 34.5, -117.5, 33.5)) + + def test_invalid_longitude(self): + from opera_tools.subset import _check_bbox + with pytest.raises(ValueError, match="Longitude"): + _check_bbox((-200.0, 33.5, -117.5, 34.5)) + + def test_invalid_latitude(self): + from opera_tools.subset import _check_bbox + with pytest.raises(ValueError, match="Latitude"): + _check_bbox((-118.5, -91.0, -117.5, 34.5)) + + +# --------------------------------------------------------------------------- +# Tests for subset_geotiff +# --------------------------------------------------------------------------- + +class TestSubsetGeotiff: + def test_basic_subset(self, tmp_path): + rasterio = pytest.importorskip("rasterio") + from opera_tools.subset import subset_geotiff + + src = str(tmp_path / "input.tif") + dst = str(tmp_path / "output.tif") + _make_geotiff(src) + + # Subset to a smaller bbox inside the file extent + result = subset_geotiff(src, dst, (-118.2, 33.7, -117.8, 34.2)) + assert result is True + assert os.path.isfile(dst) + + with rasterio.open(dst) as f: + assert f.width > 0 + assert f.height > 0 + # Output must be smaller than input + with rasterio.open(src) as orig: + assert f.width <= orig.width + assert f.height <= orig.height + + def test_no_overlap_returns_false(self, tmp_path): + pytest.importorskip("rasterio") + from opera_tools.subset import subset_geotiff + + src = str(tmp_path / "input.tif") + dst = str(tmp_path / "output.tif") + _make_geotiff(src) + + # Bbox completely outside the file extent + result = subset_geotiff(src, dst, (10.0, 10.0, 20.0, 20.0)) + assert result is False + assert not os.path.isfile(dst) + + def test_file_not_found(self, tmp_path): + pytest.importorskip("rasterio") + from opera_tools.subset import subset_geotiff + + with pytest.raises(FileNotFoundError): + subset_geotiff(str(tmp_path / "missing.tif"), str(tmp_path / "out.tif"), + (-118.5, 33.5, -117.5, 34.5)) + + def test_invalid_bbox_raises(self, tmp_path): + pytest.importorskip("rasterio") + from opera_tools.subset import subset_geotiff + + src = str(tmp_path / "input.tif") + _make_geotiff(src) + with pytest.raises(ValueError): + subset_geotiff(src, str(tmp_path / "out.tif"), (-117.5, 33.5, -118.5, 34.5)) + + def test_full_overlap_preserves_data(self, tmp_path): + rasterio = pytest.importorskip("rasterio") + from opera_tools.subset import subset_geotiff + + src = str(tmp_path / "input.tif") + dst = str(tmp_path / "output.tif") + _make_geotiff(src) + + # Subset with the exact file extent should copy all pixels + result = subset_geotiff(src, dst, (-118.5, 33.5, -117.5, 34.5)) + assert result is True + + with rasterio.open(src) as s, rasterio.open(dst) as d: + assert s.width == d.width + assert s.height == d.height + + def test_output_directory_created(self, tmp_path): + pytest.importorskip("rasterio") + from opera_tools.subset import subset_geotiff + + src = str(tmp_path / "input.tif") + _make_geotiff(src) + out_dir = tmp_path / "subdir" / "nested" + dst = str(out_dir / "output.tif") + + result = subset_geotiff(src, dst, (-118.2, 33.7, -117.8, 34.2)) + assert result is True + assert os.path.isfile(dst) + + +# --------------------------------------------------------------------------- +# Tests for subset_hdf5 +# --------------------------------------------------------------------------- + +class TestSubsetHdf5: + def test_basic_subset(self, tmp_path): + h5py = pytest.importorskip("h5py") + from opera_tools.subset import subset_hdf5 + + src = str(tmp_path / "input.h5") + dst = str(tmp_path / "output.h5") + _make_hdf5(src) + + result = subset_hdf5(src, dst, (-118.2, 33.7, -117.8, 34.2)) + assert result is True + assert os.path.isfile(dst) + + with h5py.File(src) as s, h5py.File(dst) as d: + assert d["latitude"].shape[0] <= s["latitude"].shape[0] + assert d["longitude"].shape[0] <= s["longitude"].shape[0] + # Lat/lon values in output must be within bbox + assert d["latitude"][()].min() >= 33.7 - 1e-6 + assert d["latitude"][()].max() <= 34.2 + 1e-6 + assert d["longitude"][()].min() >= -118.2 - 1e-6 + assert d["longitude"][()].max() <= -117.8 + 1e-6 + + def test_no_overlap_returns_false(self, tmp_path): + pytest.importorskip("h5py") + from opera_tools.subset import subset_hdf5 + + src = str(tmp_path / "input.h5") + dst = str(tmp_path / "output.h5") + _make_hdf5(src) + + result = subset_hdf5(src, dst, (10.0, 10.0, 20.0, 20.0)) + assert result is False + assert not os.path.isfile(dst) + + def test_file_not_found(self, tmp_path): + pytest.importorskip("h5py") + from opera_tools.subset import subset_hdf5 + + with pytest.raises(FileNotFoundError): + subset_hdf5(str(tmp_path / "missing.h5"), str(tmp_path / "out.h5"), + (-118.5, 33.5, -117.5, 34.5)) + + def test_attributes_preserved(self, tmp_path): + h5py = pytest.importorskip("h5py") + from opera_tools.subset import subset_hdf5 + + src = str(tmp_path / "input.h5") + dst = str(tmp_path / "output.h5") + _make_hdf5(src) + + subset_hdf5(src, dst, (-118.2, 33.7, -117.8, 34.2)) + + with h5py.File(dst) as d: + assert d.attrs.get("description") == "synthetic test file" + + +# --------------------------------------------------------------------------- +# Tests for subset_opera_file (dispatcher) +# --------------------------------------------------------------------------- + +class TestSubsetOperaFile: + def test_dispatches_geotiff(self, tmp_path): + pytest.importorskip("rasterio") + from opera_tools.subset import subset_opera_file + + src = str(tmp_path / "product.tif") + dst = str(tmp_path / "product_subset.tif") + _make_geotiff(src) + + result = subset_opera_file(src, dst, (-118.2, 33.7, -117.8, 34.2)) + assert result is True + + def test_dispatches_hdf5(self, tmp_path): + pytest.importorskip("h5py") + from opera_tools.subset import subset_opera_file + + src = str(tmp_path / "product.h5") + dst = str(tmp_path / "product_subset.h5") + _make_hdf5(src) + + result = subset_opera_file(src, dst, (-118.2, 33.7, -117.8, 34.2)) + assert result is True + + def test_unsupported_extension(self, tmp_path): + from opera_tools.subset import subset_opera_file + + with pytest.raises(ValueError, match="Unsupported file extension"): + subset_opera_file( + str(tmp_path / "file.xyz"), + str(tmp_path / "out.xyz"), + (-118.5, 33.5, -117.5, 34.5), + ) + + +# --------------------------------------------------------------------------- +# Tests for subset_opera_files (batch) +# --------------------------------------------------------------------------- + +class TestSubsetOperaFiles: + def test_batch_geotiff(self, tmp_path): + pytest.importorskip("rasterio") + from opera_tools.subset import subset_opera_files + + src1 = str(tmp_path / "a.tif") + src2 = str(tmp_path / "b.tif") + _make_geotiff(src1) + _make_geotiff(src2) + + out_dir = str(tmp_path / "subset") + written = subset_opera_files([src1, src2], out_dir, + (-118.2, 33.7, -117.8, 34.2)) + assert len(written) == 2 + for path in written: + assert os.path.isfile(path) + + def test_skips_non_overlapping(self, tmp_path): + pytest.importorskip("rasterio") + from opera_tools.subset import subset_opera_files + + src1 = str(tmp_path / "a.tif") + src2 = str(tmp_path / "b.tif") + _make_geotiff(src1) + _make_geotiff(src2) + + # Bbox far from both files + out_dir = str(tmp_path / "subset") + written = subset_opera_files([src1, src2], out_dir, (10.0, 10.0, 20.0, 20.0)) + assert len(written) == 0 + + def test_custom_suffix(self, tmp_path): + pytest.importorskip("rasterio") + from opera_tools.subset import subset_opera_files + + src = str(tmp_path / "product.tif") + _make_geotiff(src) + + out_dir = str(tmp_path / "subset") + written = subset_opera_files([src], out_dir, + (-118.2, 33.7, -117.8, 34.2), + suffix="_cropped") + assert len(written) == 1 + assert written[0].endswith("_cropped.tif") + + +# --------------------------------------------------------------------------- +# Tests for CLI +# --------------------------------------------------------------------------- + +class TestCLI: + def test_basic_cli(self, tmp_path): + pytest.importorskip("rasterio") + from opera_tools.subset import main + + src = str(tmp_path / "product.tif") + out_dir = str(tmp_path / "out") + _make_geotiff(src) + + main(["--bbox", "-118.2", "33.7", "-117.8", "34.2", + "-o", out_dir, src]) + + assert os.path.isfile(os.path.join(out_dir, "product_subset.tif")) + + def test_cli_no_overlap_exits_nonzero(self, tmp_path): + pytest.importorskip("rasterio") + from opera_tools.subset import main + + src = str(tmp_path / "product.tif") + out_dir = str(tmp_path / "out") + _make_geotiff(src) + + with pytest.raises(SystemExit) as exc_info: + main(["--bbox", "10.0", "10.0", "20.0", "20.0", + "-o", out_dir, src]) + assert exc_info.value.code != 0 + + def test_cli_custom_suffix(self, tmp_path): + pytest.importorskip("rasterio") + from opera_tools.subset import main + + src = str(tmp_path / "product.tif") + out_dir = str(tmp_path / "out") + _make_geotiff(src) + + main(["--bbox", "-118.2", "33.7", "-117.8", "34.2", + "-o", out_dir, "--suffix", "_cropped", src]) + + assert os.path.isfile(os.path.join(out_dir, "product_cropped.tif"))