# invest/tests/test_sdr.py
"""InVEST SDR model tests."""
import os
import shutil
import tempfile
import unittest

import numpy
import pygeoprocessing
import pytest
from osgeo import gdal
from osgeo import osr

gdal.UseExceptions()

REGRESSION_DATA = os.path.join(
    os.path.dirname(__file__), '..', 'data', 'invest-test-data', 'sdr')
SAMPLE_DATA = os.path.join(REGRESSION_DATA, 'input')
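# The regression expectations throughout this file assume these datasets:
# REGRESSION_DATA is the SDR folder of the invest-test-data directory and
# SAMPLE_DATA is its 'input' subdirectory of model sample inputs.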


def assert_expected_results_in_vector(expected_results, vector_path):
    """Assert that the first feature of a vector matches key/value pairs."""
    watershed_results_vector = gdal.OpenEx(vector_path, gdal.OF_VECTOR)
    watershed_results_layer = watershed_results_vector.GetLayer()
    watershed_results_feature = watershed_results_layer.GetFeature(0)
    actual_results = {}
    for key in expected_results:
        actual_results[key] = watershed_results_feature.GetField(key)
    watershed_results_feature = None
    watershed_results_layer = None
    watershed_results_vector = None

    incorrect_vals = {}
    for key in expected_results:
        # Using relative tolerance here because results with different
        # orders of magnitude are tested.
        try:
            numpy.testing.assert_allclose(
                actual_results[key], expected_results[key],
                rtol=0.00001, atol=0)
        except AssertionError:
            incorrect_vals[key] = (
                actual_results[key], expected_results[key])
    if incorrect_vals:
        raise AssertionError(
            f'These keys had incorrect (actual, expected) values: '
            f'{incorrect_vals}')


class SDRTests(unittest.TestCase):
    """Regression tests for InVEST SDR model."""

    def setUp(self):
        """Initialize SDR regression tests."""
        self.workspace_dir = tempfile.mkdtemp()

    def tearDown(self):
        """Clean up remaining files."""
        shutil.rmtree(self.workspace_dir)

    @staticmethod
    def generate_base_args(workspace_dir):
        """Generate a base sample args dict for SDR."""
        args = {
            'biophysical_table_path': os.path.join(
                SAMPLE_DATA, 'biophysical_table.csv'),
            'dem_path': os.path.join(SAMPLE_DATA, 'dem.tif'),
            'erodibility_path': os.path.join(
                SAMPLE_DATA, 'erodibility_SI_clip.tif'),
            'erosivity_path': os.path.join(SAMPLE_DATA, 'erosivity.tif'),
            'ic_0_param': '0.5',
            'k_param': '2',
            'lulc_path': os.path.join(SAMPLE_DATA, 'landuse_90.tif'),
            'sdr_max': '0.8',
            'l_max': '122',
            'threshold_flow_accumulation': '1000',
            'watersheds_path': os.path.join(SAMPLE_DATA, 'watersheds.shp'),
            'workspace_dir': workspace_dir,
            'n_workers': -1,
            'flow_dir_algorithm': 'MFD'
        }
        return args
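
    # Each test below starts from generate_base_args() and overrides
    # individual keys (e.g. 'flow_dir_algorithm', 'drainage_path', or
    # 'biophysical_table_path') to set up the scenario under test.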

    def test_sdr_validation(self):
        """SDR test regular validation."""
        from natcap.invest.sdr import sdr

        # use predefined directory so test can clean up files during teardown
        args = SDRTests.generate_base_args(self.workspace_dir)
        args['drainage_path'] = os.path.join(
            REGRESSION_DATA, 'sample_drainage.tif')
        validate_result = sdr.validate(args, limit_to=None)
        self.assertFalse(
            validate_result,  # list should be empty if validation passes
            "expected no failed validations instead got %s" % str(
                validate_result))

    def test_sdr_validation_wrong_types(self):
        """SDR test validation for wrong GIS types."""
        from natcap.invest.sdr import sdr

        # use predefined directory so test can clean up files during teardown
        args = SDRTests.generate_base_args(self.workspace_dir)
        # swap watersheds and dem so each key points to the wrong GIS type
        args['dem_path'], args['watersheds_path'] = (
            args['watersheds_path'], args['dem_path'])
        validate_result = sdr.validate(args, limit_to=None)
        self.assertTrue(
            validate_result,
            "expected failed validations instead didn't get any")
        for (validation_keys, error_msg), phrase in zip(
                validate_result, ['GDAL raster', 'GDAL vector']):
            self.assertIn(phrase, error_msg)

    def test_sdr_validation_key_no_value(self):
        """SDR test validation that's missing a value on a key."""
        from natcap.invest.sdr import sdr

        # use predefined directory so test can clean up files during teardown
        args = SDRTests.generate_base_args(self.workspace_dir)
        args['dem_path'] = ''
        validate_result = sdr.validate(args, limit_to=None)
        self.assertTrue(
            validate_result,
            "expected a validation error but didn't get one")

    def test_base_regression(self):
        """SDR base regression test on test data.

        Executes SDR with test data. Checks for accuracy of aggregate
        values in the summary vector, presence of the drainage raster in
        intermediate outputs, absence of negative (non-nodata) values
        in the sed_deposition raster, and accuracy of raster outputs (as
        measured by the sum of their non-nodata pixel values).
        """
        from natcap.invest.sdr import sdr

        # use predefined directory so test can clean up files during teardown
        args = SDRTests.generate_base_args(self.workspace_dir)
        sdr.execute(args)
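
        # Aggregate totals in the watershed summary vector are in Mg/yr;
        # the corresponding per-pixel rasters (checked further below) are
        # in Mg/(ha*yr).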
        expected_watershed_totals = {
            'usle_tot': 2.62457418442,
            'sed_export': 0.09748090804,
            'sed_dep': 1.71672844887,
            'avoid_exp': 10199.46875,
            'avoid_eros': 274444.75,
        }
        vector_path = os.path.join(
            args['workspace_dir'], 'watershed_results_sdr.shp')
        assert_expected_results_in_vector(
            expected_watershed_totals, vector_path)

        # We only need to test that the drainage mask exists; functionality
        # for that raster is tested elsewhere.
        self.assertTrue(
            os.path.exists(
                os.path.join(
                    args['workspace_dir'], 'intermediate_outputs',
                    'what_drains_to_stream.tif')))

        # Check that sed_deposition does not have any negative, non-nodata
        # values, even if they are very small.
        sed_deposition_path = os.path.join(
            args['workspace_dir'], 'sed_deposition.tif')
        sed_dep_nodata = pygeoprocessing.get_raster_info(
            sed_deposition_path)['nodata'][0]
        sed_dep_array = pygeoprocessing.raster_to_numpy_array(
            sed_deposition_path)
        negative_non_nodata_mask = (
            (~numpy.isclose(sed_dep_array, sed_dep_nodata)) &
            (sed_dep_array < 0))
        self.assertEqual(
            numpy.count_nonzero(sed_dep_array[negative_non_nodata_mask]), 0)

        # Check raster outputs to make sure values are in Mg/ha/yr.
        raster_info = pygeoprocessing.get_raster_info(args['dem_path'])
        pixel_area = abs(numpy.prod(raster_info['pixel_size']))
        pixels_per_hectare = 10000 / pixel_area
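        # For example, if the DEM had 30 m square pixels, pixel_area would be
        # 900 m^2 and pixels_per_hectare would be 10000 / 900, about 11.1.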
        for (raster_name, attr_name) in [
                ('usle.tif', 'usle_tot'),
                ('sed_export.tif', 'sed_export'),
                ('sed_deposition.tif', 'sed_dep'),
                ('avoided_export.tif', 'avoid_exp'),
                ('avoided_erosion.tif', 'avoid_eros')]:
            # Since pixel values are Mg/(ha•yr), the raster sum is
            # (Mg•px)/(ha•yr), which equals the watershed total (Mg/yr)
            # multiplied by pixels_per_hectare (px/ha).
            expected_sum = (
                expected_watershed_totals[attr_name] * pixels_per_hectare)
            raster_path = os.path.join(args['workspace_dir'], raster_name)
            nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
            raster_sum = 0.0
            for _, block in pygeoprocessing.iterblocks((raster_path, 1)):
                raster_sum += numpy.sum(
                    block[~pygeoprocessing.array_equals_nodata(
                        block, nodata)], dtype=numpy.float64)
            numpy.testing.assert_allclose(raster_sum, expected_sum)

    def test_base_regression_d8(self):
        """SDR base regression test on sample data in D8 mode.

        Execute SDR with sample data in D8 routing mode and check that the
        output files are generated and that the aggregate shapefile fields
        are the same as the regression case.
        """
        from natcap.invest.sdr import sdr

        # use predefined directory so test can clean up files during teardown
        args = SDRTests.generate_base_args(self.workspace_dir)
        args['flow_dir_algorithm'] = 'D8'
        args['threshold_flow_accumulation'] = 100
        sdr.execute(args)

        expected_results = {
            'usle_tot': 2.520746,
            'sed_export': 0.187428,
            'sed_dep': 2.300645,
            'avoid_exp': 19283.767578,
            'avoid_eros': 263415,
        }
        vector_path = os.path.join(
            args['workspace_dir'], 'watershed_results_sdr.shp')
        assert_expected_results_in_vector(expected_results, vector_path)

        # We only need to test that the drainage mask exists; functionality
        # for that raster is tested elsewhere.
        self.assertTrue(
            os.path.exists(
                os.path.join(
                    args['workspace_dir'], 'intermediate_outputs',
                    'what_drains_to_stream.tif')))

        # Check that sed_deposition does not have any negative, non-nodata
        # values, even if they are very small.
        sed_deposition_path = os.path.join(
            args['workspace_dir'], 'sed_deposition.tif')
        sed_dep_nodata = pygeoprocessing.get_raster_info(
            sed_deposition_path)['nodata'][0]
        sed_dep_array = pygeoprocessing.raster_to_numpy_array(
            sed_deposition_path)
        negative_non_nodata_mask = (
            (~numpy.isclose(sed_dep_array, sed_dep_nodata)) &
            (sed_dep_array < 0))
        self.assertEqual(
            numpy.count_nonzero(sed_dep_array[negative_non_nodata_mask]), 0)

    def test_regression_with_undefined_nodata(self):
        """SDR base regression test with undefined nodata values.

        Execute SDR with sample data in which all input rasters have
        undefined nodata values.
        """
        from natcap.invest.sdr import sdr

        # use predefined directory so test can clean up files during teardown
        args = SDRTests.generate_base_args(self.workspace_dir)

        # set all input rasters to have undefined nodata values
        tmp_dir = os.path.join(args['workspace_dir'], 'nodata_raster_dir')
        os.makedirs(tmp_dir)
        for path_key in ['erodibility_path', 'erosivity_path', 'lulc_path']:
            target_path = os.path.join(
                tmp_dir, os.path.basename(args[path_key]))
            datatype = pygeoprocessing.get_raster_info(
                args[path_key])['datatype']
            pygeoprocessing.new_raster_from_base(
                args[path_key], target_path, datatype, [None])
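            # A band nodata list of [None] creates the copy with no nodata
            # value defined; the original pixel values are then written into
            # it below with GDAL.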
            base_raster = gdal.OpenEx(args[path_key], gdal.OF_RASTER)
            base_band = base_raster.GetRasterBand(1)
            base_array = base_band.ReadAsArray()
            base_band = None
            base_raster = None

            target_raster = gdal.OpenEx(
                target_path, gdal.OF_RASTER | gdal.GA_Update)
            target_band = target_raster.GetRasterBand(1)
            target_band.WriteArray(base_array)
            target_band = None
            target_raster = None
            args[path_key] = target_path
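
        # With nodata undefined on every input raster, the model should still
        # run and reproduce the base regression totals checked below.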
        sdr.execute(args)

        expected_results = {
            'sed_export': 0.09748090804,
            'usle_tot': 2.62457418442,
            'avoid_exp': 10199.46875,
            'avoid_eros': 274444.75,
        }
        vector_path = os.path.join(
            args['workspace_dir'], 'watershed_results_sdr.shp')
        assert_expected_results_in_vector(expected_results, vector_path)

    def test_non_square_dem(self):
        """SDR non-square DEM pixels.

        Execute SDR with a DEM that has non-square pixels and verify the
        aggregate results.
        """
        from natcap.invest.sdr import sdr

        # use predefined directory so test can clean up files during teardown
        args = SDRTests.generate_base_args(self.workspace_dir)
        args['dem_path'] = os.path.join(SAMPLE_DATA, 'dem_non_square.tif')
        sdr.execute(args)

        expected_results = {
            'sed_export': 0.08896198869,
            'usle_tot': 1.86480891705,
            'avoid_exp': 9203.955078125,
            'avoid_eros': 194212.28125,
        }
        vector_path = os.path.join(
            args['workspace_dir'], 'watershed_results_sdr.shp')
        assert_expected_results_in_vector(expected_results, vector_path)

    def test_drainage_regression(self):
        """SDR drainage layer regression test on sample data.

        Execute SDR with sample data and a drainage layer and check that the
        output files are generated and that the aggregate shapefile fields
        are the same as the regression case.
        """
        from natcap.invest.sdr import sdr

        # use predefined directory so test can clean up files during teardown
        args = SDRTests.generate_base_args(self.workspace_dir)
        args['drainage_path'] = os.path.join(
            REGRESSION_DATA, 'sample_drainage.tif')
        sdr.execute(args)

        expected_results = {
            'sed_export': 0.17336219549,
            'usle_tot': 2.56186032295,
            'avoid_exp': 17980.05859375,
            'avoid_eros': 267663.71875,
        }
        vector_path = os.path.join(
            args['workspace_dir'], 'watershed_results_sdr.shp')
        assert_expected_results_in_vector(expected_results, vector_path)

    def test_base_usle_c_too_large(self):
        """SDR test expected exception for USLE_C > 1.0."""
        from natcap.invest.sdr import sdr

        # use predefined directory so test can clean up files during teardown
        args = SDRTests.generate_base_args(self.workspace_dir)
        args['biophysical_table_path'] = os.path.join(
            REGRESSION_DATA, 'biophysical_table_too_large.csv')

        with self.assertRaises(ValueError) as context:
            sdr.execute(args)
        self.assertIn(
            'A value in the biophysical table is not a number '
            'within range 0..1.', str(context.exception))

    def test_base_usle_p_nan(self):
        """SDR test expected exception for USLE_P not a number."""
        from natcap.invest.sdr import sdr

        # use predefined directory so test can clean up files during teardown
        args = SDRTests.generate_base_args(self.workspace_dir)
        args['biophysical_table_path'] = os.path.join(
            REGRESSION_DATA, 'biophysical_table_invalid_value.csv')

        with self.assertRaises(ValueError) as context:
            sdr.execute(args)
        self.assertIn(
            'could not be interpreted as RatioInput', str(context.exception))

    def test_lucode_not_a_number(self):
        """SDR test expected exception for invalid data in lucode column."""
        from natcap.invest.sdr import sdr

        # use predefined directory so test can clean up files during teardown
        args = SDRTests.generate_base_args(self.workspace_dir)
        args['biophysical_table_path'] = os.path.join(
            self.workspace_dir, 'biophysical_table_invalid_lucode.csv')
        invalid_value = 'forest'
        with open(args['biophysical_table_path'], 'w') as file:
            file.write(
                'desc,lucode,usle_p,usle_c\n'
                f'0,{invalid_value},0.5,0.5\n')

        with self.assertRaises(ValueError) as context:
            sdr.execute(args)
        self.assertIn(
            'could not be interpreted as IntegerInput', str(context.exception))

    def test_missing_lulc_value(self):
        """SDR test for ValueError when LULC value not found in table."""
        import pandas
        from natcap.invest.sdr import sdr

        # use predefined directory so test can clean up files during teardown
        args = SDRTests.generate_base_args(self.workspace_dir)

        # remove a row from the biophysical table so that a LULC value is
        # missing from the table
        bad_biophysical_path = os.path.join(
            self.workspace_dir, 'bad_biophysical_table.csv')
        bio_df = pandas.read_csv(args['biophysical_table_path'])
        bio_df = bio_df[bio_df['lucode'] != 2]
        bio_df.to_csv(bad_biophysical_path)
        bio_df = None
        args['biophysical_table_path'] = bad_biophysical_path

        with self.assertRaises(ValueError) as context:
            sdr.execute(args)
        self.assertIn(
            "The missing values found in the LULC raster but not the table"
            " are: [2.]", str(context.exception))

    def test_what_drains_to_stream(self):
        """SDR test for what pixels drain to a stream."""
        from natcap.invest.sdr import sdr

        srs = osr.SpatialReference()
        srs.ImportFromEPSG(26910)  # NAD83 / UTM zone 10N
        srs_wkt = srs.ExportToWkt()
        origin = (463250, 4929700)
        pixel_size = (30, -30)

        flow_dir_mfd = numpy.array([
            [0, 1],
            [1, 1]], dtype=numpy.float64)
        flow_dir_mfd_nodata = 0  # Matches pygeoprocessing output
        flow_dir_mfd_path = os.path.join(self.workspace_dir, 'flow_dir.tif')
        pygeoprocessing.numpy_array_to_raster(
            flow_dir_mfd, flow_dir_mfd_nodata, pixel_size, origin, srs_wkt,
            flow_dir_mfd_path)

        dist_to_channel = numpy.array([
            [10, 5],
            [-1, 6]], dtype=numpy.float64)
        dist_to_channel_nodata = -1  # Matches pygeoprocessing output
        dist_to_channel_path = os.path.join(
            self.workspace_dir, 'dist_to_channel.tif')
        pygeoprocessing.numpy_array_to_raster(
            dist_to_channel, dist_to_channel_nodata, pixel_size, origin,
            srs_wkt, dist_to_channel_path)

        target_what_drains_path = os.path.join(
            self.workspace_dir, 'what_drains.tif')
        sdr._calculate_what_drains_to_stream(
            flow_dir_mfd_path, dist_to_channel_path, target_what_drains_path)

        # 255 is the byte nodata value assigned
        expected_drainage = numpy.array([
            [255, 1],
            [0, 1]], dtype=numpy.uint8)
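        # Expected semantics: 1 means the pixel drains to a stream, 0 means it
        # has a flow direction but no defined distance to a channel (so it
        # does not drain), and 255 is nodata where flow direction is nodata.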
        what_drains = pygeoprocessing.raster_to_numpy_array(
            target_what_drains_path)
        numpy.testing.assert_allclose(what_drains, expected_drainage)

    def test_ls_factor(self):
        """SDR test for our LS Factor function."""
        from natcap.invest.sdr import sdr

        nodata = -1
        # These varying percent-slope values should cover all of the slope
        # factor and slope table cases.
        pct_slope_array = numpy.array(
            [[1.5, 4, 8, 10, 15, nodata]], dtype=numpy.float32)
        flow_accum_array = numpy.array(
            [[100, 100, 100, 100, 10000000, nodata]], dtype=numpy.float32)
        l_max = 25  # affects only the last valid pixel in the array

        srs = osr.SpatialReference()
        srs.ImportFromEPSG(26910)  # NAD83 / UTM zone 10N
        srs_wkt = srs.ExportToWkt()
        origin = (463250, 4929700)
        pixel_size = (30, -30)

        pct_slope_path = os.path.join(self.workspace_dir, 'pct_slope.tif')
        pygeoprocessing.numpy_array_to_raster(
            pct_slope_array, nodata, pixel_size, origin, srs_wkt,
            pct_slope_path)
        flow_accum_path = os.path.join(self.workspace_dir, 'flow_accum.tif')
        pygeoprocessing.numpy_array_to_raster(
            flow_accum_array, nodata, pixel_size, origin, srs_wkt,
            flow_accum_path)

        target_ls_factor_path = os.path.join(self.workspace_dir, 'ls.tif')
        sdr._calculate_ls_factor(
            flow_accum_path, pct_slope_path, l_max, target_ls_factor_path)

        ls = pygeoprocessing.raster_to_numpy_array(target_ls_factor_path)
        nodata = float(numpy.finfo(numpy.float32).max)  # LS output nodata
        expected_ls = numpy.array(
            [[0.253996, 0.657229, 1.345856, 1.776729, 49.802994, nodata]],
            dtype=numpy.float32)
        numpy.testing.assert_allclose(ls, expected_ls, rtol=1e-6)