# Source: invest/tests/test_ndr.py (Python, 540 lines, 23 KiB)

"""InVEST NDR model tests."""
import os
import shutil
import tempfile
import unittest
import pytest
import numpy
import pandas
import pygeoprocessing
import shapely.geometry
from osgeo import gdal
from osgeo import ogr
from osgeo import osr
# Ask the GDAL bindings to raise Python exceptions on errors.
gdal.UseExceptions()

# Directory holding the NDR regression data, resolved relative to this file.
REGRESSION_DATA = os.path.join(
    os.path.dirname(__file__), '..', 'data', 'invest-test-data', 'ndr')
class NDRTests(unittest.TestCase):
    """Regression tests for the InVEST NDR model."""

    def setUp(self):
        """Create a fresh temporary workspace directory for the test."""
        self.workspace_dir = tempfile.mkdtemp()

    def tearDown(self):
        """Remove the temporary workspace and anything written into it."""
        shutil.rmtree(self.workspace_dir)

    @staticmethod
    def generate_base_args(workspace_dir):
        """Generate a base sample args dict for NDR.

        Args:
            workspace_dir (str): path to use as the model's workspace.

        Returns:
            A new dict of NDR arguments whose input paths point at files in
            the ``input`` directory under ``REGRESSION_DATA``.
        """
        input_dir = os.path.join(REGRESSION_DATA, 'input')
        return {
            'biophysical_table_path':
                os.path.join(input_dir, 'biophysical_table.csv'),
            'calc_n': True,
            'calc_p': True,
            'dem_path': os.path.join(input_dir, 'dem.tif'),
            'k_param': 2.0,
            'lulc_path': os.path.join(input_dir, 'landuse_90.tif'),
            'runoff_proxy_path': os.path.join(input_dir, 'precip.tif'),
            'subsurface_critical_length_n': 150,
            'subsurface_eff_n': 0.4,
            'threshold_flow_accumulation': '1000',
            'watersheds_path': os.path.join(input_dir, 'watersheds.shp'),
            'workspace_dir': workspace_dir,
            'flow_dir_algorithm': 'MFD',
        }

    @staticmethod
    def _read_watershed_fields(workspace_dir, field_names):
        """Read field values of feature 1 from the watershed results vector.

        The values are copied out before the feature, layer and vector are
        released, so callers never touch an OGR object after its parent
        datasource has been closed.

        Args:
            workspace_dir (str): directory containing
                ``watershed_results_ndr.gpkg``.
            field_names (iterable of str): names of the fields to read.

        Returns:
            A dict mapping each requested field name to its value.

        Raises:
            AssertionError: if the vector cannot be opened or feature 1 is
                not present.
        """
        vector_path = os.path.join(
            workspace_dir, 'watershed_results_ndr.gpkg')
        result_vector = ogr.Open(vector_path)
        if result_vector is None:
            raise AssertionError(
                f'Could not open results vector: {vector_path}')
        result_layer = result_vector.GetLayer()
        result_feature = result_layer.GetFeature(1)
        try:
            if result_feature is None:
                raise AssertionError("No features were output.")
            return {
                name: result_feature.GetField(name) for name in field_names}
        finally:
            # Release in child-to-parent order: feature, layer, datasource.
            result_feature = None
            result_layer = None
            result_vector = None

    def _assert_watershed_totals(
            self, workspace_dir, expected_totals, **isclose_kwargs):
        """Fail the test if any watershed total differs from expectations.

        Args:
            workspace_dir (str): directory containing the results vector.
            expected_totals (dict): maps field name to its expected value.
            **isclose_kwargs: forwarded to ``numpy.isclose`` (for example
                ``atol``); numpy's default tolerances apply when omitted.
        """
        actual_totals = self._read_watershed_fields(
            workspace_dir, expected_totals)
        mismatch_list = [
            (field, 'expected: %f' % expected_value,
             'actual: %f' % actual_totals[field])
            for field, expected_value in expected_totals.items()
            if not numpy.isclose(
                actual_totals[field], expected_value, **isclose_kwargs)]
        if mismatch_list:
            # self.fail raises AssertionError, so a mismatch is reported as
            # a test failure rather than an error.
            self.fail("results not expected: %s" % mismatch_list)

    def _assert_drainage_mask_exists(self, workspace_dir):
        """Assert that the drainage mask raster was written.

        Only existence is checked here; per the original note, the raster's
        functionality is tested in SDR.
        """
        self.assertTrue(
            os.path.exists(
                os.path.join(
                    workspace_dir, 'intermediate_outputs',
                    'what_drains_to_stream.tif')))

    def test_normalize_raster_float64(self):
        """NDR _normalize_raster handle float64.

        Regression test for an issue raised on the forums when normalizing a
        Float64 raster that has a nodata value that exceeds Float32 space. The
        output raster, in the buggy version, would have pixel values of -inf
        where they should have been nodata.

        https://community.naturalcapitalproject.org/t/ndr-null-values-in-watershed-results/914
        """
        from natcap.invest.ndr import ndr

        raster_xsize = 1124
        raster_ysize = 512
        n_valid_rows = 256  # rows above this index are filled with nodata
        float64_raster_path = os.path.join(
            self.workspace_dir, 'float64_raster.tif')
        driver = gdal.GetDriverByName('GTiff')
        raster = driver.Create(
            float64_raster_path, raster_xsize, raster_ysize, 1,
            gdal.GDT_Float64)
        source_nodata = -1.797693e+308  # taken from user's data
        band = raster.GetRasterBand(1)
        band.SetNoDataValue(source_nodata)
        source_array = numpy.empty(
            (raster_ysize, raster_xsize), dtype=numpy.float64)
        source_array[:n_valid_rows] = 5.5  # Something, anything.
        source_array[n_valid_rows:] = source_nodata
        band.WriteArray(source_array)
        # Drop the GDAL references so the raster is flushed and closed.
        band = None
        raster = None
        driver = None

        normalized_raster_path = os.path.join(
            self.workspace_dir, 'normalized.tif')
        ndr._normalize_raster(
            (float64_raster_path, 1), normalized_raster_path)

        normalized_raster_nodata = pygeoprocessing.get_raster_info(
            normalized_raster_path)['nodata'][0]
        # raster_to_numpy_array closes the dataset once the array is read.
        normalized_array = pygeoprocessing.raster_to_numpy_array(
            normalized_raster_path)

        expected_array = numpy.empty(
            (raster_ysize, raster_xsize), dtype=numpy.float32)
        expected_array[:n_valid_rows] = 1.
        expected_array[n_valid_rows:] = normalized_raster_nodata

        # Assert that the output values match the target nodata value
        expected_nodata_count = (
            raster_xsize * (raster_ysize - n_valid_rows))  # 287744 pixels
        self.assertEqual(
            expected_nodata_count,
            numpy.count_nonzero(
                numpy.isclose(normalized_array, normalized_raster_nodata)))
        numpy.testing.assert_allclose(
            normalized_array, expected_array, rtol=0, atol=1e-6)

    def test_missing_headers(self):
        """NDR biophysical headers missing should return validation message."""
        from natcap.invest.ndr import ndr

        # use predefined directory so test can clean up files during teardown
        args = NDRTests.generate_base_args(self.workspace_dir)
        args['biophysical_table_path'] = os.path.join(
            REGRESSION_DATA, 'input', 'biophysical_table_missing_headers.csv')
        validation_messages = ndr.validate(args)
        self.assertEqual(len(validation_messages), 1)

    def test_crit_len_0(self):
        """NDR test case where crit len is 0 in biophysical table."""
        from natcap.invest.ndr import ndr

        # use predefined directory so test can clean up files during teardown
        args = NDRTests.generate_base_args(self.workspace_dir)
        new_table_path = os.path.join(self.workspace_dir, 'table_c_len_0.csv')
        with open(args['biophysical_table_path'], 'r') as table_file, \
                open(new_table_path, 'w') as target_file:
            # Copy the header row through unchanged.
            target_file.write(table_file.readline())
            for line in table_file:
                line_list = line.split(',')
                # replace the crit_len_p with 0 in this column (index 12)
                target_file.write(
                    ','.join(line_list[0:12] + ['0.0'] + line_list[13:]))
        args['biophysical_table_path'] = new_table_path

        ndr.execute(args)

        self._assert_watershed_totals(
            args['workspace_dir'],
            {
                'p_surface_load': 41.826904,
                'p_surface_export': 5.566120,
                'n_surface_load': 2977.551270,
                'n_surface_export': 274.020844,
                'n_subsurface_load': 28.558048,
                'n_subsurface_export': 15.578484,
                'n_total_export': 289.599314,
            },
            atol=1e-2)

    def test_missing_lucode(self):
        """NDR missing lucode in biophysical table should raise a KeyError."""
        from natcap.invest.ndr import ndr

        # use predefined directory so test can clean up files during teardown
        args = NDRTests.generate_base_args(self.workspace_dir)
        args['biophysical_table_path'] = os.path.join(
            REGRESSION_DATA, 'input', 'biophysical_table_missing_lucode.csv')
        with self.assertRaises(KeyError) as cm:
            ndr.execute(args)
        # Checked after the ``with`` block: statements following the raising
        # call inside the block would never run.
        actual_message = str(cm.exception)
        self.assertIn(
            'present in the landuse raster but missing from the biophysical',
            actual_message)

    def test_no_nutrient_selected(self):
        """NDR no nutrient selected should return a validation message."""
        from natcap.invest.ndr import ndr

        # use predefined directory so test can clean up files during teardown
        args = NDRTests.generate_base_args(self.workspace_dir)
        args['calc_n'] = False
        args['calc_p'] = False
        validation_messages = ndr.validate(args)
        self.assertEqual(len(validation_messages), 1)

    def test_base_regression(self):
        """NDR base regression test on test data.

        Executes NDR with test data. Checks for accuracy of aggregate
        values in summary vector, presence of drainage raster in
        intermediate outputs, and accuracy of raster outputs (as
        measured by the sum of their non-nodata pixel values).
        """
        from natcap.invest.ndr import ndr

        # use predefined directory so test can clean up files during teardown
        args = NDRTests.generate_base_args(self.workspace_dir)

        # make an empty file where the output results vector should reside
        # to ensure the model overwrites it
        with open(
                os.path.join(self.workspace_dir, 'watershed_results_ndr.gpkg'),
                'wb') as f:
            f.write(b'')

        ndr.execute(args)

        # these values were generated by manual inspection of regression
        # results
        expected_watershed_totals = {
            'p_surface_load': 41.826904,
            'p_surface_export': 5.870544,
            'n_surface_load': 2977.551270,
            'n_surface_export': 274.020844,
            'n_subsurface_load': 28.558048,
            'n_subsurface_export': 15.578484,
            'n_total_export': 289.599314,
        }
        self._assert_watershed_totals(
            args['workspace_dir'], expected_watershed_totals)

        # We only need to test that the drainage mask exists. Functionality
        # for that raster is tested in SDR.
        self._assert_drainage_mask_exists(args['workspace_dir'])

        # Check raster outputs to make sure values are in kg/ha/yr.
        raster_info = pygeoprocessing.get_raster_info(args['dem_path'])
        pixel_area = abs(numpy.prod(raster_info['pixel_size']))
        pixels_per_hectare = 10000 / pixel_area
        for attr_name in ['p_surface_export',
                          'n_surface_export',
                          'n_subsurface_export',
                          'n_total_export']:
            # Since pixel values are kg/(ha•yr), raster sum is (kg•px)/(ha•yr),
            # equal to the watershed total (kg/yr) * (pixels_per_hectare px/ha).
            expected_sum = (expected_watershed_totals[attr_name]
                            * pixels_per_hectare)
            raster_name = attr_name + '.tif'
            raster_path = os.path.join(args['workspace_dir'], raster_name)
            nodata = pygeoprocessing.get_raster_info(raster_path)['nodata'][0]
            raster_sum = 0.0
            for _, block in pygeoprocessing.iterblocks((raster_path, 1)):
                raster_sum += numpy.sum(
                    block[~pygeoprocessing.array_equals_nodata(
                        block, nodata)], dtype=numpy.float64)
            numpy.testing.assert_allclose(raster_sum, expected_sum, rtol=1e-6)

    def test_base_regression_d8(self):
        """NDR base regression test on sample data in D8 mode.

        Execute NDR with sample data and checks that the output files are
        generated and that the aggregate results vector fields are the same
        as the regression case.
        """
        from natcap.invest.ndr import ndr

        # use predefined directory so test can clean up files during teardown
        args = NDRTests.generate_base_args(self.workspace_dir)
        args['flow_dir_algorithm'] = 'D8'

        # make an empty file where the output results vector should reside
        # to ensure the model overwrites it
        with open(
                os.path.join(self.workspace_dir, 'watershed_results_ndr.gpkg'),
                'wb') as f:
            f.write(b'')

        ndr.execute(args)

        # these values were generated by manual inspection of regression
        # results
        self._assert_watershed_totals(
            args['workspace_dir'],
            {
                'p_surface_load': 41.826904,
                'p_surface_export': 4.915544,
                'n_surface_load': 2977.551914,
                'n_surface_export': 320.082319,
                'n_subsurface_load': 28.558048,
                'n_subsurface_export': 12.609187,
                'n_total_export': 330.293407,
            })

        # We only need to test that the drainage mask exists. Functionality
        # for that raster is tested in SDR.
        self._assert_drainage_mask_exists(args['workspace_dir'])

    def test_regression_undefined_nodata(self):
        """NDR test when DEM, LULC and runoff proxy have undefined nodata."""
        from natcap.invest.ndr import ndr

        # use predefined directory so test can clean up files during teardown
        args = NDRTests.generate_base_args(self.workspace_dir)

        # unset nodata values for DEM, LULC, and runoff proxy
        # this is ok because the test data is 100% valid
        # regression test for https://github.com/natcap/invest/issues/1005
        driver = gdal.GetDriverByName('GTIFF')
        for key in ['runoff_proxy_path', 'dem_path', 'lulc_path']:
            target_path = os.path.join(
                self.workspace_dir, f'{key}_no_nodata.tif')
            source = gdal.OpenEx(args[key], gdal.OF_RASTER)
            target = driver.CreateCopy(target_path, source)
            target.GetRasterBand(1).DeleteNoDataValue()
            # Drop the references so the copy is flushed to disk.
            source, target = None, None
            args[key] = target_path

        ndr.execute(args)

        # these values were generated by manual inspection of regression
        # results
        self._assert_watershed_totals(
            args['workspace_dir'],
            {
                'p_surface_load': 41.826904,
                'p_surface_export': 5.870544,
                'n_surface_load': 2977.551270,
                'n_surface_export': 274.020844,
                'n_subsurface_load': 28.558048,
                'n_subsurface_export': 15.578484,
                'n_total_export': 289.599314,
            })

    def test_validation(self):
        """NDR test argument validation."""
        from natcap.invest import validation
        from natcap.invest.ndr import ndr

        # use predefined directory so test can clean up files during teardown
        args = NDRTests.generate_base_args(self.workspace_dir)
        # the base args should produce no validation errors
        validation_errors = ndr.validate(args)
        self.assertEqual(len(validation_errors), 0)

        # the workspace_dir key is missing entirely
        del args['workspace_dir']
        validation_errors = ndr.validate(args)
        self.assertEqual(len(validation_errors), 1)

        args = NDRTests.generate_base_args(self.workspace_dir)
        args['workspace_dir'] = ''
        validation_error_list = ndr.validate(args)
        # we should have one warning that is an empty value
        self.assertEqual(len(validation_error_list), 1)

        # here the wrong GDAL type happens (vector instead of raster)
        args = NDRTests.generate_base_args(self.workspace_dir)
        args['lulc_path'] = args['watersheds_path']
        validation_error_list = ndr.validate(args)
        self.assertEqual(len(validation_error_list), 1)

        # here the wrong GDAL type happens (raster instead of vector)
        args = NDRTests.generate_base_args(self.workspace_dir)
        args['watersheds_path'] = args['lulc_path']
        validation_error_list = ndr.validate(args)
        self.assertEqual(len(validation_error_list), 1)

        # cover that there's no p and n calculation
        args = NDRTests.generate_base_args(self.workspace_dir)
        args['calc_p'] = False
        args['calc_n'] = False
        validation_error_list = ndr.validate(args)
        # a single message should name both calc_n and calc_p
        self.assertEqual(len(validation_error_list), 1)
        self.assertIn('calc_n', validation_error_list[0][0])
        self.assertIn('calc_p', validation_error_list[0][0])

        # cover that a file is missing
        args = NDRTests.generate_base_args(self.workspace_dir)
        args['lulc_path'] = 'this/path/does/not/exist.tif'
        validation_error_list = ndr.validate(args)
        self.assertEqual(len(validation_error_list), 1)

        # cover that some args are conditionally required when
        # these args are present and true
        args = {'calc_p': True, 'calc_n': True}
        validation_error_list = ndr.validate(args)
        invalid_args = validation.get_invalid_keys(validation_error_list)
        expected_missing_args = [
            'biophysical_table_path',
            'threshold_flow_accumulation',
            'dem_path',
            'subsurface_critical_length_n',
            'runoff_proxy_path',
            'lulc_path',
            'workspace_dir',
            'k_param',
            'watersheds_path',
            'subsurface_eff_n',
            'flow_dir_algorithm',
        ]
        self.assertEqual(set(invalid_args), set(expected_missing_args))

    def test_masking_invalid_geometry(self):
        """NDR test masking of invalid geometries.

        For more context, see https://github.com/natcap/invest/issues/1412.
        """
        from natcap.invest.ndr import ndr

        default_origin = (444720, 3751320)
        default_pixel_size = (30, -30)
        default_epsg = 3116
        default_srs = osr.SpatialReference()
        default_srs.ImportFromEPSG(default_epsg)

        # bowtie geometry is invalid; verify we can still create a mask.
        coordinates = [
            (default_origin[0] + default_pixel_size[0] * pixel_x_offset,
             default_origin[1] + default_pixel_size[1] * pixel_y_offset)
            for pixel_x_offset, pixel_y_offset in [
                (0, 0), (0, 1), (1, 0.25), (1, 0.75), (0, 0)]]

        source_vector_path = os.path.join(self.workspace_dir, 'vector.geojson')
        pygeoprocessing.shapely_geometry_to_vector(
            [shapely.geometry.Polygon(coordinates)], source_vector_path,
            default_srs.ExportToWkt(), 'GeoJSON')

        source_raster_path = os.path.join(self.workspace_dir, 'raster.tif')
        vector_info = pygeoprocessing.get_vector_info(source_vector_path)
        # The raster covers exactly the vector's bounding box. An earlier
        # ``bbox_geom.buffer(50)`` call discarded its result and so never
        # expanded the box; it was removed as dead code. The single-pixel
        # expectation below relies on the unbuffered extent.
        bbox_geom = shapely.geometry.box(*vector_info['bounding_box'])
        pygeoprocessing.create_raster_from_bounding_box(
            bbox_geom.bounds, source_raster_path,
            default_pixel_size, gdal.GDT_Byte, default_srs.ExportToWkt(),
            target_nodata=255)

        target_raster_path = os.path.join(self.workspace_dir, 'target.tif')
        ndr._create_mask_raster(source_raster_path, source_vector_path,
                                target_raster_path)

        expected_array = numpy.array([[1]])
        numpy.testing.assert_array_equal(
            expected_array,
            pygeoprocessing.raster_to_numpy_array(target_raster_path))

    def test_synthetic_runoff_proxy_av(self):
        """Test RPI given user-entered or auto-calculated runoff proxy average.

        Test that the runoff proxy index (RPI) is calculated correctly if
        (1) the user specifies a runoff proxy average value,
        (2) the user does not specify a value so the runoff proxy average
            is auto-calculated.
        """
        from natcap.invest.ndr import ndr

        # make simple raster
        runoff_proxy_path = os.path.join(self.workspace_dir, "ppt.tif")
        runoff_proxy_array = numpy.array(
            [[800, 799, 567, 234], [765, 867, 765, 654]], dtype=numpy.float32)
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(26910)
        projection_wkt = srs.ExportToWkt()
        origin = (461251, 4923445)
        pixel_size = (30, -30)
        no_data = -1
        pygeoprocessing.numpy_array_to_raster(
            runoff_proxy_array, no_data, pixel_size, origin, projection_wkt,
            runoff_proxy_path)
        target_rpi_path = os.path.join(self.workspace_dir, "out_raster.tif")

        # Calculate RPI with user-specified runoff proxy average
        runoff_proxy_av = 2
        ndr._normalize_raster((runoff_proxy_path, 1), target_rpi_path,
                              user_provided_mean=runoff_proxy_av)
        actual_rpi = pygeoprocessing.raster_to_numpy_array(target_rpi_path)
        expected_rpi = runoff_proxy_array/runoff_proxy_av
        numpy.testing.assert_allclose(actual_rpi, expected_rpi)

        # Now calculate RPI with auto-calculated RP average
        ndr._normalize_raster((runoff_proxy_path, 1), target_rpi_path,
                              user_provided_mean=None)
        actual_rpi = pygeoprocessing.raster_to_numpy_array(target_rpi_path)
        expected_rpi = runoff_proxy_array/numpy.mean(runoff_proxy_array)
        numpy.testing.assert_allclose(actual_rpi, expected_rpi)