invest/tests/test_annual_water_yield.py

370 lines
16 KiB
Python

"""Module for Regression Testing the InVEST Annual Water Yield module."""
import unittest
import tempfile
import shutil
import os
import pandas
import numpy
from osgeo import gdal
import pygeoprocessing
REGRESSION_DATA = os.path.join(
os.path.dirname(__file__), '..', 'data', 'invest-test-data', 'annual_water_yield')
SAMPLE_DATA = os.path.join(REGRESSION_DATA, 'input')
gdal.UseExceptions()
class AnnualWaterYieldTests(unittest.TestCase):
"""Regression Tests for Annual Water Yield Model."""
def setUp(self):
"""Overriding setUp func. to create temporary workspace directory."""
# this lets us delete the workspace after its done no matter the
# the rest result
self.workspace_dir = tempfile.mkdtemp()
def tearDown(self):
"""Overriding tearDown function to remove temporary directory."""
shutil.rmtree(self.workspace_dir)
@staticmethod
def generate_base_args(workspace_dir):
"""Generate an args list that is consistent across regression tests."""
args = {
'workspace_dir': workspace_dir,
'lulc_path': os.path.join(SAMPLE_DATA, 'lulc.tif'),
'depth_to_root_rest_layer_path': os.path.join(
SAMPLE_DATA,
'depth_to_root_rest_layer.tif'),
'precipitation_path': os.path.join(SAMPLE_DATA, 'precip.tif'),
'pawc_path': os.path.join(SAMPLE_DATA, 'pawc.tif'),
'eto_path': os.path.join(SAMPLE_DATA, 'eto.tif'),
'watersheds_path': os.path.join(SAMPLE_DATA, 'watersheds.shp'),
'biophysical_table_path': os.path.join(
SAMPLE_DATA, 'biophysical_table.csv'),
'seasonality_constant': 5,
'n_workers': -1,
}
return args
def test_invalid_lulc_veg(self):
"""Hydro: catching invalid LULC_veg values."""
from natcap.invest import annual_water_yield
args = AnnualWaterYieldTests.generate_base_args(self.workspace_dir)
new_lulc_veg_path = os.path.join(self.workspace_dir,
'new_lulc_veg.csv')
table_df = pandas.read_csv(args['biophysical_table_path'])
table_df['LULC_veg'] = ['']*len(table_df.index)
table_df.to_csv(new_lulc_veg_path)
args['biophysical_table_path'] = new_lulc_veg_path
with self.assertRaises(ValueError) as cm:
annual_water_yield.execute(args)
self.assertTrue('veg value must be either 1 or 0' in str(cm.exception))
table_df = pandas.read_csv(args['biophysical_table_path'])
table_df['LULC_veg'] = ['-1']*len(table_df.index)
table_df.to_csv(new_lulc_veg_path)
args['biophysical_table_path'] = new_lulc_veg_path
with self.assertRaises(ValueError) as cm:
annual_water_yield.execute(args)
self.assertTrue('veg value must be either 1 or 0' in str(cm.exception))
def test_missing_lulc_value(self):
"""Hydro: catching missing LULC value in Biophysical table."""
from natcap.invest import annual_water_yield
args = AnnualWaterYieldTests.generate_base_args(self.workspace_dir)
# remove a row from the biophysical table so that lulc value is missing
bad_biophysical_path = os.path.join(
self.workspace_dir, 'bad_biophysical_table.csv')
bio_df = pandas.read_csv(args['biophysical_table_path'])
bio_df = bio_df[bio_df['lucode'] != 2]
bio_df.to_csv(bad_biophysical_path)
bio_df = None
args['biophysical_table_path'] = bad_biophysical_path
with self.assertRaises(ValueError) as cm:
annual_water_yield.execute(args)
self.assertTrue(
"The missing values found in the LULC raster but not the table"
" are: [2]" in str(cm.exception))
def test_missing_lulc_demand_value(self):
"""Hydro: catching missing LULC value in Demand table."""
from natcap.invest import annual_water_yield
args = AnnualWaterYieldTests.generate_base_args(self.workspace_dir)
args['demand_table_path'] = os.path.join(
SAMPLE_DATA, 'water_demand_table.csv')
args['sub_watersheds_path'] = os.path.join(
SAMPLE_DATA, 'subwatersheds.shp')
# remove a row from the biophysical table so that lulc value is missing
bad_demand_path = os.path.join(
self.workspace_dir, 'bad_demand_table.csv')
demand_df = pandas.read_csv(args['demand_table_path'])
demand_df = demand_df[demand_df['lucode'] != 2]
demand_df.to_csv(bad_demand_path)
demand_df = None
args['demand_table_path'] = bad_demand_path
with self.assertRaises(ValueError) as cm:
annual_water_yield.execute(args)
self.assertTrue(
"The missing values found in the LULC raster but not the table"
" are: [2]" in str(cm.exception))
def test_water_yield_subshed(self):
"""Hydro: testing water yield component only w/ subwatershed."""
from natcap.invest import annual_water_yield
from natcap.invest import utils
args = AnnualWaterYieldTests.generate_base_args(self.workspace_dir)
args['sub_watersheds_path'] = os.path.join(
SAMPLE_DATA, 'subwatersheds.shp')
args['results_suffix'] = 'test'
annual_water_yield.execute(args)
raster_results = ['aet_test.tif', 'fractp_test.tif', 'wyield_test.tif']
for raster_path in raster_results:
model_array = pygeoprocessing.raster_to_numpy_array(
os.path.join(
args['workspace_dir'], 'output', 'per_pixel', raster_path))
reg_array = pygeoprocessing.raster_to_numpy_array(
os.path.join(
REGRESSION_DATA, raster_path.replace('_test', '')))
numpy.testing.assert_allclose(model_array, reg_array, rtol=1e-03)
vector_results = ['watershed_results_wyield_test.shp',
'subwatershed_results_wyield_test.shp']
for vector_path in vector_results:
utils._assert_vectors_equal(
os.path.join(args['workspace_dir'], 'output', vector_path),
os.path.join(
REGRESSION_DATA, 'water_yield', vector_path.replace(
'_test', '')))
table_results = ['watershed_results_wyield_test.csv',
'subwatershed_results_wyield_test.csv']
for table_path in table_results:
base_table = pandas.read_csv(
os.path.join(args['workspace_dir'], 'output', table_path))
expected_table = pandas.read_csv(
os.path.join(
REGRESSION_DATA, 'water_yield',
table_path.replace('_test', '')))
pandas.testing.assert_frame_equal(base_table, expected_table)
def test_scarcity_subshed(self):
"""Hydro: testing Scarcity component w/ subwatershed."""
from natcap.invest import annual_water_yield
from natcap.invest import utils
args = AnnualWaterYieldTests.generate_base_args(self.workspace_dir)
args['demand_table_path'] = os.path.join(
SAMPLE_DATA, 'water_demand_table.csv')
args['sub_watersheds_path'] = os.path.join(
SAMPLE_DATA, 'subwatersheds.shp')
annual_water_yield.execute(args)
raster_results = ['aet.tif', 'fractp.tif', 'wyield.tif']
for raster_path in raster_results:
model_array = pygeoprocessing.raster_to_numpy_array(
os.path.join(
args['workspace_dir'], 'output', 'per_pixel', raster_path))
reg_array = pygeoprocessing.raster_to_numpy_array(
os.path.join(REGRESSION_DATA, raster_path))
numpy.testing.assert_allclose(model_array, reg_array, rtol=1e-03)
vector_results = ['watershed_results_wyield.shp',
'subwatershed_results_wyield.shp']
for vector_path in vector_results:
utils._assert_vectors_equal(
os.path.join(args['workspace_dir'], 'output', vector_path),
os.path.join(REGRESSION_DATA, 'scarcity', vector_path))
table_results = ['watershed_results_wyield.csv',
'subwatershed_results_wyield.csv']
for table_path in table_results:
base_table = pandas.read_csv(
os.path.join(args['workspace_dir'], 'output', table_path))
expected_table = pandas.read_csv(
os.path.join(REGRESSION_DATA, 'scarcity', table_path))
pandas.testing.assert_frame_equal(base_table, expected_table)
def test_valuation_subshed(self):
"""Hydro: testing Valuation component w/ subwatershed."""
from natcap.invest import annual_water_yield
from natcap.invest import utils
args = AnnualWaterYieldTests.generate_base_args(self.workspace_dir)
args['demand_table_path'] = os.path.join(
SAMPLE_DATA, 'water_demand_table.csv')
args['valuation_table_path'] = os.path.join(
SAMPLE_DATA, 'hydropower_valuation_table.csv')
args['sub_watersheds_path'] = os.path.join(
SAMPLE_DATA, 'subwatersheds.shp')
annual_water_yield.execute(args)
raster_results = ['aet.tif', 'fractp.tif', 'wyield.tif']
for raster_path in raster_results:
model_array = pygeoprocessing.raster_to_numpy_array(
os.path.join(
args['workspace_dir'], 'output', 'per_pixel', raster_path))
reg_array = pygeoprocessing.raster_to_numpy_array(
os.path.join(REGRESSION_DATA, raster_path))
numpy.testing.assert_allclose(model_array, reg_array, 1e-03)
vector_results = ['watershed_results_wyield.shp',
'subwatershed_results_wyield.shp']
for vector_path in vector_results:
utils._assert_vectors_equal(
os.path.join(args['workspace_dir'], 'output', vector_path),
os.path.join(REGRESSION_DATA, 'valuation', vector_path))
table_results = ['watershed_results_wyield.csv',
'subwatershed_results_wyield.csv']
for table_path in table_results:
base_table = pandas.read_csv(
os.path.join(args['workspace_dir'], 'output', table_path))
expected_table = pandas.read_csv(
os.path.join(REGRESSION_DATA, 'valuation', table_path))
pandas.testing.assert_frame_equal(base_table, expected_table)
def test_validation(self):
"""Hydro: test failure cases on the validation function."""
from natcap.invest import annual_water_yield, validation
args = AnnualWaterYieldTests.generate_base_args(self.workspace_dir)
# default args should be fine
self.assertEqual(annual_water_yield.validate(args), [])
args_bad_vector = args.copy()
args_bad_vector['watersheds_path'] = args_bad_vector['eto_path']
bad_vector_list = annual_water_yield.validate(args_bad_vector)
self.assertTrue('not be opened as a GDAL vector'
in bad_vector_list[0][1])
args_bad_raster = args.copy()
args_bad_raster['eto_path'] = args_bad_raster['watersheds_path']
bad_raster_list = annual_water_yield.validate(args_bad_raster)
self.assertTrue('not be opened as a GDAL raster'
in bad_raster_list[0][1])
args_bad_file = args.copy()
args_bad_file['eto_path'] = 'non_existant_file.tif'
bad_file_list = annual_water_yield.validate(args_bad_file)
self.assertTrue('File not found' in bad_file_list[0][1])
args_missing_key = args.copy()
del args_missing_key['eto_path']
validation_warnings = annual_water_yield.validate(
args_missing_key)
self.assertEqual(
validation_warnings,
[(['eto_path'], validation.MESSAGES['MISSING_KEY'])])
# ensure that a missing landcover code in the biophysical table will
# raise an exception that's helpful
args_bad_biophysical_table = args.copy()
bad_biophysical_path = os.path.join(
self.workspace_dir, 'bad_biophysical_table.csv')
with open(bad_biophysical_path, 'wb') as bad_biophysical_file:
with open(args['biophysical_table_path'], 'rb') as (
biophysical_table_file):
lines_to_write = 2
for line in biophysical_table_file.readlines():
bad_biophysical_file.write(line)
lines_to_write -= 1
if lines_to_write == 0:
break
args_bad_biophysical_table['biophysical_table_path'] = (
bad_biophysical_path)
with self.assertRaises(ValueError) as cm:
annual_water_yield.execute(args_bad_biophysical_table)
actual_message = str(cm.exception)
self.assertTrue(
"The missing values found in the LULC raster but not the table"
" are: [2 3]" in actual_message, actual_message)
# ensure that a missing landcover code in the demand table will
# raise an exception that's helpful
args_bad_biophysical_table = args.copy()
bad_biophysical_path = os.path.join(
self.workspace_dir, 'bad_biophysical_table.csv')
with open(bad_biophysical_path, 'wb') as bad_biophysical_file:
with open(args['biophysical_table_path'], 'rb') as (
biophysical_table_file):
lines_to_write = 2
for line in biophysical_table_file.readlines():
bad_biophysical_file.write(line)
lines_to_write -= 1
if lines_to_write == 0:
break
args_bad_demand_table = args.copy()
bad_demand_path = os.path.join(
self.workspace_dir, 'bad_demand_table.csv')
args_bad_demand_table['demand_table_path'] = (
bad_demand_path)
with open(bad_demand_path, 'wb') as bad_demand_file:
with open(os.path.join(
SAMPLE_DATA, 'water_demand_table.csv'), 'rb') as (
demand_table_file):
lines_to_write = 2
for line in demand_table_file.readlines():
bad_demand_file.write(line)
lines_to_write -= 1
if lines_to_write == 0:
break
# ensure that a missing watershed id the valuation table will
# raise an exception that's helpful
with self.assertRaises(ValueError) as cm:
annual_water_yield.execute(args_bad_demand_table)
actual_message = str(cm.exception)
self.assertTrue(
"The missing values found in the LULC raster but not the table"
" are: [2 3]" in actual_message, actual_message)
args_bad_valuation_table = args.copy()
bad_valuation_path = os.path.join(
self.workspace_dir, 'bad_valuation_table.csv')
args_bad_valuation_table['valuation_table_path'] = (
bad_valuation_path)
# args contract requires a demand table if there is a valuation table
args_bad_valuation_table['demand_table_path'] = os.path.join(
SAMPLE_DATA, 'water_demand_table.csv')
with open(bad_valuation_path, 'wb') as bad_valuation_file:
with open(os.path.join(
SAMPLE_DATA, 'hydropower_valuation_table.csv'), 'rb') as (
valuation_table_file):
lines_to_write = 2
for line in valuation_table_file.readlines():
bad_valuation_file.write(line)
lines_to_write -= 1
if lines_to_write == 0:
break
with self.assertRaises(ValueError) as cm:
annual_water_yield.execute(args_bad_valuation_table)
actual_message = str(cm.exception)
self.assertTrue(
'but are not found in the valuation table' in
actual_message, actual_message)