update tests

This commit is contained in:
Emily Soth 2024-01-22 10:44:09 -08:00
parent 5ca1ec3da3
commit a5cd7186bf
8 changed files with 207 additions and 186 deletions

View File

@ -240,7 +240,7 @@ MODEL_SPEC = {
"type": "freestyle_string",
"about": gettext("Unique name for the habitat. No spaces allowed.")},
"path": {
"type": "raster_or_vector",
"type": {"raster", "vector"},
"fields": {},
"geometries": {"POLYGON", "MULTIPOLYGON"},
"bands": {1: {"type": "number", "units": u.none}},

View File

@ -314,8 +314,13 @@ def build_datastack_archive(args, model_name, datastack_path):
if 'columns' in args_spec[key]:
for col_name, col_definition in (
args_spec[key]['columns'].items()):
if col_definition['type'] in [
'raster', 'vector', 'raster_or_vector']:
# Type attribute may be a string (one type) or set
# (multiple types allowed), so always convert to a set for
# easier comparison.
col_types = col_definition['type']
if isinstance(col_types, str):
col_types = set([col_types])
if col_types.intersection(spatial_types):
spatial_columns.append(col_name)
LOGGER.debug(f'Detected spatial columns: {spatial_columns}')

View File

@ -70,7 +70,7 @@ MODEL_SPEC = {
"names must match the habitat and stressor names in "
"the Criteria Scores Table.")},
"path": {
"type": "raster_or_vector",
"type": {"raster", "vector"},
"bands": {1: {
"type": "number",
"units": u.none,

View File

@ -52,7 +52,7 @@ predictor_table_columns = {
"characters or less).")
},
"path": {
"type": "raster_or_vector",
"type": {"raster", "vector"},
"about": gettext("A spatial file to use as a predictor."),
"bands": {1: {"type": "number", "units": u.none}},
"fields": {},

View File

@ -648,7 +648,7 @@ def get_validated_dataframe(csv_path, columns=None, rows=None, index_col=None,
for col in matching_cols:
try:
# frozenset needed to make the set hashable. A frozenset and set with the same members are equal.
if col_spec['type'] in {'csv', 'directory', 'file', 'raster', 'vector', 'raster_or_vector'}:
if col_spec['type'] in {'csv', 'directory', 'file', 'raster', 'vector', frozenset({'raster', 'vector'})}:
df[col] = df[col].apply(
lambda p: p if pandas.isna(p) else utils.expand_path(str(p).strip(), csv_path))
df[col] = df[col].astype(pandas.StringDtype())
@ -669,12 +669,15 @@ def get_validated_dataframe(csv_path, columns=None, rows=None, index_col=None,
f'Value(s) in the "{col}" column could not be interpreted '
f'as {col_spec["type"]}s. Original error: {err}')
if col_spec['type'] in {'csv', 'directory', 'file', 'raster', 'vector', 'raster_or_vector'}:
col_type = col_spec['type']
if isinstance(col_type, set):
col_type = frozenset(col_type)
if col_type in {'csv', 'directory', 'file', 'raster', 'vector', frozenset({'raster', 'vector'})}:
# recursively validate the files within the column
def check_value(value):
if pandas.isna(value):
return
err_msg = _VALIDATION_FUNCS[col_spec['type']](value, **col_spec)
err_msg = _VALIDATION_FUNCS[col_type](value, **col_spec)
if err_msg:
raise ValueError(
f'Error in {axis} "{col}", value "{value}": {err_msg}')
@ -838,6 +841,8 @@ def timeout(func, *args, timeout=5, **kwargs):
message_queue = queue.Queue()
def wrapper_func():
print(func)
print(args, kwargs)
message_queue.put(func(*args, **kwargs))
thread = threading.Thread(target=wrapper_func)
@ -903,7 +908,7 @@ _VALIDATION_FUNCS = {
'option_string': check_option_string,
'raster': functools.partial(timeout, check_raster),
'vector': functools.partial(timeout, check_vector),
'raster_or_vector': functools.partial(timeout, check_raster_or_vector),
frozenset({'raster', 'vector'}): functools.partial(timeout, check_raster_or_vector),
'other': None, # Up to the user to define their validate()
}
@ -1041,7 +1046,10 @@ def validate(args, spec, spatial_overlap_opts=None):
LOGGER.debug(f'Provided key {key} does not exist in MODEL_SPEC')
continue
type_validation_func = _VALIDATION_FUNCS[parameter_spec['type']]
param_type = parameter_spec['type']
if isinstance(param_type, set):
param_type = frozenset(param_type)
type_validation_func = _VALIDATION_FUNCS[param_type]
if type_validation_func is None:
# Validation for 'other' type must be performed by the user.
@ -1182,6 +1190,8 @@ def invest_validator(validate_func):
# need to validate it.
if args_value not in ('', None):
input_type = args_key_spec['type']
if isinstance(input_type, set):
input_type = frozenset(input_type)
validator_func = _VALIDATION_FUNCS[input_type]
error_msg = validator_func(args_value, **args_key_spec)

View File

@ -14,7 +14,11 @@ MODEL_SPEC = {
'type': 'csv',
'columns': {
'ID': {'type': 'integer'},
'path': {'type': {'raster', 'vector'}},
'path': {
'type': {'raster', 'vector'},
'geometries': {'POINT', 'POLYGON'},
'bands': {1: {'type': 'number'}}
}
}
}
}

View File

@ -1260,7 +1260,7 @@ class HRAModelTests(unittest.TestCase):
array, 255, (10, -10), (ORIGIN[0] - 50, ORIGIN[1] - 50),
SRS_WKT, full_path)
archive_path = '/Users/emily/Documents/hrad_dat.tar.gz'#os.path.join(self.workspace_dir, 'datstack.tar.gz')
archive_path = os.path.join(self.workspace_dir, 'datstack.tar.gz')
datastack.build_datastack_archive(
args, 'natcap.invest.hra', archive_path)

View File

@ -19,7 +19,6 @@ valid_nested_types = {
'raster',
'ratio',
'vector',
'raster_or_vector'
},
'raster': {'integer', 'number', 'ratio', 'percent'},
'vector': {
@ -38,8 +37,7 @@ valid_nested_types = {
'percent',
'raster',
'ratio',
'vector',
'raster_or_vector'},
'vector'},
'directory': {'csv', 'directory', 'file', 'raster', 'vector'}
}
@ -128,7 +126,7 @@ class ValidateModelSpecs(unittest.TestCase):
self.assertIsInstance(spec['units'], pint.Unit)
attrs.remove('units')
elif t in {'raster', 'raster_or_vector'}:
elif t == 'raster':
# raster type should have a bands property that maps each band
# index to a nested type dictionary describing the band's data
self.assertIn('bands', spec)
@ -141,7 +139,7 @@ class ValidateModelSpecs(unittest.TestCase):
parent_type=t)
attrs.remove('bands')
elif t in {'vector', 'raster_or_vector'}:
elif t == 'vector':
# vector type should have:
# - a fields property that maps each field header to a nested
# type dictionary describing the data in that field
@ -251,191 +249,195 @@ class ValidateModelSpecs(unittest.TestCase):
for attr in ['name', 'about']:
self.assertIn(attr, arg)
# arg['type'] can be either a string or a set of strings
types = arg['type'] if isinstance(
arg['type'], set) else [arg['type']]
attrs = set(arg.keys())
self.assertIn(arg['type'], valid_nested_types[parent_type])
for t in types:
self.assertIn(t, valid_nested_types[parent_type])
if arg['type'] == 'option_string':
# option_string type should have an options property that
# describes the valid options
self.assertIn('options', arg)
# Must be a dict mapping each option key to a dict that holds
# its display name and/or description
self.assertIsInstance(arg['options'], dict)
for key, val in arg['options'].items():
self.assertTrue(
isinstance(key, str) or
isinstance(key, int))
self.assertIsInstance(val, dict)
# top-level option_string args are shown as dropdowns
# so each option needs a display name
# an additional description is optional
if parent_type is None:
if t == 'option_string':
# option_string type should have an options property that
# describes the valid options
self.assertIn('options', arg)
# Must be a dict mapping each option key to a dict that holds
# its display name and/or description
self.assertIsInstance(arg['options'], dict)
for key, val in arg['options'].items():
self.assertTrue(
set(val.keys()) == {'display_name'} or
set(val.keys()) == {
'display_name', 'description'})
# option_strings within a CSV or vector don't get a
# display name. the user has to enter the key.
else:
self.assertEqual(set(val.keys()), {'description'})
isinstance(key, str) or
isinstance(key, int))
self.assertIsInstance(val, dict)
# top-level option_string args are shown as dropdowns
# so each option needs a display name
# an additional description is optional
if parent_type is None:
self.assertTrue(
set(val.keys()) == {'display_name'} or
set(val.keys()) == {
'display_name', 'description'})
# option_strings within a CSV or vector don't get a
# display name. the user has to enter the key.
else:
self.assertEqual(set(val.keys()), {'description'})
if 'display_name' in val:
self.assertIsInstance(val['display_name'], str)
if 'description' in val:
self.assertIsInstance(val['description'], str)
if 'display_name' in val:
self.assertIsInstance(val['display_name'], str)
if 'description' in val:
self.assertIsInstance(val['description'], str)
attrs.remove('options')
attrs.remove('options')
elif arg['type'] == 'freestyle_string':
# freestyle_string may optionally have a regexp attribute
# this is a regular expression that the string must match
if 'regexp' in arg:
self.assertIsInstance(arg['regexp'], str)
re.compile(arg['regexp']) # should be regex compilable
attrs.remove('regexp')
elif t == 'freestyle_string':
# freestyle_string may optionally have a regexp attribute
# this is a regular expression that the string must match
if 'regexp' in arg:
self.assertIsInstance(arg['regexp'], str)
re.compile(arg['regexp']) # should be regex compilable
attrs.remove('regexp')
elif arg['type'] == 'number':
# number type should have a units property
self.assertIn('units', arg)
# Undefined units should use the custom u.none unit
self.assertIsInstance(arg['units'], pint.Unit)
attrs.remove('units')
elif t == 'number':
# number type should have a units property
self.assertIn('units', arg)
# Undefined units should use the custom u.none unit
self.assertIsInstance(arg['units'], pint.Unit)
attrs.remove('units')
# number type may optionally have an 'expression' attribute
# this is a string expression to be evaluated with the
# intent of determining that the value is within a range.
# The expression must contain the string ``value``, which
# will represent the user-provided value (after it has been
# cast to a float). Example: "(value >= 0) & (value <= 1)"
if 'expression' in arg:
self.assertIsInstance(arg['expression'], str)
attrs.remove('expression')
# number type may optionally have an 'expression' attribute
# this is a string expression to be evaluated with the
# intent of determining that the value is within a range.
# The expression must contain the string ``value``, which
# will represent the user-provided value (after it has been
# cast to a float). Example: "(value >= 0) & (value <= 1)"
if 'expression' in arg:
self.assertIsInstance(arg['expression'], str)
attrs.remove('expression')
elif arg['type'] in {'raster', 'raster_or_vector'}:
# raster type should have a bands property that maps each band
# index to a nested type dictionary describing the band's data
self.assertIn('bands', arg)
self.assertIsInstance(arg['bands'], dict)
for band in arg['bands']:
self.assertIsInstance(band, int)
self.validate_args(
arg['bands'][band],
f'{name}.bands.{band}',
parent_type=arg['type'])
attrs.remove('bands')
# may optionally have a 'projected' attribute that says
# whether the raster must be linearly projected
if 'projected' in arg:
self.assertIsInstance(arg['projected'], bool)
attrs.remove('projected')
# if 'projected' is True, may also have a 'projection_units'
# attribute saying the expected linear projection unit
if 'projection_units' in arg:
# doesn't make sense to have projection units unless
# projected is True
self.assertTrue(arg['projected'])
self.assertIsInstance(
arg['projection_units'], pint.Unit)
attrs.remove('projection_units')
elif arg['type'] in {'vector', 'raster_or_vector'}:
# vector type should have:
# - a fields property that maps each field header to a nested
# type dictionary describing the data in that field
# - a geometries property: the set of valid geometry types
self.assertIn('fields', arg)
self.assertIsInstance(arg['fields'], dict)
for field in arg['fields']:
self.assertIsInstance(field, str)
self.validate_args(
arg['fields'][field],
f'{name}.fields.{field}',
parent_type=arg['type'])
self.assertIn('geometries', arg)
self.assertIsInstance(arg['geometries'], set)
attrs.remove('fields')
attrs.remove('geometries')
# may optionally have a 'projected' attribute that says
# whether the vector must be linearly projected
if 'projected' in arg:
self.assertIsInstance(arg['projected'], bool)
attrs.remove('projected')
# if 'projected' is True, may also have a 'projection_units'
# attribute saying the expected linear projection unit
if 'projection_units' in arg:
# doesn't make sense to have projection units unless
# projected is True
self.assertTrue(arg['projected'])
self.assertIsInstance(
arg['projection_units'], pint.Unit)
attrs.remove('projection_units')
elif arg['type'] == 'csv':
# csv type should have a rows property, columns property, or
# neither. rows or columns properties map each expected header
# name/pattern to a nested type dictionary describing the data
# in that row/column. may have neither if the table structure
# is too complex to describe this way.
has_rows = 'rows' in arg
has_cols = 'columns' in arg
# should not have both
self.assertFalse(has_rows and has_cols)
if has_cols or has_rows:
direction = 'rows' if has_rows else 'columns'
headers = arg[direction]
self.assertIsInstance(headers, dict)
for header in headers:
self.assertIsInstance(header, str)
elif t == 'raster':
# raster type should have a bands property that maps each band
# index to a nested type dictionary describing the band's data
self.assertIn('bands', arg)
self.assertIsInstance(arg['bands'], dict)
for band in arg['bands']:
self.assertIsInstance(band, int)
self.validate_args(
headers[header],
f'{name}.{direction}.{header}',
parent_type=arg['type'])
arg['bands'][band],
f'{name}.bands.{band}',
parent_type=t)
attrs.remove('bands')
if 'index_col' in arg:
self.assertIn(arg['index_col'], arg['columns'])
attrs.discard('index_col')
# may optionally have a 'projected' attribute that says
# whether the raster must be linearly projected
if 'projected' in arg:
self.assertIsInstance(arg['projected'], bool)
attrs.remove('projected')
# if 'projected' is True, may also have a 'projection_units'
# attribute saying the expected linear projection unit
if 'projection_units' in arg:
# doesn't make sense to have projection units unless
# projected is True
self.assertTrue(arg['projected'])
self.assertIsInstance(
arg['projection_units'], pint.Unit)
attrs.remove('projection_units')
attrs.discard('rows')
attrs.discard('columns')
elif t == 'vector':
# vector type should have:
# - a fields property that maps each field header to a nested
# type dictionary describing the data in that field
# - a geometries property: the set of valid geometry types
self.assertIn('fields', arg)
self.assertIsInstance(arg['fields'], dict)
for field in arg['fields']:
self.assertIsInstance(field, str)
self.validate_args(
arg['fields'][field],
f'{name}.fields.{field}',
parent_type=t)
elif arg['type'] == 'directory':
# directory type should have a contents property that maps each
# expected path name/pattern within the directory to a nested
# type dictionary describing the data at that filepath
self.assertIn('contents', arg)
self.assertIsInstance(arg['contents'], dict)
for path in arg['contents']:
self.assertIsInstance(path, str)
self.validate_args(
arg['contents'][path],
f'{name}.contents.{path}',
parent_type=arg['type'])
attrs.remove('contents')
self.assertIn('geometries', arg)
self.assertIsInstance(arg['geometries'], set)
# may optionally have a 'permissions' attribute, which is a
# string of the unix-style directory permissions e.g. 'rwx'
if 'permissions' in arg:
self.validate_permissions_value(arg['permissions'])
attrs.remove('permissions')
# may optionally have a 'must_exist' attribute, which says
# whether the directory must already exist
# this defaults to True
if 'must_exist' in arg:
self.assertIsInstance(arg['must_exist'], bool)
attrs.remove('must_exist')
attrs.remove('fields')
attrs.remove('geometries')
elif arg['type'] == 'file':
# file type may optionally have a 'permissions' attribute
# this is a string listing the permissions e.g. 'rwx'
if 'permissions' in arg:
self.validate_permissions_value(arg['permissions'])
# may optionally have a 'projected' attribute that says
# whether the vector must be linearly projected
if 'projected' in arg:
self.assertIsInstance(arg['projected'], bool)
attrs.remove('projected')
# if 'projected' is True, may also have a 'projection_units'
# attribute saying the expected linear projection unit
if 'projection_units' in arg:
# doesn't make sense to have projection units unless
# projected is True
self.assertTrue(arg['projected'])
self.assertIsInstance(
arg['projection_units'], pint.Unit)
attrs.remove('projection_units')
elif t == 'csv':
# csv type should have a rows property, columns property, or
# neither. rows or columns properties map each expected header
# name/pattern to a nested type dictionary describing the data
# in that row/column. may have neither if the table structure
# is too complex to describe this way.
has_rows = 'rows' in arg
has_cols = 'columns' in arg
# should not have both
self.assertFalse(has_rows and has_cols)
if has_cols or has_rows:
direction = 'rows' if has_rows else 'columns'
headers = arg[direction]
self.assertIsInstance(headers, dict)
for header in headers:
self.assertIsInstance(header, str)
self.validate_args(
headers[header],
f'{name}.{direction}.{header}',
parent_type=t)
if 'index_col' in arg:
self.assertIn(arg['index_col'], arg['columns'])
attrs.discard('index_col')
attrs.discard('rows')
attrs.discard('columns')
elif t == 'directory':
# directory type should have a contents property that maps each
# expected path name/pattern within the directory to a nested
# type dictionary describing the data at that filepath
self.assertIn('contents', arg)
self.assertIsInstance(arg['contents'], dict)
for path in arg['contents']:
self.assertIsInstance(path, str)
self.validate_args(
arg['contents'][path],
f'{name}.contents.{path}',
parent_type=t)
attrs.remove('contents')
# may optionally have a 'permissions' attribute, which is a
# string of the unix-style directory permissions e.g. 'rwx'
if 'permissions' in arg:
self.validate_permissions_value(arg['permissions'])
attrs.remove('permissions')
# may optionally have a 'must_exist' attribute, which says
# whether the directory must already exist
# this defaults to True
if 'must_exist' in arg:
self.assertIsInstance(arg['must_exist'], bool)
attrs.remove('must_exist')
elif t == 'file':
# file type may optionally have a 'permissions' attribute
# this is a string listing the permissions e.g. 'rwx'
if 'permissions' in arg:
self.validate_permissions_value(arg['permissions'])
# iterate over the remaining attributes
# type-specific ones have been removed by this point