update tests

2024-01-22 10:44:09 -08:00 · 2024-01-22 10:44:09 -08:00 · a5cd7186bf
parent 5ca1ec3da3
commit a5cd7186bf
8 changed files with 207 additions and 186 deletions
--- a/src/natcap/invest/coastal_vulnerability.py
+++ b/src/natcap/invest/coastal_vulnerability.py
@ -240,7 +240,7 @@ MODEL_SPEC = {
                    "type": "freestyle_string",
                    "about": gettext("Unique name for the habitat. No spaces allowed.")},
                "path": {
-                    "type": "raster_or_vector",
+                    "type": {"raster", "vector"},
                    "fields": {},
                    "geometries": {"POLYGON", "MULTIPOLYGON"},
                    "bands": {1: {"type": "number", "units": u.none}},
--- a/src/natcap/invest/datastack.py
+++ b/src/natcap/invest/datastack.py
@ -314,8 +314,13 @@ def build_datastack_archive(args, model_name, datastack_path):
            if 'columns' in args_spec[key]:
                for col_name, col_definition in (
                        args_spec[key]['columns'].items()):
-                    if col_definition['type'] in [
-                            'raster', 'vector', 'raster_or_vector']:
+                    # Type attribute may be a string (one type) or set
+                    # (multiple types allowed), so always convert to a set for
+                    # easier comparison.
+                    col_types = col_definition['type']
+                    if isinstance(col_types, str):
+                        col_types = set([col_types])
+                    if col_types.intersection(spatial_types):
                        spatial_columns.append(col_name)
            LOGGER.debug(f'Detected spatial columns: {spatial_columns}')

--- a/src/natcap/invest/hra.py
+++ b/src/natcap/invest/hra.py
@ -70,7 +70,7 @@ MODEL_SPEC = {
                        "names must match the habitat and stressor names in "
                        "the Criteria Scores Table.")},
                "path": {
-                    "type": "raster_or_vector",
+                    "type": {"raster", "vector"},
                    "bands": {1: {
                        "type": "number",
                        "units": u.none,
--- a/src/natcap/invest/recreation/recmodel_client.py
+++ b/src/natcap/invest/recreation/recmodel_client.py
@ -52,7 +52,7 @@ predictor_table_columns = {
                   "characters or less).")
    },
    "path": {
-        "type": "raster_or_vector",
+        "type": {"raster", "vector"},
        "about": gettext("A spatial file to use as a predictor."),
        "bands": {1: {"type": "number", "units": u.none}},
        "fields": {},
--- a/src/natcap/invest/validation.py
+++ b/src/natcap/invest/validation.py
@ -648,7 +648,7 @@ def get_validated_dataframe(csv_path, columns=None, rows=None, index_col=None,
        for col in matching_cols:
            try:
                # frozenset needed to make the set hashable.  A frozenset and set with the same members are equal.
-                if col_spec['type'] in {'csv', 'directory', 'file', 'raster', 'vector', 'raster_or_vector'}:
+                if col_spec['type'] in {'csv', 'directory', 'file', 'raster', 'vector', frozenset({'raster', 'vector'})}:
                    df[col] = df[col].apply(
                        lambda p: p if pandas.isna(p) else utils.expand_path(str(p).strip(), csv_path))
                    df[col] = df[col].astype(pandas.StringDtype())
@ -669,12 +669,15 @@ def get_validated_dataframe(csv_path, columns=None, rows=None, index_col=None,
                    f'Value(s) in the "{col}" column could not be interpreted '
                    f'as {col_spec["type"]}s. Original error: {err}')

-            if col_spec['type'] in {'csv', 'directory', 'file', 'raster', 'vector', 'raster_or_vector'}:
+            col_type = col_spec['type']
+            if isinstance(col_type, set):
+                col_type = frozenset(col_type)
+            if col_type in {'csv', 'directory', 'file', 'raster', 'vector', frozenset({'raster', 'vector'})}:
                # recursively validate the files within the column
                def check_value(value):
                    if pandas.isna(value):
                        return
-                    err_msg = _VALIDATION_FUNCS[col_spec['type']](value, **col_spec)
+                    err_msg = _VALIDATION_FUNCS[col_type](value, **col_spec)
                    if err_msg:
                        raise ValueError(
                            f'Error in {axis} "{col}", value "{value}": {err_msg}')
@ -838,6 +841,8 @@ def timeout(func, *args, timeout=5, **kwargs):
    message_queue = queue.Queue()

    def wrapper_func():
        # Run the wrapped callable and pass its return value back to the
        # caller through the queue.  No debug printing here: this runs for
        # every timed validation call and would flood stdout.
        message_queue.put(func(*args, **kwargs))

    thread = threading.Thread(target=wrapper_func)
@ -903,7 +908,7 @@ _VALIDATION_FUNCS = {
    'option_string': check_option_string,
    'raster': functools.partial(timeout, check_raster),
    'vector': functools.partial(timeout, check_vector),
-    'raster_or_vector': functools.partial(timeout, check_raster_or_vector),
+    frozenset({'raster', 'vector'}): functools.partial(timeout, check_raster_or_vector),
    'other': None,  # Up to the user to define their validate()
 }

@ -1041,7 +1046,10 @@ def validate(args, spec, spatial_overlap_opts=None):
            LOGGER.debug(f'Provided key {key} does not exist in MODEL_SPEC')
            continue

-        type_validation_func = _VALIDATION_FUNCS[parameter_spec['type']]
+        param_type = parameter_spec['type']
+        if isinstance(param_type, set):
+            param_type = frozenset(param_type)
+        type_validation_func = _VALIDATION_FUNCS[param_type]

        if type_validation_func is None:
            # Validation for 'other' type must be performed by the user.
@ -1182,6 +1190,8 @@ def invest_validator(validate_func):
                # need to validate it.
                if args_value not in ('', None):
                    input_type = args_key_spec['type']
+                    if isinstance(input_type, set):
+                        input_type = frozenset(input_type)
                    validator_func = _VALIDATION_FUNCS[input_type]
                    error_msg = validator_func(args_value, **args_key_spec)

--- a/tests/test_datastack_modules/archive_extraction.py
+++ b/tests/test_datastack_modules/archive_extraction.py
@ -14,7 +14,11 @@ MODEL_SPEC = {
            'type': 'csv',
            'columns': {
                'ID': {'type': 'integer'},
-                'path': {'type': {'raster', 'vector'}},
+                'path': {
+                    'type': {'raster', 'vector'},
+                    'geometries': {'POINT', 'POLYGON'},
+                    'bands': {1: {'type': 'number'}}
+                }
            }
        }
    }
--- a/tests/test_hra.py
+++ b/tests/test_hra.py
@ -1260,7 +1260,7 @@ class HRAModelTests(unittest.TestCase):
                    array, 255, (10, -10), (ORIGIN[0] - 50, ORIGIN[1] - 50),
                    SRS_WKT, full_path)

-        archive_path = '/Users/emily/Documents/hrad_dat.tar.gz'#os.path.join(self.workspace_dir, 'datstack.tar.gz')
+        archive_path = os.path.join(self.workspace_dir, 'datstack.tar.gz')
        datastack.build_datastack_archive(
            args, 'natcap.invest.hra', archive_path)

--- a/tests/test_model_specs.py
+++ b/tests/test_model_specs.py
@ -19,7 +19,6 @@ valid_nested_types = {
        'raster',
        'ratio',
        'vector',
-        'raster_or_vector'
    },
    'raster': {'integer', 'number', 'ratio', 'percent'},
    'vector': {
@ -38,8 +37,7 @@ valid_nested_types = {
        'percent',
        'raster',
        'ratio',
-        'vector',
-        'raster_or_vector'},
+        'vector'},
    'directory': {'csv', 'directory', 'file', 'raster', 'vector'}
 }

@ -128,7 +126,7 @@ class ValidateModelSpecs(unittest.TestCase):
                self.assertIsInstance(spec['units'], pint.Unit)
                attrs.remove('units')

-            elif t in {'raster', 'raster_or_vector'}:
+            elif t == 'raster':
                # raster type should have a bands property that maps each band
                # index to a nested type dictionary describing the band's data
                self.assertIn('bands', spec)
@ -141,7 +139,7 @@ class ValidateModelSpecs(unittest.TestCase):
                        parent_type=t)
                attrs.remove('bands')

-            elif t in {'vector', 'raster_or_vector'}:
+            elif t == 'vector':
                # vector type should have:
                # - a fields property that maps each field header to a nested
                #   type dictionary describing the data in that field
@ -251,191 +249,195 @@ class ValidateModelSpecs(unittest.TestCase):
                for attr in ['name', 'about']:
                    self.assertIn(attr, arg)

+            # arg['type'] can be either a string or a set of strings
+            types = arg['type'] if isinstance(
+                arg['type'], set) else [arg['type']]
            attrs = set(arg.keys())

-            self.assertIn(arg['type'], valid_nested_types[parent_type])
+            for t in types:
+                self.assertIn(t, valid_nested_types[parent_type])

-            if arg['type'] == 'option_string':
-                # option_string type should have an options property that
-                # describes the valid options
-                self.assertIn('options', arg)
-                # May be a list or dict because some option sets are self
-                # explanatory and others need a description
-                self.assertIsInstance(arg['options'], dict)
-                for key, val in arg['options'].items():
-                    self.assertTrue(
-                        isinstance(key, str) or
-                        isinstance(key, int))
-                    self.assertIsInstance(val, dict)
-                    # top-level option_string args are shown as dropdowns
-                    # so each option needs a display name
-                    # an additional description is optional
-                    if parent_type is None:
+                if t == 'option_string':
+                    # option_string type should have an options property that
+                    # describes the valid options
+                    self.assertIn('options', arg)
+                    # May be a list or dict because some option sets are self
+                    # explanatory and others need a description
+                    self.assertIsInstance(arg['options'], dict)
+                    for key, val in arg['options'].items():
                        self.assertTrue(
-                            set(val.keys()) == {'display_name'} or
-                            set(val.keys()) == {
-                                'display_name', 'description'})
-                    # option_strings within a CSV or vector don't get a
-                    # display name. the user has to enter the key.
-                    else:
-                        self.assertEqual(set(val.keys()), {'description'})
+                            isinstance(key, str) or
+                            isinstance(key, int))
+                        self.assertIsInstance(val, dict)
+                        # top-level option_string args are shown as dropdowns
+                        # so each option needs a display name
+                        # an additional description is optional
+                        if parent_type is None:
+                            self.assertTrue(
+                                set(val.keys()) == {'display_name'} or
+                                set(val.keys()) == {
+                                    'display_name', 'description'})
+                        # option_strings within a CSV or vector don't get a
+                        # display name. the user has to enter the key.
+                        else:
+                            self.assertEqual(set(val.keys()), {'description'})

-                    if 'display_name' in val:
-                        self.assertIsInstance(val['display_name'], str)
-                    if 'description' in val:
-                        self.assertIsInstance(val['description'], str)
+                        if 'display_name' in val:
+                            self.assertIsInstance(val['display_name'], str)
+                        if 'description' in val:
+                            self.assertIsInstance(val['description'], str)

-                attrs.remove('options')
+                    attrs.remove('options')

-            elif arg['type'] == 'freestyle_string':
-                # freestyle_string may optionally have a regexp attribute
-                # this is a regular expression that the string must match
-                if 'regexp' in arg:
-                    self.assertIsInstance(arg['regexp'], str)
-                    re.compile(arg['regexp'])  # should be regex compilable
-                    attrs.remove('regexp')
+                elif t == 'freestyle_string':
+                    # freestyle_string may optionally have a regexp attribute
+                    # this is a regular expression that the string must match
+                    if 'regexp' in arg:
+                        self.assertIsInstance(arg['regexp'], str)
+                        re.compile(arg['regexp'])  # should be regex compilable
+                        attrs.remove('regexp')

-            elif arg['type'] == 'number':
-                # number type should have a units property
-                self.assertIn('units', arg)
-                # Undefined units should use the custom u.none unit
-                self.assertIsInstance(arg['units'], pint.Unit)
-                attrs.remove('units')
+                elif t == 'number':
+                    # number type should have a units property
+                    self.assertIn('units', arg)
+                    # Undefined units should use the custom u.none unit
+                    self.assertIsInstance(arg['units'], pint.Unit)
+                    attrs.remove('units')

-                # number type may optionally have an 'expression' attribute
-                # this is a string expression to be evaluated with the
-                # intent of determining that the value is within a range.
-                # The expression must contain the string ``value``, which
-                # will represent the user-provided value (after it has been
-                # cast to a float).  Example: "(value >= 0) & (value <= 1)"
-                if 'expression' in arg:
-                    self.assertIsInstance(arg['expression'], str)
-                    attrs.remove('expression')
+                    # number type may optionally have an 'expression' attribute
+                    # this is a string expression to be evaluated with the
+                    # intent of determining that the value is within a range.
+                    # The expression must contain the string ``value``, which
+                    # will represent the user-provided value (after it has been
+                    # cast to a float).  Example: "(value >= 0) & (value <= 1)"
+                    if 'expression' in arg:
+                        self.assertIsInstance(arg['expression'], str)
+                        attrs.remove('expression')

-            elif arg['type'] in {'raster', 'raster_or_vector'}:
-                # raster type should have a bands property that maps each band
-                # index to a nested type dictionary describing the band's data
-                self.assertIn('bands', arg)
-                self.assertIsInstance(arg['bands'], dict)
-                for band in arg['bands']:
-                    self.assertIsInstance(band, int)
-                    self.validate_args(
-                        arg['bands'][band],
-                        f'{name}.bands.{band}',
-                        parent_type=arg['type'])
-                attrs.remove('bands')
-
-                # may optionally have a 'projected' attribute that says
-                # whether the raster must be linearly projected
-                if 'projected' in arg:
-                    self.assertIsInstance(arg['projected'], bool)
-                    attrs.remove('projected')
-                # if 'projected' is True, may also have a 'projection_units'
-                # attribute saying the expected linear projection unit
-                if 'projection_units' in arg:
-                    # doesn't make sense to have projection units unless
-                    # projected is True
-                    self.assertTrue(arg['projected'])
-                    self.assertIsInstance(
-                        arg['projection_units'], pint.Unit)
-                    attrs.remove('projection_units')
-
-            elif arg['type'] in {'vector', 'raster_or_vector'}:
-                # vector type should have:
-                # - a fields property that maps each field header to a nested
-                #   type dictionary describing the data in that field
-                # - a geometries property: the set of valid geometry types
-                self.assertIn('fields', arg)
-                self.assertIsInstance(arg['fields'], dict)
-                for field in arg['fields']:
-                    self.assertIsInstance(field, str)
-                    self.validate_args(
-                        arg['fields'][field],
-                        f'{name}.fields.{field}',
-                        parent_type=arg['type'])
-
-                self.assertIn('geometries', arg)
-                self.assertIsInstance(arg['geometries'], set)
-
-                attrs.remove('fields')
-                attrs.remove('geometries')
-
-                # may optionally have a 'projected' attribute that says
-                # whether the vector must be linearly projected
-                if 'projected' in arg:
-                    self.assertIsInstance(arg['projected'], bool)
-                    attrs.remove('projected')
-                # if 'projected' is True, may also have a 'projection_units'
-                # attribute saying the expected linear projection unit
-                if 'projection_units' in arg:
-                    # doesn't make sense to have projection units unless
-                    # projected is True
-                    self.assertTrue(arg['projected'])
-                    self.assertIsInstance(
-                        arg['projection_units'], pint.Unit)
-                    attrs.remove('projection_units')
-
-            elif arg['type'] == 'csv':
-                # csv type should have a rows property, columns property, or
-                # neither. rows or columns properties map each expected header
-                # name/pattern to a nested type dictionary describing the data
-                # in that row/column. may have neither if the table structure
-                # is too complex to describe this way.
-                has_rows = 'rows' in arg
-                has_cols = 'columns' in arg
-                # should not have both
-                self.assertFalse(has_rows and has_cols)
-
-                if has_cols or has_rows:
-                    direction = 'rows' if has_rows else 'columns'
-                    headers = arg[direction]
-                    self.assertIsInstance(headers, dict)
-
-                    for header in headers:
-                        self.assertIsInstance(header, str)
+                elif t == 'raster':
+                    # raster type should have a bands property that maps each band
+                    # index to a nested type dictionary describing the band's data
+                    self.assertIn('bands', arg)
+                    self.assertIsInstance(arg['bands'], dict)
+                    for band in arg['bands']:
+                        self.assertIsInstance(band, int)
                        self.validate_args(
-                            headers[header],
-                            f'{name}.{direction}.{header}',
-                            parent_type=arg['type'])
+                            arg['bands'][band],
+                            f'{name}.bands.{band}',
+                            parent_type=t)
+                    attrs.remove('bands')

-                if 'index_col' in arg:
-                    self.assertIn(arg['index_col'], arg['columns'])
-                    attrs.discard('index_col')
+                    # may optionally have a 'projected' attribute that says
+                    # whether the raster must be linearly projected
+                    if 'projected' in arg:
+                        self.assertIsInstance(arg['projected'], bool)
+                        attrs.remove('projected')
+                    # if 'projected' is True, may also have a 'projection_units'
+                    # attribute saying the expected linear projection unit
+                    if 'projection_units' in arg:
+                        # doesn't make sense to have projection units unless
+                        # projected is True
+                        self.assertTrue(arg['projected'])
+                        self.assertIsInstance(
+                            arg['projection_units'], pint.Unit)
+                        attrs.remove('projection_units')

-                attrs.discard('rows')
-                attrs.discard('columns')
+                elif t == 'vector':
+                    # vector type should have:
+                    # - a fields property that maps each field header to a nested
+                    #   type dictionary describing the data in that field
+                    # - a geometries property: the set of valid geometry types
+                    self.assertIn('fields', arg)
+                    self.assertIsInstance(arg['fields'], dict)
+                    for field in arg['fields']:
+                        self.assertIsInstance(field, str)
+                        self.validate_args(
+                            arg['fields'][field],
+                            f'{name}.fields.{field}',
+                            parent_type=t)

-            elif arg['type'] == 'directory':
-                # directory type should have a contents property that maps each
-                # expected path name/pattern within the directory to a nested
-                # type dictionary describing the data at that filepath
-                self.assertIn('contents', arg)
-                self.assertIsInstance(arg['contents'], dict)
-                for path in arg['contents']:
-                    self.assertIsInstance(path, str)
-                    self.validate_args(
-                        arg['contents'][path],
-                        f'{name}.contents.{path}',
-                        parent_type=arg['type'])
-                attrs.remove('contents')
+                    self.assertIn('geometries', arg)
+                    self.assertIsInstance(arg['geometries'], set)

-                # may optionally have a 'permissions' attribute, which is a
-                # string of the unix-style directory permissions e.g. 'rwx'
-                if 'permissions' in arg:
-                    self.validate_permissions_value(arg['permissions'])
-                    attrs.remove('permissions')
-                # may optionally have an 'must_exist' attribute, which says
-                # whether the directory must already exist
-                # this defaults to True
-                if 'must_exist' in arg:
-                    self.assertIsInstance(arg['must_exist'], bool)
-                    attrs.remove('must_exist')
+                    attrs.remove('fields')
+                    attrs.remove('geometries')

-            elif arg['type'] == 'file':
-                # file type may optionally have a 'permissions' attribute
-                # this is a string listing the permissions e.g. 'rwx'
-                if 'permissions' in arg:
-                    self.validate_permissions_value(arg['permissions'])
+                    # may optionally have a 'projected' attribute that says
+                    # whether the vector must be linearly projected
+                    if 'projected' in arg:
+                        self.assertIsInstance(arg['projected'], bool)
+                        attrs.remove('projected')
+                    # if 'projected' is True, may also have a 'projection_units'
+                    # attribute saying the expected linear projection unit
+                    if 'projection_units' in arg:
+                        # doesn't make sense to have projection units unless
+                        # projected is True
+                        self.assertTrue(arg['projected'])
+                        self.assertIsInstance(
+                            arg['projection_units'], pint.Unit)
+                        attrs.remove('projection_units')
+
+                elif t == 'csv':
+                    # csv type should have a rows property, columns property, or
+                    # neither. rows or columns properties map each expected header
+                    # name/pattern to a nested type dictionary describing the data
+                    # in that row/column. may have neither if the table structure
+                    # is too complex to describe this way.
+                    has_rows = 'rows' in arg
+                    has_cols = 'columns' in arg
+                    # should not have both
+                    self.assertFalse(has_rows and has_cols)
+
+                    if has_cols or has_rows:
+                        direction = 'rows' if has_rows else 'columns'
+                        headers = arg[direction]
+                        self.assertIsInstance(headers, dict)
+
+                        for header in headers:
+                            self.assertIsInstance(header, str)
+                            self.validate_args(
+                                headers[header],
+                                f'{name}.{direction}.{header}',
+                                parent_type=t)
+
+                    if 'index_col' in arg:
+                        self.assertIn(arg['index_col'], arg['columns'])
+                        attrs.discard('index_col')
+
+                    attrs.discard('rows')
+                    attrs.discard('columns')
+
+                elif t == 'directory':
+                    # directory type should have a contents property that maps each
+                    # expected path name/pattern within the directory to a nested
+                    # type dictionary describing the data at that filepath
+                    self.assertIn('contents', arg)
+                    self.assertIsInstance(arg['contents'], dict)
+                    for path in arg['contents']:
+                        self.assertIsInstance(path, str)
+                        self.validate_args(
+                            arg['contents'][path],
+                            f'{name}.contents.{path}',
+                            parent_type=t)
+                    attrs.remove('contents')
+
+                    # may optionally have a 'permissions' attribute, which is a
+                    # string of the unix-style directory permissions e.g. 'rwx'
+                    if 'permissions' in arg:
+                        self.validate_permissions_value(arg['permissions'])
+                        attrs.remove('permissions')
+                    # may optionally have a 'must_exist' attribute, which says
+                    # whether the directory must already exist
+                    # this defaults to True
+                    if 'must_exist' in arg:
+                        self.assertIsInstance(arg['must_exist'], bool)
+                        attrs.remove('must_exist')
+
+                elif t == 'file':
+                    # file type may optionally have a 'permissions' attribute
+                    # this is a string listing the permissions e.g. 'rwx'
+                    if 'permissions' in arg:
+                        self.validate_permissions_value(arg['permissions'])

            # iterate over the remaining attributes
            # type-specific ones have been removed by this point