Varname mangling for value-based conditional req.

RE:#1503
2024-01-24 11:55:38 -08:00 · 2024-01-24 11:55:38 -08:00 · 845c49f2a1
parent 9f4afe8536
commit 845c49f2a1
4 changed files with 172 additions and 39 deletions
--- a/src/natcap/invest/urban_nature_access.py
+++ b/src/natcap/invest/urban_nature_access.py
@ -92,7 +92,7 @@ MODEL_SPEC = {
                    'type': 'number',
                    'units': u.meter,
                    'required':
-                        f'search_radius_mode == "{RADIUS_OPT_URBAN_NATURE}"',
+                        f'search_radius_mode.value == "{RADIUS_OPT_URBAN_NATURE}"',
                    'expression': 'value >= 0',
                    'about': (
                        'The distance within which a LULC type is relevant '
@ -125,7 +125,7 @@ MODEL_SPEC = {
                "pop_[POP_GROUP]": {
                    "type": "ratio",
                    "required": (
-                        f"(search_radius_mode == '{RADIUS_OPT_POP_GROUP}') "
+                        f"(search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}') "
                        "or aggregate_by_pop_group"),
                    "about": gettext(
                        "The proportion of the population within each "
@ -248,7 +248,7 @@ MODEL_SPEC = {
            'name': 'uniform search radius',
            'units': u.m,
            'expression': 'value > 0',
-            'required': f'search_radius_mode == "{RADIUS_OPT_UNIFORM}"',
+            'required': f'search_radius_mode.value == "{RADIUS_OPT_UNIFORM}"',
            'about': gettext(
                'The search radius to use when running the model under a '
                'uniform search radius. Required when running the model '
@ -257,7 +257,7 @@ MODEL_SPEC = {
        'population_group_radii_table': {
            'name': 'population group radii table',
            'type': 'csv',
-            'required': f'search_radius_mode == "{RADIUS_OPT_POP_GROUP}"',
+            'required': f'search_radius_mode.value == "{RADIUS_OPT_POP_GROUP}"',
            'index_col': 'pop_group',
            'columns': {
                "pop_group": {
@ -273,7 +273,7 @@ MODEL_SPEC = {
                    'type': 'number',
                    'units': u.meter,
                    'required':
-                        f'search_radius_mode == "{RADIUS_OPT_POP_GROUP}"',
+                        f'search_radius_mode.value == "{RADIUS_OPT_POP_GROUP}"',
                    'expression': 'value >= 0',
                    'about': gettext(
                        "The search radius in meters to use "
@ -379,7 +379,7 @@ MODEL_SPEC = {
                                "group POP_GROUP within this administrative "
                                "unit."),
                            "created_if": (
-                                f"(search_radius_mode == '{RADIUS_OPT_POP_GROUP}') "
+                                f"(search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}') "
                                "or aggregate_by_pop_group"),
                        },
                        "Pund_adm_[POP_GROUP]": {
@ -391,7 +391,7 @@ MODEL_SPEC = {
                                "administrative unit that are undersupplied "
                                "with urban nature."),
                            "created_if": (
-                                f"(search_radius_mode == '{RADIUS_OPT_POP_GROUP}') "
+                                f"(search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}') "
                                "or aggregate_by_pop_group"),
                        },
                        "Povr_adm_[POP_GROUP]": {
@ -403,7 +403,7 @@ MODEL_SPEC = {
                                "administrative unit that is oversupplied "
                                "with urban nature."),
                            "created_if": (
-                                f"(search_radius_mode == '{RADIUS_OPT_POP_GROUP}') "
+                                f"(search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}') "
                                "or aggregate_by_pop_group"),
                        },
                    },
@ -418,7 +418,7 @@ MODEL_SPEC = {
                        "to the stated urban nature demand."),
                    "bands": {1: {"type": "number", "units": u.m**2/u.person}},
                    "created_if":
-                        f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
+                        f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'",
                },

                # when RADIUS_OPT_UNIFORM
@ -428,7 +428,7 @@ MODEL_SPEC = {
                        "radius, weighted by the selected decay function."),
                    "bands": {1: {"type": "number", "units": u.m**2}},
                    "created_if":
-                        f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'",
+                        f"search_radius_mode.value == '{RADIUS_OPT_URBAN_NATURE}'",
                },

                # When RADIUS_OPT_URBAN_NATURE
@ -439,7 +439,7 @@ MODEL_SPEC = {
                        "by the selected decay function."),
                    "bands": {1: {"type": "number", "units": u.m**2}},
                    "created_if":
-                        f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'",
+                        f"search_radius_mode.value == '{RADIUS_OPT_URBAN_NATURE}'",
                },

                # When RADIUS_OPT_POP_GROUP
@ -450,7 +450,7 @@ MODEL_SPEC = {
                        "selected decay function."),
                    "bands": {1: {"type": "number", "units": u.m**2}},
                    "created_if":
-                        f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
+                        f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'",
                },
            },
        },
@ -492,8 +492,8 @@ MODEL_SPEC = {
                        "function."),
                    "bands": {1: {'type': 'number', 'units': u.count}},
                    "created_if": (
-                        f"search_radius_mode == '{RADIUS_OPT_UNIFORM}' or "
-                        f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'"),
+                        f"search_radius_mode.value == '{RADIUS_OPT_UNIFORM}' or "
+                        f"search_radius_mode.value == '{RADIUS_OPT_URBAN_NATURE}'"),
                },
                "urban_nature_area.tif": {
                    "about": gettext(
@ -501,15 +501,15 @@ MODEL_SPEC = {
                        "represented in each pixel."),
                    "bands": {1: {"type": "number", "units": u.m**2}},
                    "created_if":
-                        (f"search_radius_mode == '{RADIUS_OPT_UNIFORM}' or "
-                         f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'"),
+                        (f"search_radius_mode.value == '{RADIUS_OPT_UNIFORM}' or "
+                         f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'"),
                },
                "urban_nature_population_ratio.tif": {
                    "about": gettext(
                        "The calculated urban nature/population ratio."),
                    "bands": {1: {"type": "number", "units": u.m**2/u.person}},
                    "created_if":
-                        f"search_radius_mode == '{RADIUS_OPT_UNIFORM}'",
+                        f"search_radius_mode.value == '{RADIUS_OPT_UNIFORM}'",
                },

                # When RADIUS_OPT_URBAN_NATURE
@ -521,7 +521,7 @@ MODEL_SPEC = {
                        "land cover code LUCODE."),
                    "bands": {1: {"type": "number", "units": u.m**2}},
                    "created_if":
-                        f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'",
+                        f"search_radius_mode.value == '{RADIUS_OPT_URBAN_NATURE}'",
                },
                "urban_nature_supply_percapita_lucode_[LUCODE].tif": {
                    "about": gettext(
@ -529,7 +529,7 @@ MODEL_SPEC = {
                        "land use land cover code LUCODE"),
                    "bands": {1: {"type": "number", "units": u.m**2/u.person}},
                    "created_if":
-                        f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'",
+                        f"search_radius_mode.value == '{RADIUS_OPT_URBAN_NATURE}'",
                },
                "urban_nature_population_ratio_lucode_[LUCODE].tif": {
                    "about": gettext(
@ -538,7 +538,7 @@ MODEL_SPEC = {
                        "land cover code LUCODE."),
                    "bands": {1: {"type": "number", "units": u.m**2/u.person}},
                    "created_if":
-                        f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'",
+                        f"search_radius_mode.value == '{RADIUS_OPT_URBAN_NATURE}'",
                },

                # When RADIUS_OPT_POP_GROUP
@ -549,7 +549,7 @@ MODEL_SPEC = {
                        "POP_GROUP."),
                    "bands": {1: {"type": "number", "units": u.count}},
                    "created_if":
-                        f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
+                        f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'",
                },
                "proportion_of_population_in_[POP_GROUP].tif": {
                    "about": gettext(
@ -558,7 +558,7 @@ MODEL_SPEC = {
                        "POP_GROUP."),
                    "bands": {1: {"type": "number", "units": u.none}},
                    "created_if":
-                        f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
+                        f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'",
                },
                "distance_weighted_population_in_[POP_GROUP].tif": {
                    "about": gettext(
@ -568,7 +568,7 @@ MODEL_SPEC = {
                        "decay function."),
                    "bands": {1: {"type": "number", "units": u.people}},
                    "created_if":
-                        f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
+                        f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'",
                },
                "distance_weighted_population_all_groups.tif": {
                    "about": gettext(
@ -576,7 +576,7 @@ MODEL_SPEC = {
                        "decay function."),
                    "bands": {1: {"type": "number", "units": u.people}},
                    "created_if":
-                        f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
+                        f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'",
                },
                "urban_nature_supply_percapita_to_[POP_GROUP].tif": {
                    "about": gettext(
@ -584,7 +584,7 @@ MODEL_SPEC = {
                        "group POP_GROUP."),
                    "bands": {1: {"type": "number", "units": u.m**2/u.person}},
                    "created_if":
-                        f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
+                        f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'",
                },
                "undersupplied_population_[POP_GROUP].tif": {
                    "about": gettext(
@ -592,7 +592,7 @@ MODEL_SPEC = {
                        "are experiencing an urban nature deficit."),
                    "bands": {1: {"type": "number", "units": u.people}},
                    "created_if":
-                        f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
+                        f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'",
                },
                "oversupplied_population_[POP_GROUP].tif": {
                    "about": gettext(
@ -600,7 +600,7 @@ MODEL_SPEC = {
                        "are experiencing an urban nature surplus."),
                    "bands": {1: {"type": "number", "units": u.people}},
                    "created_if":
-                        f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
+                        f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'",
                }
            }
        },
--- a/src/natcap/invest/validation.py
+++ b/src/natcap/invest/validation.py
@ -3,12 +3,15 @@ import ast
 import functools
 import importlib
 import inspect
+import io
 import logging
 import os
 import pprint
 import queue
 import re
 import threading
+import token
+import tokenize
 import warnings

 import numpy
@ -16,8 +19,8 @@ import pandas
 import pint
 import pygeoprocessing
 from osgeo import gdal
-from osgeo import osr
 from osgeo import ogr
+from osgeo import osr

 from . import gettext
 from . import spec_utils
@ -63,6 +66,45 @@ MESSAGES = {
 }


+def _rewrite_name_dot_value(target_key, expression):
+    """Rewrite a ``name.value`` attribute as a single variable.
+
+    This function uses python's ``tokenize`` library to tokenize the expression
+    before checking for the target key and presence of a ``value`` attribute.
+    This eliminates false-positives with similarly-named variables.
+
+    Every token is handed to ``tokenize.untokenize`` as a ``(type, string)``
+    pair, so the whitespace of the original expression is normalized rather
+    than preserved.  The result still tokenizes to the same sequence of
+    tokens, with each ``{target_key}.value`` collapsed into one name.
+
+    Args:
+        target_key (string): The target symbol that we expect to have a
+            ``.value`` attribute.
+        expression (string): A string expression likely containing
+            ``{target_key}.value``
+
+    Returns:
+        A rewritten, valid python code string where ``{target_key}.value`` has
+        been rewritten as ``__{target_key}__value__``.
+    """
+    tokens = list(tokenize.generate_tokens(io.StringIO(expression).readline))
+
+    replacement_name = f"__{target_key}__value__"
+
+    output_tokens = []
+    index = 0
+    # Stop two tokens short of the end: a 3-token match cannot start there,
+    # and the stream normally closes with empty NEWLINE/ENDMARKER tokens that
+    # do not need to be reproduced.
+    while index < (len(tokens) - 2):
+        window = [tok.string for tok in tokens[index:index + 3]]
+        if window == [target_key, '.', 'value']:
+            # Only the type and string value are required for untokenization
+            output_tokens.append((token.NAME, replacement_name))
+            index += 3  # skip the "." and "value" tokens.
+        else:
+            # Keep the token, but also as a (type, string) pair.  Mixing full
+            # 5-tuples with 2-tuples makes ``untokenize`` omit the separator
+            # at the switch-over, fusing a preceding keyword with the
+            # replacement (``not x.value`` became ``not__x__value__``).
+            output_tokens.append(tokens[index][:2])
+            index += 1
+
+    return tokenize.untokenize(output_tokens)
+
+
 def _evaluate_expression(expression, variable_map):
    """Evaluate a python expression.

@ -670,8 +712,6 @@ def get_validated_dataframe(csv_path, columns=None, rows=None, index_col=None,
    return df


-
-
 def check_csv(filepath, **kwargs):
    """Validate a table.

@ -980,9 +1020,36 @@ def validate(args, spec, spatial_overlap_opts=None):
    for key in conditionally_required_keys:
        # An input is conditionally required when the expression given
        # evaluates to True.
+        # We handle 2 cases of how the expression is written:
+        #    * Case 1: a logical expression of boolean operations on another
+        #      parameter.  Example: "not <arg_name>"
+        #      In this case, the <arg_name> symbol is interpreted as a bool.
+        #    * Case 2: a logical expression comparing the value of the
+        #      parameter against another known value.
+        #      Example: "<other_arg_name>.value == 'uniform radius'"
+        expression_values = {}
+        expression_with_value = spec[key]['required'][:]  # make a copy
+        for sufficient_key, is_sufficient in sufficient_inputs.items():
+            try:
+                args_value = args[sufficient_key]
+            except KeyError:
+                # Handle the case where a sufficient key (e.g. optional) is
+                # missing from args.
+                args_value = None
+
+            # If the expression contains a {key}.value pattern, rewrite the
+            # name to avoid dot-notation.
+            # Because sufficiency is a bool and bools are singletons, we cannot
+            # actually assign a .value attribute on a bool.
+            value_symbol = f'__{sufficient_key}__value__'
+            expression_with_value = _rewrite_name_dot_value(
+                sufficient_key, expression_with_value)
+            expression_values[value_symbol] = args_value
+            expression_values[sufficient_key] = is_sufficient
+
        is_conditionally_required = _evaluate_expression(
-            expression=spec[key]['required'],
-            variable_map=sufficient_inputs)
+            expression=expression_with_value,
+            variable_map=expression_values)
        if is_conditionally_required:
            if key not in args:
                validation_warnings.append(([key], MESSAGES['MISSING_KEY']))
--- a/tests/test_urban_nature_access.py
+++ b/tests/test_urban_nature_access.py
@ -1030,3 +1030,16 @@ class UNATests(unittest.TestCase):
        args['search_radius_mode'] = (
            urban_nature_access.RADIUS_OPT_URBAN_NATURE)
        self.assertEqual(urban_nature_access.validate(args), [])
+
+    def test_validate_uniform_search_radius(self):
+        """UNA: A blank search radius is flagged in uniform radius mode."""
+        from natcap.invest import urban_nature_access
+        from natcap.invest import validation
+
+        # Start from the shared model args, select the uniform radius mode
+        # and leave the radius itself blank.
+        model_args = _build_model_args(self.workspace_dir)
+        model_args['search_radius_mode'] = (
+            urban_nature_access.RADIUS_OPT_UNIFORM)
+        model_args['search_radius'] = ''
+
+        validation_results = urban_nature_access.validate(model_args)
+
+        # Exactly one warning is expected, and it points at the blank input.
+        expected_results = [
+            (['search_radius'], validation.MESSAGES['MISSING_VALUE'])]
+        self.assertEqual(validation_results, expected_results)
--- a/tests/test_validation.py
+++ b/tests/test_validation.py
@ -1,5 +1,6 @@
 """Testing module for validation."""
 import codecs
+import collections
 import functools
 import os
 import platform
@ -9,12 +10,14 @@ import tempfile
 import textwrap
 import time
 import unittest
-from unittest.mock import Mock
 import warnings
+from unittest.mock import Mock

 import numpy
-from osgeo import gdal, osr, ogr
 import pandas
+from osgeo import gdal
+from osgeo import ogr
+from osgeo import osr


 class SpatialOverlapTest(unittest.TestCase):
@ -30,8 +33,8 @@ class SpatialOverlapTest(unittest.TestCase):

    def test_no_overlap(self):
        """Validation: verify lack of overlap."""
-        from natcap.invest import validation
        import pygeoprocessing
+        from natcap.invest import validation

        driver = gdal.GetDriverByName('GTiff')
        filepath_1 = os.path.join(self.workspace_dir, 'raster_1.tif')
@ -226,7 +229,8 @@ class ValidatorTest(unittest.TestCase):

    def test_n_workers(self):
        """Validation: validation error returned on invalid n_workers."""
-        from natcap.invest import spec_utils, validation
+        from natcap.invest import spec_utils
+        from natcap.invest import validation

        args_spec = {
            'n_workers': spec_utils.N_WORKERS,
@ -427,7 +431,8 @@ class RasterValidation(unittest.TestCase):

    def test_raster_incorrect_units(self):
        """Validation: test when a raster projection has wrong units."""
-        from natcap.invest import spec_utils, validation
+        from natcap.invest import spec_utils
+        from natcap.invest import validation

        # Use EPSG:32066  # NAD27 / BLM 16N (in US Survey Feet)
        driver = gdal.GetDriverByName('GTiff')
@ -508,7 +513,8 @@ class VectorValidation(unittest.TestCase):

    def test_vector_projected_in_m(self):
        """Validation: test that a vector's projection has expected units."""
-        from natcap.invest import spec_utils, validation
+        from natcap.invest import spec_utils
+        from natcap.invest import validation

        driver = gdal.GetDriverByName('GPKG')
        filepath = os.path.join(self.workspace_dir, 'vector.gpkg')
@ -533,7 +539,8 @@ class VectorValidation(unittest.TestCase):

    def test_wrong_geom_type(self):
        """Validation: checks that the vector's geometry type is correct."""
-        from natcap.invest import spec_utils, validation
+        from natcap.invest import spec_utils
+        from natcap.invest import validation
        driver = gdal.GetDriverByName('GPKG')
        filepath = os.path.join(self.workspace_dir, 'vector.gpkg')
        vector = driver.Create(filepath, 0, 0, 0, gdal.GDT_Unknown)
@ -1966,3 +1973,49 @@ class TestValidationFromSpec(unittest.TestCase):
        patterns = validation.get_headers_to_validate(spec)
        # should only get the patterns that are static and always required
        self.assertEqual(sorted(patterns), ['a'])
+
+
+class TestExpressionNameRewrite(unittest.TestCase):
+    """Tests for ``validation._rewrite_name_dot_value``."""
+
+    @staticmethod
+    def _token_strings(source):
+        """Return the non-blank token strings found in ``source``.
+
+        Comparing token strings instead of raw text keeps these tests
+        independent of how whitespace is laid out in a rewritten expression,
+        while still catching two names that were fused into one.
+        """
+        import io
+        import tokenize
+
+        return [
+            tok.string for tok in tokenize.generate_tokens(
+                io.StringIO(source).readline)
+            if tok.string.strip()]
+
+    def test_rewrite(self):
+        """Validation: rewrite ``key.value`` at the start of an expression."""
+        from natcap.invest import validation
+
+        target_key = "search_radius_mode"
+        expression = (
+            'search_radius_mode.value == "uniform radius" '
+            'and not my_search_radius_mode')
+        result = validation._rewrite_name_dot_value(target_key, expression)
+
+        # Only the target's ``.value`` is collapsed; the similarly-named
+        # ``my_search_radius_mode`` must be left alone.
+        self.assertEqual(self._token_strings(result), [
+            '__search_radius_mode__value__', '==', '"uniform radius"',
+            'and', 'not', 'my_search_radius_mode'])
+
+        # Make sure we can still evaluate the result if we simulate some
+        # objects for the local references.  The values are chosen so that
+        # ``and`` cannot short-circuit: both operands really are evaluated.
+        # An explicit dict is passed as globals because ``eval`` requires one.
+        eval_result = eval(result, {}, {
+            "my_search_radius_mode": False,
+            "__search_radius_mode__value__": "uniform radius"})
+        self.assertEqual(eval_result, True)
+
+    def test_rewrite_at_end_of_expression(self):
+        """Validation: rewrite ``key.value`` directly after a keyword."""
+        from natcap.invest import validation
+
+        target_key = "search_radius_mode"
+        expression = (
+            'my_search_radius_mode.value == "uniform radius" '
+            'and not search_radius_mode.value')
+        result = validation._rewrite_name_dot_value(target_key, expression)
+
+        # ``not`` and the replacement name must remain two separate tokens,
+        # and ``my_search_radius_mode.value`` must not be rewritten.
+        self.assertEqual(self._token_strings(result), [
+            'my_search_radius_mode', '.', 'value', '==', '"uniform radius"',
+            'and', 'not', '__search_radius_mode__value__'])
+
+        # Make sure we can still evaluate the result if we simulate some
+        # objects for the local references.  The left operand is true so the
+        # rewritten name on the right-hand side is actually looked up.
+        mode_obj_tpl = collections.namedtuple('mode_obj', ['value'])
+        mode_obj = mode_obj_tpl('uniform radius')
+        eval_result = eval(result, {}, {
+            "my_search_radius_mode": mode_obj,
+            "__search_radius_mode__value__": 0})
+        self.assertEqual(eval_result, True)