Varname mangling for value-based conditional req.

RE:#1503
This commit is contained in:
James Douglass 2024-01-24 11:55:38 -08:00
parent 9f4afe8536
commit 845c49f2a1
4 changed files with 172 additions and 39 deletions

View File

@ -92,7 +92,7 @@ MODEL_SPEC = {
'type': 'number',
'units': u.meter,
'required':
f'search_radius_mode == "{RADIUS_OPT_URBAN_NATURE}"',
f'search_radius_mode.value == "{RADIUS_OPT_URBAN_NATURE}"',
'expression': 'value >= 0',
'about': (
'The distance within which a LULC type is relevant '
@ -125,7 +125,7 @@ MODEL_SPEC = {
"pop_[POP_GROUP]": {
"type": "ratio",
"required": (
f"(search_radius_mode == '{RADIUS_OPT_POP_GROUP}') "
f"(search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}') "
"or aggregate_by_pop_group"),
"about": gettext(
"The proportion of the population within each "
@ -248,7 +248,7 @@ MODEL_SPEC = {
'name': 'uniform search radius',
'units': u.m,
'expression': 'value > 0',
'required': f'search_radius_mode == "{RADIUS_OPT_UNIFORM}"',
'required': f'search_radius_mode.value == "{RADIUS_OPT_UNIFORM}"',
'about': gettext(
'The search radius to use when running the model under a '
'uniform search radius. Required when running the model '
@ -257,7 +257,7 @@ MODEL_SPEC = {
'population_group_radii_table': {
'name': 'population group radii table',
'type': 'csv',
'required': f'search_radius_mode == "{RADIUS_OPT_POP_GROUP}"',
'required': f'search_radius_mode.value == "{RADIUS_OPT_POP_GROUP}"',
'index_col': 'pop_group',
'columns': {
"pop_group": {
@ -273,7 +273,7 @@ MODEL_SPEC = {
'type': 'number',
'units': u.meter,
'required':
f'search_radius_mode == "{RADIUS_OPT_POP_GROUP}"',
f'search_radius_mode.value == "{RADIUS_OPT_POP_GROUP}"',
'expression': 'value >= 0',
'about': gettext(
"The search radius in meters to use "
@ -379,7 +379,7 @@ MODEL_SPEC = {
"group POP_GROUP within this administrative "
"unit."),
"created_if": (
f"(search_radius_mode == '{RADIUS_OPT_POP_GROUP}') "
f"(search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}') "
"or aggregate_by_pop_group"),
},
"Pund_adm_[POP_GROUP]": {
@ -391,7 +391,7 @@ MODEL_SPEC = {
"administrative unit that are undersupplied "
"with urban nature."),
"created_if": (
f"(search_radius_mode == '{RADIUS_OPT_POP_GROUP}') "
f"(search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}') "
"or aggregate_by_pop_group"),
},
"Povr_adm_[POP_GROUP]": {
@ -403,7 +403,7 @@ MODEL_SPEC = {
"administrative unit that is oversupplied "
"with urban nature."),
"created_if": (
f"(search_radius_mode == '{RADIUS_OPT_POP_GROUP}') "
f"(search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}') "
"or aggregate_by_pop_group"),
},
},
@ -418,7 +418,7 @@ MODEL_SPEC = {
"to the stated urban nature demand."),
"bands": {1: {"type": "number", "units": u.m**2/u.person}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'",
},
# when RADIUS_OPT_UNIFORM
@ -428,7 +428,7 @@ MODEL_SPEC = {
"radius, weighted by the selected decay function."),
"bands": {1: {"type": "number", "units": u.m**2}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'",
f"search_radius_mode.value == '{RADIUS_OPT_URBAN_NATURE}'",
},
# When RADIUS_OPT_URBAN_NATURE
@ -439,7 +439,7 @@ MODEL_SPEC = {
"by the selected decay function."),
"bands": {1: {"type": "number", "units": u.m**2}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'",
f"search_radius_mode.value == '{RADIUS_OPT_URBAN_NATURE}'",
},
# When RADIUS_OPT_POP_GROUP
@ -450,7 +450,7 @@ MODEL_SPEC = {
"selected decay function."),
"bands": {1: {"type": "number", "units": u.m**2}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'",
},
},
},
@ -492,8 +492,8 @@ MODEL_SPEC = {
"function."),
"bands": {1: {'type': 'number', 'units': u.count}},
"created_if": (
f"search_radius_mode == '{RADIUS_OPT_UNIFORM}' or "
f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'"),
f"search_radius_mode.value == '{RADIUS_OPT_UNIFORM}' or "
f"search_radius_mode.value == '{RADIUS_OPT_URBAN_NATURE}'"),
},
"urban_nature_area.tif": {
"about": gettext(
@ -501,15 +501,15 @@ MODEL_SPEC = {
"represented in each pixel."),
"bands": {1: {"type": "number", "units": u.m**2}},
"created_if":
(f"search_radius_mode == '{RADIUS_OPT_UNIFORM}' or "
f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'"),
(f"search_radius_mode.value == '{RADIUS_OPT_UNIFORM}' or "
f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'"),
},
"urban_nature_population_ratio.tif": {
"about": gettext(
"The calculated urban nature/population ratio."),
"bands": {1: {"type": "number", "units": u.m**2/u.person}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_UNIFORM}'",
f"search_radius_mode.value == '{RADIUS_OPT_UNIFORM}'",
},
# When RADIUS_OPT_URBAN_NATURE
@ -521,7 +521,7 @@ MODEL_SPEC = {
"land cover code LUCODE."),
"bands": {1: {"type": "number", "units": u.m**2}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'",
f"search_radius_mode.value == '{RADIUS_OPT_URBAN_NATURE}'",
},
"urban_nature_supply_percapita_lucode_[LUCODE].tif": {
"about": gettext(
@ -529,7 +529,7 @@ MODEL_SPEC = {
"land use land cover code LUCODE"),
"bands": {1: {"type": "number", "units": u.m**2/u.person}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'",
f"search_radius_mode.value == '{RADIUS_OPT_URBAN_NATURE}'",
},
"urban_nature_population_ratio_lucode_[LUCODE].tif": {
"about": gettext(
@ -538,7 +538,7 @@ MODEL_SPEC = {
"land cover code LUCODE."),
"bands": {1: {"type": "number", "units": u.m**2/u.person}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'",
f"search_radius_mode.value == '{RADIUS_OPT_URBAN_NATURE}'",
},
# When RADIUS_OPT_POP_GROUP
@ -549,7 +549,7 @@ MODEL_SPEC = {
"POP_GROUP."),
"bands": {1: {"type": "number", "units": u.count}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'",
},
"proportion_of_population_in_[POP_GROUP].tif": {
"about": gettext(
@ -558,7 +558,7 @@ MODEL_SPEC = {
"POP_GROUP."),
"bands": {1: {"type": "number", "units": u.none}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'",
},
"distance_weighted_population_in_[POP_GROUP].tif": {
"about": gettext(
@ -568,7 +568,7 @@ MODEL_SPEC = {
"decay function."),
"bands": {1: {"type": "number", "units": u.people}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'",
},
"distance_weighted_population_all_groups.tif": {
"about": gettext(
@ -576,7 +576,7 @@ MODEL_SPEC = {
"decay function."),
"bands": {1: {"type": "number", "units": u.people}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'",
},
"urban_nature_supply_percapita_to_[POP_GROUP].tif": {
"about": gettext(
@ -584,7 +584,7 @@ MODEL_SPEC = {
"group POP_GROUP."),
"bands": {1: {"type": "number", "units": u.m**2/u.person}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'",
},
"undersupplied_population_[POP_GROUP].tif": {
"about": gettext(
@ -592,7 +592,7 @@ MODEL_SPEC = {
"are experiencing an urban nature deficit."),
"bands": {1: {"type": "number", "units": u.people}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'",
},
"oversupplied_population_[POP_GROUP].tif": {
"about": gettext(
@ -600,7 +600,7 @@ MODEL_SPEC = {
"are experiencing an urban nature surplus."),
"bands": {1: {"type": "number", "units": u.people}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
f"search_radius_mode.value == '{RADIUS_OPT_POP_GROUP}'",
}
}
},

View File

@ -3,12 +3,15 @@ import ast
import functools
import importlib
import inspect
import io
import logging
import os
import pprint
import queue
import re
import threading
import token
import tokenize
import warnings
import numpy
@ -16,8 +19,8 @@ import pandas
import pint
import pygeoprocessing
from osgeo import gdal
from osgeo import osr
from osgeo import ogr
from osgeo import osr
from . import gettext
from . import spec_utils
@ -63,6 +66,45 @@ MESSAGES = {
}
def _rewrite_name_dot_value(target_key, expression):
    """Rewrite a ``name.value`` attribute as a single variable.

    This function uses python's ``tokenize`` library to tokenize the expression
    before checking for the target key and presence of a ``value`` attribute.
    This eliminates false-positives with similarly-named variables.

    The returned string is regenerated from tokens, so its whitespace will
    generally differ from the whitespace of ``expression``; only the token
    sequence is meaningful.

    Args:
        target_key (string): The target symbol that we expect to have a
            ``.value`` attribute.
        expression (string): A string expression likely containing
            ``{target_key}.value``

    Returns:
        A rewritten, valid python code string where ``{target_key}.value`` has
        been rewritten as ``__{target_key}__value__``.
    """
    tokens = list(tokenize.generate_tokens(
        io.StringIO(expression).readline))

    replacement_name = f"__{target_key}__value__"

    output_tokens = []
    index = 0
    # Stop 2 tokens short of the end: a 3-token window is needed to match
    # ``name . value``, and the tokens left over are the trailing
    # NEWLINE/ENDMARKER pair emitted by the tokenizer, which is not needed
    # in the rewritten expression.
    while index < (len(tokens) - 2):
        if (tokens[index].string == target_key
                and tokens[index+1].string == '.'
                and tokens[index+2].string == 'value'):
            output_tokens.append((token.NAME, replacement_name))
            index += 3  # skip the "." and "value" tokens.
        else:
            # Always emit (type, string) pairs.  ``untokenize`` switches to
            # its position-less compatibility mode at the first 2-tuple it
            # sees; mixing full TokenInfo tuples with 2-tuples would leave a
            # preceding keyword without a trailing space and fuse it with
            # the replacement (e.g. ``not__key__value__``), which is not the
            # expression we intended.
            output_tokens.append((tokens[index].type, tokens[index].string))
            index += 1
    return tokenize.untokenize(output_tokens)
def _evaluate_expression(expression, variable_map):
"""Evaluate a python expression.
@ -670,8 +712,6 @@ def get_validated_dataframe(csv_path, columns=None, rows=None, index_col=None,
return df
def check_csv(filepath, **kwargs):
"""Validate a table.
@ -980,9 +1020,36 @@ def validate(args, spec, spatial_overlap_opts=None):
for key in conditionally_required_keys:
# An input is conditionally required when the expression given
# evaluates to True.
# We handle 2 cases of how the expression is written:
# * Case 1: a logical expression of boolean operations on another
# parameter. Example: "not <arg_name>"
# In this case, the <arg_name> symbol is interpreted as a bool.
# * Case 2: a logical expression comparing the value of the
# parameter against another known value.
# Example: "<other_arg_name>.value == 'uniform radius'"
expression_values = {}
expression_with_value = spec[key]['required'][:] # make a copy
for sufficient_key, is_sufficient in sufficient_inputs.items():
try:
args_value = args[sufficient_key]
except KeyError:
# Handle the case where a sufficient key (e.g. optional) is
# missing from args.
args_value = None
# If the expression contains a {key}.value pattern, rewrite the
# name to avoid dot-notation.
# Because sufficiency is a bool and bools are singletons, we cannot
# actually assign a .value attribute on a bool.
value_symbol = f'__{sufficient_key}__value__'
expression_with_value = _rewrite_name_dot_value(
sufficient_key, expression_with_value)
expression_values[value_symbol] = args_value
expression_values[sufficient_key] = is_sufficient
is_conditionally_required = _evaluate_expression(
expression=spec[key]['required'],
variable_map=sufficient_inputs)
expression=expression_with_value,
variable_map=expression_values)
if is_conditionally_required:
if key not in args:
validation_warnings.append(([key], MESSAGES['MISSING_KEY']))

View File

@ -1030,3 +1030,16 @@ class UNATests(unittest.TestCase):
args['search_radius_mode'] = (
urban_nature_access.RADIUS_OPT_URBAN_NATURE)
self.assertEqual(urban_nature_access.validate(args), [])
def test_validate_uniform_search_radius(self):
    """UNA: Search radius is required when using uniform search radii."""
    from natcap.invest import urban_nature_access
    from natcap.invest import validation

    # Select the uniform-radius mode but leave the radius itself blank.
    args = _build_model_args(self.workspace_dir)
    args.update({
        'search_radius_mode': urban_nature_access.RADIUS_OPT_UNIFORM,
        'search_radius': '',
    })

    # The only expected complaint is about the blank search radius.
    expected_warnings = [
        (['search_radius'], validation.MESSAGES['MISSING_VALUE'])]
    self.assertEqual(urban_nature_access.validate(args), expected_warnings)

View File

@ -1,5 +1,6 @@
"""Testing module for validation."""
import codecs
import collections
import functools
import os
import platform
@ -9,12 +10,14 @@ import tempfile
import textwrap
import time
import unittest
from unittest.mock import Mock
import warnings
from unittest.mock import Mock
import numpy
from osgeo import gdal, osr, ogr
import pandas
from osgeo import gdal
from osgeo import ogr
from osgeo import osr
class SpatialOverlapTest(unittest.TestCase):
@ -30,8 +33,8 @@ class SpatialOverlapTest(unittest.TestCase):
def test_no_overlap(self):
"""Validation: verify lack of overlap."""
from natcap.invest import validation
import pygeoprocessing
from natcap.invest import validation
driver = gdal.GetDriverByName('GTiff')
filepath_1 = os.path.join(self.workspace_dir, 'raster_1.tif')
@ -226,7 +229,8 @@ class ValidatorTest(unittest.TestCase):
def test_n_workers(self):
"""Validation: validation error returned on invalid n_workers."""
from natcap.invest import spec_utils, validation
from natcap.invest import spec_utils
from natcap.invest import validation
args_spec = {
'n_workers': spec_utils.N_WORKERS,
@ -427,7 +431,8 @@ class RasterValidation(unittest.TestCase):
def test_raster_incorrect_units(self):
"""Validation: test when a raster projection has wrong units."""
from natcap.invest import spec_utils, validation
from natcap.invest import spec_utils
from natcap.invest import validation
# Use EPSG:32066 # NAD27 / BLM 16N (in US Survey Feet)
driver = gdal.GetDriverByName('GTiff')
@ -508,7 +513,8 @@ class VectorValidation(unittest.TestCase):
def test_vector_projected_in_m(self):
"""Validation: test that a vector's projection has expected units."""
from natcap.invest import spec_utils, validation
from natcap.invest import spec_utils
from natcap.invest import validation
driver = gdal.GetDriverByName('GPKG')
filepath = os.path.join(self.workspace_dir, 'vector.gpkg')
@ -533,7 +539,8 @@ class VectorValidation(unittest.TestCase):
def test_wrong_geom_type(self):
"""Validation: checks that the vector's geometry type is correct."""
from natcap.invest import spec_utils, validation
from natcap.invest import spec_utils
from natcap.invest import validation
driver = gdal.GetDriverByName('GPKG')
filepath = os.path.join(self.workspace_dir, 'vector.gpkg')
vector = driver.Create(filepath, 0, 0, 0, gdal.GDT_Unknown)
@ -1966,3 +1973,49 @@ class TestValidationFromSpec(unittest.TestCase):
patterns = validation.get_headers_to_validate(spec)
# should only get the patterns that are static and always required
self.assertEqual(sorted(patterns), ['a'])
class TestExpressionNameRewrite(unittest.TestCase):
    """Tests for ``validation._rewrite_name_dot_value``."""

    @staticmethod
    def _without_whitespace(text):
        """Return ``text`` with all whitespace removed.

        The exact spacing produced by ``tokenize.untokenize`` is incidental,
        so rewritten expressions are compared with whitespace stripped.
        """
        return ''.join(text.split())

    def test_rewrite(self):
        """Validation: rewrite ``key.value`` at the start of an expression."""
        from natcap.invest import validation

        target_key = "search_radius_mode"
        expression = (
            'search_radius_mode.value == "uniform radius" '
            'and not my_search_radius_mode')
        result = validation._rewrite_name_dot_value(target_key, expression)

        # Only the target's ``.value`` should be rewritten; the
        # similarly-named ``my_search_radius_mode`` must be left alone.
        self.assertEqual(
            self._without_whitespace(result),
            self._without_whitespace(
                '__search_radius_mode__value__ == "uniform radius" '
                'and not my_search_radius_mode'))

        # Make sure we can still evaluate the result if we simulate some
        # objects for the local references.  An empty globals dict is used
        # because ``__builtins__`` may be a module rather than a dict
        # depending on how this file is executed; eval() inserts the
        # builtins on its own.
        eval_result = eval(result, {}, {
            "search_radius_mode": True,
            "__search_radius_mode__value__": 1})
        self.assertEqual(eval_result, False)

    def test_rewrite_at_end_of_expression(self):
        """Validation: rewrite ``key.value`` at the end of an expression."""
        from natcap.invest import validation

        target_key = "search_radius_mode"
        expression = (
            'my_search_radius_mode.value == "uniform radius" '
            'and not search_radius_mode.value')
        result = validation._rewrite_name_dot_value(target_key, expression)

        # ``my_search_radius_mode.value`` is a different symbol and must
        # keep its attribute access; only the trailing target is rewritten.
        self.assertEqual(
            self._without_whitespace(result),
            self._without_whitespace(
                'my_search_radius_mode.value == "uniform radius" '
                'and not __search_radius_mode__value__'))

        # Make sure we can still evaluate the result if we simulate some
        # objects for the local references.
        mode_obj_tpl = collections.namedtuple('mode_obj', ['value'])
        mode_obj = mode_obj_tpl('foo')
        eval_result = eval(result, {}, {
            "my_search_radius_mode": mode_obj,
            "__search_radius_mode__value__": 1})
        self.assertEqual(eval_result, False)