Merge branch 'release/3.16.0' into feature/plugins

2025-05-27 10:55:57 -07:00 · 2025-05-27 10:55:57 -07:00 · 0488a8ec4f
parent 98d9992a11 34e1bf8ca3
commit 0488a8ec4f
5 changed files with 209 additions and 40 deletions
--- a/.github/workflows/release-part-2.yml
+++ b/.github/workflows/release-part-2.yml
@ -42,7 +42,11 @@ jobs:
      - uses: actions/checkout@v4

      - name: Install dependencies
-        run: pip install twine
+        run: |
+          # Workaround for setuptools generating invalid metadata
+          # https://github.com/natcap/invest/issues/1913
+          pip install "twine>=6.1.0"
+          pip install -U packaging

      - name: Extract version from autorelease branch name
        if: ${{ github.head_ref }}
--- a/HISTORY.rst
+++ b/HISTORY.rst
@ -94,6 +94,17 @@ NDR
  results. (`#1845 <https://github.com/natcap/invest/issues/1845>`_)
 * ``stream.tif`` is now saved in the main output folder rather than the
  intermediate folder (`#1864 <https://github.com/natcap/invest/issues/1864>`_).
+* Added a feature that allows the nutrient load to be entered as an
+  application rate or as an "extensive"/export measured value.
+  Previously, the model's biophysical table expected the ``load_[n|p]``
+  column to be an "extensive"/export measured value. Now, a new
+  column for both nitrogen and phosphorous, ``load_type_[n|p]``, is
+  required with expected values of either ``application-rate`` or
+  ``measured-runoff``. See the Data Needs section of the NDR User
+  Guide for more details.
+  (`#1044 <https://github.com/natcap/invest/issues/1044>`_).
+* Fixed a bug where input rasters (e.g. LULC) without a defined nodata value could
+  cause an OverflowError. (`#1904 <https://github.com/natcap/invest/issues/1904>`_).

 Seasonal Water Yield
 ====================
--- a/4
+++ b/4
@ -2,11 +2,11 @@
 DATA_DIR := data
 GIT_SAMPLE_DATA_REPO        := https://bitbucket.org/natcap/invest-sample-data.git
 GIT_SAMPLE_DATA_REPO_PATH   := $(DATA_DIR)/invest-sample-data
-GIT_SAMPLE_DATA_REPO_REV    := ecdab62bd6e2d3d9105e511cfd6884bf07f3d27b
+GIT_SAMPLE_DATA_REPO_REV    := 3d7a33c3d599daaec087a9c283a0c6b8377210f5

 GIT_TEST_DATA_REPO          := https://bitbucket.org/natcap/invest-test-data.git
 GIT_TEST_DATA_REPO_PATH     := $(DATA_DIR)/invest-test-data
-GIT_TEST_DATA_REPO_REV      := 4692cbab39cca4e49e39ee88926024e81acbb8cd
+GIT_TEST_DATA_REPO_REV      := d8a7397ba5992de7a80260e40956e4c29176383d

 GIT_UG_REPO                 := https://github.com/natcap/invest.users-guide
 GIT_UG_REPO_PATH            := doc/users-guide
--- a/src/natcap/invest/ndr/ndr.py
+++ b/src/natcap/invest/ndr/ndr.py
@ -9,6 +9,7 @@ import pygeoprocessing
 import pygeoprocessing.routing
 import taskgraph
 from osgeo import gdal
+from osgeo import gdal_array
 from osgeo import ogr

 from .. import gettext
@ -85,7 +86,53 @@ MODEL_SPEC = spec.build_model_spec({
            "index_col": "lucode",
            "columns": {
                "lucode": spec.LULC_TABLE_COLUMN,
-                "load_n": {
+                "load_type_p": {
+                    "type": "option_string",
+                    "required": True,
+                    "options": {
+                        "application-rate": {
+                            "description": gettext(
+                                "Treat the load values as nutrient "
+                                "application rates (e.g. fertilizer, livestock "
+                                "waste, ...)."
+                                "The model will adjust the load using the "
+                                "application rate and retention efficiency: "
+                                "load_p * (1 - eff_p).")},
+                        "measured-runoff": {
+                            "description": gettext(
+                                "Treat the load values as measured contaminant "
+                                "runoff.")},
+                    },
+                    "about": gettext(
+                        "Whether the nutrient load in column "
+                        "load_p should be treated as "
+                        "nutrient application rate or measured contaminant "
+                        "runoff. 'application-rate' | 'measured-runoff'")
+                },
+                "load_type_n": {
+                    "type": "option_string",
+                    "required": True,
+                    "options": {
+                        "application-rate": {
+                            "description": gettext(
+                                "Treat the load values as nutrient "
+                                "application rates (e.g. fertilizer, livestock "
+                                "waste, ...)."
+                                "The model will adjust the load using the "
+                                "application rate and retention efficiency: "
+                                "load_n * (1 - eff_n).")},
+                        "measured-runoff": {
+                            "description": gettext(
+                                "Treat the load values as measured contaminant "
+                                "runoff.")},
+                    },
+                    "about": gettext(
+                        "Whether the nutrient load in column "
+                        "load_n should be treated as "
+                        "nutrient application rate or measured contaminant "
+                        "runoff. 'application-rate' | 'measured-runoff'")
+                },
+                "load_n": {  # nitrogen or phosphorus nutrient loads
                    "type": "number",
                    "units": u.kilogram/u.hectare/u.year,
                    "required": "calc_n",
@ -135,7 +182,7 @@ MODEL_SPEC = spec.build_model_spec({
                        "is dissolved into the subsurface. By default, this "
                        "value should be set to 0, indicating that all "
                        "nutrients are delivered via surface flow. There is "
-                        "no equivalent of this for phosphorus.")}
+                        "no equivalent of this for phosphorus.")},
            },
            "about": gettext(
                "A table mapping each LULC class to its biophysical "
@ -394,14 +441,14 @@ MODEL_SPEC = spec.build_model_spec({
                    "about": "Above ground nitrogen loads",
                    "bands": {1: {
                        "type": "number",
-                        "units": u.kilogram/u.year
+                        "units": u.kilogram/u.hectare/u.year,
                    }}
                },
                "surface_load_p.tif": {
                    "about": "Above ground phosphorus loads",
                    "bands": {1: {
                        "type": "number",
-                        "units": u.kilogram/u.year
+                        "units": u.kilogram/u.hectare/u.year,
                    }}
                },
                "thresholded_slope.tif": {
@ -682,7 +729,7 @@ def execute(args):
            'mask_raster_path': f_reg['mask_path'],
            'target_masked_raster_path': f_reg['masked_runoff_proxy_path'],
            'target_dtype': gdal.GDT_Float32,
-            'default_nodata': _TARGET_NODATA,
+            'target_nodata': _TARGET_NODATA,
        },
        dependent_task_list=[mask_task, align_raster_task],
        target_path_list=[f_reg['masked_runoff_proxy_path']],
@ -695,7 +742,7 @@ def execute(args):
            'mask_raster_path': f_reg['mask_path'],
            'target_masked_raster_path': f_reg['masked_dem_path'],
            'target_dtype': gdal.GDT_Float32,
-            'default_nodata': float(numpy.finfo(numpy.float32).min),
+            'target_nodata': float(numpy.finfo(numpy.float32).min),
        },
        dependent_task_list=[mask_task, align_raster_task],
        target_path_list=[f_reg['masked_dem_path']],
@ -708,7 +755,7 @@ def execute(args):
            'mask_raster_path': f_reg['mask_path'],
            'target_masked_raster_path': f_reg['masked_lulc_path'],
            'target_dtype': gdal.GDT_Int32,
-            'default_nodata': numpy.iinfo(numpy.int32).min,
+            'target_nodata': numpy.iinfo(numpy.int32).min,
        },
        dependent_task_list=[mask_task, align_raster_task],
        target_path_list=[f_reg['masked_lulc_path']],
@ -932,7 +979,10 @@ def execute(args):
            func=_calculate_load,
            args=(
                f_reg['masked_lulc_path'],
-                biophysical_df[f'load_{nutrient}'],
+                biophysical_df[
+                    [f'load_{nutrient}', f'eff_{nutrient}',
+                     f'load_type_{nutrient}']].to_dict('index'),
+                nutrient,
                load_path),
            dependent_task_list=[align_raster_task, mask_lulc_task],
            target_path_list=[load_path],
@ -1190,7 +1240,7 @@ def _create_mask_raster(source_raster_path, source_vector_path,


 def _mask_raster(source_raster_path, mask_raster_path,
-                 target_masked_raster_path, default_nodata, target_dtype):
+                 target_masked_raster_path, target_nodata, target_dtype):
    """Using a raster of 1s and 0s, determine which pixels remain in output.

    Args:
@ -1202,8 +1252,8 @@ def _mask_raster(source_raster_path, mask_raster_path,
            target raster.
        target_masked_raster_path (str): The path to where the target raster
            should be written.
-        default_nodata (int, float, None): The nodata value that should be used
-            if ``source_raster_path`` does not have a defined nodata value.
+        target_nodata (int, float): The target nodata value that should match
+            ``target_dtype``.
        target_dtype (int): The ``gdal.GDT_*`` datatype of the target raster.

    Returns:
@ -1211,22 +1261,20 @@ def _mask_raster(source_raster_path, mask_raster_path,
    """
    source_raster_info = pygeoprocessing.get_raster_info(source_raster_path)
    source_nodata = source_raster_info['nodata'][0]
-    nodata = source_nodata
-    if nodata is None:
-        nodata = default_nodata
+    target_numpy_dtype = gdal_array.GDALTypeCodeToNumericTypeCode(target_dtype)

    def _mask_op(mask, raster):
-        result = numpy.full(mask.shape, nodata,
-                            dtype=source_raster_info['numpy_type'])
+        result = numpy.full(mask.shape, target_nodata,
+                            dtype=target_numpy_dtype)
        valid_pixels = (
-            ~pygeoprocessing.array_equals_nodata(raster, nodata) &
+            ~pygeoprocessing.array_equals_nodata(raster, source_nodata) &
            (mask == 1))
        result[valid_pixels] = raster[valid_pixels]
        return result

    pygeoprocessing.raster_calculator(
        [(mask_raster_path, 1), (source_raster_path, 1)], _mask_op,
-        target_masked_raster_path, target_dtype, nodata)
+        target_masked_raster_path, target_dtype, target_nodata)


 def _add_fields_to_shapefile(field_pickle_map, target_vector_path):
@ -1351,13 +1399,18 @@ def _normalize_raster(base_raster_path_band, target_normalized_raster_path,
        target_dtype=numpy.float32)


-def _calculate_load(lulc_raster_path, lucode_to_load, target_load_raster):
+def _calculate_load(
+        lulc_raster_path, lucode_to_load, nutrient_type, target_load_raster):
    """Calculate load raster by mapping landcover.

+    If load type is 'application-rate' adjust by ``1 - efficiency``.
+
    Args:
        lulc_raster_path (string): path to integer landcover raster.
-        lucode_to_load (dict): a mapping of landcover IDs to per-area
-            nutrient load.
+        lucode_to_load (dict): a mapping of landcover IDs to nutrient load,
+            efficiency, and load type. The load type value can be one of:
+            [ 'measured-runoff' | 'appliation-rate' ].
+        nutrient_type (str): the nutrient type key ('p' | 'n').
        target_load_raster (string): path to target raster that will have
            load values (kg/ha) mapped to pixels based on LULC.

@ -1365,12 +1418,34 @@ def _calculate_load(lulc_raster_path, lucode_to_load, target_load_raster):
        None.

    """
+    app_rate = 'application-rate'
+    measured_runoff = 'measured-runoff'
+    load_key = f'load_{nutrient_type}'
+    eff_key = f'eff_{nutrient_type}'
+    load_type_key = f'load_type_{nutrient_type}'
+
+    # Raise ValueError if unknown load_type
+    for key, value in lucode_to_load.items():
+        load_type = value[load_type_key]
+        if not load_type in [app_rate, measured_runoff]:
+            # unknown load type, raise ValueError
+            raise ValueError(
+                'nutrient load type must be: '
+                f'"{app_rate}" | "{measured_runoff}". Instead '
+                f'found value of: "{load_type}".')
+
    def _map_load_op(lucode_array):
-        """Convert unit load to total load & handle nodata."""
+        """Convert unit load to total load."""
        result = numpy.empty(lucode_array.shape)
        for lucode in numpy.unique(lucode_array):
            try:
-                result[lucode_array == lucode] = (lucode_to_load[lucode])
+                if lucode_to_load[lucode][load_type_key] == measured_runoff:
+                    result[lucode_array == lucode] = (
+                        lucode_to_load[lucode][load_key])
+                elif lucode_to_load[lucode][load_type_key] == app_rate:
+                    result[lucode_array == lucode] = (
+                        lucode_to_load[lucode][load_key] * (
+                            1 - lucode_to_load[lucode][eff_key]))
            except KeyError:
                raise KeyError(
                    'lucode: %d is present in the landuse raster but '
@ -1391,8 +1466,7 @@ def _map_surface_load(
    """Calculate surface load from landcover raster.

    Args:
-        modified_load_path (string): path to modified load raster with units
-            of kg/pixel.
+        modified_load_path (string): path to modified load raster.
        lulc_raster_path (string): path to landcover raster.
        lucode_to_subsurface_proportion (dict): maps landcover codes to
            subsurface proportion values. Or if None, no subsurface transfer
--- a/tests/test_ndr.py
+++ b/tests/test_ndr.py
@ -124,18 +124,12 @@ class NDRTests(unittest.TestCase):
        # use predefined directory so test can clean up files during teardown
        args = NDRTests.generate_base_args(self.workspace_dir)
        new_table_path = os.path.join(self.workspace_dir, 'table_c_len_0.csv')
-        with open(new_table_path, 'w') as target_file:
-            with open(args['biophysical_table_path'], 'r') as table_file:
-                target_file.write(table_file.readline())
-                while True:
-                    line = table_file.readline()
-                    if not line:
-                        break
-                    line_list = line.split(',')
+
+        bio_df = pandas.read_csv(args['biophysical_table_path'])
        # replace the crit_len_p with 0 in this column
-                    line = (
-                        ','.join(line_list[0:12] + ['0.0'] + line_list[13::]))
-                    target_file.write(line)
+        bio_df['crit_len_p'] = 0
+        bio_df.to_csv(new_table_path)
+        bio_df = None

        args['biophysical_table_path'] = new_table_path
        ndr.execute(args)
@ -376,6 +370,39 @@ class NDRTests(unittest.TestCase):
        if mismatch_list:
            raise RuntimeError("results not expected: %s" % mismatch_list)

+    def test_mask_raster_nodata_overflow(self):
+        """NDR test when target nodata value overflows source dtype."""
+        from natcap.invest.ndr import ndr
+
+        source_raster_path = os.path.join(self.workspace_dir, 'source.tif')
+        target_raster_path = os.path.join(
+            self.workspace_dir, 'target.tif')
+        source_dtype = numpy.int8
+        target_dtype = gdal.GDT_Int32
+        target_nodata = numpy.iinfo(numpy.int32).min
+
+        pygeoprocessing.numpy_array_to_raster(
+            base_array=numpy.full((4, 4), 1, dtype=source_dtype),
+            target_nodata=None,
+            pixel_size=(1, -1),
+            origin=(0, 0),
+            projection_wkt=None,
+            target_path=source_raster_path)
+
+        ndr._mask_raster(
+            source_raster_path=source_raster_path,
+            mask_raster_path=source_raster_path,  # mask=source for convenience
+            target_masked_raster_path=target_raster_path,
+            target_nodata=target_nodata,
+            target_dtype=target_dtype)
+
+        # Mostly we're testing that _mask_raster did not raise an OverflowError,
+        # but we can assert the results anyway.
+        array = pygeoprocessing.raster_to_numpy_array(target_raster_path)
+        numpy.testing.assert_array_equal(
+            array,
+            numpy.full((4, 4), 1, dtype=numpy.int32))  # matches target_dtype
+
    def test_validation(self):
        """NDR test argument validation."""
        from natcap.invest import validation
@ -537,3 +564,56 @@ class NDRTests(unittest.TestCase):
        expected_rpi = runoff_proxy_array/numpy.mean(runoff_proxy_array)

        numpy.testing.assert_allclose(actual_rpi, expected_rpi)
+    
+    def test_calculate_load_type(self):
+        """Test ``_calculate_load`` for both load_types."""
+        from natcap.invest.ndr import ndr
+
+        # make simple lulc raster
+        lulc_path = os.path.join(self.workspace_dir, "lulc-load-type.tif")
+        lulc_array = numpy.array(
+            [[1, 2, 3, 4], [4, 3, 2, 1]], dtype=numpy.int16)
+        srs = osr.SpatialReference()
+        srs.ImportFromEPSG(26910)
+        projection_wkt = srs.ExportToWkt()
+        origin = (461251, 4923445)
+        pixel_size = (30, -30)
+        no_data = -1
+        pygeoprocessing.numpy_array_to_raster(
+            lulc_array, no_data, pixel_size, origin, projection_wkt,
+            lulc_path)
+
+        target_load_path = os.path.join(self.workspace_dir, "load_raster.tif")
+
+        # Calculate load
+        lucode_to_params = {
+            1: {'load_n': 10.0, 'eff_n': 0.5, 'load_type_n': 'measured-runoff'},
+            2: {'load_n': 20.0, 'eff_n': 0.5, 'load_type_n': 'measured-runoff'},
+            3: {'load_n': 10.0, 'eff_n': 0.5, 'load_type_n': 'application-rate'},
+            4: {'load_n': 20.0, 'eff_n': 0.5, 'load_type_n': 'application-rate'}}
+        ndr._calculate_load(lulc_path, lucode_to_params, 'n', target_load_path)
+
+        expected_results = numpy.array(
+            [[10.0, 20.0, 5.0, 10.0], [10.0, 5.0, 20.0, 10.0]])
+        actual_results = pygeoprocessing.raster_to_numpy_array(target_load_path)
+
+        numpy.testing.assert_allclose(actual_results, expected_results)
+    
+    def test_calculate_load_type_raises_error(self):
+        """Test ``_calculate_load`` raises ValueError on bad load_type's."""
+        from natcap.invest.ndr import ndr
+
+        lulc_path = os.path.join(self.workspace_dir, "lulc-load-type.tif")
+        target_load_path = os.path.join(self.workspace_dir, "load_raster.tif")
+
+        # Calculate load
+        lucode_to_params = {
+            1: {'load_n': 10.0, 'eff_n': 0.5, 'load_type_n': 'measured-runoff'},
+            2: {'load_n': 20.0, 'eff_n': 0.5, 'load_type_n': 'cheese'},
+            3: {'load_n': 10.0, 'eff_n': 0.5, 'load_type_n': 'application-rate'},
+            4: {'load_n': 20.0, 'eff_n': 0.5, 'load_type_n': 'application-rate'}}
+
+        with self.assertRaises(ValueError) as cm:
+            ndr._calculate_load(lulc_path, lucode_to_params, 'n', target_load_path)
+        actual_message = str(cm.exception)
+        self.assertTrue('found value of: "cheese"' in actual_message)