Update Crop Production (Regression) to produce per-hectare values in raster outputs

2024-12-18 16:26:39 -07:00 · 2024-12-18 16:26:39 -07:00 · 2ea369d271
parent 0136454cdd
commit 2ea369d271
2 changed files with 77 additions and 49 deletions
--- a/src/natcap/invest/crop_production_regression.py
+++ b/src/natcap/invest/crop_production_regression.py
@ -1,4 +1,4 @@
-"""InVEST Crop Production Percentile Model."""
+"""InVEST Crop Production Regression Model."""
 import collections
 import logging
 import os
@ -217,8 +217,8 @@ MODEL_SPEC = {
                        "about": f"{x} {name} production within the polygon",
                        "type": "number",
                        "units": units
-                    } for nutrient, name, units in NUTRIENTS
-                      for x in ["modeled", "observed"]
+                    } for (nutrient, name, units) in NUTRIENTS
+                    for x in ["modeled", "observed"]
                }
            }
        },
@ -251,7 +251,7 @@ MODEL_SPEC = {
                        "type": "number",
                        "units": units
                    } for nutrient, name, units in NUTRIENTS
-                      for x in ["modeled", "observed"]
+                    for x in ["modeled", "observed"]
                }
            }
        },
@ -659,8 +659,7 @@ def execute(args):
                   (regression_parameter_raster_path_lookup['c_n'], 1),
                   (args['landcover_raster_path'], 1),
                   (crop_to_fertilization_rate_df['nitrogen_rate'][crop_name],
-                    'raw'),
-                   (crop_lucode, 'raw'), (pixel_area_ha, 'raw')],
+                    'raw'), (crop_lucode, 'raw')],
                  _x_yield_op,
                  nitrogen_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD),
            target_path_list=[nitrogen_yield_raster_path],
@ -678,8 +677,7 @@ def execute(args):
                   (regression_parameter_raster_path_lookup['c_p2o5'], 1),
                   (args['landcover_raster_path'], 1),
                   (crop_to_fertilization_rate_df['phosphorus_rate'][crop_name],
-                    'raw'),
-                   (crop_lucode, 'raw'), (pixel_area_ha, 'raw')],
+                    'raw'), (crop_lucode, 'raw')],
                  _x_yield_op,
                  phosphorus_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD),
            target_path_list=[phosphorus_yield_raster_path],
@ -697,8 +695,7 @@ def execute(args):
                   (regression_parameter_raster_path_lookup['c_k2o'], 1),
                   (args['landcover_raster_path'], 1),
                   (crop_to_fertilization_rate_df['potassium_rate'][crop_name],
-                    'raw'),
-                   (crop_lucode, 'raw'), (pixel_area_ha, 'raw')],
+                    'raw'), (crop_lucode, 'raw')],
                  _x_yield_op,
                  potassium_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD),
            target_path_list=[potassium_yield_raster_path],
@ -793,7 +790,7 @@ def execute(args):
            args=([(args['landcover_raster_path'], 1),
                   (interpolated_observed_yield_raster_path, 1),
                   (observed_yield_nodata, 'raw'), (landcover_nodata, 'raw'),
-                   (crop_lucode, 'raw'), (pixel_area_ha, 'raw')],
+                   (crop_lucode, 'raw')],
                  _mask_observed_yield_op, observed_production_raster_path,
                  gdal.GDT_Float32, observed_yield_nodata),
            target_path_list=[observed_production_raster_path],
@ -834,9 +831,8 @@ def execute(args):
            args=(args['aggregate_polygon_path'],
                  target_aggregate_vector_path,
                  landcover_raster_info['projection_wkt'],
-                  crop_names, nutrient_df,
-                  output_dir, file_suffix,
-                  aggregate_results_table_path),
+                  crop_names, nutrient_df, pixel_area_ha,
+                  output_dir, file_suffix),
            target_path_list=[target_aggregate_vector_path,
                              aggregate_results_table_path],
            dependent_task_list=dependent_task_list,
@ -847,12 +843,12 @@ def execute(args):


 def _x_yield_op(
-        y_max, b_x, c_x, lulc_array, fert_rate, crop_lucode, pixel_area_ha):
+        y_max, b_x, c_x, lulc_array, fert_rate, crop_lucode):
    """Calc generalized yield op, Ymax*(1-b_NP*exp(-cN * N_GC)).

    The regression model has identical mathematical equations for
-    the nitrogen, phosphorus, and potassium.  The only difference is
-    the scalars in the equation (fertilization rate and pixel area).
+    the nitrogen, phosphorus, and potassium. The only difference is
+    the scalar in the equation (fertilization rate).
    """
    result = numpy.empty(b_x.shape, dtype=numpy.float32)
    result[:] = _NODATA_YIELD
@ -861,7 +857,7 @@ def _x_yield_op(
        ~pygeoprocessing.array_equals_nodata(b_x, _NODATA_YIELD) &
        ~pygeoprocessing.array_equals_nodata(c_x, _NODATA_YIELD) &
        (lulc_array == crop_lucode))
-    result[valid_mask] = pixel_area_ha * y_max[valid_mask] * (
+    result[valid_mask] = y_max[valid_mask] * (
        1 - b_x[valid_mask] * numpy.exp(
            -c_x[valid_mask] * fert_rate))

@ -896,7 +892,7 @@ def _zero_observed_yield_op(observed_yield_array, observed_yield_nodata):

 def _mask_observed_yield_op(
        lulc_array, observed_yield_array, observed_yield_nodata,
-        landcover_nodata, crop_lucode, pixel_area_ha):
+        landcover_nodata, crop_lucode):
    """Mask total observed yield to crop lulc type.

    Args:
@ -905,7 +901,6 @@ def _mask_observed_yield_op(
        observed_yield_nodata (float): yield raster nodata value
        landcover_nodata (float): landcover raster nodata value
        crop_lucode (int): code used to mask in the current crop
-        pixel_area_ha (float): area of lulc raster cells (hectares)

    Returns:
        numpy.ndarray with float values of yields masked to crop_lucode
@ -914,13 +909,13 @@ def _mask_observed_yield_op(
    result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
    if landcover_nodata is not None:
        result[:] = observed_yield_nodata
-        valid_mask = ~pygeoprocessing.array_equals_nodata(lulc_array, landcover_nodata)
+        valid_mask = ~pygeoprocessing.array_equals_nodata(
+            lulc_array, landcover_nodata)
        result[valid_mask] = 0
    else:
        result[:] = 0
    lulc_mask = lulc_array == crop_lucode
-    result[lulc_mask] = (
-        observed_yield_array[lulc_mask] * pixel_area_ha)
+    result[lulc_mask] = observed_yield_array[lulc_mask]
    return result


@ -951,6 +946,11 @@ def tabulate_regression_results(
        nutrient_id + '_' + mode
        for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
        for mode in ['modeled', 'observed']]
+
+    # Since pixel values in observed and percentile rasters are Mg/(ha•yr),
+    # raster sums are (Mg•px)/(ha•yr). Before recording sums in
+    # production_lookup dictionary, convert to Mg/yr by multiplying by ha/px.
+
    with open(target_table_path, 'w') as result_table:
        result_table.write(
            'crop,area (ha),' + 'production_observed,production_modeled,' +
@ -981,6 +981,7 @@ def tabulate_regression_results(
                production_pixel_count += numpy.count_nonzero(
                    valid_mask & (yield_block > 0.0))
                yield_sum += numpy.sum(yield_block[valid_mask])
+            yield_sum *= pixel_area_ha
            production_area = production_pixel_count * pixel_area_ha
            production_lookup['observed'] = yield_sum
            result_table.write(',%f' % production_area)
@ -996,6 +997,7 @@ def tabulate_regression_results(
                    # _NODATA_YIELD will always have a value (defined above)
                    yield_block[~pygeoprocessing.array_equals_nodata(
                        yield_block, _NODATA_YIELD)])
+            yield_sum *= pixel_area_ha
            production_lookup['modeled'] = yield_sum
            result_table.write(",%f" % yield_sum)

@ -1031,8 +1033,7 @@ def tabulate_regression_results(
 def aggregate_regression_results_to_polygons(
        base_aggregate_vector_path, target_aggregate_vector_path,
        landcover_raster_projection, crop_names,
-        nutrient_df, output_dir, file_suffix,
-        target_aggregate_table_path):
+        nutrient_df, pixel_area_ha, output_dir, file_suffix):
    """Write table with aggregate results of yield and nutrient values.

    Use zonal statistics to summarize total observed and interpolated
@ -1046,10 +1047,9 @@ def aggregate_regression_results_to_polygons(
        landcover_raster_projection (string): a WKT projection string
        crop_names (list): list of crop names
        nutrient_df (pandas.DataFrame): a table of nutrient values by crop
+        pixel_area_ha (float): area of lulc raster cells (hectares)
        output_dir (string): the file path to the output workspace.
        file_suffix (string): string to append to any output filenames.
-        target_aggregate_table_path (string): path to 'aggregate_results.csv'
-            in the output workspace

    Returns:
        None
@ -1061,6 +1061,10 @@ def aggregate_regression_results_to_polygons(
        target_aggregate_vector_path,
        driver_name='ESRI Shapefile')

+    # Since pixel values are Mg/(ha•yr), zonal stats sum is (Mg•px)/(ha•yr).
+    # Before writing sum to results tables or when using sum to calculate
+    # nutrient yields, convert to Mg/yr by multiplying by ha/px.
+
    # loop over every crop and query with pgp function
    total_yield_lookup = {}
    total_nutrient_table = collections.defaultdict(
@ -1084,10 +1088,11 @@ def aggregate_regression_results_to_polygons(
            for fid_index in total_yield_lookup['%s_modeled' % crop_name]:
                total_nutrient_table[nutrient_id][
                    'modeled'][fid_index] += (
-                        nutrient_factor *
-                        total_yield_lookup['%s_modeled' % crop_name][
-                            fid_index]['sum'] *
-                        nutrient_df[nutrient_id][crop_name])
+                        nutrient_factor
+                        * total_yield_lookup['%s_modeled' % crop_name][
+                            fid_index]['sum']
+                        * pixel_area_ha
+                        * nutrient_df[nutrient_id][crop_name])

        # process observed
        observed_yield_path = os.path.join(
@ -1102,10 +1107,11 @@ def aggregate_regression_results_to_polygons(
                    '%s_observed' % crop_name]:
                total_nutrient_table[
                    nutrient_id]['observed'][fid_index] += (
-                        nutrient_factor * # percent crop used * 1000 [100g per Mg]
-                        total_yield_lookup[
-                            '%s_observed' % crop_name][fid_index]['sum'] *
-                        nutrient_df[nutrient_id][crop_name])  # nutrient unit per 100g crop
+                        nutrient_factor  # percent crop used * 1000 [100g per Mg]
+                        * total_yield_lookup[
+                            '%s_observed' % crop_name][fid_index]['sum']
+                        * pixel_area_ha
+                        * nutrient_df[nutrient_id][crop_name])  # nutrient unit per 100g crop

    # report everything to a table
    aggregate_table_path = os.path.join(
@ -1126,7 +1132,8 @@ def aggregate_regression_results_to_polygons(
        for id_index in list(total_yield_lookup.values())[0]:
            aggregate_table.write('%s,' % id_index)
            aggregate_table.write(','.join([
-                str(total_yield_lookup[yield_header][id_index]['sum'])
+                str(total_yield_lookup[yield_header][id_index]['sum']
+                    * pixel_area_ha)
                for yield_header in sorted(total_yield_lookup)]))

            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
--- a/tests/test_crop_production.py
+++ b/tests/test_crop_production.py
@ -270,9 +270,6 @@ class CropProductionTests(unittest.TestCase):
            'model_data_path': MODEL_DATA_PATH,
            'fertilization_rate_table_path': os.path.join(
                SAMPLE_DATA_PATH, 'crop_fertilization_rates.csv'),
-            'nitrogen_fertilization_rate': 29.6,
-            'phosphorus_fertilization_rate': 8.4,
-            'potassium_fertilization_rate': 14.2,
            'n_workers': '-1'
        }

@ -301,9 +298,6 @@ class CropProductionTests(unittest.TestCase):
            'model_data_path': MODEL_DATA_PATH,
            'fertilization_rate_table_path': os.path.join(
                SAMPLE_DATA_PATH, 'crop_fertilization_rates.csv'),
-            'nitrogen_fertilization_rate': 29.6,
-            'phosphorus_fertilization_rate': 8.4,
-            'potassium_fertilization_rate': 14.2,
            'n_workers': '-1'
        }

@ -355,15 +349,13 @@ class CropProductionTests(unittest.TestCase):
            'model_data_path': MODEL_DATA_PATH,
            'fertilization_rate_table_path': os.path.join(
                SAMPLE_DATA_PATH, 'crop_fertilization_rates.csv'),
-            'nitrogen_fertilization_rate': 29.6,
-            'phosphorus_fertilization_rate': 8.4,
-            'potassium_fertilization_rate': 14.2,
        }

        crop_production_regression.execute(args)

        expected_agg_result_table = pandas.read_csv(
-            os.path.join(TEST_DATA_PATH, 'expected_regression_aggregate_results.csv'))
+            os.path.join(TEST_DATA_PATH,
+                         'expected_regression_aggregate_results.csv'))
        agg_result_table = pandas.read_csv(
            os.path.join(args['workspace_dir'], 'aggregate_results.csv'))
        pandas.testing.assert_frame_equal(
@ -381,6 +373,38 @@ class CropProductionTests(unittest.TestCase):
        pandas.testing.assert_frame_equal(
            expected_result_table, result_table, check_dtype=False)

+        # Check raster outputs to make sure values are in Mg/ha.
+        # Raster sum is (Mg•px)/(ha•yr).
+        # Result table reports totals in Mg/yr.
+        # To convert from Mg/yr to (Mg•px)/(ha•yr), multiply by px/ha.
+        expected_raster_sums = {}
+        for (index, crop) in [(0, 'barley'), (1, 'soybean'), (2, 'wheat')]:
+            filename = crop + '_observed_production.tif'
+            pixels_per_hectare = _get_pixels_per_hectare(
+                os.path.join(args['workspace_dir'], filename))
+            expected_raster_sums[filename] = (
+                expected_result_table.loc[index]['production_observed']
+                * pixels_per_hectare)
+            filename = crop + '_regression_production.tif'
+            pixels_per_hectare = _get_pixels_per_hectare(
+                os.path.join(args['workspace_dir'], filename))
+            expected_raster_sums[filename] = (
+                expected_result_table.loc[index]['production_modeled']
+                * pixels_per_hectare)
+
+        for filename in expected_raster_sums:
+            raster_path = os.path.join(args['workspace_dir'], filename)
+            raster_info = pygeoprocessing.get_raster_info(raster_path)
+            nodata = raster_info['nodata'][0]
+            raster_sum = 0.0
+            for _, block in pygeoprocessing.iterblocks((raster_path, 1)):
+                raster_sum += numpy.sum(
+                    block[~pygeoprocessing.array_equals_nodata(
+                            block, nodata)], dtype=numpy.float32)
+            expected_sum = expected_raster_sums[filename]
+            numpy.testing.assert_allclose(raster_sum, expected_sum,
+                                          rtol=0, atol=0.001)
+
    def test_crop_production_regression_no_nodata(self):
        """Crop Production: test regression model with undefined nodata raster.

@ -399,9 +423,6 @@ class CropProductionTests(unittest.TestCase):
            'model_data_path': MODEL_DATA_PATH,
            'fertilization_rate_table_path': os.path.join(
                SAMPLE_DATA_PATH, 'crop_fertilization_rates.csv'),
-            'nitrogen_fertilization_rate': 29.6,
-            'phosphorus_fertilization_rate': 8.4,
-            'potassium_fertilization_rate': 14.2,
        }

        # Create a raster based on the test data geotransform, but smaller and