Update Crop Production (Percentile) to produce per-hectare values in raster outputs

2024-12-17 17:29:49 -07:00 · 2024-12-17 17:29:49 -07:00 · 0136454cdd
parent 56a69dd327
commit 0136454cdd
2 changed files with 93 additions and 31 deletions
--- a/src/natcap/invest/crop_production_percentile.py
+++ b/src/natcap/invest/crop_production_percentile.py
@ -55,7 +55,7 @@ CROP_OPTIONS = {
        "quince", "quinoa", "ramie", "rapeseed", "rasberry",
        "rice", "rootnes", "rubber", "rye", "ryefor",
        "safflower", "sesame", "sisal", "sorghum",
-        "sorghumfor", "sourcherry, soybean", "spicenes",
+        "sorghumfor", "sourcherry", "soybean", "spicenes",
        "spinach", "stonefruitnes", "strawberry", "stringbean",
        "sugarbeet", "sugarcane", "sugarnes", "sunflower",
        "swedefor", "sweetpotato", "tangetc", "taro", "tea",
@ -622,7 +622,6 @@ def execute(args):
                    args['landcover_raster_path'],
                    interpolated_yield_percentile_raster_path,
                    crop_lucode,
-                    pixel_area_ha,
                    percentile_crop_production_raster_path),
                target_path_list=[percentile_crop_production_raster_path],
                dependent_task_list=[
@ -697,7 +696,7 @@ def execute(args):
            args=([(args['landcover_raster_path'], 1),
                   (interpolated_observed_yield_raster_path, 1),
                   (observed_yield_nodata, 'raw'), (landcover_nodata, 'raw'),
-                   (crop_lucode, 'raw'), (pixel_area_ha, 'raw')],
+                   (crop_lucode, 'raw')],
                  _mask_observed_yield_op, observed_production_raster_path,
                  gdal.GDT_Float32, observed_yield_nodata),
            target_path_list=[observed_production_raster_path],
@ -714,7 +713,7 @@ def execute(args):
        output_dir, 'result_table%s.csv' % file_suffix)

    crop_names = crop_to_landcover_df.index.to_list()
-    tabulate_results_task = task_graph.add_task(
+    _ = task_graph.add_task(
        func=tabulate_results,
        args=(nutrient_df, yield_percentile_headers,
              crop_names, pixel_area_ha,
@ -731,14 +730,14 @@ def execute(args):
            output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix))
        aggregate_results_table_path = os.path.join(
            output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
-        aggregate_results_task = task_graph.add_task(
+        _ = task_graph.add_task(
            func=aggregate_to_polygons,
            args=(args['aggregate_polygon_path'],
                  target_aggregate_vector_path,
                  landcover_raster_info['projection_wkt'],
                  crop_names, nutrient_df,
-                  yield_percentile_headers, output_dir, file_suffix,
-                  aggregate_results_table_path),
+                  yield_percentile_headers, pixel_area_ha, output_dir,
+                  file_suffix, aggregate_results_table_path),
            target_path_list=[target_aggregate_vector_path,
                              aggregate_results_table_path],
            dependent_task_list=dependent_task_list,
@ -749,14 +748,14 @@ def execute(args):


 def calculate_crop_production(lulc_path, yield_path, crop_lucode,
-                              pixel_area_ha, target_path):
+                              target_path):
    """Calculate crop production for a particular crop.

    The resulting production value is:

    - nodata, where either the LULC or yield input has nodata
    - 0, where the LULC does not match the given LULC code
-    - yield * pixel area, where the given LULC code exists
+    - yield (in Mg/ha), where the given LULC code exists

    Args:
        lulc_path (str): path to a raster of LULC codes
@ -764,7 +763,6 @@ def calculate_crop_production(lulc_path, yield_path, crop_lucode,
            by ``crop_lucode``, in units per hectare
        crop_lucode (int): LULC code that identifies the crop of interest in
            the ``lulc_path`` raster.
-        pixel_area_ha (number): Pixel area in hectares for both input rasters
        target_path (str): Path to write the output crop production raster

    Returns:
@ -772,7 +770,7 @@ def calculate_crop_production(lulc_path, yield_path, crop_lucode,
    """
    pygeoprocessing.raster_map(
        op=lambda lulc, _yield: numpy.where(
-            lulc == crop_lucode, _yield * pixel_area_ha, 0),
+            lulc == crop_lucode, _yield, 0),
        rasters=[lulc_path, yield_path],
        target_path=target_path,
        target_nodata=_NODATA_YIELD)
@ -802,7 +800,7 @@ def _zero_observed_yield_op(observed_yield_array, observed_yield_nodata):

 def _mask_observed_yield_op(
        lulc_array, observed_yield_array, observed_yield_nodata,
-        landcover_nodata, crop_lucode, pixel_area_ha):
+        landcover_nodata, crop_lucode):
    """Mask total observed yield to crop lulc type.

    Args:
@ -811,7 +809,6 @@ def _mask_observed_yield_op(
        observed_yield_nodata (float): yield raster nodata value
        landcover_nodata (float): landcover raster nodata value
        crop_lucode (int): code used to mask in the current crop
-        pixel_area_ha (float): area of lulc raster cells (hectares)

    Returns:
        numpy.ndarray with float values of yields masked to crop_lucode
@ -820,13 +817,13 @@ def _mask_observed_yield_op(
    result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
    if landcover_nodata is not None:
        result[:] = observed_yield_nodata
-        valid_mask = ~pygeoprocessing.array_equals_nodata(lulc_array, landcover_nodata)
+        valid_mask = ~pygeoprocessing.array_equals_nodata(lulc_array,
+                                                          landcover_nodata)
        result[valid_mask] = 0
    else:
        result[:] = 0
    lulc_mask = lulc_array == crop_lucode
-    result[lulc_mask] = (
-        observed_yield_array[lulc_mask] * pixel_area_ha)
+    result[lulc_mask] = observed_yield_array[lulc_mask]
    return result


@ -847,7 +844,7 @@ def tabulate_results(
        landcover_raster_path (string): path to landcover raster
        landcover_nodata (float): landcover raster nodata value
        output_dir (string): the file path to the output workspace.
-        file_suffix (string): string to appened to any output filenames.
+        file_suffix (string): string to append to any output filenames.
        target_table_path (string): path to 'result_table.csv' in the output
            workspace

@ -868,6 +865,11 @@ def tabulate_results(
        for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
        for yield_percentile_id in sorted(yield_percentile_headers) + [
            'yield_observed']]
+
+    # Since pixel values in observed and percentile rasters are Mg/(ha•yr),
+    # raster sums are (Mg•px)/(ha•yr). Before recording sums in
+    # production_lookup dictionary, convert to Mg/yr by multiplying by ha/px.
+
    with open(target_table_path, 'w') as result_table:
        result_table.write(
            'crop,area (ha),' + 'production_observed,' +
@ -899,6 +901,7 @@ def tabulate_results(
                production_pixel_count += numpy.count_nonzero(
                    valid_mask & (yield_block > 0))
                yield_sum += numpy.sum(yield_block[valid_mask])
+            yield_sum *= pixel_area_ha
            production_area = production_pixel_count * pixel_area_ha
            production_lookup['observed'] = yield_sum
            result_table.write(',%f' % production_area)
@ -916,6 +919,7 @@ def tabulate_results(
                    yield_sum += numpy.sum(
                        yield_block[~pygeoprocessing.array_equals_nodata(
                            yield_block, _NODATA_YIELD)])
+                yield_sum *= pixel_area_ha
                production_lookup[yield_percentile_id] = yield_sum
                result_table.write(",%f" % yield_sum)

@ -941,7 +945,8 @@ def tabulate_results(
                (landcover_raster_path, 1)):
            if landcover_nodata is not None:
                total_area += numpy.count_nonzero(
-                    ~pygeoprocessing.array_equals_nodata(band_values, landcover_nodata))
+                    ~pygeoprocessing.array_equals_nodata(band_values,
+                                                         landcover_nodata))
            else:
                total_area += band_values.size
        result_table.write(
@ -951,8 +956,8 @@ def tabulate_results(

 def aggregate_to_polygons(
        base_aggregate_vector_path, target_aggregate_vector_path,
-        landcover_raster_projection, crop_names,
-        nutrient_df, yield_percentile_headers, output_dir, file_suffix,
+        landcover_raster_projection, crop_names, nutrient_df,
+        yield_percentile_headers, pixel_area_ha, output_dir, file_suffix,
        target_aggregate_table_path):
    """Write table with aggregate results of yield and nutrient values.

@ -969,8 +974,9 @@ def aggregate_to_polygons(
        nutrient_df (pandas.DataFrame): a table of nutrient values by crop
        yield_percentile_headers (list): list of strings indicating percentiles
            at which yield was calculated.
+        pixel_area_ha (float): area of lulc raster cells (hectares)
        output_dir (string): the file path to the output workspace.
-        file_suffix (string): string to appened to any output filenames.
+        file_suffix (string): string to append to any output filenames.
        target_aggregate_table_path (string): path to 'aggregate_results.csv'
            in the output workspace

@ -985,6 +991,10 @@ def aggregate_to_polygons(
        target_aggregate_vector_path,
        driver_name='ESRI Shapefile')

+    # Since pixel values are Mg/(ha•yr), zonal stats sum is (Mg•px)/(ha•yr).
+    # Before writing sum to results tables or when using sum to calculate
+    # nutrient yields, convert to Mg/yr by multiplying by ha/px.
+
    # loop over every crop and query with pgp function
    total_yield_lookup = {}
    total_nutrient_table = collections.defaultdict(
@ -1014,11 +1024,12 @@ def aggregate_to_polygons(
                        crop_name, yield_percentile_id)]:
                    total_nutrient_table[nutrient_id][
                        yield_percentile_id][id_index] += (
-                            nutrient_factor *
-                            total_yield_lookup['%s_%s' % (
-                                crop_name, yield_percentile_id)][
-                                    id_index]['sum'] *
-                            nutrient_df[nutrient_id][crop_name])
+                            nutrient_factor
+                            * total_yield_lookup[
+                                    '%s_%s' % (crop_name, yield_percentile_id)
+                                ][id_index]['sum']
+                            * pixel_area_ha
+                            * nutrient_df[nutrient_id][crop_name])

        # process observed
        observed_yield_path = os.path.join(
@ -1032,10 +1043,11 @@ def aggregate_to_polygons(
            for id_index in total_yield_lookup[f'{crop_name}_observed']:
                total_nutrient_table[
                    nutrient_id]['observed'][id_index] += (
-                        nutrient_factor *
-                        total_yield_lookup[
-                            f'{crop_name}_observed'][id_index]['sum'] *
-                        nutrient_df[nutrient_id][crop_name])
+                        nutrient_factor
+                        * total_yield_lookup[
+                            f'{crop_name}_observed'][id_index]['sum']
+                        * pixel_area_ha
+                        * nutrient_df[nutrient_id][crop_name])

    # report everything to a table
    with open(target_aggregate_table_path, 'w') as aggregate_table:
@ -1054,7 +1066,8 @@ def aggregate_to_polygons(
        for id_index in list(total_yield_lookup.values())[0]:
            aggregate_table.write('%s,' % id_index)
            aggregate_table.write(','.join([
-                str(total_yield_lookup[yield_header][id_index]['sum'])
+                str(total_yield_lookup[yield_header][id_index]['sum']
+                    * pixel_area_ha)
                for yield_header in sorted(total_yield_lookup)]))

            for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
--- a/tests/test_crop_production.py
+++ b/tests/test_crop_production.py
@ -21,6 +21,20 @@ TEST_DATA_PATH = os.path.join(
    'crop_production_model')


+def _get_pixels_per_hectare(raster_path):
+    """Calculate number of pixels per hectare for a given raster.
+
+    Args:
+        raster_path (str): full path to the raster.
+
+    Returns:
+        A float representing the number of pixels per hectare.
+    """
+    raster_info = pygeoprocessing.get_raster_info(raster_path)
+    pixel_area = abs(numpy.prod(raster_info['pixel_size']))
+    return 10000 / pixel_area
+
+
 class CropProductionTests(unittest.TestCase):
    """Tests for the Crop Production model."""

@ -73,6 +87,41 @@ class CropProductionTests(unittest.TestCase):
        pandas.testing.assert_frame_equal(
            expected_result_table, result_table, check_dtype=False)

+        # Check raster outputs to make sure values are in Mg/ha.
+        # Raster sum is (Mg•px)/(ha•yr).
+        # Result table reports totals in Mg/yr.
+        # To convert from Mg/yr to (Mg•px)/(ha•yr), multiply by px/ha.
+        expected_raster_sums = {}
+        for (index, crop) in [(0, 'barley'), (1, 'soybean'), (2, 'wheat')]:
+            filename = crop + '_observed_production.tif'
+            pixels_per_hectare = _get_pixels_per_hectare(
+                os.path.join(args['workspace_dir'], filename))
+            expected_raster_sums[filename] = (
+                expected_result_table.loc[index]['production_observed']
+                * pixels_per_hectare)
+            for percentile in ['25', '50', '75', '95']:
+                filename = (
+                    crop + '_yield_' + percentile + 'th_production.tif')
+                col_name = 'production_' + percentile + 'th'
+                pixels_per_hectare = _get_pixels_per_hectare(
+                    os.path.join(args['workspace_dir'], filename))
+                expected_raster_sums[filename] = (
+                    expected_result_table.loc[index][col_name]
+                    * pixels_per_hectare)
+
+        for filename in expected_raster_sums:
+            raster_path = os.path.join(args['workspace_dir'], filename)
+            raster_info = pygeoprocessing.get_raster_info(raster_path)
+            nodata = raster_info['nodata'][0]
+            raster_sum = 0.0
+            for _, block in pygeoprocessing.iterblocks((raster_path, 1)):
+                raster_sum += numpy.sum(
+                    block[~pygeoprocessing.array_equals_nodata(
+                            block, nodata)], dtype=numpy.float32)
+            expected_sum = expected_raster_sums[filename]
+            numpy.testing.assert_allclose(raster_sum, expected_sum,
+                                          rtol=0, atol=0.1)
+
    def test_crop_production_percentile_no_nodata(self):
        """Crop Production: test percentile model with undefined nodata raster.