Update Crop Production (Regression) to produce per-hectare values in raster outputs

This commit is contained in:
Emily Davis 2024-12-18 16:26:39 -07:00
parent 0136454cdd
commit 2ea369d271
2 changed files with 77 additions and 49 deletions

View File

@ -1,4 +1,4 @@
"""InVEST Crop Production Percentile Model."""
"""InVEST Crop Production Regression Model."""
import collections
import logging
import os
@ -217,8 +217,8 @@ MODEL_SPEC = {
"about": f"{x} {name} production within the polygon",
"type": "number",
"units": units
} for nutrient, name, units in NUTRIENTS
for x in ["modeled", "observed"]
} for (nutrient, name, units) in NUTRIENTS
for x in ["modeled", "observed"]
}
}
},
@ -251,7 +251,7 @@ MODEL_SPEC = {
"type": "number",
"units": units
} for nutrient, name, units in NUTRIENTS
for x in ["modeled", "observed"]
for x in ["modeled", "observed"]
}
}
},
@ -659,8 +659,7 @@ def execute(args):
(regression_parameter_raster_path_lookup['c_n'], 1),
(args['landcover_raster_path'], 1),
(crop_to_fertilization_rate_df['nitrogen_rate'][crop_name],
'raw'),
(crop_lucode, 'raw'), (pixel_area_ha, 'raw')],
'raw'), (crop_lucode, 'raw')],
_x_yield_op,
nitrogen_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD),
target_path_list=[nitrogen_yield_raster_path],
@ -678,8 +677,7 @@ def execute(args):
(regression_parameter_raster_path_lookup['c_p2o5'], 1),
(args['landcover_raster_path'], 1),
(crop_to_fertilization_rate_df['phosphorus_rate'][crop_name],
'raw'),
(crop_lucode, 'raw'), (pixel_area_ha, 'raw')],
'raw'), (crop_lucode, 'raw')],
_x_yield_op,
phosphorus_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD),
target_path_list=[phosphorus_yield_raster_path],
@ -697,8 +695,7 @@ def execute(args):
(regression_parameter_raster_path_lookup['c_k2o'], 1),
(args['landcover_raster_path'], 1),
(crop_to_fertilization_rate_df['potassium_rate'][crop_name],
'raw'),
(crop_lucode, 'raw'), (pixel_area_ha, 'raw')],
'raw'), (crop_lucode, 'raw')],
_x_yield_op,
potassium_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD),
target_path_list=[potassium_yield_raster_path],
@ -793,7 +790,7 @@ def execute(args):
args=([(args['landcover_raster_path'], 1),
(interpolated_observed_yield_raster_path, 1),
(observed_yield_nodata, 'raw'), (landcover_nodata, 'raw'),
(crop_lucode, 'raw'), (pixel_area_ha, 'raw')],
(crop_lucode, 'raw')],
_mask_observed_yield_op, observed_production_raster_path,
gdal.GDT_Float32, observed_yield_nodata),
target_path_list=[observed_production_raster_path],
@ -834,9 +831,8 @@ def execute(args):
args=(args['aggregate_polygon_path'],
target_aggregate_vector_path,
landcover_raster_info['projection_wkt'],
crop_names, nutrient_df,
output_dir, file_suffix,
aggregate_results_table_path),
crop_names, nutrient_df, pixel_area_ha,
output_dir, file_suffix),
target_path_list=[target_aggregate_vector_path,
aggregate_results_table_path],
dependent_task_list=dependent_task_list,
@ -847,12 +843,12 @@ def execute(args):
def _x_yield_op(
y_max, b_x, c_x, lulc_array, fert_rate, crop_lucode, pixel_area_ha):
y_max, b_x, c_x, lulc_array, fert_rate, crop_lucode):
"""Calc generalized yield op, Ymax*(1-b_NP*exp(-cN * N_GC)).
The regression model has identical mathematical equations for
the nitrogen, phosphorus, and potassium. The only difference is
the scalars in the equation (fertilization rate and pixel area).
the nitrogen, phosphorus, and potassium. The only difference is
the scalar in the equation (fertilization rate).
"""
result = numpy.empty(b_x.shape, dtype=numpy.float32)
result[:] = _NODATA_YIELD
@ -861,7 +857,7 @@ def _x_yield_op(
~pygeoprocessing.array_equals_nodata(b_x, _NODATA_YIELD) &
~pygeoprocessing.array_equals_nodata(c_x, _NODATA_YIELD) &
(lulc_array == crop_lucode))
result[valid_mask] = pixel_area_ha * y_max[valid_mask] * (
result[valid_mask] = y_max[valid_mask] * (
1 - b_x[valid_mask] * numpy.exp(
-c_x[valid_mask] * fert_rate))
@ -896,7 +892,7 @@ def _zero_observed_yield_op(observed_yield_array, observed_yield_nodata):
def _mask_observed_yield_op(
lulc_array, observed_yield_array, observed_yield_nodata,
landcover_nodata, crop_lucode, pixel_area_ha):
landcover_nodata, crop_lucode):
"""Mask total observed yield to crop lulc type.
Args:
@ -905,7 +901,6 @@ def _mask_observed_yield_op(
observed_yield_nodata (float): yield raster nodata value
landcover_nodata (float): landcover raster nodata value
crop_lucode (int): code used to mask in the current crop
pixel_area_ha (float): area of lulc raster cells (hectares)
Returns:
numpy.ndarray with float values of yields masked to crop_lucode
@ -914,13 +909,13 @@ def _mask_observed_yield_op(
result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
if landcover_nodata is not None:
result[:] = observed_yield_nodata
valid_mask = ~pygeoprocessing.array_equals_nodata(lulc_array, landcover_nodata)
valid_mask = ~pygeoprocessing.array_equals_nodata(
lulc_array, landcover_nodata)
result[valid_mask] = 0
else:
result[:] = 0
lulc_mask = lulc_array == crop_lucode
result[lulc_mask] = (
observed_yield_array[lulc_mask] * pixel_area_ha)
result[lulc_mask] = observed_yield_array[lulc_mask]
return result
@ -951,6 +946,11 @@ def tabulate_regression_results(
nutrient_id + '_' + mode
for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
for mode in ['modeled', 'observed']]
# Since pixel values in observed and percentile rasters are Mg/(ha•yr),
# raster sums are (Mg•px)/(ha•yr). Before recording sums in
# production_lookup dictionary, convert to Mg/yr by multiplying by ha/px.
with open(target_table_path, 'w') as result_table:
result_table.write(
'crop,area (ha),' + 'production_observed,production_modeled,' +
@ -981,6 +981,7 @@ def tabulate_regression_results(
production_pixel_count += numpy.count_nonzero(
valid_mask & (yield_block > 0.0))
yield_sum += numpy.sum(yield_block[valid_mask])
yield_sum *= pixel_area_ha
production_area = production_pixel_count * pixel_area_ha
production_lookup['observed'] = yield_sum
result_table.write(',%f' % production_area)
@ -996,6 +997,7 @@ def tabulate_regression_results(
# _NODATA_YIELD will always have a value (defined above)
yield_block[~pygeoprocessing.array_equals_nodata(
yield_block, _NODATA_YIELD)])
yield_sum *= pixel_area_ha
production_lookup['modeled'] = yield_sum
result_table.write(",%f" % yield_sum)
@ -1031,8 +1033,7 @@ def tabulate_regression_results(
def aggregate_regression_results_to_polygons(
base_aggregate_vector_path, target_aggregate_vector_path,
landcover_raster_projection, crop_names,
nutrient_df, output_dir, file_suffix,
target_aggregate_table_path):
nutrient_df, pixel_area_ha, output_dir, file_suffix):
"""Write table with aggregate results of yield and nutrient values.
Use zonal statistics to summarize total observed and interpolated
@ -1046,10 +1047,9 @@ def aggregate_regression_results_to_polygons(
landcover_raster_projection (string): a WKT projection string
crop_names (list): list of crop names
nutrient_df (pandas.DataFrame): a table of nutrient values by crop
pixel_area_ha (float): area of lulc raster cells (hectares)
output_dir (string): the file path to the output workspace.
file_suffix (string): string to append to any output filenames.
target_aggregate_table_path (string): path to 'aggregate_results.csv'
in the output workspace
Returns:
None
@ -1061,6 +1061,10 @@ def aggregate_regression_results_to_polygons(
target_aggregate_vector_path,
driver_name='ESRI Shapefile')
# Since pixel values are Mg/(ha•yr), zonal stats sum is (Mg•px)/(ha•yr).
# Before writing sum to results tables or when using sum to calculate
# nutrient yields, convert to Mg/yr by multiplying by ha/px.
# loop over every crop and query with pgp function
total_yield_lookup = {}
total_nutrient_table = collections.defaultdict(
@ -1084,10 +1088,11 @@ def aggregate_regression_results_to_polygons(
for fid_index in total_yield_lookup['%s_modeled' % crop_name]:
total_nutrient_table[nutrient_id][
'modeled'][fid_index] += (
nutrient_factor *
total_yield_lookup['%s_modeled' % crop_name][
fid_index]['sum'] *
nutrient_df[nutrient_id][crop_name])
nutrient_factor
* total_yield_lookup['%s_modeled' % crop_name][
fid_index]['sum']
* pixel_area_ha
* nutrient_df[nutrient_id][crop_name])
# process observed
observed_yield_path = os.path.join(
@ -1102,10 +1107,11 @@ def aggregate_regression_results_to_polygons(
'%s_observed' % crop_name]:
total_nutrient_table[
nutrient_id]['observed'][fid_index] += (
nutrient_factor * # percent crop used * 1000 [100g per Mg]
total_yield_lookup[
'%s_observed' % crop_name][fid_index]['sum'] *
nutrient_df[nutrient_id][crop_name]) # nutrient unit per 100g crop
nutrient_factor # percent crop used * 1000 [100g per Mg]
* total_yield_lookup[
'%s_observed' % crop_name][fid_index]['sum']
* pixel_area_ha
* nutrient_df[nutrient_id][crop_name]) # nutrient unit per 100g crop
# report everything to a table
aggregate_table_path = os.path.join(
@ -1126,7 +1132,8 @@ def aggregate_regression_results_to_polygons(
for id_index in list(total_yield_lookup.values())[0]:
aggregate_table.write('%s,' % id_index)
aggregate_table.write(','.join([
str(total_yield_lookup[yield_header][id_index]['sum'])
str(total_yield_lookup[yield_header][id_index]['sum']
* pixel_area_ha)
for yield_header in sorted(total_yield_lookup)]))
for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:

View File

@ -270,9 +270,6 @@ class CropProductionTests(unittest.TestCase):
'model_data_path': MODEL_DATA_PATH,
'fertilization_rate_table_path': os.path.join(
SAMPLE_DATA_PATH, 'crop_fertilization_rates.csv'),
'nitrogen_fertilization_rate': 29.6,
'phosphorus_fertilization_rate': 8.4,
'potassium_fertilization_rate': 14.2,
'n_workers': '-1'
}
@ -301,9 +298,6 @@ class CropProductionTests(unittest.TestCase):
'model_data_path': MODEL_DATA_PATH,
'fertilization_rate_table_path': os.path.join(
SAMPLE_DATA_PATH, 'crop_fertilization_rates.csv'),
'nitrogen_fertilization_rate': 29.6,
'phosphorus_fertilization_rate': 8.4,
'potassium_fertilization_rate': 14.2,
'n_workers': '-1'
}
@ -355,15 +349,13 @@ class CropProductionTests(unittest.TestCase):
'model_data_path': MODEL_DATA_PATH,
'fertilization_rate_table_path': os.path.join(
SAMPLE_DATA_PATH, 'crop_fertilization_rates.csv'),
'nitrogen_fertilization_rate': 29.6,
'phosphorus_fertilization_rate': 8.4,
'potassium_fertilization_rate': 14.2,
}
crop_production_regression.execute(args)
expected_agg_result_table = pandas.read_csv(
os.path.join(TEST_DATA_PATH, 'expected_regression_aggregate_results.csv'))
os.path.join(TEST_DATA_PATH,
'expected_regression_aggregate_results.csv'))
agg_result_table = pandas.read_csv(
os.path.join(args['workspace_dir'], 'aggregate_results.csv'))
pandas.testing.assert_frame_equal(
@ -381,6 +373,38 @@ class CropProductionTests(unittest.TestCase):
pandas.testing.assert_frame_equal(
expected_result_table, result_table, check_dtype=False)
# Check raster outputs to make sure values are in Mg/ha.
# Raster sum is (Mg•px)/(ha•yr).
# Result table reports totals in Mg/yr.
# To convert from Mg/yr to (Mg•px)/(ha•yr), multiply by px/ha.
expected_raster_sums = {}
for (index, crop) in [(0, 'barley'), (1, 'soybean'), (2, 'wheat')]:
filename = crop + '_observed_production.tif'
pixels_per_hectare = _get_pixels_per_hectare(
os.path.join(args['workspace_dir'], filename))
expected_raster_sums[filename] = (
expected_result_table.loc[index]['production_observed']
* pixels_per_hectare)
filename = crop + '_regression_production.tif'
pixels_per_hectare = _get_pixels_per_hectare(
os.path.join(args['workspace_dir'], filename))
expected_raster_sums[filename] = (
expected_result_table.loc[index]['production_modeled']
* pixels_per_hectare)
for filename in expected_raster_sums:
raster_path = os.path.join(args['workspace_dir'], filename)
raster_info = pygeoprocessing.get_raster_info(raster_path)
nodata = raster_info['nodata'][0]
raster_sum = 0.0
for _, block in pygeoprocessing.iterblocks((raster_path, 1)):
raster_sum += numpy.sum(
block[~pygeoprocessing.array_equals_nodata(
block, nodata)], dtype=numpy.float32)
expected_sum = expected_raster_sums[filename]
numpy.testing.assert_allclose(raster_sum, expected_sum,
rtol=0, atol=0.001)
def test_crop_production_regression_no_nodata(self):
"""Crop Production: test regression model with undefined nodata raster.
@ -399,9 +423,6 @@ class CropProductionTests(unittest.TestCase):
'model_data_path': MODEL_DATA_PATH,
'fertilization_rate_table_path': os.path.join(
SAMPLE_DATA_PATH, 'crop_fertilization_rates.csv'),
'nitrogen_fertilization_rate': 29.6,
'phosphorus_fertilization_rate': 8.4,
'potassium_fertilization_rate': 14.2,
}
# Create a raster based on the test data geotransform, but smaller and