Update Crop Production (Percentile) to produce per-hectare values in raster outputs

This commit is contained in:
Emily Davis 2024-12-17 17:29:49 -07:00
parent 56a69dd327
commit 0136454cdd
2 changed files with 93 additions and 31 deletions

View File

@ -55,7 +55,7 @@ CROP_OPTIONS = {
"quince", "quinoa", "ramie", "rapeseed", "rasberry",
"rice", "rootnes", "rubber", "rye", "ryefor",
"safflower", "sesame", "sisal", "sorghum",
"sorghumfor", "sourcherry, soybean", "spicenes",
"sorghumfor", "sourcherry", "soybean", "spicenes",
"spinach", "stonefruitnes", "strawberry", "stringbean",
"sugarbeet", "sugarcane", "sugarnes", "sunflower",
"swedefor", "sweetpotato", "tangetc", "taro", "tea",
@ -622,7 +622,6 @@ def execute(args):
args['landcover_raster_path'],
interpolated_yield_percentile_raster_path,
crop_lucode,
pixel_area_ha,
percentile_crop_production_raster_path),
target_path_list=[percentile_crop_production_raster_path],
dependent_task_list=[
@ -697,7 +696,7 @@ def execute(args):
args=([(args['landcover_raster_path'], 1),
(interpolated_observed_yield_raster_path, 1),
(observed_yield_nodata, 'raw'), (landcover_nodata, 'raw'),
(crop_lucode, 'raw'), (pixel_area_ha, 'raw')],
(crop_lucode, 'raw')],
_mask_observed_yield_op, observed_production_raster_path,
gdal.GDT_Float32, observed_yield_nodata),
target_path_list=[observed_production_raster_path],
@ -714,7 +713,7 @@ def execute(args):
output_dir, 'result_table%s.csv' % file_suffix)
crop_names = crop_to_landcover_df.index.to_list()
tabulate_results_task = task_graph.add_task(
_ = task_graph.add_task(
func=tabulate_results,
args=(nutrient_df, yield_percentile_headers,
crop_names, pixel_area_ha,
@ -731,14 +730,14 @@ def execute(args):
output_dir, _AGGREGATE_VECTOR_FILE_PATTERN % (file_suffix))
aggregate_results_table_path = os.path.join(
output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
aggregate_results_task = task_graph.add_task(
_ = task_graph.add_task(
func=aggregate_to_polygons,
args=(args['aggregate_polygon_path'],
target_aggregate_vector_path,
landcover_raster_info['projection_wkt'],
crop_names, nutrient_df,
yield_percentile_headers, output_dir, file_suffix,
aggregate_results_table_path),
yield_percentile_headers, pixel_area_ha, output_dir,
file_suffix, aggregate_results_table_path),
target_path_list=[target_aggregate_vector_path,
aggregate_results_table_path],
dependent_task_list=dependent_task_list,
@ -749,14 +748,14 @@ def execute(args):
def calculate_crop_production(lulc_path, yield_path, crop_lucode,
pixel_area_ha, target_path):
target_path):
"""Calculate crop production for a particular crop.
The resulting production value is:
- nodata, where either the LULC or yield input has nodata
- 0, where the LULC does not match the given LULC code
- yield * pixel area, where the given LULC code exists
- yield (in Mg/ha), where the given LULC code exists
Args:
lulc_path (str): path to a raster of LULC codes
@ -764,7 +763,6 @@ def calculate_crop_production(lulc_path, yield_path, crop_lucode,
by ``crop_lucode``, in units per hectare
crop_lucode (int): LULC code that identifies the crop of interest in
the ``lulc_path`` raster.
pixel_area_ha (number): Pixel area in hectares for both input rasters
target_path (str): Path to write the output crop production raster
Returns:
@ -772,7 +770,7 @@ def calculate_crop_production(lulc_path, yield_path, crop_lucode,
"""
pygeoprocessing.raster_map(
op=lambda lulc, _yield: numpy.where(
lulc == crop_lucode, _yield * pixel_area_ha, 0),
lulc == crop_lucode, _yield, 0),
rasters=[lulc_path, yield_path],
target_path=target_path,
target_nodata=_NODATA_YIELD)
@ -802,7 +800,7 @@ def _zero_observed_yield_op(observed_yield_array, observed_yield_nodata):
def _mask_observed_yield_op(
lulc_array, observed_yield_array, observed_yield_nodata,
landcover_nodata, crop_lucode, pixel_area_ha):
landcover_nodata, crop_lucode):
"""Mask total observed yield to crop lulc type.
Args:
@ -811,7 +809,6 @@ def _mask_observed_yield_op(
observed_yield_nodata (float): yield raster nodata value
landcover_nodata (float): landcover raster nodata value
crop_lucode (int): code used to mask in the current crop
pixel_area_ha (float): area of lulc raster cells (hectares)
Returns:
numpy.ndarray with float values of yields masked to crop_lucode
@ -820,13 +817,13 @@ def _mask_observed_yield_op(
result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
if landcover_nodata is not None:
result[:] = observed_yield_nodata
valid_mask = ~pygeoprocessing.array_equals_nodata(lulc_array, landcover_nodata)
valid_mask = ~pygeoprocessing.array_equals_nodata(lulc_array,
landcover_nodata)
result[valid_mask] = 0
else:
result[:] = 0
lulc_mask = lulc_array == crop_lucode
result[lulc_mask] = (
observed_yield_array[lulc_mask] * pixel_area_ha)
result[lulc_mask] = observed_yield_array[lulc_mask]
return result
@ -847,7 +844,7 @@ def tabulate_results(
landcover_raster_path (string): path to landcover raster
landcover_nodata (float): landcover raster nodata value
output_dir (string): the file path to the output workspace.
file_suffix (string): string to appened to any output filenames.
file_suffix (string): string to append to any output filenames.
target_table_path (string): path to 'result_table.csv' in the output
workspace
@ -868,6 +865,11 @@ def tabulate_results(
for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
for yield_percentile_id in sorted(yield_percentile_headers) + [
'yield_observed']]
# Since pixel values in observed and percentile rasters are Mg/(ha•yr),
# raster sums are (Mg•px)/(ha•yr). Before recording sums in
# production_lookup dictionary, convert to Mg/yr by multiplying by ha/px.
with open(target_table_path, 'w') as result_table:
result_table.write(
'crop,area (ha),' + 'production_observed,' +
@ -899,6 +901,7 @@ def tabulate_results(
production_pixel_count += numpy.count_nonzero(
valid_mask & (yield_block > 0))
yield_sum += numpy.sum(yield_block[valid_mask])
yield_sum *= pixel_area_ha
production_area = production_pixel_count * pixel_area_ha
production_lookup['observed'] = yield_sum
result_table.write(',%f' % production_area)
@ -916,6 +919,7 @@ def tabulate_results(
yield_sum += numpy.sum(
yield_block[~pygeoprocessing.array_equals_nodata(
yield_block, _NODATA_YIELD)])
yield_sum *= pixel_area_ha
production_lookup[yield_percentile_id] = yield_sum
result_table.write(",%f" % yield_sum)
@ -941,7 +945,8 @@ def tabulate_results(
(landcover_raster_path, 1)):
if landcover_nodata is not None:
total_area += numpy.count_nonzero(
~pygeoprocessing.array_equals_nodata(band_values, landcover_nodata))
~pygeoprocessing.array_equals_nodata(band_values,
landcover_nodata))
else:
total_area += band_values.size
result_table.write(
@ -951,8 +956,8 @@ def tabulate_results(
def aggregate_to_polygons(
base_aggregate_vector_path, target_aggregate_vector_path,
landcover_raster_projection, crop_names,
nutrient_df, yield_percentile_headers, output_dir, file_suffix,
landcover_raster_projection, crop_names, nutrient_df,
yield_percentile_headers, pixel_area_ha, output_dir, file_suffix,
target_aggregate_table_path):
"""Write table with aggregate results of yield and nutrient values.
@ -969,8 +974,9 @@ def aggregate_to_polygons(
nutrient_df (pandas.DataFrame): a table of nutrient values by crop
yield_percentile_headers (list): list of strings indicating percentiles
at which yield was calculated.
pixel_area_ha (float): area of lulc raster cells (hectares)
output_dir (string): the file path to the output workspace.
file_suffix (string): string to appened to any output filenames.
file_suffix (string): string to append to any output filenames.
target_aggregate_table_path (string): path to 'aggregate_results.csv'
in the output workspace
@ -985,6 +991,10 @@ def aggregate_to_polygons(
target_aggregate_vector_path,
driver_name='ESRI Shapefile')
# Since pixel values are Mg/(ha•yr), zonal stats sum is (Mg•px)/(ha•yr).
# Before writing sum to results tables or when using sum to calculate
# nutrient yields, convert to Mg/yr by multiplying by ha/px.
# loop over every crop and query with pgp function
total_yield_lookup = {}
total_nutrient_table = collections.defaultdict(
@ -1014,11 +1024,12 @@ def aggregate_to_polygons(
crop_name, yield_percentile_id)]:
total_nutrient_table[nutrient_id][
yield_percentile_id][id_index] += (
nutrient_factor *
total_yield_lookup['%s_%s' % (
crop_name, yield_percentile_id)][
id_index]['sum'] *
nutrient_df[nutrient_id][crop_name])
nutrient_factor
* total_yield_lookup[
'%s_%s' % (crop_name, yield_percentile_id)
][id_index]['sum']
* pixel_area_ha
* nutrient_df[nutrient_id][crop_name])
# process observed
observed_yield_path = os.path.join(
@ -1032,10 +1043,11 @@ def aggregate_to_polygons(
for id_index in total_yield_lookup[f'{crop_name}_observed']:
total_nutrient_table[
nutrient_id]['observed'][id_index] += (
nutrient_factor *
total_yield_lookup[
f'{crop_name}_observed'][id_index]['sum'] *
nutrient_df[nutrient_id][crop_name])
nutrient_factor
* total_yield_lookup[
f'{crop_name}_observed'][id_index]['sum']
* pixel_area_ha
* nutrient_df[nutrient_id][crop_name])
# report everything to a table
with open(target_aggregate_table_path, 'w') as aggregate_table:
@ -1054,7 +1066,8 @@ def aggregate_to_polygons(
for id_index in list(total_yield_lookup.values())[0]:
aggregate_table.write('%s,' % id_index)
aggregate_table.write(','.join([
str(total_yield_lookup[yield_header][id_index]['sum'])
str(total_yield_lookup[yield_header][id_index]['sum']
* pixel_area_ha)
for yield_header in sorted(total_yield_lookup)]))
for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:

View File

@ -21,6 +21,20 @@ TEST_DATA_PATH = os.path.join(
'crop_production_model')
def _get_pixels_per_hectare(raster_path):
"""Calculate number of pixels per hectare for a given raster.
Args:
raster_path (str): full path to the raster.
Returns:
A float representing the number of pixels per hectare.
"""
raster_info = pygeoprocessing.get_raster_info(raster_path)
pixel_area = abs(numpy.prod(raster_info['pixel_size']))
return 10000 / pixel_area
class CropProductionTests(unittest.TestCase):
"""Tests for the Crop Production model."""
@ -73,6 +87,41 @@ class CropProductionTests(unittest.TestCase):
pandas.testing.assert_frame_equal(
expected_result_table, result_table, check_dtype=False)
# Check raster outputs to make sure values are in Mg/ha.
# Raster sum is (Mg•px)/(ha•yr).
# Result table reports totals in Mg/yr.
# To convert from Mg/yr to (Mg•px)/(ha•yr), multiply by px/ha.
expected_raster_sums = {}
for (index, crop) in [(0, 'barley'), (1, 'soybean'), (2, 'wheat')]:
filename = crop + '_observed_production.tif'
pixels_per_hectare = _get_pixels_per_hectare(
os.path.join(args['workspace_dir'], filename))
expected_raster_sums[filename] = (
expected_result_table.loc[index]['production_observed']
* pixels_per_hectare)
for percentile in ['25', '50', '75', '95']:
filename = (
crop + '_yield_' + percentile + 'th_production.tif')
col_name = 'production_' + percentile + 'th'
pixels_per_hectare = _get_pixels_per_hectare(
os.path.join(args['workspace_dir'], filename))
expected_raster_sums[filename] = (
expected_result_table.loc[index][col_name]
* pixels_per_hectare)
for filename in expected_raster_sums:
raster_path = os.path.join(args['workspace_dir'], filename)
raster_info = pygeoprocessing.get_raster_info(raster_path)
nodata = raster_info['nodata'][0]
raster_sum = 0.0
for _, block in pygeoprocessing.iterblocks((raster_path, 1)):
raster_sum += numpy.sum(
block[~pygeoprocessing.array_equals_nodata(
block, nodata)], dtype=numpy.float32)
expected_sum = expected_raster_sums[filename]
numpy.testing.assert_allclose(raster_sum, expected_sum,
rtol=0, atol=0.1)
def test_crop_production_percentile_no_nodata(self):
"""Crop Production: test percentile model with undefined nodata raster.