Update Crop Production (Regression) to produce per-hectare values in raster outputs
This commit is contained in:
parent
0136454cdd
commit
2ea369d271
|
@ -1,4 +1,4 @@
|
|||
"""InVEST Crop Production Percentile Model."""
|
||||
"""InVEST Crop Production Regression Model."""
|
||||
import collections
|
||||
import logging
|
||||
import os
|
||||
|
@ -217,8 +217,8 @@ MODEL_SPEC = {
|
|||
"about": f"{x} {name} production within the polygon",
|
||||
"type": "number",
|
||||
"units": units
|
||||
} for nutrient, name, units in NUTRIENTS
|
||||
for x in ["modeled", "observed"]
|
||||
} for (nutrient, name, units) in NUTRIENTS
|
||||
for x in ["modeled", "observed"]
|
||||
}
|
||||
}
|
||||
},
|
||||
|
@ -251,7 +251,7 @@ MODEL_SPEC = {
|
|||
"type": "number",
|
||||
"units": units
|
||||
} for nutrient, name, units in NUTRIENTS
|
||||
for x in ["modeled", "observed"]
|
||||
for x in ["modeled", "observed"]
|
||||
}
|
||||
}
|
||||
},
|
||||
|
@ -659,8 +659,7 @@ def execute(args):
|
|||
(regression_parameter_raster_path_lookup['c_n'], 1),
|
||||
(args['landcover_raster_path'], 1),
|
||||
(crop_to_fertilization_rate_df['nitrogen_rate'][crop_name],
|
||||
'raw'),
|
||||
(crop_lucode, 'raw'), (pixel_area_ha, 'raw')],
|
||||
'raw'), (crop_lucode, 'raw')],
|
||||
_x_yield_op,
|
||||
nitrogen_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD),
|
||||
target_path_list=[nitrogen_yield_raster_path],
|
||||
|
@ -678,8 +677,7 @@ def execute(args):
|
|||
(regression_parameter_raster_path_lookup['c_p2o5'], 1),
|
||||
(args['landcover_raster_path'], 1),
|
||||
(crop_to_fertilization_rate_df['phosphorus_rate'][crop_name],
|
||||
'raw'),
|
||||
(crop_lucode, 'raw'), (pixel_area_ha, 'raw')],
|
||||
'raw'), (crop_lucode, 'raw')],
|
||||
_x_yield_op,
|
||||
phosphorus_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD),
|
||||
target_path_list=[phosphorus_yield_raster_path],
|
||||
|
@ -697,8 +695,7 @@ def execute(args):
|
|||
(regression_parameter_raster_path_lookup['c_k2o'], 1),
|
||||
(args['landcover_raster_path'], 1),
|
||||
(crop_to_fertilization_rate_df['potassium_rate'][crop_name],
|
||||
'raw'),
|
||||
(crop_lucode, 'raw'), (pixel_area_ha, 'raw')],
|
||||
'raw'), (crop_lucode, 'raw')],
|
||||
_x_yield_op,
|
||||
potassium_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD),
|
||||
target_path_list=[potassium_yield_raster_path],
|
||||
|
@ -793,7 +790,7 @@ def execute(args):
|
|||
args=([(args['landcover_raster_path'], 1),
|
||||
(interpolated_observed_yield_raster_path, 1),
|
||||
(observed_yield_nodata, 'raw'), (landcover_nodata, 'raw'),
|
||||
(crop_lucode, 'raw'), (pixel_area_ha, 'raw')],
|
||||
(crop_lucode, 'raw')],
|
||||
_mask_observed_yield_op, observed_production_raster_path,
|
||||
gdal.GDT_Float32, observed_yield_nodata),
|
||||
target_path_list=[observed_production_raster_path],
|
||||
|
@ -834,9 +831,8 @@ def execute(args):
|
|||
args=(args['aggregate_polygon_path'],
|
||||
target_aggregate_vector_path,
|
||||
landcover_raster_info['projection_wkt'],
|
||||
crop_names, nutrient_df,
|
||||
output_dir, file_suffix,
|
||||
aggregate_results_table_path),
|
||||
crop_names, nutrient_df, pixel_area_ha,
|
||||
output_dir, file_suffix),
|
||||
target_path_list=[target_aggregate_vector_path,
|
||||
aggregate_results_table_path],
|
||||
dependent_task_list=dependent_task_list,
|
||||
|
@ -847,12 +843,12 @@ def execute(args):
|
|||
|
||||
|
||||
def _x_yield_op(
|
||||
y_max, b_x, c_x, lulc_array, fert_rate, crop_lucode, pixel_area_ha):
|
||||
y_max, b_x, c_x, lulc_array, fert_rate, crop_lucode):
|
||||
"""Calc generalized yield op, Ymax*(1-b_NP*exp(-cN * N_GC)).
|
||||
|
||||
The regression model has identical mathematical equations for
|
||||
the nitrogen, phosphorus, and potassium. The only difference is
|
||||
the scalars in the equation (fertilization rate and pixel area).
|
||||
the nitrogen, phosphorus, and potassium. The only difference is
|
||||
the scalar in the equation (fertilization rate).
|
||||
"""
|
||||
result = numpy.empty(b_x.shape, dtype=numpy.float32)
|
||||
result[:] = _NODATA_YIELD
|
||||
|
@ -861,7 +857,7 @@ def _x_yield_op(
|
|||
~pygeoprocessing.array_equals_nodata(b_x, _NODATA_YIELD) &
|
||||
~pygeoprocessing.array_equals_nodata(c_x, _NODATA_YIELD) &
|
||||
(lulc_array == crop_lucode))
|
||||
result[valid_mask] = pixel_area_ha * y_max[valid_mask] * (
|
||||
result[valid_mask] = y_max[valid_mask] * (
|
||||
1 - b_x[valid_mask] * numpy.exp(
|
||||
-c_x[valid_mask] * fert_rate))
|
||||
|
||||
|
@ -896,7 +892,7 @@ def _zero_observed_yield_op(observed_yield_array, observed_yield_nodata):
|
|||
|
||||
def _mask_observed_yield_op(
|
||||
lulc_array, observed_yield_array, observed_yield_nodata,
|
||||
landcover_nodata, crop_lucode, pixel_area_ha):
|
||||
landcover_nodata, crop_lucode):
|
||||
"""Mask total observed yield to crop lulc type.
|
||||
|
||||
Args:
|
||||
|
@ -905,7 +901,6 @@ def _mask_observed_yield_op(
|
|||
observed_yield_nodata (float): yield raster nodata value
|
||||
landcover_nodata (float): landcover raster nodata value
|
||||
crop_lucode (int): code used to mask in the current crop
|
||||
pixel_area_ha (float): area of lulc raster cells (hectares)
|
||||
|
||||
Returns:
|
||||
numpy.ndarray with float values of yields masked to crop_lucode
|
||||
|
@ -914,13 +909,13 @@ def _mask_observed_yield_op(
|
|||
result = numpy.empty(lulc_array.shape, dtype=numpy.float32)
|
||||
if landcover_nodata is not None:
|
||||
result[:] = observed_yield_nodata
|
||||
valid_mask = ~pygeoprocessing.array_equals_nodata(lulc_array, landcover_nodata)
|
||||
valid_mask = ~pygeoprocessing.array_equals_nodata(
|
||||
lulc_array, landcover_nodata)
|
||||
result[valid_mask] = 0
|
||||
else:
|
||||
result[:] = 0
|
||||
lulc_mask = lulc_array == crop_lucode
|
||||
result[lulc_mask] = (
|
||||
observed_yield_array[lulc_mask] * pixel_area_ha)
|
||||
result[lulc_mask] = observed_yield_array[lulc_mask]
|
||||
return result
|
||||
|
||||
|
||||
|
@ -951,6 +946,11 @@ def tabulate_regression_results(
|
|||
nutrient_id + '_' + mode
|
||||
for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS
|
||||
for mode in ['modeled', 'observed']]
|
||||
|
||||
# Since pixel values in observed and percentile rasters are Mg/(ha•yr),
|
||||
# raster sums are (Mg•px)/(ha•yr). Before recording sums in
|
||||
# production_lookup dictionary, convert to Mg/yr by multiplying by ha/px.
|
||||
|
||||
with open(target_table_path, 'w') as result_table:
|
||||
result_table.write(
|
||||
'crop,area (ha),' + 'production_observed,production_modeled,' +
|
||||
|
@ -981,6 +981,7 @@ def tabulate_regression_results(
|
|||
production_pixel_count += numpy.count_nonzero(
|
||||
valid_mask & (yield_block > 0.0))
|
||||
yield_sum += numpy.sum(yield_block[valid_mask])
|
||||
yield_sum *= pixel_area_ha
|
||||
production_area = production_pixel_count * pixel_area_ha
|
||||
production_lookup['observed'] = yield_sum
|
||||
result_table.write(',%f' % production_area)
|
||||
|
@ -996,6 +997,7 @@ def tabulate_regression_results(
|
|||
# _NODATA_YIELD will always have a value (defined above)
|
||||
yield_block[~pygeoprocessing.array_equals_nodata(
|
||||
yield_block, _NODATA_YIELD)])
|
||||
yield_sum *= pixel_area_ha
|
||||
production_lookup['modeled'] = yield_sum
|
||||
result_table.write(",%f" % yield_sum)
|
||||
|
||||
|
@ -1031,8 +1033,7 @@ def tabulate_regression_results(
|
|||
def aggregate_regression_results_to_polygons(
|
||||
base_aggregate_vector_path, target_aggregate_vector_path,
|
||||
landcover_raster_projection, crop_names,
|
||||
nutrient_df, output_dir, file_suffix,
|
||||
target_aggregate_table_path):
|
||||
nutrient_df, pixel_area_ha, output_dir, file_suffix):
|
||||
"""Write table with aggregate results of yield and nutrient values.
|
||||
|
||||
Use zonal statistics to summarize total observed and interpolated
|
||||
|
@ -1046,10 +1047,9 @@ def aggregate_regression_results_to_polygons(
|
|||
landcover_raster_projection (string): a WKT projection string
|
||||
crop_names (list): list of crop names
|
||||
nutrient_df (pandas.DataFrame): a table of nutrient values by crop
|
||||
pixel_area_ha (float): area of lulc raster cells (hectares)
|
||||
output_dir (string): the file path to the output workspace.
|
||||
file_suffix (string): string to append to any output filenames.
|
||||
target_aggregate_table_path (string): path to 'aggregate_results.csv'
|
||||
in the output workspace
|
||||
|
||||
Returns:
|
||||
None
|
||||
|
@ -1061,6 +1061,10 @@ def aggregate_regression_results_to_polygons(
|
|||
target_aggregate_vector_path,
|
||||
driver_name='ESRI Shapefile')
|
||||
|
||||
# Since pixel values are Mg/(ha•yr), zonal stats sum is (Mg•px)/(ha•yr).
|
||||
# Before writing sum to results tables or when using sum to calculate
|
||||
# nutrient yields, convert to Mg/yr by multiplying by ha/px.
|
||||
|
||||
# loop over every crop and query with pgp function
|
||||
total_yield_lookup = {}
|
||||
total_nutrient_table = collections.defaultdict(
|
||||
|
@ -1084,10 +1088,11 @@ def aggregate_regression_results_to_polygons(
|
|||
for fid_index in total_yield_lookup['%s_modeled' % crop_name]:
|
||||
total_nutrient_table[nutrient_id][
|
||||
'modeled'][fid_index] += (
|
||||
nutrient_factor *
|
||||
total_yield_lookup['%s_modeled' % crop_name][
|
||||
fid_index]['sum'] *
|
||||
nutrient_df[nutrient_id][crop_name])
|
||||
nutrient_factor
|
||||
* total_yield_lookup['%s_modeled' % crop_name][
|
||||
fid_index]['sum']
|
||||
* pixel_area_ha
|
||||
* nutrient_df[nutrient_id][crop_name])
|
||||
|
||||
# process observed
|
||||
observed_yield_path = os.path.join(
|
||||
|
@ -1102,10 +1107,11 @@ def aggregate_regression_results_to_polygons(
|
|||
'%s_observed' % crop_name]:
|
||||
total_nutrient_table[
|
||||
nutrient_id]['observed'][fid_index] += (
|
||||
nutrient_factor * # percent crop used * 1000 [100g per Mg]
|
||||
total_yield_lookup[
|
||||
'%s_observed' % crop_name][fid_index]['sum'] *
|
||||
nutrient_df[nutrient_id][crop_name]) # nutrient unit per 100g crop
|
||||
nutrient_factor # percent crop used * 1000 [100g per Mg]
|
||||
* total_yield_lookup[
|
||||
'%s_observed' % crop_name][fid_index]['sum']
|
||||
* pixel_area_ha
|
||||
* nutrient_df[nutrient_id][crop_name]) # nutrient unit per 100g crop
|
||||
|
||||
# report everything to a table
|
||||
aggregate_table_path = os.path.join(
|
||||
|
@ -1126,7 +1132,8 @@ def aggregate_regression_results_to_polygons(
|
|||
for id_index in list(total_yield_lookup.values())[0]:
|
||||
aggregate_table.write('%s,' % id_index)
|
||||
aggregate_table.write(','.join([
|
||||
str(total_yield_lookup[yield_header][id_index]['sum'])
|
||||
str(total_yield_lookup[yield_header][id_index]['sum']
|
||||
* pixel_area_ha)
|
||||
for yield_header in sorted(total_yield_lookup)]))
|
||||
|
||||
for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
|
||||
|
|
|
@ -270,9 +270,6 @@ class CropProductionTests(unittest.TestCase):
|
|||
'model_data_path': MODEL_DATA_PATH,
|
||||
'fertilization_rate_table_path': os.path.join(
|
||||
SAMPLE_DATA_PATH, 'crop_fertilization_rates.csv'),
|
||||
'nitrogen_fertilization_rate': 29.6,
|
||||
'phosphorus_fertilization_rate': 8.4,
|
||||
'potassium_fertilization_rate': 14.2,
|
||||
'n_workers': '-1'
|
||||
}
|
||||
|
||||
|
@ -301,9 +298,6 @@ class CropProductionTests(unittest.TestCase):
|
|||
'model_data_path': MODEL_DATA_PATH,
|
||||
'fertilization_rate_table_path': os.path.join(
|
||||
SAMPLE_DATA_PATH, 'crop_fertilization_rates.csv'),
|
||||
'nitrogen_fertilization_rate': 29.6,
|
||||
'phosphorus_fertilization_rate': 8.4,
|
||||
'potassium_fertilization_rate': 14.2,
|
||||
'n_workers': '-1'
|
||||
}
|
||||
|
||||
|
@ -355,15 +349,13 @@ class CropProductionTests(unittest.TestCase):
|
|||
'model_data_path': MODEL_DATA_PATH,
|
||||
'fertilization_rate_table_path': os.path.join(
|
||||
SAMPLE_DATA_PATH, 'crop_fertilization_rates.csv'),
|
||||
'nitrogen_fertilization_rate': 29.6,
|
||||
'phosphorus_fertilization_rate': 8.4,
|
||||
'potassium_fertilization_rate': 14.2,
|
||||
}
|
||||
|
||||
crop_production_regression.execute(args)
|
||||
|
||||
expected_agg_result_table = pandas.read_csv(
|
||||
os.path.join(TEST_DATA_PATH, 'expected_regression_aggregate_results.csv'))
|
||||
os.path.join(TEST_DATA_PATH,
|
||||
'expected_regression_aggregate_results.csv'))
|
||||
agg_result_table = pandas.read_csv(
|
||||
os.path.join(args['workspace_dir'], 'aggregate_results.csv'))
|
||||
pandas.testing.assert_frame_equal(
|
||||
|
@ -381,6 +373,38 @@ class CropProductionTests(unittest.TestCase):
|
|||
pandas.testing.assert_frame_equal(
|
||||
expected_result_table, result_table, check_dtype=False)
|
||||
|
||||
# Check raster outputs to make sure values are in Mg/ha.
|
||||
# Raster sum is (Mg•px)/(ha•yr).
|
||||
# Result table reports totals in Mg/yr.
|
||||
# To convert from Mg/yr to (Mg•px)/(ha•yr), multiply by px/ha.
|
||||
expected_raster_sums = {}
|
||||
for (index, crop) in [(0, 'barley'), (1, 'soybean'), (2, 'wheat')]:
|
||||
filename = crop + '_observed_production.tif'
|
||||
pixels_per_hectare = _get_pixels_per_hectare(
|
||||
os.path.join(args['workspace_dir'], filename))
|
||||
expected_raster_sums[filename] = (
|
||||
expected_result_table.loc[index]['production_observed']
|
||||
* pixels_per_hectare)
|
||||
filename = crop + '_regression_production.tif'
|
||||
pixels_per_hectare = _get_pixels_per_hectare(
|
||||
os.path.join(args['workspace_dir'], filename))
|
||||
expected_raster_sums[filename] = (
|
||||
expected_result_table.loc[index]['production_modeled']
|
||||
* pixels_per_hectare)
|
||||
|
||||
for filename in expected_raster_sums:
|
||||
raster_path = os.path.join(args['workspace_dir'], filename)
|
||||
raster_info = pygeoprocessing.get_raster_info(raster_path)
|
||||
nodata = raster_info['nodata'][0]
|
||||
raster_sum = 0.0
|
||||
for _, block in pygeoprocessing.iterblocks((raster_path, 1)):
|
||||
raster_sum += numpy.sum(
|
||||
block[~pygeoprocessing.array_equals_nodata(
|
||||
block, nodata)], dtype=numpy.float32)
|
||||
expected_sum = expected_raster_sums[filename]
|
||||
numpy.testing.assert_allclose(raster_sum, expected_sum,
|
||||
rtol=0, atol=0.001)
|
||||
|
||||
def test_crop_production_regression_no_nodata(self):
|
||||
"""Crop Production: test regression model with undefined nodata raster.
|
||||
|
||||
|
@ -399,9 +423,6 @@ class CropProductionTests(unittest.TestCase):
|
|||
'model_data_path': MODEL_DATA_PATH,
|
||||
'fertilization_rate_table_path': os.path.join(
|
||||
SAMPLE_DATA_PATH, 'crop_fertilization_rates.csv'),
|
||||
'nitrogen_fertilization_rate': 29.6,
|
||||
'phosphorus_fertilization_rate': 8.4,
|
||||
'potassium_fertilization_rate': 14.2,
|
||||
}
|
||||
|
||||
# Create a raster based on the test data geotransform, but smaller and
|
||||
|
|
Loading…
Reference in New Issue