replace kernel functions with pygeoprocessing.kernels

Emily Soth 2023-11-01 10:43:37 -07:00
parent d0b880e564
commit 4b581bc851
13 changed files with 98 additions and 720 deletions

View File

@@ -3,11 +3,14 @@ import logging
import math
import os
import pickle
import shutil
import tempfile
import time
import numpy
import pandas
import pygeoprocessing
import pygeoprocessing.kernels
import rtree
import shapely
import shapely.errors
@@ -2130,7 +2133,7 @@ def calculate_relief_exposure(
_aggregate_raster_values_in_radius(
base_shore_point_vector_path, positive_dem_path, dem_averaging_radius,
target_relief_pickle_path, mean_op)
workspace_dir, target_relief_pickle_path, mean_op)
if missing_values:
LOGGER.warning(
@@ -2270,7 +2273,7 @@ def aggregate_population_density(
_aggregate_raster_values_in_radius(
base_shore_point_vector_path, clipped_projected_pop_path,
search_radius, target_pickle_path, density_op)
search_radius, workspace_dir, target_pickle_path, density_op)
if missing_values:
LOGGER.warning(
@@ -2339,6 +2342,7 @@ def _schedule_habitat_tasks(
habitat_row['path'],
target_habitat_pickle_path,
model_resolution,
working_dir,
file_suffix),
target_path_list=[target_habitat_pickle_path],
task_name=f'searching for {_id}'))
@@ -2349,7 +2353,7 @@
def search_for_raster_habitat(
base_shore_point_vector_path, search_radius, habitat_rank,
habitat_id, habitat_raster_path, target_habitat_pickle_path,
model_resolution, file_suffix):
model_resolution, workspace_dir, file_suffix):
"""Search for habitat raster within a radius of each shore point.
If habitat is present within the search radius, assign the habitat_rank
@@ -2405,7 +2409,8 @@ def search_for_raster_habitat(
_aggregate_raster_values_in_radius(
base_shore_point_vector_path, clipped_projected_hab_path,
search_radius, target_habitat_pickle_path, habitat_op)
search_radius, workspace_dir, target_habitat_pickle_path,
habitat_op)
LOGGER.info(f'Finished searching for {habitat_id}')
@@ -3106,7 +3111,7 @@ def clip_and_project_vector(
def _aggregate_raster_values_in_radius(
base_point_vector_path, base_raster_path, sample_distance,
target_pickle_path, aggregation_op):
work_dir, target_pickle_path, aggregation_op):
"""Aggregate raster values in radius around a point.
Do the radial search by constructing a rectangular kernel mask
@@ -3132,6 +3137,7 @@ def _aggregate_raster_values_in_radius(
projection matching base_point_vector_path.
sample_distance (float): radius around each point to search
for valid pixels.
work_dir (string): path to a directory where an intermediate
kernel raster can be created.
target_pickle_path (string): path to pickle file storing dict
keyed by point id.
aggregation_op (function): takes a signal array and a mask array
@@ -3150,21 +3156,17 @@ def _aggregate_raster_values_in_radius(
# we can assume square pixels at this point because
# we already warped input raster and defined square pixels
pixel_dist = int(abs(
sample_distance / (geotransform[1])))
pixel_dist = int(abs(sample_distance / (geotransform[1])))
# kernel dimensions will be 2 * pixel_dist + 1 so that
# point feature is always inside the center pixel of the kernel.
# create rectangular kernel and a mask so it looks circular.
X, Y = numpy.ogrid[:(1 + pixel_dist), :(1 + pixel_dist)]
lr_quadrant = numpy.hypot(X, Y)
ll_quadrant = numpy.flip(lr_quadrant[:, 1:], axis=1)
bottom_half = numpy.concatenate((ll_quadrant, lr_quadrant), axis=1)
top_half = numpy.flip(bottom_half[1:, :], axis=0)
kernel_index_distances = numpy.concatenate((top_half, bottom_half), axis=0)
radial_kernel_mask = numpy.where(
kernel_index_distances > pixel_dist, False, True)
temp_dir = tempfile.mkdtemp(dir=work_dir)
kernel_path = os.path.join(temp_dir, 'kernel.tif')
pygeoprocessing.kernels.dichotomous_kernel(
target_kernel_path=kernel_path,
max_distance=pixel_dist,
normalize=False)
radial_kernel_mask = pygeoprocessing.raster_to_numpy_array(
kernel_path).astype(bool)
shutil.rmtree(temp_dir, ignore_errors=True)
result = {}
vector = gdal.OpenEx(
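As a standalone reference, the temp-kernel pattern adopted above can be sketched on its own: write a dichotomous kernel raster into a temporary directory, read it back as a boolean mask, and clean up. This is a minimal sketch using only calls that appear in the diff; the function name radial_mask and the 5-pixel radius are hypothetical.

import os
import shutil
import tempfile

import pygeoprocessing
import pygeoprocessing.kernels


def radial_mask(pixel_dist, work_dir):
    # write a dichotomous kernel (1 within pixel_dist of the center pixel,
    # 0 outside) to a temp dir, load it back as a boolean mask, clean up
    temp_dir = tempfile.mkdtemp(dir=work_dir)
    kernel_path = os.path.join(temp_dir, 'kernel.tif')
    pygeoprocessing.kernels.dichotomous_kernel(
        target_kernel_path=kernel_path,
        max_distance=pixel_dist,
        normalize=False)
    mask = pygeoprocessing.raster_to_numpy_array(kernel_path).astype(bool)
    shutil.rmtree(temp_dir, ignore_errors=True)
    return mask  # shape is (2 * pixel_dist + 1, 2 * pixel_dist + 1)


mask = radial_mask(5, tempfile.gettempdir())  # hypothetical usage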

View File

@@ -9,6 +9,7 @@ import re
import numpy
import pygeoprocessing
import pygeoprocessing.kernels
import taskgraph
from osgeo import gdal
from osgeo import ogr
@@ -767,8 +768,11 @@ def execute(args):
except:
alpha_kernel_raster_task = task_graph.add_task(
task_name=f'decay_kernel_raster_{alpha}',
func=utils.exponential_decay_kernel_raster,
args=(alpha, kernel_path),
func=pygeoprocessing.kernels.exponential_decay_kernel,
kwargs=dict(
target_kernel_path=kernel_path,
max_distance=alpha * 5,
expected_distance=alpha),
target_path_list=[kernel_path])
alpha_kernel_map[kernel_path] = alpha_kernel_raster_task
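The removed utils helper hard-coded the kernel extent to five times the expected distance, which is why the call above passes max_distance=alpha * 5 to preserve the old extent. A hedged sanity check of the decay shape, assuming the replacement implements the same exp(-distance / expected_distance) falloff as the removed helper ('kernel.tif' and alpha=10 are placeholder values):

import pygeoprocessing
import pygeoprocessing.kernels

alpha = 10  # expected distance in pixels (placeholder)
pygeoprocessing.kernels.exponential_decay_kernel(
    target_kernel_path='kernel.tif',
    max_distance=alpha * 5,
    expected_distance=alpha)
kernel = pygeoprocessing.raster_to_numpy_array('kernel.tif')
center = kernel.shape[0] // 2
# the value alpha pixels from the center, relative to the center value,
# should be close to 1/e ~= 0.3679 (any normalization cancels out)
print(kernel[center, center + alpha] / kernel[center, center])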
@@ -1403,7 +1407,7 @@ def _sum_arrays(*array_list):
result = numpy.empty_like(array_list[0])
result[:] = 0
for array in array_list:
local_valid_mask = ~utils.array_equals_nodata(array, _INDEX_NODATA)
local_valid_mask = ~pygeoprocessing.array_equals_nodata(array, _INDEX_NODATA)
result[local_valid_mask] += array[local_valid_mask]
valid_mask |= local_valid_mask
result[~valid_mask] = _INDEX_NODATA

View File

@@ -11,6 +11,7 @@ import time
import numpy
import pygeoprocessing
import pygeoprocessing.kernels
import scipy
import taskgraph
from osgeo import gdal
@@ -445,8 +446,8 @@ def _convert_landscape(
}
# a sigma of 1.0 gives nice visual results to smooth pixel level artifacts
# since a pixel is the 1.0 unit
utils.gaussian_decay_kernel_raster(
1.0, tmp_file_registry['gaussian_kernel'])
pygeoprocessing.kernels.normal_distribution_kernel(
tmp_file_registry['gaussian_kernel'], sigma=1)
# create the output raster first as a copy of the base landcover so it can
# be looped on for each step
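For reference, the replacement call in isolation: a minimal sketch assuming normal_distribution_kernel takes the target path and sigma as used above, and that the resulting kernel feeds straight into convolve_2d for the smoothing this code performs. The .tif names are placeholders.

import pygeoprocessing
import pygeoprocessing.kernels

# sigma of 1 pixel, matching the smoothing in _convert_landscape above
pygeoprocessing.kernels.normal_distribution_kernel(
    'gaussian_kernel.tif', sigma=1)
# smooth a raster by convolving it with the gaussian kernel
pygeoprocessing.convolve_2d(
    ('landcover_mask.tif', 1), ('gaussian_kernel.tif', 1),
    'smoothed.tif')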

View File

@@ -5,6 +5,7 @@ import os
import numpy
import pygeoprocessing
import pygeoprocessing.kernels
import taskgraph
from osgeo import gdal
from osgeo import ogr
@@ -1104,43 +1105,6 @@ def is_near(input_path, radius, distance_path, out_path):
target_nodata=UINT8_NODATA)
def make_search_kernel(raster_path, radius):
"""Make a search kernel for a raster that marks pixels within a radius.
Args:
raster_path (str): path to a raster to make kernel for. It is assumed
that the raster has square pixels.
radius (float): distance around each pixel's centerpoint to search
in raster coordinate system units
Returns:
2D boolean numpy.ndarray. '1' pixels are within ``radius`` of the
center pixel, measured centerpoint-to-centerpoint. '0' pixels are
outside the radius. The array dimensions are as small as possible
while still including the entire radius.
"""
raster_info = pygeoprocessing.get_raster_info(raster_path)
pixel_radius = radius / abs(raster_info['pixel_size'][0])
pixel_margin = math.floor(pixel_radius)
# the search kernel is just large enough to contain all pixels that
# *could* be within the radius of the center pixel
search_kernel_shape = tuple([pixel_margin * 2 + 1] * 2)
# arrays of the column index and row index of each pixel
col_indices, row_indices = numpy.indices(search_kernel_shape)
# adjust them so that (0, 0) is the center pixel
col_indices -= pixel_margin
row_indices -= pixel_margin
# hypotenuse_i = sqrt(col_indices_i**2 + row_indices_i**2) for each pixel i
hypotenuse = numpy.hypot(col_indices, row_indices)
# boolean kernel where 1=pixel centerpoint is within the radius of the
# center pixel's centerpoint
search_kernel = numpy.array(hypotenuse <= pixel_radius, dtype=numpy.uint8)
LOGGER.debug(
f'Search kernel for {raster_path} with radius {radius}:'
f'\n{search_kernel}')
return search_kernel
def raster_average(raster_path, radius, kernel_path, out_path):
"""Average pixel values within a radius.
@@ -1168,19 +1132,12 @@ def raster_average(raster_path, radius, kernel_path, out_path):
Returns:
None
"""
search_kernel = make_search_kernel(raster_path, radius)
srs = osr.SpatialReference()
srs.ImportFromEPSG(3857)
projection_wkt = srs.ExportToWkt()
pygeoprocessing.numpy_array_to_raster(
# float32 here to avoid pygeoprocessing bug issue #180
search_kernel.astype(numpy.float32),
FLOAT_NODATA,
(20, -20),
(0, 0),
projection_wkt,
kernel_path)
pixel_radius = radius / abs(pygeoprocessing.get_raster_info(
raster_path)['pixel_size'][0])
pygeoprocessing.kernels.dichotomous_kernel(
target_kernel_path=kernel_path,
max_distance=pixel_radius,
normalize=False)
# convolve the signal (input raster) with the kernel and normalize
# this is equivalent to taking an average of each pixel's neighborhood
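The neighborhood average described in that comment can also be sketched as a single convolution with kernel normalization turned on. A hedged example, assuming convolve_2d's normalize_kernel and ignore_nodata_and_edges options (file names and radius are placeholders; the model's exact nodata handling may differ):

import pygeoprocessing
import pygeoprocessing.kernels

pixel_radius = 10  # placeholder radius in pixels
pygeoprocessing.kernels.dichotomous_kernel(
    target_kernel_path='kernel.tif',
    max_distance=pixel_radius,
    normalize=False)
# normalize_kernel=True divides by the kernel sum, so each output pixel
# is the mean of its circular neighborhood in the input
pygeoprocessing.convolve_2d(
    ('input.tif', 1), ('kernel.tif', 1), 'average.tif',
    ignore_nodata_and_edges=True, normalize_kernel=True)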

View File

@@ -9,6 +9,7 @@ import time
import numpy
import pygeoprocessing
import pygeoprocessing.kernels
import rtree
import shapely.prepared
import shapely.wkb
@@ -530,8 +531,11 @@ def execute(args):
area_kernel_path = os.path.join(
intermediate_dir, f'area_kernel{file_suffix}.tif')
area_kernel_task = task_graph.add_task(
func=flat_disk_kernel,
args=(green_area_decay_kernel_distance, area_kernel_path),
func=pygeoprocessing.kernels.dichotomous_kernel,
kwargs=dict(
target_kernel_path=area_kernel_path,
max_distance=green_area_decay_kernel_distance,
normalize=False),
target_path_list=[area_kernel_path],
task_name='area kernel')
@@ -1185,7 +1189,7 @@ def calc_t_air_nomix_op(t_ref_val, hm_array, uhi_max):
result = numpy.empty(hm_array.shape, dtype=numpy.float32)
result[:] = TARGET_NODATA
# TARGET_NODATA should never be None
valid_mask = ~utils.array_equals_nodata(hm_array, TARGET_NODATA)
valid_mask = ~pygeoprocessing.array_equals_nodata(hm_array, TARGET_NODATA)
result[valid_mask] = t_ref_val + (1-hm_array[valid_mask]) * uhi_max
return result
@@ -1213,9 +1217,9 @@ def calc_cc_op_factors(
result = numpy.empty(shade_array.shape, dtype=numpy.float32)
result[:] = TARGET_NODATA
valid_mask = ~(
utils.array_equals_nodata(shade_array, TARGET_NODATA) |
utils.array_equals_nodata(albedo_array, TARGET_NODATA) |
utils.array_equals_nodata(eti_array, TARGET_NODATA))
pygeoprocessing.array_equals_nodata(shade_array, TARGET_NODATA) |
pygeoprocessing.array_equals_nodata(albedo_array, TARGET_NODATA) |
pygeoprocessing.array_equals_nodata(eti_array, TARGET_NODATA))
result[valid_mask] = (
cc_weight_shade*shade_array[valid_mask] +
cc_weight_albedo*albedo_array[valid_mask] +
@@ -1242,9 +1246,9 @@ def calc_eti_op(
result = numpy.empty(kc_array.shape, dtype=numpy.float32)
result[:] = target_nodata
# kc intermediate output should always have a nodata value defined
valid_mask = ~utils.array_equals_nodata(kc_array, kc_nodata)
valid_mask = ~pygeoprocessing.array_equals_nodata(kc_array, kc_nodata)
if et0_nodata is not None:
valid_mask &= ~utils.array_equals_nodata(et0_array, et0_nodata)
valid_mask &= ~pygeoprocessing.array_equals_nodata(et0_array, et0_nodata)
result[valid_mask] = (
kc_array[valid_mask] * et0_array[valid_mask] / et_max)
return result
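The utils.array_equals_nodata calls in this file are swapped one-for-one for the pygeoprocessing version. A minimal sketch of the masking pattern, assuming (as its use here implies) that the function returns a boolean array and handles nan nodata, which a plain == comparison cannot since nan != nan:

import numpy
import pygeoprocessing

array = numpy.array([1.5, numpy.nan, 2.0], dtype=numpy.float32)
print(array == numpy.nan)  # [False False False]: == misses nan nodata
print(pygeoprocessing.array_equals_nodata(array, numpy.nan))
# expected: [False  True False]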
@@ -1276,85 +1280,6 @@ def calculate_wbgt(
target_path=target_vapor_pressure_path)
def flat_disk_kernel(max_distance, kernel_filepath):
"""Create a flat disk kernel.
The raster created will be a tiled GeoTiff, with 256x256 memory blocks.
Args:
max_distance (int): The distance (in pixels) of the
kernel's radius.
kernel_filepath (string): The path to the file on disk where this
kernel should be stored. If this file exists, it will be
overwritten.
Returns:
None
"""
LOGGER.info(f'Creating a disk kernel of distance {max_distance} at '
f'{kernel_filepath}')
kernel_size = int(numpy.round(max_distance * 2 + 1))
driver = gdal.GetDriverByName('GTiff')
kernel_dataset = driver.Create(
kernel_filepath.encode('utf-8'), kernel_size, kernel_size, 1,
gdal.GDT_Byte, options=[
'BIGTIFF=IF_SAFER', 'TILED=YES', 'BLOCKXSIZE=256',
'BLOCKYSIZE=256'])
# Make some kind of geotransform and SRS. It doesn't matter what, but
# having one will make GIS libraries behave better if it's all defined
kernel_dataset.SetGeoTransform([0, 1, 0, 0, 0, -1])
srs = osr.SpatialReference()
srs.SetWellKnownGeogCS('WGS84')
kernel_dataset.SetProjection(srs.ExportToWkt())
kernel_band = kernel_dataset.GetRasterBand(1)
kernel_band.SetNoDataValue(255)
cols_per_block, rows_per_block = kernel_band.GetBlockSize()
n_cols = kernel_dataset.RasterXSize
n_rows = kernel_dataset.RasterYSize
n_col_blocks = int(math.ceil(n_cols / float(cols_per_block)))
n_row_blocks = int(math.ceil(n_rows / float(rows_per_block)))
for row_block_index in range(n_row_blocks):
row_offset = row_block_index * rows_per_block
row_block_width = n_rows - row_offset
if row_block_width > rows_per_block:
row_block_width = rows_per_block
for col_block_index in range(n_col_blocks):
col_offset = col_block_index * cols_per_block
col_block_width = n_cols - col_offset
if col_block_width > cols_per_block:
col_block_width = cols_per_block
# Numpy creates index rasters as ints by default, which sometimes
# creates problems on 32-bit builds when we try to add Int32
# matrices to float64 matrices.
row_indices, col_indices = numpy.indices((row_block_width,
col_block_width),
dtype=float)
row_indices += float(row_offset - max_distance)
col_indices += float(col_offset - max_distance)
kernel_index_distances = numpy.hypot(
row_indices, col_indices)
kernel = kernel_index_distances < max_distance
kernel_band.WriteArray(kernel, xoff=col_offset,
yoff=row_offset)
# Need to flush the kernel's cache to disk before opening up a new Dataset
# object in iterblocks()
kernel_dataset.FlushCache()
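One hedged note on the helper removed above: it used a strict comparison (kernel_index_distances < max_distance), while an inclusive <= also keeps the ring of pixels exactly max_distance from the center. If pygeoprocessing's dichotomous_kernel is inclusive, that boundary ring would be a plausible source of the small shifts in the UCM regression values further down. A quick count of the difference on a toy grid:

import numpy

# distances from the center of a 5x5 grid
dist = numpy.hypot(*numpy.mgrid[-2:3, -2:3])
print(int((dist < 2).sum()))   # 9: strict comparison, as in the removed helper
print(int((dist <= 2).sum()))  # 13: inclusive comparison adds the ring at d == 2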
def hm_op(cc_array, green_area_sum, cc_park_array, green_area_threshold):
"""Calculate HM.
@@ -1374,8 +1299,8 @@ def hm_op(cc_array, green_area_sum, cc_park_array, green_area_threshold):
"""
result = numpy.empty(cc_array.shape, dtype=numpy.float32)
result[:] = TARGET_NODATA
valid_mask = ~(utils.array_equals_nodata(cc_array, TARGET_NODATA) &
utils.array_equals_nodata(cc_park_array, TARGET_NODATA))
valid_mask = ~(pygeoprocessing.array_equals_nodata(cc_array, TARGET_NODATA) &
pygeoprocessing.array_equals_nodata(cc_park_array, TARGET_NODATA))
cc_mask = ((cc_array >= cc_park_array) |
(green_area_sum < green_area_threshold))
result[cc_mask & valid_mask] = cc_array[cc_mask & valid_mask]
@@ -1469,8 +1394,10 @@ def convolve_2d_by_exponential(
dir=os.path.dirname(target_convolve_raster_path))
exponential_kernel_path = os.path.join(
temporary_working_dir, 'exponential_decay_kernel.tif')
utils.exponential_decay_kernel_raster(
decay_kernel_distance, exponential_kernel_path)
pygeoprocessing.kernels.exponential_decay_kernel(
target_kernel_path=exponential_kernel_path,
max_distance=decay_kernel_distance * 5,
expected_distance=decay_kernel_distance)
pygeoprocessing.convolve_2d(
(signal_raster_path, 1), (exponential_kernel_path, 1),
target_convolve_raster_path, working_dir=temporary_working_dir,

View File

@@ -9,6 +9,7 @@ import tempfile
import numpy
import numpy.testing
import pygeoprocessing
import pygeoprocessing.kernels
import pygeoprocessing.symbolic
import shapely.ops
import shapely.wkb
@@ -716,8 +717,6 @@ def execute(args):
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
kernel_creation_functions = {
KERNEL_LABEL_DICHOTOMY: _kernel_dichotomy,
KERNEL_LABEL_EXPONENTIAL: _kernel_exponential,
KERNEL_LABEL_GAUSSIAN: _kernel_gaussian,
KERNEL_LABEL_DENSITY: _kernel_density,
# Use the user-provided beta args parameter if the user has provided
@@ -728,11 +727,7 @@
# Taskgraph needs a __name__ attribute, so adding one here.
# kernel_creation_functions[KERNEL_LABEL_POWER].__name__ = (
# 'functools_partial_decay_power')
# Since we have these keys defined in two places, I want to be super sure
# that the labels match.
assert sorted(kernel_creation_functions.keys()) == (
sorted(MODEL_SPEC['args']['decay_function']['options']))
#
decay_function = args['decay_function']
LOGGER.info(f'Using decay function {decay_function}')
@@ -969,17 +964,41 @@ def execute(args):
kernel_path = os.path.join(
intermediate_dir, f'kernel_{search_radius_m}{suffix}.tif')
kernel_paths[search_radius_m] = kernel_path
if decay_function == KERNEL_LABEL_DICHOTOMY:
kernel_func = pygeoprocessing.kernels.dichotomous_kernel
kernel_kwargs = dict(
target_kernel_path=kernel_path,
max_distance=search_radius_in_pixels,
normalize=False)
elif decay_function == KERNEL_LABEL_EXPONENTIAL:
kernel_func = pygeoprocessing.kernels.exponential_decay_kernel
kernel_kwargs = dict(
target_kernel_path=kernel_path,
max_distance=math.ceil(search_radius_in_pixels) * 2 + 1,
expected_distance=search_radius_in_pixels,
normalize=False)
elif decay_function in [KERNEL_LABEL_GAUSSIAN, KERNEL_LABEL_DENSITY]:
kernel_func = pygeoprocessing.kernels.create_distance_decay_kernel
def decay_func(dist_array):
return kernel_creation_functions[decay_function](
dist_array, max_distance=search_radius_in_pixels)
kernel_kwargs = dict(
target_kernel_path=kernel_path,
distance_decay_function=decay_func,
max_distance=search_radius_in_pixels,
normalize=False)
else:
raise ValueError('Invalid kernel creation option selected')
kernel_tasks[search_radius_m] = graph.add_task(
_create_kernel_raster,
kwargs={
'kernel_function': kernel_creation_functions[decay_function],
'expected_distance': search_radius_in_pixels,
'kernel_filepath': kernel_path,
'normalize': False}, # Model math calls for un-normalized
kernel_func,
kwargs=kernel_kwargs,
task_name=(
f'Create {decay_function} kernel - {search_radius_m}m'),
target_path_list=[kernel_path]
)
target_path_list=[kernel_path])
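For the gaussian and density options the model keeps its own decay math and hands it to pygeoprocessing as a callable, as above. A hedged standalone sketch of that dispatch, using a simple linear falloff rather than the model's actual gaussian/density formulas (max_dist and the kernel path are placeholders):

import numpy
import pygeoprocessing.kernels

max_dist = 10  # placeholder radius in pixels


def linear_decay(dist_array):
    # 1 at the center, falling linearly to 0 at max_dist and beyond
    return numpy.maximum(0, (max_dist - dist_array) / max_dist)


pygeoprocessing.kernels.create_distance_decay_kernel(
    target_kernel_path='kernel.tif',
    distance_decay_function=linear_decay,
    max_distance=max_dist,
    normalize=False)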
# Search radius mode 1: the same search radius applies to everything
if args['search_radius_mode'] == RADIUS_OPT_UNIFORM:
@@ -2388,44 +2407,6 @@ def _resample_population_raster(
shutil.rmtree(tmp_working_dir, ignore_errors=True)
def _kernel_dichotomy(distance, max_distance):
"""Create a dichotomous kernel.
All pixels within ``max_distance`` have a value of 1.
Args:
distance (numpy.array): An array of euclidean distances (in pixels)
from the center of the kernel.
max_distance (float): The maximum distance of the kernel. Pixels that
are more than this number of pixels away will have a value of 0.
Returns:
``numpy.array`` with dtype of numpy.float32 and same shape as
``distance``.
"""
return (distance <= max_distance).astype(numpy.float32)
def _kernel_exponential(distance, max_distance):
"""Create an exponential-decay kernel.
Args:
distance (numpy.array): An array of euclidean distances (in pixels)
from the center of the kernel.
max_distance (float): The maximum distance of the kernel. Pixels that
are more than this number of pixels away will have a value of 0.
Returns:
``numpy.array`` with dtype of numpy.float32 and same shape as
``distance``.
"""
kernel = numpy.zeros(distance.shape, dtype=numpy.float32)
pixels_in_radius = (distance <= max_distance)
kernel[pixels_in_radius] = numpy.exp(
-distance[pixels_in_radius] / max_distance)
return kernel
def _kernel_power(distance, max_distance, beta):
"""Create a power kernel with user-defined beta.
@@ -2491,95 +2472,6 @@ def _kernel_density(distance, max_distance):
return kernel
def _create_kernel_raster(
kernel_function, expected_distance, kernel_filepath, normalize=False):
"""Create a raster distance-weighted decay kernel from a function.
Args:
kernel_function (callable): The kernel function to use.
expected_distance (int or float): The distance (in pixels) after which
the kernel becomes 0.
kernel_filepath (string): The string path on disk to where this kernel
should be stored.
normalize=False (bool): Whether to divide the kernel values by the sum
of all values in the kernel.
Returns:
``None``
"""
pixel_radius = math.ceil(expected_distance)
kernel_size = pixel_radius * 2 + 1 # allow for a center pixel
driver = gdal.GetDriverByName('GTiff')
kernel_dataset = driver.Create(
kernel_filepath.encode('utf-8'), kernel_size, kernel_size, 1,
gdal.GDT_Float32, options=[
'BIGTIFF=IF_SAFER', 'TILED=YES', 'BLOCKXSIZE=256',
'BLOCKYSIZE=256'])
# Make some kind of geotransform; it doesn't matter what, but GIS
# libraries behave better if it's all defined
kernel_dataset.SetGeoTransform([0, 1, 0, 0, 0, -1])
srs = osr.SpatialReference()
srs.SetWellKnownGeogCS('WGS84')
kernel_dataset.SetProjection(srs.ExportToWkt())
kernel_band = kernel_dataset.GetRasterBand(1)
kernel_nodata = float(numpy.finfo(numpy.float32).min)
kernel_band.SetNoDataValue(kernel_nodata)
kernel_band = None
kernel_dataset = None
kernel_raster = gdal.OpenEx(kernel_filepath, gdal.GA_Update)
kernel_band = kernel_raster.GetRasterBand(1)
band_x_size = kernel_band.XSize
band_y_size = kernel_band.YSize
running_sum = 0
for block_data in pygeoprocessing.iterblocks(
(kernel_filepath, 1), offset_only=True):
array_xmin = block_data['xoff'] - pixel_radius
array_xmax = min(
array_xmin + block_data['win_xsize'],
band_x_size - pixel_radius)
array_ymin = block_data['yoff'] - pixel_radius
array_ymax = min(
array_ymin + block_data['win_ysize'],
band_y_size - pixel_radius)
pixel_dist_from_center = numpy.hypot(
*numpy.mgrid[
array_ymin:array_ymax,
array_xmin:array_xmax])
kernel = kernel_function(distance=pixel_dist_from_center,
max_distance=expected_distance)
if normalize:
running_sum += kernel.sum()
kernel_band.WriteArray(
kernel,
yoff=block_data['yoff'],
xoff=block_data['xoff'])
kernel_raster.FlushCache()
kernel_band = None
kernel_raster = None
if normalize:
kernel_raster = gdal.OpenEx(kernel_filepath, gdal.GA_Update)
kernel_band = kernel_raster.GetRasterBand(1)
for block_data, kernel_block in pygeoprocessing.iterblocks(
(kernel_filepath, 1)):
# divide by sum to normalize
kernel_block /= running_sum
kernel_band.WriteArray(
kernel_block, xoff=block_data['xoff'], yoff=block_data['yoff'])
kernel_raster.FlushCache()
kernel_band = None
kernel_raster = None
def _create_valid_pixels_nodata_mask(raster_list, target_mask_path):
"""Create a valid pixels mask across a stack of aligned rasters.

View File

@@ -326,193 +326,6 @@ def make_suffix_string(args, suffix_key):
return file_suffix
def exponential_decay_kernel_raster(expected_distance, kernel_filepath,
normalize=True):
"""Create a raster-based exponential decay kernel.
The raster created will be a tiled GeoTiff, with 256x256 memory blocks.
Args:
expected_distance (int or float): The distance (in pixels) at which
the value of the decay function is equal to ``1/e``. The kernel
extends to 5 times this distance.
kernel_filepath (string): The path to the file on disk where this
kernel should be stored. If this file exists, it will be
overwritten.
normalize=True (bool): Whether to divide the kernel values by the sum
of all values in the kernel.
Returns:
None
"""
max_distance = expected_distance * 5
kernel_size = int(numpy.round(max_distance * 2 + 1))
driver = gdal.GetDriverByName('GTiff')
kernel_dataset = driver.Create(
kernel_filepath.encode('utf-8'), kernel_size, kernel_size, 1,
gdal.GDT_Float32, options=[
'BIGTIFF=IF_SAFER', 'TILED=YES', 'BLOCKXSIZE=256',
'BLOCKYSIZE=256'])
# Make some kind of geotransform; it doesn't matter what, but GIS
# libraries behave better if it's all defined
kernel_dataset.SetGeoTransform([0, 1, 0, 0, 0, -1])
srs = osr.SpatialReference()
srs.SetWellKnownGeogCS('WGS84')
kernel_dataset.SetProjection(srs.ExportToWkt())
kernel_band = kernel_dataset.GetRasterBand(1)
kernel_band.SetNoDataValue(-9999)
cols_per_block, rows_per_block = kernel_band.GetBlockSize()
n_cols = kernel_dataset.RasterXSize
n_rows = kernel_dataset.RasterYSize
n_col_blocks = int(math.ceil(n_cols / float(cols_per_block)))
n_row_blocks = int(math.ceil(n_rows / float(rows_per_block)))
integration = 0.0
for row_block_index in range(n_row_blocks):
row_offset = row_block_index * rows_per_block
row_block_width = n_rows - row_offset
if row_block_width > rows_per_block:
row_block_width = rows_per_block
for col_block_index in range(n_col_blocks):
col_offset = col_block_index * cols_per_block
col_block_width = n_cols - col_offset
if col_block_width > cols_per_block:
col_block_width = cols_per_block
# Numpy creates index rasters as ints by default, which sometimes
# creates problems on 32-bit builds when we try to add Int32
# matrices to float64 matrices.
row_indices, col_indices = numpy.indices((row_block_width,
col_block_width),
dtype=float)
row_indices += float(row_offset - max_distance)
col_indices += float(col_offset - max_distance)
kernel_index_distances = numpy.hypot(
row_indices, col_indices)
kernel = numpy.where(
kernel_index_distances > max_distance, 0.0,
numpy.exp(-kernel_index_distances / expected_distance))
integration += numpy.sum(kernel)
kernel_band.WriteArray(kernel, xoff=col_offset,
yoff=row_offset)
# Need to flush the kernel's cache to disk before opening up a new Dataset
# object in iterblocks()
kernel_band.FlushCache()
kernel_dataset.FlushCache()
kernel_band = None
kernel_dataset = None
if normalize:
kernel_dataset = gdal.OpenEx(kernel_filepath, gdal.GA_Update)
kernel_band = kernel_dataset.GetRasterBand(1)
for block_data in pygeoprocessing.iterblocks(
(kernel_filepath, 1), offset_only=True):
kernel_block = kernel_band.ReadAsArray(**block_data)
kernel_block /= integration
kernel_band.WriteArray(kernel_block, xoff=block_data['xoff'],
yoff=block_data['yoff'])
kernel_band.FlushCache()
kernel_dataset.FlushCache()
kernel_band = None
kernel_dataset = None
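A hedged equivalence note for the helper removed above: its body writes exp(-distance / expected_distance) out to a radius of expected_distance * 5 and normalizes by default, so the drop-in replacement used elsewhere in this commit presumably looks like this (the path is a placeholder; expected_distance=100 matches the regression test removed further down):

import pygeoprocessing.kernels

expected_distance = 100  # pixels
pygeoprocessing.kernels.exponential_decay_kernel(
    target_kernel_path='kernel_100.tif',
    max_distance=expected_distance * 5,
    expected_distance=expected_distance,
    normalize=True)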
def gaussian_decay_kernel_raster(
sigma, kernel_filepath, n_std_dev=3.0, normalize=True):
"""Create a raster-based gaussian decay kernel.
The raster will be a tiled GeoTIFF, with 256x256 memory blocks.
While the ``sigma`` parameter represents the width of a standard deviation
in pixels, the ``n_std_dev`` parameter defines how many standard deviations
should be included in the resulting kernel. The resulting kernel raster
will be square in shape, with a width of ``(sigma * n_std_dev * 2) + 1``
pixels.
Args:
sigma (int or float): The distance (in pixels) of the standard
deviation from the center of the raster.
kernel_filepath (string): The path to the file on disk where this
kernel should be stored. If a file exists at this path, it will be
overwritten.
n_std_dev=3.0 (int or float): The number of times sigma should be
multiplied in order to get the pixel radius of the resulting
kernel. The default of 3 standard deviations will cover 99.7% of
the area under the gaussian curve.
normalize=True (bool): Whether to divide the kernel values by the sum
of all values in the kernel.
Returns:
``None``
"""
# going 3.0 times out from the sigma gives you over 99% of area under
# the gaussian curve
max_distance = sigma * n_std_dev
kernel_size = int(numpy.round(max_distance * 2 + 1))
driver = gdal.GetDriverByName('GTiff')
kernel_dataset = driver.Create(
kernel_filepath.encode('utf-8'), kernel_size, kernel_size, 1,
gdal.GDT_Float32, options=[
'BIGTIFF=IF_SAFER', 'TILED=YES', 'BLOCKXSIZE=256',
'BLOCKYSIZE=256'])
# Make some kind of geotransform; it doesn't matter what, but GIS
# libraries behave better if it's all defined
kernel_dataset.SetGeoTransform([0, 1, 0, 0, 0, -1])
srs = osr.SpatialReference()
srs.SetWellKnownGeogCS('WGS84')
kernel_dataset.SetProjection(srs.ExportToWkt())
kernel_band = kernel_dataset.GetRasterBand(1)
kernel_nodata = -9999
kernel_band.SetNoDataValue(kernel_nodata)
col_index = numpy.array(range(kernel_size))
running_sum = 0.0
for row_index in range(kernel_size):
distance_kernel_row = numpy.sqrt(
(row_index - max_distance) ** 2 +
(col_index - max_distance) ** 2).reshape(1, kernel_size)
kernel = numpy.where(
distance_kernel_row > max_distance, 0.0,
(1 / (2.0 * numpy.pi * sigma ** 2) *
numpy.exp(-distance_kernel_row**2 / (2 * sigma ** 2))))
running_sum += numpy.sum(kernel)
kernel_band.WriteArray(kernel, xoff=0, yoff=row_index)
kernel_dataset.FlushCache()
kernel_band = None
kernel_dataset = None
if normalize:
kernel_dataset = gdal.OpenEx(kernel_filepath, gdal.GA_Update)
kernel_band = kernel_dataset.GetRasterBand(1)
for kernel_data, kernel_block in pygeoprocessing.iterblocks(
(kernel_filepath, 1)):
# divide by sum to normalize
kernel_block /= running_sum
kernel_band.WriteArray(
kernel_block, xoff=kernel_data['xoff'],
yoff=kernel_data['yoff'])
kernel_dataset.FlushCache()
kernel_band = None
kernel_dataset = None
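Likewise for the gaussian helper removed above: it writes a truncated normal distribution with radius sigma * n_std_dev, which is what normal_distribution_kernel provides (the scenario-generator hunk earlier already uses it). A hedged replacement sketch, assuming the pygeoprocessing default cutoff matches this helper's 3 standard deviations:

import pygeoprocessing.kernels

sigma = 10  # pixels; matches the gaussian regression test removed further down
pygeoprocessing.kernels.normal_distribution_kernel('kernel.tif', sigma=sigma)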
def build_file_registry(base_file_path_list, file_suffix):
"""Combine file suffixes with key names, base filenames, and directories.

View File

@@ -747,7 +747,7 @@ class CoastalVulnerabilityTests(unittest.TestCase):
ogr_geom_type=ogr.wkbPoint)
coastal_vulnerability._aggregate_raster_values_in_radius(
simple_points_path, raster_path, search_radius,
simple_points_path, raster_path, search_radius, self.workspace_dir,
target_pickle_path, _mean_op)
with open(target_pickle_path, 'rb') as file:
@@ -800,7 +800,7 @@ class CoastalVulnerabilityTests(unittest.TestCase):
coastal_vulnerability._aggregate_raster_values_in_radius(
simple_points_path, raster_path, sample_distance,
target_pickle_path, _mean_op)
self.workspace_dir, target_pickle_path, _mean_op)
with open(target_pickle_path, 'rb') as file:
actual_values = pickle.load(file)

View File

@@ -199,7 +199,7 @@ class PollinationTests(unittest.TestCase):
result_sum += numpy.sum(data_block)
# the number below is just what the sum rounded to two decimal places
# when I manually inspected a run that appeared to be correct.
self.assertAlmostEqual(result_sum, 58.669518, places=2)
self.assertAlmostEqual(result_sum, 58.407155, places=2)
def test_pollination_constant_abundance(self):
"""Pollination: regression testing when abundance is all 1."""
@@ -224,7 +224,7 @@
result_sum += numpy.sum(data_block)
# the number below is just what the sum rounded to two decimal places
# when I manually inspected a run that appeared to be correct.
self.assertAlmostEqual(result_sum, 68.44777, places=2)
self.assertAlmostEqual(result_sum, 68.14167, places=2)
def test_pollination_bad_guild_headers(self):
"""Pollination: testing that model detects bad guild headers."""

View File

@@ -666,36 +666,6 @@ class StormwaterTests(unittest.TestCase):
actual = pygeoprocessing.raster_to_numpy_array(out_path)
numpy.testing.assert_equal(expected, actual)
def test_make_search_kernel(self):
"""Stormwater: test make_search_kernel function."""
from natcap.invest import stormwater
array = numpy.zeros((10, 10))
path = os.path.join(self.workspace_dir, 'make_search_kernel.tif')
to_raster(array, path, pixel_size=(10, -10))
expected_5 = numpy.array([[1]], dtype=numpy.uint8)
actual_5 = stormwater.make_search_kernel(path, 5)
numpy.testing.assert_equal(expected_5, actual_5)
expected_9 = numpy.array([[1]], dtype=numpy.uint8)
actual_9 = stormwater.make_search_kernel(path, 9)
numpy.testing.assert_equal(expected_9, actual_9)
expected_10 = numpy.array([
[0, 1, 0],
[1, 1, 1],
[0, 1, 0]], dtype=numpy.uint8)
actual_10 = stormwater.make_search_kernel(path, 10)
numpy.testing.assert_equal(expected_10, actual_10)
expected_15 = numpy.array([
[1, 1, 1],
[1, 1, 1],
[1, 1, 1]], dtype=numpy.uint8)
actual_15 = stormwater.make_search_kernel(path, 15)
numpy.testing.assert_equal(expected_15, actual_15)
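The removed cases exercised the kernel shape at several radii; comparable coverage against the new code path might look like this hedged sketch (a hypothetical test, assuming dichotomous_kernel writes float pixels and uses an inclusive radius comparison):

def test_dichotomous_kernel(self):
    """Stormwater: sketch of a kernel-shape check via pygeoprocessing."""
    import pygeoprocessing
    import pygeoprocessing.kernels
    kernel_path = os.path.join(self.workspace_dir, 'kernel.tif')
    pygeoprocessing.kernels.dichotomous_kernel(
        target_kernel_path=kernel_path, max_distance=1, normalize=False)
    # pixels whose centers are within 1 pixel of the center form a cross
    expected = numpy.array([
        [0, 1, 0],
        [1, 1, 1],
        [0, 1, 0]], dtype=numpy.float32)
    numpy.testing.assert_equal(
        pygeoprocessing.raster_to_numpy_array(kernel_path), expected)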
def test_raster_average(self):
"""Stormwater: test raster_average function."""
from natcap.invest import stormwater

View File

@@ -27,7 +27,7 @@ class UCMTests(unittest.TestCase):
"""UCM: regression: CC Factors."""
import natcap.invest.urban_cooling_model
args = {
'workspace_dir': os.path.join(self.workspace_dir, 'workspace'),
'results_suffix': 'test_suffix',
't_ref': 35.0,
't_obs_raster_path': os.path.join(
@@ -69,7 +69,7 @@
'avg_cc': 0.222150472947109,
'avg_tmp_v': 37.325275675470998,
'avg_tmp_an': 2.325275675470998,
'avd_eng_cn': 3520203.713694,
'avd_eng_cn': 3520217.313878,
'avg_wbgt_v': 32.60417266705069,
'avg_ltls_v': 75.000000000000000,
'avg_hvls_v': 75.000000000000000,
@@ -90,7 +90,7 @@
# Assert that the decimal value of the energy savings value is what we
# expect.
expected_energy_sav = 3564024.929250
expected_energy_sav = 3564038.678764
energy_sav = 0.0
n_nonetype = 0
@@ -206,7 +206,7 @@
'avg_cc': 0.428302583240327,
'avg_tmp_v': 36.60869797039769,
'avg_tmp_an': 1.608697970397692,
'avd_eng_cn': 7239979.192322,
'avd_eng_cn': 7239992.744486,
'avg_wbgt_v': 31.91108630952381,
'avg_ltls_v': 28.73463901689708, # changed due to using float64 vs float32
'avg_hvls_v': 75.000000000000000,

View File

@@ -169,93 +169,6 @@ class UNATests(unittest.TestCase):
population_array.sum(), resampled_population_array.sum(),
rtol=1e-3)
def test_dichotomous_decay_simple(self):
"""UNA: Test dichotomous decay kernel on a simple case."""
from natcap.invest import urban_nature_access
expected_distance = 5
kernel_filepath = os.path.join(self.workspace_dir, 'kernel.tif')
urban_nature_access._create_kernel_raster(
urban_nature_access._kernel_dichotomy, expected_distance,
kernel_filepath)
expected_array = numpy.array([
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]], dtype=numpy.uint8)
extracted_kernel_array = pygeoprocessing.raster_to_numpy_array(
kernel_filepath)
numpy.testing.assert_array_equal(
expected_array, extracted_kernel_array)
def test_dichotomous_decay_normalized(self):
"""UNA: Test normalized dichotomous kernel."""
from natcap.invest import urban_nature_access
expected_distance = 5
kernel_filepath = os.path.join(self.workspace_dir, 'kernel.tif')
urban_nature_access._create_kernel_raster(
urban_nature_access._kernel_dichotomy,
expected_distance, kernel_filepath, normalize=True)
expected_array = numpy.array([
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]], dtype=numpy.float32)
expected_array /= numpy.sum(expected_array)
extracted_kernel_array = pygeoprocessing.raster_to_numpy_array(
kernel_filepath)
numpy.testing.assert_allclose(
expected_array, extracted_kernel_array)
def test_dichotomous_decay_large(self):
"""UNA: Test dichotomous decay on a very large pixel radius."""
from natcap.invest import urban_nature_access
# kernel with > 268 million pixels. This is big enough to force my
# laptop to noticeably hang while swapping memory on an all in-memory
# implementation.
expected_distance = 2**13
kernel_filepath = os.path.join(self.workspace_dir, 'kernel.tif')
urban_nature_access._create_kernel_raster(
urban_nature_access._kernel_dichotomy,
expected_distance, kernel_filepath)
expected_shape = (expected_distance*2+1, expected_distance*2+1)
expected_n_1_pixels = math.pi*expected_distance**2
kernel_info = pygeoprocessing.get_raster_info(kernel_filepath)
n_1_pixels = 0
for _, block in pygeoprocessing.iterblocks((kernel_filepath, 1)):
n_1_pixels += numpy.count_nonzero(block)
# 210828417 is only a slight underestimate of the area of the circle
# at this radius: math.pi*expected_distance**2 = 210828714.13315654
numpy.testing.assert_allclose(
n_1_pixels, expected_n_1_pixels, rtol=1e-5)
self.assertEqual(kernel_info['raster_size'], expected_shape)
def test_density_decay_simple(self):
"""UNA: Test density decay."""
from natcap.invest import urban_nature_access
@@ -310,20 +223,6 @@
numpy.testing.assert_allclose(
expected_array, kernel)
def test_exponential_kernel(self):
"""UNA: Test the exponential decay kernel."""
from natcap.invest import urban_nature_access
max_distance = 3
distance = numpy.array([0, 1, 2, 3, 4])
kernel = urban_nature_access._kernel_exponential(
distance, max_distance)
# Regression values are calculated by hand
expected_array = numpy.array(
[1, 0.71653134, 0.5134171, 0.36787945, 0])
numpy.testing.assert_allclose(
expected_array, kernel)
def test_gaussian_kernel(self):
"""UNA: Test the gaussian decay kernel."""
from natcap.invest import urban_nature_access

View File

@@ -180,93 +180,6 @@ class FileRegistryUtilsTests(unittest.TestCase):
return result_dict
class ExponentialDecayUtilsTests(unittest.TestCase):
"""Tests for natcap.invest.utils.exponential_decay_kernel_raster."""
_REGRESSION_PATH = os.path.join(
os.path.dirname(__file__), '..', 'data', 'invest-test-data',
'exp_decay_kernel')
def setUp(self):
"""Setup workspace."""
self.workspace_dir = tempfile.mkdtemp()
def tearDown(self):
"""Delete workspace."""
shutil.rmtree(self.workspace_dir)
def test_exp_decay_kernel_raster(self):
"""Utils: test exponential_decay_kernel_raster."""
from natcap.invest import utils
expected_distance = 100  # pixels
kernel_filepath = os.path.join(self.workspace_dir, 'kernel_100.tif')
utils.exponential_decay_kernel_raster(
expected_distance, kernel_filepath)
model_array = pygeoprocessing.raster_to_numpy_array(
kernel_filepath)
reg_array = pygeoprocessing.raster_to_numpy_array(
os.path.join(
ExponentialDecayUtilsTests._REGRESSION_PATH,
'kernel_100.tif'))
numpy.testing.assert_allclose(model_array, reg_array, atol=1e-6)
class GaussianDecayUtilsTests(unittest.TestCase):
"""Tests for natcap.invest.utils.gaussian_decay_kernel_raster."""
def setUp(self):
"""Setup workspace."""
self.workspace_dir = tempfile.mkdtemp()
def tearDown(self):
"""Delete workspace."""
shutil.rmtree(self.workspace_dir)
def test_gaussian_kernel_raster(self):
"""Utils: test gaussian_decay_kernel_raster."""
from natcap.invest import utils
sigma = 10
# The kernel should have a radius of 3 standard deviations.
# We then double that for the diameter and add 1 to ensure there's a
# single pixel at the center.
kernel_dimension = sigma * 3 * 2 + 1
def gkern():
"""
Creates a gaussian kernel with side length ``kernel_dimension``
and a sigma of ``sigma``.
This function is adapted from the following stackoverflow answer:
https://stackoverflow.com/a/43346070/299084
"""
ax = numpy.linspace(
-(kernel_dimension - 1) / 2.,
(kernel_dimension - 1) / 2.,
kernel_dimension)
gauss = numpy.exp(-0.5 * numpy.square(ax) / numpy.square(sigma))
kernel = numpy.outer(gauss, gauss)
dist_from_center = numpy.hypot(
*numpy.mgrid[
-kernel_dimension/2:kernel_dimension/2,
-kernel_dimension/2:kernel_dimension/2])
# The sigma*3 is the maximum radius from the center
# Anything greater than that distance should be set to 0 by the
# gaussian kernel creation function.
kernel[dist_from_center > (sigma * 3)] = 0
return kernel / numpy.sum(kernel)
expected_matrix = gkern()
kernel_filepath = os.path.join(self.workspace_dir, 'kernel.tif')
utils.gaussian_decay_kernel_raster(sigma, kernel_filepath)
disk_kernel = pygeoprocessing.raster_to_numpy_array(kernel_filepath)
numpy.testing.assert_allclose(expected_matrix, disk_kernel, atol=1e-4)
class SandboxTempdirTests(unittest.TestCase):
"""Test Sandbox Tempdir."""