"""InVEST Recreation model tests."""
import datetime
import functools
import json
import logging
import math
import multiprocessing
import os
import pickle
import queue
import random
import shutil
import socket
import string
import tempfile
import threading
import time
import unittest
import warnings
import zipfile
from unittest.mock import patch

import numpy
from osgeo import gdal
from osgeo import ogr
from osgeo import osr
import pandas
import pygeoprocessing
import pytest
import Pyro5
import Pyro5.api
import Pyro5.errors
import shapely
import shapely.geometry
import shapely.wkt
import taskgraph

from natcap.invest import utils

gdal.UseExceptions()

Pyro5.config.SERIALIZER = 'marshal'  # allow null bytes in strings

REGRESSION_DATA = os.path.join(
    os.path.dirname(__file__), '..', 'data', 'invest-test-data',
    'recreation')
SAMPLE_DATA = os.path.join(REGRESSION_DATA, 'input')

LOGGER = logging.getLogger('test_recreation')


def _timeout(max_timeout):
    """Timeout decorator, parameter in seconds."""
    def timeout_decorator(target):
        """Wrap the original function."""
        work_queue = queue.Queue()
        result_queue = queue.Queue()

        def worker():
            """Read one (func, args, kwargs) tuple and execute."""
            try:
                func, args, kwargs = work_queue.get()
                result = func(*args, **kwargs)
                result_queue.put(result)
            except Exception as e:
                result_queue.put(e)
                raise

        work_thread = threading.Thread(target=worker)
        work_thread.daemon = True
        work_thread.start()

        @functools.wraps(target)
        def func_wrapper(*args, **kwargs):
            """Closure for function."""
            try:
                work_queue.put((target, args, kwargs))
                result = result_queue.get(timeout=max_timeout)
                if isinstance(result, Exception):
                    raise result
                return result
            except queue.Empty:
                raise RuntimeError("Timeout of %f exceeded" % max_timeout)
        return func_wrapper
    return timeout_decorator
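
# Illustrative usage of _timeout (hypothetical test name): wrap a test so it
# fails with RuntimeError instead of hanging forever, e.g.
#
#     @_timeout(30.0)
#     def test_might_hang(self):
#         ...
#
# The worker runs on a daemon thread, so a target that never returns won't
# block interpreter shutdown, but it does keep running in the background
# after the timeout is raised.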


def _make_empty_files(base_file_list):
    """Create a list of empty files.

    Args:
        base_file_list: a list of paths to empty files to be created.

    Returns:
        None.

    """
    for file_path in base_file_list:
        with open(file_path, 'w') as open_file:
            open_file.write('')


def _resample_csv(base_csv_path, base_dst_path, resample_factor):
    """Resample (downsize) a csv file by a certain resample factor.

    Args:
        base_csv_path (str): path to the source csv file to be resampled.
        base_dst_path (str): path to the destination csv file.
        resample_factor (int): the factor used to determine how many rows
            should be skipped before writing a row to the destination file.

    Returns:
        None

    """
    with open(base_csv_path, 'r') as read_table:
        with open(base_dst_path, 'w') as write_table:
            for i, line in enumerate(read_table):
                if i % resample_factor == 0:
                    write_table.write(line)
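
# Note on _resample_csv: row 0 always satisfies ``i % resample_factor == 0``,
# so the CSV header line is kept along with every Nth data row thereafter.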


def _make_simple_lat_lon_aoi(
        geom_list, aoi_path, fields=None, attribute_list=None):
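    """Write a WGS84 (EPSG:4326) vector at ``aoi_path`` from shapely geoms.

    ``fields`` and ``attribute_list`` are passed through to
    ``pygeoprocessing.shapely_geometry_to_vector``.
    """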
    srs = osr.SpatialReference()
    srs.ImportFromEPSG(4326)  # WGS84
    wkt = srs.ExportToWkt()
    pygeoprocessing.shapely_geometry_to_vector(
        geom_list, aoi_path, wkt, 'GeoJSON', fields, attribute_list)


class TestBufferedNumpyDiskMap(unittest.TestCase):
    """Tests for BufferedNumpyDiskMap."""

    def setUp(self):
        """Setup workspace."""
        self.workspace_dir = tempfile.mkdtemp()

    def tearDown(self):
        """Delete workspace."""
        shutil.rmtree(self.workspace_dir)

    def test_basic_operation(self):
        """Recreation test buffered file manager basic ops w/ no buffer."""
        from natcap.invest.recreation import buffered_numpy_disk_map
        file_manager = buffered_numpy_disk_map.BufferedNumpyDiskMap(
            os.path.join(self.workspace_dir, 'test'), 0)

        file_manager.append(1234, numpy.array([1, 2, 3, 4]))
        file_manager.append(1234, numpy.array([1, 2, 3, 4]))
        file_manager.append(4321, numpy.array([-4, -1, -2, 4]))

        numpy.testing.assert_equal(
            file_manager.read(1234), numpy.array([1, 2, 3, 4, 1, 2, 3, 4]))

        numpy.testing.assert_equal(
            file_manager.read(4321), numpy.array([-4, -1, -2, 4]))

        file_manager.delete(1234)
        with self.assertRaises(IOError):
            file_manager.read(1234)

    def test_buffered_multiprocess_operation(self):
        """Recreation test buffered file manager parallel flushes."""
        from natcap.invest.recreation import buffered_numpy_disk_map

        array1 = numpy.array([1, 2, 3, 4])
        array2 = numpy.array([-4, -1, -2, 4])
        arraysize = array1.size * (
            buffered_numpy_disk_map.BufferedNumpyDiskMap
            ._ARRAY_TUPLE_TYPE.itemsize)
        buffer_size = arraysize * 2  # will flush every 3rd append
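
        # With two array IDs appended alternately, the cache holds
        # 2 * arraysize bytes after one append per ID; the next append
        # exceeds buffer_size, forcing a flush to disk that spans both IDs
        # (this exercises the n_workers=2 multiprocess flush path).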

        file_manager = buffered_numpy_disk_map.BufferedNumpyDiskMap(
            os.path.join(self.workspace_dir, 'test'), buffer_size,
            n_workers=2)

        # Make sure multiple array IDs are present in
        # the cache to trigger a multiprocess flush
        file_manager.append(1234, array1)
        file_manager.append(4321, array2)
        file_manager.append(1234, array1)
        file_manager.append(4321, array2)
        file_manager.append(1234, array1)
        file_manager.append(4321, array2)

        numpy.testing.assert_equal(
            file_manager.read(1234), numpy.tile(array1, 3))

        numpy.testing.assert_equal(
            file_manager.read(4321), numpy.tile(array2, 3))

        file_manager.delete(1234)
        with self.assertRaises(IOError):
            file_manager.read(1234)


class UnitTestRecServer(unittest.TestCase):
    """Tests for recmodel_server functions and the RecModel object."""

    def setUp(self):
        """Setup workspace."""
        self.workspace_dir = tempfile.mkdtemp()
        self.resampled_data_path = os.path.join(
            self.workspace_dir, 'resampled_data.csv')
        _resample_csv(
            os.path.join(SAMPLE_DATA, 'sample_data.csv'),
            self.resampled_data_path, resample_factor=10)

    def tearDown(self):
        """Delete workspace."""
        shutil.rmtree(self.workspace_dir, ignore_errors=True)

    def test_hashfile(self):
        """Recreation test for hash of file."""
        from natcap.invest.recreation import recmodel_server
        file_hash = recmodel_server._hashfile(
            self.resampled_data_path, blocksize=2**20, fast_hash=False)
        # The exact encoded string that is hashed depends on the Python
        # version, with Python 3 including a b prefix and \n suffix.
        # These hashes are for [py2.7, py3.6].
        self.assertIn(file_hash, ['c052e7a0a4c5e528', 'c8054b109d7a9d2a'])

    def test_hashfile_fast(self):
        """Recreation test for hash and fast hash of file."""
        from natcap.invest.recreation import recmodel_server
        file_hash = recmodel_server._hashfile(
            self.resampled_data_path, blocksize=2**20, fast_hash=True)
        # We can't assert the full hash since it is dependent on the file's
        # last access time, which we can't reliably set in Python. Instead
        # we just check that it at least ends with '_fast_hash'.
        self.assertTrue(file_hash.endswith('_fast_hash'))

    def test_year_order(self):
        """Recreation ensure that end year < start year raises ValueError."""
        from natcap.invest.recreation import recmodel_server

        with self.assertRaises(ValueError):
            # intentionally construct start year > end year
            recmodel_server.RecModel(
                2014, 2005, os.path.join(self.workspace_dir, 'server_cache'),
                raw_csv_filename=self.resampled_data_path)

    def test_local_aggregate_points(self):
        """Recreation test single threaded local AOI aggregate calculation."""
        from natcap.invest.recreation import recmodel_server

        recreation_server = recmodel_server.RecModel(
            2005, 2014, os.path.join(self.workspace_dir, 'server_cache'),
            raw_csv_filename=self.resampled_data_path)

        aoi_filename = 'test_aoi.geojson'
        aoi_path = os.path.join(
            self.workspace_dir, aoi_filename)
        # This polygon matches the test data shapefile we used formerly.
        geomstring = """
            POLYGON ((-5.54101768507434 56.1006500736864,
            1.2562729659521 56.007023480697,
            1.01284382417981 50.2396253525534,
            -5.2039619503127 49.9961962107811,
            -5.54101768507434 56.1006500736864))"""
        polygon = shapely.wkt.loads(geomstring)
        fields = {'poly_id': ogr.OFTInteger}
        attribute_list = [{'poly_id': 0}]
        _make_simple_lat_lon_aoi([polygon], aoi_path, fields, attribute_list)
        aoi_vector = gdal.OpenEx(aoi_path)

        aoi_archive_path = os.path.join(
            self.workspace_dir, 'aoi_zipped.zip')
        with zipfile.ZipFile(aoi_archive_path, 'w') as myzip:
            for filename in aoi_vector.GetFileList():
                myzip.write(filename, os.path.basename(filename))

        # convert the zipped vector to a binary string for serialization
        with open(aoi_archive_path, 'rb') as file:
            zip_file_binary = file.read()

        # transfer zipped file to server
        start_year = '2005'
        end_year = '2014'
        out_vector_filename = 'results_pud.gpkg'

        zip_result, workspace_id, version_str = (
            recreation_server.calc_user_days_in_aoi(
                zip_file_binary, aoi_filename, start_year, end_year,
                out_vector_filename))

        # unpack result
        result_zip_path = os.path.join(self.workspace_dir, 'pud_result.zip')
        with open(result_zip_path, 'wb') as file:
            file.write(zip_result)
        zipfile.ZipFile(result_zip_path, 'r').extractall(self.workspace_dir)
        result_vector_path = os.path.join(
            self.workspace_dir, out_vector_filename)

        expected_attributes = [
            {'poly_id': 0,
             'PUD_YR_AVG': 83.2,
             'PUD_JAN': 2.5,
             'PUD_FEB': 2.4,
             'PUD_MAR': 33.3,
             'PUD_APR': 24.9,
             'PUD_MAY': 6.5,
             'PUD_JUN': 4.6,
             'PUD_JUL': 1.6,
             'PUD_AUG': 1.9,
             'PUD_SEP': 1.3,
             'PUD_OCT': 2.0,
             'PUD_NOV': 1.0,
             'PUD_DEC': 1.2}
        ]
        fields = {field: ogr.OFTReal for field in expected_attributes[0]}

        expected_vector_path = os.path.join(
            self.workspace_dir, 'regression_pud.geojson')
        _make_simple_lat_lon_aoi(
            [polygon], expected_vector_path, fields, expected_attributes)
        utils._assert_vectors_equal(expected_vector_path, result_vector_path)

    def test_local_calc_poly_ud(self):
        """Recreation test single threaded local PUD calculation."""
        from natcap.invest.recreation import recmodel_server

        recreation_server = recmodel_server.RecModel(
            2005, 2014, os.path.join(self.workspace_dir, 'server_cache'),
            raw_csv_filename=self.resampled_data_path)

        date_range = (
            numpy.datetime64('2005-01-01'),
            numpy.datetime64('2014-12-31'))

        # write the AOI into the workspace rather than the current directory
        aoi_path = os.path.join(self.workspace_dir, 'aoi.geojson')
        # This polygon matches the test data shapefile we used formerly.
        geomstring = """
            POLYGON ((-5.54101768507434 56.1006500736864,
            1.2562729659521 56.007023480697,
            1.01284382417981 50.2396253525534,
            -5.2039619503127 49.9961962107811,
            -5.54101768507434 56.1006500736864))"""

        polygon = shapely.wkt.loads(geomstring)
        _make_simple_lat_lon_aoi([polygon], aoi_path)

        poly_test_queue = queue.Queue()
        poly_test_queue.put(0)
        poly_test_queue.put('STOP')
        pud_poly_feature_queue = queue.Queue()
        recmodel_server._calc_poly_ud(
            recreation_server.qt_pickle_filename, aoi_path,
            date_range, poly_test_queue, pud_poly_feature_queue)

        # assert annual average PUD is the same as regression
        self.assertEqual(
            83.2, pud_poly_feature_queue.get()[1][0])

    def test_local_calc_poly_ud_bad_aoi(self):
        """Recreation test PUD calculation with missing AOI features."""
        from natcap.invest.recreation import recmodel_server

        recreation_server = recmodel_server.RecModel(
            2005, 2014, os.path.join(self.workspace_dir, 'server_cache'),
            raw_csv_filename=self.resampled_data_path)

        date_range = (
            numpy.datetime64('2005-01-01'),
            numpy.datetime64('2014-12-31'))

        aoi_vector_path = os.path.join(self.workspace_dir, 'aoi.gpkg')
        gpkg_driver = gdal.GetDriverByName('GPKG')
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(32731)  # WGS84 / UTM zone 31S
        target_vector = gpkg_driver.Create(
            aoi_vector_path, 0, 0, 0, gdal.GDT_Unknown)
        target_layer = target_vector.CreateLayer(
            'target_layer', srs, ogr.wkbUnknown)

        # Testing with an AOI of 2 features, one is missing Geometry.
        input_geom_list = [
            None,
            ogr.CreateGeometryFromWkt(
                'POLYGON ((1 1, 1 0, 0 0, 0 1, 1 1))')]
        poly_test_queue = queue.Queue()
        poly_test_queue.put(1)  # gpkg FIDs start at 1
        poly_test_queue.put(2)
        target_layer.StartTransaction()
        for geometry in input_geom_list:
            feature = ogr.Feature(target_layer.GetLayerDefn())
            feature.SetGeometry(geometry)
            target_layer.CreateFeature(feature)
        target_layer.CommitTransaction()
        poly_test_queue.put('STOP')
        target_layer = None
        target_vector = None

        pud_poly_feature_queue = queue.Queue()
        recmodel_server._calc_poly_ud(
            recreation_server.qt_pickle_filename,
            aoi_vector_path, date_range, poly_test_queue,
            pud_poly_feature_queue)

        # assert PUD was calculated for the one good AOI feature.
        self.assertEqual(
            0.0, pud_poly_feature_queue.get()[1][0])

    def test_reuse_of_existing_quadtree(self):
        """Test init RecModel can reuse an existing quadtree on disk."""
        from natcap.invest.recreation import recmodel_server

        _ = recmodel_server.RecModel(
            2005, 2014, os.path.join(self.workspace_dir, 'server_cache'),
            raw_csv_filename=self.resampled_data_path)

        # This will not generate a new quadtree but instead load the
        # existing one
        with patch.object(
                recmodel_server, 'construct_userday_quadtree') as mock:
            _ = recmodel_server.RecModel(
                2005, 2014, os.path.join(self.workspace_dir, 'server_cache'),
                raw_csv_filename=self.resampled_data_path)
        self.assertFalse(mock.called)

    def test_parse_big_input_csv(self):
        """Recreation test parsing raw CSV."""
        from natcap.invest.recreation import recmodel_server

        block_offset_size_queue = queue.Queue()
        block_offset_size_queue.put((0, 2**10))
        block_offset_size_queue.put('STOP')
        numpy_array_queue = queue.Queue()
        recmodel_server._parse_big_input_csv(
            block_offset_size_queue, numpy_array_queue,
            self.resampled_data_path, 'flickr')
        val = recmodel_server._numpy_loads(numpy_array_queue.get())
        # we know what the first date is
        self.assertEqual(val[0][0], datetime.date(2013, 3, 16))

    def test_numpy_pickling_queue(self):
        """Recreation test _numpy_dumps and _numpy_loads."""
        from natcap.invest.recreation import recmodel_server

        numpy_array_queue = multiprocessing.Queue()
        array = numpy.empty(1, dtype='datetime64,f4')
        numpy_array_queue.put(recmodel_server._numpy_dumps(array))

        out_array = recmodel_server._numpy_loads(numpy_array_queue.get())
        numpy.testing.assert_equal(out_array, array)
        # Without _numpy_loads, the queue pickles the array imperfectly,
        # adding a metadata value to the `datetime64` dtype.
        # Assert that this doesn't happen. 'f0' is the first subdtype.
        self.assertEqual(out_array.dtype['f0'].metadata, None)

        # assert that saving the array does not raise a warning
        with warnings.catch_warnings(record=True) as ws:
            # cause all warnings to always be triggered
            warnings.simplefilter("always")
            numpy.save(os.path.join(self.workspace_dir, 'out'), out_array)
        # assert that no warning was raised
        self.assertTrue(len(ws) == 0)

    def test_construct_query_twitter_qt(self):
        """Recreation test constructing and querying twitter quadtree."""
        from natcap.invest.recreation import recmodel_server

        # Example rows in the expected csv format: user,date,lat,lon
        # 1117195232,2023-01-01,-22.908,-43.1975
        # 54900515,2023-01-01,44.62804,10.60603

        def make_twitter_csv(target_filename):
            dates = numpy.arange(
                numpy.datetime64('2017-01-01'),
                numpy.datetime64('2017-12-31'))
            lats = numpy.arange(-90.0, 90.0, 180/len(dates))
            lons = numpy.arange(-180.0, 180.0, 360/len(dates))
            users = [
                ''.join(random.choices(string.digits, k=n))
                for n in random.choices(range(7, 18), k=len(dates))
            ]
            pandas.DataFrame({
                'user': users,
                'date': dates,
                'lat': lats,
                'lon': lons
            }, index=None).to_csv(target_filename, index=False)

        raw_csv_file_list = [
            os.path.join(self.workspace_dir, 'a.csv'),
            os.path.join(self.workspace_dir, 'b.csv'),
        ]
        for filename in raw_csv_file_list:
            make_twitter_csv(filename)

        cache_dir = os.path.join(self.workspace_dir, 'cache')
        ooc_qt_picklefilename = os.path.join(cache_dir, 'qt.pickle')

        recmodel_server.construct_userday_quadtree(
            recmodel_server.INITIAL_BOUNDING_BOX,
            raw_csv_file_list, 'twitter',
            cache_dir, ooc_qt_picklefilename,
            recmodel_server.GLOBAL_MAX_POINTS_PER_NODE,
            recmodel_server.GLOBAL_DEPTH)

        min_year = 2016
        max_year = 2018
        server = recmodel_server.RecModel(
            min_year, max_year, cache_dir,
            quadtree_pickle_filename=ooc_qt_picklefilename,
            dataset_name='twitter')

        aoi_path = os.path.join(self.workspace_dir, 'aoi.geojson')
        target_filename = 'results.shp'  # model uses ESRI Shapefile driver
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(4326)  # WGS84
        wkt = srs.ExportToWkt()
        polygon = shapely.geometry.box(-120, -60, 120, 60)
        poly_id = 999
        pygeoprocessing.shapely_geometry_to_vector(
            [polygon], aoi_path, wkt, 'GeoJSON',
            fields={'poly_id': ogr.OFTInteger},
            attribute_list=[{'poly_id': poly_id}])

        start_year = '2017'
        end_year = '2017'
        _, monthly_table_path = server._calc_aggregated_points_in_aoi(
            aoi_path, self.workspace_dir, start_year, end_year,
            target_filename)
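
        # The synthetic points sweep latitude (-90 to 90) and longitude
        # (-180 to 180) in step with the dates, so dates near the start and
        # end of the year fall outside the AOI box (-120, -60, 120, 60):
        # hence the zero counts expected for Jan-Feb and Nov-Dec below.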

        expected_result_table = pandas.DataFrame({
            'poly_id': [999],
            '2017-1': [0],
            '2017-2': [0],
            '2017-3': [58],
            '2017-4': [60],
            '2017-5': [62],
            '2017-6': [60],
            '2017-7': [62],
            '2017-8': [62],
            '2017-9': [60],
            '2017-10': [62],
            '2017-11': [0],
            '2017-12': [0]
        }, index=None)
        result_table = pandas.read_csv(monthly_table_path)
        pandas.testing.assert_frame_equal(
            expected_result_table, result_table, check_dtype=False)

    def test_local_query_bad_dates(self):
        """Recreation test local AOI aggregation with invalid date range."""
        from natcap.invest.recreation import recmodel_server

        recreation_server = recmodel_server.RecModel(
            2005, 2014, os.path.join(self.workspace_dir, 'server_cache'),
            raw_csv_filename=self.resampled_data_path)

        start_year = 2005
        end_year = 2099
        with self.assertRaises(ValueError) as error:
            recreation_server._calc_aggregated_points_in_aoi(
                'aoi.gpkg', self.workspace_dir, start_year, end_year,
                'results.gpkg')
        self.assertIn('End year must be between', str(error.exception))


def _synthesize_points_in_aoi(aoi_path, target_flickr_path, target_twitter_path, n_points):
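    """Write synthetic flickr- and twitter-style point CSVs inside an AOI.

    Points are laid out on a roughly regular lon/lat grid spanning the AOI's
    bounding box, dated through 2017, and assigned to a pool of 50 random
    user ids.

    Args:
        aoi_path (str): path to a vector whose bounding box constrains the
            points.
        target_flickr_path (str): path to the flickr-style CSV to create.
        target_twitter_path (str): path to the twitter-style CSV to create.
        n_points (int): total number of points to generate; expected to be
            a perfect square.

    Returns:
        None
    """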
    srs = osr.SpatialReference()
    srs.ImportFromEPSG(4326)  # WGS84
    target_wkt = srs.ExportToWkt()

    aoi_info = pygeoprocessing.get_vector_info(aoi_path)
    lonlat_bbox = pygeoprocessing.transform_bounding_box(
        aoi_info['bounding_box'], aoi_info['projection_wkt'], target_wkt)
    origin_x, origin_y = lonlat_bbox[:2]

    x_size = int(math.sqrt(n_points))
    lon = numpy.linspace(lonlat_bbox[0], lonlat_bbox[2], num=x_size)
    lat = numpy.linspace(lonlat_bbox[1], lonlat_bbox[3], num=x_size)
    offsets = numpy.geomspace(0.01, 1, num=x_size)
    lon = lon + offsets
    lat = lat + offsets
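    # The geomspace offsets jitter the otherwise perfectly regular grid so
    # points are unevenly spaced, with the jitter growing from 0.01 to 1
    # degree across the range (presumably to avoid degenerate,
    # exactly-gridded point patterns).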

    flickr_dates = pandas.date_range(
        numpy.datetime64('2017-01-01 12:12:12'),
        numpy.datetime64('2017-12-31 12:12:12'), freq='D')
    twitter_dates = pandas.date_range(
        numpy.datetime64('2017-01-01'),
        numpy.datetime64('2017-12-31'), freq='D')

    users = numpy.array([
        ''.join(random.choices(string.digits, k=n))
        for n in random.choices(range(7, 18), k=50)  # 50 unique people
    ])

    x, y = numpy.meshgrid(lon, lat)
    user_array = users[numpy.arange(n_points) % len(users)]

    flickr_df = pandas.DataFrame({
        'id': range(n_points),
        'user': user_array,
        'date': flickr_dates[numpy.arange(n_points) % len(flickr_dates)],
        'lat': y.flatten(),
        'lon': x.flatten(),
        'accuracy': [16] * n_points
    }, index=None)
    flickr_df.to_csv(target_flickr_path, index=False)

    twitter_df = flickr_df.drop(columns=['id', 'accuracy'])
    twitter_df['date'] = twitter_dates[
        numpy.arange(n_points) % len(twitter_dates)]
    twitter_df.to_csv(target_twitter_path, index=False)


class TestRecClientServer(unittest.TestCase):
    """Client regression tests using a server executing in a local process."""

    @classmethod
    def setUpClass(cls):
        """Setup Rec model server."""
        from natcap.invest.recreation import recmodel_server

        cls.server_workspace_dir = tempfile.mkdtemp()
        flickr_csv_path = os.path.join(
            cls.server_workspace_dir, 'flickr_sample_data.csv')
        twitter_csv_path = os.path.join(
            cls.server_workspace_dir, 'twitter_sample_data.csv')
        n_points = 50**2  # a perfect square for convenience
        _synthesize_points_in_aoi(
            os.path.join(SAMPLE_DATA, 'andros_aoi.shp'),
            flickr_csv_path, twitter_csv_path,
            n_points=n_points)

        # Attempt to get an open port; this could create a race condition
        # but should be okay for a test. If this test ever fails because a
        # port is already in use, that's probably why.
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.bind(('', 0))
        cls.port = sock.getsockname()[1]
        sock.close()
        sock = None
        cls.hostname = 'localhost'

        server_args = {
            'hostname': cls.hostname,
            'port': cls.port,
            'cache_workspace': cls.server_workspace_dir,
            'max_points_per_node': 200,
            'max_allowable_query': n_points - 100,
            'datasets': {
                'flickr': {
                    'raw_csv_point_data_path': flickr_csv_path,
                    'min_year': 2005,
                    'max_year': 2017
                },
                'twitter': {
                    'raw_csv_point_data_path': twitter_csv_path,
                    'min_year': 2012,
                    'max_year': 2022
                }
            }
        }

        cls.server_process = multiprocessing.Process(
            target=recmodel_server.execute, args=(server_args,),
            daemon=False)
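        # daemon=False: a daemonic process may not spawn children of its
        # own, and the recreation server presumably forks worker processes
        # while building quadtrees and answering queries.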
        cls.server_process.start()

        # recmodel_server.execute takes ~10-20 seconds to build quadtrees
        # before the remote Pyro object is ready.
        proxy = Pyro5.api.Proxy(
            f'PYRO:natcap.invest.recreation@{cls.hostname}:{cls.port}')
        ready = False
        while not ready and cls.server_process.is_alive():
            try:
                # _pyroBind() forces the client-server handshake and
                # seems like a good way to check if the remote object is
                # ready
                proxy._pyroBind()
            except Pyro5.errors.CommunicationError:
                time.sleep(2)
                continue
            ready = True
        proxy._pyroRelease()

    @classmethod
    def tearDownClass(cls):
        """Delete workspace and terminate child process."""
        cls.server_process.terminate()
        shutil.rmtree(cls.server_workspace_dir, ignore_errors=True)

    def setUp(self):
        """Create workspace."""
        self.workspace_dir = tempfile.mkdtemp()

    def tearDown(self):
        """Delete workspace."""
        shutil.rmtree(self.workspace_dir, ignore_errors=True)

    def test_all_metrics_local_server(self):
        """Recreation test with all but trivial predictor metrics."""
        from natcap.invest.recreation import recmodel_client

        suffix = 'foo'
        args = {
            'aoi_path': os.path.join(
                SAMPLE_DATA, 'andros_aoi.shp'),
            'compute_regression': True,
            'start_year': recmodel_client.MIN_YEAR,
            'end_year': recmodel_client.MAX_YEAR,
            'grid_aoi': True,
            'cell_size': 30000,
            'grid_type': 'hexagon',
            'predictor_table_path': os.path.join(
                SAMPLE_DATA, 'predictors_all.csv'),
            'scenario_predictor_table_path': os.path.join(
                SAMPLE_DATA, 'predictors_all.csv'),
            'results_suffix': suffix,
            'workspace_dir': self.workspace_dir,
            'hostname': self.hostname,
            'port': self.port,
        }
        recmodel_client.execute(args)

        out_regression_vector_path = os.path.join(
            args['workspace_dir'], f'regression_data_{suffix}.gpkg')

        predictor_df = recmodel_client.MODEL_SPEC.get_input(
            'predictor_table_path').get_validated_dataframe(
                os.path.join(SAMPLE_DATA, 'predictors_all.csv'))
        field_list = list(predictor_df.index) + [
            'pr_TUD', 'pr_PUD', 'avg_pr_UD']

        # For convenience, assert the sums of the columns instead of all
        # the individual values.
        actual_sums = sum_vector_columns(
            out_regression_vector_path, field_list)
        expected_sums = {
            'ports': 11.0,
            'airdist': 875291.8190812231,
            'bonefish_a': 4630187907.293639,
            'bathy': 47.16540460441528,
            'roads': 5072.707571235277,
            'bonefish_p': 792.0711806443292,
            'bathy_sum': 348.04177433624864,
            'pr_TUD': 1.0,
            'pr_PUD': 1.0,
            'avg_pr_UD': 1.0
        }
        for key in expected_sums:
            numpy.testing.assert_almost_equal(
                actual_sums[key], expected_sums[key], decimal=3)

        out_scenario_path = os.path.join(
            args['workspace_dir'], f'scenario_results_{suffix}.gpkg')
        field_list = list(predictor_df.index) + ['pr_UD_EST']
        actual_scenario_sums = sum_vector_columns(
            out_scenario_path, field_list)
        expected_scenario_sums = {
            'ports': 11.0,
            'airdist': 875291.8190812231,
            'bonefish_a': 4630187907.293639,
            'bathy': 47.16540460441528,
            'roads': 5072.707571235277,
            'bonefish_p': 792.0711806443292,
            'bathy_sum': 348.04177433624864,
            'pr_UD_EST': 0.996366808597374
        }
        for key in expected_scenario_sums:
            numpy.testing.assert_almost_equal(
                actual_scenario_sums[key],
                expected_scenario_sums[key], decimal=3)

        # assert that all tabular outputs are indexed by the same poly_id
        output_aoi_path = os.path.join(
            args['workspace_dir'], 'intermediate', f'aoi_{suffix}.gpkg')
        aoi_vector = gdal.OpenEx(output_aoi_path, gdal.OF_VECTOR)
        aoi_layer = aoi_vector.GetLayer()
        aoi_poly_id_list = [feature.GetField(
            recmodel_client.POLYGON_ID_FIELD) for feature in aoi_layer]
        aoi_layer = aoi_vector = None

        output_vector_list = [
            out_scenario_path,
            out_regression_vector_path,
            os.path.join(args['workspace_dir'], f'PUD_results_{suffix}.gpkg'),
            os.path.join(args['workspace_dir'], f'TUD_results_{suffix}.gpkg')]
        for vector_filepath in output_vector_list:
            vector = gdal.OpenEx(vector_filepath, gdal.OF_VECTOR)
            layer = vector.GetLayer()
            id_list = [feature.GetField(
                recmodel_client.POLYGON_ID_FIELD) for feature in layer]
            self.assertEqual(id_list, aoi_poly_id_list)
            vector = layer = None

    def test_workspace_fetcher(self):
        """Recreation test workspace fetcher on a local Pyro5 server."""
        from natcap.invest.recreation import recmodel_workspace_fetcher
        from natcap.invest.recreation import recmodel_client

        args = {
            'aoi_path': os.path.join(
                SAMPLE_DATA, 'andros_aoi.shp'),
            'compute_regression': False,
            'start_year': recmodel_client.MIN_YEAR,
            'end_year': recmodel_client.MAX_YEAR,
            'grid_aoi': False,
            'workspace_dir': self.workspace_dir,
            'hostname': self.hostname,
            'port': self.port,
        }
        recmodel_client.execute(args)

        # workspace IDs are stored in this file
        server_version_path = os.path.join(
            args['workspace_dir'], 'intermediate', 'server_version.pickle')
        with open(server_version_path, 'rb') as f:
            server_data = pickle.load(f)

        server = 'flickr'
        workspace_id = server_data[server]['workspace_id']
        fetcher_args = {
            'workspace_dir': os.path.join(self.workspace_dir, server),
            'hostname': 'localhost',
            'port': self.port,
            'workspace_id': workspace_id,
            'server_id': server,
        }

        recmodel_workspace_fetcher.execute(fetcher_args)
        zip_path = os.path.join(
            fetcher_args['workspace_dir'], f'{server}_{workspace_id}.zip')
        zipfile.ZipFile(zip_path, 'r').extractall(
            fetcher_args['workspace_dir'])

        utils._assert_vectors_equal(
            os.path.join(
                fetcher_args['workspace_dir'], 'aoi.gpkg'),
            os.path.join(
                args['workspace_dir'], 'intermediate', 'aoi.gpkg'))

    def test_start_year_out_of_range(self):
        """Test server sends valid date ranges; client raises ValueError."""
        from natcap.invest.recreation import recmodel_client

        args = {
            'aoi_path': os.path.join(SAMPLE_DATA, 'andros_aoi.shp'),
            'compute_regression': False,
            'start_year': '1219',  # start year ridiculously out of range
            'end_year': recmodel_client.MAX_YEAR,
            'grid_aoi': False,
            'workspace_dir': self.workspace_dir,
            'hostname': self.hostname,
            'port': self.port
        }

        with self.assertRaises(ValueError) as cm:
            recmodel_client.execute(args)
        actual_message = str(cm.exception)
        expected_message = 'Start year must be between'
        self.assertIn(expected_message, actual_message)

    def test_end_year_out_of_range(self):
        """Test server sends valid date ranges; client raises ValueError."""
        from natcap.invest.recreation import recmodel_client

        args = {
            'aoi_path': os.path.join(SAMPLE_DATA, 'andros_aoi.shp'),
            'compute_regression': False,
            'start_year': recmodel_client.MIN_YEAR,
            'end_year': '2219',  # end year ridiculously out of range
            'grid_aoi': False,
            'workspace_dir': self.workspace_dir,
            'hostname': self.hostname,
            'port': self.port
        }

        with self.assertRaises(ValueError) as cm:
            recmodel_client.execute(args)
        actual_message = str(cm.exception)
        expected_message = 'End year must be between'
        self.assertIn(expected_message, actual_message)

    def test_aoi_too_large(self):
        """Test server checks AOI size; client raises exception."""
        from natcap.invest.recreation import recmodel_client

        aoi_path = os.path.join(self.workspace_dir, 'aoi.geojson')
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(4326)  # WGS84
        wkt = srs.ExportToWkt()

        # This AOI should capture all the points in the quadtree,
        # which should exceed the max_allowable_query limit set
        # in the server args.
        aoi_geometries = [shapely.geometry.Polygon([
            (-180, -90), (-180, 90), (180, 90), (180, -90), (-180, -90)])]
        pygeoprocessing.shapely_geometry_to_vector(
            aoi_geometries, aoi_path, wkt, 'GeoJSON')
        args = {
            'aoi_path': aoi_path,
            'compute_regression': False,
            'start_year': recmodel_client.MIN_YEAR,
            'end_year': recmodel_client.MAX_YEAR,
            'grid_aoi': False,
            'workspace_dir': self.workspace_dir,
            'hostname': self.hostname,
            'port': self.port
        }

        with self.assertRaises(ValueError) as cm:
            recmodel_client.execute(args)
        actual_message = str(cm.exception)
        expected_message = 'The AOI extent is too large'
        self.assertIn(expected_message, actual_message)


class RecreationClientRegressionTests(unittest.TestCase):
    """Regression & unit tests for recmodel_client."""

    def setUp(self):
        """Setup workspace directory."""
        # this lets us delete the workspace after it's done, no matter
        # the test result
        self.workspace_dir = tempfile.mkdtemp()

    def tearDown(self):
        """Delete workspace."""
        shutil.rmtree(self.workspace_dir)

    def test_data_different_projection(self):
        """Recreation can validate if data is in a different projection."""
        from natcap.invest.recreation import recmodel_client

        response_vector_path = os.path.join(SAMPLE_DATA, 'andros_aoi.shp')
        table_path = os.path.join(
            SAMPLE_DATA, 'predictors_wrong_projection.csv')
        msg = recmodel_client._validate_same_projection(
            response_vector_path, table_path)
        self.assertIn('did not match the projection', msg)

    def test_different_tables(self):
        """Recreation can validate if scenario ids differ from predictors."""
        from natcap.invest.recreation import recmodel_client

        base_table_path = os.path.join(
            SAMPLE_DATA, 'predictors_all.csv')
        scenario_table_path = os.path.join(
            SAMPLE_DATA, 'predictors.csv')
        msg = recmodel_client._validate_same_ids_and_types(
            base_table_path, scenario_table_path)
        self.assertIn('table pairs unequal', msg)

    def test_delay_op(self):
        """Recreation coverage of delay op function."""
        from natcap.invest.recreation import recmodel_client

        # not much to test here other than that the function is invoked;
        # guarantee the delay has elapsed, since we can't have negative time
        last_time = -1.0
        time_delay = 1.0
        called = [False]

        def func():
            """Set `called` to True."""
            called[0] = True
        recmodel_client.delay_op(last_time, time_delay, func)
        self.assertTrue(called[0])

    def test_raster_sum_mean_no_nodata(self):
        """Recreation test sum/mean if raster doesn't have nodata defined."""
        from natcap.invest.recreation import recmodel_client

        # The following raster has no nodata value
        raster_path = os.path.join(SAMPLE_DATA, 'no_nodata_raster.tif')

        response_vector_path = os.path.join(SAMPLE_DATA, 'andros_aoi.shp')
        target_path = os.path.join(self.workspace_dir, "predictor.json")
        recmodel_client._raster_sum_mean(
            raster_path, "mean", response_vector_path, target_path)

        with open(target_path, 'r') as file:
            predictor_results = json.load(file)
        # These constants were calculated by hand by Dave.
        numpy.testing.assert_allclose(
            predictor_results['0'], 13.0, rtol=0, atol=1e-6)

    def test_raster_sum_mean_nodata(self):
        """Recreation test sum/mean if raster has no valid pixels.

        This may be a raster that does not intersect with the AOI, or
        one that does intersect, but is entirely nodata within the AOI.
        Such a raster is not usable as a predictor variable.
        """
        from natcap.invest.recreation import recmodel_client

        # The following raster has only nodata pixels.
        raster_path = os.path.join(SAMPLE_DATA, 'nodata_raster.tif')
        response_vector_path = os.path.join(SAMPLE_DATA, 'andros_aoi.shp')
        target_path = os.path.join(self.workspace_dir, "predictor.json")

        recmodel_client._raster_sum_mean(
            raster_path, "sum", response_vector_path, target_path)

        with open(target_path, 'r') as file:
            predictor_results = json.load(file)
        # Assert that the target file was written and is an empty dictionary
        self.assertEqual(len(predictor_results), 0)

    def test_overlapping_features_in_polygon_predictor(self):
        """Test overlapping predictor features are not double-counted.

        If a polygon predictor contains features that overlap, the
        overlapping area should only be counted once when calculating
        `polygon_area_coverage` or `polygon_percent_coverage`.
        """
        from natcap.invest.recreation import recmodel_client

        response_vector_path = os.path.join(self.workspace_dir, 'aoi.geojson')
        response_polygons_pickle_path = os.path.join(
            self.workspace_dir, 'response.pickle')
        predictor_vector_path = os.path.join(
            self.workspace_dir, 'predictor.geojson')
        predictor_target_path = os.path.join(
            self.workspace_dir, 'predictor.json')

        srs = osr.SpatialReference()
        srs.ImportFromEPSG(32610)  # a UTM system

        # A unit square
        response_geom = shapely.geometry.Polygon(
            ((0., 0.), (0., 1.), (1., 1.), (1., 0.), (0., 0.)))
        pygeoprocessing.shapely_geometry_to_vector(
            [response_geom],
            response_vector_path,
            srs.ExportToWkt(),
            'GEOJSON')

        # Two overlapping polygons, including a unit square
        predictor_geom_list = [
            shapely.geometry.Polygon(
                ((0., 0.), (0., 1.), (1., 1.), (1., 0.), (0., 0.))),
            shapely.geometry.Polygon(
                ((0., 0.), (0., 0.5), (0.5, 0.5), (0.5, 0.), (0., 0.)))]
        pygeoprocessing.shapely_geometry_to_vector(
            predictor_geom_list,
            predictor_vector_path,
            srs.ExportToWkt(),
            'GEOJSON')

        recmodel_client._prepare_response_polygons_lookup(
            response_vector_path, response_polygons_pickle_path)
        recmodel_client._polygon_area(
            'polygon_area_coverage',
            response_polygons_pickle_path,
            predictor_vector_path,
            predictor_target_path)

        with open(predictor_target_path, 'r') as file:
            data = json.load(file)
        actual_value = list(data.values())[0]
        expected_value = 1
        self.assertEqual(actual_value, expected_value)

    def test_compute_and_summarize_regression(self):
        """Recreation regression test for the least-squares linear model."""
        from natcap.invest.recreation import recmodel_client

        data_vector_path = os.path.join(
            self.workspace_dir, 'regression_data.gpkg')
        driver = 'GPKG'
        n_features = 10
        userdays = numpy.linspace(0, 1, n_features)
        avg_pr_UD = userdays / userdays.sum()
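        # avg_pr_UD is the regression's response variable: each feature's
        # share (proportion) of the total user-days, so the column sums to 1.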
        roads = numpy.linspace(0, 100, n_features)
        parks = numpy.linspace(0, 100, n_features)**2
        response_id = 'avg_pr_UD'

        attribute_list = []
        for i in range(n_features):
            attribute_list.append({
                response_id: avg_pr_UD[i],
                'roads': roads[i],
                'parks': parks[i]
            })
        field_map = {
            response_id: ogr.OFTReal,
            'roads': ogr.OFTReal,
            'parks': ogr.OFTReal,
        }

        # The geometries don't matter.
        geom_list = [shapely.geometry.Point(1, -1)] * n_features
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(4326)
        pygeoprocessing.shapely_geometry_to_vector(
            geom_list,
            data_vector_path,
            srs.ExportToWkt(),
            driver,
            fields=field_map,
            attribute_list=attribute_list,
            ogr_geom_type=ogr.wkbPoint)
        predictor_list = ['roads', 'parks']
        # write the pickle into the workspace rather than the current dir
        server_version_path = os.path.join(
            self.workspace_dir, 'server_version.pickle')
        with open(server_version_path, 'wb') as f:
            pickle.dump('version: foo', f)
        target_coefficient_json_path = os.path.join(
            self.workspace_dir, 'estimates.json')
        target_coefficient_csv_path = os.path.join(
            self.workspace_dir, 'estimates.csv')
        target_regression_summary_path = os.path.join(
            self.workspace_dir, 'summary.txt')
        recmodel_client._compute_and_summarize_regression(
            data_vector_path, response_id, predictor_list,
            server_version_path,
            target_coefficient_json_path, target_coefficient_csv_path,
            target_regression_summary_path)

        coefficient_results = {}
        coefficient_results['estimate'] = [
            5.953980e-02, -3.056440e-04, -4.388366e+00]
        coefficient_results['stderr'] = [
            3.769794e-03, 3.629146e-05, 8.094584e-02]
        coefficient_results['t-value'] = [
            15.793912, -8.421927, -54.213612]
        summary_results = {
            'SSres': '0.0742',
            'Multiple R-squared': '0.9921',
            'Adjusted R-squared': '0.9898',
            'Residual standard error': '0.1030 on 7 degrees of freedom'
        }

        results = pandas.read_csv(
            target_coefficient_csv_path).to_dict(orient='list')
        for key in coefficient_results:
            numpy.testing.assert_allclose(
                results[key], coefficient_results[key], rtol=1e-05)

        with open(target_regression_summary_path, 'r') as file:
            for line in file.readlines():
                for k, v in summary_results.items():
                    if line.startswith(k):
                        self.assertEqual(line.split(': ')[1].rstrip(), v)

    def test_regression_edge_case(self):
        """Recreation unit test with only one non-zero userday observation."""
        from natcap.invest.recreation import recmodel_client

        data_vector_path = os.path.join(
            self.workspace_dir, 'regression_data.gpkg')
        driver = 'GPKG'
        n_features = 10
        userdays = numpy.array([0] * n_features)
        userdays[0] = 1  # one non-zero value
        avg_pr_UD = userdays / userdays.sum()
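        # With a single non-zero observation the response is (nearly)
        # constant, a degenerate regression input that _build_regression
        # is expected to reject below with a ValueError.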
        roads = numpy.linspace(0, 100, n_features)
        response_id = 'avg_pr_UD'

        attribute_list = []
        for i in range(n_features):
            attribute_list.append({
                response_id: avg_pr_UD[i],
                'roads': roads[i],
            })
        field_map = {
            response_id: ogr.OFTReal,
            'roads': ogr.OFTReal,
        }

        # The geometries don't matter.
        geom_list = [shapely.geometry.Point(1, -1)] * n_features
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(4326)
        pygeoprocessing.shapely_geometry_to_vector(
            geom_list,
            data_vector_path,
            srs.ExportToWkt(),
            driver,
            fields=field_map,
            attribute_list=attribute_list,
            ogr_geom_type=ogr.wkbPoint)
        predictor_list = ['roads']

        with self.assertRaises(ValueError):
            recmodel_client._build_regression(
                data_vector_path, predictor_list,
                response_id)

    def test_copy_aoi_no_grid(self):
        """Recreation test AOI copy adds poly_id field."""
        from natcap.invest.recreation import recmodel_client

        out_grid_vector_path = os.path.join(
            self.workspace_dir, 'aoi.gpkg')

        recmodel_client._copy_aoi_no_grid(
            os.path.join(SAMPLE_DATA, 'andros_aoi.shp'),
            out_grid_vector_path)

        vector = gdal.OpenEx(out_grid_vector_path, gdal.OF_VECTOR)
        layer = vector.GetLayer()
        n_features = layer.GetFeatureCount()
        poly_id_list = [feature.GetField(
            recmodel_client.POLYGON_ID_FIELD) for feature in layer]
        self.assertEqual(poly_id_list, list(range(n_features)))
        layer = vector = None

    def test_square_grid(self):
        """Recreation square grid regression test."""
        from natcap.invest.recreation import recmodel_client

        out_grid_vector_path = os.path.join(
            self.workspace_dir, 'square_grid_vector_path.gpkg')

        recmodel_client._grid_vector(
            os.path.join(SAMPLE_DATA, 'andros_aoi.shp'), 'square', 20000.0,
            out_grid_vector_path)

        vector = gdal.OpenEx(out_grid_vector_path, gdal.OF_VECTOR)
        layer = vector.GetLayer()
        n_features = layer.GetFeatureCount()
        poly_id_list = [feature.GetField(
            recmodel_client.POLYGON_ID_FIELD) for feature in layer]
        self.assertEqual(poly_id_list, list(range(n_features)))
        layer = vector = None
        # andros_aoi.shp fits 38 squares at a 20000 meter cell size
        self.assertEqual(n_features, 38)

    def test_hex_grid(self):
        """Recreation hex grid regression test."""
        from natcap.invest.recreation import recmodel_client

        out_grid_vector_path = os.path.join(
            self.workspace_dir, 'hex_grid_vector_path.gpkg')

        recmodel_client._grid_vector(
            os.path.join(SAMPLE_DATA, 'andros_aoi.shp'), 'hexagon', 20000.0,
            out_grid_vector_path)

        vector = gdal.OpenEx(out_grid_vector_path, gdal.OF_VECTOR)
        layer = vector.GetLayer()
        n_features = layer.GetFeatureCount()
        layer = vector = None
        # andros_aoi.shp fits 71 hexes at a 20000 meter cell size
        self.assertEqual(n_features, 71)

    def test_predictor_id_too_long(self):
        """Recreation can validate predictor ID length."""
        from natcap.invest.recreation import recmodel_client

        args = {
            'aoi_path': os.path.join(SAMPLE_DATA, 'andros_aoi.shp'),
            'compute_regression': True,
            'start_year': recmodel_client.MIN_YEAR,
            'end_year': recmodel_client.MAX_YEAR,
            'grid_aoi': True,
            'grid_type': 'square',
            'cell_size': 20000,
            'predictor_table_path': os.path.join(
                SAMPLE_DATA, 'predictors_id_too_long.csv'),
            'results_suffix': '',
            'workspace_dir': self.workspace_dir,
        }
        msgs = recmodel_client.validate(args)
        self.assertIn('more than 10 characters long', msgs[0][1])

    def test_existing_gridded_aoi_shapefiles(self):
        """Recreation grid test when output files need to be overwritten."""
        from natcap.invest.recreation import recmodel_client

        out_grid_vector_path = os.path.join(
            self.workspace_dir, 'hex_grid_vector_path.gpkg')

        recmodel_client._grid_vector(
            os.path.join(SAMPLE_DATA, 'andros_aoi.shp'), 'hexagon', 20000.0,
            out_grid_vector_path)
        # overwrite output
        recmodel_client._grid_vector(
            os.path.join(SAMPLE_DATA, 'andros_aoi.shp'), 'hexagon', 20000.0,
            out_grid_vector_path)

        vector = gdal.OpenEx(out_grid_vector_path, gdal.OF_VECTOR)
        layer = vector.GetLayer()
        n_features = layer.GetFeatureCount()
        layer = vector = None
        # andros_aoi.shp fits 71 hexes at a 20000 meter cell size
        self.assertEqual(n_features, 71)

    def test_existing_regression_coef(self):
        """Recreation test regression coefficients handle existing output."""
        from natcap.invest.recreation import recmodel_client

        # Initialize a TaskGraph
        taskgraph_db_dir = os.path.join(
            self.workspace_dir, '_taskgraph_working_dir')
        n_workers = -1  # single process mode.
        task_graph = taskgraph.TaskGraph(taskgraph_db_dir, n_workers)

        response_vector_path = os.path.join(
            self.workspace_dir, 'no_grid_vector_path.gpkg')
        response_polygons_lookup_path = os.path.join(
            self.workspace_dir, 'response_polygons_lookup.pickle')
        recmodel_client._copy_aoi_no_grid(
            os.path.join(SAMPLE_DATA, 'andros_aoi.shp'), response_vector_path)

        predictor_table_path = os.path.join(SAMPLE_DATA, 'predictors.csv')

        # make outputs to be overwritten
        predictor_dict = recmodel_client.MODEL_SPEC.get_input(
            'predictor_table_path').get_validated_dataframe(
                predictor_table_path).to_dict(orient='index')
        predictor_list = predictor_dict.keys()
        tmp_working_dir = tempfile.mkdtemp(dir=self.workspace_dir)
        empty_json_list = [
            os.path.join(tmp_working_dir, x + '.json')
            for x in predictor_list]
        out_coefficient_vector_path = os.path.join(
            self.workspace_dir, 'out_coefficient_vector.shp')
        _make_empty_files(
            [out_coefficient_vector_path] + empty_json_list)

        prepare_response_polygons_task = task_graph.add_task(
            func=recmodel_client._prepare_response_polygons_lookup,
            args=(response_vector_path,
                  response_polygons_lookup_path),
            target_path_list=[response_polygons_lookup_path],
            task_name='prepare response polygons for geoprocessing')
        # build again to test against overwriting output
        recmodel_client._schedule_predictor_data_processing(
            response_vector_path, response_polygons_lookup_path,
            prepare_response_polygons_task, predictor_table_path,
            out_coefficient_vector_path, tmp_working_dir, task_graph)

        # Copied over from a shapefile formerly in our test-data repo:
        expected_values = {
            'bonefish': 19.96503546104,
            'airdist': 40977.89565353348,
            'ports': 14.0,
            'bathy': 1.17308099107
        }
        vector = gdal.OpenEx(out_coefficient_vector_path)
        layer = vector.GetLayer()
        for feature in layer:
            for k, v in expected_values.items():
                numpy.testing.assert_almost_equal(feature.GetField(k), v)

    def test_predictor_table_absolute_paths(self):
        """Recreation test validation with absolute predictor paths."""
        from natcap.invest.recreation import recmodel_client

        response_vector_path = os.path.join(
            self.workspace_dir, 'no_grid_vector_path.shp')
        recmodel_client._copy_aoi_no_grid(
            os.path.join(SAMPLE_DATA, 'andros_aoi.shp'), response_vector_path)

        predictor_table_path = os.path.join(
            self.workspace_dir, 'predictors.csv')

        # these are absolute paths for predictor data
        predictor_list = [
            ('ports',
             os.path.join(SAMPLE_DATA, 'predictors', 'dredged_ports.shp'),
             'point_count'),
            ('airdist',
             os.path.join(SAMPLE_DATA, 'predictors', 'airport.shp'),
             'point_nearest_distance'),
            ('bonefish',
             os.path.join(SAMPLE_DATA, 'predictors', 'bonefish_simp.shp'),
             'polygon_percent_coverage'),
            ('bathy',
             os.path.join(SAMPLE_DATA, 'predictors', 'dem90m_coarse.tif'),
             'raster_mean'),
        ]

        with open(predictor_table_path, 'w') as table_file:
            table_file.write('id,path,type\n')
            for predictor_id, path, predictor_type in predictor_list:
                table_file.write(
                    '%s,%s,%s\n' % (predictor_id, path, predictor_type))

        # The expected behavior here is that _validate_same_projection does
        # not raise a ValueError. The try/except block makes that explicit
        # and also explicitly fails the test if it does. Note that if a
        # different exception is raised, the test will error out, thus
        # differentiating between a failed test and an error.
        try:
            recmodel_client._validate_same_projection(
                response_vector_path, predictor_table_path)
        except ValueError:
            self.fail(
                "_validate_same_projection raised ValueError unexpectedly!")

    def test_year_order(self):
        """Recreation ensure that end year < start year raises ValueError."""
        from natcap.invest.recreation import recmodel_client

        args = {
            'aoi_path': os.path.join(SAMPLE_DATA, 'andros_aoi.shp'),
            'compute_regression': False,
            # note start_year > end_year
            'start_year': recmodel_client.MAX_YEAR,
            'end_year': recmodel_client.MIN_YEAR,
            'grid_aoi': False,
            'workspace_dir': self.workspace_dir,
        }
        msgs = recmodel_client.validate(args)
        self.assertEqual(
            'Start year must be less than or equal to end year.', msgs[0][1])
        with self.assertRaises(ValueError):
            recmodel_client.execute(args)

    def test_bad_grid_type(self):
        """Recreation ensure that a bad grid type raises ValueError."""
        from natcap.invest.recreation import recmodel_client

        args = {
            'aoi_path': os.path.join(SAMPLE_DATA, 'andros_aoi.shp'),
            'cell_size': 7000.0,
            'compute_regression': False,
            'start_year': recmodel_client.MIN_YEAR,
            'end_year': recmodel_client.MAX_YEAR,
            'grid_aoi': True,
            'grid_type': 'circle',  # intentionally bad grid type
            'workspace_dir': self.workspace_dir,
        }

        with self.assertRaises(ValueError):
            recmodel_client.execute(args)


class RecreationValidationTests(unittest.TestCase):
    """Tests for the Recreation Model MODEL_SPEC and validation."""

    def setUp(self):
        """Create a temporary workspace."""
        self.workspace_dir = tempfile.mkdtemp()
        self.base_required_keys = [
            'workspace_dir',
            'aoi_path',
            'start_year',
            'end_year'
        ]

    def tearDown(self):
        """Remove the temporary workspace after a test."""
        shutil.rmtree(self.workspace_dir)

    def test_missing_keys(self):
        """Recreation Validate: assert missing required keys."""
        from natcap.invest.recreation import recmodel_client
        from natcap.invest import validation

        validation_errors = recmodel_client.validate({})  # empty args dict.
        invalid_keys = validation.get_invalid_keys(validation_errors)
        expected_missing_keys = set(self.base_required_keys)
        self.assertEqual(invalid_keys, expected_missing_keys)

    def test_missing_keys_grid_aoi(self):
        """Recreation Validate: assert missing keys for grid option."""
        from natcap.invest.recreation import recmodel_client
        from natcap.invest import validation

        validation_errors = recmodel_client.validate({'grid_aoi': True})
        invalid_keys = validation.get_invalid_keys(validation_errors)
        expected_missing_keys = set(
            self.base_required_keys + ['grid_type', 'cell_size'])
        self.assertEqual(invalid_keys, expected_missing_keys)

    def test_missing_keys_compute_regression(self):
        """Recreation Validate: assert missing keys for regression option."""
        from natcap.invest.recreation import recmodel_client
        from natcap.invest import validation

        validation_errors = recmodel_client.validate(
            {'compute_regression': True})
        invalid_keys = validation.get_invalid_keys(validation_errors)
        expected_missing_keys = set(
            self.base_required_keys + ['predictor_table_path'])
        self.assertEqual(invalid_keys, expected_missing_keys)

    def test_bad_predictor_table_header(self):
        """Recreation Validate: assert messages for bad table headers."""
        from natcap.invest.recreation import recmodel_client
        from natcap.invest import validation

        table_path = os.path.join(self.workspace_dir, 'table.csv')
        with open(table_path, 'w') as file:
            file.write('foo,bar,baz\n')
            file.write('a,b,c\n')

        expected_message = [(
            ['predictor_table_path'],
            validation.MESSAGES['MATCHED_NO_HEADERS'].format(
                header='column', header_name='id'))]
        validation_warnings = recmodel_client.validate({
            'compute_regression': True,
            'predictor_table_path': table_path,
            'start_year': recmodel_client.MIN_YEAR,
            'end_year': recmodel_client.MAX_YEAR,
            'workspace_dir': self.workspace_dir,
            'aoi_path': os.path.join(SAMPLE_DATA, 'andros_aoi.shp')})

        self.assertEqual(validation_warnings, expected_message)

        validation_warnings = recmodel_client.validate({
            'compute_regression': True,
            'predictor_table_path': table_path,
            'scenario_predictor_table_path': table_path,
            'start_year': recmodel_client.MIN_YEAR,
            'end_year': recmodel_client.MAX_YEAR,
            'workspace_dir': self.workspace_dir,
            'aoi_path': os.path.join(SAMPLE_DATA, 'andros_aoi.shp')})
        expected_messages = [
            (['predictor_table_path'],
             validation.MESSAGES['MATCHED_NO_HEADERS'].format(
                 header='column', header_name='id')),
            (['scenario_predictor_table_path'],
             validation.MESSAGES['MATCHED_NO_HEADERS'].format(
                 header='column', header_name='id'))]
        self.assertEqual(len(validation_warnings), 2)
        for message in expected_messages:
            self.assertTrue(message in validation_warnings)

    def test_validate_predictor_types_incorrect(self):
        """Recreation Validate: assert error on incorrect type value."""
        from natcap.invest.recreation import recmodel_client

        predictor_id = 'dem90m'
        raster_path = os.path.join(
            SAMPLE_DATA, 'predictors/dem90m_coarse.tif')
        # include a typo in the type, this should fail
        bad_table_path = os.path.join(self.workspace_dir, 'bad_table.csv')
        with open(bad_table_path, 'w') as file:
            file.write('id,path,type\n')
            file.write(f'{predictor_id},{raster_path},raster?mean\n')

        args = {
            'aoi_path': os.path.join(SAMPLE_DATA, 'andros_aoi.shp'),
            'compute_regression': True,
            'start_year': recmodel_client.MIN_YEAR,
            'end_year': recmodel_client.MAX_YEAR,
            'grid_aoi': False,
            'predictor_table_path': bad_table_path,
            'workspace_dir': self.workspace_dir,
        }
        msgs = recmodel_client.validate(args)
        self.assertIn('The table contains invalid type value(s)', msgs[0][1])


class RecreationProductionServerHealth(unittest.TestCase):
    """Health check for the production server."""

    def test_production_server(self):
        from natcap.invest.recreation import recmodel_client
        import requests

        server_url = requests.get(recmodel_client.SERVER_URL).text.rstrip()
        # create the proxy before the try block so `proxy` is always
        # defined when the finally clause runs
        proxy = Pyro5.api.Proxy(server_url)
        try:
            # _pyroBind() forces the client-server handshake and
            # seems like a good way to check if the remote object is ready
            proxy._pyroBind()
        except Exception as exc:
            self.fail(exc)
        finally:
            proxy._pyroRelease()


def _assert_regression_results_eq(
        workspace_dir, file_list_path, result_vector_path,
        expected_results_path):
    """Test workspace against the expected list of files and results.

    Args:
        workspace_dir (string): path to the completed model workspace
        file_list_path (string): path to a file that has a list of all
            the expected files relative to the workspace base
        result_vector_path (string): path to the result vector
            produced by the Recreation model.
        expected_results_path (string): path to a csv file that has the
            expected results of a scenario prediction model run.

    Returns:
        None

    Raises:
        AssertionError if any files are missing or results are out of
        range by `tolerance_places`

    """
    try:
        # Test that the workspace has the same files as we expect
        _test_same_files(file_list_path, workspace_dir)

        # The tolerance of 3 digits after the decimal was determined by
        # experimentation on the application with the given range of
        # numbers. This is an apparently reasonable approach as described
        # by ChrisF: http://stackoverflow.com/a/3281371/42897
        # and even more reading about picking numerical tolerance:
        # https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
        tolerance_places = 3

        result_vector = gdal.OpenEx(result_vector_path, gdal.OF_VECTOR)
        result_layer = result_vector.GetLayer()
        expected_results = pandas.read_csv(expected_results_path, dtype=float)
        field_names = list(expected_results)
        for feature in result_layer:
            values = [feature.GetField(field) for field in field_names]
            fid = feature.GetFID()
            expected_values = list(expected_results.iloc[fid])
            for v, ev in zip(values, expected_values):
                if v is not None:
                    numpy.testing.assert_allclose(
                        v, ev, rtol=0, atol=10**-tolerance_places)
                else:
                    # Could happen when a raster predictor is only nodata
                    assert numpy.isnan(ev)
            feature = None
    finally:
        result_layer = None
        result_vector = None


def _test_same_files(base_list_path, directory_path):
    """Assert expected files are in the `directory_path`.

    Args:
        base_list_path (string): a path to a file that has one relative
            file path per line.
        directory_path (string): a path to a directory whose contents will
            be checked against the files listed in `base_list_path`

    Returns:
        None

    Raises:
        AssertionError when there are files listed in `base_list_path`
        that don't exist in the directory indicated by `directory_path`

    """
    missing_files = []
    with open(base_list_path, 'r') as file_list:
        for file_path in file_list:
            if file_path.rstrip() == '':
                # skip blank lines; checking this before joining with
                # `directory_path`, since the joined path is never ''
                continue
            full_path = os.path.join(directory_path, file_path.rstrip())
            if not os.path.isfile(full_path):
                missing_files.append(full_path)
    if len(missing_files) > 0:
        raise AssertionError(
            "The following files were expected but not found: " +
            '\n'.join(missing_files))


def sum_vector_columns(vector_path, field_list):
    """Calculate the sum of values in each field of a gdal vector.

    Args:
        vector_path (str): path to a gdal vector.
        field_list (list): list of field names to sum.

    Returns:
        dict: mapping field name to sum of values.

    """
    vector = gdal.OpenEx(vector_path, gdal.OF_VECTOR)
    layer = vector.GetLayer()
    result = {}
    for field in field_list:
        result[field] = sum([
            feature.GetField(field)
            for feature in layer])
    layer = vector = None
    return result
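
# Illustrative usage of sum_vector_columns (paths and values hypothetical):
#     sum_vector_columns('PUD_results.gpkg', ['PUD_YR_AVG'])
#     -> {'PUD_YR_AVG': 83.2}
# Note: iterating the layer once per field relies on GDAL's layer iteration
# restarting each time; recent GDAL Python bindings reset the read cursor
# when iteration begins.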