Merge branch 'main' of https://github.com/natcap/invest into bugfix/1350-una-cryptic-gdal-typeerror

Conflicts:
	HISTORY.rst
	tests/test_urban_nature_access.py
James Douglass 2023-08-29 11:11:21 -07:00
commit 87c945748d
86 changed files with 4559 additions and 4134 deletions


@ -60,13 +60,14 @@ runs:
cat environment.yml
- name: Setup conda environment
uses: mamba-org/provision-with-micromamba@main
uses: mamba-org/setup-micromamba@v1
with:
environment-file: environment.yml
environment-name: env
channels: conda-forge
cache-env: true
cache-env-key: ${{ runner.os }}${{ runner.arch }}-${{ env.WEEK }}-${{ hashFiles('environment.yml') }}
init-shell: bash
cache-environment: true
cache-environment-key: ${{ runner.os }}${{ runner.arch }}-${{ env.WEEK }}-${{ hashFiles('environment.yml') }}
- name: List conda environment
shell: bash -l {0}


@ -4,4 +4,4 @@ Fixes #
## Checklist
- [ ] Updated HISTORY.rst and link to any relevant issue (if these changes are user-facing)
- [ ] Updated the user's guide (if needed)
- [ ] Tested the affected models' UIs (if relevant)
- [ ] Tested the Workbench UI (if relevant)


@ -317,7 +317,7 @@ jobs:
run: make userguide
- name: Build binaries
run: make CONDA=micromamba binaries
run: make CONDA="$MAMBA_EXE" binaries
- name: Run invest-autotest with binaries
if : |
@ -344,20 +344,6 @@ jobs:
yarn config set network-timeout 600000 -g
yarn install
- name: Build Workbench
working-directory: workbench
env:
GH_TOKEN: env.GITHUB_TOKEN
DEBUG: electron-builder
CSC_IDENTITY_AUTO_DISCOVERY: false # disable electron-builder code signing
run: |
yarn run build
yarn run dist
- name: Test electron app with puppeteer
working-directory: workbench
run: npx cross-env CI=true yarn run test-electron-app
- name: Authenticate GCP
if: github.event_name != 'pull_request'
uses: google-github-actions/auth@v0
@ -368,6 +354,47 @@ jobs:
if: github.event_name != 'pull_request'
uses: google-github-actions/setup-gcloud@v0
- name: Build Workbench (PRs)
if: github.event_name == 'pull_request'
working-directory: workbench
env:
GH_TOKEN: env.GITHUB_TOKEN
DEBUG: electron-builder
CSC_IDENTITY_AUTO_DISCOVERY: false # disable electron-builder code signing
run: |
yarn run build
yarn run dist
- name: Build Workbench (macOS)
if: github.event_name != 'pull_request' && matrix.os == 'macos-latest' # secrets not available in PR
working-directory: workbench
env:
GH_TOKEN: env.GITHUB_TOKEN
DEBUG: electron-builder
CSC_LINK: 2025-01-16-Expiry-AppStore-App.p12
CSC_KEY_PASSWORD: ${{ secrets.MACOS_CODESIGN_CERT_PASS }}
run: |
gsutil cp gs://stanford_cert/$CSC_LINK $CSC_LINK
yarn run build
yarn run dist
- name: Build Workbench (Windows)
if: github.event_name != 'pull_request' && matrix.os == 'windows-latest' # secrets not available in PR
working-directory: workbench
env:
GH_TOKEN: env.GITHUB_TOKEN
DEBUG: electron-builder
CSC_LINK: Stanford-natcap-code-signing-cert-expires-2024-01-26.p12
CSC_KEY_PASSWORD: ${{ secrets.WINDOWS_CODESIGN_CERT_PASS }}
run: |
gsutil cp gs://stanford_cert/$CSC_LINK $CSC_LINK
yarn run build
yarn run dist
- name: Test electron app with puppeteer
working-directory: workbench
run: npx cross-env CI=true yarn run test-electron-app
- name: Sign binaries (macOS)
if: github.event_name != 'pull_request' && matrix.os == 'macos-latest' # secrets not available in PR
env:


@ -35,12 +35,34 @@
.. :changelog:
3.14.0 (YYYY-MM-DD)
-------------------
* SDR
* We implemented two major functional changes to the InVEST LS Factor
that significantly affect most outputs of SDR and will bring the LS
factor output more in line with the outputs of SAGA-GIS's LS Factor.
A discussion of differences between these two implementations can be
viewed at https://github.com/natcap/invest/tree/main/doc/decision-records/ADR-0001-Update-SDR-LS-Factor.md.
The two specific changes implemented are:
* The LS Factor's on-pixel aspect length is now calculated as
``abs(sin(slope)) + abs(cos(slope))``.
* The LS Factor's upstream contributing area is now calculated as
an estimate for the specific catchment area, calculated by
``sqrt(n_pixels_upstream * pixel_area)``.
Unreleased Changes
------------------
* General
* Fixed a bug in the CLI where ``invest getspec --json`` failed on
non-json-serializable objects such as ``pint.Unit``.
https://github.com/natcap/invest/issues/1280
* A new directory at ``./doc/decision-records`` has been created for
"Architecture/Any Decision Records", which will serve as a record of
nontrivial decisions made about InVEST and why they were made. This is
intended for reference by our science and software teams, as well as by
the community at large when inquiring about a nontrivial change.
https://github.com/natcap/invest/issues/1079
* Updated the package installation instructions in the API docs for clarity
and also to highlight the ease of installation through ``conda-forge``.
https://github.com/natcap/invest/issues/1256
@ -48,10 +70,33 @@ Unreleased Changes
has been merged into ``utils.read_csv_to_dataframe``; a sketch of the new
usage pattern follows this section
(`#1319 <https://github.com/natcap/invest/issues/1319>`_),
(`#1327 <https://github.com/natcap/invest/issues/1327>`_)
* Improved the validation message that is returned when not all spatial
inputs overlap (`#502 <https://github.com/natcap/invest/issues/502>`_)
* Standardized the name and location of the taskgraph cache directory for
all models. It is now called ``taskgraph_cache`` and located in the top
level of the workspace directory.
(`#1230 <https://github.com/natcap/invest/issues/1230>`_)
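The following is a minimal, schematic sketch of the new
``read_csv_to_dataframe`` usage pattern, drawn from the code changes later in
this commit. The table path variable and the Annual Water Yield spec key and
column names are illustrative, and the snippet assumes the surrounding
``natcap.invest`` module context rather than being standalone::

    # Old pattern: a dict of dicts keyed on an index column.
    bio_dict = utils.read_csv_to_dataframe(
        biophysical_table_path, 'lucode').to_dict(orient='index')
    kc_value = bio_dict[lucode]['kc']

    # New pattern: a pandas.DataFrame indexed according to the
    # ``index_col`` declared in the model's MODEL_SPEC entry for the table.
    bio_df = utils.read_csv_to_dataframe(
        biophysical_table_path, MODEL_SPEC['args']['biophysical_table_path'])
    kc_value = bio_df['kc'][lucode]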
* Workbench
* Fixed a bug where sampledata downloads failed silently (and the progress
bar became inaccurate) if the Workbench did not have write permission to
the download location. https://github.com/natcap/invest/issues/1070
* The workbench app is now distributed with a valid code signature
(`#727 <https://github.com/natcap/invest/issues/727>`_)
* Changing the language setting will now cause the app to relaunch
(`#1168 <https://github.com/natcap/invest/issues/1168>`_)
* Closing the main window will now close any user's guide windows that are
open. Fixed a bug where the app could not be reopened after closing.
(`#1258 <https://github.com/natcap/invest/issues/1258>`_)
* Fixed a bug where invalid metadata for a recent run would result
in an uncaught exception.
(`#1286 <https://github.com/natcap/invest/issues/1286>`_)
* Middle clicking an InVEST model tab was opening a blank window. Now
middle clicking will close that tab as expected.
(`#1261 <https://github.com/natcap/invest/issues/1261>`_)
* Coastal Blue Carbon
* Added validation for the transition table, raising a validation error if
unexpected values are encountered.
(`#729 <https://github.com/natcap/invest/issues/729>`_)
* Forest Carbon
* The biophysical table is now case-insensitive.
* HRA
@ -59,7 +104,17 @@ Unreleased Changes
consequence criteria were skipped for a single habitat. The model now
correctly handles this case. https://github.com/natcap/invest/issues/1250
* Tables in the .xls format are no longer supported. This format was
deprecated by ``pandas``. (`#1271 <https://github.com/natcap/invest/issues/1271>`_)
deprecated by ``pandas``.
(`#1271 <https://github.com/natcap/invest/issues/1271>`_)
* Fixed a bug where vector inputs could be rasterized onto a grid that is
not exactly aligned with other raster inputs.
(`#1312 <https://github.com/natcap/invest/issues/1312>`_)
* NDR
* The contents of the output ``cache_dir`` have been consolidated into
``intermediate_outputs``.
* Fixed a bug where results were calculated incorrectly if the runoff proxy
raster (or the DEM or LULC) had no nodata value
(`#1005 <https://github.com/natcap/invest/issues/1005>`_)
* Pollination
* Several exceptions have been tidied up so that only fieldnames are
printed instead of the python data structures representing the whole
@ -85,6 +140,8 @@ Unreleased Changes
* Fixed an issue with sediment deposition progress logging that was
causing the "percent complete" indicator to not progress linearly.
https://github.com/natcap/invest/issues/1262
* The contents of the output ``churn_dir_not_for_humans`` have been
consolidated into ``intermediate_outputs``.
* Seasonal Water Yield
* Fixed a bug where monthly quickflow nodata pixels were not being passed
on to the total quickflow raster, which could result in negative values
@ -96,18 +153,76 @@ Unreleased Changes
set to 0. The old behavior was not well documented and caused some
confusion when nodata pixels did not line up. It's safer not to fill in
unknown data. (`#1317 <https://github.com/natcap/invest/issues/1317>`_)
* Negative monthly quickflow values will now be set to 0. This is because
very small negative values occasionally result from valid data, but they
should be interpreted as 0.
(`#1318 <https://github.com/natcap/invest/issues/1318>`_)
* In the monthly quickflow calculation, QF_im will be set to 0 on any pixel
where s_i / a_im > 100. This is done to avoid overflow errors when
calculating edge cases where the result would round down to 0 anyway; see
the sketch after this section.
(`#1318 <https://github.com/natcap/invest/issues/1318>`_)
* The contents of the output ``cache_dir`` have been consolidated into
``intermediate_outputs``.
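As a rough numpy illustration of the two quickflow clamping rules above (the
array values are made up and nodata handling is omitted; the real model
operates on monthly rasters)::

    import numpy

    s_i = numpy.array([5.0, 900.0, 12.0])    # potential retention term
    a_im = numpy.array([2.5, 3.0, 40.0])     # mean rain depth on a rainy day
    qf_im = numpy.array([-1e-9, 0.7, 2.3])   # raw monthly quickflow values

    # Very small negative values arising from valid data are treated as 0.
    qf_im[qf_im < 0] = 0
    # Where s_i / a_im > 100 the result would round down to 0 anyway, so set
    # it to 0 directly instead of evaluating the overflow-prone terms.
    qf_im[(s_i / a_im) > 100] = 0
    print(qf_im)  # [0.  0.  2.3]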
* Urban Flood Risk
* Fixed a bug where the model incorrectly raised an error if the
biophysical table contained a row of all 0s.
(`#1123 <https://github.com/natcap/invest/issues/1123>`_)
* The contents of the output ``temp_working_dir_not_for_humans`` have been
consolidated into ``intermediate_files``.
* Workbench validation of the biophysical table now warns if a curve number
value is missing.
(`#1346 <https://github.com/natcap/invest/issues/1346>`_)
* Urban Nature Access
* Urban nature supply outputs have been renamed to add ``percapita`` to the
filename.
* In uniform search radius mode, ``urban_nature_supply.tif`` has been
renamed to ``urban_nature_supply_percapita.tif``.
* When defining search radii by urban nature class,
``urban_nature_supply_lucode_[LUCODE].tif`` has been renamed to
``urban_nature_supply_percapita_lucode_[LUCODE].tif``.
* When defining search radii by population groups,
``urban_nature_supply_to_[POP_GROUP].tif`` has been renamed to
``urban_nature_supply_percapita_to_[POP_GROUP].tif``.
* A new output for "Accessible Urban Nature" is created, indicating the
area of accessible greenspace available to people within the search
radius, weighted by the selected decay function. The outputs vary
slightly depending on the selected execution mode.
* In uniform search radius mode, a single new output is created,
``accessible_urban_nature.tif``.
* When defining search radii by urban nature class, one new
output raster is created for each class of urban nature. These files
are named ``accessible_urban_nature_lucode_[LUCODE].tif``.
* When defining search radii for population groups, one new output
raster is created for each population group. These files are named
``accessible_urban_nature_to_[POP_GROUP].tif``.
* Urban nature classes can now be defined to occupy a proportion of a
pixel, such as a park that is semi-developed. This proportion is
provided through user input as a proportion (0-1) in the
``urban_nature`` column of the LULC Attribute Table. A value of ``0``
indicates that there is no urban nature in this class, ``0.333``
indicates that a third of the area of this LULC class is urban nature,
and ``1`` would indicate that the entire LULC class's area is urban
nature. https://github.com/natcap/invest/issues/1180
* Fixed an issue where, under certain circumstances, the model would raise
a cryptic ``TypeError`` when creating the summary vector.
https://github.com/natcap/invest/issues/1350
* Visitation: Recreation and Tourism
* Fixed a bug where overlapping predictor polygons would be double-counted
in ``polygon_area_coverage`` and ``polygon_percent_coverage`` calculations.
(`#1310 <https://github.com/natcap/invest/issues/1310>`_)
in ``polygon_area_coverage`` and ``polygon_percent_coverage``
calculations. (`#1310 <https://github.com/natcap/invest/issues/1310>`_)
* Changed the calculation of ``point_nearest_distance`` metric to match
the description in the User's Guide. Values are now the distance to the
centroid of the AOI polygon instead of the distance to the nearest
edge of the AOI polygon.
(`#1347 <https://github.com/natcap/invest/issues/1347>`_)
* Wind Energy
* Updated a misleading error message that is raised when the AOI does
not spatially overlap another input.
(`#1054 <https://github.com/natcap/invest/issues/1054>`_)
3.13.0 (2023-03-17)
-------------------


@ -2,11 +2,11 @@
DATA_DIR := data
GIT_SAMPLE_DATA_REPO := https://bitbucket.org/natcap/invest-sample-data.git
GIT_SAMPLE_DATA_REPO_PATH := $(DATA_DIR)/invest-sample-data
GIT_SAMPLE_DATA_REPO_REV := a58b9c7bdd8a31cab469ea919fe0ebf23a6c668e
GIT_SAMPLE_DATA_REPO_REV := 2e7cd618c661ec3f3b2a3bddfd2ce7d4704abc05
GIT_TEST_DATA_REPO := https://bitbucket.org/natcap/invest-test-data.git
GIT_TEST_DATA_REPO_PATH := $(DATA_DIR)/invest-test-data
GIT_TEST_DATA_REPO_REV := a89253d83d5f70a8ea2d8a951b2d47d603505f14
GIT_TEST_DATA_REPO_REV := e7d32d65612f4f3578a4fb57824af4e297c65283
GIT_UG_REPO := https://github.com/natcap/invest.users-guide
GIT_UG_REPO_PATH := doc/users-guide


@ -0,0 +1,94 @@
# ADR-0001: Update the InVEST SDR LS Factor
Author: James
Science Lead: Rafa
## Context
Since we released the updated InVEST SDR model in InVEST 3.1.0, we have seen a
common refrain of users and NatCap science staff noticing that the LS factor
output of SDR did not produce realistic results and that the LS factor produced
by SAGA was much more realistic. Over the years we have made several notable
changes to the model and to the LS factor that have altered the output, including:
1. The SDR model's underlying routing model was changed from d-infinity to MFD in 3.5.0
2. The $x$ parameter was changed in InVEST 3.8.1 from the true on-pixel aspect
$|\sin \theta|+|\cos \theta|$ (described in Zevenbergen & Thorne 1987 and repeated
in Desmet & Govers 1996) to the weighted mean of proportional flow from the
current pixel to its neighbors.
3. A typo in a constant value in the LS factor was corrected in InVEST 3.9.1
4. An `l_max` parameter was exposed to the user in InVEST 3.9.1
Despite these changes to the LS factor, we still received occasional reports
that the LS factor output of SDR was unrealistic and that SAGA's LS factor
was much more realistic.
After diving into the SAGA source code, we found that there are several
important differences between the two implementations, despite both using Desmet & Govers (1996)
for their LS factor equations:
1. The contributing area $A_{i,j-in}$ is not strictly defined in Desmet &
Govers (1996); it is only referred to as "the contributing area at the inlet
of a grid cell with coordinates (i, j) (m^2)".
InVEST assumes that "contributing area" is $area_{pixel} \cdot n\\_upstream\\_pixels$.
SAGA refers to this as "specific catchment area" and allows the user to choose their
specific catchment area equation, where the available options are
"contour length simply as cell size", "contour length dependent on aspect", "square
root of catchment area" and "effective flow length".
2. SAGA uses on-pixel aspect, $|\sin \theta|+|\cos \theta|$, and does not consider
flow direction derived from a routing model when calculating the LS factor.
3. The length exponent $m$ differs between the implementations. In SAGA,
$m = \beta / (1 + \beta)$. In InVEST, we have a discontinuous function where
$m$ is dependent on the slope of the current pixel and described as "classical USLE"
in the user's guide and discussed in Oliveira et al (2013).
4. SAGA's flow accumulation function [`Get_Flow()`](https://github.com/saga-gis/saga-gis/blob/master/saga-gis/src/tools/terrain_analysis/ta_hydrology/Erosion_LS_Fields.cpp#L394)
considers a pixel to be downstream only if its elevation is strictly less
than the current pixel's elevation, which implies that flow accumulation will
not navigate plateaus. InVEST's flow accumulation handles plateaus well,
which can lead to larger flow accumulation values on the same DEM.
5. SAGA's flow accumulation function `Get_Flow()` uses D8, InVEST's flow
accumulation uses MFD.
It is important to note that when evaluating differences between the SAGA and InVEST
LS Factor implementations, it is _critical_ to use a hydrologically conditioned DEM, such
as one conditioned with the Wang & Liu method, so that we control for differences in
output due to the presence of plateaus.
Once we finally understood these discrepancies, James implemented several of the
contributing area functions available in SAGA to see what might be most comparable
to the real world. Source code and a docker container for these experiments are
available at
https://github.com/phargogh/invest-ls-factor-vs-saga/blob/main/src/natcap/invest/sdr/sdr.py#L901.
Some additional discussion and notes can be viewed in the related github issue:
https://github.com/natcap/invest/issues/915.
## Decision
After inspecting the results, Rafa decided that we should make these changes to
the LS Factor calculation:
1. We will revert to using the on-pixel aspect, $|\sin \theta|+|\cos \theta|$.
This is in line with the published literature.
2. We will convert the "contributing area" portion of the LS Factor to be
$\sqrt{ n\\_upstream\\_pixels \cdot area\_{pixel} }$. Rafa's opinion on this
is that the LS factor equations were designed for a 1-dimensional situation,
so our specific catchment area number should reflect this.
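
A minimal Python sketch of the two modified terms (the function and variable
names here are illustrative, not the actual SDR implementation):

```python
import math


def ls_factor_terms(aspect_radians, n_upstream_pixels, pixel_area_m2):
    """Sketch of the two LS Factor terms changed by this decision.

    This is not the full Desmet & Govers (1996) LS equation, only the two
    pieces that the decision above modifies.
    """
    # 1. On-pixel aspect term, |sin(theta)| + |cos(theta)|, replacing the
    #    MFD-weighted mean of proportional flow to neighboring pixels.
    x = abs(math.sin(aspect_radians)) + abs(math.cos(aspect_radians))

    # 2. "Contributing area" treated as an estimate of the specific catchment
    #    area: sqrt(n_upstream_pixels * pixel_area).
    specific_catchment_area = math.sqrt(n_upstream_pixels * pixel_area_m2)

    return x, specific_catchment_area
```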
## Status
## Consequences
Once implemented and released, the LS factor outputs of SDR will be
significantly different, but they should more closely match reality.
We hope that there will be fewer support requests about this once the change is
released.
## References
Zevenbergen & Thorne (1987): https://searchworks.stanford.edu/articles/edb__89861226
Desmet & Govers (1996): https://searchworks.stanford.edu/articles/edsgac__edsgac.A18832564
Oliveira et al (2013): http://dx.doi.org/10.5772/54439


@ -0,0 +1,12 @@
# Architecture/Any Decision Records
An ADR is a way to track decisions and their rationale in a form that is tied to
the source code, easy to digest, and written so that future us will
understand it. An ADR consists of several sections:
1. The title and ADR number (for easier sorting)
2. Context about the problem
3. The decision that was made and why
4. The status of implementation
5. Consequences of the implementation
6. Any references (especially if describing a science/software issue)


@ -87,10 +87,19 @@ VALUATION_OUTPUT_FIELDS = {
}
}
SUBWATERSHED_OUTPUT_FIELDS = {
"subws_id": {
"type": "integer",
"about": gettext("Unique identifier for each subwatershed.")
},
**BASE_OUTPUT_FIELDS,
**SCARCITY_OUTPUT_FIELDS
**SCARCITY_OUTPUT_FIELDS,
}
WATERSHED_OUTPUT_FIELDS = {
"ws_id": {
"type": "integer",
"about": gettext("Unique identifier for each watershed.")
},
**BASE_OUTPUT_FIELDS,
**SCARCITY_OUTPUT_FIELDS,
**VALUATION_OUTPUT_FIELDS
@ -209,6 +218,7 @@ MODEL_SPEC = {
"units": u.none,
"about": gettext("Crop coefficient for this LULC class.")}
},
"index_col": "lucode",
"about": gettext(
"Table of biophysical parameters for each LULC class. All "
"values in the LULC raster must have corresponding entries "
@ -239,6 +249,7 @@ MODEL_SPEC = {
"units": u.meter**3/u.year/u.pixel
}
},
"index_col": "lucode",
"required": False,
"about": gettext(
"A table of water demand for each LULC class. Each LULC code "
@ -310,6 +321,7 @@ MODEL_SPEC = {
"the time span.")
}
},
"index_col": "ws_id",
"required": False,
"about": gettext(
"A table mapping each watershed to the associated valuation "
@ -328,6 +340,7 @@ MODEL_SPEC = {
},
"watershed_results_wyield.csv": {
"columns": {**WATERSHED_OUTPUT_FIELDS},
"index_col": "ws_id",
"about": "Table containing biophysical output values per watershed."
},
"subwatershed_results_wyield.shp": {
@ -337,6 +350,7 @@ MODEL_SPEC = {
},
"subwatershed_results_wyield.csv": {
"columns": {**SUBWATERSHED_OUTPUT_FIELDS},
"index_col": "subws_id",
"about": "Table containing biophysical output values per subwatershed."
},
"per_pixel": {
@ -415,12 +429,12 @@ MODEL_SPEC = {
"veg.tif": {
"about": "Map of vegetated state.",
"bands": {1: {"type": "integer"}},
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
}
}
},
"taskgraph_dir": spec_utils.TASKGRAPH_DIR
}
}
@ -509,23 +523,23 @@ def execute(args):
if invalid_parameters:
raise ValueError(f'Invalid parameters passed: {invalid_parameters}')
# valuation_params is passed to create_vector_output()
# which computes valuation if valuation_params is not None.
valuation_params = None
# valuation_df is passed to create_vector_output()
# which computes valuation if valuation_df is not None.
valuation_df = None
if 'valuation_table_path' in args and args['valuation_table_path'] != '':
LOGGER.info(
'Checking that watersheds have entries for every `ws_id` in the '
'valuation table.')
# Open/read in valuation parameters from CSV file
valuation_params = utils.read_csv_to_dataframe(
args['valuation_table_path'], 'ws_id').to_dict(orient='index')
valuation_df = utils.read_csv_to_dataframe(
args['valuation_table_path'], MODEL_SPEC['args']['valuation_table_path'])
watershed_vector = gdal.OpenEx(
args['watersheds_path'], gdal.OF_VECTOR)
watershed_layer = watershed_vector.GetLayer()
missing_ws_ids = []
for watershed_feature in watershed_layer:
watershed_ws_id = watershed_feature.GetField('ws_id')
if watershed_ws_id not in valuation_params:
if watershed_ws_id not in valuation_df.index:
missing_ws_ids.append(watershed_ws_id)
watershed_feature = None
watershed_layer = None
@ -587,7 +601,6 @@ def execute(args):
seasonality_constant = float(args['seasonality_constant'])
# Initialize a TaskGraph
work_token_dir = os.path.join(intermediate_dir, '_taskgraph_working_dir')
try:
n_workers = int(args['n_workers'])
except (KeyError, ValueError, TypeError):
@ -595,7 +608,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # single process mode.
graph = taskgraph.TaskGraph(work_token_dir, n_workers)
graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
base_raster_path_list = [
args['eto_path'],
@ -636,48 +650,43 @@ def execute(args):
'lulc': pygeoprocessing.get_raster_info(clipped_lulc_path)['nodata'][0]}
# Open/read in the csv file into a dictionary and add to arguments
bio_dict = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'lucode').to_dict(orient='index')
bio_lucodes = set(bio_dict.keys())
bio_df = utils.read_csv_to_dataframe(args['biophysical_table_path'],
MODEL_SPEC['args']['biophysical_table_path'])
bio_lucodes = set(bio_df.index.values)
bio_lucodes.add(nodata_dict['lulc'])
LOGGER.debug(f'bio_lucodes: {bio_lucodes}')
if 'demand_table_path' in args and args['demand_table_path'] != '':
demand_dict = utils.read_csv_to_dataframe(
args['demand_table_path'], 'lucode').to_dict(orient='index')
demand_df = utils.read_csv_to_dataframe(
args['demand_table_path'], MODEL_SPEC['args']['demand_table_path'])
demand_reclassify_dict = dict(
[(lucode, demand_dict[lucode]['demand'])
for lucode in demand_dict])
demand_lucodes = set(demand_dict.keys())
[(lucode, row['demand']) for lucode, row in demand_df.iterrows()])
demand_lucodes = set(demand_df.index.values)
demand_lucodes.add(nodata_dict['lulc'])
LOGGER.debug(f'demand_lucodes: {demand_lucodes}', )
else:
demand_lucodes = None
# Break the bio_dict into three separate dictionaries based on
# Break the bio_df into three separate dictionaries based on
# Kc, root_depth, and LULC_veg fields to use for reclassifying
Kc_dict = {}
root_dict = {}
vegetated_dict = {}
for lulc_code in bio_dict:
Kc_dict[lulc_code] = bio_dict[lulc_code]['kc']
for lulc_code, row in bio_df.iterrows():
Kc_dict[lulc_code] = row['kc']
# Catch invalid LULC_veg values with an informative error.
lulc_veg_value = bio_dict[lulc_code]['lulc_veg']
try:
vegetated_dict[lulc_code] = int(lulc_veg_value)
if vegetated_dict[lulc_code] not in set([0, 1]):
raise ValueError()
except ValueError:
if row['lulc_veg'] not in set([0, 1]):
# If the user provided an invalid LULC_veg value, raise an
# informative error.
raise ValueError(
f'LULC_veg value must be either 1 or 0, not {lulc_veg_value}')
f'LULC_veg value must be either 1 or 0, not {row["lulc_veg"]}')
vegetated_dict[lulc_code] = row['lulc_veg']
# If LULC_veg value is 1 get root depth value
if vegetated_dict[lulc_code] == 1:
root_dict[lulc_code] = bio_dict[lulc_code]['root_depth']
root_dict[lulc_code] = row['root_depth']
# If LULC_veg value is 0 then we do not care about root
# depth value so will just substitute in a 1. This
# value will not end up being used.
@ -843,7 +852,7 @@ def execute(args):
write_output_vector_attributes_task = graph.add_task(
func=write_output_vector_attributes,
args=(target_ws_path, ws_id_name, zonal_stats_pickle_list,
valuation_params),
valuation_df),
target_path_list=[target_ws_path],
dependent_task_list=[
*zonal_stats_task_list, copy_watersheds_vector_task],
@ -879,7 +888,7 @@ def copy_vector(base_vector_path, target_vector_path):
def write_output_vector_attributes(target_vector_path, ws_id_name,
stats_path_list, valuation_params):
stats_path_list, valuation_df):
"""Add data attributes to the vector outputs of this model.
Join results of zonal stats to copies of the watershed shapefiles.
@ -893,7 +902,7 @@ def write_output_vector_attributes(target_vector_path, ws_id_name,
represent watersheds or subwatersheds.
stats_path_list (list): List of file paths to pickles storing the zonal
stats results.
valuation_params (dict): The dictionary built from
valuation_df (pandas.DataFrame): dataframe built from
args['valuation_table_path']. Or None if valuation table was not
provided.
@ -929,10 +938,10 @@ def write_output_vector_attributes(target_vector_path, ws_id_name,
_add_zonal_stats_dict_to_shape(
target_vector_path, ws_stats_dict, key_name, 'mean')
if valuation_params:
if valuation_df is not None:
# only do valuation for watersheds, not subwatersheds
if ws_id_name == 'ws_id':
compute_watershed_valuation(target_vector_path, valuation_params)
compute_watershed_valuation(target_vector_path, valuation_df)
def convert_vector_to_csv(base_vector_path, target_csv_path):
@ -1141,14 +1150,14 @@ def pet_op(eto_pix, Kc_pix, eto_nodata, output_nodata):
return result
def compute_watershed_valuation(watershed_results_vector_path, val_dict):
def compute_watershed_valuation(watershed_results_vector_path, val_df):
"""Compute net present value and energy for the watersheds.
Args:
watershed_results_vector_path (string):
Path to an OGR shapefile for the watershed results.
Where the results will be added.
val_dict (dict): a python dictionary that has all the valuation
val_df (pandas.DataFrame): a dataframe that has all the valuation
parameters for each watershed.
Returns:
@ -1183,26 +1192,23 @@ def compute_watershed_valuation(watershed_results_vector_path, val_dict):
# there won't be a rsupply_vl value if the polygon feature only
# covers nodata raster values, so check before doing math.
if rsupply_vl is not None:
# Get the valuation parameters for watershed 'ws_id'
val_row = val_dict[ws_id]
# Compute hydropower energy production (KWH)
# This is from the equation given in the Users' Guide
energy = (
val_row['efficiency'] * val_row['fraction'] *
val_row['height'] * rsupply_vl * 0.00272)
val_df['efficiency'][ws_id] * val_df['fraction'][ws_id] *
val_df['height'][ws_id] * rsupply_vl * 0.00272)
dsum = 0
# Divide by 100 because it is input at a percent and we need
# decimal value
disc = val_row['discount'] / 100
disc = val_df['discount'][ws_id] / 100
# To calculate the summation of the discount rate term over the life
# span of the dam we can use a geometric series
ratio = 1 / (1 + disc)
if ratio != 1:
dsum = (1 - math.pow(ratio, val_row['time_span'])) / (1 - ratio)
dsum = (1 - math.pow(ratio, val_df['time_span'][ws_id])) / (1 - ratio)
npv = ((val_row['kw_price'] * energy) - val_row['cost']) * dsum
npv = ((val_df['kw_price'][ws_id] * energy) - val_df['cost'][ws_id]) * dsum
# Get the volume field index and add value
ws_feat.SetField(energy_field, energy)


@ -130,6 +130,7 @@ MODEL_SPEC = {
"units": u.metric_ton/u.hectare,
"about": gettext("Carbon density of dead matter.")}
},
"index_col": "lucode",
"about": gettext(
"A table that maps each LULC code to carbon pool data for "
"that LULC type."),
@ -254,10 +255,10 @@ MODEL_SPEC = {
"intermediate": {
"type": "directory",
"contents": {
**CARBON_OUTPUTS,
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
**CARBON_OUTPUTS
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -366,11 +367,9 @@ def execute(args):
(_INTERMEDIATE_BASE_FILES, intermediate_output_dir),
(_TMP_BASE_FILES, output_dir)], file_suffix)
carbon_pool_table = utils.read_csv_to_dataframe(
args['carbon_pools_path'], 'lucode').to_dict(orient='index')
carbon_pool_df = utils.read_csv_to_dataframe(
args['carbon_pools_path'], MODEL_SPEC['args']['carbon_pools_path'])
work_token_dir = os.path.join(
intermediate_output_dir, '_taskgraph_working_dir')
try:
n_workers = int(args['n_workers'])
except (KeyError, ValueError, TypeError):
@ -378,7 +377,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # Synchronous mode.
graph = taskgraph.TaskGraph(work_token_dir, n_workers)
graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
cell_size_set = set()
raster_size_set = set()
@ -413,9 +413,7 @@ def execute(args):
carbon_map_task_lookup[scenario_type] = []
storage_path_list = []
for pool_type in ['c_above', 'c_below', 'c_soil', 'c_dead']:
carbon_pool_by_type = dict([
(lucode, float(carbon_pool_table[lucode][pool_type]))
for lucode in carbon_pool_table])
carbon_pool_by_type = carbon_pool_df[pool_type].to_dict()
lulc_key = 'lulc_%s_path' % scenario_type
storage_key = '%s_%s' % (pool_type, scenario_type)


@ -97,6 +97,7 @@ import time
import shutil
import numpy
import pandas
import pygeoprocessing
import scipy.sparse
import taskgraph
@ -117,6 +118,9 @@ INVALID_ANALYSIS_YEAR_MSG = gettext(
"({latest_year})")
INVALID_SNAPSHOT_RASTER_MSG = gettext(
"Raster for snapshot {snapshot_year} could not be validated.")
INVALID_TRANSITION_VALUES_MSG = gettext(
"The transition table expects values of {model_transitions} but found "
"values of {transition_values}.")
POOL_SOIL = 'soil'
POOL_BIOMASS = 'biomass'
@ -154,7 +158,6 @@ NET_PRESENT_VALUE_RASTER_PATTERN = 'net-present-value-at-{year}{suffix}.tif'
CARBON_STOCK_AT_YEAR_RASTER_PATTERN = 'carbon-stock-at-{year}{suffix}.tif'
INTERMEDIATE_DIR_NAME = 'intermediate'
TASKGRAPH_CACHE_DIR_NAME = 'task_cache'
OUTPUT_DIR_NAME = 'output'
MODEL_SPEC = {
@ -167,10 +170,10 @@ MODEL_SPEC = {
"n_workers": spec_utils.N_WORKERS,
"landcover_snapshot_csv": {
"type": "csv",
"index_col": "snapshot_year",
"columns": {
"snapshot_year": {
"type": "number",
"units": u.year_AD,
"type": "integer",
"about": gettext(
"The snapshot year that this row's LULC raster "
"represents. Each year in this table must be unique.")
@ -204,6 +207,7 @@ MODEL_SPEC = {
"biophysical_table_path": {
"name": gettext("biophysical table"),
"type": "csv",
"index_col": "code",
"columns": {
"code": {
"type": "integer",
@ -300,11 +304,12 @@ MODEL_SPEC = {
"landcover_transitions_table": {
"name": gettext("landcover transitions table"),
"type": "csv",
"index_col": "lulc-class",
"columns": {
"lulc-class": {
"type": "integer",
"type": "freestyle_string",
"about": gettext(
"LULC codes matching the codes in the biophysical "
"LULC class names matching those in the biophysical "
"table.")},
"[LULC CODE]": {
"type": "option_string",
@ -382,6 +387,7 @@ MODEL_SPEC = {
"name": gettext("price table"),
"type": "csv",
"required": "use_price_table",
"index_col": "year",
"columns": {
"year": {
"type": "number",
@ -517,7 +523,7 @@ MODEL_SPEC = {
}
}
},
"task_cache": spec_utils.TASKGRAPH_DIR
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -564,7 +570,10 @@ def execute(args):
task_graph, n_workers, intermediate_dir, output_dir, suffix = (
_set_up_workspace(args))
snapshots = _extract_snapshots_from_table(args['landcover_snapshot_csv'])
snapshots = utils.read_csv_to_dataframe(
args['landcover_snapshot_csv'],
MODEL_SPEC['args']['landcover_snapshot_csv']
)['raster_path'].to_dict()
# Phase 1: alignment and preparation of inputs
baseline_lulc_year = min(snapshots.keys())
@ -584,15 +593,14 @@ def execute(args):
# We're assuming that the LULC initial variables and the carbon pool
# transient table are combined into a single lookup table.
biophysical_parameters = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'code').to_dict(orient='index')
biophysical_df = utils.read_csv_to_dataframe(
args['biophysical_table_path'],
MODEL_SPEC['args']['biophysical_table_path'])
# LULC Classnames are critical to the transition mapping, so they must be
# unique. This check is here in ``execute`` because it's possible that
# someone might have a LOT of classes in their biophysical table.
unique_lulc_classnames = set(
params['lulc-class'] for params in biophysical_parameters.values())
if len(unique_lulc_classnames) != len(biophysical_parameters):
if not biophysical_df['lulc-class'].is_unique:
raise ValueError(
"All values in `lulc-class` column must be unique, but "
"duplicates were found.")
@ -630,7 +638,7 @@ def execute(args):
task_name='Align input landcover rasters.')
(disturbance_matrices, accumulation_matrices) = _read_transition_matrix(
args['landcover_transitions_table'], biophysical_parameters)
args['landcover_transitions_table'], biophysical_df)
# Baseline stocks are simply reclassified.
# Baseline accumulation are simply reclassified
@ -664,8 +672,7 @@ def execute(args):
func=pygeoprocessing.reclassify_raster,
args=(
(aligned_lulc_paths[baseline_lulc_year], 1),
{lucode: values[f'{pool}-initial'] for (lucode, values)
in biophysical_parameters.items()},
biophysical_df[f'{pool}-initial'].to_dict(),
stock_rasters[baseline_lulc_year][pool],
gdal.GDT_Float32,
NODATA_FLOAT32_MIN),
@ -682,9 +689,7 @@ def execute(args):
func=pygeoprocessing.reclassify_raster,
args=(
(aligned_lulc_paths[baseline_lulc_year], 1),
{lucode: values[f'{pool}-yearly-accumulation']
for (lucode, values)
in biophysical_parameters.items()},
biophysical_df[f'{pool}-yearly-accumulation'].to_dict(),
yearly_accum_rasters[baseline_lulc_year][pool],
gdal.GDT_Float32,
NODATA_FLOAT32_MIN),
@ -805,9 +810,7 @@ def execute(args):
func=pygeoprocessing.reclassify_raster,
args=(
(aligned_lulc_paths[prior_transition_year], 1),
{lucode: values[f'{pool}-half-life']
for (lucode, values)
in biophysical_parameters.items()},
biophysical_df[f'{pool}-half-life'].to_dict(),
halflife_rasters[current_transition_year][pool],
gdal.GDT_Float32,
NODATA_FLOAT32_MIN),
@ -868,9 +871,7 @@ def execute(args):
yearly_accum_tasks[current_transition_year][POOL_LITTER] = task_graph.add_task(
func=pygeoprocessing.reclassify_raster,
args=((aligned_lulc_paths[current_transition_year], 1),
{lucode: values[f'{POOL_LITTER}-yearly-accumulation']
for (lucode, values) in
biophysical_parameters.items()},
biophysical_df[f'{POOL_LITTER}-yearly-accumulation'].to_dict(),
yearly_accum_rasters[current_transition_year][POOL_LITTER],
gdal.GDT_Float32,
NODATA_FLOAT32_MIN),
@ -962,11 +963,10 @@ def execute(args):
prices = None
if args.get('do_economic_analysis', False): # Do if truthy
if args.get('use_price_table', False):
prices = {
year: values['price'] for (year, values) in
utils.read_csv_to_dataframe(
args['price_table_path'], 'year'
).to_dict(orient='index').items()}
prices = utils.read_csv_to_dataframe(
args['price_table_path'],
MODEL_SPEC['args']['price_table_path']
)['price'].to_dict()
else:
inflation_rate = float(args['inflation_rate']) * 0.01
annual_price = float(args['price'])
@ -1068,10 +1068,9 @@ def _set_up_workspace(args):
# TypeError when n_workers is None.
n_workers = -1 # Synchronous mode.
taskgraph_cache_dir = os.path.join(
args['workspace_dir'], TASKGRAPH_CACHE_DIR_NAME)
task_graph = taskgraph.TaskGraph(
taskgraph_cache_dir, n_workers, reporting_interval=5.0)
os.path.join(args['workspace_dir'], 'taskgraph_cache'),
n_workers, reporting_interval=5.0)
suffix = utils.make_suffix_string(args, 'results_suffix')
intermediate_dir = os.path.join(
@ -1079,7 +1078,7 @@ def _set_up_workspace(args):
output_dir = os.path.join(
args['workspace_dir'], OUTPUT_DIR_NAME)
utils.make_directories([output_dir, intermediate_dir, taskgraph_cache_dir])
utils.make_directories([output_dir, intermediate_dir])
return task_graph, n_workers, intermediate_dir, output_dir, suffix
@ -1957,7 +1956,7 @@ def _sum_n_rasters(
target_raster = None
def _read_transition_matrix(transition_csv_path, biophysical_dict):
def _read_transition_matrix(transition_csv_path, biophysical_df):
"""Read a transition CSV table in to a series of sparse matrices.
Args:
@ -1975,7 +1974,7 @@ def _read_transition_matrix(transition_csv_path, biophysical_dict):
* ``'high-impact-disturb'`` indicating a
high-impact disturbance
* ``''`` (blank), which is equivalent to no carbon change.
biophysical_dict (dict): A ``dict`` mapping of integer landcover codes
biophysical_df (pandas.DataFrame): A table mapping integer landcover codes
to biophysical values for disturbance and accumulation values for
soil and biomass carbon pools.
@ -1987,14 +1986,13 @@ def _read_transition_matrix(transition_csv_path, biophysical_dict):
the pool for the landcover transition.
"""
table = utils.read_csv_to_dataframe(
transition_csv_path, convert_cols_to_lower=False, convert_vals_to_lower=False)
transition_csv_path, MODEL_SPEC['args']['landcover_transitions_table']
).reset_index()
lulc_class_to_lucode = {}
max_lucode = 0
for (lucode, values) in biophysical_dict.items():
lulc_class_to_lucode[
str(values['lulc-class']).strip().lower()] = lucode
max_lucode = max(max_lucode, lucode)
max_lucode = biophysical_df.index.max()
for lucode, row in biophysical_df.iterrows():
lulc_class_to_lucode[row['lulc-class']] = lucode
# Load up a sparse matrix with the transitions to save on memory usage.
# The number of possible rows/cols is the value of the maximum possible
@ -2029,24 +2027,19 @@ def _read_transition_matrix(transition_csv_path, biophysical_dict):
"blank line encountered.")
break
# Strip any whitespace to eliminate leading/trailing whitespace
row = row.str.strip()
# skip rows starting with a blank cell, these are part of the legend
if not row['lulc-class']:
if pandas.isna(row['lulc-class']):
continue
try:
from_colname = str(row['lulc-class']).lower()
from_lucode = lulc_class_to_lucode[from_colname]
from_lucode = lulc_class_to_lucode[row['lulc-class']]
except KeyError:
raise ValueError("The transition table's 'lulc-class' column has "
f"a value, '{from_colname}', that was expected "
f"a value, '{row['lulc-class']}', that was expected "
"in the biophysical table but could not be "
"found.")
for colname, field_value in row.items():
to_colname = str(colname).strip().lower()
for to_colname, field_value in row.items():
# Skip the top row, only contains headers.
if to_colname == 'lulc-class':
@ -2062,27 +2055,24 @@ def _read_transition_matrix(transition_csv_path, biophysical_dict):
# Only set values where the transition HAS a value.
# Takes advantage of the sparse characteristic of the model.
if (isinstance(field_value, float) and
numpy.isnan(field_value)):
if pandas.isna(field_value):
continue
# When transition is a disturbance, we use the source landcover's
# disturbance values.
if field_value.endswith('disturb'):
soil_disturbance_matrix[from_lucode, to_lucode] = (
biophysical_dict[from_lucode][f'soil-{field_value}'])
biophysical_df[f'soil-{field_value}'][from_lucode])
biomass_disturbance_matrix[from_lucode, to_lucode] = (
biophysical_dict[from_lucode][f'biomass-{field_value}'])
biophysical_df[f'biomass-{field_value}'][from_lucode])
# When we're transitioning to a landcover that accumulates, use the
# target landcover's accumulation value.
elif field_value == 'accum':
soil_accumulation_matrix[from_lucode, to_lucode] = (
biophysical_dict[to_lucode][
'soil-yearly-accumulation'])
biophysical_df['soil-yearly-accumulation'][to_lucode])
biomass_accumulation_matrix[from_lucode, to_lucode] = (
biophysical_dict[to_lucode][
'biomass-yearly-accumulation'])
biophysical_df['biomass-yearly-accumulation'][to_lucode])
disturbance_matrices = {
'soil': soil_disturbance_matrix,
@ -2224,37 +2214,6 @@ def _reclassify_disturbance_magnitude(
target_raster_path, gdal.GDT_Float32, NODATA_FLOAT32_MIN)
def _extract_snapshots_from_table(csv_path):
"""Extract the year/raster snapshot mapping from a CSV.
No validation is performed on the years or raster paths.
Args:
csv_path (string): The path to a CSV on disk containing snapshot
years and a corresponding transition raster path. Snapshot years
may be in any order in the CSV, but must be integers and no two
years may be the same. Snapshot raster paths must refer to a
raster file located on disk representing the landcover at that
transition. If the path is absolute, the path will be used as
given. If the path is relative, the path will be interpreted as
relative to the parent directory of this CSV file.
Returns:
A ``dict`` mapping int snapshot years to their corresponding raster
paths. These raster paths will be absolute paths.
"""
table = utils.read_csv_to_dataframe(
csv_path, convert_vals_to_lower=False, expand_path_cols=['raster_path'])
output_dict = {}
table.set_index("snapshot_year", drop=False, inplace=True)
for index, row in table.iterrows():
output_dict[int(index)] = row['raster_path']
return output_dict
@validation.invest_validator
def validate(args, limit_to=None):
"""Validate an input dictionary for Coastal Blue Carbon.
@ -2277,8 +2236,10 @@ def validate(args, limit_to=None):
if ("landcover_snapshot_csv" not in invalid_keys and
"landcover_snapshot_csv" in sufficient_keys):
snapshots = _extract_snapshots_from_table(
args['landcover_snapshot_csv'])
snapshots = utils.read_csv_to_dataframe(
args['landcover_snapshot_csv'],
MODEL_SPEC['args']['landcover_snapshot_csv']
)['raster_path'].to_dict()
for snapshot_year, snapshot_raster_path in snapshots.items():
raster_error_message = validation.check_raster(
@ -2299,4 +2260,26 @@ def validate(args, limit_to=None):
analysis_year=args['analysis_year'],
latest_year=max(snapshots.keys()))))
# check for invalid options in the translation table
if ("landcover_transitions_table" not in invalid_keys and
"landcover_transitions_table" in sufficient_keys):
transitions_spec = MODEL_SPEC['args']['landcover_transitions_table']
transition_options = list(
transitions_spec['columns']['[LULC CODE]']['options'].keys())
# lowercase options since utils call will lowercase table values
transition_options = [x.lower() for x in transition_options]
transitions_df = utils.read_csv_to_dataframe(
args['landcover_transitions_table'], transitions_spec)
transitions_mask = ~transitions_df.isin(transition_options) & ~transitions_df.isna()
if transitions_mask.any(axis=None):
transition_numpy_mask = transitions_mask.values
transition_numpy_values = transitions_df.to_numpy()
bad_transition_values = list(
numpy.unique(transition_numpy_values[transition_numpy_mask]))
validation_warnings.append((
['landcover_transitions_table'],
INVALID_TRANSITION_VALUES_MSG.format(
model_transitions=(transition_options),
transition_values=bad_transition_values)))
return validation_warnings


@ -36,6 +36,7 @@ MODEL_SPEC = {
"A table mapping LULC codes from the snapshot rasters to the "
"corresponding LULC class names, and whether or not the "
"class is a coastal blue carbon habitat."),
"index_col": "code",
"columns": {
"code": {
"type": "integer",
@ -55,10 +56,10 @@ MODEL_SPEC = {
},
"landcover_snapshot_csv": {
"type": "csv",
"index_col": "snapshot_year",
"columns": {
"snapshot_year": {
"type": "number",
"units": u.year_AD,
"type": "integer",
"about": gettext("Year to snapshot.")},
"raster_path": {
"type": "raster",
@ -82,6 +83,7 @@ MODEL_SPEC = {
"source LULC class, and the first row represents the "
"destination LULC classes. Cells are populated with "
"transition states, or left empty if no such transition occurs."),
"index_col": "lulc-class",
"columns": {
"lulc-class": {
"type": "integer",
@ -112,6 +114,7 @@ MODEL_SPEC = {
"Table mapping each LULC type to impact and accumulation "
"information. This is a template that you will fill out to "
"create the biophysical table input to the main model."),
"index_col": "code",
"columns": {
**BIOPHYSICAL_COLUMNS_SPEC,
# remove "expression" property which doesn't go in output spec
@ -131,7 +134,7 @@ MODEL_SPEC = {
"to match all the other LULC maps."),
"bands": {1: {"type": "integer"}}
},
"task_cache": spec_utils.TASKGRAPH_DIR
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -164,8 +167,7 @@ def execute(args):
"""
suffix = utils.make_suffix_string(args, 'results_suffix')
output_dir = os.path.join(args['workspace_dir'], 'outputs_preprocessor')
taskgraph_cache_dir = os.path.join(args['workspace_dir'], 'task_cache')
utils.make_directories([output_dir, taskgraph_cache_dir])
utils.make_directories([output_dir])
try:
n_workers = int(args['n_workers'])
@ -175,11 +177,13 @@ def execute(args):
# TypeError when n_workers is None.
n_workers = -1 # Synchronous mode.
task_graph = taskgraph.TaskGraph(
taskgraph_cache_dir, n_workers, reporting_interval=5.0)
os.path.join(args['workspace_dir'], 'taskgraph_cache'),
n_workers, reporting_interval=5.0)
snapshots_dict = (
coastal_blue_carbon._extract_snapshots_from_table(
args['landcover_snapshot_csv']))
snapshots_dict = utils.read_csv_to_dataframe(
args['landcover_snapshot_csv'],
MODEL_SPEC['args']['landcover_snapshot_csv']
)['raster_path'].to_dict()
# Align the raster stack for analyzing the various transitions.
min_pixel_size = float('inf')
@ -209,14 +213,15 @@ def execute(args):
target_path_list=aligned_snapshot_paths,
task_name='Align input landcover rasters')
landcover_table = utils.read_csv_to_dataframe(
args['lulc_lookup_table_path'], 'code').to_dict(orient='index')
landcover_df = utils.read_csv_to_dataframe(
args['lulc_lookup_table_path'],
MODEL_SPEC['args']['lulc_lookup_table_path'])
target_transition_table = os.path.join(
output_dir, TRANSITION_TABLE.format(suffix=suffix))
_ = task_graph.add_task(
func=_create_transition_table,
args=(landcover_table,
args=(landcover_df,
aligned_snapshot_paths,
target_transition_table),
target_path_list=[target_transition_table],
@ -227,7 +232,7 @@ def execute(args):
output_dir, BIOPHYSICAL_TABLE.format(suffix=suffix))
_ = task_graph.add_task(
func=_create_biophysical_table,
args=(landcover_table, target_biophysical_table_path),
args=(landcover_df, target_biophysical_table_path),
target_path_list=[target_biophysical_table_path],
task_name='Write biophysical table template')
@ -235,20 +240,20 @@ def execute(args):
task_graph.join()
def _create_transition_table(landcover_table, lulc_snapshot_list,
def _create_transition_table(landcover_df, lulc_snapshot_list,
target_table_path):
"""Create the transition table from a series of landcover snapshots.
Args:
landcover_table (dict): A dict mapping integer landcover codes to dict
values indicating the landcover class name in the ``lulc-class``
field and ``True`` or ``False`` under the
``is_coastal_blue_carbon_habitat`` key.
landcover_df (pandas.DataFrame): A table mapping integer landcover
codes to values indicating the landcover class name in the
``lulc-class`` column and ``True`` or ``False`` under the
``is_coastal_blue_carbon_habitat`` column.
lulc_snapshot_list (list): A list of string paths to GDAL rasters on
disk. All rasters must have the same spatial reference, pixel size
and dimensions and must also all be integer rasters, where all
non-nodata pixel values must be represented in the
``landcover_table`` dict.
``landcover_df`` dataframe.
target_table_path (string): A string path to where the target
transition table should be written.
@ -317,13 +322,13 @@ def _create_transition_table(landcover_table, lulc_snapshot_list,
sparse_transition_table = {}
for from_lucode, to_lucode in transition_pairs:
try:
from_is_cbc = landcover_table[
from_lucode]['is_coastal_blue_carbon_habitat']
to_is_cbc = landcover_table[
to_lucode]['is_coastal_blue_carbon_habitat']
from_is_cbc = landcover_df[
'is_coastal_blue_carbon_habitat'][from_lucode]
to_is_cbc = landcover_df[
'is_coastal_blue_carbon_habitat'][to_lucode]
except KeyError:
for variable in (from_lucode, to_lucode):
if variable not in landcover_table:
if variable not in landcover_df.index:
raise ValueError(
'The landcover table is missing a row with the '
f'landuse code {variable}.')
@ -331,14 +336,14 @@ def _create_transition_table(landcover_table, lulc_snapshot_list,
sparse_transition_table[(from_lucode, to_lucode)] = (
transition_types[(from_is_cbc, to_is_cbc)])
code_list = sorted([code for code in landcover_table.keys()])
code_list = sorted(landcover_df.index)
lulc_class_list_sorted = [
landcover_table[code]['lulc-class'] for code in code_list]
landcover_df['lulc-class'][code] for code in code_list]
with open(target_table_path, 'w') as csv_file:
fieldnames = ['lulc-class'] + lulc_class_list_sorted
csv_file.write(f"{','.join(fieldnames)}\n")
for row_code in code_list:
class_name = landcover_table[row_code]['lulc-class']
class_name = landcover_df['lulc-class'][row_code]
row = [class_name]
for col_code in code_list:
try:
@ -361,7 +366,7 @@ def _create_transition_table(landcover_table, lulc_snapshot_list,
csv_file.write("\n,NCC (no-carbon-change)")
def _create_biophysical_table(landcover_table, target_biophysical_table_path):
def _create_biophysical_table(landcover_df, target_biophysical_table_path):
"""Write the biophysical table template to disk.
The biophysical table templates contains all of the fields required by the
@ -370,8 +375,8 @@ def _create_biophysical_table(landcover_table, target_biophysical_table_path):
table.
Args:
landcover_table (dict): A dict mapping int landcover codes to a dict
with string keys that map to numeric or string column values.
landcover_df (pandas.DataFrame): A table mapping int landcover codes
to biophysical data
target_biophysical_table_path (string): The path to where the
biophysical table template will be stored on disk.
@ -384,16 +389,19 @@ def _create_biophysical_table(landcover_table, target_biophysical_table_path):
with open(target_biophysical_table_path, 'w') as bio_table:
bio_table.write(f"{','.join(target_column_names)}\n")
for lulc_code in sorted(landcover_table.keys()):
for lulc_code, row in landcover_df.sort_index().iterrows():
# 2 columns are defined below, and we need 1 less comma to only
# have commas between fields.
row = []
for colname in target_column_names:
try:
# Use the user's defined value if it exists
row.append(str(landcover_table[lulc_code][colname]))
except KeyError:
row.append('')
if colname == 'code':
row.append(str(lulc_code))
else:
try:
# Use the user's defined value if it exists
row.append(str(landcover_df[colname][lulc_code]))
except KeyError:
row.append('')
bio_table.write(f"{','.join(row)}\n")


@ -247,17 +247,11 @@ MODEL_SPEC = {
"represented by any value and absence of the habitat "
"can be represented by 0 and nodata values.")},
"rank": {
"type": "option_string",
"options": {
"1": {"description": gettext("very high protection")},
"2": {"description": gettext("high protection")},
"3": {"description": gettext("moderate protection")},
"4": {"description": gettext("low protection")},
"5": {"description": gettext("very low protection")}
},
"type": "integer",
"about": gettext(
"Relative amount of coastline protection this habitat "
"provides.")
"provides, from 1 (very high protection) to 5 "
"(very low protection.")
},
"protection distance (m)": {
"type": "number",
@ -268,6 +262,7 @@ MODEL_SPEC = {
"no protection to the coastline.")
},
},
"index_col": "id",
"about": gettext(
"Table that specifies spatial habitat data and parameters."),
"name": gettext("habitats table")
@ -365,6 +360,7 @@ MODEL_SPEC = {
},
"coastal_exposure.csv": {
"about": "This is an identical copy of the attribute table of coastal_exposure.gpkg provided in csv format for convenience. Users may wish to modify or add to the columns of this table in order to calculate exposure indices for custom scenarios.",
"index_col": "shore_id",
"columns": FINAL_OUTPUT_FIELDS
},
"intermediate": {
@ -460,6 +456,7 @@ MODEL_SPEC = {
"habitat_protection.csv": {
"about": (
"Shore points with associated habitat data"),
"index_col": "shore_id",
"columns": {
"shore_id": {
"type": "integer",
@ -685,10 +682,10 @@ MODEL_SPEC = {
"fields": WWIII_FIELDS
}
}
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -800,8 +797,6 @@ def execute(args):
geomorph_dir, wind_wave_dir, surge_dir, population_dir, slr_dir])
file_suffix = utils.make_suffix_string(args, 'results_suffix')
taskgraph_cache_dir = os.path.join(
intermediate_dir, '_taskgraph_working_dir')
try:
n_workers = int(args['n_workers'])
except (KeyError, ValueError, TypeError):
@ -809,7 +804,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # Single process mode.
task_graph = taskgraph.TaskGraph(taskgraph_cache_dir, n_workers)
task_graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
model_resolution = float(args['model_resolution'])
max_fetch_distance = float(args['max_fetch_distance'])
@ -2315,42 +2311,41 @@ def _schedule_habitat_tasks(
"""
habitat_dataframe = utils.read_csv_to_dataframe(
habitat_table_path, convert_vals_to_lower=False, expand_path_cols=['path'])
habitat_dataframe = habitat_dataframe.rename(
columns={'protection distance (m)': 'distance'})
habitat_table_path, MODEL_SPEC['args']['habitat_table_path']
).rename(columns={'protection distance (m)': 'distance'})
habitat_task_list = []
habitat_pickles_list = []
for habitat_row in habitat_dataframe.itertuples():
for _id, habitat_row in habitat_dataframe.iterrows():
target_habitat_pickle_path = os.path.join(
working_dir, f'{habitat_row.id}{file_suffix}.pickle')
working_dir, f'{_id}{file_suffix}.pickle')
habitat_pickles_list.append(target_habitat_pickle_path)
gis_type = pygeoprocessing.get_gis_type(habitat_row.path)
if gis_type == 2:
habitat_task_list.append(task_graph.add_task(
func=search_for_vector_habitat,
args=(base_shore_point_vector_path,
habitat_row.distance,
habitat_row.rank,
habitat_row.id,
habitat_row.path,
habitat_row['distance'],
habitat_row['rank'],
_id,
habitat_row['path'],
target_habitat_pickle_path),
target_path_list=[target_habitat_pickle_path],
task_name=f'searching for {habitat_row.id}'))
task_name=f'searching for {_id}'))
continue
if gis_type == 1:
habitat_task_list.append(task_graph.add_task(
func=search_for_raster_habitat,
args=(base_shore_point_vector_path,
habitat_row.distance,
habitat_row.rank,
habitat_row.id,
habitat_row.path,
habitat_row['distance'],
habitat_row['rank'],
_id,
habitat_row['path'],
target_habitat_pickle_path,
model_resolution,
file_suffix),
target_path_list=[target_habitat_pickle_path],
task_name=f'searching for {habitat_row.id}'))
task_name=f'searching for {_id}'))
return habitat_task_list, habitat_pickles_list
@ -2835,12 +2830,14 @@ def assemble_results_and_calculate_exposure(
final_values_dict[var_name] = pickle.load(file)
habitat_df = utils.read_csv_to_dataframe(
habitat_protection_path, convert_cols_to_lower=False, convert_vals_to_lower=False)
habitat_protection_path, MODEL_SPEC['outputs']['intermediate'][
'contents']['habitats']['contents']['habitat_protection.csv']
).rename(columns={'r_hab': 'R_hab'})
output_layer.StartTransaction()
for feature in output_layer:
shore_id = feature.GetField(SHORE_ID_FIELD)
# The R_hab ranks were stored in a CSV, now this dataframe:
rank = habitat_df[habitat_df[SHORE_ID_FIELD] == shore_id][R_hab_name]
rank = habitat_df.loc[shore_id, R_hab_name]
feature.SetField(str(R_hab_name), float(rank))
# The other variables were stored in pickles, now this dict:
for fieldname in final_values_dict:
@ -3235,7 +3232,6 @@ def _aggregate_raster_values_in_radius(
kernel_mask &= ~utils.array_equals_nodata(array, nodata)
result[shore_id] = aggregation_op(array, kernel_mask)
with open(target_pickle_path, 'wb') as pickle_file:
pickle.dump(result, pickle_file)
@ -3465,8 +3461,7 @@ def _validate_habitat_table_paths(habitat_table_path):
ValueError if any vector in the ``path`` column cannot be opened.
"""
habitat_dataframe = utils.read_csv_to_dataframe(
habitat_table_path, convert_cols_to_lower=False, convert_vals_to_lower=False,
expand_path_cols=['path'])
habitat_table_path, MODEL_SPEC['args']['habitat_table_path'])
bad_paths = []
for habitat_row in habitat_dataframe.itertuples():
try:


@ -22,6 +22,87 @@ from .crop_production_regression import NUTRIENTS
LOGGER = logging.getLogger(__name__)
CROP_OPTIONS = {
# TODO: use human-readable translatable crop names (#614)
crop: {"description": crop} for crop in [
"abaca", "agave", "alfalfa", "almond", "aniseetc",
"apple", "apricot", "areca", "artichoke", "asparagus",
"avocado", "bambara", "banana", "barley", "bean",
"beetfor", "berrynes", "blueberry", "brazil",
"canaryseed", "carob", "carrot", "carrotfor", "cashew",
"broadbean", "buckwheat", "cabbage", "cabbagefor",
"cashewapple", "cassava", "castor", "cauliflower",
"cerealnes", "cherry", "chestnut", "chickpea",
"chicory", "chilleetc", "cinnamon", "citrusnes",
"clove", "clover", "cocoa", "coconut", "coffee",
"cotton", "cowpea", "cranberry", "cucumberetc",
"currant", "date", "eggplant", "fibrenes", "fig",
"flax", "fonio", "fornes", "fruitnes", "garlic",
"ginger", "gooseberry", "grape", "grapefruitetc",
"grassnes", "greenbean", "greenbroadbean", "greencorn",
"greenonion", "greenpea", "groundnut", "hazelnut",
"hemp", "hempseed", "hop", "jute", "jutelikefiber",
"kapokfiber", "kapokseed", "karite", "kiwi", "kolanut",
"legumenes", "lemonlime", "lentil", "lettuce",
"linseed", "lupin", "maize", "maizefor", "mango",
"mate", "melonetc", "melonseed", "millet",
"mixedgrain", "mixedgrass", "mushroom", "mustard",
"nutmeg", "nutnes", "oats", "oilpalm", "oilseedfor",
"oilseednes", "okra", "olive", "onion", "orange",
"papaya", "pea", "peachetc", "pear", "pepper",
"peppermint", "persimmon", "pigeonpea", "pimento",
"pineapple", "pistachio", "plantain", "plum", "poppy",
"potato", "pulsenes", "pumpkinetc", "pyrethrum",
"quince", "quinoa", "ramie", "rapeseed", "rasberry",
"rice", "rootnes", "rubber", "rye", "ryefor",
"safflower", "sesame", "sisal", "sorghum",
"sorghumfor", "sourcherry, soybean", "spicenes",
"spinach", "stonefruitnes", "strawberry", "stringbean",
"sugarbeet", "sugarcane", "sugarnes", "sunflower",
"swedefor", "sweetpotato", "tangetc", "taro", "tea",
"tobacco", "tomato", "triticale", "tropicalnes",
"tung", "turnipfor", "vanilla", "vegetablenes",
"vegfor", "vetch", "walnut", "watermelon", "wheat",
"yam", "yautia"
]
}
nutrient_units = {
"protein": u.gram/u.hectogram,
"lipid": u.gram/u.hectogram, # total lipid
"energy": u.kilojoule/u.hectogram,
"ca": u.milligram/u.hectogram, # calcium
"fe": u.milligram/u.hectogram, # iron
"mg": u.milligram/u.hectogram, # magnesium
"ph": u.milligram/u.hectogram, # phosphorus
"k": u.milligram/u.hectogram, # potassium
"na": u.milligram/u.hectogram, # sodium
"zn": u.milligram/u.hectogram, # zinc
"cu": u.milligram/u.hectogram, # copper
"fl": u.microgram/u.hectogram, # fluoride
"mn": u.milligram/u.hectogram, # manganese
"se": u.microgram/u.hectogram, # selenium
"vita": u.IU/u.hectogram, # vitamin A
"betac": u.microgram/u.hectogram, # beta carotene
"alphac": u.microgram/u.hectogram, # alpha carotene
"vite": u.milligram/u.hectogram, # vitamin e
"crypto": u.microgram/u.hectogram, # cryptoxanthin
"lycopene": u.microgram/u.hectogram, # lycopene
"lutein": u.microgram/u.hectogram, # lutein + zeaxanthin
"betat": u.milligram/u.hectogram, # beta tocopherol
"gammat": u.milligram/u.hectogram, # gamma tocopherol
"deltat": u.milligram/u.hectogram, # delta tocopherol
"vitc": u.milligram/u.hectogram, # vitamin C
"thiamin": u.milligram/u.hectogram,
"riboflavin": u.milligram/u.hectogram,
"niacin": u.milligram/u.hectogram,
"pantothenic": u.milligram/u.hectogram, # pantothenic acid
"vitb6": u.milligram/u.hectogram, # vitamin B6
"folate": u.microgram/u.hectogram,
"vitb12": u.microgram/u.hectogram, # vitamin B12
"vitk": u.microgram/u.hectogram, # vitamin K
}
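The unit expressions above come from a pint-style unit registry; nutrient data is reported per 100 g of crop, hence the hectogram denominators. A small sketch of how such an expression composes and converts, using a plain pint.UnitRegistry (the real code imports `u` from the package rather than building one here) and made-up numbers:

import pint

# Stand-in for the package's shared registry; pint accepts SI prefixes,
# so hectogram, i.e. "per 100 g of crop", works out of the box.
u = pint.UnitRegistry()

nutrient_per_100g = 2.5 * (u.milligram / u.hectogram)  # made-up nutrient density
crop_mass = 4 * u.metric_ton                           # made-up harvest

# pint keeps the dimensions straight when scaling up to a harvest total.
total_nutrient = (crop_mass * nutrient_per_100g).to(u.gram)
print(total_nutrient)  # 100.0 gram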
MODEL_SPEC = {
"model_name": MODEL_METADATA["crop_production_percentile"].model_title,
"pyname": MODEL_METADATA["crop_production_percentile"].pyname,
@ -44,54 +125,12 @@ MODEL_SPEC = {
},
"landcover_to_crop_table_path": {
"type": "csv",
"index_col": "crop_name",
"columns": {
"lucode": {"type": "integer"},
"crop_name": {
"type": "option_string",
"options": {
# TODO: use human-readable translatable crop names (#614)
crop: {"description": crop} for crop in [
"abaca", "agave", "alfalfa", "almond", "aniseetc",
"apple", "apricot", "areca", "artichoke", "asparagus",
"avocado", "bambara", "banana", "barley", "bean",
"beetfor", "berrynes", "blueberry", "brazil",
"canaryseed", "carob", "carrot", "carrotfor", "cashew",
"broadbean", "buckwheat", "cabbage", "cabbagefor",
"cashewapple", "cassava", "castor", "cauliflower",
"cerealnes", "cherry", "chestnut", "chickpea",
"chicory", "chilleetc", "cinnamon", "citrusnes",
"clove", "clover", "cocoa", "coconut", "coffee",
"cotton", "cowpea", "cranberry", "cucumberetc",
"currant", "date", "eggplant", "fibrenes", "fig",
"flax", "fonio", "fornes", "fruitnes", "garlic",
"ginger", "gooseberry", "grape", "grapefruitetc",
"grassnes", "greenbean", "greenbroadbean", "greencorn",
"greenonion", "greenpea", "groundnut", "hazelnut",
"hemp", "hempseed", "hop", "jute", "jutelikefiber",
"kapokfiber", "kapokseed", "karite", "kiwi", "kolanut",
"legumenes", "lemonlime", "lentil", "lettuce",
"linseed", "lupin", "maize", "maizefor", "mango",
"mate", "melonetc", "melonseed", "millet",
"mixedgrain", "mixedgrass", "mushroom", "mustard",
"nutmeg", "nutnes", "oats", "oilpalm", "oilseedfor",
"oilseednes", "okra", "olive", "onion", "orange",
"papaya", "pea", "peachetc", "pear", "pepper",
"peppermint", "persimmon", "pigeonpea", "pimento",
"pineapple", "pistachio", "plantain", "plum", "poppy",
"potato", "pulsenes", "pumpkinetc", "pyrethrum",
"quince", "quinoa", "ramie", "rapeseed", "rasberry",
"rice", "rootnes", "rubber", "rye", "ryefor",
"safflower", "sesame", "sisal", "sorghum",
"sorghumfor", "sourcherry, soybean", "spicenes",
"spinach", "stonefruitnes", "strawberry", "stringbean",
"sugarbeet", "sugarcane", "sugarnes", "sunflower",
"swedefor", "sweetpotato", "tangetc", "taro", "tea",
"tobacco", "tomato", "triticale", "tropicalnes",
"tung", "turnipfor", "vanilla", "vegetablenes",
"vegfor", "vetch", "walnut", "watermelon", "wheat",
"yam", "yautia"
]
}
"options": CROP_OPTIONS
}
},
"about": gettext(
@ -116,6 +155,7 @@ MODEL_SPEC = {
"contents": {
"[CROP]_percentile_yield_table.csv": {
"type": "csv",
"index_col": "climate_bin",
"columns": {
"climate_bin": {"type": "integer"},
"yield_25th": {
@ -163,45 +203,19 @@ MODEL_SPEC = {
},
"crop_nutrient.csv": {
"type": "csv",
"index_col": "crop",
"columns": {
nutrient: {
"crop": {
"type": "option_string",
"options": CROP_OPTIONS
},
"percentrefuse": {
"type": "percent"
},
**{nutrient: {
"type": "number",
"units": units
} for nutrient, units in {
"protein": u.gram/u.hectogram,
"lipid": u.gram/u.hectogram, # total lipid
"energy": u.kilojoule/u.hectogram,
"ca": u.milligram/u.hectogram, # calcium
"fe": u.milligram/u.hectogram, # iron
"mg": u.milligram/u.hectogram, # magnesium
"ph": u.milligram/u.hectogram, # phosphorus
"k": u.milligram/u.hectogram, # potassium
"na": u.milligram/u.hectogram, # sodium
"zn": u.milligram/u.hectogram, # zinc
"cu": u.milligram/u.hectogram, # copper
"fl": u.microgram/u.hectogram, # fluoride
"mn": u.milligram/u.hectogram, # manganese
"se": u.microgram/u.hectogram, # selenium
"vita": u.IU/u.hectogram, # vitamin A
"betac": u.microgram/u.hectogram, # beta carotene
"alphac": u.microgram/u.hectogram, # alpha carotene
"vite": u.milligram/u.hectogram, # vitamin e
"crypto": u.microgram/u.hectogram, # cryptoxanthin
"lycopene": u.microgram/u.hectogram, # lycopene
"lutein": u.microgram/u.hectogram, # lutein + zeaxanthin
"betaT": u.milligram/u.hectogram, # beta tocopherol
"gammaT": u.milligram/u.hectogram, # gamma tocopherol
"deltaT": u.milligram/u.hectogram, # delta tocopherol
"vitc": u.milligram/u.hectogram, # vitamin C
"thiamin": u.milligram/u.hectogram,
"riboflavin": u.milligram/u.hectogram,
"niacin": u.milligram/u.hectogram,
"pantothenic": u.milligram/u.hectogram, # pantothenic acid
"vitb6": u.milligram/u.hectogram, # vitamin B6
"folate": u.microgram/u.hectogram,
"vitb12": u.microgram/u.hectogram, # vitamin B12
"vitk": u.microgram/u.hectogram, # vitamin K
}.items()
} for nutrient, units in nutrient_units.items()}
}
}
},
@ -213,6 +227,7 @@ MODEL_SPEC = {
"aggregate_results.csv": {
"created_if": "aggregate_polygon_path",
"about": "Model results aggregated to AOI polygons",
"index_col": "FID",
"columns": {
"FID": {
"type": "integer",
@ -251,6 +266,7 @@ MODEL_SPEC = {
},
"result_table.csv": {
"about": "Model results aggregated by crop",
"index_col": "crop",
"columns": {
"crop": {
"type": "freestyle_string",
@ -346,10 +362,10 @@ MODEL_SPEC = {
"bands": {1: {
"type": "number", "units": u.metric_ton/u.hectare
}}
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -405,12 +421,7 @@ _AGGREGATE_VECTOR_FILE_PATTERN = os.path.join(
_AGGREGATE_TABLE_FILE_PATTERN = os.path.join(
'.', 'aggregate_results%s.csv')
_EXPECTED_NUTRIENT_TABLE_HEADERS = [
'Protein', 'Lipid', 'Energy', 'Ca', 'Fe', 'Mg', 'Ph', 'K', 'Na', 'Zn',
'Cu', 'Fl', 'Mn', 'Se', 'VitA', 'betaC', 'alphaC', 'VitE', 'Crypto',
'Lycopene', 'Lutein', 'betaT', 'gammaT', 'deltaT', 'VitC', 'Thiamin',
'Riboflavin', 'Niacin', 'Pantothenic', 'VitB6', 'Folate', 'VitB12',
'VitK']
_EXPECTED_NUTRIENT_TABLE_HEADERS = list(nutrient_units.keys())
_EXPECTED_LUCODE_TABLE_HEADER = 'lucode'
_NODATA_YIELD = -1
@ -458,10 +469,11 @@ def execute(args):
None.
"""
crop_to_landcover_table = utils.read_csv_to_dataframe(
args['landcover_to_crop_table_path'], 'crop_name').to_dict(orient='index')
crop_to_landcover_df = utils.read_csv_to_dataframe(
args['landcover_to_crop_table_path'],
MODEL_SPEC['args']['landcover_to_crop_table_path'])
bad_crop_name_list = []
for crop_name in crop_to_landcover_table:
for crop_name in crop_to_landcover_df.index:
crop_climate_bin_raster_path = os.path.join(
args['model_data_path'],
_EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)
@ -498,8 +510,6 @@ def execute(args):
edge_samples=11)
# Initialize a TaskGraph
work_token_dir = os.path.join(
output_dir, _INTERMEDIATE_OUTPUT_DIR, '_taskgraph_working_dir')
try:
n_workers = int(args['n_workers'])
except (KeyError, ValueError, TypeError):
@ -507,14 +517,14 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # Single process mode.
task_graph = taskgraph.TaskGraph(work_token_dir, n_workers)
task_graph = taskgraph.TaskGraph(
os.path.join(output_dir, 'taskgraph_cache'), n_workers)
dependent_task_list = []
crop_lucode = None
observed_yield_nodata = None
for crop_name in crop_to_landcover_table:
crop_lucode = crop_to_landcover_table[crop_name][
_EXPECTED_LUCODE_TABLE_HEADER]
for crop_name, row in crop_to_landcover_df.iterrows():
crop_lucode = row[_EXPECTED_LUCODE_TABLE_HEADER]
LOGGER.info("Processing crop %s", crop_name)
crop_climate_bin_raster_path = os.path.join(
args['model_data_path'],
@ -540,11 +550,13 @@ def execute(args):
climate_percentile_yield_table_path = os.path.join(
args['model_data_path'],
_CLIMATE_PERCENTILE_TABLE_PATTERN % crop_name)
crop_climate_percentile_table = utils.read_csv_to_dataframe(
climate_percentile_yield_table_path, 'climate_bin').to_dict(orient='index')
crop_climate_percentile_df = utils.read_csv_to_dataframe(
climate_percentile_yield_table_path,
MODEL_SPEC['args']['model_data_path']['contents'][
'climate_percentile_yield_tables']['contents'][
'[CROP]_percentile_yield_table.csv'])
yield_percentile_headers = [
x for x in list(crop_climate_percentile_table.values())[0]
if x != 'climate_bin']
x for x in crop_climate_percentile_df.columns if x != 'climate_bin']
reclassify_error_details = {
'raster_name': f'{crop_name} Climate Bin',
@ -556,10 +568,8 @@ def execute(args):
output_dir,
_INTERPOLATED_YIELD_PERCENTILE_FILE_PATTERN % (
crop_name, yield_percentile_id, file_suffix))
bin_to_percentile_yield = dict([
(bin_id,
crop_climate_percentile_table[bin_id][yield_percentile_id])
for bin_id in crop_climate_percentile_table])
bin_to_percentile_yield = (
crop_climate_percentile_df[yield_percentile_id].to_dict())
# reclassify nodata to a valid value of 0
# we're assuming that the crop doesn't exist where there is no data
# this is more likely than assuming the crop does exist, esp.
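Because the percentile yield table is now read with climate_bin as the index, each yield-percentile column collapses straight into the {bin: yield} mapping that the reclassification step needs. A pandas-only sketch with a hypothetical three-bin table:

import io

import pandas

# Hypothetical percentile yield table for one crop, indexed by climate bin
# (the real table is read via utils.read_csv_to_dataframe and MODEL_SPEC).
csv_text = io.StringIO(
    "climate_bin,yield_25th,yield_50th,yield_75th\n"
    "1,0.9,1.4,2.1\n"
    "2,1.2,1.9,2.8\n"
    "3,0.4,0.7,1.1\n")
percentile_df = pandas.read_csv(csv_text, index_col='climate_bin')

# Mirrors the filter above; with climate_bin already in the index this
# simply copies the remaining column names.
yield_percentile_headers = [
    col for col in percentile_df.columns if col != 'climate_bin']

# One column -> the {bin: yield} value map used to reclassify the
# climate bin raster.
bin_to_percentile_yield = percentile_df['yield_50th'].to_dict()
print(yield_percentile_headers)    # ['yield_25th', 'yield_50th', 'yield_75th']
print(bin_to_percentile_yield)     # {1: 1.4, 2: 1.9, 3: 0.7}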
@ -698,17 +708,17 @@ def execute(args):
# both 'crop_nutrient.csv' and 'crop' are known data/header values for
# this model data.
nutrient_table = utils.read_csv_to_dataframe(
nutrient_df = utils.read_csv_to_dataframe(
os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
'crop', convert_cols_to_lower=False, convert_vals_to_lower=False
).to_dict(orient='index')
MODEL_SPEC['args']['model_data_path']['contents']['crop_nutrient.csv'])
result_table_path = os.path.join(
output_dir, 'result_table%s.csv' % file_suffix)
crop_names = crop_to_landcover_df.index.to_list()
tabulate_results_task = task_graph.add_task(
func=tabulate_results,
args=(nutrient_table, yield_percentile_headers,
crop_to_landcover_table, pixel_area_ha,
args=(nutrient_df, yield_percentile_headers,
crop_names, pixel_area_ha,
args['landcover_raster_path'], landcover_nodata,
output_dir, file_suffix, result_table_path),
target_path_list=[result_table_path],
@ -727,7 +737,7 @@ def execute(args):
args=(args['aggregate_polygon_path'],
target_aggregate_vector_path,
landcover_raster_info['projection_wkt'],
crop_to_landcover_table, nutrient_table,
crop_names, nutrient_df,
yield_percentile_headers, output_dir, file_suffix,
aggregate_results_table_path),
target_path_list=[target_aggregate_vector_path,
@ -851,19 +861,18 @@ def _mask_observed_yield_op(
def tabulate_results(
nutrient_table, yield_percentile_headers,
crop_to_landcover_table, pixel_area_ha, landcover_raster_path,
nutrient_df, yield_percentile_headers,
crop_names, pixel_area_ha, landcover_raster_path,
landcover_nodata, output_dir, file_suffix, target_table_path):
"""Write table with total yield and nutrient results by crop.
This function includes all the operations that write to results_table.csv.
Args:
nutrient_table (dict): a lookup of nutrient values by crop in the
form of nutrient_table[<crop>][<nutrient>].
nutrient_df (pandas.DataFrame): a table of nutrient values by crop
yield_percentile_headers (list): list of strings indicating percentiles
at which yield was calculated.
crop_to_landcover_table (dict): landcover codes keyed by crop names
crop_names (list): list of crop names
pixel_area_ha (float): area of lulc raster cells (hectares)
landcover_raster_path (string): path to landcover raster
landcover_nodata (float): landcover raster nodata value
@ -894,7 +903,7 @@ def tabulate_results(
'crop,area (ha),' + 'production_observed,' +
','.join(production_percentile_headers) + ',' + ','.join(
nutrient_headers) + '\n')
for crop_name in sorted(crop_to_landcover_table):
for crop_name in sorted(crop_names):
result_table.write(crop_name)
production_lookup = {}
production_pixel_count = 0
@ -942,19 +951,19 @@ def tabulate_results(
# convert 100g to Mg and fraction left over from refuse
nutrient_factor = 1e4 * (
1 - nutrient_table[crop_name]['Percentrefuse'] / 100)
1 - nutrient_df['percentrefuse'][crop_name] / 100)
for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
for yield_percentile_id in sorted(yield_percentile_headers):
total_nutrient = (
nutrient_factor *
production_lookup[yield_percentile_id] *
nutrient_table[crop_name][nutrient_id])
nutrient_df[nutrient_id][crop_name])
result_table.write(",%f" % (total_nutrient))
result_table.write(
",%f" % (
nutrient_factor *
production_lookup['observed'] *
nutrient_table[crop_name][nutrient_id]))
nutrient_df[nutrient_id][crop_name]))
result_table.write('\n')
total_area = 0
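The nutrient_factor above folds two conversions into one constant: nutrient values are reported per 100 g (hectogram) of crop while production is in metric tons (1 Mg = 10,000 hg), and only the non-refused fraction of the harvest counts. A worked example with made-up numbers:

# Hypothetical values: 12 Mg of production, 20% refuse,
# 2.5 mg of some nutrient per 100 g of edible crop.
production_mg = 12.0          # metric tons (Mg) of crop
percentrefuse = 20.0          # percent of harvested mass discarded
nutrient_per_100g = 2.5       # nutrient units per hectogram of crop

# 1e4 converts Mg -> hectograms; (1 - refuse/100) keeps the edible fraction.
nutrient_factor = 1e4 * (1 - percentrefuse / 100)   # 8000.0

total_nutrient = nutrient_factor * production_mg * nutrient_per_100g
print(total_nutrient)   # 240000.0 nutrient units (here, mg)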
@ -972,8 +981,8 @@ def tabulate_results(
def aggregate_to_polygons(
base_aggregate_vector_path, target_aggregate_vector_path,
landcover_raster_projection, crop_to_landcover_table,
nutrient_table, yield_percentile_headers, output_dir, file_suffix,
landcover_raster_projection, crop_names,
nutrient_df, yield_percentile_headers, output_dir, file_suffix,
target_aggregate_table_path):
"""Write table with aggregate results of yield and nutrient values.
@ -986,9 +995,8 @@ def aggregate_to_polygons(
target_aggregate_vector_path (string):
path to re-projected copy of polygon vector
landcover_raster_projection (string): a WKT projection string
crop_to_landcover_table (dict): landcover codes keyed by crop names
nutrient_table (dict): a lookup of nutrient values by crop in the
form of nutrient_table[<crop>][<nutrient>].
crop_names (list): list of crop names
nutrient_df (pandas.DataFrame): a table of nutrient values by crop
yield_percentile_headers (list): list of strings indicating percentiles
at which yield was calculated.
output_dir (string): the file path to the output workspace.
@ -1012,10 +1020,10 @@ def aggregate_to_polygons(
total_nutrient_table = collections.defaultdict(
lambda: collections.defaultdict(lambda: collections.defaultdict(
float)))
for crop_name in crop_to_landcover_table:
for crop_name in crop_names:
# convert 100g to Mg and fraction left over from refuse
nutrient_factor = 1e4 * (
1 - nutrient_table[crop_name]['Percentrefuse'] / 100)
1 - nutrient_df['percentrefuse'][crop_name] / 100)
# loop over percentiles
for yield_percentile_id in yield_percentile_headers:
percentile_crop_production_raster_path = os.path.join(
@ -1040,24 +1048,24 @@ def aggregate_to_polygons(
total_yield_lookup['%s_%s' % (
crop_name, yield_percentile_id)][
id_index]['sum'] *
nutrient_table[crop_name][nutrient_id])
nutrient_df[nutrient_id][crop_name])
# process observed
observed_yield_path = os.path.join(
output_dir, _OBSERVED_PRODUCTION_FILE_PATTERN % (
crop_name, file_suffix))
total_yield_lookup['%s_observed' % crop_name] = (
total_yield_lookup[f'{crop_name}_observed'] = (
pygeoprocessing.zonal_statistics(
(observed_yield_path, 1),
target_aggregate_vector_path))
for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
for id_index in total_yield_lookup['%s_observed' % crop_name]:
for id_index in total_yield_lookup[f'{crop_name}_observed']:
total_nutrient_table[
nutrient_id]['observed'][id_index] += (
nutrient_factor *
total_yield_lookup[
'%s_observed' % crop_name][id_index]['sum'] *
nutrient_table[crop_name][nutrient_id])
f'{crop_name}_observed'][id_index]['sum'] *
nutrient_df[nutrient_id][crop_name])
# report everything to a table
with open(target_aggregate_table_path, 'w') as aggregate_table:

View File

@ -86,6 +86,7 @@ MODEL_SPEC = {
},
"landcover_to_crop_table_path": {
"type": "csv",
"index_col": "crop_name",
"columns": {
"lucode": {"type": "integer"},
"crop_name": {
@ -101,6 +102,7 @@ MODEL_SPEC = {
},
"fertilization_rate_table_path": {
"type": "csv",
"index_col": "crop_name",
"columns": {
"crop_name": {
"type": "option_string",
@ -129,29 +131,38 @@ MODEL_SPEC = {
"contents": {
"[CROP]_regression_yield_table.csv": {
"type": "csv",
"index_col": "climate_bin",
"columns": {
'climate_bin': {"type": "integer"},
'yield_ceiling': {
"climate_bin": {"type": "integer"},
"yield_ceiling": {
"type": "number",
"units": u.metric_ton/u.hectare
},
'b_nut': {"type": "number", "units": u.none},
'b_k2o': {"type": "number", "units": u.none},
'c_n': {"type": "number", "units": u.none},
'c_p2o5': {"type": "number", "units": u.none},
'c_k2o': {"type": "number", "units": u.none}
"b_nut": {"type": "number", "units": u.none},
"b_k2o": {"type": "number", "units": u.none},
"c_n": {"type": "number", "units": u.none},
"c_p2o5": {"type": "number", "units": u.none},
"c_k2o": {"type": "number", "units": u.none}
}
}
}
},
"crop_nutrient.csv": {
"type": "csv",
"index_col": "crop",
"columns": {
nutrient: {
"crop": {
"type": "option_string",
"options": CROPS
},
"percentrefuse": {
"type": "percent"
},
**{nutrient: {
"about": about,
"type": "number",
"units": units
} for nutrient, about, units in NUTRIENTS
} for nutrient, about, units in NUTRIENTS}
}
},
"extended_climate_bin_maps": {
@ -186,6 +197,7 @@ MODEL_SPEC = {
"aggregate_results.csv": {
"created_if": "aggregate_polygon_path",
"about": "Table of results aggregated by ",
"index_col": "FID",
"columns": {
"FID": {
"type": "integer",
@ -213,6 +225,7 @@ MODEL_SPEC = {
},
"result_table.csv": {
"about": "Table of results aggregated by crop",
"index_col": "crop",
"columns": {
"crop": {
"type": "freestyle_string",
@ -306,10 +319,10 @@ MODEL_SPEC = {
"bands": {1: {
"type": "number", "units": u.metric_ton/u.hectare
}}
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -319,7 +332,7 @@ _REGRESSION_TABLE_PATTERN = os.path.join(
'climate_regression_yield_tables', '%s_regression_yield_table.csv')
_EXPECTED_REGRESSION_TABLE_HEADERS = [
'climate_bin', 'yield_ceiling', 'b_nut', 'b_k2o', 'c_n', 'c_p2o5', 'c_k2o']
'yield_ceiling', 'b_nut', 'b_k2o', 'c_n', 'c_p2o5', 'c_k2o']
# crop_name, yield_regression_id, file_suffix
_COARSE_YIELD_REGRESSION_PARAMETER_FILE_PATTERN = os.path.join(
@ -409,11 +422,11 @@ _AGGREGATE_TABLE_FILE_PATTERN = os.path.join(
'.', 'aggregate_results%s.csv')
_EXPECTED_NUTRIENT_TABLE_HEADERS = [
'Protein', 'Lipid', 'Energy', 'Ca', 'Fe', 'Mg', 'Ph', 'K', 'Na', 'Zn',
'Cu', 'Fl', 'Mn', 'Se', 'VitA', 'betaC', 'alphaC', 'VitE', 'Crypto',
'Lycopene', 'Lutein', 'betaT', 'gammaT', 'deltaT', 'VitC', 'Thiamin',
'Riboflavin', 'Niacin', 'Pantothenic', 'VitB6', 'Folate', 'VitB12',
'VitK']
'protein', 'lipid', 'energy', 'ca', 'fe', 'mg', 'ph', 'k', 'na', 'zn',
'cu', 'fl', 'mn', 'se', 'vita', 'betac', 'alphac', 'vite', 'crypto',
'lycopene', 'lutein', 'betat', 'gammat', 'deltat', 'vitc', 'thiamin',
'riboflavin', 'niacin', 'pantothenic', 'vitb6', 'folate', 'vitb12',
'vitk']
_EXPECTED_LUCODE_TABLE_HEADER = 'lucode'
_NODATA_YIELD = -1
@ -470,8 +483,6 @@ def execute(args):
output_dir, os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)])
# Initialize a TaskGraph
work_token_dir = os.path.join(
output_dir, _INTERMEDIATE_OUTPUT_DIR, '_taskgraph_working_dir')
try:
n_workers = int(args['n_workers'])
except (KeyError, ValueError, TypeError):
@ -479,20 +490,21 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # Single process mode.
task_graph = taskgraph.TaskGraph(work_token_dir, n_workers)
task_graph = taskgraph.TaskGraph(
os.path.join(output_dir, 'taskgraph_cache'), n_workers)
dependent_task_list = []
LOGGER.info(
"Checking if the landcover raster is missing lucodes")
crop_to_landcover_table = utils.read_csv_to_dataframe(
args['landcover_to_crop_table_path'], 'crop_name').to_dict(orient='index')
crop_to_landcover_df = utils.read_csv_to_dataframe(
args['landcover_to_crop_table_path'],
MODEL_SPEC['args']['landcover_to_crop_table_path'])
crop_to_fertlization_rate_table = utils.read_csv_to_dataframe(
args['fertilization_rate_table_path'], 'crop_name').to_dict(orient='index')
crop_to_fertilization_rate_df = utils.read_csv_to_dataframe(
args['fertilization_rate_table_path'],
MODEL_SPEC['args']['fertilization_rate_table_path'])
crop_lucodes = [
x[_EXPECTED_LUCODE_TABLE_HEADER]
for x in crop_to_landcover_table.values()]
crop_lucodes = list(crop_to_landcover_df[_EXPECTED_LUCODE_TABLE_HEADER])
unique_lucodes = numpy.array([])
for _, lu_band_data in pygeoprocessing.iterblocks(
@ -509,9 +521,7 @@ def execute(args):
"aren't in the landcover raster: %s", missing_lucodes)
LOGGER.info("Checking that crops correspond to known types.")
for crop_name in crop_to_landcover_table:
crop_lucode = crop_to_landcover_table[crop_name][
_EXPECTED_LUCODE_TABLE_HEADER]
for crop_name in crop_to_landcover_df.index:
crop_climate_bin_raster_path = os.path.join(
args['model_data_path'],
_EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)
@ -543,9 +553,8 @@ def execute(args):
crop_lucode = None
observed_yield_nodata = None
for crop_name in crop_to_landcover_table:
crop_lucode = crop_to_landcover_table[crop_name][
_EXPECTED_LUCODE_TABLE_HEADER]
for crop_name, row in crop_to_landcover_df.iterrows():
crop_lucode = row[_EXPECTED_LUCODE_TABLE_HEADER]
LOGGER.info("Processing crop %s", crop_name)
crop_climate_bin_raster_path = os.path.join(
args['model_data_path'],
@ -568,19 +577,19 @@ def execute(args):
task_name='crop_climate_bin')
dependent_task_list.append(crop_climate_bin_task)
crop_regression_table_path = os.path.join(
args['model_data_path'], _REGRESSION_TABLE_PATTERN % crop_name)
crop_regression_table = utils.read_csv_to_dataframe(
crop_regression_table_path, 'climate_bin').to_dict(orient='index')
for bin_id in crop_regression_table:
crop_regression_df = utils.read_csv_to_dataframe(
os.path.join(args['model_data_path'],
_REGRESSION_TABLE_PATTERN % crop_name),
MODEL_SPEC['args']['model_data_path']['contents'][
'climate_regression_yield_tables']['contents'][
'[CROP]_regression_yield_table.csv'])
for _, row in crop_regression_df.iterrows():
for header in _EXPECTED_REGRESSION_TABLE_HEADERS:
if crop_regression_table[bin_id][header.lower()] == '':
crop_regression_table[bin_id][header.lower()] = 0
if numpy.isnan(row[header]):
row[header] = 0
yield_regression_headers = [
x for x in list(crop_regression_table.values())[0]
if x != 'climate_bin']
x for x in crop_regression_df.columns if x != 'climate_bin']
reclassify_error_details = {
'raster_name': f'{crop_name} Climate Bin',
@ -597,10 +606,7 @@ def execute(args):
output_dir,
_INTERPOLATED_YIELD_REGRESSION_FILE_PATTERN % (
crop_name, yield_regression_id, file_suffix)))
bin_to_regression_value = dict([
(bin_id,
crop_regression_table[bin_id][yield_regression_id])
for bin_id in crop_regression_table])
bin_to_regression_value = crop_regression_df[yield_regression_id].to_dict()
# reclassify nodata to a valid value of 0
# we're assuming that the crop doesn't exist where there is no data
# this is more likely than assuming the crop does exist, esp.
@ -653,8 +659,8 @@ def execute(args):
(regression_parameter_raster_path_lookup['b_nut'], 1),
(regression_parameter_raster_path_lookup['c_n'], 1),
(args['landcover_raster_path'], 1),
(crop_to_fertlization_rate_table[crop_name]
['nitrogen_rate'], 'raw'),
(crop_to_fertilization_rate_df['nitrogen_rate'][crop_name],
'raw'),
(crop_lucode, 'raw'), (pixel_area_ha, 'raw')],
_x_yield_op,
nitrogen_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD),
@ -672,8 +678,8 @@ def execute(args):
(regression_parameter_raster_path_lookup['b_nut'], 1),
(regression_parameter_raster_path_lookup['c_p2o5'], 1),
(args['landcover_raster_path'], 1),
(crop_to_fertlization_rate_table[crop_name]
['phosphorus_rate'], 'raw'),
(crop_to_fertilization_rate_df['phosphorus_rate'][crop_name],
'raw'),
(crop_lucode, 'raw'), (pixel_area_ha, 'raw')],
_x_yield_op,
phosphorus_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD),
@ -691,8 +697,8 @@ def execute(args):
(regression_parameter_raster_path_lookup['b_k2o'], 1),
(regression_parameter_raster_path_lookup['c_k2o'], 1),
(args['landcover_raster_path'], 1),
(crop_to_fertlization_rate_table[crop_name]
['potassium_rate'], 'raw'),
(crop_to_fertilization_rate_df['potassium_rate'][crop_name],
'raw'),
(crop_lucode, 'raw'), (pixel_area_ha, 'raw')],
_x_yield_op,
potassium_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD),
@ -796,18 +802,18 @@ def execute(args):
# both 'crop_nutrient.csv' and 'crop' are known data/header values for
# this model data.
nutrient_table = utils.read_csv_to_dataframe(
nutrient_df = utils.read_csv_to_dataframe(
os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
'crop', convert_cols_to_lower=False, convert_vals_to_lower=False
).to_dict(orient='index')
MODEL_SPEC['args']['model_data_path']['contents']['crop_nutrient.csv'])
LOGGER.info("Generating report table")
crop_names = list(crop_to_landcover_df.index)
result_table_path = os.path.join(
output_dir, 'result_table%s.csv' % file_suffix)
_ = task_graph.add_task(
func=tabulate_regression_results,
args=(nutrient_table,
crop_to_landcover_table, pixel_area_ha,
args=(nutrient_df,
crop_names, pixel_area_ha,
args['landcover_raster_path'], landcover_nodata,
output_dir, file_suffix, result_table_path),
target_path_list=[result_table_path],
@ -827,7 +833,7 @@ def execute(args):
args=(args['aggregate_polygon_path'],
target_aggregate_vector_path,
landcover_raster_info['projection_wkt'],
crop_to_landcover_table, nutrient_table,
crop_names, nutrient_df,
output_dir, file_suffix,
aggregate_results_table_path),
target_path_list=[target_aggregate_vector_path,
@ -929,17 +935,16 @@ def _mask_observed_yield_op(
def tabulate_regression_results(
nutrient_table,
crop_to_landcover_table, pixel_area_ha, landcover_raster_path,
nutrient_df,
crop_names, pixel_area_ha, landcover_raster_path,
landcover_nodata, output_dir, file_suffix, target_table_path):
"""Write table with total yield and nutrient results by crop.
This function includes all the operations that write to results_table.csv.
Args:
nutrient_table (dict): a lookup of nutrient values by crop in the
form of nutrient_table[<crop>][<nutrient>].
crop_to_landcover_table (dict): landcover codes keyed by crop names
nutrient_df (pandas.DataFrame): a table of nutrient values by crop
crop_names (list): list of crop names
pixel_area_ha (float): area of lulc raster cells (hectares)
landcover_raster_path (string): path to landcover raster
landcover_nodata (float): landcover raster nodata value
@ -960,7 +965,7 @@ def tabulate_regression_results(
result_table.write(
'crop,area (ha),' + 'production_observed,production_modeled,' +
','.join(nutrient_headers) + '\n')
for crop_name in sorted(crop_to_landcover_table):
for crop_name in sorted(crop_names):
result_table.write(crop_name)
production_lookup = {}
production_pixel_count = 0
@ -1006,18 +1011,18 @@ def tabulate_regression_results(
# convert 100g to Mg and fraction left over from refuse
nutrient_factor = 1e4 * (
1 - nutrient_table[crop_name]['Percentrefuse'] / 100)
1 - nutrient_df['percentrefuse'][crop_name] / 100)
for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
total_nutrient = (
nutrient_factor *
production_lookup['modeled'] *
nutrient_table[crop_name][nutrient_id])
nutrient_df[nutrient_id][crop_name])
result_table.write(",%f" % (total_nutrient))
result_table.write(
",%f" % (
nutrient_factor *
production_lookup['observed'] *
nutrient_table[crop_name][nutrient_id]))
nutrient_df[nutrient_id][crop_name]))
result_table.write('\n')
total_area = 0
@ -1035,8 +1040,8 @@ def tabulate_regression_results(
def aggregate_regression_results_to_polygons(
base_aggregate_vector_path, target_aggregate_vector_path,
landcover_raster_projection, crop_to_landcover_table,
nutrient_table, output_dir, file_suffix,
landcover_raster_projection, crop_names,
nutrient_df, output_dir, file_suffix,
target_aggregate_table_path):
"""Write table with aggregate results of yield and nutrient values.
@ -1049,9 +1054,8 @@ def aggregate_regression_results_to_polygons(
target_aggregate_vector_path (string):
path to re-projected copy of polygon vector
landcover_raster_projection (string): a WKT projection string
crop_to_landcover_table (dict): landcover codes keyed by crop names
nutrient_table (dict): a lookup of nutrient values by crop in the
form of nutrient_table[<crop>][<nutrient>].
crop_names (list): list of crop names
nutrient_df (pandas.DataFrame): a table of nutrient values by crop
output_dir (string): the file path to the output workspace.
file_suffix (string): string to append to any output filenames.
target_aggregate_table_path (string): path to 'aggregate_results.csv'
@ -1072,10 +1076,10 @@ def aggregate_regression_results_to_polygons(
total_nutrient_table = collections.defaultdict(
lambda: collections.defaultdict(lambda: collections.defaultdict(
float)))
for crop_name in crop_to_landcover_table:
for crop_name in crop_names:
# convert 100g to Mg and fraction left over from refuse
nutrient_factor = 1e4 * (
1 - nutrient_table[crop_name]['Percentrefuse'] / 100)
1 - nutrient_df['percentrefuse'][crop_name] / 100)
LOGGER.info(
"Calculating zonal stats for %s", crop_name)
crop_production_raster_path = os.path.join(
@ -1093,7 +1097,7 @@ def aggregate_regression_results_to_polygons(
nutrient_factor *
total_yield_lookup['%s_modeled' % crop_name][
fid_index]['sum'] *
nutrient_table[crop_name][nutrient_id])
nutrient_df[nutrient_id][crop_name])
# process observed
observed_yield_path = os.path.join(
@ -1111,7 +1115,7 @@ def aggregate_regression_results_to_polygons(
nutrient_factor * # percent crop used * 1000 [100g per Mg]
total_yield_lookup[
'%s_observed' % crop_name][fid_index]['sum'] *
nutrient_table[crop_name][nutrient_id]) # nutrient unit per 100g crop
nutrient_df[nutrient_id][crop_name]) # nutrient unit per 100g crop
# report everything to a table
aggregate_table_path = os.path.join(

View File

@ -336,7 +336,7 @@ def build_datastack_archive(args, model_name, datastack_path):
data_dir, f'{key}_csv_data')
dataframe = utils.read_csv_to_dataframe(
source_path, convert_vals_to_lower=False)
source_path, args_spec[key])
csv_source_dir = os.path.abspath(os.path.dirname(source_path))
for spatial_column_name in spatial_columns:
# Iterate through the spatial columns, identify the set of

View File

@ -137,7 +137,7 @@ MODEL_SPEC = {
"geometries": spec_utils.POINT,
"fields": {}
},
"_work_tokens": spec_utils.TASKGRAPH_DIR
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -221,8 +221,6 @@ def execute(args):
file_registry = utils.build_file_registry(
[(_OUTPUT_FILES, output_directory)], file_suffix)
work_token_dir = os.path.join(output_directory, '_work_tokens')
# Manually setting n_workers to be -1 so that everything happens in the
# same thread.
try:
@ -232,7 +230,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1
graph = taskgraph.TaskGraph(work_token_dir, n_workers=n_workers)
graph = taskgraph.TaskGraph(
os.path.join(output_directory, '_work_tokens'), n_workers=n_workers)
fill_pits_task = graph.add_task(
pygeoprocessing.routing.fill_pits,

View File

@ -10,6 +10,7 @@ import time
import uuid
import numpy
import pandas
import pygeoprocessing
import scipy.spatial
import taskgraph
@ -64,6 +65,7 @@ MODEL_SPEC = {
},
"biophysical_table_path": {
"type": "csv",
"index_col": "lucode",
"columns": {
"lucode": spec_utils.LULC_TABLE_COLUMN,
"is_tropical_forest": {
@ -249,10 +251,10 @@ MODEL_SPEC = {
"bands": {1: {
"type": "number", "units": u.metric_ton/u.hectare
}}
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -375,8 +377,6 @@ def execute(args):
file_suffix = utils.make_suffix_string(args, 'results_suffix')
# Initialize a TaskGraph
taskgraph_working_dir = os.path.join(
intermediate_dir, '_taskgraph_working_dir')
try:
n_workers = int(args['n_workers'])
except (KeyError, ValueError, TypeError):
@ -384,7 +384,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # single process mode.
task_graph = taskgraph.TaskGraph(taskgraph_working_dir, n_workers)
task_graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
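This is the same refactor applied across the models in this commit: the TaskGraph token directory moves from a hidden intermediate folder to a taskgraph_cache directory at the top of the workspace. A minimal, self-contained sketch of the pattern, assuming the natcap taskgraph package and a throwaway temp workspace:

import os
import tempfile

import taskgraph


def write_greeting(target_path):
    # Trivial stand-in for a model step that produces a file.
    with open(target_path, 'w') as target_file:
        target_file.write('hello\n')


workspace_dir = tempfile.mkdtemp()
target_path = os.path.join(workspace_dir, 'greeting.txt')

# The token/cache directory now sits directly in the workspace;
# n_workers=-1 runs every task synchronously in this process.
task_graph = taskgraph.TaskGraph(
    os.path.join(workspace_dir, 'taskgraph_cache'), n_workers=-1)
task_graph.add_task(
    func=write_greeting,
    args=(target_path,),
    target_path_list=[target_path],
    task_name='write greeting')
task_graph.join()
task_graph.close()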
# used to keep track of files generated by this module
output_file_registry = {
@ -418,16 +419,15 @@ def execute(args):
# Map non-forest landcover codes to carbon biomasses
LOGGER.info('Calculating direct mapped carbon stocks')
carbon_maps = []
biophysical_table = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'lucode').to_dict(orient='index')
biophysical_keys = [
x.lower() for x in list(biophysical_table.values())[0].keys()]
biophysical_df = utils.read_csv_to_dataframe(
args['biophysical_table_path'],
MODEL_SPEC['args']['biophysical_table_path'])
pool_list = [('c_above', True)]
if args['pools_to_calculate'] == 'all':
pool_list.extend([
('c_below', False), ('c_soil', False), ('c_dead', False)])
for carbon_pool_type, ignore_tropical_type in pool_list:
if carbon_pool_type in biophysical_keys:
if carbon_pool_type in biophysical_df.columns:
carbon_maps.append(
output_file_registry[carbon_pool_type+'_map'])
task_graph.add_task(
@ -630,8 +630,8 @@ def _calculate_lulc_carbon_map(
"""
# classify forest pixels from lulc
biophysical_table = utils.read_csv_to_dataframe(
biophysical_table_path, 'lucode').to_dict(orient='index')
biophysical_df = utils.read_csv_to_dataframe(
biophysical_table_path, MODEL_SPEC['args']['biophysical_table_path'])
lucode_to_per_cell_carbon = {}
cell_size = pygeoprocessing.get_raster_info(
@ -639,24 +639,22 @@ def _calculate_lulc_carbon_map(
cell_area_ha = abs(cell_size[0]) * abs(cell_size[1]) / 10000
# Build a lookup table
for lucode in biophysical_table:
for lucode, row in biophysical_df.iterrows():
if compute_forest_edge_effects:
is_tropical_forest = (
int(biophysical_table[int(lucode)]['is_tropical_forest']))
is_tropical_forest = row['is_tropical_forest']
else:
is_tropical_forest = 0
if ignore_tropical_type and is_tropical_forest == 1:
is_tropical_forest = False
if ignore_tropical_type and is_tropical_forest:
# if tropical forest above ground, lookup table is nodata
lucode_to_per_cell_carbon[int(lucode)] = NODATA_VALUE
lucode_to_per_cell_carbon[lucode] = NODATA_VALUE
else:
try:
lucode_to_per_cell_carbon[int(lucode)] = float(
biophysical_table[lucode][carbon_pool_type]) * cell_area_ha
except ValueError:
if pandas.isna(row[carbon_pool_type]):
raise ValueError(
"Could not interpret carbon pool value as a number. "
f"lucode: {lucode}, pool_type: {carbon_pool_type}, "
f"value: {biophysical_table[lucode][carbon_pool_type]}")
f"value: {row[carbon_pool_type]}")
lucode_to_per_cell_carbon[lucode] = row[carbon_pool_type] * cell_area_ha
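cell_area_ha above converts the raster's pixel dimensions (projected units, typically meters) into hectares so that per-hectare carbon densities from the biophysical table become per-pixel stocks. A worked example for a hypothetical 30 m pixel:

# Hypothetical pixel size, as returned in
# pygeoprocessing.get_raster_info(...)['pixel_size']; the y size is
# negative for north-up rasters, hence the abs() calls.
cell_size = (30.0, -30.0)                                      # meters
cell_area_ha = abs(cell_size[0]) * abs(cell_size[1]) / 10000   # 0.09 ha

c_above_density = 120.0         # made-up Mg C per hectare for one lucode
per_cell_carbon = c_above_density * cell_area_ha
print(per_cell_carbon)          # 10.8 Mg C per pixel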
# map aboveground carbon from table to lulc that is not forest
reclass_error_details = {
@ -696,11 +694,9 @@ def _map_distance_from_tropical_forest_edge(
"""
# Build a list of forest lucodes
biophysical_table = utils.read_csv_to_dataframe(
biophysical_table_path, 'lucode').to_dict(orient='index')
forest_codes = [
lucode for (lucode, ludata) in biophysical_table.items()
if int(ludata['is_tropical_forest']) == 1]
biophysical_df = utils.read_csv_to_dataframe(
biophysical_table_path, MODEL_SPEC['args']['biophysical_table_path'])
forest_codes = biophysical_df[biophysical_df['is_tropical_forest']].index.values
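forest_codes is now pulled straight from the lucode index by masking on the is_tropical_forest column. A small sketch of that idiom with a hypothetical four-row table (the real column comes in as a boolean-like type via MODEL_SPEC; the explicit == 1 below keeps the mask boolean either way):

import pandas

# Hypothetical biophysical table indexed by lucode, as read via MODEL_SPEC.
biophysical_df = pandas.DataFrame(
    {'is_tropical_forest': [0, 1, 1, 0], 'c_above': [20.0, 150.0, 90.0, 5.0]},
    index=pandas.Index([1, 2, 3, 4], name='lucode'))

# Mask the rows, then take the index values: the forest landcover codes.
forest_codes = biophysical_df[
    biophysical_df['is_tropical_forest'] == 1].index.values
print(forest_codes)  # [2 3]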
# Make a raster where 1 is non-forest landcover types and 0 is forest
lulc_nodata = pygeoprocessing.get_raster_info(

View File

@ -77,6 +77,7 @@ MODEL_SPEC = {
},
"threats_table_path": {
"type": "csv",
"index_col": "threat",
"columns": {
"threat": {
"type": "freestyle_string",
@ -170,8 +171,13 @@ MODEL_SPEC = {
},
"sensitivity_table_path": {
"type": "csv",
"index_col": "lulc",
"columns": {
"lulc": spec_utils.LULC_TABLE_COLUMN,
"name": {
"type": "freestyle_string",
"required": False
},
"habitat": {
"type": "ratio",
"about": gettext(
@ -303,10 +309,10 @@ MODEL_SPEC = {
"bands": {1: {"type": "integer"}}
}
}
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
# All out rasters besides rarity should be gte to 0. Set nodata accordingly.
@ -371,28 +377,23 @@ def execute(args):
args['workspace_dir'], 'intermediate')
utils.make_directories([intermediate_output_dir, output_dir])
taskgraph_working_dir = os.path.join(
intermediate_output_dir, '_taskgraph_working_dir')
n_workers = int(args.get('n_workers', -1))
task_graph = taskgraph.TaskGraph(taskgraph_working_dir, n_workers)
task_graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
LOGGER.info("Checking Threat and Sensitivity tables for compliance")
# Get CSVs as dictionaries and ensure the key is a string for threats.
threat_dict = {
str(key): value for key, value in utils.read_csv_to_dataframe(
args['threats_table_path'], 'THREAT',
expand_path_cols=['cur_path', 'fut_path', 'base_path']
).to_dict(orient='index').items()}
sensitivity_dict = utils.read_csv_to_dataframe(
args['sensitivity_table_path'], 'LULC').to_dict(orient='index')
threat_df = utils.read_csv_to_dataframe(
args['threats_table_path'], MODEL_SPEC['args']['threats_table_path']
).fillna('')
sensitivity_df = utils.read_csv_to_dataframe(
args['sensitivity_table_path'],
MODEL_SPEC['args']['sensitivity_table_path'])
half_saturation_constant = float(args['half_saturation_constant'])
# Dictionary for reclassing habitat values
sensitivity_reclassify_habitat_dict = {
int(key): float(val['habitat']) for key, val in
sensitivity_dict.items()}
sensitivity_reclassify_habitat_dict = sensitivity_df['habitat'].to_dict()
# declare dictionaries to store the land cover and the threat rasters
# pertaining to the different threats
@ -419,13 +420,12 @@ def execute(args):
# for each threat given in the CSV file try opening the associated
# raster which should be found relative to the Threat CSV
for threat in threat_dict:
for threat, row in threat_df.iterrows():
LOGGER.debug(f"Validating path for threat: {threat}")
threat_table_path_col = _THREAT_SCENARIO_MAP[lulc_key]
threat_path = threat_dict[threat][threat_table_path_col]
threat_validate_result = _validate_threat_path(
threat_path, lulc_key)
row[threat_table_path_col], lulc_key)
if threat_validate_result == 'error':
raise ValueError(
'There was an Error locating a threat raster from '
@ -516,7 +516,7 @@ def execute(args):
intermediate_output_dir,
(f'{os.path.splitext(os.path.basename(lulc_path))[0]}'
f'_aligned{file_suffix}.tif'))
for threat in threat_dict:
for threat in threat_df.index.values:
threat_path = threat_path_dict['threat' + lulc_key][threat]
if threat_path in lulc_and_threat_raster_list:
aligned_threat_path = os.path.join(
@ -578,10 +578,7 @@ def execute(args):
access_task_list.append(rasterize_access_task)
# calculate the weight sum which is the sum of all the threats' weights
weight_sum = 0.0
for threat_data in threat_dict.values():
# Sum weight of threats
weight_sum = weight_sum + threat_data['weight']
weight_sum = threat_df['weight'].sum()
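threat_df['weight'].sum() replaces the accumulation loop; each threat's weight is later divided by this sum (weight_avg) so the weights across all threats total one. A small sketch with hypothetical threats:

import pandas

# Hypothetical threats table, already indexed by threat name as the model
# now reads it through MODEL_SPEC.
threat_df = pandas.DataFrame(
    {'weight': [1.0, 0.5, 0.5], 'max_dist': [8.0, 5.0, 3.0]},
    index=['agriculture', 'roads', 'urban'])

weight_sum = threat_df['weight'].sum()          # 2.0
weight_avg = threat_df['weight'] / weight_sum   # 0.5, 0.25, 0.25
print(weight_avg.to_dict())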
# for each land cover raster provided compute habitat quality
for lulc_key, lulc_path in lulc_path_dict.items():
@ -618,9 +615,9 @@ def execute(args):
exit_landcover = False
# adjust each threat/threat raster for distance, weight, and access
for threat, threat_data in threat_dict.items():
for threat, row in threat_df.iterrows():
LOGGER.debug(
f'Calculating threat: {threat}.\nThreat data: {threat_data}')
f'Calculating threat: {threat}.\nThreat data: {row}')
# get the threat raster for the specific threat
threat_raster_path = threat_path_dict['threat' + lulc_key][threat]
@ -634,7 +631,7 @@ def execute(args):
exit_landcover = True
break
# Check to make sure max_dist is greater than 0
if threat_data['max_dist'] <= 0.0:
if row['max_dist'] <= 0:
raise ValueError(
f"The max distance for threat: '{threat}' is less than"
" or equal to 0. MAX_DIST should be a positive value.")
@ -650,17 +647,15 @@ def execute(args):
dependent_task_list=[align_task],
task_name=f'distance edt {lulc_key} {threat}')
decay_type = threat_data['decay']
filtered_threat_raster_path = os.path.join(
intermediate_output_dir,
f'filtered_{decay_type}_{threat}{lulc_key}{file_suffix}.tif')
f'filtered_{row["decay"]}_{threat}{lulc_key}{file_suffix}.tif')
dist_decay_task = task_graph.add_task(
func=_decay_distance,
args=(
distance_raster_path, threat_data['max_dist'],
decay_type, filtered_threat_raster_path),
distance_raster_path, row['max_dist'],
row['decay'], filtered_threat_raster_path),
target_path_list=[filtered_threat_raster_path],
dependent_task_list=[dist_edt_task],
task_name=f'distance decay {lulc_key} {threat}')
@ -672,9 +667,7 @@ def execute(args):
f'sens_{threat}{lulc_key}{file_suffix}.tif')
# Dictionary for reclassing threat sensitivity values
sensitivity_reclassify_threat_dict = {
int(key): float(val[threat]) for key, val in
sensitivity_dict.items()}
sensitivity_reclassify_threat_dict = sensitivity_df[threat].to_dict()
reclass_error_details = {
'raster_name': 'LULC', 'column_name': 'lucode',
@ -686,11 +679,11 @@ def execute(args):
reclass_error_details),
target_path_list=[sens_raster_path],
dependent_task_list=[align_task],
task_name=f'sens_raster_{decay_type}{lulc_key}_{threat}')
task_name=f'sens_raster_{row["decay"]}{lulc_key}_{threat}')
sensitivity_task_list.append(sens_threat_task)
# get the normalized weight for each threat
weight_avg = threat_data['weight'] / weight_sum
weight_avg = row['weight'] / weight_sum
# add the threat raster adjusted by distance and the raster
# representing sensitivity to the list to be past to
@ -724,7 +717,7 @@ def execute(args):
dependent_task_list=[
*threat_decay_task_list, *sensitivity_task_list,
*access_task_list],
task_name=f'tot_degradation_{decay_type}{lulc_key}_{threat}')
task_name=f'tot_degradation_{row["decay"]}{lulc_key}_{threat}')
# Compute habitat quality
# ksq: a term used below to compute habitat quality
@ -1154,20 +1147,18 @@ def validate(args, limit_to=None):
if ("threats_table_path" not in invalid_keys and
"sensitivity_table_path" not in invalid_keys and
"threat_raster_folder" not in invalid_keys):
# Get CSVs as dictionaries and ensure the key is a string for threats.
threat_dict = {
str(key): value for key, value in utils.read_csv_to_dataframe(
args['threats_table_path'], 'THREAT',
expand_path_cols=['cur_path', 'fut_path', 'base_path']
).to_dict(orient='index').items()}
sensitivity_dict = utils.read_csv_to_dataframe(
args['sensitivity_table_path'], 'LULC').to_dict(orient='index')
threat_df = utils.read_csv_to_dataframe(
args['threats_table_path'],
MODEL_SPEC['args']['threats_table_path']).fillna('')
sensitivity_df = utils.read_csv_to_dataframe(
args['sensitivity_table_path'],
MODEL_SPEC['args']['sensitivity_table_path'])
# check that the threat names in the threats table match with the
# threats columns in the sensitivity table.
sens_header_set = set(list(sensitivity_dict.values())[0])
threat_set = {threat for threat in threat_dict}
sens_header_set = set(sensitivity_df.columns)
threat_set = set(threat_df.index.values)
missing_sens_header_set = threat_set.difference(sens_header_set)
if missing_sens_header_set:
@ -1191,14 +1182,14 @@ def validate(args, limit_to=None):
# for each threat given in the CSV file try opening the
# associated raster which should be found in
# threat_raster_folder
for threat in threat_dict:
for threat, row in threat_df.iterrows():
threat_table_path_col = _THREAT_SCENARIO_MAP[lulc_key]
if threat_table_path_col not in threat_dict[threat]:
if threat_table_path_col not in row:
bad_threat_columns.append(threat_table_path_col)
break
# Threat path from threat CSV is relative to CSV
threat_path = threat_dict[threat][threat_table_path_col]
threat_path = row[threat_table_path_col]
threat_validate_result = _validate_threat_path(
threat_path, lulc_key)

View File

@ -66,6 +66,7 @@ MODEL_SPEC = {
"name": gettext("habitat stressor table"),
"about": gettext("A table describing each habitat and stressor."),
"type": "csv",
"index_col": "name",
"columns": {
"name": {
"type": "freestyle_string",
@ -437,7 +438,7 @@ MODEL_SPEC = {
}
}
},
".taskgraph": spec_utils.TASKGRAPH_DIR
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -496,7 +497,6 @@ def execute(args):
intermediate_dir = os.path.join(args['workspace_dir'],
'intermediate_outputs')
output_dir = os.path.join(args['workspace_dir'], 'outputs')
taskgraph_working_dir = os.path.join(args['workspace_dir'], '.taskgraph')
utils.make_directories([intermediate_dir, output_dir])
suffix = utils.make_suffix_string(args, 'results_suffix')
@ -526,7 +526,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # single process mode.
graph = taskgraph.TaskGraph(taskgraph_working_dir, n_workers)
graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
# parse the info table and get info dicts for habitats, stressors.
habitats_info, stressors_info = _parse_info_table(args['info_table_path'])
@ -1584,7 +1585,7 @@ def _align(raster_path_map, vector_path_map, target_pixel_size,
layer = None
vector = None
_create_raster_from_bounding_box(
pygeoprocessing.create_raster_from_bounding_box(
target_raster_path=target_raster_path,
target_bounding_box=target_bounding_box,
target_pixel_size=target_pixel_size,
@ -1599,74 +1600,6 @@ def _align(raster_path_map, vector_path_map, target_pixel_size,
burn_values=burn_values, option_list=rasterize_option_list)
def _create_raster_from_bounding_box(
target_raster_path, target_bounding_box, target_pixel_size,
target_pixel_type, target_srs_wkt, target_nodata=None,
fill_value=None):
"""Create a raster from a given bounding box.
Args:
target_raster_path (string): The path to where the new raster should be
created on disk.
target_bounding_box (tuple): a 4-element iterable of (minx, miny,
maxx, maxy) in projected units matching the SRS of
``target_srs_wkt``.
target_pixel_size (tuple): A 2-element tuple of the (x, y) pixel size
of the target raster. Elements are in units of the target SRS.
target_pixel_type (int): The GDAL GDT_* type of the target raster.
target_srs_wkt (string): The SRS of the target raster, in Well-Known
Text format.
target_nodata (float): If provided, the nodata value of the target
raster.
fill_value=None (number): If provided, the value that the target raster
should be filled with.
Returns:
``None``
"""
bbox_minx, bbox_miny, bbox_maxx, bbox_maxy = target_bounding_box
driver = gdal.GetDriverByName('GTiff')
n_bands = 1
n_cols = int(numpy.ceil(
abs((bbox_maxx - bbox_minx) / target_pixel_size[0])))
n_rows = int(numpy.ceil(
abs((bbox_maxy - bbox_miny) / target_pixel_size[1])))
raster = driver.Create(
target_raster_path, n_cols, n_rows, n_bands, target_pixel_type,
options=['TILED=YES', 'BIGTIFF=YES', 'COMPRESS=DEFLATE',
'BLOCKXSIZE=256', 'BLOCKYSIZE=256'])
raster.SetProjection(target_srs_wkt)
# Set the transform based on the upper left corner and given pixel
# dimensions. Bounding box is in format [minx, miny, maxx, maxy]
if target_pixel_size[0] < 0:
x_source = bbox_maxx
else:
x_source = bbox_minx
if target_pixel_size[1] < 0:
y_source = bbox_maxy
else:
y_source = bbox_miny
raster_transform = [
x_source, target_pixel_size[0], 0.0,
y_source, 0.0, target_pixel_size[1]]
raster.SetGeoTransform(raster_transform)
# Fill the band if requested.
band = raster.GetRasterBand(1)
if fill_value is not None:
band.Fill(fill_value)
# Set the nodata value.
if target_nodata is not None:
band.SetNoDataValue(float(target_nodata))
band = None
raster = None
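The helper removed above (its job now handled by pygeoprocessing.create_raster_from_bounding_box) sizes the raster by dividing the bounding box extent by the pixel size and anchors the geotransform at whichever corner makes rows and columns advance in the pixel-size direction. The same arithmetic on a hypothetical bounding box:

import numpy

# Hypothetical inputs: a 1 km square box with 30 m pixels (negative y for
# a north-up raster).
bbox_minx, bbox_miny, bbox_maxx, bbox_maxy = 0.0, 0.0, 1000.0, 1000.0
target_pixel_size = (30.0, -30.0)

n_cols = int(numpy.ceil(abs((bbox_maxx - bbox_minx) / target_pixel_size[0])))  # 34
n_rows = int(numpy.ceil(abs((bbox_maxy - bbox_miny) / target_pixel_size[1])))  # 34

# The origin goes at the maximum y (the top edge) because the y pixel size
# is negative; same logic for x.
x_source = bbox_maxx if target_pixel_size[0] < 0 else bbox_minx   # 0.0
y_source = bbox_maxy if target_pixel_size[1] < 0 else bbox_miny   # 1000.0
raster_transform = [
    x_source, target_pixel_size[0], 0.0,
    y_source, 0.0, target_pixel_size[1]]
print(n_cols, n_rows, raster_transform)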
def _simplify(source_vector_path, tolerance, target_vector_path,
preserve_columns=None):
"""Simplify a geometry to a given tolerance.
@ -1841,12 +1774,15 @@ def _open_table_as_dataframe(table_path, **kwargs):
excel_df = pandas.read_excel(table_path, **kwargs)
excel_df.columns = excel_df.columns.str.lower()
excel_df['path'] = excel_df['path'].apply(
lambda p: utils.expand_path(p, table_path))
lambda p: utils.expand_path(p, table_path)).astype('string')
excel_df['name'] = excel_df['name'].astype('string')
excel_df['type'] = excel_df['type'].astype('string')
excel_df['stressor buffer (meters)'] = excel_df['stressor buffer (meters)'].astype(float)
excel_df = excel_df.set_index('name')
return excel_df
else:
return utils.read_csv_to_dataframe(
table_path, convert_vals_to_lower=False,
expand_path_cols=['path'], **kwargs)
table_path, MODEL_SPEC['args']['info_table_path'], **kwargs)
def _parse_info_table(info_table_path):
@ -1871,8 +1807,12 @@ def _parse_info_table(info_table_path):
"""
info_table_path = os.path.abspath(info_table_path)
table = _open_table_as_dataframe(info_table_path)
table = table.set_index('name')
try:
table = _open_table_as_dataframe(info_table_path)
except ValueError as err:
if 'Index has duplicate keys' in str(err):
            raise ValueError("Habitat and stressor names may not overlap.")
        raise
table = table.rename(columns={'stressor buffer (meters)': 'buffer'})
# Drop the buffer column from the habitats list; we don't need it.
@ -1883,15 +1823,6 @@ def _parse_info_table(info_table_path):
stressors = table.loc[table['type'] == 'stressor'].drop(
columns=['type']).to_dict(orient='index')
# habitats and stressors must be nonoverlapping sets.
repeated_habitats_stressors = set(
habitats.keys()).intersection(stressors.keys())
if repeated_habitats_stressors:
raise ValueError(
"Habitat and stressor names may not overlap. These names are "
"both habitats and stressors: "
f"{', '.join(repeated_habitats_stressors)}")
return (habitats, stressors)

View File

@ -73,6 +73,7 @@ MODEL_SPEC = {
},
"biophysical_table_path": {
"type": "csv",
"index_col": "lucode",
"columns": {
"lucode": spec_utils.LULC_TABLE_COLUMN,
"load_[NUTRIENT]": { # nitrogen or phosphorus nutrient loads
@ -373,53 +374,60 @@ MODEL_SPEC = {
"type": "integer"
}}
},
"cache_dir": {
"type": "directory",
"contents": {
"aligned_dem.tif": {
"about": "Copy of the DEM clipped to the extent of the other inputs",
"bands": {1: {"type": "number", "units": u.meter}}
},
"aligned_lulc.tif": {
"about": (
"Copy of the LULC clipped to the extent of the other inputs "
"and reprojected to the DEM projection"),
"bands": {1: {"type": "integer"}}
},
"aligned_runoff_proxy.tif": {
"about": (
"Copy of the runoff proxy clipped to the extent of the other inputs "
"and reprojected to the DEM projection"),
"bands": {1: {"type": "number", "units": u.none}}
},
"filled_dem.tif": spec_utils.FILLED_DEM,
"slope.tif": spec_utils.SLOPE,
"subsurface_export_n.pickle": {
"about": "Pickled zonal statistics of nitrogen subsurface export"
},
"subsurface_load_n.pickle": {
"about": "Pickled zonal statistics of nitrogen subsurface load"
},
"surface_export_n.pickle": {
"about": "Pickled zonal statistics of nitrogen surface export"
},
"surface_export_p.pickle": {
"about": "Pickled zonal statistics of phosphorus surface export"
},
"surface_load_n.pickle": {
"about": "Pickled zonal statistics of nitrogen surface load"
},
"surface_load_p.pickle": {
"about": "Pickled zonal statistics of phosphorus surface load"
},
"total_export_n.pickle": {
"about": "Pickled zonal statistics of total nitrogen export"
},
"taskgraph.db": {}
}
"aligned_dem.tif": {
"about": "Copy of the DEM clipped to the extent of the other inputs",
"bands": {1: {"type": "number", "units": u.meter}}
},
"aligned_lulc.tif": {
"about": (
"Copy of the LULC clipped to the extent of the other inputs "
"and reprojected to the DEM projection"),
"bands": {1: {"type": "integer"}}
},
"aligned_runoff_proxy.tif": {
"about": (
"Copy of the runoff proxy clipped to the extent of the other inputs "
"and reprojected to the DEM projection"),
"bands": {1: {"type": "number", "units": u.none}}
},
"masked_dem.tif": {
"about": "DEM input masked to exclude pixels outside the watershed",
"bands": {1: {"type": "number", "units": u.meter}}
},
"masked_lulc.tif": {
"about": "LULC input masked to exclude pixels outside the watershed",
"bands": {1: {"type": "integer"}}
},
"masked_runoff_proxy.tif": {
"about": "Runoff proxy input masked to exclude pixels outside the watershed",
"bands": {1: {"type": "number", "units": u.none}}
},
"filled_dem.tif": spec_utils.FILLED_DEM,
"slope.tif": spec_utils.SLOPE,
"subsurface_export_n.pickle": {
"about": "Pickled zonal statistics of nitrogen subsurface export"
},
"subsurface_load_n.pickle": {
"about": "Pickled zonal statistics of nitrogen subsurface load"
},
"surface_export_n.pickle": {
"about": "Pickled zonal statistics of nitrogen surface export"
},
"surface_export_p.pickle": {
"about": "Pickled zonal statistics of phosphorus surface export"
},
"surface_load_n.pickle": {
"about": "Pickled zonal statistics of nitrogen surface load"
},
"surface_load_p.pickle": {
"about": "Pickled zonal statistics of phosphorus surface load"
},
"total_export_n.pickle": {
"about": "Pickled zonal statistics of total nitrogen export"
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -463,14 +471,14 @@ _INTERMEDIATE_BASE_FILES = {
'thresholded_slope_path': 'thresholded_slope.tif',
'dist_to_channel_path': 'dist_to_channel.tif',
'drainage_mask': 'what_drains_to_stream.tif',
}
_CACHE_BASE_FILES = {
'filled_dem_path': 'filled_dem.tif',
'aligned_dem_path': 'aligned_dem.tif',
'masked_dem_path': 'masked_dem.tif',
'slope_path': 'slope.tif',
'aligned_lulc_path': 'aligned_lulc.tif',
'masked_lulc_path': 'masked_lulc.tif',
'aligned_runoff_proxy_path': 'aligned_runoff_proxy.tif',
'masked_runoff_proxy_path': 'masked_runoff_proxy.tif',
'surface_load_n_pickle_path': 'surface_load_n.pickle',
'surface_load_p_pickle_path': 'surface_load_p.pickle',
'subsurface_load_n_pickle_path': 'subsurface_load_n.pickle',
@ -537,14 +545,14 @@ def execute(args):
None
"""
def _validate_inputs(nutrients_to_process, lucode_to_parameters):
def _validate_inputs(nutrients_to_process, biophysical_df):
"""Validate common errors in inputs.
Args:
nutrients_to_process (list): list of 'n' and/or 'p'
lucode_to_parameters (dictionary): biophysical input table mapping
lucode to dictionary of table parameters. Used to validate
the correct columns are input
biophysical_df (pandas.DataFrame): dataframe representation of
the input biophysical table. Used to validate the correct
columns are input
Returns:
None
@ -564,7 +572,7 @@ def execute(args):
# is missing.
row_header_table_list = []
lu_parameter_row = list(lucode_to_parameters.values())[0]
lu_parameter_row = biophysical_df.columns.to_list()
row_header_table_list.append(
(lu_parameter_row, ['load_', 'eff_', 'crit_len_'],
args['biophysical_table_path']))
@ -594,8 +602,7 @@ def execute(args):
output_dir = os.path.join(args['workspace_dir'])
intermediate_output_dir = os.path.join(
args['workspace_dir'], INTERMEDIATE_DIR_NAME)
cache_dir = os.path.join(intermediate_output_dir, 'cache_dir')
utils.make_directories([output_dir, intermediate_output_dir, cache_dir])
utils.make_directories([output_dir, intermediate_output_dir])
try:
n_workers = int(args['n_workers'])
@ -605,13 +612,13 @@ def execute(args):
# TypeError when n_workers is None.
n_workers = -1 # Synchronous mode.
task_graph = taskgraph.TaskGraph(
cache_dir, n_workers, reporting_interval=5.0)
os.path.join(args['workspace_dir'], 'taskgraph_cache'),
n_workers, reporting_interval=5.0)
file_suffix = utils.make_suffix_string(args, 'results_suffix')
f_reg = utils.build_file_registry(
[(_OUTPUT_BASE_FILES, output_dir),
(_INTERMEDIATE_BASE_FILES, intermediate_output_dir),
(_CACHE_BASE_FILES, cache_dir)], file_suffix)
(_INTERMEDIATE_BASE_FILES, intermediate_output_dir)], file_suffix)
# Build up a list of nutrients to process based on what's checked on
nutrients_to_process = []
@ -619,10 +626,11 @@ def execute(args):
if args['calc_' + nutrient_id]:
nutrients_to_process.append(nutrient_id)
lucode_to_parameters = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'lucode').to_dict(orient='index')
biophysical_df = utils.read_csv_to_dataframe(
args['biophysical_table_path'],
MODEL_SPEC['args']['biophysical_table_path'])
_validate_inputs(nutrients_to_process, lucode_to_parameters)
_validate_inputs(nutrients_to_process, biophysical_df)
# these are used for aggregation in the last step
field_pickle_map = {}
@ -646,18 +654,64 @@ def execute(args):
base_raster_list, aligned_raster_list,
['near']*len(base_raster_list), dem_info['pixel_size'],
'intersection'),
kwargs={
'base_vector_path_list': [args['watersheds_path']],
'vector_mask_options': {
'mask_vector_path': args['watersheds_path']}},
kwargs={'base_vector_path_list': [args['watersheds_path']]},
target_path_list=aligned_raster_list,
task_name='align rasters')
# Use the cutline feature of gdal.Warp to mask pixels outside the watershed.
# It's possible that the DEM, LULC, or runoff proxy inputs might have an
# undefined nodata value. Since we're introducing nodata pixels, set a nodata
# value if one is not already defined.
rp_nodata = pygeoprocessing.get_raster_info(
f_reg['aligned_runoff_proxy_path'])['nodata'][0]
mask_runoff_proxy_task = task_graph.add_task(
func=gdal.Warp,
kwargs={
'destNameOrDestDS': f_reg['masked_runoff_proxy_path'],
'srcDSOrSrcDSTab': f_reg['aligned_runoff_proxy_path'],
'dstNodata': _TARGET_NODATA if rp_nodata is None else rp_nodata,
'cutlineDSName': args['watersheds_path']},
dependent_task_list=[align_raster_task],
target_path_list=[f_reg['masked_runoff_proxy_path']],
task_name='mask runoff proxy raster')
dem_nodata = pygeoprocessing.get_raster_info(
f_reg['aligned_dem_path'])['nodata'][0]
dem_target_nodata = float( # GDAL expects a python float, not numpy.float32
numpy.finfo(numpy.float32).min if dem_nodata is None else dem_nodata)
mask_dem_task = task_graph.add_task(
func=gdal.Warp,
kwargs={
'destNameOrDestDS': f_reg['masked_dem_path'],
'srcDSOrSrcDSTab': f_reg['aligned_dem_path'],
'outputType': gdal.GDT_Float32,
'dstNodata': dem_target_nodata,
'cutlineDSName': args['watersheds_path']},
dependent_task_list=[align_raster_task],
target_path_list=[f_reg['masked_dem_path']],
task_name='mask dem raster')
lulc_nodata = pygeoprocessing.get_raster_info(
f_reg['aligned_lulc_path'])['nodata'][0]
lulc_target_nodata = (
numpy.iinfo(numpy.int32).min if lulc_nodata is None else lulc_nodata)
mask_lulc_task = task_graph.add_task(
func=gdal.Warp,
kwargs={
'destNameOrDestDS': f_reg['masked_lulc_path'],
'srcDSOrSrcDSTab': f_reg['aligned_lulc_path'],
'outputType': gdal.GDT_Int32,
'dstNodata': lulc_target_nodata,
'cutlineDSName': args['watersheds_path']},
dependent_task_list=[align_raster_task],
target_path_list=[f_reg['masked_lulc_path']],
task_name='mask lulc raster')
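The three mask tasks above rely on the same gdal.Warp behavior: the cutline burns nodata into every pixel that falls outside the watershed polygons, which is why a destination nodata value has to be guaranteed first. A minimal standalone sketch of that call, with hypothetical file names:
from osgeo import gdal
# Hypothetical paths; any aligned raster plus a polygon vector will do.
gdal.Warp(
    'masked_lulc.tif',                # destNameOrDestDS
    'aligned_lulc.tif',               # srcDSOrSrcDSTab
    cutlineDSName='watersheds.gpkg',  # pixels outside these polygons become nodata
    dstNodata=-2147483648,            # set explicitly so masked pixels are recognizable
    outputType=gdal.GDT_Int32)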
fill_pits_task = task_graph.add_task(
func=pygeoprocessing.routing.fill_pits,
args=(
(f_reg['aligned_dem_path'], 1), f_reg['filled_dem_path']),
kwargs={'working_dir': cache_dir},
(f_reg['masked_dem_path'], 1), f_reg['filled_dem_path']),
kwargs={'working_dir': intermediate_output_dir},
dependent_task_list=[align_raster_task],
target_path_list=[f_reg['filled_dem_path']],
task_name='fill pits')
@ -666,7 +720,7 @@ def execute(args):
func=pygeoprocessing.routing.flow_dir_mfd,
args=(
(f_reg['filled_dem_path'], 1), f_reg['flow_direction_path']),
kwargs={'working_dir': cache_dir},
kwargs={'working_dir': intermediate_output_dir},
dependent_task_list=[fill_pits_task],
target_path_list=[f_reg['flow_direction_path']],
task_name='flow dir')
@ -707,7 +761,7 @@ def execute(args):
runoff_proxy_index_task = task_graph.add_task(
func=_normalize_raster,
args=((f_reg['aligned_runoff_proxy_path'], 1),
args=((f_reg['masked_runoff_proxy_path'], 1),
f_reg['runoff_proxy_index_path']),
target_path_list=[f_reg['runoff_proxy_index_path']],
dependent_task_list=[align_raster_task],
@ -790,14 +844,16 @@ def execute(args):
# Perrine says that 'n' is the only case where we could consider a
# prop subsurface component. So there's a special case for that.
if nutrient == 'n':
subsurface_proportion_type = 'proportion_subsurface_n'
subsurface_proportion_map = (
biophysical_df['proportion_subsurface_n'].to_dict())
else:
subsurface_proportion_type = None
subsurface_proportion_map = None
load_task = task_graph.add_task(
func=_calculate_load,
args=(
f_reg['aligned_lulc_path'], lucode_to_parameters,
f'load_{nutrient}', load_path),
f_reg['masked_lulc_path'],
biophysical_df[f'load_{nutrient}'],
load_path),
dependent_task_list=[align_raster_task],
target_path_list=[load_path],
task_name=f'{nutrient} load')
@ -813,9 +869,8 @@ def execute(args):
surface_load_path = f_reg[f'surface_load_{nutrient}_path']
surface_load_task = task_graph.add_task(
func=_map_surface_load,
args=(modified_load_path, f_reg['aligned_lulc_path'],
lucode_to_parameters, subsurface_proportion_type,
surface_load_path),
args=(modified_load_path, f_reg['masked_lulc_path'],
subsurface_proportion_map, surface_load_path),
target_path_list=[surface_load_path],
dependent_task_list=[modified_load_task, align_raster_task],
task_name=f'map surface load {nutrient}')
@ -824,8 +879,8 @@ def execute(args):
eff_task = task_graph.add_task(
func=_map_lulc_to_val_mask_stream,
args=(
f_reg['aligned_lulc_path'], f_reg['stream_path'],
lucode_to_parameters, f'eff_{nutrient}', eff_path),
f_reg['masked_lulc_path'], f_reg['stream_path'],
biophysical_df[f'eff_{nutrient}'].to_dict(), eff_path),
target_path_list=[eff_path],
dependent_task_list=[align_raster_task, stream_extraction_task],
task_name=f'ret eff {nutrient}')
@ -834,8 +889,9 @@ def execute(args):
crit_len_task = task_graph.add_task(
func=_map_lulc_to_val_mask_stream,
args=(
f_reg['aligned_lulc_path'], f_reg['stream_path'],
lucode_to_parameters, f'crit_len_{nutrient}', crit_len_path),
f_reg['masked_lulc_path'], f_reg['stream_path'],
biophysical_df[f'crit_len_{nutrient}'].to_dict(),
crit_len_path),
target_path_list=[crit_len_path],
dependent_task_list=[align_raster_task, stream_extraction_task],
task_name=f'ret eff {nutrient}')
@ -879,12 +935,11 @@ def execute(args):
# only calculate subsurface things for nitrogen
if nutrient == 'n':
proportion_subsurface_map = {
lucode: params['proportion_subsurface_n']
for lucode, params in lucode_to_parameters.items()}
proportion_subsurface_map = (
biophysical_df['proportion_subsurface_n'].to_dict())
subsurface_load_task = task_graph.add_task(
func=_map_subsurface_load,
args=(modified_load_path, f_reg['aligned_lulc_path'],
args=(modified_load_path, f_reg['masked_lulc_path'],
proportion_subsurface_map, f_reg['sub_load_n_path']),
target_path_list=[f_reg['sub_load_n_path']],
dependent_task_list=[modified_load_task, align_raster_task],
@ -1174,18 +1229,13 @@ def _normalize_raster(base_raster_path_band, target_normalized_raster_path):
target_nodata)
def _calculate_load(
lulc_raster_path, lucode_to_parameters, load_type,
target_load_raster):
def _calculate_load(lulc_raster_path, lucode_to_load, target_load_raster):
"""Calculate load raster by mapping landcover and multiplying by area.
Args:
lulc_raster_path (string): path to integer landcover raster.
lucode_to_parameters (dict): a mapping of landcover IDs to a
dictionary indexed by the value of `load_{load_type}` that
represents a per-area nutrient load.
load_type (string): represent nutrient to map, either 'load_n' or
'load_p'.
lucode_to_load (dict): a mapping of landcover IDs to per-area
nutrient load.
target_load_raster (string): path to target raster that will have
total load per pixel.
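For example (hypothetical numbers): on a 30 m x 30 m pixel, cell_area_ha is 0.09, so a landcover class with a load of 10 kg/ha maps to 10 * 0.09 = 0.9 kg for that pixel.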
@ -1205,8 +1255,7 @@ def _calculate_load(
if lucode != nodata_landuse:
try:
result[lucode_array == lucode] = (
lucode_to_parameters[lucode][load_type] *
cell_area_ha)
lucode_to_load[lucode] * cell_area_ha)
except KeyError:
raise KeyError(
'lucode: %d is present in the landuse raster but '
@ -1290,18 +1339,17 @@ def _sum_rasters(raster_path_list, target_nodata, target_result_path):
def _map_surface_load(
modified_load_path, lulc_raster_path, lucode_to_parameters,
subsurface_proportion_type, target_surface_load_path):
modified_load_path, lulc_raster_path, lucode_to_subsurface_proportion,
target_surface_load_path):
"""Calculate surface load from landcover raster.
Args:
modified_load_path (string): path to modified load raster with units
of kg/pixel.
lulc_raster_path (string): path to landcover raster.
lucode_to_parameters (dict): maps landcover codes to a dictionary that
can be indexed by `subsurface_proportion_type`.
subsurface_proportion_type (string): if None no subsurface transfer
is mapped. Otherwise indexed from lucode_to_parameters.
lucode_to_subsurface_proportion (dict): maps landcover codes to
subsurface proportion values. Or if None, no subsurface transfer
is mapped.
target_surface_load_path (string): path to target raster.
Returns:
@ -1311,16 +1359,15 @@ def _map_surface_load(
lulc_raster_info = pygeoprocessing.get_raster_info(lulc_raster_path)
nodata_landuse = lulc_raster_info['nodata'][0]
keys = sorted(numpy.array(list(lucode_to_parameters)))
if subsurface_proportion_type is not None:
if lucode_to_subsurface_proportion is not None:
keys = sorted(lucode_to_subsurface_proportion.keys())
subsurface_values = numpy.array(
[lucode_to_parameters[x][subsurface_proportion_type]
for x in keys])
[lucode_to_subsurface_proportion[x] for x in keys])
def _map_surface_load_op(lucode_array, modified_load_array):
"""Convert unit load to total load & handle nodata."""
# If we don't have subsurface, just return 0.0.
if subsurface_proportion_type is None:
if lucode_to_subsurface_proportion is None:
return numpy.where(
~utils.array_equals_nodata(lucode_array, nodata_landuse),
modified_load_array, _TARGET_NODATA)
@ -1382,17 +1429,13 @@ def _map_subsurface_load(
def _map_lulc_to_val_mask_stream(
lulc_raster_path, stream_path, lucode_to_parameters, map_id,
target_eff_path):
lulc_raster_path, stream_path, lucodes_to_vals, target_eff_path):
"""Make retention efficiency raster from landcover.
Args:
lulc_raster_path (string): path to landcover raster.
stream_path (string): path to stream raster, where 0 means no stream and 1 means stream.
lucode_to_parameters (dict) mapping of landcover code to a dictionary
that contains the key in `map_id`
map_id (string): the id in the lookup table with values to map
landcover to efficiency.
lucodes_to_vals (dict): mapping of landcover codes to values.
target_eff_path (string): target raster that contains the mapping of
landcover codes to retention efficiency values except where there
is a stream in which case the retention efficiency is 0.
@ -1401,9 +1444,8 @@ def _map_lulc_to_val_mask_stream(
None.
"""
keys = sorted(numpy.array(list(lucode_to_parameters)))
values = numpy.array(
[lucode_to_parameters[x][map_id] for x in keys])
lucodes = sorted(lucodes_to_vals.keys())
values = numpy.array([lucodes_to_vals[x] for x in lucodes])
nodata_landuse = pygeoprocessing.get_raster_info(
lulc_raster_path)['nodata'][0]
@ -1417,7 +1459,7 @@ def _map_lulc_to_val_mask_stream(
result = numpy.empty(valid_mask.shape, dtype=numpy.float32)
result[:] = _TARGET_NODATA
index = numpy.digitize(
lucode_array[valid_mask].ravel(), keys, right=True)
lucode_array[valid_mask].ravel(), lucodes, right=True)
result[valid_mask] = (
values[index] * (1 - stream_array[valid_mask]))
return result
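A minimal sketch of the sorted-lucode / numpy.digitize lookup performed above, with made-up codes and values:
import numpy
lucodes = [1, 2, 5]                      # sorted landcover codes (hypothetical)
values = numpy.array([0.3, 0.6, 0.9])    # e.g. retention efficiencies
lucode_array = numpy.array([5, 1, 2, 2])
index = numpy.digitize(lucode_array, lucodes, right=True)
print(values[index])                     # [0.9 0.3 0.6 0.6]
Note that a code larger than the largest key in lucodes would index past the end of values and raise an IndexError.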

View File

@ -39,6 +39,7 @@ MODEL_SPEC = {
},
"guild_table_path": {
"type": "csv",
"index_col": "species",
"columns": {
"species": {
"type": "freestyle_string",
@ -87,6 +88,7 @@ MODEL_SPEC = {
},
"landcover_biophysical_table_path": {
"type": "csv",
"index_col": "lucode",
"columns": {
"lucode": spec_utils.LULC_TABLE_COLUMN,
"nesting_[SUBSTRATE]_availability_index": {
@ -309,10 +311,10 @@ MODEL_SPEC = {
"about": "Farm vector reprojected to the LULC projection",
"fields": {},
"geometries": spec_utils.POLYGONS
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -322,7 +324,7 @@ _INDEX_NODATA = -1
_NESTING_SUBSTRATE_PATTERN = 'nesting_([^_]+)_availability_index'
_FLORAL_RESOURCES_AVAILABLE_PATTERN = 'floral_resources_([^_]+)_index'
_EXPECTED_BIOPHYSICAL_HEADERS = [
'lucode', _NESTING_SUBSTRATE_PATTERN, _FLORAL_RESOURCES_AVAILABLE_PATTERN]
_NESTING_SUBSTRATE_PATTERN, _FLORAL_RESOURCES_AVAILABLE_PATTERN]
# These are patterns expected in the guilds table
_NESTING_SUITABILITY_PATTERN = 'nesting_suitability_([^_]+)_index'
@ -332,7 +334,7 @@ _FORAGING_ACTIVITY_RE_PATTERN = _FORAGING_ACTIVITY_PATTERN % '([^_]+)'
_RELATIVE_SPECIES_ABUNDANCE_FIELD = 'relative_abundance'
_ALPHA_HEADER = 'alpha'
_EXPECTED_GUILD_HEADERS = [
'species', _NESTING_SUITABILITY_PATTERN, _FORAGING_ACTIVITY_RE_PATTERN,
_NESTING_SUITABILITY_PATTERN, _FORAGING_ACTIVITY_RE_PATTERN,
_ALPHA_HEADER, _RELATIVE_SPECIES_ABUNDANCE_FIELD]
_NESTING_SUBSTRATE_INDEX_FILEPATTERN = 'nesting_substrate_index_%s%s.tif'
@ -502,8 +504,6 @@ def execute(args):
# create initial working directories and determine file suffixes
intermediate_output_dir = os.path.join(
args['workspace_dir'], 'intermediate_outputs')
work_token_dir = os.path.join(
intermediate_output_dir, '_taskgraph_working_dir')
output_dir = os.path.join(args['workspace_dir'])
utils.make_directories(
[output_dir, intermediate_output_dir])
@ -532,7 +532,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # Synchronous mode.
task_graph = taskgraph.TaskGraph(work_token_dir, n_workers)
task_graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
if farm_vector_path is not None:
# ensure farm vector is in the same projection as the landcover map
@ -718,6 +719,7 @@ def execute(args):
pollinator_abundance_task_map = {}
floral_resources_index_path_map = {}
floral_resources_index_task_map = {}
alpha_kernel_map = {}
for species in scenario_variables['species_list']:
# calculate foraging_effectiveness[species]
# FE(x, s) = sum_j [RA(l(x), j) * fa(s, j)]
@ -762,11 +764,17 @@ def execute(args):
intermediate_output_dir, _KERNEL_FILE_PATTERN % (
alpha, file_suffix))
alpha_kernel_raster_task = task_graph.add_task(
task_name=f'decay_kernel_raster_{alpha}',
func=utils.exponential_decay_kernel_raster,
args=(alpha, kernel_path),
target_path_list=[kernel_path])
# To avoid creating duplicate kernel rasters, check whether an
# equivalent kernel task has already been submitted.
try:
alpha_kernel_raster_task = alpha_kernel_map[kernel_path]
except KeyError:
alpha_kernel_raster_task = task_graph.add_task(
task_name=f'decay_kernel_raster_{alpha}',
func=utils.exponential_decay_kernel_raster,
args=(alpha, kernel_path),
target_path_list=[kernel_path])
alpha_kernel_map[kernel_path] = alpha_kernel_raster_task
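A tiny sketch of the de-duplication pattern above with made-up alpha values; species that share an alpha reuse the same kernel task rather than building a second identical raster:
alpha_kernel_map = {}
for alpha in [500.0, 500.0, 1200.0]:
    kernel_path = f'kernel_{alpha}.tif'  # hypothetical naming
    if kernel_path not in alpha_kernel_map:
        # stands in for task_graph.add_task(...)
        alpha_kernel_map[kernel_path] = f'task for {kernel_path}'
print(len(alpha_kernel_map))  # 2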
# convolve FE with alpha_s
floral_resources_index_path = os.path.join(
@ -1179,23 +1187,22 @@ def _parse_scenario_variables(args):
else:
farm_vector_path = None
guild_table = utils.read_csv_to_dataframe(
guild_table_path, 'species').to_dict(orient='index')
guild_df = utils.read_csv_to_dataframe(
guild_table_path, MODEL_SPEC['args']['guild_table_path'])
LOGGER.info('Checking to make sure guild table has all expected headers')
guild_headers = list(guild_table.values())[0].keys()
for header in _EXPECTED_GUILD_HEADERS:
matches = re.findall(header, " ".join(guild_headers))
matches = re.findall(header, " ".join(guild_df.columns))
if len(matches) == 0:
raise ValueError(
"Expected a header in guild table that matched the pattern "
f"'{header}' but was unable to find one. Here are all the "
f"headers from {guild_table_path}: {', '.join(guild_headers)}")
f"headers from {guild_table_path}: {', '.join(guild_df.columns)}")
landcover_biophysical_table = utils.read_csv_to_dataframe(
landcover_biophysical_table_path, 'lucode').to_dict(orient='index')
biophysical_table_headers = (
list(landcover_biophysical_table.values())[0].keys())
landcover_biophysical_df = utils.read_csv_to_dataframe(
landcover_biophysical_table_path,
MODEL_SPEC['args']['landcover_biophysical_table_path'])
biophysical_table_headers = landcover_biophysical_df.columns
for header in _EXPECTED_BIOPHYSICAL_HEADERS:
matches = re.findall(header, " ".join(biophysical_table_headers))
if len(matches) == 0:
@ -1211,7 +1218,7 @@ def _parse_scenario_variables(args):
# this dict to dict will map substrate types to guild/biophysical headers
# ex substrate_to_header['cavity']['biophysical']
substrate_to_header = collections.defaultdict(dict)
for header in guild_headers:
for header in guild_df.columns:
match = re.match(_FORAGING_ACTIVITY_RE_PATTERN, header)
if match:
season = match.group(1)
@ -1297,55 +1304,48 @@ def _parse_scenario_variables(args):
# * substrate_list (list of string)
result['substrate_list'] = sorted(substrate_to_header)
# * species_list (list of string)
result['species_list'] = sorted(guild_table)
result['species_list'] = sorted(guild_df.index)
result['alpha_value'] = dict()
for species in result['species_list']:
result['alpha_value'][species] = float(
guild_table[species][_ALPHA_HEADER])
result['alpha_value'][species] = guild_df[_ALPHA_HEADER][species]
# * species_abundance[species] (string->float)
total_relative_abundance = numpy.sum([
guild_table[species][_RELATIVE_SPECIES_ABUNDANCE_FIELD]
for species in result['species_list']])
total_relative_abundance = guild_df[_RELATIVE_SPECIES_ABUNDANCE_FIELD].sum()
result['species_abundance'] = {}
for species in result['species_list']:
result['species_abundance'][species] = (
guild_table[species][_RELATIVE_SPECIES_ABUNDANCE_FIELD] /
float(total_relative_abundance))
guild_df[_RELATIVE_SPECIES_ABUNDANCE_FIELD][species] /
total_relative_abundance)
# map the relative foraging activity of a species during a certain season
# (species, season)
result['species_foraging_activity'] = dict()
for species in result['species_list']:
total_activity = numpy.sum([
guild_table[species][_FORAGING_ACTIVITY_PATTERN % season]
guild_df[_FORAGING_ACTIVITY_PATTERN % season][species]
for season in result['season_list']])
for season in result['season_list']:
result['species_foraging_activity'][(species, season)] = (
guild_table[species][_FORAGING_ACTIVITY_PATTERN % season] /
float(total_activity))
guild_df[_FORAGING_ACTIVITY_PATTERN % season][species] /
total_activity)
# * landcover_substrate_index[substrate][landcover] (float)
result['landcover_substrate_index'] = collections.defaultdict(dict)
for raw_landcover_id in landcover_biophysical_table:
landcover_id = int(raw_landcover_id)
for landcover_id, row in landcover_biophysical_df.iterrows():
for substrate in result['substrate_list']:
substrate_biophysical_header = (
substrate_to_header[substrate]['biophysical'])
result['landcover_substrate_index'][substrate][landcover_id] = (
landcover_biophysical_table[landcover_id][
substrate_biophysical_header])
row[substrate_biophysical_header])
# * landcover_floral_resources[season][landcover] (float)
result['landcover_floral_resources'] = collections.defaultdict(dict)
for raw_landcover_id in landcover_biophysical_table:
landcover_id = int(raw_landcover_id)
for landcover_id, row in landcover_biophysical_df.iterrows():
for season in result['season_list']:
floral_rources_header = season_to_header[season]['biophysical']
result['landcover_floral_resources'][season][landcover_id] = (
landcover_biophysical_table[landcover_id][
floral_rources_header])
row[floral_rources_header])
# * species_substrate_index[(species, substrate)] (tuple->float)
result['species_substrate_index'] = collections.defaultdict(dict)
@ -1353,7 +1353,7 @@ def _parse_scenario_variables(args):
for substrate in result['substrate_list']:
substrate_guild_header = substrate_to_header[substrate]['guild']
result['species_substrate_index'][species][substrate] = (
guild_table[species][substrate_guild_header])
guild_df[substrate_guild_header][species])
# * foraging_activity_index[(species, season)] (tuple->float)
result['foraging_activity_index'] = {}
@ -1362,7 +1362,7 @@ def _parse_scenario_variables(args):
key = (species, season)
foraging_biophyiscal_header = season_to_header[season]['guild']
result['foraging_activity_index'][key] = (
guild_table[species][foraging_biophyiscal_header])
guild_df[foraging_biophyiscal_header][species])
return result

View File

@ -77,7 +77,7 @@ predictor_table_columns = {
"point_nearest_distance": {
"description": gettext(
"Predictor is a point vector. Metric is the Euclidean "
"distance between the center of each AOI grid cell and "
"distance between the centroid of each AOI grid cell and "
"the nearest point in this layer.")},
"line_intersect_length": {
"description": gettext(
@ -192,6 +192,7 @@ MODEL_SPEC = {
},
"predictor_table_path": {
"type": "csv",
"index_col": "id",
"columns": predictor_table_columns,
"required": "compute_regression",
"about": gettext(
@ -202,6 +203,7 @@ MODEL_SPEC = {
},
"scenario_predictor_table_path": {
"type": "csv",
"index_col": "id",
"columns": predictor_table_columns,
"required": False,
"about": gettext(
@ -233,7 +235,12 @@ MODEL_SPEC = {
},
"monthly_table.csv": {
"about": gettext("Table of monthly photo-user-days."),
"index_col": "poly_id",
"columns": {
"poly_id": {
"type": "integer",
"about": gettext("Polygon ID")
},
"[YEAR]-[MONTH]": {
"about": gettext(
"Total photo-user-days counted in each cell in the "
@ -324,10 +331,10 @@ MODEL_SPEC = {
},
"server_version.pickle": {
"about": gettext("Server version info")
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -410,7 +417,7 @@ def execute(args):
* 'point_count': count of the points contained in the
response polygon
* 'point_nearest_distance': distance to the nearest point
from the response polygon
from the centroid of the response polygon
* 'line_intersect_length': length of lines that intersect
with the response polygon in projected units of AOI
* 'polygon_area': area of the polygon contained within
@ -472,7 +479,6 @@ def execute(args):
(_INTERMEDIATE_BASE_FILES, intermediate_dir)], file_suffix)
# Initialize a TaskGraph
taskgraph_db_dir = os.path.join(intermediate_dir, '_taskgraph_working_dir')
try:
n_workers = int(args['n_workers'])
except (KeyError, ValueError, TypeError):
@ -480,7 +486,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # single process mode.
task_graph = taskgraph.TaskGraph(taskgraph_db_dir, n_workers)
task_graph = taskgraph.TaskGraph(
os.path.join(output_dir, 'taskgraph_cache'), n_workers)
if args['grid_aoi']:
prep_aoi_task = task_graph.add_task(
@ -853,16 +860,14 @@ def _schedule_predictor_data_processing(
'line_intersect_length': _line_intersect_length,
}
predictor_table = utils.read_csv_to_dataframe(
predictor_table_path, 'id', expand_path_cols=['path']
).to_dict(orient='index')
predictor_df = utils.read_csv_to_dataframe(
predictor_table_path, MODEL_SPEC['args']['predictor_table_path'])
predictor_task_list = []
predictor_json_list = [] # tracks predictor files to add to shp
for predictor_id in predictor_table:
for predictor_id, row in predictor_df.iterrows():
LOGGER.info(f"Building predictor {predictor_id}")
predictor_type = predictor_table[predictor_id]['type'].strip()
predictor_type = row['type']
if predictor_type.startswith('raster'):
# type must be one of raster_sum or raster_mean
raster_op_mode = predictor_type.split('_')[1]
@ -871,7 +876,7 @@ def _schedule_predictor_data_processing(
predictor_json_list.append(predictor_target_path)
predictor_task_list.append(task_graph.add_task(
func=_raster_sum_mean,
args=(predictor_table[predictor_id]['path'], raster_op_mode,
args=(row['path'], raster_op_mode,
response_vector_path, predictor_target_path),
target_path_list=[predictor_target_path],
task_name=f'predictor {predictor_id}'))
@ -884,8 +889,7 @@ def _schedule_predictor_data_processing(
predictor_task_list.append(task_graph.add_task(
func=_polygon_area,
args=(predictor_type, response_polygons_pickle_path,
predictor_table[predictor_id]['path'],
predictor_target_path),
row['path'], predictor_target_path),
target_path_list=[predictor_target_path],
dependent_task_list=[prepare_response_polygons_task],
task_name=f'predictor {predictor_id}'))
@ -896,8 +900,7 @@ def _schedule_predictor_data_processing(
predictor_task_list.append(task_graph.add_task(
func=predictor_functions[predictor_type],
args=(response_polygons_pickle_path,
predictor_table[predictor_id]['path'],
predictor_target_path),
row['path'], predictor_target_path),
target_path_list=[predictor_target_path],
dependent_task_list=[prepare_response_polygons_task],
task_name=f'predictor {predictor_id}'))
@ -1167,7 +1170,7 @@ def _line_intersect_length(
def _point_nearest_distance(
response_polygons_pickle_path, point_vector_path,
predictor_target_path):
"""Calculate distance to nearest point for all polygons.
"""Calculate distance to nearest point for the centroid of all polygons.
Args:
response_polygons_pickle_path (str): path to a pickled dictionary which
@ -1197,7 +1200,7 @@ def _point_nearest_distance(
f"{(100*index)/len(response_polygons_lookup):.2f}% complete"))
point_distance_lookup[str(feature_id)] = min([
geometry.distance(point) for point in points])
geometry.centroid.distance(point) for point in points])
LOGGER.info(f"{os.path.basename(point_vector_path)} point distance: "
"100.00% complete")
with open(predictor_target_path, 'w') as jsonfile:
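A minimal sketch of the centroid-to-nearest-point metric computed above, assuming shapely geometries (which the centroid and distance calls suggest) and hypothetical coordinates:
from shapely.geometry import Point, Polygon
aoi_cell = Polygon([(0, 0), (4, 0), (4, 4), (0, 4)])  # centroid at (2, 2)
points = [Point(10, 2), Point(3, 7)]
nearest = min(aoi_cell.centroid.distance(p) for p in points)
print(round(nearest, 2))  # 5.1, the distance to the point at (3, 7)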
@ -1546,10 +1549,10 @@ def _validate_same_id_lengths(table_path):
tables.
"""
predictor_table = utils.read_csv_to_dataframe(
table_path, 'id').to_dict(orient='index')
predictor_df = utils.read_csv_to_dataframe(
table_path, MODEL_SPEC['args']['predictor_table_path'])
too_long = set()
for p_id in predictor_table:
for p_id in predictor_df.index:
if len(p_id) > 10:
too_long.add(p_id)
if len(too_long) > 0:
@ -1580,21 +1583,21 @@ def _validate_same_ids_and_types(
tables.
"""
predictor_table = utils.read_csv_to_dataframe(
predictor_table_path, 'id').to_dict(orient='index')
predictor_df = utils.read_csv_to_dataframe(
predictor_table_path, MODEL_SPEC['args']['predictor_table_path'])
scenario_predictor_table = utils.read_csv_to_dataframe(
scenario_predictor_table_path, 'id').to_dict(orient='index')
scenario_predictor_df = utils.read_csv_to_dataframe(
scenario_predictor_table_path,
MODEL_SPEC['args']['scenario_predictor_table_path'])
predictor_table_pairs = set([
(p_id, predictor_table[p_id]['type'].strip()) for p_id in predictor_table])
scenario_predictor_table_pairs = set([
(p_id, scenario_predictor_table[p_id]['type'].strip()) for p_id in
scenario_predictor_table])
if predictor_table_pairs != scenario_predictor_table_pairs:
predictor_pairs = set([
(p_id, row['type']) for p_id, row in predictor_df.iterrows()])
scenario_predictor_pairs = set([
(p_id, row['type']) for p_id, row in scenario_predictor_df.iterrows()])
if predictor_pairs != scenario_predictor_pairs:
raise ValueError('table pairs unequal.\n\t'
f'predictor: {predictor_table_pairs}\n\t'
f'scenario:{scenario_predictor_table_pairs}')
f'predictor: {predictor_pairs}\n\t'
f'scenario:{scenario_predictor_pairs}')
LOGGER.info('tables validate correctly')
@ -1617,8 +1620,8 @@ def _validate_same_projection(base_vector_path, table_path):
# This will load the table as a list of paths which we can iterate through
# without bothering the rest of the table structure
data_paths = utils.read_csv_to_dataframe(
table_path, convert_vals_to_lower=False, expand_path_cols=['path']
).squeeze('columns')['path'].tolist()
table_path, MODEL_SPEC['args']['predictor_table_path']
)['path'].tolist()
base_vector = gdal.OpenEx(base_vector_path, gdal.OF_VECTOR)
base_layer = base_vector.GetLayer()
@ -1674,14 +1677,14 @@ def _validate_predictor_types(table_path):
ValueError if any value in the ``type`` column does not match a valid
type, ignoring leading/trailing whitespace.
"""
df = utils.read_csv_to_dataframe(table_path, convert_vals_to_lower=False)
df = utils.read_csv_to_dataframe(
table_path, MODEL_SPEC['args']['predictor_table_path'])
# ignore leading/trailing whitespace because it will be removed
# when the type values are used
type_list = set([type.strip() for type in df['type']])
valid_types = set({'raster_mean', 'raster_sum', 'point_count',
'point_nearest_distance', 'line_intersect_length',
'polygon_area_coverage', 'polygon_percent_coverage'})
difference = type_list.difference(valid_types)
difference = set(df['type']).difference(valid_types)
if difference:
raise ValueError('The table contains invalid type value(s): '
f'{difference}. The allowed types are: {valid_types}')

View File

@ -107,7 +107,7 @@ MODEL_SPEC = {
},
},
"outputs": {
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR,
"taskgraph_cache": spec_utils.TASKGRAPH_DIR,
"filled.tif": spec_utils.FILLED_DEM,
"flow_accumulation.tif": spec_utils.FLOW_ACCUMULATION,
"flow_direction.tif": spec_utils.FLOW_DIRECTION,
@ -341,8 +341,7 @@ def execute(args):
``None``
"""
file_suffix = utils.make_suffix_string(args, 'results_suffix')
task_cache_dir = os.path.join(args['workspace_dir'], '_taskgraph_working_dir')
utils.make_directories([args['workspace_dir'], task_cache_dir])
utils.make_directories([args['workspace_dir']])
if ('calculate_flow_direction' in args and
bool(args['calculate_flow_direction'])):
@ -373,7 +372,8 @@ def execute(args):
# TypeError when n_workers is None.
n_workers = -1 # Synchronous mode.
graph = taskgraph.TaskGraph(task_cache_dir, n_workers=n_workers)
graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers=n_workers)
# Calculate slope. This is intentionally on the original DEM, not
# on the pitfilled DEM. If the user really wants the slope of the filled

View File

@ -121,6 +121,7 @@ MODEL_SPEC = {
"nearest_to_edge.csv": {
"about": gettext(
"Table of land cover classes and the amount of each that was converted for the nearest-to-edge conversion scenario."),
"index_col": "lucode",
"columns": {
"lucode": {
"type": "integer",
@ -140,6 +141,7 @@ MODEL_SPEC = {
"farthest_from_edge.csv": {
"about": gettext(
"Table of land cover classes and the amount of each that was converted for the farthest-from-edge conversion scenario."),
"index_col": "lucode",
"columns": {
"lucode": {
"type": "integer",
@ -175,10 +177,10 @@ MODEL_SPEC = {
"Map of the distance from each pixel to the nearest "
"edge of the focal landcover."),
"bands": {1: {"type": "number", "units": u.pixel}}
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -251,8 +253,6 @@ def execute(args):
utils.make_directories(
[output_dir, intermediate_output_dir, tmp_dir])
work_token_dir = os.path.join(
intermediate_output_dir, '_taskgraph_working_dir')
try:
n_workers = int(args['n_workers'])
except (KeyError, ValueError, TypeError):
@ -260,7 +260,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # Single process mode.
task_graph = taskgraph.TaskGraph(work_token_dir, n_workers)
task_graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
area_to_convert = float(args['area_to_convert'])
replacement_lucode = int(args['replacement_lucode'])

View File

@ -209,10 +209,10 @@ MODEL_SPEC = {
"visibility_[FEATURE_ID].tif": {
"about": gettext("Map of visibility for a given structure's viewpoint. This raster has pixel values of 0 (not visible), 1 (visible), or nodata (where the DEM is nodata)."),
"bands": {1: {"type": "integer"}}
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -290,7 +290,6 @@ def execute(args):
(_INTERMEDIATE_BASE_FILES, intermediate_dir)],
file_suffix)
work_token_dir = os.path.join(intermediate_dir, '_taskgraph_working_dir')
try:
n_workers = int(args['n_workers'])
except (KeyError, ValueError, TypeError):
@ -298,7 +297,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # Synchronous execution
graph = taskgraph.TaskGraph(work_token_dir, n_workers)
graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
reprojected_aoi_task = graph.add_task(
pygeoprocessing.reproject_vector,
@ -549,10 +549,10 @@ def _determine_valid_viewpoints(dem_path, structures_path):
# Coordinates in map units to pass to viewshed algorithm
geometry = point.GetGeometryRef()
if geometry.GetGeometryType() != ogr.wkbPoint:
if geometry.GetGeometryName() != 'POINT':
raise AssertionError(
f"Feature {point.GetFID()} is not a Point geometry. "
"Features must be a Point.")
f"Feature {point.GetFID()} must be a POINT geometry, "
f"not {geometry.GetGeometryName()}")
viewpoint = (geometry.GetX(), geometry.GetY())

View File

@ -87,6 +87,7 @@ MODEL_SPEC = {
},
"biophysical_table_path": {
"type": "csv",
"index_col": "lucode",
"columns": {
"lucode": spec_utils.LULC_TABLE_COLUMN,
"usle_c": {
@ -351,57 +352,52 @@ MODEL_SPEC = {
"times the thresholded slope (in eq. (74))"),
"bands": {1: {"type": "ratio"}}
},
"churn_dir_not_for_humans": {
"type": "directory",
"contents": {
"aligned_dem.tif": {
"about": gettext(
"Copy of the input DEM, clipped to the extent "
"of the other raster inputs."),
"bands": {1: {
"type": "number",
"units": u.meter
}}
},
"aligned_drainage.tif": {
"about": gettext(
"Copy of the input drainage map, clipped to "
"the extent of the other raster inputs and "
"aligned to the DEM."),
"bands": {1: {"type": "integer"}},
},
"aligned_erodibility.tif": {
"about": gettext(
"Copy of the input erodibility map, clipped to "
"the extent of the other raster inputs and "
"aligned to the DEM."),
"bands": {1: {
"type": "number",
"units": u.metric_ton*u.hectare*u.hour/(u.hectare*u.megajoule*u.millimeter)
}}
},
"aligned_erosivity.tif": {
"about": gettext(
"Copy of the input erosivity map, clipped to "
"the extent of the other raster inputs and "
"aligned to the DEM."),
"bands": {1: {
"type": "number",
"units": u.megajoule*u.millimeter/(u.hectare*u.hour*u.year)
}}
},
"aligned_lulc.tif": {
"about": gettext(
"Copy of the input drainage map, clipped to "
"the extent of the other raster inputs and "
"aligned to the DEM."),
"bands": {1: {"type": "integer"}},
},
"taskgraph.db": {}
}
"aligned_dem.tif": {
"about": gettext(
"Copy of the input DEM, clipped to the extent "
"of the other raster inputs."),
"bands": {1: {
"type": "number",
"units": u.meter
}}
},
"aligned_drainage.tif": {
"about": gettext(
"Copy of the input drainage map, clipped to "
"the extent of the other raster inputs and "
"aligned to the DEM."),
"bands": {1: {"type": "integer"}},
},
"aligned_erodibility.tif": {
"about": gettext(
"Copy of the input erodibility map, clipped to "
"the extent of the other raster inputs and "
"aligned to the DEM."),
"bands": {1: {
"type": "number",
"units": u.metric_ton*u.hectare*u.hour/(u.hectare*u.megajoule*u.millimeter)
}}
},
"aligned_erosivity.tif": {
"about": gettext(
"Copy of the input erosivity map, clipped to "
"the extent of the other raster inputs and "
"aligned to the DEM."),
"bands": {1: {
"type": "number",
"units": u.megajoule*u.millimeter/(u.hectare*u.hour*u.year)
}}
},
"aligned_lulc.tif": {
"about": gettext(
"Copy of the input LULC map, clipped to "
"the extent of the other raster inputs and "
"aligned to the DEM."),
"bands": {1: {"type": "integer"}},
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -420,6 +416,11 @@ _OUTPUT_BASE_FILES = {
INTERMEDIATE_DIR_NAME = 'intermediate_outputs'
_INTERMEDIATE_BASE_FILES = {
'aligned_dem_path': 'aligned_dem.tif',
'aligned_drainage_path': 'aligned_drainage.tif',
'aligned_erodibility_path': 'aligned_erodibility.tif',
'aligned_erosivity_path': 'aligned_erosivity.tif',
'aligned_lulc_path': 'aligned_lulc.tif',
'cp_factor_path': 'cp.tif',
'd_dn_path': 'd_dn.tif',
'd_up_path': 'd_up.tif',
@ -440,17 +441,9 @@ _INTERMEDIATE_BASE_FILES = {
'w_path': 'w.tif',
'ws_inverse_path': 'ws_inverse.tif',
'e_prime_path': 'e_prime.tif',
'weighted_avg_aspect_path': 'weighted_avg_aspect.tif',
'drainage_mask': 'what_drains_to_stream.tif',
}
_TMP_BASE_FILES = {
'aligned_dem_path': 'aligned_dem.tif',
'aligned_drainage_path': 'aligned_drainage.tif',
'aligned_erodibility_path': 'aligned_erodibility.tif',
'aligned_erosivity_path': 'aligned_erosivity.tif',
'aligned_lulc_path': 'aligned_lulc.tif',
}
# Target nodata is for general rasters that are positive, and _IC_NODATA are
# for rasters that are any range
@ -501,42 +494,27 @@ def execute(args):
"""
file_suffix = utils.make_suffix_string(args, 'results_suffix')
biophysical_table = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'lucode').to_dict(orient='index')
biophysical_df = utils.read_csv_to_dataframe(
args['biophysical_table_path'], MODEL_SPEC['args']['biophysical_table_path'])
# Test to see if c or p values are outside of 0..1
for table_key in ['usle_c', 'usle_p']:
for (lulc_code, table) in biophysical_table.items():
try:
float(lulc_code)
except ValueError:
raise ValueError(
f'Value "{lulc_code}" from the "lucode" column of the '
f'biophysical table is not a number. Please check the '
f'formatting of {args["biophysical_table_path"]}')
try:
float_value = float(table[table_key])
if float_value < 0 or float_value > 1:
raise ValueError(
f'{float_value} is not within range 0..1')
except ValueError:
for key in ['usle_c', 'usle_p']:
for lulc_code, row in biophysical_df.iterrows():
if row[key] < 0 or row[key] > 1:
raise ValueError(
f'A value in the biophysical table is not a number '
f'within range 0..1. The offending value is in '
f'column "{table_key}", lucode row "{lulc_code}", '
f'and has value "{table[table_key]}"')
f'column "{key}", lucode row "{lulc_code}", '
f'and has value "{row[key]}"')
intermediate_output_dir = os.path.join(
args['workspace_dir'], INTERMEDIATE_DIR_NAME)
output_dir = os.path.join(args['workspace_dir'])
churn_dir = os.path.join(
intermediate_output_dir, 'churn_dir_not_for_humans')
utils.make_directories([output_dir, intermediate_output_dir, churn_dir])
utils.make_directories([output_dir, intermediate_output_dir])
f_reg = utils.build_file_registry(
[(_OUTPUT_BASE_FILES, output_dir),
(_INTERMEDIATE_BASE_FILES, intermediate_output_dir),
(_TMP_BASE_FILES, churn_dir)], file_suffix)
(_INTERMEDIATE_BASE_FILES, intermediate_output_dir)], file_suffix)
try:
n_workers = int(args['n_workers'])
@ -546,7 +524,8 @@ def execute(args):
# TypeError when n_workers is None.
n_workers = -1 # Synchronous mode.
task_graph = taskgraph.TaskGraph(
churn_dir, n_workers, reporting_interval=5.0)
os.path.join(output_dir, 'taskgraph_cache'),
n_workers, reporting_interval=5.0)
base_list = []
aligned_list = []
@ -617,14 +596,6 @@ def execute(args):
dependent_task_list=[pit_fill_task],
task_name='flow direction calculation')
weighted_avg_aspect_task = task_graph.add_task(
func=sdr_core.calculate_average_aspect,
args=(f_reg['flow_direction_path'],
f_reg['weighted_avg_aspect_path']),
target_path_list=[f_reg['weighted_avg_aspect_path']],
dependent_task_list=[flow_dir_task],
task_name='weighted average of multiple-flow aspects')
flow_accumulation_task = task_graph.add_task(
func=pygeoprocessing.routing.flow_accumulation_mfd,
args=(
@ -639,13 +610,11 @@ def execute(args):
args=(
f_reg['flow_accumulation_path'],
f_reg['slope_path'],
f_reg['weighted_avg_aspect_path'],
float(args['l_max']),
f_reg['ls_path']),
target_path_list=[f_reg['ls_path']],
dependent_task_list=[
flow_accumulation_task, slope_task,
weighted_avg_aspect_task],
flow_accumulation_task, slope_task],
task_name='ls factor calculation')
stream_task = task_graph.add_task(
@ -675,19 +644,21 @@ def execute(args):
drainage_raster_path_task = (
f_reg['stream_path'], stream_task)
lulc_to_c = biophysical_df['usle_c'].to_dict()
threshold_w_task = task_graph.add_task(
func=_calculate_w,
args=(
biophysical_table, f_reg['aligned_lulc_path'], f_reg['w_path'],
lulc_to_c, f_reg['aligned_lulc_path'], f_reg['w_path'],
f_reg['thresholded_w_path']),
target_path_list=[f_reg['w_path'], f_reg['thresholded_w_path']],
dependent_task_list=[align_task],
task_name='calculate W')
lulc_to_cp = (biophysical_df['usle_c'] * biophysical_df['usle_p']).to_dict()
cp_task = task_graph.add_task(
func=_calculate_cp,
args=(
biophysical_table, f_reg['aligned_lulc_path'],
lulc_to_cp, f_reg['aligned_lulc_path'],
f_reg['cp_factor_path']),
target_path_list=[f_reg['cp_factor_path']],
dependent_task_list=[align_task],
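The lulc_to_c and lulc_to_cp lookups handed to the W and CP tasks above come straight from the biophysical DataFrame; a small pandas sketch with made-up values:
import pandas as pd
biophysical_df = pd.DataFrame(
    {'usle_c': [0.1, 0.4], 'usle_p': [1.0, 0.5]},
    index=pd.Index([1, 2], name='lucode'))
lulc_to_c = biophysical_df['usle_c'].to_dict()
# lucode 1 -> 0.1, lucode 2 -> 0.4
lulc_to_cp = (biophysical_df['usle_c'] * biophysical_df['usle_p']).to_dict()
# lucode 1 -> 0.1, lucode 2 -> 0.2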
@ -1029,26 +1000,61 @@ def _calculate_what_drains_to_stream(
def _calculate_ls_factor(
flow_accumulation_path, slope_path, avg_aspect_path, l_max,
target_ls_prime_factor_path):
flow_accumulation_path, slope_path, l_max,
target_ls_factor_path):
"""Calculate LS factor.
Calculates a modified LS factor as Equation 3 from "Extension and
Calculates the LS factor using Equation 3 from "Extension and
validation of a geographic information system-based method for calculating
the Revised Universal Soil Loss Equation length-slope factor for erosion
risk assessments in large watersheds" where the ``x`` term is the average
aspect ratio weighted by proportional flow to account for multiple flow
direction.
risk assessments in large watersheds".
The equation for this is::
(upstream_area + pixel_area)^(m+1) - upstream_area^(m+1)
LS = S * --------------------------------------------------------
(pixel_size^(m+2)) * (aspect_dir^m) * 22.13^m
Where
* ``S`` is the slope factor defined in equation 4 from the same paper,
calculated by the following where ``b`` is the slope in radians:
* ``S = 10.8 * sin(b) + 0.03`` where slope < 9%
* ``S = 16.8 * sin(b) - 0.50`` where slope >= 9%
* ``upstream_area`` is interpreted as the square root of the
catchment area, to match SAGA-GIS's method for calculating LS
Factor.
* ``pixel_area`` is the area of the pixel in square meters, and
``pixel_size`` is its side length in meters.
* ``m`` is the slope-length exponent of the RUSLE LS-factor,
which, as discussed in Oliveira et al. 2013 is a function of the
on-pixel slope theta:
* ``m = 0.2`` when ``theta <= 1%``
* ``m = 0.3`` when ``1% < theta <= 3.5%``
* ``m = 0.4`` when ``3.5% < theta <= 5%``
* ``m = 0.5`` when ``5% < theta <= 9%``
* ``m = beta / (1+beta)`` when ``theta > 9%``, where
``beta = (sin(theta) / 0.0896) / (3*sin(theta)^0.8 + 0.56)``
* ``aspect_dir`` is calculated by ``|sin(alpha)| + |cos(alpha)|``
for the given pixel.
Oliveira et al. can be found at:
Oliveira, A.H., Silva, M.A. da, Silva, M.L.N., Curi, N., Neto, G.K.,
Freitas, D.A.F. de, 2013. Development of Topographic Factor Modeling
for Application in Soil Erosion Models, in: Intechopen (Ed.), Soil
Processes and Current Trends in Quality Assessment. p. 28.
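As a worked example of the S and m pieces (hypothetical pixel): for a 6% slope, b = arctan(0.06), sin(b) is about 0.0599, so S is about 10.8 * 0.0599 + 0.03 = 0.68, and because 5% < 6% <= 9% the exponent m is 0.5.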
Args:
flow_accumulation_path (string): path to raster, pixel values are the
contributing upslope area at that cell. Pixel size is square.
slope_path (string): path to slope raster as a percent
avg_aspect_path (string): The path to to raster of the weighted average
of aspects based on proportional flow.
l_max (float): if the calculated value of L exceeds this value
it is clamped to this value.
target_ls_prime_factor_path (string): path to output ls_prime_factor
target_ls_factor_path (string): path to output LS factor
raster
Returns:
@ -1056,8 +1062,6 @@ def _calculate_ls_factor(
"""
slope_nodata = pygeoprocessing.get_raster_info(slope_path)['nodata'][0]
avg_aspect_nodata = pygeoprocessing.get_raster_info(
avg_aspect_path)['nodata'][0]
flow_accumulation_info = pygeoprocessing.get_raster_info(
flow_accumulation_path)
@ -1065,14 +1069,12 @@ def _calculate_ls_factor(
cell_size = abs(flow_accumulation_info['pixel_size'][0])
cell_area = cell_size ** 2
def ls_factor_function(
percent_slope, flow_accumulation, avg_aspect, l_max):
"""Calculate the LS' factor.
def ls_factor_function(percent_slope, flow_accumulation, l_max):
"""Calculate the LS factor.
Args:
percent_slope (numpy.ndarray): slope in percent
flow_accumulation (numpy.ndarray): upslope pixels
avg_aspect (numpy.ndarray): the weighted average aspect from MFD
l_max (float): max L factor, clamp to this value if L exceeds it
Returns:
@ -1082,16 +1084,27 @@ def _calculate_ls_factor(
# avg aspect intermediate output should always have a defined
# nodata value from pygeoprocessing
valid_mask = (
(~utils.array_equals_nodata(avg_aspect, avg_aspect_nodata)) &
~utils.array_equals_nodata(percent_slope, slope_nodata) &
~utils.array_equals_nodata(
flow_accumulation, flow_accumulation_nodata))
result = numpy.empty(valid_mask.shape, dtype=numpy.float32)
result[:] = _TARGET_NODATA
contributing_area = (flow_accumulation[valid_mask]-1) * cell_area
# Although Desmet & Govers (1996) discusses "upstream contributing
# area", this is not strictly defined. We decided to use the square
# root of the upstream contributing area here as an estimate, which
# matches the SAGA LS Factor option "square root of catchment area".
# See the InVEST ADR-0001 for more information.
# We subtract 1 from the flow accumulation because FA includes itself
# in its count of pixels upstream and our LS factor equation wants only
# those pixels that are strictly upstream.
contributing_area = numpy.sqrt(
(flow_accumulation[valid_mask]-1) * cell_area)
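# For example (hypothetical numbers): with 30 m pixels
# (cell_area = 900 m^2) and flow_accumulation = 101, this
# estimate is sqrt(100 * 900) = 300.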
slope_in_radians = numpy.arctan(percent_slope[valid_mask] / 100.0)
aspect_length = (numpy.fabs(numpy.sin(slope_in_radians)) +
numpy.fabs(numpy.cos(slope_in_radians)))
# From Equation 4 in "Extension and validation of a geographic
# information system ..."
slope_factor = numpy.where(
@ -1121,7 +1134,7 @@ def _calculate_ls_factor(
l_factor = (
((contributing_area + cell_area)**(m_exp+1) -
contributing_area ** (m_exp+1)) /
((cell_size ** (m_exp + 2)) * (avg_aspect[valid_mask]**m_exp) *
((cell_size ** (m_exp + 2)) * (aspect_length**m_exp) *
(22.13**m_exp)))
# threshold L factor to l_max
@ -1130,12 +1143,10 @@ def _calculate_ls_factor(
result[valid_mask] = l_factor * slope_factor
return result
# call raster_calculator to calculate the ls_factor
pygeoprocessing.raster_calculator(
[(path, 1) for path in [
slope_path, flow_accumulation_path, avg_aspect_path]] + [
[(path, 1) for path in [slope_path, flow_accumulation_path]] + [
(l_max, 'raw')],
ls_factor_function, target_ls_prime_factor_path, gdal.GDT_Float32,
ls_factor_function, target_ls_factor_path, gdal.GDT_Float32,
_TARGET_NODATA)
@ -1277,15 +1288,14 @@ def _add_drainage(stream_path, drainage_path, out_stream_and_drainage_path):
def _calculate_w(
biophysical_table, lulc_path, w_factor_path,
lulc_to_c, lulc_path, w_factor_path,
out_thresholded_w_factor_path):
"""W factor: map C values from LULC and lower threshold to 0.001.
W is a factor in calculating d_up accumulation for SDR.
Args:
biophysical_table (dict): map of LULC codes to dictionaries that
contain at least a 'usle_c' field
lulc_to_c (dict): mapping of LULC codes to C values
lulc_path (string): path to LULC raster
w_factor_path (string): path to output raw W factor
out_thresholded_w_factor_path (string): W factor from `w_factor_path`
@ -1295,9 +1305,6 @@ def _calculate_w(
None
"""
lulc_to_c = dict(
[(lulc_code, float(table['usle_c'])) for
(lulc_code, table) in biophysical_table.items()])
if pygeoprocessing.get_raster_info(lulc_path)['nodata'][0] is None:
# will get a case where the raster might be masked but nothing to
# replace so 0 is used by default. Ensure this exists in lookup.
@ -1326,13 +1333,11 @@ def _calculate_w(
gdal.GDT_Float32, _TARGET_NODATA)
def _calculate_cp(biophysical_table, lulc_path, cp_factor_path):
def _calculate_cp(lulc_to_cp, lulc_path, cp_factor_path):
"""Map LULC to C*P value.
Args:
biophysical_table (dict): map of lulc codes to dictionaries that
contain at least the entry 'usle_c" and 'usle_p' corresponding to
those USLE components.
lulc_to_cp (dict): mapping of lulc codes to CP values
lulc_path (string): path to LULC raster
cp_factor_path (string): path to output raster of LULC mapped to C*P
values
@ -1341,9 +1346,6 @@ def _calculate_cp(biophysical_table, lulc_path, cp_factor_path):
None
"""
lulc_to_cp = dict(
[(lulc_code, float(table['usle_c']) * float(table['usle_p'])) for
(lulc_code, table) in biophysical_table.items()])
if pygeoprocessing.get_raster_info(lulc_path)['nodata'][0] is None:
# will get a case where the raster might be masked but nothing to
# replace so 0 is used by default. Ensure this exists in lookup.

View File

@ -675,127 +675,3 @@ def calculate_sediment_deposition(
LOGGER.info('Sediment deposition 100% complete')
sediment_deposition_raster.close()
def calculate_average_aspect(
mfd_flow_direction_path, target_average_aspect_path):
"""Calculate the Weighted Average Aspect Ratio from MFD.
Calculates the average aspect ratio weighted by proportional flow
direction.
Args:
mfd_flow_direction_path (string): The path to an MFD flow direction
raster.
target_average_aspect_path (string): The path to where the calculated
weighted average aspect raster should be written.
Returns:
``None``.
"""
LOGGER.info('Calculating average aspect')
cdef float average_aspect_nodata = -1
pygeoprocessing.new_raster_from_base(
mfd_flow_direction_path, target_average_aspect_path,
gdal.GDT_Float32, [average_aspect_nodata], [average_aspect_nodata])
flow_direction_info = pygeoprocessing.get_raster_info(
mfd_flow_direction_path)
cdef int mfd_flow_direction_nodata = flow_direction_info['nodata'][0]
cdef int n_cols, n_rows
n_cols, n_rows = flow_direction_info['raster_size']
cdef _ManagedRaster mfd_flow_direction_raster = _ManagedRaster(
mfd_flow_direction_path, 1, False)
cdef _ManagedRaster average_aspect_raster = _ManagedRaster(
target_average_aspect_path, 1, True)
cdef int seed_row = 0
cdef int seed_col = 0
cdef int n_pixels_visited = 0
cdef int win_xsize, win_ysize, xoff, yoff
cdef int row_index, col_index, neighbor_index
cdef int flow_weight_in_direction
cdef int weight_sum
cdef int seed_flow_value
cdef float aspect_weighted_average, aspect_weighted_sum
# the flow_lengths array is the functional equivalent
# of calculating |sin(alpha)| + |cos(alpha)|.
cdef float* flow_lengths = [
1, <float>SQRT2,
1, <float>SQRT2,
1, <float>SQRT2,
1, <float>SQRT2
]
# Loop over iterblocks to maintain cache locality
# Find each non-nodata pixel and calculate proportional flow
# Multiply proportional flow times the flow length x_d
# write the final value to the raster.
for offset_dict in pygeoprocessing.iterblocks(
(mfd_flow_direction_path, 1), offset_only=True, largest_block=0):
win_xsize = offset_dict['win_xsize']
win_ysize = offset_dict['win_ysize']
xoff = offset_dict['xoff']
yoff = offset_dict['yoff']
LOGGER.info('Average aspect %.2f%% complete', 100 * (
n_pixels_visited / float(n_cols * n_rows)))
for row_index in range(win_ysize):
seed_row = yoff + row_index
for col_index in range(win_xsize):
seed_col = xoff + col_index
seed_flow_value = <int>mfd_flow_direction_raster.get(
seed_col, seed_row)
# Skip this seed if it's nodata (Currently expected to be 0).
# No need to set the nodata value here since we have already
# filled the raster with nodata values at creation time.
if seed_flow_value == mfd_flow_direction_nodata:
continue
weight_sum = 0
aspect_weighted_sum = 0
for neighbor_index in range(8):
neighbor_row = seed_row + ROW_OFFSETS[neighbor_index]
if neighbor_row == -1 or neighbor_row == n_rows:
continue
neighbor_col = seed_col + COL_OFFSETS[neighbor_index]
if neighbor_col == -1 or neighbor_col == n_cols:
continue
flow_weight_in_direction = (seed_flow_value >> (
neighbor_index * 4) & 0xF)
weight_sum += flow_weight_in_direction
aspect_weighted_sum += (
flow_lengths[neighbor_index] *
flow_weight_in_direction)
# Weight sum should never be less than 0.
# Since it's an int, we can compare it directly against the
# value of 0.
if weight_sum == 0:
aspect_weighted_average = average_aspect_nodata
else:
# We already know that weight_sum will be > 0 because we
# check for it in the condition above.
with cython.cdivision(True):
aspect_weighted_average = (
aspect_weighted_sum / <float>weight_sum)
average_aspect_raster.set(
seed_col, seed_row, aspect_weighted_average)
n_pixels_visited += win_xsize * win_ysize
LOGGER.info('Average aspect 100.00% complete')
mfd_flow_direction_raster.close()
average_aspect_raster.close()

View File

@ -107,6 +107,7 @@ MODEL_SPEC = {
},
"biophysical_table_path": {
"type": "csv",
"index_col": "lucode",
"columns": {
"lucode": spec_utils.LULC_TABLE_COLUMN,
"cn_[SOIL_GROUP]": {
@ -137,6 +138,7 @@ MODEL_SPEC = {
},
"rain_events_table_path": {
"type": "csv",
"index_col": "month",
"columns": {
"month": {
"type": "number",
@ -212,6 +214,7 @@ MODEL_SPEC = {
},
"climate_zone_table_path": {
"type": "csv",
"index_col": "cz_id",
"columns": {
"cz_id": {
"type": "integer",
@ -253,6 +256,7 @@ MODEL_SPEC = {
},
"monthly_alpha_path": {
"type": "csv",
"index_col": "month",
"columns": {
"month": {
"type": "number",
@ -409,10 +413,62 @@ MODEL_SPEC = {
"bands": {1: {
"type": "integer"
}}
},
'Si.tif': {
"about": gettext("Map of the S_i factor derived from CN"),
"bands": {1: {"type": "number", "units": u.inch}}
},
'lulc_aligned.tif': {
"about": gettext("Copy of LULC input, aligned and clipped "
"to match the other spatial inputs"),
"bands": {1: {"type": "integer"}}
},
'dem_aligned.tif': {
"about": gettext("Copy of DEM input, aligned and clipped "
"to match the other spatial inputs"),
"bands": {1: {"type": "number", "units": u.meter}}
},
'pit_filled_dem.tif': {
"about": gettext("Pit filled DEM"),
"bands": {1: {"type": "number", "units": u.meter}}
},
'soil_group_aligned.tif': {
"about": gettext("Copy of soil groups input, aligned and "
"clipped to match the other spatial inputs"),
"bands": {1: {"type": "integer"}}
},
'flow_accum.tif': spec_utils.FLOW_ACCUMULATION,
'prcp_a[MONTH].tif': {
"bands": {1: {"type": "number", "units": u.millimeter/u.year}},
"about": gettext("Monthly precipitation rasters, aligned and "
"clipped to match the other spatial inputs")
},
'n_events[MONTH].tif': {
"about": gettext("Map of monthly rain events"),
"bands": {1: {"type": "integer"}}
},
'et0_a[MONTH].tif': {
"bands": {1: {"type": "number", "units": u.millimeter}},
"about": gettext("Monthly ET0 rasters, aligned and "
"clipped to match the other spatial inputs")
},
'kc_[MONTH].tif': {
"about": gettext("Map of monthly KC values"),
"bands": {1: {"type": "number", "units": u.none}}
},
'l_aligned.tif': {
"about": gettext("Copy of user-defined local recharge input, "
"aligned and clipped to match the other spatial inputs"),
"bands": {1: {"type": "number", "units": u.millimeter}}
},
'cz_aligned.tif': {
"about": gettext("Copy of user-defined climate zones raster, "
"aligned and clipped to match the other spatial inputs"),
"bands": {1: {"type": "integer"}}
}
}
},
"cache_dir": spec_utils.TASKGRAPH_DIR
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -437,18 +493,10 @@ _INTERMEDIATE_BASE_FILES = {
'flow_dir_mfd_path': 'flow_dir_mfd.tif',
'qfm_path_list': ['qf_%d.tif' % (x+1) for x in range(N_MONTHS)],
'stream_path': 'stream.tif',
}
_TMP_BASE_FILES = {
'outflow_direction_path': 'outflow_direction.tif',
'outflow_weights_path': 'outflow_weights.tif',
'kc_path': 'kc.tif',
'si_path': 'Si.tif',
'lulc_aligned_path': 'lulc_aligned.tif',
'dem_aligned_path': 'dem_aligned.tif',
'dem_pit_filled_path': 'pit_filled_dem.tif',
'loss_path': 'loss.tif',
'zero_absorption_source_path': 'zero_absorption.tif',
'soil_group_aligned_path': 'soil_group_aligned.tif',
'flow_accum_path': 'flow_accum.tif',
'precip_path_aligned_list': ['prcp_a%d.tif' % x for x in range(N_MONTHS)],
@ -457,7 +505,6 @@ _TMP_BASE_FILES = {
'kc_path_list': ['kc_%d.tif' % x for x in range(N_MONTHS)],
'l_aligned_path': 'l_aligned.tif',
'cz_aligned_raster_path': 'cz_aligned.tif',
'l_sum_pre_clamp': 'l_sum_pre_clamp.tif'
}
@ -561,41 +608,20 @@ def _execute(args):
# fail early on a missing required rain events table
if (not args['user_defined_local_recharge'] and
not args['user_defined_climate_zones']):
rain_events_lookup = (
utils.read_csv_to_dataframe(
args['rain_events_table_path'], 'month'
).to_dict(orient='index'))
rain_events_df = utils.read_csv_to_dataframe(
args['rain_events_table_path'],
MODEL_SPEC['args']['rain_events_table_path'])
biophysical_table = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'lucode').to_dict(orient='index')
bad_value_list = []
for lucode, value in biophysical_table.items():
for biophysical_id in ['cn_a', 'cn_b', 'cn_c', 'cn_d'] + [
'kc_%d' % (month_index+1) for month_index in range(N_MONTHS)]:
try:
_ = float(value[biophysical_id])
except ValueError:
bad_value_list.append(
(biophysical_id, lucode, value[biophysical_id]))
if bad_value_list:
raise ValueError(
'biophysical_table at %s seems to have the following incorrect '
'values (expecting all floating point numbers): %s' % (
args['biophysical_table_path'], ','.join(
['%s(lucode %d): "%s"' % (
lucode, biophysical_id, bad_value)
for lucode, biophysical_id, bad_value in
bad_value_list])))
biophysical_df = utils.read_csv_to_dataframe(
args['biophysical_table_path'],
MODEL_SPEC['args']['biophysical_table_path'])
if args['monthly_alpha']:
# parse out the alpha lookup table of the form (month_id: alpha_val)
alpha_month_map = dict(
(key, val['alpha']) for key, val in
utils.read_csv_to_dataframe(
args['monthly_alpha_path'], 'month'
).to_dict(orient='index').items())
alpha_month_map = utils.read_csv_to_dataframe(
args['monthly_alpha_path'],
MODEL_SPEC['args']['monthly_alpha_path']
)['alpha'].to_dict()
else:
# make all 12 entries equal to args['alpha_m']
alpha_m = float(fractions.Fraction(args['alpha_m']))
@ -610,9 +636,8 @@ def _execute(args):
file_suffix = utils.make_suffix_string(args, 'results_suffix')
intermediate_output_dir = os.path.join(
args['workspace_dir'], 'intermediate_outputs')
cache_dir = os.path.join(args['workspace_dir'], 'cache_dir')
output_dir = args['workspace_dir']
utils.make_directories([intermediate_output_dir, cache_dir, output_dir])
utils.make_directories([intermediate_output_dir, output_dir])
try:
n_workers = int(args['n_workers'])
@ -622,13 +647,13 @@ def _execute(args):
# TypeError when n_workers is None.
n_workers = -1 # Synchronous mode.
task_graph = taskgraph.TaskGraph(
cache_dir, n_workers, reporting_interval=5.0)
os.path.join(args['workspace_dir'], 'taskgraph_cache'),
n_workers, reporting_interval=5)
LOGGER.info('Building file registry')
file_registry = utils.build_file_registry(
[(_OUTPUT_BASE_FILES, output_dir),
(_INTERMEDIATE_BASE_FILES, intermediate_output_dir),
(_TMP_BASE_FILES, cache_dir)], file_suffix)
(_INTERMEDIATE_BASE_FILES, intermediate_output_dir)], file_suffix)
LOGGER.info('Checking that the AOI is not the output aggregate vector')
if (os.path.normpath(args['aoi_path']) ==
@ -706,7 +731,7 @@ def _execute(args):
args=(
(file_registry['dem_aligned_path'], 1),
file_registry['dem_pit_filled_path']),
kwargs={'working_dir': cache_dir},
kwargs={'working_dir': intermediate_output_dir},
target_path_list=[file_registry['dem_pit_filled_path']],
dependent_task_list=[align_task],
task_name='fill dem pits')
@ -716,7 +741,7 @@ def _execute(args):
args=(
(file_registry['dem_pit_filled_path'], 1),
file_registry['flow_dir_mfd_path']),
kwargs={'working_dir': cache_dir},
kwargs={'working_dir': intermediate_output_dir},
target_path_list=[file_registry['flow_dir_mfd_path']],
dependent_task_list=[fill_pit_task],
task_name='flow dir mfd')
@ -762,22 +787,18 @@ def _execute(args):
'table_name': 'Climate Zone'}
for month_id in range(N_MONTHS):
if args['user_defined_climate_zones']:
cz_rain_events_lookup = (
utils.read_csv_to_dataframe(
args['climate_zone_table_path'], 'cz_id'
).to_dict(orient='index'))
month_label = MONTH_ID_TO_LABEL[month_id]
climate_zone_rain_events_month = dict([
(cz_id, cz_rain_events_lookup[cz_id][month_label]) for
cz_id in cz_rain_events_lookup])
n_events_nodata = -1
cz_rain_events_df = utils.read_csv_to_dataframe(
args['climate_zone_table_path'],
MODEL_SPEC['args']['climate_zone_table_path'])
climate_zone_rain_events_month = (
cz_rain_events_df[MONTH_ID_TO_LABEL[month_id]].to_dict())
n_events_task = task_graph.add_task(
func=utils.reclassify_raster,
args=(
(file_registry['cz_aligned_raster_path'], 1),
climate_zone_rain_events_month,
file_registry['n_events_path_list'][month_id],
gdal.GDT_Float32, n_events_nodata,
gdal.GDT_Float32, TARGET_NODATA,
reclass_error_details),
target_path_list=[
file_registry['n_events_path_list'][month_id]],
@ -785,15 +806,14 @@ def _execute(args):
task_name='n_events for month %d' % month_id)
reclassify_n_events_task_list.append(n_events_task)
else:
# rain_events_lookup defined near entry point of execute
n_events = rain_events_lookup[month_id+1]['events']
n_events_task = task_graph.add_task(
func=pygeoprocessing.new_raster_from_base,
args=(
file_registry['dem_aligned_path'],
file_registry['n_events_path_list'][month_id],
gdal.GDT_Float32, [TARGET_NODATA]),
kwargs={'fill_value_list': (n_events,)},
kwargs={'fill_value_list': (
rain_events_df['events'][month_id+1],)},
target_path_list=[
file_registry['n_events_path_list'][month_id]],
dependent_task_list=[align_task],
@ -806,7 +826,8 @@ def _execute(args):
args=(
file_registry['lulc_aligned_path'],
file_registry['soil_group_aligned_path'],
biophysical_table, file_registry['cn_path']),
biophysical_df,
file_registry['cn_path']),
target_path_list=[file_registry['cn_path']],
dependent_task_list=[align_task],
task_name='calculate curve number')
@ -827,8 +848,6 @@ def _execute(args):
func=_calculate_monthly_quick_flow,
args=(
file_registry['precip_path_aligned_list'][month_index],
file_registry['lulc_aligned_path'],
file_registry['cn_path'],
file_registry['n_events_path_list'][month_index],
file_registry['stream_path'],
file_registry['si_path'],
@ -855,16 +874,13 @@ def _execute(args):
'raster_name': 'LULC', 'column_name': 'lucode',
'table_name': 'Biophysical'}
for month_index in range(N_MONTHS):
kc_lookup = dict([
(lucode, biophysical_table[lucode]['kc_%d' % (month_index+1)])
for lucode in biophysical_table])
kc_nodata = -1 # a reasonable nodata value
kc_lookup = biophysical_df['kc_%d' % (month_index+1)].to_dict()
kc_task = task_graph.add_task(
func=utils.reclassify_raster,
args=(
(file_registry['lulc_aligned_path'], 1), kc_lookup,
file_registry['kc_path_list'][month_index],
gdal.GDT_Float32, kc_nodata, reclass_error_details),
gdal.GDT_Float32, TARGET_NODATA, reclass_error_details),
target_path_list=[file_registry['kc_path_list'][month_index]],
dependent_task_list=[align_task],
task_name='classify kc month %d' % month_index)
@ -978,7 +994,7 @@ def _calculate_vri(l_path, target_vri_path):
None.
"""
qb_sum = 0.0
qb_sum = 0
qb_valid_count = 0
l_nodata = pygeoprocessing.get_raster_info(l_path)['nodata'][0]
@ -1039,122 +1055,167 @@ def _calculate_annual_qfi(qfm_path_list, target_qf_path):
qfi_sum_op, target_qf_path, gdal.GDT_Float32, qf_nodata)
def _calculate_monthly_quick_flow(
precip_path, lulc_raster_path, cn_path, n_events_raster_path,
stream_path, si_path, qf_monthly_path):
def _calculate_monthly_quick_flow(precip_path, n_events_path, stream_path,
si_path, qf_monthly_path):
"""Calculate quick flow for a month.
Args:
precip_path (string): path to file that correspond to monthly
precipitation
lulc_raster_path (string): path to landcover raster
cn_path (string): path to curve number raster
n_events_raster_path (string): a path to a raster where each pixel
precip_path (string): path to monthly precipitation raster
n_events_path (string): a path to a raster where each pixel
indicates the number of rain events.
stream_path (string): path to stream mask raster where 1 indicates a
stream pixel, 0 is a non-stream but otherwise valid area from the
original DEM, and nodata indicates areas outside the valid DEM.
si_path (string): path to raster that has potential maximum retention
qf_monthly_path_list (list of string): list of paths to output monthly
rasters.
qf_monthly_path (string): path to output monthly QF raster.
Returns:
None
"""
p_nodata = pygeoprocessing.get_raster_info(precip_path)['nodata'][0]
n_nodata = pygeoprocessing.get_raster_info(n_events_path)['nodata'][0]
stream_nodata = pygeoprocessing.get_raster_info(stream_path)['nodata'][0]
si_nodata = pygeoprocessing.get_raster_info(si_path)['nodata'][0]
qf_nodata = -1
p_nodata = pygeoprocessing.get_raster_info(precip_path)['nodata'][0]
n_events_nodata = pygeoprocessing.get_raster_info(
n_events_raster_path)['nodata'][0]
stream_nodata = pygeoprocessing.get_raster_info(stream_path)['nodata'][0]
def qf_op(p_im, s_i, n_events, stream_array):
def qf_op(p_im, s_i, n_m, stream):
"""Calculate quick flow as in Eq [1] in user's guide.
Args:
p_im (numpy.array): precipitation at pixel i on month m
s_i (numpy.array): factor that is 1000/CN_i - 10
(Equation 1b from user's guide)
n_events (numpy.array): number of rain events on the pixel
stream_mask (numpy.array): 1 if stream, otherwise not a stream
pixel.
n_m (numpy.array): number of rain events on pixel i in month m
stream (numpy.array): 1 if stream, otherwise not a stream pixel.
Returns:
quick flow (numpy.array)
"""
# s_i is an intermediate output which will always have a defined
# nodata value
valid_mask = ((p_im != 0.0) &
(stream_array != 1) &
(n_events > 0) &
~utils.array_equals_nodata(s_i, si_nodata))
if p_nodata is not None:
valid_mask &= ~utils.array_equals_nodata(p_im, p_nodata)
if n_events_nodata is not None:
valid_mask &= ~utils.array_equals_nodata(n_events, n_events_nodata)
# stream_nodata is the only input that carry over nodata values from
valid_p_mask = ~utils.array_equals_nodata(p_im, p_nodata)
valid_n_mask = ~utils.array_equals_nodata(n_m, n_nodata)
# precip mask: both p_im and n_m are defined and greater than 0
precip_mask = valid_p_mask & valid_n_mask & (p_im > 0) & (n_m > 0)
stream_mask = stream == 1
# stream_nodata is the only input that carries over nodata values from
# the aligned DEM.
if stream_nodata is not None:
valid_mask &= ~utils.array_equals_nodata(
stream_array, stream_nodata)
valid_mask = (
valid_p_mask &
valid_n_mask &
~utils.array_equals_nodata(stream, stream_nodata) &
~utils.array_equals_nodata(s_i, si_nodata))
valid_n_events = n_events[valid_mask]
valid_si = s_i[valid_mask]
# QF is defined in terms of three cases:
#
# 1. Where there is no precipitation, QF = 0
# (even if stream or s_i is undefined)
#
# 2. Where there is precipitation and we're on a stream, QF = P
# (even if s_i is undefined)
#
# 3. Where there is precipitation and we're not on a stream, use the
# quickflow equation (only if all four inputs are defined):
# QF_im = 25.4 * n_m * (
# (a_im - s_i) * exp(-0.2 * s_i / a_im) +
# s_i^2 / a_im * exp(0.8 * s_i / a_im) * E1(s_i / a_im)
# )
#
# When evaluating the QF equation, there are a few edge cases:
#
# 3a. Where s_i = 0, you get NaN and a warning from numpy because
# E1(0 / a_im) = infinity. In this case, per conversation with
# Rafa, the final term of the equation should evaluate to 0, and
# the equation can be simplified to QF_im = P_im
# (which makes sense because if s_i = 0, no water is retained).
#
# Solution: Preemptively set QF_im equal to P_im where s_i = 0 in
# order to avoid calculations with infinity.
#
# 3b. When the ratio s_i / a_im becomes large, QF approaches 0.
# [NOTE: I don't know how to prove this mathematically, but it
# holds true when I tested with reasonable values of s_i and a_im].
# The exp() term becomes very large, while the E1() term becomes
# very small.
#
# Per conversation with Rafa and Lisa, large s_i / a_im ratios
# shouldn't happen often with real world data. But if they did, it
# would be a situation where there is very little precipitation
# spread out over relatively many rain events and the soil is very
# absorbent, so logically, QF should be effectively zero.
#
# To avoid overflow, we set a threshold of 100 for the s_i / a_im
# ratio. Where s_i / a_im > 100, we set QF to 0. 100 was chosen
# because it's a nice whole number that gets us close to the
# float32 max without surpassing it (exp(0.8*100) = 5e34). When
# s_i / a_im = 100, the actual result of the QF equation is on the
# order of 1e-6, so it should be rounded down to 0 anyway.
#
# 3c. Otherwise, evaluate the QF equation as usual.
#
# 3d. With certain inputs [for example: n_m = 10, CN = 50, p_im = 30],
# it's possible that the QF equation evaluates to a very small
# negative value. Per conversation with Lisa and Rafa, this is an
# edge case that the equation was not designed for. Negative QF
# doesn't make sense, so we set any negative QF values to 0.
# qf_im is the quickflow at pixel i on month m
qf_im = numpy.full(p_im.shape, TARGET_NODATA, dtype=numpy.float32)
# case 1: where there is no precipitation
qf_im[~precip_mask] = 0
# case 2: where there is precipitation and we're on a stream
qf_im[precip_mask & stream_mask] = p_im[precip_mask & stream_mask]
# case 3: where there is precipitation and we're not on a stream
case_3_mask = valid_mask & precip_mask & ~stream_mask
# for consistent indexing, make a_im the same shape as the other
# arrays even though we only use it in case 3
a_im = numpy.full(p_im.shape, numpy.nan, dtype=numpy.float32)
# a_im is the mean rain depth on a rainy day at pixel i on month m
# the 25.4 converts inches to mm since Si is in inches
a_im = numpy.empty(valid_n_events.shape)
a_im = p_im[valid_mask] / (valid_n_events * 25.4)
qf_im = numpy.empty(p_im.shape)
qf_im[:] = qf_nodata
# the 25.4 converts inches to mm since s_i is in inches
a_im[case_3_mask] = p_im[case_3_mask] / (n_m[case_3_mask] * 25.4)
# Precompute the last two terms in quickflow so we can handle a
# numerical instability when s_i is large and/or a_im is small
# on large valid_si/a_im this number will be zero and the latter
# exponent will also be zero because of a divide by zero. rather than
# raise that numerical warning, just handle it manually
E1 = scipy.special.expn(1, valid_si / a_im)
E1[valid_si == 0] = 0
nonzero_e1_mask = E1 != 0
exp_result = numpy.zeros(valid_si.shape)
exp_result[nonzero_e1_mask] = numpy.exp(
(0.8 * valid_si[nonzero_e1_mask]) / a_im[nonzero_e1_mask] +
numpy.log(E1[nonzero_e1_mask]))
# case 3a: when s_i = 0, qf = p
case_3a_mask = case_3_mask & (s_i == 0)
qf_im[case_3a_mask] = p_im[case_3a_mask]
# qf_im is the quickflow at pixel i on month m Eq. [1]
qf_im[valid_mask] = (25.4 * valid_n_events * (
(a_im - valid_si) * numpy.exp(-0.2 * valid_si / a_im) +
valid_si ** 2 / a_im * exp_result))
# case 3b: set quickflow to 0 when the s_i/a_im ratio is too large
case_3b_mask = case_3_mask & (s_i / a_im > 100)
qf_im[case_3b_mask] = 0
# case 3c: evaluate the equation as usual
case_3c_mask = case_3_mask & ~(case_3a_mask | case_3b_mask)
qf_im[case_3c_mask] = (
25.4 * n_m[case_3c_mask] * (
((a_im[case_3c_mask] - s_i[case_3c_mask]) *
numpy.exp(-0.2 * s_i[case_3c_mask] / a_im[case_3c_mask])) +
(s_i[case_3c_mask] ** 2 / a_im[case_3c_mask] *
numpy.exp(0.8 * s_i[case_3c_mask] / a_im[case_3c_mask]) *
scipy.special.exp1(s_i[case_3c_mask] / a_im[case_3c_mask]))
)
)
# case 3d: set any negative values to 0
qf_im[valid_mask & (qf_im < 0)] = 0
# if precip is 0, then QF should be zero
qf_im[(p_im == 0) | (n_events == 0)] = 0.0
# if we're on a stream, set quickflow to the precipitation
valid_stream_precip_mask = stream_array == 1
if p_nodata is not None:
valid_stream_precip_mask &= ~utils.array_equals_nodata(
p_im, p_nodata)
qf_im[valid_stream_precip_mask] = p_im[valid_stream_precip_mask]
return qf_im
pygeoprocessing.raster_calculator(
[(path, 1) for path in [
precip_path, si_path, n_events_raster_path, stream_path]], qf_op,
qf_monthly_path, gdal.GDT_Float32, qf_nodata)
precip_path, si_path, n_events_path, stream_path]],
qf_op, qf_monthly_path, gdal.GDT_Float32, TARGET_NODATA)
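# Illustrative sketch (not part of the module): a scalar version of the case
# analysis documented in qf_op above, for a single pixel. The function name
# and signature are hypothetical; the model's real implementation is the
# raster-based qf_op.
def _quickflow_scalar_example(p_im, s_i, n_m, on_stream):
    import numpy
    import scipy.special
    if p_im <= 0 or n_m <= 0:
        return 0.0   # case 1: no precipitation or no rain events
    if on_stream:
        return p_im  # case 2: on a stream, QF equals precipitation
    if s_i == 0:
        return p_im  # case 3a: no retention, QF equals precipitation
    a_im = p_im / (n_m * 25.4)  # mean rain depth per event, in inches
    if s_i / a_im > 100:
        return 0.0   # case 3b: large s_i / a_im, QF is effectively zero
    qf = 25.4 * n_m * (
        (a_im - s_i) * numpy.exp(-0.2 * s_i / a_im)
        + (s_i ** 2 / a_im) * numpy.exp(0.8 * s_i / a_im)
        * scipy.special.exp1(s_i / a_im))
    return max(qf, 0.0)  # case 3d: clamp tiny negative results to 0
# e.g. _quickflow_scalar_example(30.0, 2.5, 5, False) exercises case 3c.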
def _calculate_curve_number_raster(
lulc_raster_path, soil_group_path, biophysical_table, cn_path):
lulc_raster_path, soil_group_path, biophysical_df, cn_path):
"""Calculate the CN raster from the landcover and soil group rasters.
Args:
lulc_raster_path (string): path to landcover raster
soil_group_path (string): path to raster indicating soil group where
pixel values are in [1,2,3,4]
biophysical_table (dict): maps landcover IDs to dictionaries that
contain at least the keys 'cn_a', 'cn_b', 'cn_c', 'cn_d', that
map to the curve numbers for that landcover and soil type.
biophysical_df (pandas.DataFrame): table mapping landcover IDs to the
columns 'cn_a', 'cn_b', 'cn_c', and 'cn_d', which contain the
curve number values for that landcover and soil type.
cn_path (string): path to the output curve number raster, which
will have the dimensions of the intersection of
`lulc_raster_path` and `soil_group_path` and the cell size of
@ -1172,12 +1233,11 @@ def _calculate_curve_number_raster(
4: 'cn_d',
}
# curve numbers are always positive so -1 is a good nodata choice
cn_nodata = -1
lulc_to_soil = {}
lulc_nodata = pygeoprocessing.get_raster_info(
lulc_raster_path)['nodata'][0]
lucodes = list(biophysical_table)
lucodes = biophysical_df.index.to_list()
if lulc_nodata is not None:
lucodes.append(lulc_nodata)
@ -1190,12 +1250,12 @@ def _calculate_curve_number_raster(
for lucode in sorted(lucodes):
if lucode != lulc_nodata:
lulc_to_soil[soil_id]['cn_values'].append(
biophysical_table[lucode][soil_column])
biophysical_df[soil_column][lucode])
lulc_to_soil[soil_id]['lulc_values'].append(lucode)
else:
# handle the lulc nodata with cn nodata
lulc_to_soil[soil_id]['lulc_values'].append(lulc_nodata)
lulc_to_soil[soil_id]['cn_values'].append(cn_nodata)
lulc_to_soil[soil_id]['cn_values'].append(TARGET_NODATA)
# Making the landcover array a float32 in case the user provides a
# float landcover map like Kate did.
@ -1213,7 +1273,7 @@ def _calculate_curve_number_raster(
def cn_op(lulc_array, soil_group_array):
"""Map lulc code and soil to a curve number."""
cn_result = numpy.empty(lulc_array.shape)
cn_result[:] = cn_nodata
cn_result[:] = TARGET_NODATA
# if an lulc_array value is not in lulc_to_soil[soil_group_id]['lulc_values'],
# then numpy.digitize will not bin properly and will cause an IndexError
@ -1252,10 +1312,9 @@ def _calculate_curve_number_raster(
cn_result[current_soil_mask] = cn_values[current_soil_mask]
return cn_result
cn_nodata = -1
pygeoprocessing.raster_calculator(
[(lulc_raster_path, 1), (soil_group_path, 1)], cn_op, cn_path,
gdal.GDT_Float32, cn_nodata)
gdal.GDT_Float32, TARGET_NODATA)
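# Illustrative sketch (not part of the module): how the digitize-based lookup
# in cn_op above maps LULC codes to curve numbers for one soil group. The
# values are made up for demonstration.
import numpy

lucodes = numpy.array([1, 2, 3, 7])            # sorted LULC codes
cn_values = numpy.array([55., 61., 74., 82.])  # CN values for one soil group
lulc_block = numpy.array([[1, 7], [3, 2]])
# digitize with right=True returns, for each lulc value, its position in the
# sorted lucodes array, which indexes into the parallel cn_values array
index = numpy.digitize(lulc_block.ravel(), lucodes, right=True)
cn_block = cn_values[index].reshape(lulc_block.shape)
# cn_block == [[55., 82.], [74., 61.]]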
def _calculate_si_raster(cn_path, stream_path, si_path):
@ -1269,7 +1328,6 @@ def _calculate_si_raster(cn_path, stream_path, si_path):
Returns:
None
"""
si_nodata = -1
cn_nodata = pygeoprocessing.get_raster_info(cn_path)['nodata'][0]
def si_op(ci_factor, stream_mask):
@ -1278,17 +1336,17 @@ def _calculate_si_raster(cn_path, stream_path, si_path):
~utils.array_equals_nodata(ci_factor, cn_nodata) &
(ci_factor > 0))
si_array = numpy.empty(ci_factor.shape)
si_array[:] = si_nodata
si_array[:] = TARGET_NODATA
# multiply by the stream mask != 1 so we get 0s on the stream and
# unaffected results everywhere else
si_array[valid_mask] = (
(1000.0 / ci_factor[valid_mask] - 10) * (
(1000 / ci_factor[valid_mask] - 10) * (
stream_mask[valid_mask] != 1))
return si_array
pygeoprocessing.raster_calculator(
[(cn_path, 1), (stream_path, 1)], si_op, si_path, gdal.GDT_Float32,
si_nodata)
TARGET_NODATA)
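# Worked example for the S_i computation above (illustrative numbers): a
# curve number of 80 gives S_i = 1000 / 80 - 10 = 2.5 inches of potential
# retention; on stream pixels the (stream_mask != 1) factor is 0, so S_i is
# forced to 0 there (where quick flow equals precipitation, per case 2 above).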
def _aggregate_recharge(
@ -1350,7 +1408,7 @@ def _aggregate_recharge(
"no coverage for polygon %s", ', '.join(
[str(poly_feat.GetField(_)) for _ in range(
poly_feat.GetFieldCount())]))
value = 0.0
value = 0
elif op_type == 'sum':
value = aggregate_stats[poly_index]['sum']
poly_feat.SetField(aggregate_field_id, float(value))

View File

@ -46,6 +46,7 @@ MODEL_SPEC = {
"precipitation_path": spec_utils.PRECIP,
"biophysical_table": {
"type": "csv",
"index_col": "lucode",
"columns": {
"lucode": spec_utils.LULC_TABLE_COLUMN,
"emc_[POLLUTANT]": {
@ -363,10 +364,10 @@ MODEL_SPEC = {
"calculated by convolving the search kernel with the "
"retention ratio raster."),
"bands": {1: {"type": "ratio"}}
},
"cache_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -438,14 +439,14 @@ def execute(args):
suffix = utils.make_suffix_string(args, 'results_suffix')
output_dir = args['workspace_dir']
intermediate_dir = os.path.join(output_dir, 'intermediate')
cache_dir = os.path.join(intermediate_dir, 'cache_dir')
utils.make_directories(
[args['workspace_dir'], intermediate_dir, cache_dir])
utils.make_directories([args['workspace_dir'], intermediate_dir])
files = utils.build_file_registry(
[(INTERMEDIATE_OUTPUTS, intermediate_dir),
(FINAL_OUTPUTS, output_dir)], suffix)
task_graph = taskgraph.TaskGraph(cache_dir, int(args.get('n_workers', -1)))
task_graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'),
int(args.get('n_workers', -1)))
# get the necessary base raster info
source_lulc_raster_info = pygeoprocessing.get_raster_info(
@ -482,11 +483,12 @@ def execute(args):
task_name='align input rasters')
# Build a lookup dictionary mapping each LULC code to its row
biophysical_dict = utils.read_csv_to_dataframe(
args['biophysical_table'], 'lucode').to_dict(orient='index')
# sort the LULC codes upfront because we use the sorted list in multiple
# sort by the LULC codes upfront because we use the sorted list in multiple
# places. it's more efficient to do this once.
sorted_lucodes = sorted(biophysical_dict)
biophysical_df = utils.read_csv_to_dataframe(
args['biophysical_table'], MODEL_SPEC['args']['biophysical_table']
).sort_index()
sorted_lucodes = biophysical_df.index.to_list()
# convert the biophysical table into a 2D array where rows are LULC codes
# in sorted order and columns correspond to soil groups in order
@ -498,10 +500,8 @@ def execute(args):
# up with their indices in the array. this is more efficient than
# decrementing the whole soil group array by 1.
retention_ratio_array = numpy.array([
[1 - biophysical_dict[lucode][f'rc_{soil_group}']
for soil_group in ['a', 'b', 'c', 'd']
] for lucode in sorted_lucodes
], dtype=numpy.float32)
1 - biophysical_df[f'rc_{soil_group}'].to_numpy()
for soil_group in ['a', 'b', 'c', 'd']], dtype=numpy.float32).T
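# Illustrative example of the array built above (made-up values): for a
# biophysical table with index [11, 21] and columns rc_a..rc_d, stacking the
# four 1 - rc_<soil_group> columns and transposing yields shape (n_lucodes, 4):
#                       soil A  soil B  soil C  soil D
#   row 0 (lucode 11):    0.9     0.8     0.7     0.6
#   row 1 (lucode 21):    1.0     0.9     0.9     0.8
# so retention_ratio_array[i, j] is the retention ratio for the i-th sorted
# lucode under soil group j.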
# Calculate stormwater retention ratio and volume from
# LULC, soil groups, biophysical data, and precipitation
@ -522,10 +522,6 @@ def execute(args):
if args['adjust_retention_ratios']:
# in raster coord system units
radius = float(args['retention_radius'])
# boolean mapping for each LULC code whether it's connected
is_connected_map = {
lucode: 1 if biophysical_dict[lucode]['is_connected'] else 0
for lucode in biophysical_dict}
reproject_roads_task = task_graph.add_task(
func=pygeoprocessing.reproject_vector,
@ -591,7 +587,7 @@ def execute(args):
func=pygeoprocessing.reclassify_raster,
args=(
(files['lulc_aligned_path'], 1),
is_connected_map,
biophysical_df['is_connected'].astype(int).to_dict(),
files['connected_lulc_path'],
gdal.GDT_Byte,
UINT8_NODATA),
@ -706,14 +702,12 @@ def execute(args):
# (Optional) Calculate stormwater percolation ratio and volume from
# LULC, soil groups, biophysical table, and precipitation
if 'pe_a' in next(iter(biophysical_dict.values())):
if 'pe_a' in biophysical_df.columns:
LOGGER.info('percolation data detected in biophysical table. '
'Will calculate percolation ratio and volume rasters.')
percolation_ratio_array = numpy.array([
[biophysical_dict[lucode][f'pe_{soil_group}']
for soil_group in ['a', 'b', 'c', 'd']
] for lucode in sorted_lucodes
], dtype=numpy.float32)
biophysical_df[f'pe_{soil_group}'].to_numpy()
for soil_group in ['a', 'b', 'c', 'd']], dtype=numpy.float32).T
percolation_ratio_task = task_graph.add_task(
func=lookup_ratios,
args=(
@ -749,8 +743,8 @@ def execute(args):
# get the pollutant names from the 'emc_' columns of the biophysical table:
# strip the first four characters off 'emc_[POLLUTANT]' to get the pollutant name
pollutants = [key[4:] for key in next(iter(biophysical_dict.values()))
if key.startswith('emc_')]
pollutants = [
col[4:] for col in biophysical_df.columns if col.startswith('emc_')]
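# e.g. biophysical columns ['lucode', 'emc_tn', 'emc_tp', 'rc_a', ...]
# yield pollutants == ['tn', 'tp'] (column names here are illustrative)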
LOGGER.debug(f'Pollutants found in biophysical table: {pollutants}')
# Calculate avoided pollutant load for each pollutant from retention volume
@ -766,9 +760,7 @@ def execute(args):
output_dir, f'actual_pollutant_load_{pollutant}{suffix}.tif')
actual_load_paths.append(actual_pollutant_load_path)
# make an array mapping each LULC code to the pollutant EMC value
emc_array = numpy.array(
[biophysical_dict[lucode][f'emc_{pollutant}']
for lucode in sorted_lucodes], dtype=numpy.float32)
emc_array = biophysical_df[f'emc_{pollutant}'].to_numpy(dtype=numpy.float32)
# calculate avoided load from retention volume
avoided_load_task = task_graph.add_task(

View File

@ -55,6 +55,7 @@ MODEL_SPEC = {
"biophysical_table_path": {
"name": gettext("biophysical table"),
"type": "csv",
"index_col": "lucode",
"columns": {
"lucode": spec_utils.LULC_TABLE_COLUMN,
"kc": {
@ -170,6 +171,7 @@ MODEL_SPEC = {
"energy_consumption_table_path": {
"name": gettext("energy consumption table"),
"type": "csv",
"index_col": "type",
"columns": {
"type": {
"type": "integer",
@ -340,10 +342,10 @@ MODEL_SPEC = {
"reference of the LULC."),
"geometries": spec_utils.POLYGONS,
"fields": {}
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -410,8 +412,8 @@ def execute(args):
intermediate_dir = os.path.join(
args['workspace_dir'], 'intermediate')
utils.make_directories([args['workspace_dir'], intermediate_dir])
biophysical_lucode_map = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'lucode').to_dict(orient='index')
biophysical_df = utils.read_csv_to_dataframe(
args['biophysical_table_path'], MODEL_SPEC['args']['biophysical_table_path'])
# cast to float and calculate relative weights
# Use default weights for shade, albedo, eti if the user didn't provide
@ -454,7 +456,7 @@ def execute(args):
n_workers = -1 # Synchronous mode.
task_graph = taskgraph.TaskGraph(
os.path.join(intermediate_dir, '_taskgraph_working_dir'), n_workers)
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
# align all the input rasters.
aligned_lulc_raster_path = os.path.join(
@ -496,16 +498,13 @@ def execute(args):
'raster_name': 'LULC', 'column_name': 'lucode',
'table_name': 'Biophysical'}
for prop in reclassification_props:
prop_map = dict(
(lucode, x[prop])
for lucode, x in biophysical_lucode_map.items())
prop_raster_path = os.path.join(
intermediate_dir, f'{prop}{file_suffix}.tif')
prop_task = task_graph.add_task(
func=utils.reclassify_raster,
args=(
(aligned_lulc_raster_path, 1), prop_map, prop_raster_path,
(aligned_lulc_raster_path, 1),
biophysical_df[prop].to_dict(), prop_raster_path,
gdal.GDT_Float32, TARGET_NODATA, reclass_error_details),
target_path_list=[prop_raster_path],
dependent_task_list=[align_task],
@ -1079,8 +1078,9 @@ def calculate_energy_savings(
for field in target_building_layer.schema]
type_field_index = fieldnames.index('type')
energy_consumption_table = utils.read_csv_to_dataframe(
energy_consumption_table_path, 'type').to_dict(orient='index')
energy_consumption_df = utils.read_csv_to_dataframe(
energy_consumption_table_path,
MODEL_SPEC['args']['energy_consumption_table_path'])
target_building_layer.StartTransaction()
last_time = time.time()
@ -1104,7 +1104,7 @@ def calculate_energy_savings(
# Building type should be an integer and has to match the building
# types in the energy consumption table.
target_type = target_feature.GetField(int(type_field_index))
if target_type not in energy_consumption_table:
if target_type not in energy_consumption_df.index:
target_building_layer.CommitTransaction()
target_building_layer = None
target_building_vector = None
@ -1114,16 +1114,14 @@ def calculate_energy_savings(
"that has no corresponding entry in the energy consumption "
f"table at {energy_consumption_table_path}")
consumption_increase = float(
energy_consumption_table[target_type]['consumption'])
consumption_increase = energy_consumption_df['consumption'][target_type]
# Load building cost if we can, but don't adjust the value if the cost
# column is not there.
# NOTE: if the user has an empty column value but the 'cost' column
# exists, this will raise an error.
try:
building_cost = float(
energy_consumption_table[target_type]['cost'])
building_cost = energy_consumption_df['cost'][target_type]
except KeyError:
# KeyError when cost column not present.
building_cost = 1

View File

@ -57,6 +57,7 @@ MODEL_SPEC = {
},
"curve_number_table_path": {
"type": "csv",
"index_col": "lucode",
"columns": {
"lucode": {
"type": "integer",
@ -91,6 +92,7 @@ MODEL_SPEC = {
},
"infrastructure_damage_loss_table_path": {
"type": "csv",
"index_col": "type",
"columns": {
"type": {
"type": "integer",
@ -184,12 +186,7 @@ MODEL_SPEC = {
"the same spatial reference as the LULC."),
"geometries": spec_utils.POLYGONS,
"fields": {}
}
}
},
"temp_working_dir_not_for_humans": {
"type": "directory",
"contents": {
},
"aligned_lulc.tif": {
"about": "Aligned and clipped copy of the LULC.",
"bands": {1: {"type": "integer"}}
@ -205,10 +202,10 @@ MODEL_SPEC = {
"s_max.tif": {
"about": "Map of potential retention.",
"bands": {1: {"type": "number", "units": u.millimeter}}
},
"taskgraph_data.db": {}
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -259,12 +256,10 @@ def execute(args):
file_suffix = utils.make_suffix_string(args, 'results_suffix')
temporary_working_dir = os.path.join(
args['workspace_dir'], 'temp_working_dir_not_for_humans')
intermediate_dir = os.path.join(
args['workspace_dir'], 'intermediate_files')
utils.make_directories([
args['workspace_dir'], intermediate_dir, temporary_working_dir])
args['workspace_dir'], intermediate_dir])
try:
n_workers = int(args['n_workers'])
@ -273,13 +268,14 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # Synchronous mode.
task_graph = taskgraph.TaskGraph(temporary_working_dir, n_workers)
task_graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
# Align LULC with soils
aligned_lulc_path = os.path.join(
temporary_working_dir, f'aligned_lulc{file_suffix}.tif')
intermediate_dir, f'aligned_lulc{file_suffix}.tif')
aligned_soils_path = os.path.join(
temporary_working_dir,
intermediate_dir,
f'aligned_soils_hydrological_group{file_suffix}.tif')
lulc_raster_info = pygeoprocessing.get_raster_info(
@ -306,20 +302,20 @@ def execute(args):
task_name='align raster stack')
# Load CN table
cn_table = utils.read_csv_to_dataframe(
args['curve_number_table_path'], 'lucode').to_dict(orient='index')
cn_df = utils.read_csv_to_dataframe(
args['curve_number_table_path'],
MODEL_SPEC['args']['curve_number_table_path'])
# make cn_table into a 2d array where first dim is lucode, second is
# 0..3 to correspond to CN_A..CN_D
data = []
row_ind = []
col_ind = []
for lucode in cn_table:
data.extend([
cn_table[lucode][f'cn_{soil_id}']
for soil_id in ['a', 'b', 'c', 'd']])
row_ind.extend([int(lucode)] * 4)
for lucode, row in cn_df.iterrows():
data.extend([row[f'cn_{soil_id}'] for soil_id in ['a', 'b', 'c', 'd']])
row_ind.extend([lucode] * 4)
col_ind = [0, 1, 2, 3] * (len(row_ind) // 4)
lucode_to_cn_table = scipy.sparse.csr_matrix((data, (row_ind, col_ind)))
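# Illustrative example of the sparse lookup built above (made-up values for
# two lucodes, 3 and 5):
#   data    = [60, 70, 80, 85,  30, 55, 70, 77]   # CN_A..CN_D per lucode
#   row_ind = [ 3,  3,  3,  3,   5,  5,  5,  5]   # row index == lucode
#   col_ind = [ 0,  1,  2,  3,   0,  1,  2,  3]   # column == soil group A..D
# scipy.sparse.csr_matrix((data, (row_ind, col_ind)))[3, 1] returns 70
# (CN_B for lucode 3); rows for lucodes not in the table stay empty.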
cn_nodata = -1
@ -327,7 +323,7 @@ def execute(args):
soil_type_nodata = soil_raster_info['nodata'][0]
cn_raster_path = os.path.join(
temporary_working_dir, f'cn_raster{file_suffix}.tif')
intermediate_dir, f'cn_raster{file_suffix}.tif')
align_raster_stack_task.join()
cn_raster_task = task_graph.add_task(
@ -344,7 +340,7 @@ def execute(args):
# Generate S_max
s_max_nodata = -9999
s_max_raster_path = os.path.join(
temporary_working_dir, f's_max{file_suffix}.tif')
intermediate_dir, f's_max{file_suffix}.tif')
s_max_task = task_graph.add_task(
func=pygeoprocessing.raster_calculator,
args=(
@ -649,7 +645,9 @@ def _calculate_damage_to_infrastructure_in_aoi(
infrastructure_layer = infrastructure_vector.GetLayer()
damage_type_map = utils.read_csv_to_dataframe(
structures_damage_table, 'type').to_dict(orient='index')
structures_damage_table,
MODEL_SPEC['args']['infrastructure_damage_loss_table_path']
)['damage'].to_dict()
infrastructure_layer_defn = infrastructure_layer.GetLayerDefn()
type_index = -1
@ -703,8 +701,8 @@ def _calculate_damage_to_infrastructure_in_aoi(
intersection_geometry = aoi_geometry_shapely.intersection(
infrastructure_geometry)
damage_type = int(infrastructure_feature.GetField(type_index))
damage = damage_type_map[damage_type]['damage']
total_damage += intersection_geometry.area * damage
total_damage += (
intersection_geometry.area * damage_type_map[damage_type])
aoi_damage[aoi_feature.GetFID()] = total_damage
@ -939,5 +937,25 @@ def validate(args, limit_to=None):
be an empty list if validation succeeds.
"""
return validation.validate(args, MODEL_SPEC['args'],
MODEL_SPEC['args_with_spatial_overlap'])
validation_warnings = validation.validate(
args, MODEL_SPEC['args'], MODEL_SPEC['args_with_spatial_overlap'])
sufficient_keys = validation.get_sufficient_keys(args)
invalid_keys = validation.get_invalid_keys(validation_warnings)
if ("curve_number_table_path" not in invalid_keys and
"curve_number_table_path" in sufficient_keys):
# Load CN table. Resulting DF has index and CN_X columns only.
cn_df = utils.read_csv_to_dataframe(
args['curve_number_table_path'],
MODEL_SPEC['args']['curve_number_table_path'])
# Check for NaN values.
nan_mask = cn_df.isna()
if nan_mask.any(axis=None):
nan_lucodes = nan_mask[nan_mask.any(axis=1)].index
lucode_list = list(nan_lucodes.values)
validation_warnings.append((
['curve_number_table_path'],
f'Missing curve numbers for lucode(s) {lucode_list}'))
return validation_warnings
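# Example of the check above: if the curve number table row for lucode 42 has
# an empty cn_b cell, cn_df.isna() flags that cell, nan_mask.any(axis=1)
# selects the row, and the warning reads
# "Missing curve numbers for lucode(s) [42]". (The lucode is illustrative.)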

View File

@ -1,5 +1,4 @@
import collections
import functools
import logging
import math
import os
@ -76,14 +75,17 @@ MODEL_SPEC = {
"corresponding values in this table. Each row is a land use "
"land cover class."
),
'index_col': 'lucode',
'columns': {
'lucode': spec_utils.LULC_TABLE_COLUMN,
'urban_nature': {
'type': 'number',
'units': u.none,
'type': 'ratio',
'about': (
"Binary code indicating whether the LULC type is "
"(1) or is not (0) an urban nature type."
"The proportion (0-1) indicating the naturalness of "
"the land types. 0 indicates the naturalness level of "
"this LULC type is lowest (0% nature), while 1 "
"indicates that of this LULC type is the highest "
"(100% nature)"
),
},
'search_radius_m': {
@ -256,9 +258,10 @@ MODEL_SPEC = {
'name': 'population group radii table',
'type': 'csv',
'required': f'search_radius_mode == "{RADIUS_OPT_POP_GROUP}"',
'index_col': 'pop_group',
'columns': {
"pop_group": {
"type": "ratio",
"type": "freestyle_string",
"required": False,
"about": gettext(
"The name of the population group. Names must match "
@ -302,8 +305,9 @@ MODEL_SPEC = {
'output': {
"type": "directory",
"contents": {
"urban_nature_supply.tif": {
"about": "The calculated supply of urban nature.",
"urban_nature_supply_percapita.tif": {
"about": (
"The calculated supply per capita of urban nature."),
"bands": {1: {
"type": "number",
"units": u.m**2,
@ -412,13 +416,44 @@ MODEL_SPEC = {
"bands": {1: {"type": "number", "units": u.m**2/u.person}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
}
},
# when RADIUS_OPT_UNIFORM
"accessible_urban_nature.tif": {
"about": gettext(
"The area of greenspace available within the defined "
"radius, weighted by the selected decay function."),
"bands": {1: {"type": "number", "units": u.m**2}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'",
},
# When RADIUS_OPT_URBAN_NATURE
"accessible_urban_nature_lucode_[LUCODE].tif": {
"about": gettext(
"The area of greenspace available within the radius "
"associated with urban nature class LUCODE, weighted "
"by the selected decay function."),
"bands": {1: {"type": "number", "units": u.m**2}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'",
},
# When RADIUS_OPT_POP_GROUP
"accessible_urban_nature_to_[POP_GROUP].tif": {
"about": gettext(
"The area of greenspace available within the radius "
"associated with group POP_GROUP, weighted by the "
"selected decay function."),
"bands": {1: {"type": "number", "units": u.m**2}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
},
},
},
'intermediate': {
'type': 'directory',
'contents': {
'_taskgraph_working_dir': spec_utils.TASKGRAPH_DIR,
"aligned_lulc.tif": {
"about": gettext(
"A copy of the user's land use land cover raster. "
@ -445,6 +480,7 @@ MODEL_SPEC = {
),
"bands": {1: {'type': 'number', 'units': u.count}},
},
# when RADIUS_OPT_UNIFORM
"distance_weighted_population_within_[SEARCH_RADIUS].tif": {
"about": gettext(
@ -484,13 +520,13 @@ MODEL_SPEC = {
"created_if":
f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'",
},
"urban_nature_supply_lucode_[LUCODE].tif": {
"urban_nature_supply_percapita_lucode_[LUCODE].tif": {
"about": gettext(
"The urban nature supplied to populations due to the "
"land use land cover code LUCODE"),
"bands": {1: {"type": "number", "units": u.m**2/u.person}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_UNIFORM}'",
f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'",
},
"urban_nature_population_ratio_lucode_[LUCODE].tif": {
"about": gettext(
@ -501,14 +537,6 @@ MODEL_SPEC = {
"created_if":
f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'",
},
"urban_nature_supply_lucode_[LUCODE].tif": {
"about": gettext(
"The urban nature supplied to populations due to "
"the land use land cover class LUCODE."),
"bands": {1: {"type": "number", "units": u.m**2/u.person}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'",
},
# When RADIUS_OPT_POP_GROUP
"population_in_[POP_GROUP].tif": {
@ -547,10 +575,10 @@ MODEL_SPEC = {
"created_if":
f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
},
"urban_nature_supply_to_[POP_GROUP].tif": {
"urban_nature_supply_percapita_to_[POP_GROUP].tif": {
"about": gettext(
"The urban nature supply to population group "
"POP_GROUP."),
"The urban nature supply per capita to population "
"group POP_GROUP."),
"bands": {1: {"type": "number", "units": u.m**2/u.person}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
@ -570,16 +598,16 @@ MODEL_SPEC = {
"bands": {1: {"type": "number", "units": u.people}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
},
},
}
}
}
},
'taskgraph_cache': spec_utils.TASKGRAPH_DIR,
}
}
_OUTPUT_BASE_FILES = {
'urban_nature_supply': 'urban_nature_supply.tif',
'urban_nature_supply_percapita': 'urban_nature_supply_percapita.tif',
'admin_boundaries': 'admin_boundaries.gpkg',
'urban_nature_balance_percapita': 'urban_nature_balance_percapita.tif',
'urban_nature_balance_totalpop': 'urban_nature_balance_totalpop.tif',
@ -620,9 +648,10 @@ def execute(args):
CSV with the following columns:
* ``lucode``: (required) the integer landcover code represented.
* ``urban_nature``: (required) ``0`` or ``1`` indicating whether
this landcover code is (``1``) or is not (``0``) an urban nature
pixel.
* ``urban_nature``: (required) a proportion (0-1) representing
how much of this landcover type is urban nature. ``0``
indicates none of this type's area is urban nature, ``1``
indicates all of this type's area is urban nature.
* ``search_radius_m``: (conditionally required) the search radius
for this urban nature LULC class in meters. Required for all
urban nature LULC codes if ``args['search_radius_mode'] ==
@ -677,7 +706,6 @@ def execute(args):
(_INTERMEDIATE_BASE_FILES, intermediate_dir)],
suffix)
work_token_dir = os.path.join(intermediate_dir, '_taskgraph_working_dir')
try:
n_workers = int(args['n_workers'])
except (KeyError, ValueError, TypeError):
@ -685,7 +713,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # Synchronous execution
graph = taskgraph.TaskGraph(work_token_dir, n_workers)
graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
kernel_creation_functions = {
KERNEL_LABEL_DICHOTOMY: _kernel_dichotomy,
@ -902,14 +931,16 @@ def execute(args):
aoi_reprojection_task, lulc_mask_task]
)
attr_table = utils.read_csv_to_dataframe(args['lulc_attribute_table'])
attr_table = utils.read_csv_to_dataframe(
args['lulc_attribute_table'],
MODEL_SPEC['args']['lulc_attribute_table'])
kernel_paths = {} # search_radius, kernel path
kernel_tasks = {} # search_radius, kernel task
if args['search_radius_mode'] == RADIUS_OPT_UNIFORM:
search_radii = set([float(args['search_radius'])])
elif args['search_radius_mode'] == RADIUS_OPT_URBAN_NATURE:
urban_nature_attrs = attr_table[attr_table['urban_nature'] == 1]
urban_nature_attrs = attr_table[attr_table['urban_nature'] > 0]
try:
search_radii = set(urban_nature_attrs['search_radius_m'].unique())
except KeyError as missing_key:
@ -918,16 +949,14 @@ def execute(args):
f"attribute table {args['lulc_attribute_table']}")
# Build an iterable of plain tuples: (lucode, search_radius_m)
lucode_to_search_radii = list(
urban_nature_attrs[['lucode', 'search_radius_m']].itertuples(
index=False, name=None))
urban_nature_attrs[['search_radius_m']].itertuples(name=None))
elif args['search_radius_mode'] == RADIUS_OPT_POP_GROUP:
pop_group_table = utils.read_csv_to_dataframe(
args['population_group_radii_table'])
args['population_group_radii_table'],
MODEL_SPEC['args']['population_group_radii_table'])
search_radii = set(pop_group_table['search_radius_m'].unique())
# Build a dict of {pop_group: search_radius_m}
search_radii_by_pop_group = dict(
pop_group_table[['pop_group', 'search_radius_m']].itertuples(
index=False, name=None))
search_radii_by_pop_group = pop_group_table['search_radius_m'].to_dict()
else:
valid_options = ', '.join(
MODEL_SPEC['args']['search_radius_mode']['options'].keys())
@ -989,6 +1018,21 @@ def execute(args):
dependent_task_list=[lulc_mask_task]
)
accessible_urban_nature_path = os.path.join(
output_dir, f'accessible_urban_nature{suffix}.tif')
_ = graph.add_task(
_convolve_and_set_lower_bound,
kwargs={
"signal_path_band": (urban_nature_pixels_path, 1),
"kernel_path_band": (kernel_paths[search_radius_m], 1),
"target_path": accessible_urban_nature_path,
"working_dir": intermediate_dir,
},
task_name='Accessible urban nature',
target_path_list=[accessible_urban_nature_path],
dependent_task_list=[urban_nature_reclassification_task]
)
urban_nature_population_ratio_path = os.path.join(
intermediate_dir,
f'urban_nature_population_ratio{suffix}.tif')
@ -1005,17 +1049,17 @@ def execute(args):
urban_nature_reclassification_task, decayed_population_task,
])
urban_nature_supply_task = graph.add_task(
urban_nature_supply_percapita_task = graph.add_task(
_convolve_and_set_lower_bound,
kwargs={
'signal_path_band': (
urban_nature_population_ratio_path, 1),
'kernel_path_band': (kernel_path, 1),
'target_path': file_registry['urban_nature_supply'],
'target_path': file_registry['urban_nature_supply_percapita'],
'working_dir': intermediate_dir,
},
task_name='2SFCA - urban nature supply',
target_path_list=[file_registry['urban_nature_supply']],
target_path_list=[file_registry['urban_nature_supply_percapita']],
dependent_task_list=[
kernel_tasks[search_radius_m],
urban_nature_population_ratio_task])
@ -1044,8 +1088,8 @@ def execute(args):
dependent_task_list=[
kernel_tasks[search_radius_m], population_mask_task])
partial_urban_nature_supply_paths = []
partial_urban_nature_supply_tasks = []
partial_urban_nature_supply_percapita_paths = []
partial_urban_nature_supply_percapita_tasks = []
for lucode, search_radius_m in lucode_to_search_radii:
urban_nature_pixels_path = os.path.join(
intermediate_dir,
@ -1063,6 +1107,22 @@ def execute(args):
dependent_task_list=[lulc_mask_task]
)
accessible_urban_nature_path = os.path.join(
output_dir,
f'accessible_urban_nature_lucode_{lucode}{suffix}.tif')
_ = graph.add_task(
_convolve_and_set_lower_bound,
kwargs={
"signal_path_band": (urban_nature_pixels_path, 1),
"kernel_path_band": (kernel_paths[search_radius_m], 1),
"target_path": accessible_urban_nature_path,
"working_dir": intermediate_dir,
},
task_name='Accessible urban nature',
target_path_list=[accessible_urban_nature_path],
dependent_task_list=[urban_nature_reclassification_task]
)
urban_nature_population_ratio_path = os.path.join(
intermediate_dir,
f'urban_nature_population_ratio_lucode_{lucode}{suffix}.tif')
@ -1080,35 +1140,37 @@ def execute(args):
decayed_population_tasks[search_radius_m],
])
urban_nature_supply_path = os.path.join(
urban_nature_supply_percapita_path = os.path.join(
intermediate_dir,
f'urban_nature_supply_lucode_{lucode}{suffix}.tif')
partial_urban_nature_supply_paths.append(urban_nature_supply_path)
partial_urban_nature_supply_tasks.append(graph.add_task(
f'urban_nature_supply_percapita_lucode_{lucode}{suffix}.tif')
partial_urban_nature_supply_percapita_paths.append(
urban_nature_supply_percapita_path)
partial_urban_nature_supply_percapita_tasks.append(graph.add_task(
pygeoprocessing.convolve_2d,
kwargs={
'signal_path_band': (
urban_nature_population_ratio_path, 1),
'kernel_path_band': (kernel_paths[search_radius_m], 1),
'target_path': urban_nature_supply_path,
'target_path': urban_nature_supply_percapita_path,
'working_dir': intermediate_dir,
},
task_name=f'2SFCA - urban_nature supply for lucode {lucode}',
target_path_list=[urban_nature_supply_path],
target_path_list=[urban_nature_supply_percapita_path],
dependent_task_list=[
kernel_tasks[search_radius_m],
urban_nature_population_ratio_task]))
urban_nature_supply_task = graph.add_task(
urban_nature_supply_percapita_task = graph.add_task(
ndr._sum_rasters,
kwargs={
'raster_path_list': partial_urban_nature_supply_paths,
'raster_path_list': partial_urban_nature_supply_percapita_paths,
'target_nodata': FLOAT32_NODATA,
'target_result_path': file_registry['urban_nature_supply'],
'target_result_path':
file_registry['urban_nature_supply_percapita'],
},
task_name='2SFCA - urban nature supply total',
target_path_list=[file_registry['urban_nature_supply']],
dependent_task_list=partial_urban_nature_supply_tasks
target_path_list=[file_registry['urban_nature_supply_percapita']],
dependent_task_list=partial_urban_nature_supply_percapita_tasks
)
# Search radius mode 3: search radii are defined per population group.
@ -1133,6 +1195,23 @@ def execute(args):
decayed_population_in_group_tasks = []
for pop_group in split_population_fields:
search_radius_m = search_radii_by_pop_group[pop_group]
accessible_urban_nature_path = os.path.join(
output_dir,
f'accessible_urban_nature_to_{pop_group}{suffix}.tif')
_ = graph.add_task(
_convolve_and_set_lower_bound,
kwargs={
"signal_path_band": (urban_nature_pixels_path, 1),
"kernel_path_band": (kernel_paths[search_radius_m], 1),
"target_path": accessible_urban_nature_path,
"working_dir": intermediate_dir,
},
task_name='Accessible urban nature',
target_path_list=[accessible_urban_nature_path],
dependent_task_list=[urban_nature_reclassification_task]
)
decayed_population_in_group_path = os.path.join(
intermediate_dir,
f'distance_weighted_population_in_{pop_group}{suffix}.tif')
@ -1185,42 +1264,36 @@ def execute(args):
sum_of_decayed_population_task,
])
# Create a dict of {pop_group: search_radius_m}
group_radii_table = utils.read_csv_to_dataframe(
args['population_group_radii_table'])
search_radii = dict(
group_radii_table[['pop_group', 'search_radius_m']].itertuples(
index=False, name=None))
urban_nature_supply_by_group_paths = {}
urban_nature_supply_by_group_tasks = []
urban_nature_supply_percapita_by_group_paths = {}
urban_nature_supply_percapita_by_group_tasks = []
urban_nature_balance_totalpop_by_group_paths = {}
urban_nature_balance_totalpop_by_group_tasks = []
supply_population_paths = {'over': {}, 'under': {}}
supply_population_tasks = {'over': {}, 'under': {}}
for pop_group, proportional_pop_path in (
proportional_population_paths.items()):
search_radius_m = search_radii[pop_group]
urban_nature_supply_to_group_path = os.path.join(
search_radius_m = search_radii_by_pop_group[pop_group]
urban_nature_supply_percapita_to_group_path = os.path.join(
intermediate_dir,
f'urban_nature_supply_to_{pop_group}{suffix}.tif')
urban_nature_supply_by_group_paths[
pop_group] = urban_nature_supply_to_group_path
urban_nature_supply_by_group_task = graph.add_task(
f'urban_nature_supply_percapita_to_{pop_group}{suffix}.tif')
urban_nature_supply_percapita_by_group_paths[
pop_group] = urban_nature_supply_percapita_to_group_path
urban_nature_supply_percapita_by_group_task = graph.add_task(
_convolve_and_set_lower_bound,
kwargs={
'signal_path_band': (
file_registry['urban_nature_population_ratio'], 1),
'kernel_path_band': (kernel_paths[search_radius_m], 1),
'target_path': urban_nature_supply_to_group_path,
'target_path': urban_nature_supply_percapita_to_group_path,
'working_dir': intermediate_dir,
},
task_name=f'2SFCA - urban nature supply for {pop_group}',
target_path_list=[urban_nature_supply_to_group_path],
target_path_list=[urban_nature_supply_percapita_to_group_path],
dependent_task_list=[
kernel_tasks[search_radius_m],
urban_nature_population_ratio_task])
urban_nature_supply_by_group_tasks.append(
urban_nature_supply_by_group_task)
urban_nature_supply_percapita_by_group_tasks.append(
urban_nature_supply_percapita_by_group_task)
# Calculate SUP_DEMi_cap for each population group.
per_cap_urban_nature_balance_pop_group_path = os.path.join(
@ -1230,7 +1303,7 @@ def execute(args):
pygeoprocessing.raster_calculator,
kwargs={
'base_raster_path_band_const_list': [
(urban_nature_supply_to_group_path, 1),
(urban_nature_supply_percapita_to_group_path, 1),
(float(args['urban_nature_demand']), 'raw')
],
'local_op': _urban_nature_balance_percapita_op,
@ -1244,7 +1317,7 @@ def execute(args):
target_path_list=[
per_cap_urban_nature_balance_pop_group_path],
dependent_task_list=[
urban_nature_supply_by_group_task,
urban_nature_supply_percapita_by_group_task,
])
urban_nature_balance_totalpop_by_group_path = os.path.join(
@ -1303,21 +1376,21 @@ def execute(args):
proportional_population_tasks[pop_group],
])
urban_nature_supply_task = graph.add_task(
urban_nature_supply_percapita_task = graph.add_task(
_weighted_sum,
kwargs={
'raster_path_list':
[urban_nature_supply_by_group_paths[group] for group in
[urban_nature_supply_percapita_by_group_paths[group] for group in
sorted(split_population_fields)],
'weight_raster_list':
[pop_group_proportion_paths[group] for group in
sorted(split_population_fields)],
'target_path': file_registry['urban_nature_supply'],
'target_path': file_registry['urban_nature_supply_percapita'],
},
task_name='2SFCA - urban nature supply total',
target_path_list=[file_registry['urban_nature_supply']],
target_path_list=[file_registry['urban_nature_supply_percapita']],
dependent_task_list=[
*urban_nature_supply_by_group_tasks,
*urban_nature_supply_percapita_by_group_tasks,
*pop_group_proportion_tasks.values(),
])
@ -1325,7 +1398,7 @@ def execute(args):
pygeoprocessing.raster_calculator,
kwargs={
'base_raster_path_band_const_list': [
(file_registry['urban_nature_supply'], 1),
(file_registry['urban_nature_supply_percapita'], 1),
(float(args['urban_nature_demand']), 'raw')
],
'local_op': _urban_nature_balance_percapita_op,
@ -1337,7 +1410,7 @@ def execute(args):
task_name='Calculate per-capita urban nature balance',
target_path_list=[file_registry['urban_nature_balance_percapita']],
dependent_task_list=[
urban_nature_supply_task,
urban_nature_supply_percapita_task,
])
urban_nature_balance_totalpop_task = graph.add_task(
@ -1388,7 +1461,7 @@ def execute(args):
pygeoprocessing.raster_calculator,
kwargs={
'base_raster_path_band_const_list': [
(file_registry['urban_nature_supply'], 1),
(file_registry['urban_nature_supply_percapita'], 1),
(float(args['urban_nature_demand']), 'raw')
],
'local_op': _urban_nature_balance_percapita_op,
@ -1400,7 +1473,7 @@ def execute(args):
task_name='Calculate per-capita urban nature balance',
target_path_list=[file_registry['urban_nature_balance_percapita']],
dependent_task_list=[
urban_nature_supply_task,
urban_nature_supply_percapita_task,
])
# This is "SUP_DEMi" from the user's guide
@ -1730,13 +1803,16 @@ def _reclassify_urban_nature_area(
"""Reclassify LULC pixels into the urban nature area they represent.
After execution, urban nature pixels will have values representing the
pixel's area, while pixels that are not urban nature will have a pixel
value of 0. Nodata values will propagate to the output raster.
pixel's area of urban nature (pixel area * proportion of urban nature),
while pixels that are not urban nature will have a pixel value of 0.
Nodata values will propagate to the output raster.
Args:
lulc_raster_path (string): The path to a land-use/land-cover raster.
lulc_attribute_table (string): The path to a CSV table representing
LULC attributes. Must have "lucode" and "urban_nature" columns.
The "urban_nature" column represents a proportion 0-1 of how much
of the pixel's area represents urban nature.
target_raster_path (string): Where the reclassified urban nature raster
should be written.
only_these_urban_nature_codes=None (iterable or None): If ``None``, all
@ -1748,8 +1824,8 @@ def _reclassify_urban_nature_area(
Returns:
``None``
"""
attribute_table_dict = utils.read_csv_to_dataframe(
lulc_attribute_table, 'lucode').to_dict(orient='index')
lulc_attribute_df = utils.read_csv_to_dataframe(
lulc_attribute_table, MODEL_SPEC['args']['lulc_attribute_table'])
squared_pixel_area = abs(
numpy.multiply(*_square_off_pixels(lulc_raster_path)))
@ -1758,14 +1834,15 @@ def _reclassify_urban_nature_area(
valid_urban_nature_codes = set(only_these_urban_nature_codes)
else:
valid_urban_nature_codes = set(
lucode for lucode, attributes in attribute_table_dict.items()
if (attributes['urban_nature']) == 1)
lulc_attribute_df[lulc_attribute_df['urban_nature'] > 0].index)
urban_nature_area_map = {}
for lucode, attributes in attribute_table_dict.items():
for row in lulc_attribute_df[['urban_nature']].itertuples():
lucode = row.Index
urban_nature_proportion = row.urban_nature
urban_nature_area = 0
if lucode in valid_urban_nature_codes:
urban_nature_area = squared_pixel_area
urban_nature_area = squared_pixel_area * urban_nature_proportion
urban_nature_area_map[lucode] = urban_nature_area
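# Worked example (illustrative numbers): for a 30 m x 30 m pixel,
# squared_pixel_area is 900 m^2; an LULC class with urban_nature = 0.25 maps
# to 900 * 0.25 = 225 m^2 of urban nature per pixel, while a class with
# urban_nature = 0 maps to 0.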
lulc_raster_info = pygeoprocessing.get_raster_info(lulc_raster_path)

View File

@ -597,85 +597,119 @@ def expand_path(path, base_path):
return os.path.abspath(os.path.join(os.path.dirname(base_path), path))
def read_csv_to_dataframe(
path, index_col=False, usecols=None, convert_cols_to_lower=True,
convert_vals_to_lower=True, expand_path_cols=None, sep=None, engine='python',
encoding='utf-8-sig', **kwargs):
def read_csv_to_dataframe(path, spec, **kwargs):
"""Return a dataframe representation of the CSV.
Wrapper around ``pandas.read_csv`` that standardizes the column names by
stripping leading/trailing whitespace and optionally making all lowercase.
This helps avoid common errors caused by user-supplied CSV files with
column names that don't exactly match the specification. Strips
leading/trailing whitespace from data entries as well.
Wrapper around ``pandas.read_csv`` that performs some common data cleaning
based on information in the arg spec.
Also sets custom defaults for some kwargs passed to ``pandas.read_csv``.
Columns are filtered to just those that match a pattern in the spec.
Column names are lowercased and whitespace is stripped off. Empty rows are
dropped. Values in each column are processed and cast to an appropriate
dtype according to the type in the spec:
- Values in raster, vector, csv, file, and directory columns are cast to
str, whitespace stripped, and expanded as paths relative to the input path
- Values in freestyle_string and option_string columns are cast to str,
whitespace stripped, and converted to lowercase
- Values in number, ratio, and percent columns are cast to float
- Values in integer columns are cast to int
- Values in boolean columns are cast to bool
Empty or NA cells are returned as ``numpy.nan`` (for floats) or
``pandas.NA`` (for all other types).
Also sets custom defaults for some kwargs passed to ``pandas.read_csv``,
which you can override with kwargs:
- sep=None: lets the Python engine infer the separator
- engine='python': The 'python' engine supports the sep=None option.
- encoding='utf-8-sig': 'utf-8-sig' handles UTF-8 with or without BOM.
Args:
path (str): path to a CSV file
index_col (str): name of column to use as the dataframe index. If
``convert_cols_to_lower``, this column name and the dataframe column names
will be lowercased before they are compared. If ``usecols``
is defined, this must be included in ``usecols``.
usecols (list(str)): list of column names to subset from the dataframe.
If ``convert_cols_to_lower``, these names and the dataframe column names
will be lowercased before they are compared.
convert_cols_to_lower (bool): if True, convert all column names to lowercase
convert_vals_to_lower (bool): if True, convert all table values to lowercase
expand_path_cols (list[string])): if provided, a list of the names of
columns that contain paths to expand. Any relative paths in these
columns will be expanded to absolute paths. It is assumed that
relative paths are relative to the CSV's path.
sep: kwarg of ``pandas.read_csv``. Defaults to None, which
lets the Python engine infer the separator
engine (str): kwarg of ``pandas.read_csv``. The 'python' engine
supports the sep=None option.
encoding (str): kwarg of ``pandas.read_csv``. Using the 'utf-8-sig'
encoding handles UTF-8 with or without BOM.
spec (dict): dictionary specifying the structure of the CSV table
**kwargs: additional kwargs will be passed to ``pandas.read_csv``
Returns:
pandas.DataFrame with the contents of the given CSV
"""
# build up a list of regex patterns to match columns against. Columns from
# the table that match a pattern in this list (after stripping whitespace
# and lowercasing) will be included in the dataframe.
patterns = []
for column in spec['columns']:
column = column.lower()
match = re.match(r'(.*)\[(.+)\](.*)', column)
if match:
# for column name patterns, convert it to a regex pattern
groups = match.groups()
patterns.append(f'{groups[0]}(.+){groups[2]}')
else:
# for regular column names, use the exact name as the pattern
patterns.append(column.replace('(', r'\(').replace(')', r'\)'))
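# e.g. a hypothetical spec column pattern like 'qf_[month]' becomes the
# regex 'qf_(.+)' and matches table columns 'qf_1', 'qf_2', ...; a literal
# column name containing parentheses is escaped so it only matches itself.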
try:
# set index_col=False to force pandas not to index by any column
# this is useful in case of trailing separators
# we'll explicitly set the index column later on
dataframe = pandas.read_csv(
path, index_col=False, sep=sep, engine=engine, encoding=encoding, **kwargs)
df = pandas.read_csv(
path,
index_col=False,
usecols=lambda col: any(
re.fullmatch(pattern, col.strip().lower()) for pattern in patterns
),
**{
'sep': None,
'engine': 'python',
'encoding': 'utf-8-sig',
**kwargs
})
except UnicodeDecodeError as error:
LOGGER.error(
f'The file {path} must be encoded as UTF-8 or ASCII')
raise error
# strip whitespace from column names
# strip whitespace from column names and convert to lowercase
# this won't work on integer types, which happens if you set header=None
# however, there's little reason to use this function if there's no header
dataframe.columns = dataframe.columns.str.strip()
df.columns = df.columns.str.strip().str.lower()
# convert column names to lowercase
if convert_cols_to_lower:
dataframe.columns = dataframe.columns.str.lower()
# if ``to_lower``, case handling is done before trying to access the data.
# the columns are stripped of leading/trailing whitespace in
# ``read_csv_to_dataframe``, and also lowercased if ``to_lower`` so we only
# need to convert the rest of the table.
if index_col and isinstance(index_col, str):
index_col = index_col.lower()
# lowercase column names
if usecols:
usecols = [col.lower() for col in usecols]
# drop any empty rows
df = df.dropna(how="all")
# Subset dataframe by columns if desired
if usecols:
dataframe = dataframe[usecols]
available_cols = set(df.columns)
# Set 'index_col' as the index of the dataframe
if index_col:
for col_spec, pattern in zip(spec['columns'].values(), patterns):
matching_cols = [c for c in available_cols if re.match(pattern, c)]
available_cols -= set(matching_cols)
for col in matching_cols:
try:
if col_spec['type'] in ['csv', 'directory', 'file', 'raster', 'vector', {'vector', 'raster'}]:
df[col] = df[col].apply(
lambda p: p if pandas.isna(p) else expand_path(str(p).strip(), path))
df[col] = df[col].astype(pandas.StringDtype())
elif col_spec['type'] in {'freestyle_string', 'option_string'}:
df[col] = df[col].apply(
lambda s: s if pandas.isna(s) else str(s).strip().lower())
df[col] = df[col].astype(pandas.StringDtype())
elif col_spec['type'] in {'number', 'percent', 'ratio'}:
df[col] = df[col].astype(float)
elif col_spec['type'] == 'integer':
df[col] = df[col].astype(pandas.Int64Dtype())
elif col_spec['type'] == 'boolean':
df[col] = df[col].astype('boolean')
except ValueError as err:
raise ValueError(
f'Value(s) in the "{col}" column of the table {path} '
f'could not be interpreted as {col_spec["type"]}s. '
f'Original error: {err}')
# set the index column, if specified
if 'index_col' in spec and spec['index_col'] is not None:
index_col = spec['index_col'].lower()
try:
dataframe = dataframe.set_index(
index_col, drop=False, verify_integrity=True)
df = df.set_index(index_col, verify_integrity=True)
except KeyError:
# If 'index_col' is not a column then KeyError is raised for using
# it as the index column
@ -683,33 +717,7 @@ def read_csv_to_dataframe(
f"in the table {path}")
raise
# convert table values to lowercase
if convert_vals_to_lower:
dataframe = dataframe.applymap(
lambda x: x.lower() if isinstance(x, str) else x)
# expand paths
if expand_path_cols:
for col in expand_path_cols:
# allow for the case where a column is optional
if col in dataframe:
dataframe[col] = dataframe[col].apply(
# if the whole column is empty, cells will be parsed as NaN
# catch that before trying to expand them as paths
lambda p: '' if pandas.isna(p) else expand_path(p, path))
# drop any empty rows
dataframe = dataframe.dropna(how="all")
# fill the rest of empty or NaN values with empty string
dataframe = dataframe.fillna(value="")
# strip whitespace from table values
# Remove values with leading ('^ +') and trailing (' +$') whitespace.
# Regular expressions using 'replace' only substitute on strings.
dataframe = dataframe.replace(r"^ +| +$", r"", regex=True)
return dataframe
return df
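A minimal usage sketch of the spec-driven behavior documented above; the table
name, column names, and types below are hypothetical and only illustrate the
shape of the ``spec`` argument:
spec = {
    'index_col': 'lucode',
    'columns': {
        'lucode': {'type': 'integer'},
        'description': {'type': 'freestyle_string'},
        'value': {'type': 'number'},
        'raster_path': {'type': 'raster'},
    },
}
df = utils.read_csv_to_dataframe('biophysical_table.csv', spec)
# Columns not named in the spec are dropped, 'lucode' becomes the index,
# numeric columns are cast to float or nullable-integer dtypes, string
# columns are stripped and lowercased, and 'raster_path' values are expanded
# relative to the CSV's own directory.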
def make_directories(directory_list):

View File

@ -56,7 +56,8 @@ MESSAGES = {
'NOT_AN_INTEGER': gettext('Value "{value}" does not represent an integer'),
'NOT_BOOLEAN': gettext("Value must be either True or False, not {value}"),
'NO_PROJECTION': gettext('Spatial file {filepath} has no projection'),
'BBOX_NOT_INTERSECT': gettext("Bounding boxes do not intersect: {bboxes}"),
'BBOX_NOT_INTERSECT': gettext('Not all of the spatial layers overlap each '
'other. All bounding boxes must intersect: {bboxes}'),
'NEED_PERMISSION': gettext('You must have {permission} access to this file'),
}

View File

@ -423,6 +423,7 @@ MODEL_SPEC = {
"Table of value ranges for each captured wave energy "
"quantile group as well as the number of pixels for "
"each group."),
"index_col": "Percentile Group",
"columns": {
**PERCENTILE_TABLE_FIELDS,
"Value Range (megawatt hours per year, MWh/yr)": {
@ -459,6 +460,7 @@ MODEL_SPEC = {
"Table of value ranges for each net present value "
"quantile group as well as the number of pixels for "
"each group."),
"index_col": "Percentile Group",
"columns": {
**PERCENTILE_TABLE_FIELDS,
"Value Range (thousands of currency units, currency)": {
@ -497,6 +499,7 @@ MODEL_SPEC = {
"about": gettext(
"Table of value ranges for each wave power quantile "
"group as well as the number of pixels for each group."),
"index_col": "Percentile Group",
"columns": {
**PERCENTILE_TABLE_FIELDS,
"Value Range (wave power per unit width of wave crest length, kW/m)": {
@ -597,10 +600,10 @@ MODEL_SPEC = {
"LandPts.txt": {
"created_if": "valuation_container",
"about": "This text file logs records of the landing point coordinates."
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -722,8 +725,6 @@ def execute(args):
utils.make_directories([intermediate_dir, output_dir])
# Initialize a TaskGraph
taskgraph_working_dir = os.path.join(
intermediate_dir, '_taskgraph_working_dir')
try:
n_workers = int(args['n_workers'])
except (KeyError, ValueError, TypeError):
@ -731,7 +732,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # single process mode.
task_graph = taskgraph.TaskGraph(taskgraph_working_dir, n_workers)
task_graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
# Append a _ to the suffix if it's not empty and doesn't already have one
file_suffix = utils.make_suffix_string(args, 'results_suffix')
@ -743,7 +745,7 @@ def execute(args):
# arrays. Also store the amount of energy the machine produces
# in a certain wave period/height state as a 2D array
machine_perf_dict = {}
machine_perf_data = utils.read_csv_to_dataframe(args['machine_perf_path'])
machine_perf_data = pandas.read_csv(args['machine_perf_path'])
# Get the wave period fields, starting from the second column of the table
machine_perf_dict['periods'] = machine_perf_data.columns.values[1:]
# Build up the height field by taking the first column of the table
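# e.g. (hypothetical layout) the performance table's header row lists wave
# periods, its first column lists wave heights, and each interior cell is
# the energy the machine produces in that height/period sea state.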
@ -775,12 +777,15 @@ def execute(args):
# Check if required column fields are entered in the land grid csv file
if 'land_gridPts_path' in args:
# Create a grid_land_data dataframe for later use in valuation
grid_land_data = utils.read_csv_to_dataframe(
args['land_gridPts_path'], convert_vals_to_lower=False)
required_col_names = ['id', 'type', 'lat', 'long', 'location']
grid_land_data, missing_grid_land_fields = _get_validated_dataframe(
args['land_gridPts_path'], required_col_names)
# Create a grid_land_df dataframe for later use in valuation
grid_land_df = utils.read_csv_to_dataframe(
args['land_gridPts_path'],
MODEL_SPEC['args']['land_gridPts_path'])
missing_grid_land_fields = []
for field in ['id', 'type', 'lat', 'long', 'location']:
if field not in grid_land_df.columns:
missing_grid_land_fields.append(field)
if missing_grid_land_fields:
raise ValueError(
'The following column fields are missing from the Grid '
@ -1080,20 +1085,12 @@ def execute(args):
grid_vector_path = os.path.join(
output_dir, 'GridPts_prj%s.shp' % file_suffix)
grid_data = grid_land_data.loc[
grid_land_data['type'].str.upper() == 'GRID']
land_data = grid_land_data.loc[
grid_land_data['type'].str.upper() == 'LAND']
grid_dict = grid_data.to_dict('index')
land_dict = land_data.to_dict('index')
# Make a point shapefile for grid points
LOGGER.info('Creating Grid Points Vector.')
create_grid_points_vector_task = task_graph.add_task(
func=_dict_to_point_vector,
args=(grid_dict, grid_vector_path, 'grid_points', analysis_area_sr_wkt,
aoi_sr_wkt),
args=(grid_land_df[grid_land_df['type'] == 'grid'].to_dict('index'),
grid_vector_path, 'grid_points', analysis_area_sr_wkt, aoi_sr_wkt),
target_path_list=[grid_vector_path],
task_name='create_grid_points_vector')
@ -1101,8 +1098,8 @@ def execute(args):
LOGGER.info('Creating Landing Points Vector.')
create_land_points_vector_task = task_graph.add_task(
func=_dict_to_point_vector,
args=(land_dict, land_vector_path, 'land_points', analysis_area_sr_wkt,
aoi_sr_wkt),
args=(grid_land_df[grid_land_df['type'] == 'land'].to_dict('index'),
land_vector_path, 'land_points', analysis_area_sr_wkt, aoi_sr_wkt),
target_path_list=[land_vector_path],
task_name='create_land_points_vector')
@ -1413,26 +1410,6 @@ def _add_target_fields_to_wave_vector(
target_wave_vector = None
def _get_validated_dataframe(csv_path, field_list):
"""Return a dataframe with upper cased fields, and a list of missing fields.
Args:
csv_path (str): path to the csv to be converted to a dataframe.
field_list (list): a list of fields in string format.
Returns:
dataframe (pandas.DataFrame): from csv with upper-cased fields.
missing_fields (list): missing fields as string format in dataframe.
"""
dataframe = utils.read_csv_to_dataframe(csv_path, convert_vals_to_lower=False)
missing_fields = []
for field in field_list:
if field not in dataframe.columns:
missing_fields.append(field)
return dataframe, missing_fields
def _dict_to_point_vector(base_dict_data, target_vector_path, layer_name,
base_sr_wkt, target_sr_wkt):
"""Given a dictionary of data create a point shapefile that represents it.
@ -1493,6 +1470,7 @@ def _dict_to_point_vector(base_dict_data, target_vector_path, layer_name,
for point_dict in base_dict_data.values():
latitude = float(point_dict['lat'])
longitude = float(point_dict['long'])
point_dict['id'] = int(point_dict['id'])
# When projecting to WGS84, extents -180 to 180 are used for longitude.
# In case input longitude is from -360 to 0 convert
if longitude < -180:
@ -1505,7 +1483,7 @@ def _dict_to_point_vector(base_dict_data, target_vector_path, layer_name,
target_layer.CreateFeature(output_feature)
for field_name in point_dict:
output_feature.SetField(field_name, point_dict[field_name])
output_feature.SetField(field_name.upper(), point_dict[field_name])
output_feature.SetGeometryDirectly(geom)
target_layer.SetFeature(output_feature)
output_feature = None
@ -1670,9 +1648,13 @@ def _machine_csv_to_dict(machine_csv_path):
machine_dict = {}
# make columns and indexes lowercased and strip whitespace
machine_data = utils.read_csv_to_dataframe(
machine_csv_path, 'name', convert_vals_to_lower=False)
machine_data.index = machine_data.index.str.strip()
machine_data.index = machine_data.index.str.lower()
machine_csv_path,
{
'index_col': 'name',
'columns': {
'name': {'type': 'freestyle_string'},
'value': {'type': 'number'}
}})
# drop NaN indexed rows in dataframe
machine_data = machine_data[machine_data.index.notnull()]

View File

@ -328,6 +328,7 @@ MODEL_SPEC = {
},
"grid_points_path": {
"type": "csv",
"index_col": "id",
"columns": {
"id": {
"type": "integer",
@ -382,6 +383,7 @@ MODEL_SPEC = {
},
"wind_schedule": {
"type": "csv",
"index_col": "year",
"columns": {
"year": {
"type": "number",
@ -524,10 +526,10 @@ MODEL_SPEC = {
"about": "Wind data",
"geometries": spec_utils.POINT,
"fields": OUTPUT_WIND_DATA_FIELDS
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -654,7 +656,6 @@ def execute(args):
suffix = utils.make_suffix_string(args, 'results_suffix')
# Initialize a TaskGraph
taskgraph_working_dir = os.path.join(inter_dir, '_taskgraph_working_dir')
try:
n_workers = int(args['n_workers'])
except (KeyError, ValueError, TypeError):
@ -662,7 +663,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # single process mode.
task_graph = taskgraph.TaskGraph(taskgraph_working_dir, n_workers)
task_graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
# Resample the bathymetry raster if it does not have square pixel size
try:
@ -754,9 +756,11 @@ def execute(args):
# If Price Table provided use that for price of energy, validate inputs
time = int(val_parameters_dict['time_period'])
if args['price_table']:
wind_price_df = utils.read_csv_to_dataframe(args['wind_schedule'])
wind_price_df = utils.read_csv_to_dataframe(
args['wind_schedule'], MODEL_SPEC['args']['wind_schedule']
).sort_index() # sort by year
year_count = len(wind_price_df['year'])
year_count = len(wind_price_df)
if year_count != time + 1:
raise ValueError(
"The 'time' argument in the Global Wind Energy Parameters "
@ -765,7 +769,6 @@ def execute(args):
# Save the price values into a list where the indices of the list
# indicate the time steps for the lifespan of the wind farm
wind_price_df.sort_values('year', inplace=True)
price_list = wind_price_df['price'].tolist()
else:
change_rate = float(args["rate_change"])
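As a quick illustrative check of the row-count rule above (numbers
hypothetical): with time_period = 20 in the global parameters table, the wind
schedule needs 21 rows, one price per year from year 0 through year 20, so
that len(wind_price_df) == time + 1.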
@ -1134,19 +1137,11 @@ def execute(args):
# Read the grid points csv, and convert it to land and grid dictionary
grid_land_df = utils.read_csv_to_dataframe(
args['grid_points_path'], convert_vals_to_lower=False)
# Make separate dataframes based on 'TYPE'
grid_df = grid_land_df.loc[(
grid_land_df['type'].str.upper() == 'GRID')]
land_df = grid_land_df.loc[(
grid_land_df['type'].str.upper() == 'LAND')]
args['grid_points_path'], MODEL_SPEC['args']['grid_points_path'])
# Convert the dataframes to dictionaries, using 'ID' (the index) as key
grid_df.set_index('id', inplace=True)
grid_dict = grid_df.to_dict('index')
land_df.set_index('id', inplace=True)
land_dict = land_df.to_dict('index')
grid_dict = grid_land_df[grid_land_df['type'] == 'grid'].to_dict('index')
land_dict = grid_land_df[grid_land_df['type'] == 'land'].to_dict('index')
grid_vector_path = os.path.join(
inter_dir, 'val_grid_points%s.shp' % suffix)
@ -1974,7 +1969,8 @@ def _read_csv_wind_data(wind_data_path, hub_height):
"""
wind_point_df = utils.read_csv_to_dataframe(
wind_data_path, convert_cols_to_lower=False, convert_vals_to_lower=False)
wind_data_path, MODEL_SPEC['args']['wind_data_path'])
wind_point_df.columns = wind_point_df.columns.str.upper()
# Calculate scale value at new hub height given reference values.
# See equation 3 in users guide
@ -2631,10 +2627,12 @@ def _clip_vector_by_vector(
shutil.rmtree(temp_dir, ignore_errors=True)
if empty_clip:
# The "clip_vector_path" is always the AOI.
raise ValueError(
f"Clipping {base_vector_path} by {clip_vector_path} returned 0"
" features. If an AOI was provided this could mean the AOI and"
" Wind Data do not intersect spatially.")
f" features. This means the AOI and {base_vector_path} do not"
" intersect spatially. Please check that the AOI has spatial"
" overlap with all input data.")
LOGGER.info('Finished _clip_vector_by_vector')

View File

@ -10,6 +10,7 @@ import textwrap
import unittest
import numpy
import pandas
import pygeoprocessing
from natcap.invest import utils
from osgeo import gdal
@ -151,10 +152,9 @@ class TestPreprocessor(unittest.TestCase):
pprint.pformat(non_suffixed_files)))
expected_landcover_codes = set(range(0, 24))
found_landcover_codes = set(utils.read_csv_to_dataframe(
os.path.join(outputs_dir,
'carbon_biophysical_table_template_150225.csv'),
'code').to_dict(orient='index').keys())
found_landcover_codes = set(pandas.read_csv(
os.path.join(outputs_dir, 'carbon_biophysical_table_template_150225.csv')
)['code'].values)
self.assertEqual(expected_landcover_codes, found_landcover_codes)
def test_transition_table(self):
@ -188,25 +188,27 @@ class TestPreprocessor(unittest.TestCase):
lulc_csv.write('0,mangrove,True\n')
lulc_csv.write('1,parking lot,False\n')
landcover_table = utils.read_csv_to_dataframe(
landcover_table_path, 'code').to_dict(orient='index')
landcover_df = utils.read_csv_to_dataframe(
landcover_table_path,
preprocessor.MODEL_SPEC['args']['lulc_lookup_table_path'])
target_table_path = os.path.join(self.workspace_dir,
'transition_table.csv')
# Remove landcover code 1 from the table; expect error.
del landcover_table[1]
landcover_df = landcover_df.drop(1)
with self.assertRaises(ValueError) as context:
preprocessor._create_transition_table(
landcover_table, [filename_a, filename_b], target_table_path)
landcover_df, [filename_a, filename_b], target_table_path)
self.assertIn('missing a row with the landuse code 1',
str(context.exception))
# Re-load the landcover table
landcover_table = utils.read_csv_to_dataframe(
landcover_table_path, 'code').to_dict(orient='index')
landcover_df = utils.read_csv_to_dataframe(
landcover_table_path,
preprocessor.MODEL_SPEC['args']['lulc_lookup_table_path'])
preprocessor._create_transition_table(
landcover_table, [filename_a, filename_b], target_table_path)
landcover_df, [filename_a, filename_b], target_table_path)
with open(target_table_path) as transition_table:
self.assertEqual(
@ -235,46 +237,13 @@ class TestCBC2(unittest.TestCase):
"""Remove workspace after each test function."""
shutil.rmtree(self.workspace_dir)
def test_extract_snapshots(self):
"""CBC: Extract snapshots from a snapshot CSV."""
from natcap.invest.coastal_blue_carbon import coastal_blue_carbon
csv_path = os.path.join(self.workspace_dir, 'snapshots.csv')
transition_years = (2000, 2010, 2020)
transition_rasters = []
with open(csv_path, 'w') as transitions_csv:
# Check that we can interpret varying case.
transitions_csv.write('snapshot_YEAR,raster_PATH\n')
for transition_year in transition_years:
# Write absolute paths.
transition_file_path = os.path.join(
self.workspace_dir, f'{transition_year}.tif)')
transition_rasters.append(transition_file_path)
transitions_csv.write(
f'{transition_year},{transition_file_path}\n')
# Make one path relative to the workspace, where the transitions
# CSV also lives.
# The expected raster path is absolute.
transitions_csv.write('2030,some_path.tif\n')
transition_years += (2030,)
transition_rasters.append(os.path.join(self.workspace_dir,
'some_path.tif'))
extracted_transitions = (
coastal_blue_carbon._extract_snapshots_from_table(csv_path))
self.assertEqual(
extracted_transitions,
dict(zip(transition_years, transition_rasters)))
def test_read_invalid_transition_matrix(self):
"""CBC: Test exceptions in invalid transition structure."""
# The full biophysical table will have much, much more information. To
# keep the test simple, I'm only tracking the columns I know I'll need
# in this function.
from natcap.invest.coastal_blue_carbon import coastal_blue_carbon
biophysical_table = {
biophysical_table = pandas.DataFrame({
1: {'lulc-class': 'a',
'soil-yearly-accumulation': 2,
'biomass-yearly-accumulation': 3,
@ -290,7 +259,7 @@ class TestCBC2(unittest.TestCase):
'biomass-yearly-accumulation': 11,
'soil-high-impact-disturb': 12,
'biomass-high-impact-disturb': 13}
}
}).T
transition_csv_path = os.path.join(self.workspace_dir,
'transitions.csv')
@ -332,7 +301,7 @@ class TestCBC2(unittest.TestCase):
# keep the test simple, I'm only tracking the columns I know I'll need
# in this function.
from natcap.invest.coastal_blue_carbon import coastal_blue_carbon
biophysical_table = {
biophysical_table = pandas.DataFrame({
1: {'lulc-class': 'a',
'soil-yearly-accumulation': 2,
'biomass-yearly-accumulation': 3,
@ -348,7 +317,7 @@ class TestCBC2(unittest.TestCase):
'biomass-yearly-accumulation': 11,
'soil-high-impact-disturb': 12,
'biomass-high-impact-disturb': 13}
}
}).T
transition_csv_path = os.path.join(self.workspace_dir,
'transitions.csv')
@ -366,14 +335,14 @@ class TestCBC2(unittest.TestCase):
expected_biomass_disturbance = numpy.zeros((4, 4), dtype=numpy.float32)
expected_biomass_disturbance[1, 3] = (
biophysical_table[1]['biomass-high-impact-disturb'])
biophysical_table['biomass-high-impact-disturb'][1])
numpy.testing.assert_allclose(
expected_biomass_disturbance,
disturbance_matrices['biomass'].toarray())
expected_soil_disturbance = numpy.zeros((4, 4), dtype=numpy.float32)
expected_soil_disturbance[1, 3] = (
biophysical_table[1]['soil-high-impact-disturb'])
biophysical_table['soil-high-impact-disturb'][1])
numpy.testing.assert_allclose(
expected_soil_disturbance,
disturbance_matrices['soil'].toarray())
@ -381,22 +350,22 @@ class TestCBC2(unittest.TestCase):
expected_biomass_accumulation = numpy.zeros(
(4, 4), dtype=numpy.float32)
expected_biomass_accumulation[3, 1] = (
biophysical_table[1]['biomass-yearly-accumulation'])
biophysical_table['biomass-yearly-accumulation'][1])
expected_biomass_accumulation[1, 2] = (
biophysical_table[2]['biomass-yearly-accumulation'])
biophysical_table['biomass-yearly-accumulation'][2])
expected_biomass_accumulation[2, 3] = (
biophysical_table[3]['biomass-yearly-accumulation'])
biophysical_table['biomass-yearly-accumulation'][3])
numpy.testing.assert_allclose(
expected_biomass_accumulation,
accumulation_matrices['biomass'].toarray())
expected_soil_accumulation = numpy.zeros((4, 4), dtype=numpy.float32)
expected_soil_accumulation[3, 1] = (
biophysical_table[1]['soil-yearly-accumulation'])
biophysical_table['soil-yearly-accumulation'][1])
expected_soil_accumulation[1, 2] = (
biophysical_table[2]['soil-yearly-accumulation'])
biophysical_table['soil-yearly-accumulation'][2])
expected_soil_accumulation[2, 3] = (
biophysical_table[3]['soil-yearly-accumulation'])
biophysical_table['soil-yearly-accumulation'][3])
numpy.testing.assert_allclose(
expected_soil_accumulation,
accumulation_matrices['soil'].toarray())
@ -649,8 +618,10 @@ class TestCBC2(unittest.TestCase):
args = TestCBC2._create_model_args(self.workspace_dir)
args['workspace_dir'] = os.path.join(self.workspace_dir, 'workspace')
prior_snapshots = coastal_blue_carbon._extract_snapshots_from_table(
args['landcover_snapshot_csv'])
prior_snapshots = utils.read_csv_to_dataframe(
args['landcover_snapshot_csv'],
coastal_blue_carbon.MODEL_SPEC['args']['landcover_snapshot_csv']
)['raster_path'].to_dict()
baseline_year = min(prior_snapshots.keys())
baseline_raster = prior_snapshots[baseline_year]
with open(args['landcover_snapshot_csv'], 'w') as snapshot_csv:
@ -825,8 +796,10 @@ class TestCBC2(unittest.TestCase):
args = TestCBC2._create_model_args(self.workspace_dir)
args['workspace_dir'] = os.path.join(self.workspace_dir, 'workspace')
prior_snapshots = coastal_blue_carbon._extract_snapshots_from_table(
args['landcover_snapshot_csv'])
prior_snapshots = utils.read_csv_to_dataframe(
args['landcover_snapshot_csv'],
coastal_blue_carbon.MODEL_SPEC['args']['landcover_snapshot_csv']
)['raster_path'].to_dict()
baseline_year = min(prior_snapshots.keys())
baseline_raster = prior_snapshots[baseline_year]
with open(args['landcover_snapshot_csv'], 'w') as snapshot_csv:
@ -889,8 +862,10 @@ class TestCBC2(unittest.TestCase):
raster.write('not a raster')
# Write over the landcover snapshot CSV
prior_snapshots = coastal_blue_carbon._extract_snapshots_from_table(
args['landcover_snapshot_csv'])
prior_snapshots = utils.read_csv_to_dataframe(
args['landcover_snapshot_csv'],
coastal_blue_carbon.MODEL_SPEC['args']['landcover_snapshot_csv']
)['raster_path'].to_dict()
baseline_year = min(prior_snapshots)
with open(args['landcover_snapshot_csv'], 'w') as snapshot_table:
snapshot_table.write('snapshot_year,raster_path\n')
@ -902,8 +877,18 @@ class TestCBC2(unittest.TestCase):
# analysis year must be >= the last transition year.
args['analysis_year'] = baseline_year
# Write invalid entries to landcover transition table
with open(args['landcover_transitions_table'], 'w') as transition_table:
transition_table.write('lulc-class,Developed,Forest,Water\n')
transition_table.write('Developed,NCC,,invalid\n')
transition_table.write('Forest,accum,disturb,low-impact-disturb\n')
transition_table.write('Water,disturb,med-impact-disturb,high-impact-disturb\n')
transition_options = [
'accum', 'high-impact-disturb', 'med-impact-disturb',
'low-impact-disturb', 'ncc']
validation_warnings = coastal_blue_carbon.validate(args)
self.assertEqual(len(validation_warnings), 2)
self.assertEqual(len(validation_warnings), 3)
self.assertIn(
coastal_blue_carbon.INVALID_SNAPSHOT_RASTER_MSG.format(
snapshot_year=baseline_year + 10),
@ -912,6 +897,11 @@ class TestCBC2(unittest.TestCase):
coastal_blue_carbon.INVALID_ANALYSIS_YEAR_MSG.format(
analysis_year=2000, latest_year=2010),
validation_warnings[1][1])
self.assertIn(
coastal_blue_carbon.INVALID_TRANSITION_VALUES_MSG.format(
model_transitions=transition_options,
transition_values=['disturb', 'invalid']),
validation_warnings[2][1])
def test_track_first_disturbance(self):
"""CBC: Track disturbances over time."""

View File

@ -63,14 +63,11 @@ class CropProductionTests(unittest.TestCase):
pandas.testing.assert_frame_equal(
expected_agg_result_table, agg_result_table, check_dtype=False)
result_table_path = os.path.join(
args['workspace_dir'], 'result_table.csv')
expected_result_table_path = os.path.join(
TEST_DATA_PATH, 'expected_result_table.csv')
expected_result_table = pandas.read_csv(
expected_result_table_path)
os.path.join(TEST_DATA_PATH, 'expected_result_table.csv')
)
result_table = pandas.read_csv(
result_table_path)
os.path.join(args['workspace_dir'], 'result_table.csv'))
pandas.testing.assert_frame_equal(
expected_result_table, result_table, check_dtype=False)
@ -314,14 +311,10 @@ class CropProductionTests(unittest.TestCase):
crop_production_regression.execute(args)
agg_result_table_path = os.path.join(
args['workspace_dir'], 'aggregate_results.csv')
expected_agg_result_table_path = os.path.join(
TEST_DATA_PATH, 'expected_regression_aggregate_results.csv')
expected_agg_result_table = pandas.read_csv(
expected_agg_result_table_path)
os.path.join(TEST_DATA_PATH, 'expected_regression_aggregate_results.csv'))
agg_result_table = pandas.read_csv(
agg_result_table_path)
os.path.join(args['workspace_dir'], 'aggregate_results.csv'))
pandas.testing.assert_frame_equal(
expected_agg_result_table, agg_result_table, check_dtype=False)
@ -387,14 +380,10 @@ class CropProductionTests(unittest.TestCase):
crop_production_regression.execute(args)
result_table_path = os.path.join(
args['workspace_dir'], 'result_table.csv')
expected_result_table_path = os.path.join(
TEST_DATA_PATH, 'expected_regression_result_table_no_nodata.csv')
expected_result_table = pandas.read_csv(
expected_result_table_path)
expected_result_table = pandas.read_csv(os.path.join(
TEST_DATA_PATH, 'expected_regression_result_table_no_nodata.csv'))
result_table = pandas.read_csv(
result_table_path)
os.path.join(args['workspace_dir'], 'result_table.csv'))
pandas.testing.assert_frame_equal(
expected_result_table, result_table, check_dtype=False)

View File

@ -378,8 +378,14 @@ class DatastackArchiveTests(unittest.TestCase):
filecmp.cmp(archive_params[key], params[key], shallow=False))
spatial_csv_dict = utils.read_csv_to_dataframe(
archive_params['spatial_table'], 'ID',
convert_cols_to_lower=True, convert_vals_to_lower=True).to_dict(orient='index')
archive_params['spatial_table'],
{
'index_col': 'id',
'columns': {
'id': {'type': 'integer'},
'path': {'type': 'file'}
}
}).to_dict(orient='index')
spatial_csv_dir = os.path.dirname(archive_params['spatial_table'])
numpy.testing.assert_allclose(
pygeoprocessing.raster_to_numpy_array(

View File

@ -1934,7 +1934,7 @@ class HabitatQualityTests(unittest.TestCase):
self.assertTrue(
validate_result,
"expected failed validations instead didn't get any.")
self.assertIn("Bounding boxes do not intersect", validate_result[0][1])
self.assertIn("bounding boxes must intersect", validate_result[0][1])
def test_habitat_quality_argspec_missing_projection(self):
"""Habitat Quality: raise error on missing projection."""

View File

@ -514,40 +514,6 @@ class HRAUnitTests(unittest.TestCase):
(source_array != nodata).astype(numpy.uint8)
)
def test_create_raster_from_bounding_box(self):
"""HRA: test creation of a raster from a bbox."""
from natcap.invest import hra
# [minx, miny, maxx, maxy]
bounding_box = [
ORIGIN[0],
ORIGIN[1] - 100, # force rounding up of pixel dimensions
ORIGIN[0] + 90, # no rounding up needed
ORIGIN[1],
]
pixel_size = (30, -30)
target_raster_path = os.path.join(self.workspace_dir, 'raster.tif')
hra._create_raster_from_bounding_box(
target_raster_path, bounding_box, pixel_size, gdal.GDT_Byte,
SRS_WKT, target_nodata=2, fill_value=2)
try:
raster = gdal.OpenEx(target_raster_path)
band = raster.GetRasterBand(1)
self.assertEqual(
raster.GetGeoTransform(),
(ORIGIN[0], pixel_size[0], 0.0, ORIGIN[1], 0.0, pixel_size[1])
)
self.assertEqual(raster.RasterXSize, 3)
self.assertEqual(raster.RasterYSize, 4)
self.assertEqual(band.GetNoDataValue(), 2)
numpy.testing.assert_array_equal(
band.ReadAsArray(),
numpy.full((4, 3), 2, dtype=numpy.uint8))
finally:
band = None
raster = None
def test_align(self):
"""HRA: test alignment function."""
from natcap.invest import hra
@ -749,21 +715,22 @@ class HRAUnitTests(unittest.TestCase):
# No matter the supported file format, make sure we have consistent
# table headings.
source_df = pandas.read_csv(io.StringIO(textwrap.dedent("""\
FOO,bar,BaZ,path
1, 2, 3,foo.tif""")))
source_df = pandas.DataFrame({
'name': pandas.Series(['1'], dtype='string'),
'type': pandas.Series(['2'], dtype='string'),
'stressor buffer (meters)': pandas.Series([3], dtype=float),
'path': pandas.Series(['foo.tif'], dtype='string')
})
expected_df = source_df.copy() # defaults to a deepcopy.
expected_df.columns = expected_df.columns.str.lower()
expected_df['path'] = [os.path.join(self.workspace_dir, 'foo.tif')]
expected_df = source_df.copy().set_index('name') # defaults to a deepcopy.
expected_df['path']['1'] = os.path.join(self.workspace_dir, 'foo.tif')
for filename, func in [('target.csv', source_df.to_csv),
('target.xlsx', source_df.to_excel)]:
full_filepath = os.path.join(self.workspace_dir, filename)
func(full_filepath, index=False)
opened_df = hra._open_table_as_dataframe(full_filepath)
pandas.testing.assert_frame_equal(expected_df, opened_df)
pandas.testing.assert_frame_equal(expected_df, opened_df, check_index_type=False)
def test_pairwise_risk(self):
"""HRA: check pairwise risk calculations."""

View File

@ -173,7 +173,11 @@ class ValidateModelSpecs(unittest.TestCase):
spec['columns'][column],
f'{key}.columns.{column}',
parent_type=t)
if 'index_col' in spec:
self.assertIn(spec['index_col'], spec['columns'])
attrs.discard('columns')
attrs.discard('index_col')
elif t == 'directory':
# directory type should have a contents property that maps each
@ -249,6 +253,7 @@ class ValidateModelSpecs(unittest.TestCase):
types = arg['type'] if isinstance(
arg['type'], set) else [arg['type']]
attrs = set(arg.keys())
for t in types:
self.assertIn(t, valid_nested_types[parent_type])
@ -395,8 +400,12 @@ class ValidateModelSpecs(unittest.TestCase):
f'{name}.{direction}.{header}',
parent_type=t)
attrs.discard('rows')
attrs.discard('columns')
if 'index_col' in arg:
self.assertIn(arg['index_col'], arg['columns'])
attrs.discard('index_col')
attrs.discard('rows')
attrs.discard('columns')
# csv type may optionally have an 'excel_ok' attribute
if 'excel_ok' in arg:

View File

@ -107,7 +107,6 @@ class NDRTests(unittest.TestCase):
# use predefined directory so test can clean up files during teardown
args = NDRTests.generate_base_args(self.workspace_dir)
# make args explicit that this is a base run of SWY
args['biophysical_table_path'] = os.path.join(
REGRESSION_DATA, 'input', 'biophysical_table_missing_headers.csv')
with self.assertRaises(ValueError):
@ -171,7 +170,6 @@ class NDRTests(unittest.TestCase):
# use predefined directory so test can clean up files during teardown
args = NDRTests.generate_base_args(self.workspace_dir)
# make args explicit that this is a base run of SWY
args['biophysical_table_path'] = os.path.join(
REGRESSION_DATA, 'input', 'biophysical_table_missing_lucode.csv')
with self.assertRaises(KeyError) as cm:
@ -187,7 +185,6 @@ class NDRTests(unittest.TestCase):
# use predefined directory so test can clean up files during teardown
args = NDRTests.generate_base_args(self.workspace_dir)
# make args explicit that this is a base run of SWY
args['calc_n'] = False
args['calc_p'] = False
with self.assertRaises(ValueError):
@ -210,8 +207,6 @@ class NDRTests(unittest.TestCase):
os.path.join(self.workspace_dir, 'watershed_results_ndr.gpkg'),
'wb') as f:
f.write(b'')
# make args explicit that this is a base run of SWY
ndr.execute(args)
result_vector = ogr.Open(os.path.join(
@ -248,6 +243,53 @@ class NDRTests(unittest.TestCase):
args['workspace_dir'], 'intermediate_outputs',
'what_drains_to_stream.tif')))
def test_regression_undefined_nodata(self):
"""NDR test when DEM, LULC and runoff proxy have undefined nodata."""
from natcap.invest.ndr import ndr
# use predefined directory so test can clean up files during teardown
args = NDRTests.generate_base_args(self.workspace_dir)
# unset nodata values for DEM, LULC, and runoff proxy
# this is ok because the test data is 100% valid
# regression test for https://github.com/natcap/invest/issues/1005
for key in ['runoff_proxy_path', 'dem_path', 'lulc_path']:
target_path = os.path.join(self.workspace_dir, f'{key}_no_nodata.tif')
source = gdal.OpenEx(args[key], gdal.OF_RASTER)
driver = gdal.GetDriverByName('GTIFF')
target = driver.CreateCopy(target_path, source)
target.GetRasterBand(1).DeleteNoDataValue()
source, target = None, None
args[key] = target_path
ndr.execute(args)
result_vector = ogr.Open(os.path.join(
args['workspace_dir'], 'watershed_results_ndr.gpkg'))
result_layer = result_vector.GetLayer()
result_feature = result_layer.GetFeature(1)
result_layer = None
result_vector = None
mismatch_list = []
# these values were generated by manual inspection of regression
# results
for field, expected_value in [
('p_surface_load', 41.921860),
('p_surface_export', 5.899117),
('n_surface_load', 2978.519775),
('n_surface_export', 289.0498),
('n_subsurface_load', 28.614094),
('n_subsurface_export', 15.61077),
('n_total_export', 304.660614)]:
val = result_feature.GetField(field)
if not numpy.isclose(val, expected_value):
mismatch_list.append(
(field, 'expected: %f' % expected_value,
'actual: %f' % val))
result_feature = None
if mismatch_list:
raise RuntimeError("results not expected: %s" % mismatch_list)
def test_validation(self):
"""NDR test argument validation."""
from natcap.invest import validation

View File

@ -581,14 +581,14 @@ class TestRecServer(unittest.TestCase):
expected_grid_vector_path = os.path.join(
REGRESSION_DATA, 'predictor_data_all_metrics.shp')
utils._assert_vectors_equal(
out_grid_vector_path, expected_grid_vector_path, 1e-3)
expected_grid_vector_path, out_grid_vector_path, 1e-3)
out_scenario_path = os.path.join(
args['workspace_dir'], 'scenario_results.shp')
expected_scenario_path = os.path.join(
REGRESSION_DATA, 'scenario_results_all_metrics.shp')
utils._assert_vectors_equal(
out_scenario_path, expected_scenario_path, 1e-3)
expected_scenario_path, out_scenario_path, 1e-3)
def test_results_suffix_on_serverside_files(self):
"""Recreation test suffix gets added to files created on server."""
@ -924,7 +924,7 @@ class RecreationRegressionTests(unittest.TestCase):
REGRESSION_DATA, 'square_grid_vector_path.shp')
utils._assert_vectors_equal(
out_grid_vector_path, expected_grid_vector_path)
expected_grid_vector_path, out_grid_vector_path)
def test_hex_grid(self):
"""Recreation hex grid regression test."""
@ -941,7 +941,7 @@ class RecreationRegressionTests(unittest.TestCase):
REGRESSION_DATA, 'hex_grid_vector_path.shp')
utils._assert_vectors_equal(
out_grid_vector_path, expected_grid_vector_path)
expected_grid_vector_path, out_grid_vector_path)
@unittest.skip("skipping to avoid remote server call (issue #3753)")
def test_no_grid_execute(self):
@ -1007,7 +1007,7 @@ class RecreationRegressionTests(unittest.TestCase):
REGRESSION_DATA, 'hex_grid_vector_path.shp')
utils._assert_vectors_equal(
out_grid_vector_path, expected_grid_vector_path)
expected_grid_vector_path, out_grid_vector_path)
def test_existing_regression_coef(self):
"""Recreation test regression coefficients handle existing output."""
@ -1030,8 +1030,9 @@ class RecreationRegressionTests(unittest.TestCase):
# make outputs to be overwritten
predictor_dict = utils.read_csv_to_dataframe(
predictor_table_path, 'id',
convert_cols_to_lower=True, convert_vals_to_lower=True).to_dict(orient='index')
predictor_table_path,
recmodel_client.MODEL_SPEC['args']['predictor_table_path']
).to_dict(orient='index')
predictor_list = predictor_dict.keys()
tmp_working_dir = tempfile.mkdtemp(dir=self.workspace_dir)
empty_json_list = [
@ -1057,7 +1058,7 @@ class RecreationRegressionTests(unittest.TestCase):
REGRESSION_DATA, 'test_regression_coefficients.shp')
utils._assert_vectors_equal(
out_coefficient_vector_path, expected_coeff_vector_path, 1e-6)
expected_coeff_vector_path, out_coefficient_vector_path, 1e-6)
def test_predictor_table_absolute_paths(self):
"""Recreation test validation from full path."""

View File

@ -126,7 +126,8 @@ class ScenicQualityTests(unittest.TestCase):
with self.assertRaises(AssertionError) as cm:
scenic_quality._determine_valid_viewpoints(
dem_path, viewpoints_path)
self.assertIn('Feature 1 is not a Point geometry', str(cm.exception))
self.assertIn('Feature 1 must be a POINT geometry, not LINESTRING',
str(cm.exception))
def test_exception_when_no_structures_aoi_overlap(self):
"""SQ: model raises exception when AOI does not overlap structures."""

View File

@ -141,11 +141,11 @@ class SDRTests(unittest.TestCase):
sdr.execute(args)
expected_results = {
'usle_tot': 13.90210914612,
'sed_export': 0.55185163021,
'sed_dep': 8.80130577087,
'avoid_exp': 57971.87890625,
'avoid_eros': 1458232.5,
'usle_tot': 2.62457418442,
'sed_export': 0.09748090804,
'sed_dep': 1.71672844887,
'avoid_exp': 10199.7490234375,
'avoid_eros': 274510.75,
}
vector_path = os.path.join(
@ -213,10 +213,10 @@ class SDRTests(unittest.TestCase):
sdr.execute(args)
expected_results = {
'sed_export': 0.55185163021,
'usle_tot': 13.90210914612,
'avoid_exp': 57971.87890625,
'avoid_eros': 1458232.5,
'sed_export': 0.09748090804,
'usle_tot': 2.62457418442,
'avoid_exp': 10199.7490234375,
'avoid_eros': 274510.75,
}
vector_path = os.path.join(
@ -238,10 +238,10 @@ class SDRTests(unittest.TestCase):
sdr.execute(args)
expected_results = {
'sed_export': 0.67064666748,
'usle_tot': 12.6965303421,
'avoid_exp': 69130.8203125,
'avoid_eros': 1317588.375,
'sed_export': 0.08896198869,
'usle_tot': 1.86480903625,
'avoid_exp': 9204.283203125,
'avoid_eros': 194613.28125,
}
vector_path = os.path.join(
@ -264,10 +264,10 @@ class SDRTests(unittest.TestCase):
sdr.execute(args)
expected_results = {
'sed_export': 0.97192692757,
'usle_tot': 12.68887424469,
'avoid_exp': 100960.9609375,
'avoid_eros': 1329122.0,
'sed_export': 0.17336219549,
'usle_tot': 2.56186032295,
'avoid_exp': 17980.52734375,
'avoid_eros': 267931.71875,
}
vector_path = os.path.join(
@ -303,8 +303,7 @@ class SDRTests(unittest.TestCase):
with self.assertRaises(ValueError) as context:
sdr.execute(args)
self.assertIn(
f'A value in the biophysical table is not a number '
f'within range 0..1.', str(context.exception))
f'could not be interpreted as ratios', str(context.exception))
def test_lucode_not_a_number(self):
"""SDR test expected exception for invalid data in lucode column."""
@ -325,8 +324,7 @@ class SDRTests(unittest.TestCase):
with self.assertRaises(ValueError) as context:
sdr.execute(args)
self.assertIn(
f'Value "{invalid_value}" from the "lucode" column of the '
f'biophysical table is not a number.', str(context.exception))
'could not be interpreted as integers', str(context.exception))
def test_missing_lulc_value(self):
"""SDR test for ValueError when LULC value not found in table."""
@ -393,3 +391,43 @@ class SDRTests(unittest.TestCase):
what_drains = pygeoprocessing.raster_to_numpy_array(
target_what_drains_path)
numpy.testing.assert_allclose(what_drains, expected_drainage)
def test_ls_factor(self):
"""SDR test for our LS Factor function."""
from natcap.invest.sdr import sdr
nodata = -1
# These varying percent slope values should cover all of the slope
# factor and slope table cases.
pct_slope_array = numpy.array(
[[1.5, 4, 8, 10, 15, nodata]], dtype=numpy.float32)
flow_accum_array = numpy.array(
[[100, 100, 100, 100, 10000000, nodata]], dtype=numpy.float32)
l_max = 25 # affects the last item in the array only
srs = osr.SpatialReference()
srs.ImportFromEPSG(26910) # NAD83 / UTM zone 11N
srs_wkt = srs.ExportToWkt()
origin = (463250, 4929700)
pixel_size = (30, -30)
pct_slope_path = os.path.join(self.workspace_dir, 'pct_slope.tif')
pygeoprocessing.numpy_array_to_raster(
pct_slope_array, nodata, pixel_size, origin, srs_wkt,
pct_slope_path)
flow_accum_path = os.path.join(self.workspace_dir, 'flow_accum.tif')
pygeoprocessing.numpy_array_to_raster(
flow_accum_array, nodata, pixel_size, origin, srs_wkt,
flow_accum_path)
target_ls_factor_path = os.path.join(self.workspace_dir, 'ls.tif')
sdr._calculate_ls_factor(flow_accum_path, pct_slope_path, l_max,
target_ls_factor_path)
ls = pygeoprocessing.raster_to_numpy_array(target_ls_factor_path)
expected_ls = numpy.array(
[[0.253996, 0.657229, 1.345856, 1.776729, 49.802994, -1]],
dtype=numpy.float32)
numpy.testing.assert_allclose(ls, expected_ls, rtol=1e-6)

View File

@ -737,8 +737,8 @@ class SeasonalWaterYieldRegressionTests(unittest.TestCase):
with self.assertRaises(ValueError) as context:
seasonal_water_yield.execute(args)
self.assertTrue(
'expecting all floating point numbers' in str(context.exception))
self.assertIn(
'could not be interpreted as numbers', str(context.exception))
def test_monthly_alpha_regression(self):
"""SWY monthly alpha values regression test on sample data.
@ -974,12 +974,6 @@ class SeasonalWaterYieldRegressionTests(unittest.TestCase):
precip_array = numpy.array([
[10, 10],
[10, 10]], dtype=numpy.float32)
lulc_array = numpy.array([
[1, 1],
[2, 2]], dtype=numpy.float32)
cn_array = numpy.array([
[40, 40],
[80, 80]], dtype=numpy.float32)
si_array = numpy.array([
[15, 15],
[2.5, 2.5]], dtype=numpy.float32)
@ -990,13 +984,12 @@ class SeasonalWaterYieldRegressionTests(unittest.TestCase):
[0, 0],
[0, 0]], dtype=numpy.float32)
# results calculated by wolfram alpha
expected_quickflow_array = numpy.array([
[-4.82284552e-36, -4.82284552e-36],
[ 6.19275831e-01, 6.19275831e-01]])
[0, 0],
[0.61928378, 0.61928378]])
precip_path = os.path.join(self.workspace_dir, 'precip.tif')
lulc_path = os.path.join(self.workspace_dir, 'lulc.tif')
cn_path = os.path.join(self.workspace_dir, 'cn.tif')
si_path = os.path.join(self.workspace_dir, 'si.tif')
n_events_path = os.path.join(self.workspace_dir, 'n_events.tif')
stream_path = os.path.join(self.workspace_dir, 'stream.tif')
@ -1008,13 +1001,11 @@ class SeasonalWaterYieldRegressionTests(unittest.TestCase):
# write all the test arrays to raster files
for array, path in [(precip_array, precip_path),
(lulc_array, lulc_path),
(n_events_array, n_events_path)]:
# make the nodata value undefined for user inputs
pygeoprocessing.numpy_array_to_raster(
array, None, (1, -1), (1180000, 690000), project_wkt, path)
for array, path in [(cn_array, cn_path),
(si_array, si_path),
for array, path in [(si_array, si_path),
(stream_mask, stream_path)]:
# define a nodata value for intermediate outputs
pygeoprocessing.numpy_array_to_raster(
@ -1022,13 +1013,119 @@ class SeasonalWaterYieldRegressionTests(unittest.TestCase):
# save the quickflow results raster to quickflow.tif
seasonal_water_yield._calculate_monthly_quick_flow(
precip_path, lulc_path, cn_path, n_events_path, stream_path,
si_path, output_path)
precip_path, n_events_path, stream_path, si_path, output_path)
# read the raster output back in to a numpy array
quickflow_array = pygeoprocessing.raster_to_numpy_array(output_path)
# assert each element is close to the expected value
self.assertTrue(numpy.isclose(
quickflow_array, expected_quickflow_array).all())
numpy.testing.assert_allclose(
quickflow_array, expected_quickflow_array, atol=1e-5)
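For orientation (restating the quick flow equation from the SWY user's guide,
not something introduced by this diff): on non-stream pixels,
QF_im = n_m * ((a_im - s_i) * exp(-0.2 * s_i / a_im)
               + (s_i**2 / a_im) * exp(0.8 * s_i / a_im) * E1(s_i / a_im)) * 25.4,
where a_im = P_im / (n_m * 25.4) and E1 is the exponential integral. Setting
s_i = 0 collapses this to n_m * a_im * 25.4 = P_im, which is what the next
test asserts.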
def test_monthly_quickflow_si_zero(self):
"""Test `_calculate_monthly_quick_flow` when s_i is zero"""
from natcap.invest.seasonal_water_yield import seasonal_water_yield
# QF should be equal to P when s_i is 0
precip_array = numpy.array([[10.5]], dtype=numpy.float32)
si_array = numpy.array([[0]], dtype=numpy.float32)
n_events_array = numpy.array([[10]], dtype=numpy.float32)
stream_mask = numpy.array([[0]], dtype=numpy.float32)
expected_quickflow_array = numpy.array([[10.5]])
precip_path = os.path.join(self.workspace_dir, 'precip.tif')
si_path = os.path.join(self.workspace_dir, 'si.tif')
n_events_path = os.path.join(self.workspace_dir, 'n_events.tif')
stream_path = os.path.join(self.workspace_dir, 'stream.tif')
srs = osr.SpatialReference()
srs.ImportFromEPSG(26910) # UTM Zone 10N
project_wkt = srs.ExportToWkt()
output_path = os.path.join(self.workspace_dir, 'quickflow.tif')
# write all the test arrays to raster files
for array, path in [(precip_array, precip_path),
(n_events_array, n_events_path),
(si_array, si_path),
(stream_mask, stream_path)]:
# define a nodata value for intermediate outputs
pygeoprocessing.numpy_array_to_raster(
array, -1, (1, -1), (1180000, 690000), project_wkt, path)
seasonal_water_yield._calculate_monthly_quick_flow(
precip_path, n_events_path, stream_path, si_path, output_path)
numpy.testing.assert_allclose(
pygeoprocessing.raster_to_numpy_array(output_path),
expected_quickflow_array, atol=1e-5)
def test_monthly_quickflow_large_si_aim_ratio(self):
"""Test `_calculate_monthly_quick_flow` with large s_i/a_im ratio"""
from natcap.invest.seasonal_water_yield import seasonal_water_yield
# with these values, the QF equation would overflow float32 if
# we didn't catch it early
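# back-of-envelope with these (hypothetical) numbers, using the user's guide
# relation a_im = P_im / (n_m * 25.4): a_im = 6 / (10 * 25.4) ~ 0.024, so
# s_i / a_im ~ 988 and exp(0.8 * s_i / a_im) ~ exp(790), far beyond float32's
# maximum of about 3.4e38; hence the expected result of 0.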
precip_array = numpy.array([[6]], dtype=numpy.float32)
si_array = numpy.array([[23.33]], dtype=numpy.float32)
n_events_array = numpy.array([[10]], dtype=numpy.float32)
stream_mask = numpy.array([[0]], dtype=numpy.float32)
expected_quickflow_array = numpy.array([[0]])
precip_path = os.path.join(self.workspace_dir, 'precip.tif')
si_path = os.path.join(self.workspace_dir, 'si.tif')
n_events_path = os.path.join(self.workspace_dir, 'n_events.tif')
stream_path = os.path.join(self.workspace_dir, 'stream.tif')
srs = osr.SpatialReference()
srs.ImportFromEPSG(26910) # UTM Zone 10N
project_wkt = srs.ExportToWkt()
output_path = os.path.join(self.workspace_dir, 'quickflow.tif')
# write all the test arrays to raster files
for array, path in [(precip_array, precip_path),
(n_events_array, n_events_path),
(si_array, si_path),
(stream_mask, stream_path)]:
# define a nodata value for intermediate outputs
pygeoprocessing.numpy_array_to_raster(
array, -1, (1, -1), (1180000, 690000), project_wkt, path)
seasonal_water_yield._calculate_monthly_quick_flow(
precip_path, n_events_path, stream_path, si_path, output_path)
numpy.testing.assert_allclose(
pygeoprocessing.raster_to_numpy_array(output_path),
expected_quickflow_array, atol=1e-5)
def test_monthly_quickflow_negative_values_set_to_zero(self):
"""Test `_calculate_monthly_quick_flow` with negative QF result"""
from natcap.invest.seasonal_water_yield import seasonal_water_yield
# with these values, the QF equation evaluates to a small negative
# number. assert that it is set to zero
precip_array = numpy.array([[30]], dtype=numpy.float32)
si_array = numpy.array([[10]], dtype=numpy.float32)
n_events_array = numpy.array([[10]], dtype=numpy.float32)
stream_mask = numpy.array([[0]], dtype=numpy.float32)
expected_quickflow_array = numpy.array([[0]])
precip_path = os.path.join(self.workspace_dir, 'precip.tif')
si_path = os.path.join(self.workspace_dir, 'si.tif')
n_events_path = os.path.join(self.workspace_dir, 'n_events.tif')
stream_path = os.path.join(self.workspace_dir, 'stream.tif')
srs = osr.SpatialReference()
srs.ImportFromEPSG(26910) # UTM Zone 10N
project_wkt = srs.ExportToWkt()
output_path = os.path.join(self.workspace_dir, 'quickflow.tif')
# write all the test arrays to raster files
for array, path in [(precip_array, precip_path),
(n_events_array, n_events_path),
(si_array, si_path),
(stream_mask, stream_path)]:
# define a nodata value for intermediate outputs
pygeoprocessing.numpy_array_to_raster(
array, -1, (1, -1), (1180000, 690000), project_wkt, path)
seasonal_water_yield._calculate_monthly_quick_flow(
precip_path, n_events_path, stream_path, si_path, output_path)
numpy.testing.assert_allclose(
pygeoprocessing.raster_to_numpy_array(output_path),
expected_quickflow_array, atol=1e-5)
def test_calculate_annual_qfi_different_nodata_areas(self):
"""Test with qf rasters with different areas of nodata."""
@ -1079,8 +1176,8 @@ class SeasonalWaterYieldRegressionTests(unittest.TestCase):
[100, 100],
[200, 200]], dtype=numpy.float32)
quickflow_array = numpy.array([
[-4.8e-36, -4.822e-36],
[ 6.1e-01, 6.1e-01]], dtype=numpy.float32)
[0, 0],
[0.61, 0.61]], dtype=numpy.float32)
flow_dir_array = numpy.array([
[15, 25],
[50, 50]], dtype=numpy.float32)

View File

@ -360,3 +360,17 @@ class UFRMTests(unittest.TestCase):
[(['curve_number_table_path'],
validation.MESSAGES['MATCHED_NO_HEADERS'].format(
header='column', header_name='cn_a'))])
# test missing CN_X values raise warnings
args = self._make_args()
cn_table = pandas.read_csv(args['curve_number_table_path'])
cn_table.at[0, 'CN_A'] = numpy.nan
new_cn_path = os.path.join(
self.workspace_dir, 'cn_missing_value_table.csv')
cn_table.to_csv(new_cn_path, index=False)
args['curve_number_table_path'] = new_cn_path
result = urban_flood_risk_mitigation.validate(args)
self.assertEqual(
result,
[(['curve_number_table_path'],
'Missing curve numbers for lucode(s) [0]')])

View File

@ -85,7 +85,8 @@ def _build_model_args(workspace):
6,0,100
7,1,100
8,0,100
9,1,100"""))
9,1,100
"""))
admin_geom = [
shapely.geometry.box(
@ -342,7 +343,7 @@ class UNATests(unittest.TestCase):
from natcap.invest import urban_nature_access
nodata = urban_nature_access.FLOAT32_NODATA
urban_nature_supply = numpy.array([
urban_nature_supply_percapita = numpy.array([
[nodata, 100.5],
[75, 100]], dtype=numpy.float32)
urban_nature_demand = 50
@ -353,7 +354,7 @@ class UNATests(unittest.TestCase):
urban_nature_budget = (
urban_nature_access._urban_nature_balance_percapita_op(
urban_nature_supply, urban_nature_demand))
urban_nature_supply_percapita, urban_nature_demand))
expected_urban_nature_budget = numpy.array([
[nodata, 50.5],
[25, 50]], dtype=numpy.float32)
@ -480,6 +481,16 @@ class UNATests(unittest.TestCase):
admin_vector = None
admin_layer = None
accessible_urban_nature_array = pygeoprocessing.raster_to_numpy_array(
os.path.join(args['workspace_dir'], 'output',
'accessible_urban_nature_suffix.tif'))
valid_mask = ~utils.array_equals_nodata(
accessible_urban_nature_array, urban_nature_access.FLOAT32_NODATA)
valid_pixels = accessible_urban_nature_array[valid_mask]
self.assertAlmostEqual(numpy.sum(valid_pixels), 6221004.41259766)
self.assertAlmostEqual(numpy.min(valid_pixels), 1171.7352294921875)
self.assertAlmostEqual(numpy.max(valid_pixels), 11898.0712890625)
def test_split_urban_nature(self):
from natcap.invest import urban_nature_access
@ -532,6 +543,23 @@ class UNATests(unittest.TestCase):
admin_vector = None
admin_layer = None
output_dir = os.path.join(args['workspace_dir'], 'output')
self._assert_urban_nature(os.path.join(
output_dir, 'accessible_urban_nature_lucode_1_suffix.tif'),
72000.0, 0.0, 900.0)
self._assert_urban_nature(os.path.join(
output_dir, 'accessible_urban_nature_lucode_3_suffix.tif'),
1034934.9864730835, 0.0, 4431.1650390625)
self._assert_urban_nature(os.path.join(
output_dir, 'accessible_urban_nature_lucode_5_suffix.tif'),
2837622.9519348145, 0.0, 8136.6884765625)
self._assert_urban_nature(os.path.join(
output_dir, 'accessible_urban_nature_lucode_7_suffix.tif'),
8112734.805541992, 2019.2935791015625, 17729.431640625)
self._assert_urban_nature(os.path.join(
output_dir, 'accessible_urban_nature_lucode_9_suffix.tif'),
7744116.974121094, 1567.57958984375, 12863.4619140625)
def test_split_population(self):
"""UNA: test split population optional module.
@ -602,6 +630,36 @@ class UNATests(unittest.TestCase):
rtol=1e-6
)
def _assert_urban_nature(self, path, sum_value, min_value, max_value):
"""Compare a raster's sum, min and max to given values.
The raster is assumed to be an accessible urban nature raster.
Args:
path (str): The path to an urban nature raster.
sum_value (float): The expected sum of the raster.
min_value (float): The expected min of the raster.
max_value (float): The expected max of the raster.
Returns:
``None``
Raises:
AssertionError: When the raster's sum, min or max values are not
numerically close to the expected values.
"""
from natcap.invest import urban_nature_access
accessible_urban_nature_array = (
pygeoprocessing.raster_to_numpy_array(path))
valid_mask = ~utils.array_equals_nodata(
accessible_urban_nature_array,
urban_nature_access.FLOAT32_NODATA)
valid_pixels = accessible_urban_nature_array[valid_mask]
self.assertAlmostEqual(numpy.sum(valid_pixels), sum_value)
self.assertAlmostEqual(numpy.min(valid_pixels), min_value)
self.assertAlmostEqual(numpy.max(valid_pixels), max_value)
def test_radii_by_pop_group(self):
"""UNA: Test defining radii by population group."""
from natcap.invest import urban_nature_access
@ -666,11 +724,19 @@ class UNATests(unittest.TestCase):
self.assertAlmostEqual(
expected_value, summary_feature.GetField(fieldname))
output_dir = os.path.join(args['workspace_dir'], 'output')
self._assert_urban_nature(os.path.join(
output_dir, 'accessible_urban_nature_to_pop_male.tif'),
6221004.412597656, 1171.7352294921875, 11898.0712890625)
self._assert_urban_nature(os.path.join(
output_dir, 'accessible_urban_nature_to_pop_female.tif'),
6221004.412597656, 1171.7352294921875, 11898.0712890625)
def test_modes_same_radii_same_results(self):
"""UNA: all modes have same results when consistent radii.
Although the different modes have different ways of defining their
search radii, the urban_nature_supply raster should be numerically
search radii, the urban_nature_supply_percapita raster should be numerically
equivalent if they all use the same search radii.
This is a good gut-check of basic model behavior across modes.
@ -772,16 +838,19 @@ class UNATests(unittest.TestCase):
uniform_radius_supply = pygeoprocessing.raster_to_numpy_array(
os.path.join(uniform_args['workspace_dir'], 'output',
'urban_nature_supply_uniform.tif'))
split_urban_nature_supply = pygeoprocessing.raster_to_numpy_array(
os.path.join(split_urban_nature_args['workspace_dir'], 'output',
'urban_nature_supply_urban_nature.tif'))
'urban_nature_supply_percapita_uniform.tif'))
split_urban_nature_supply_percapita = (
pygeoprocessing.raster_to_numpy_array(
os.path.join(
split_urban_nature_args['workspace_dir'], 'output',
'urban_nature_supply_percapita_urban_nature.tif')))
split_pop_groups_supply = pygeoprocessing.raster_to_numpy_array(
os.path.join(pop_group_args['workspace_dir'], 'output',
'urban_nature_supply_popgroup.tif'))
'urban_nature_supply_percapita_popgroup.tif'))
numpy.testing.assert_allclose(
uniform_radius_supply, split_urban_nature_supply, rtol=1e-6)
uniform_radius_supply, split_urban_nature_supply_percapita,
rtol=1e-6)
numpy.testing.assert_allclose(
uniform_radius_supply, split_pop_groups_supply, rtol=1e-6)
@ -893,9 +962,76 @@ class UNATests(unittest.TestCase):
# TODO
pass
def test_urban_nature_proportion(self):
"""UNA: Run the model with urban nature proportion."""
from natcap.invest import urban_nature_access
args = _build_model_args(self.workspace_dir)
args['search_radius_mode'] = urban_nature_access.RADIUS_OPT_UNIFORM
args['search_radius'] = 1000
with open(args['lulc_attribute_table'], 'a') as attr_table:
attr_table.write("10,0.5,100\n")
# make sure our inputs validate
validation_results = urban_nature_access.validate(args)
self.assertEqual(validation_results, [])
urban_nature_access.execute(args)
def test_reclassify_urban_nature(self):
"""UNA: Test for urban nature area reclassification."""
from natcap.invest import urban_nature_access
args = _build_model_args(self.workspace_dir)
# Rewrite the lulc attribute table to use proportions of urban nature.
with open(args['lulc_attribute_table'], 'w') as attr_table:
attr_table.write(textwrap.dedent(
"""\
lucode,urban_nature,search_radius_m
0,0,100
1,0.1,100
2,0,100
3,0.3,100
4,0,100
5,0.5,100
6,0,100
7,0.7,100
8,0,100
9,0.9,100
"""))
urban_nature_area_path = os.path.join(
self.workspace_dir, 'urban_nature_area.tif')
for limit_to_lucodes in (None, set([1, 3])):
urban_nature_access._reclassify_urban_nature_area(
args['lulc_raster_path'], args['lulc_attribute_table'],
urban_nature_area_path,
only_these_urban_nature_codes=limit_to_lucodes)
# The source lulc is randomized, so need to programmatically build
# up the expected array.
source_lulc_array = pygeoprocessing.raster_to_numpy_array(
args['lulc_raster_path'])
pixel_area = abs(_DEFAULT_PIXEL_SIZE[0] * _DEFAULT_PIXEL_SIZE[1])
expected_array = numpy.zeros(source_lulc_array.shape,
dtype=numpy.float32)
for i in range(1, 10, 2):
if limit_to_lucodes is not None:
if i not in limit_to_lucodes:
continue
factor = float(f"0.{i}")
expected_array[source_lulc_array == i] = factor * pixel_area
reclassified_array = pygeoprocessing.raster_to_numpy_array(
urban_nature_area_path)
numpy.testing.assert_array_almost_equal(
reclassified_array, expected_array)
def test_validate(self):
"""UNA: Basic test for validation."""
from natcap.invest import urban_nature_access
args = _build_model_args(self.workspace_dir)
args['search_radius_mode'] = urban_nature_access.RADIUS_OPT_URBAN_NATURE
args['search_radius_mode'] = (
urban_nature_access.RADIUS_OPT_URBAN_NATURE)
self.assertEqual(urban_nature_access.validate(args), [])

View File

@ -15,6 +15,7 @@ import warnings
import numpy
import numpy.testing
import pandas as pd
import pygeoprocessing
from osgeo import gdal
from osgeo import ogr
@ -254,7 +255,7 @@ class GaussianDecayUtilsTests(unittest.TestCase):
# The sigma*3 is the maximum radius from the center
# Anything greater than that distance should be set to 0 by the
# gaussian kernel creation function.
kernel[dist_from_center > (sigma * 3)] = 0.0
kernel[dist_from_center > (sigma * 3)] = 0
return kernel / numpy.sum(kernel)
expected_matrix = gkern()
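# --- Illustrative sketch (not part of the diff) ---
# A self-contained version of the truncated, normalized Gaussian kernel that
# the comment above describes. The function name and the sigma / kernel-size
# values below are assumptions chosen for demonstration only.
import numpy

def example_truncated_gaussian_kernel(sigma=5, n_pixels=31):
    center = n_pixels // 2
    rows, cols = numpy.mgrid[0:n_pixels, 0:n_pixels]
    dist_from_center = numpy.hypot(rows - center, cols - center)
    kernel = numpy.exp(-(dist_from_center ** 2) / (2 * sigma ** 2))
    # anything farther than sigma * 3 from the center is truncated to 0
    kernel[dist_from_center > (sigma * 3)] = 0
    # normalize so the kernel sums to 1
    return kernel / numpy.sum(kernel)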
@ -619,12 +620,14 @@ class ReadCSVToDataframeTests(unittest.TestCase):
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
"""\
HEADER,
A,
header,
a,
b
"""
))
df = utils.read_csv_to_dataframe(csv_file)
df = utils.read_csv_to_dataframe(
csv_file,
{'columns': {'header': {'type': 'freestyle_string'}}})
# header and table values should be lowercased
self.assertEqual(df.columns[0], 'header')
self.assertEqual(df['header'][0], 'a')
@ -642,15 +645,19 @@ class ReadCSVToDataframeTests(unittest.TestCase):
with open(table_path, 'w') as table_file:
table_file.write(csv_text)
result = utils.read_csv_to_dataframe(
table_path, 'lucode').to_dict(orient='index')
expected_result = {
1: {'desc': 'corn', 'val1': 0.5, 'val2': 2, 'lucode': 1},
2: {'desc': 'bread', 'val1': 1, 'val2': 4, 'lucode': 2},
3: {'desc': 'beans', 'val1': 0.5, 'val2': 4, 'lucode': 3},
4: {'desc': 'butter', 'val1': 9, 'val2': 1, 'lucode': 4}}
self.assertDictEqual(result, expected_result)
df = utils.read_csv_to_dataframe(
table_path,
{
'index_col': 'lucode',
'columns': {
'desc': {'type': 'freestyle_string'},
'lucode': {'type': 'integer'},
'val1': {'type': 'number'},
'val2': {'type': 'number'}
}})
self.assertEqual(df.index.name, 'lucode')
self.assertEqual(list(df.index.values), [1, 2, 3, 4])
self.assertEqual(df['desc'][2], 'bread')
def test_non_unique_keys(self):
"""utils: test error is raised if keys are not unique."""
@ -665,7 +672,16 @@ class ReadCSVToDataframeTests(unittest.TestCase):
table_file.write(csv_text)
with self.assertRaises(ValueError):
utils.read_csv_to_dataframe(table_path, 'lucode')
utils.read_csv_to_dataframe(
table_path,
{
'index_col': 'lucode',
'columns': {
'desc': {'type': 'freestyle_string'},
'lucode': {'type': 'integer'},
'val1': {'type': 'number'},
'val2': {'type': 'number'}
}})
def test_missing_key_field(self):
"""utils: test error is raised when missing key field."""
@ -680,29 +696,16 @@ class ReadCSVToDataframeTests(unittest.TestCase):
table_file.write(csv_text)
with self.assertRaises(KeyError):
utils.read_csv_to_dataframe(table_path, 'lucode')
def test_nan_holes(self):
"""utils: test empty strings returned when missing data is present."""
from natcap.invest import utils
csv_text = ("lucode,desc,val1,val2\n"
"1,corn,0.5,2\n"
"2,,1,4\n"
"3,beans,0.5,4\n"
"4,butter,,1")
table_path = os.path.join(self.workspace_dir, 'table.csv')
with open(table_path, 'w') as table_file:
table_file.write(csv_text)
result = utils.read_csv_to_dataframe(
table_path, 'lucode').to_dict(orient='index')
expected_result = {
1: {'desc': 'corn', 'val1': 0.5, 'val2': 2, 'lucode': 1},
2: {'desc': '', 'val1': 1, 'val2': 4, 'lucode': 2},
3: {'desc': 'beans', 'val1': 0.5, 'val2': 4, 'lucode': 3},
4: {'desc': 'butter', 'val1': '', 'val2': 1, 'lucode': 4}}
self.assertDictEqual(result, expected_result)
utils.read_csv_to_dataframe(
table_path,
{
'index_col': 'lucode',
'columns': {
'desc': {'type': 'freestyle_string'},
'lucode': {'type': 'integer'},
'val1': {'type': 'number'},
'val2': {'type': 'number'}
}})
def test_nan_row(self):
"""utils: test NaN row is dropped."""
@ -717,60 +720,88 @@ class ReadCSVToDataframeTests(unittest.TestCase):
table_file.write(csv_text)
result = utils.read_csv_to_dataframe(
table_path, 'lucode').to_dict(orient='index')
table_path,
{
'index_col': 'lucode',
'columns': {
'desc': {'type': 'freestyle_string'},
'lucode': {'type': 'integer'},
'val1': {'type': 'number'},
'val2': {'type': 'number'}
}}).to_dict(orient='index')
expected_result = {
1.0: {'desc': 'corn', 'val1': 0.5, 'val2': 2, 'lucode': 1.0},
3.0: {'desc': 'beans', 'val1': 0.5, 'val2': 4, 'lucode': 3.0},
4.0: {'desc': 'butter', 'val1': 9, 'val2': 1, 'lucode': 4.0}}
1: {'desc': 'corn', 'val1': 0.5, 'val2': 2},
3: {'desc': 'beans', 'val1': 0.5, 'val2': 4},
4: {'desc': 'butter', 'val1': 9, 'val2': 1}}
self.assertDictEqual(result, expected_result)
def test_column_subset(self):
"""utils: test column subset is properly returned."""
from natcap.invest import utils
csv_text = ("lucode,desc,val1,val2\n"
"1,corn,0.5,2\n"
"2,bread,1,4\n"
"3,beans,0.5,4\n"
"4,butter,9,1")
table_path = os.path.join(self.workspace_dir, 'table.csv')
with open(table_path, 'w') as table_file:
table_file.write(csv_text)
table_file.write(
"lucode,desc,val1,val2\n"
"1,corn,0.5,2\n"
"2,bread,1,4\n"
"3,beans,0.5,4\n"
"4,butter,9,1")
df = utils.read_csv_to_dataframe(
table_path,
{
'columns': {
'lucode': {'type': 'integer'},
'val1': {'type': 'number'},
'val2': {'type': 'number'}
}
})
self.assertEqual(list(df.columns), ['lucode', 'val1', 'val2'])
result = utils.read_csv_to_dataframe(
table_path, 'lucode',
usecols=['lucode', 'val1', 'val2']).to_dict(orient='index')
expected_result = {
1: {'val1': 0.5, 'val2': 2, 'lucode': 1},
2: {'val1': 1, 'val2': 4, 'lucode': 2},
3: {'val1': 0.5, 'val2': 4, 'lucode': 3},
4: {'val1': 9, 'val2': 1, 'lucode': 4}}
self.assertDictEqual(result, expected_result)
def test_column_pattern_matching(self):
"""utils: test column subset is properly returned."""
from natcap.invest import utils
table_path = os.path.join(self.workspace_dir, 'table.csv')
with open(table_path, 'w') as table_file:
table_file.write(
"lucode,grassland_value,forest_value,wetland_valueee\n"
"1,0.5,2\n"
"2,1,4\n"
"3,0.5,4\n"
"4,9,1")
df = utils.read_csv_to_dataframe(
table_path, {
'columns': {
'lucode': {'type': 'integer'},
'[HABITAT]_value': {'type': 'number'}
}
})
self.assertEqual(
list(df.columns), ['lucode', 'grassland_value', 'forest_value'])
def test_trailing_comma(self):
"""utils: test a trailing comma on first line is handled properly."""
from natcap.invest import utils
csv_text = ("lucode,desc,val1,val2\n"
"1,corn,0.5,2,\n"
"2,bread,1,4\n"
"3,beans,0.5,4\n"
"4,butter,9,1")
table_path = os.path.join(self.workspace_dir, 'table.csv')
with open(table_path, 'w') as table_file:
table_file.write(csv_text)
table_file.write(
"lucode,desc,val1,val2\n"
"1,corn,0.5,2,\n"
"2,bread,1,4\n"
"3,beans,0.5,4\n"
"4,butter,9,1")
result = utils.read_csv_to_dataframe(
table_path, 'lucode').to_dict(orient='index')
table_path,
{
'columns': {
'desc': {'type': 'freestyle_string'},
'lucode': {'type': 'integer'},
'val1': {'type': 'number'},
'val2': {'type': 'number'}
}})
self.assertEqual(result['val2'][0], 2)
self.assertEqual(result['lucode'][1], 2)
expected_result = {
1: {'desc': 'corn', 'val1': 0.5, 'val2': 2, 'lucode': 1},
2: {'desc': 'bread', 'val1': 1, 'val2': 4, 'lucode': 2},
3: {'desc': 'beans', 'val1': 0.5, 'val2': 4, 'lucode': 3},
4: {'desc': 'butter', 'val1': 9, 'val2': 1, 'lucode': 4}}
self.assertDictEqual(result, expected_result)
def test_trailing_comma_second_line(self):
"""utils: test a trailing comma on second line is handled properly."""
@ -785,58 +816,24 @@ class ReadCSVToDataframeTests(unittest.TestCase):
table_file.write(csv_text)
result = utils.read_csv_to_dataframe(
table_path, 'lucode').to_dict(orient='index')
table_path,
{
'index_col': 'lucode',
'columns': {
'desc': {'type': 'freestyle_string'},
'lucode': {'type': 'integer'},
'val1': {'type': 'number'},
'val2': {'type': 'number'}
}}).to_dict(orient='index')
expected_result = {
1: {'desc': 'corn', 'val1': 0.5, 'val2': 2, 'lucode': 1},
2: {'desc': 'bread', 'val1': 1, 'val2': 4, 'lucode': 2},
3: {'desc': 'beans', 'val1': 0.5, 'val2': 4, 'lucode': 3},
4: {'desc': 'butter', 'val1': 9, 'val2': 1, 'lucode': 4}}
1: {'desc': 'corn', 'val1': 0.5, 'val2': 2},
2: {'desc': 'bread', 'val1': 1, 'val2': 4},
3: {'desc': 'beans', 'val1': 0.5, 'val2': 4},
4: {'desc': 'butter', 'val1': 9, 'val2': 1}}
self.assertDictEqual(result, expected_result)
def test_results_lowercase_non_numeric(self):
"""utils: text handling of converting to lowercase."""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
"""\
header1,HEADER2,header3
1,2,bar
4,5,FOO
"""
))
lookup_dict = utils.read_csv_to_dataframe(
csv_file, 'header1').to_dict(orient='index')
self.assertEqual(lookup_dict[4]['header3'], 'foo')
self.assertEqual(lookup_dict[1]['header2'], 2)
def test_results_uppercase_numeric_cast(self):
"""utils: test handling of uppercase, num. casting, blank values."""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
"""\
header1,HEADER2,header3,missing_column,
1,2,3,
4,FOO,bar,
"""
))
lookup_dict = utils.read_csv_to_dataframe(
csv_file, 'header1',
convert_cols_to_lower=False, convert_vals_to_lower=False).to_dict(orient='index')
self.assertEqual(lookup_dict[4]['HEADER2'], 'FOO')
self.assertEqual(lookup_dict[4]['header3'], 'bar')
self.assertEqual(lookup_dict[1]['header1'], 1)
def test_csv_dialect_detection_semicolon_delimited(self):
"""utils: test that we can parse semicolon-delimited CSVs."""
from natcap.invest import utils
@ -851,13 +848,183 @@ class ReadCSVToDataframeTests(unittest.TestCase):
"""
))
lookup_dict = utils.read_csv_to_dataframe(
csv_file, 'header1',
convert_cols_to_lower=False, convert_vals_to_lower=False).to_dict(orient='index')
df = utils.read_csv_to_dataframe(
csv_file,
{'columns': {
'header1': {'type': 'integer'},
'header2': {'type': 'freestyle_string'},
'header3': {'type': 'freestyle_string'}
}
})
self.assertEqual(df['header2'][1], 'foo')
self.assertEqual(df['header3'][1], 'bar')
self.assertEqual(df['header1'][0], 1)
self.assertEqual(lookup_dict[4]['HEADER2'], 'FOO')
self.assertEqual(lookup_dict[4]['header3'], 'bar')
self.assertEqual(lookup_dict[1]['header1'], 1)
def test_convert_cols_to_lower(self):
"""utils: test that column names are converted to lowercase"""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
"""\
header,
A,
b
"""
))
df = utils.read_csv_to_dataframe(
csv_file, {'columns': {
'header': {'type': 'freestyle_string'}
}})
self.assertEqual(df['header'][0], 'a')
def test_convert_vals_to_lower(self):
"""utils: test that values are converted to lowercase"""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
"""\
HEADER,
a,
b
"""
))
df = utils.read_csv_to_dataframe(
csv_file, {'columns': {
'header': {'type': 'freestyle_string'}
}})
self.assertEqual(df.columns[0], 'header')
def test_integer_type_columns(self):
"""utils: integer column values are returned as integers."""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
"""\
id,header,
1,5.0,
2,-1,
3,
"""
))
df = utils.read_csv_to_dataframe(
csv_file, {'columns': {
'id': {'type': 'integer'},
'header': {'type': 'integer', 'na_allowed': True}}})
self.assertIsInstance(df['header'][0], numpy.int64)
self.assertIsInstance(df['header'][1], numpy.int64)
# empty values are returned as pandas.NA
self.assertTrue(pd.isna(df['header'][2]))
def test_float_type_columns(self):
"""utils: float column values are returned as floats."""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
"""\
h1,h2,h3
5,0.5,.4
-1,-.3,
"""
))
df = utils.read_csv_to_dataframe(
csv_file, {'columns': {
'h1': {'type': 'number'},
'h2': {'type': 'ratio'},
'h3': {'type': 'percent', 'na_allowed': True},
}})
self.assertEqual(df['h1'].dtype, float)
self.assertEqual(df['h2'].dtype, float)
self.assertEqual(df['h3'].dtype, float)
# empty values are returned as numpy.nan
self.assertTrue(numpy.isnan(df['h3'][1]))
def test_string_type_columns(self):
"""utils: string column values are returned as strings."""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
"""\
h1,h2,h3
1,a,foo
2,b,
"""
))
df = utils.read_csv_to_dataframe(
csv_file, {'columns': {
'h1': {'type': 'freestyle_string'},
'h2': {'type': 'option_string'},
'h3': {'type': 'freestyle_string'},
}})
self.assertEqual(df['h1'][0], '1')
self.assertEqual(df['h2'][1], 'b')
# empty values are returned as NA
self.assertTrue(pd.isna(df['h3'][1]))
def test_boolean_type_columns(self):
"""utils: boolean column values are returned as booleans."""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
"""\
index,h1
a,1
b,0
c,
"""
))
df = utils.read_csv_to_dataframe(
csv_file, {'columns': {
'index': {'type': 'freestyle_string'},
'h1': {'type': 'bool', 'na_allowed': True}}})
self.assertEqual(df['h1'][0], True)
self.assertEqual(df['h1'][1], False)
# empty values are returned as pandas.NA
self.assertTrue(pd.isna(df['h1'][2]))
def test_expand_path_columns(self):
"""utils: test values in path columns are expanded."""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
f"""\
bar,path
1,foo.txt
2,foo/bar.txt
3,foo\\bar.txt
4,{self.workspace_dir}/foo.txt
5,
"""
))
df = utils.read_csv_to_dataframe(
csv_file, {'columns': {
'bar': {'type': 'integer'},
'path': {'type': 'file'}
}})
self.assertEqual(
f'{self.workspace_dir}{os.sep}foo.txt',
df['path'][0])
self.assertEqual(
f'{self.workspace_dir}{os.sep}foo{os.sep}bar.txt',
df['path'][1])
self.assertEqual(
f'{self.workspace_dir}{os.sep}foo\\bar.txt',
df['path'][2])
self.assertEqual(
f'{self.workspace_dir}{os.sep}foo.txt',
df['path'][3])
# empty values are returned as NA
self.assertTrue(pd.isna(df['path'][4]))
def test_csv_utf8_encoding(self):
"""utils: test that CSV read correctly with UTF-8 encoding."""
@ -873,21 +1040,26 @@ class ReadCSVToDataframeTests(unittest.TestCase):
"""
))
lookup_dict = utils.read_csv_to_dataframe(
csv_file, 'header1').to_dict(orient='index')
csv_file,
{
'index_col': 'header1',
'columns': {
'header1': {'type': 'integer'},
'header2': {'type': 'integer'},
'header3': {'type': 'freestyle_string'}
}}).to_dict(orient='index')
self.assertEqual(lookup_dict[4]['header2'], 5)
self.assertEqual(lookup_dict[4]['header3'], 'foo')
self.assertEqual(lookup_dict[1]['header1'], 1)
def test_csv_utf8_bom_encoding(self):
def test_utf8_bom_encoding(self):
"""utils: test that CSV read correctly with UTF-8 BOM encoding."""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
# writing with utf-8-sig will prepend the BOM
with open(csv_file, 'w', encoding='utf-8-sig') as file_obj:
file_obj.write(textwrap.dedent(
"""\
header1,HEADER2,header3
header1,header2,header3
1,2,bar
4,5,FOO
"""
@ -895,18 +1067,20 @@ class ReadCSVToDataframeTests(unittest.TestCase):
# confirm that the file has the BOM prefix
with open(csv_file, 'rb') as file_obj:
self.assertTrue(file_obj.read().startswith(codecs.BOM_UTF8))
lookup_dict = utils.read_csv_to_dataframe(
csv_file, 'header1').to_dict(orient='index')
df = utils.read_csv_to_dataframe(csv_file,
{
'columns': {
'header1': {'type': 'integer'},
'header2': {'type': 'integer'},
'header3': {'type': 'freestyle_string'}
}})
# assert the BOM prefix was correctly parsed and skipped
self.assertEqual(lookup_dict[4]['header2'], 5)
self.assertEqual(lookup_dict[4]['header3'], 'foo')
self.assertEqual(lookup_dict[1]['header1'], 1)
self.assertEqual(df.columns[0], 'header1')
self.assertEqual(df['header2'][1], 5)
def test_csv_latin_1_encoding(self):
"""utils: test that CSV read correctly with Latin-1 encoding."""
"""utils: can read Latin-1 encoded CSV if it uses only ASCII chars."""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with codecs.open(csv_file, 'w', encoding='iso-8859-1') as file_obj:
file_obj.write(textwrap.dedent(
@ -916,13 +1090,16 @@ class ReadCSVToDataframeTests(unittest.TestCase):
4,5,FOO
"""
))
lookup_dict = utils.read_csv_to_dataframe(
csv_file, 'header 1').to_dict(orient='index')
self.assertEqual(lookup_dict[4]['header 2'], 5)
self.assertEqual(lookup_dict[4]['header 3'], 'foo')
self.assertEqual(lookup_dict[1]['header 1'], 1)
df = utils.read_csv_to_dataframe(
csv_file,
{'columns': {
'header 1': {'type': 'integer'},
'header 2': {'type': 'integer'},
'header 3': {'type': 'freestyle_string'}
}})
self.assertEqual(df['header 2'][1], 5)
self.assertEqual(df['header 3'][1], 'foo')
self.assertEqual(df['header 1'][0], 1)
def test_csv_error_non_utf8_character(self):
"""utils: test that error is raised on non-UTF8 character."""
@ -938,91 +1115,15 @@ class ReadCSVToDataframeTests(unittest.TestCase):
"""
))
with self.assertRaises(UnicodeDecodeError):
utils.read_csv_to_dataframe(csv_file, 'header 1')
def test_expand_path(self):
"""utils: test path expansion function."""
from natcap.invest import utils
base_path = os.path.join(self.workspace_dir, 'csv.csv')
self.assertEqual(
f'{self.workspace_dir}{os.sep}foo.txt',
utils.expand_path('foo.txt', base_path))
self.assertEqual(
f'{self.workspace_dir}{os.sep}foo{os.sep}bar.txt',
utils.expand_path('foo/bar.txt', base_path))
self.assertEqual(
f'{self.workspace_dir}{os.sep}foo\\bar.txt',
utils.expand_path('foo\\bar.txt', base_path))
self.assertEqual(
f'{self.workspace_dir}{os.sep}foo.txt',
utils.expand_path(f'{self.workspace_dir}{os.sep}foo.txt', base_path))
def test_convert_cols_to_lower(self):
"""utils: test that to_lower=True makes headers lowercase"""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
"""\
HEADER,
A,
b
"""
))
df = utils.read_csv_to_dataframe(
csv_file, convert_cols_to_lower=True, convert_vals_to_lower=False)
# header should be lowercase
self.assertEqual(df.columns[0], 'header')
# case of table values shouldn't change
self.assertEqual(df['header'][0], 'A')
self.assertEqual(df['header'][1], 'b')
def test_convert_vals_to_lower(self):
"""utils: test that to_lower=True makes headers lowercase"""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
"""\
HEADER,
A,
b
"""
))
df = utils.read_csv_to_dataframe(
csv_file, convert_cols_to_lower=False, convert_vals_to_lower=True)
# header should still be uppercase
self.assertEqual(df.columns[0], 'HEADER')
# case of table values should change
self.assertEqual(df['HEADER'][0], 'a')
self.assertEqual(df['HEADER'][1], 'b')
def test_utf8_bom_encoding(self):
"""utils: test that CSV read correctly with UTF-8 BOM encoding."""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
# writing with utf-8-sig will prepend the BOM
with open(csv_file, 'w', encoding='utf-8-sig') as file_obj:
file_obj.write(textwrap.dedent(
"""\
header1,header2,header3
1,2,bar
4,5,FOO
"""
))
# confirm that the file has the BOM prefix
with open(csv_file, 'rb') as file_obj:
self.assertTrue(file_obj.read().startswith(codecs.BOM_UTF8))
df = utils.read_csv_to_dataframe(csv_file)
# assert the BOM prefix was correctly parsed and skipped
self.assertEqual(df.columns[0], 'header1')
self.assertEqual(df['header2'][1], 5)
utils.read_csv_to_dataframe(
csv_file,
{
'index_col': 'header1',
'columns': {
'header1': {'type': 'integer'},
'header2': {'type': 'integer'},
'header3': {'type': 'freestyle_string'}
}})
def test_override_default_encoding(self):
"""utils: test that you can override the default encoding kwarg"""
@ -1039,7 +1140,10 @@ class ReadCSVToDataframeTests(unittest.TestCase):
bar
"""
))
df = utils.read_csv_to_dataframe(csv_file, encoding='iso8859_5')
df = utils.read_csv_to_dataframe(
csv_file, {
'columns': {'header': {'type': 'freestyle_string'}
}}, encoding='iso8859_5')
# with the encoding specified, special characters should work
# and be lowercased
self.assertEqual(df['header'][0], 'fюю')
@ -1061,10 +1165,16 @@ class ReadCSVToDataframeTests(unittest.TestCase):
))
# using sep=None with the default engine='python',
# it should infer what the separator is
df = utils.read_csv_to_dataframe(csv_file, sep=None)
df = utils.read_csv_to_dataframe(
csv_file, {
'columns': {
'h1': {'type': 'freestyle_string'},
'h2': {'type': 'freestyle_string'},
'h3': {'type': 'freestyle_string'}
}}, converters={'h2': lambda val: f'foo_{val}'})
self.assertEqual(df.columns[0], 'h1')
self.assertEqual(df['h2'][1], 'e')
self.assertEqual(df['h2'][1], 'foo_e')
def test_csv_with_integer_headers(self):
"""
@ -1085,7 +1195,13 @@ class ReadCSVToDataframeTests(unittest.TestCase):
d,e,f
"""
))
df = utils.read_csv_to_dataframe(csv_file)
df = utils.read_csv_to_dataframe(
csv_file,
{'columns': {
'1': {'type': 'freestyle_string'},
'2': {'type': 'freestyle_string'},
'3': {'type': 'freestyle_string'}
}})
# expect headers to be strings
self.assertEqual(df.columns[0], '1')
self.assertEqual(df['1'][0], 'a')
@ -1100,48 +1216,23 @@ class ReadCSVToDataframeTests(unittest.TestCase):
file_obj.write(" Col1, Col2 ,Col3 \n")
file_obj.write(" val1, val2 ,val3 \n")
file_obj.write(" , 2 1 , ")
df = utils.read_csv_to_dataframe(csv_file, convert_cols_to_lower=False)
# header should have no leading / trailing whitespace
self.assertEqual(df.columns[0], 'Col1')
self.assertEqual(df.columns[1], 'Col2')
self.assertEqual(df.columns[2], 'Col3')
# values should have no leading / trailing whitespace
self.assertEqual(df['Col1'][0], 'val1')
self.assertEqual(df['Col2'][0], 'val2')
self.assertEqual(df['Col3'][0], 'val3')
self.assertEqual(df['Col1'][1], '')
self.assertEqual(df['Col2'][1], '2 1')
self.assertEqual(df['Col3'][1], '')
def test_expand_path_columns(self):
"""utils: test path expansion feature of read_csv_to_dataframe."""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
f"""\
bar,path
1,foo.txt
2,foo/bar.txt
3,foo\\bar.txt
4,{self.workspace_dir}/foo.txt
"""
))
df = utils.read_csv_to_dataframe(
csv_file, expand_path_cols=['path'], convert_vals_to_lower=False)
self.assertEqual(
f'{self.workspace_dir}{os.sep}foo.txt',
df['path'][0])
self.assertEqual(
f'{self.workspace_dir}{os.sep}foo{os.sep}bar.txt',
df['path'][1])
self.assertEqual(
f'{self.workspace_dir}{os.sep}foo\\bar.txt',
df['path'][2])
self.assertEqual(
f'{self.workspace_dir}{os.sep}foo.txt',
df['path'][3])
csv_file, {
'columns': {
'col1': {'type': 'freestyle_string'},
'col2': {'type': 'freestyle_string'},
'col3': {'type': 'freestyle_string'}
}})
# header should have no leading / trailing whitespace
self.assertEqual(list(df.columns), ['col1', 'col2', 'col3'])
# values should have no leading / trailing whitespace
self.assertEqual(df['col1'][0], 'val1')
self.assertEqual(df['col2'][0], 'val2')
self.assertEqual(df['col3'][0], 'val3')
self.assertEqual(df['col1'][1], '')
self.assertEqual(df['col2'][1], '2 1')
self.assertEqual(df['col3'][1], '')
class CreateCoordinateTransformationTests(unittest.TestCase):
@ -1471,7 +1562,7 @@ class AssertVectorsEqualTests(unittest.TestCase):
attrs = [{'id': 1, 'foo': 2.3456}, {'id': 2, 'foo': 5.6789}]
attrs_copy = [
{'id': 1, 'foo': 2.3456}, {'id': 2, 'foo': 5.6789},
{'id': 3, 'foo': 5.0}]
{'id': 3, 'foo': 5}]
srs = osr.SpatialReference()
srs.ImportFromEPSG(3157)

View File

@ -846,7 +846,7 @@ class WindEnergyRegressionTests(unittest.TestCase):
wind_energy.execute(args)
self.assertTrue(
"returned 0 features. If an AOI was" in str(cm.exception))
"returned 0 features. This means the AOI " in str(cm.exception))
class WindEnergyValidationTests(unittest.TestCase):

View File

@ -0,0 +1,22 @@
export default class Store {
constructor(options) {
this.defaults = options.defaults || {};
// copy so that set() and delete() do not mutate the defaults object
this.store = { ...this.defaults };
}
get(key) {
return this.store[key];
}
set(key, val) {
this.store[key] = val;
}
delete(key) {
delete this.store[key];
}
reset() {
this.store = { ...this.defaults };
}
}

View File

@ -43,11 +43,11 @@ const config = {
from: 'resources/storage_token.txt',
to: 'storage_token.txt',
},
{
from: '../LICENSE.txt',
to: 'LICENSE.InVEST.txt',
},
],
extraFiles: [{
from: '../LICENSE.txt',
to: 'LICENSE.InVEST.txt',
}],
appId: APP_ID,
productName: PRODUCT_NAME,
artifactName: ARTIFACT_NAME,

View File

@ -53,6 +53,7 @@
"dependencies": {
"@babel/runtime": "^7.13.10",
"electron-log": "^4.3.5",
"electron-store": "^8.1.0",
"i18next": "^22.4.9",
"localforage": "^1.9.0",
"node-fetch": "^2.6.7",
@ -71,6 +72,7 @@
"@testing-library/react": "^14.0.0",
"@testing-library/user-event": "^14.4.3",
"@vitejs/plugin-react": "^4.0.0",
"ajv": "^8.12.0",
"babel-eslint": "^10.1.0",
"bootstrap": "4.3.1",
"concurrently": "^8.2.0",

View File

@ -1,4 +1,4 @@
import { spawn, exec } from 'child_process';
import { spawn, execSync } from 'child_process';
import fetch from 'node-fetch';
@ -88,26 +88,12 @@ export async function shutdownPythonProcess(subprocess) {
subprocess.kill();
} else {
const { pid } = subprocess;
exec(`taskkill /pid ${pid} /t /f`);
execSync(`taskkill /pid ${pid} /t /f`);
}
} catch (error) {
// if the process was already killed by some other means
logger.debug(error);
} finally {
Promise.resolve();
}
// If we return too quickly, it seems the electron app is allowed
// to quit before the subprocess is killed, and the subprocess remains
// open. Here we poll a flask endpoint and resolve only when it
// gives ECONNREFUSED.
return fetch(`${HOSTNAME}:${process.env.PORT}/ready`, {
method: 'get',
})
.then(async () => {
await new Promise((resolve) => setTimeout(resolve, 300));
return shutdownPythonProcess(subprocess);
})
.catch(() => {
logger.debug('flask server is closed');
return Promise.resolve();
});
}

View File

@ -1,9 +1,12 @@
export const ipcMainChannels = {
CHANGE_LANGUAGE: 'change-language',
CHECK_FILE_PERMISSIONS: 'check-file-permissions',
CHECK_STORAGE_TOKEN: 'check-storage-token',
DOWNLOAD_URL: 'download-url',
GET_N_CPUS: 'get-n-cpus',
GET_ELECTRON_PATHS: 'get-electron-paths',
GET_N_CPUS: 'get-n-cpus',
GET_SETTING: 'get-setting',
GET_LANGUAGE: 'get-language',
INVEST_KILL: 'invest-kill',
INVEST_READ_LOG: 'invest-read-log',
INVEST_RUN: 'invest-run',
@ -12,8 +15,8 @@ export const ipcMainChannels = {
LOGGER: 'logger',
OPEN_EXTERNAL_URL: 'open-external-url',
OPEN_LOCAL_HTML: 'open-local-html',
SET_SETTING: 'set-setting',
SHOW_ITEM_IN_FOLDER: 'show-item-in-folder',
SHOW_OPEN_DIALOG: 'show-open-dialog',
SHOW_SAVE_DIALOG: 'show-save-dialog',
CHANGE_LANGUAGE: 'change-language',
};

View File

@ -4,7 +4,6 @@ import path from 'path';
import {
app,
BrowserWindow,
screen,
nativeTheme,
Menu,
ipcMain
@ -29,7 +28,7 @@ import {
import setupGetNCPUs from './setupGetNCPUs';
import setupOpenExternalUrl from './setupOpenExternalUrl';
import setupOpenLocalHtml from './setupOpenLocalHtml';
import setupChangeLanguage from './setupChangeLanguage';
import { settingsStore, setupSettingsHandlers } from './settingsStore';
import setupGetElectronPaths from './setupGetElectronPaths';
import setupRendererLogger from './setupRendererLogger';
import { ipcMainChannels } from './ipcMainChannels';
@ -37,8 +36,8 @@ import menuTemplate from './menubar';
import ELECTRON_DEV_MODE from './isDevMode';
import BASE_URL from './baseUrl';
import { getLogger } from './logger';
import pkg from '../../package.json';
import i18n from './i18n/i18n';
import pkg from '../../package.json';
const logger = getLogger(__filename.split('/').slice(-1)[0]);
@ -61,6 +60,7 @@ if (!process.env.PORT) {
let mainWindow;
let splashScreen;
let flaskSubprocess;
let forceQuit = false;
export function destroyWindow() {
mainWindow = null;
@ -71,6 +71,8 @@ export const createWindow = async () => {
logger.info(`Running invest-workbench version ${pkg.version}`);
nativeTheme.themeSource = 'light'; // override OS/browser setting
i18n.changeLanguage(settingsStore.get('language'));
splashScreen = new BrowserWindow({
width: 574, // dims set to match the image in splash.html
height: 500,
@ -86,7 +88,7 @@ export const createWindow = async () => {
setupCheckFilePermissions();
setupCheckFirstRun();
setupCheckStorageToken();
setupChangeLanguage();
setupSettingsHandlers();
setupGetElectronPaths();
setupGetNCPUs();
setupInvestLogReaderHandler();
@ -110,14 +112,6 @@ export const createWindow = async () => {
menuTemplate(mainWindow, ELECTRON_DEV_MODE, i18n)
)
);
// when language changes, rebuild the menu bar in new language
i18n.on('languageChanged', (lng) => {
Menu.setApplicationMenu(
Menu.buildFromTemplate(
menuTemplate(mainWindow, ELECTRON_DEV_MODE, i18n)
)
);
});
mainWindow.loadURL(path.join(BASE_URL, 'index.html'));
mainWindow.once('ready-to-show', () => {
@ -135,6 +129,16 @@ export const createWindow = async () => {
logger.error(details);
});
mainWindow.on('close', (event) => {
// 'close' is triggered by the red traffic light button on mac
// override this behavior and just minimize,
// unless we're actually quitting the app
if (process.platform === 'darwin' && !forceQuit) {
event.preventDefault();
mainWindow.minimize();
}
});
mainWindow.on('closed', () => {
mainWindow = null;
});
@ -186,17 +190,12 @@ export function main() {
createWindow();
}
});
app.on('window-all-closed', async () => {
// On OS X it is common for applications and their menu bar
// to stay active until the user quits explicitly with Cmd + Q
if (process.platform !== 'darwin') {
app.quit();
}
});
let shuttingDown = false;
app.on('before-quit', async (event) => {
// prevent quitting until after we're done with cleanup,
// then programmatically quit
forceQuit = true;
if (shuttingDown) { return; }
event.preventDefault();
shuttingDown = true;

View File

@ -0,0 +1,101 @@
import { app, ipcMain } from 'electron';
import Store from 'electron-store';
import Ajv from 'ajv';
import { ipcMainChannels } from './ipcMainChannels';
import { getLogger } from './logger';
const logger = getLogger(__filename.split('/').slice(-1)[0]);
export const defaults = {
nWorkers: -1,
taskgraphLoggingLevel: 'INFO',
loggingLevel: 'INFO',
language: 'en',
};
export const schema = {
type: 'object',
properties: {
nWorkers: {
type: 'number',
},
taskgraphLoggingLevel: {
enum: ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'],
},
loggingLevel: {
enum: ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'],
},
language: {
enum: ['en', 'es', 'zh'],
},
},
required: ['nWorkers', 'taskgraphLoggingLevel', 'loggingLevel', 'language']
};
/**
* Open a store and validate against a schema.
*
* Required properties missing from the store are initialized with defaults.
* Invalid properties are reset to defaults.
*
* @param {object} data - key-values with which to initialize a store.
* @returns {Store} an instance of an electron-store Store
*/
export function initStore(data = defaults) {
const ajv = new Ajv({ allErrors: true });
const validate = ajv.compile(schema);
const store = new Store({ defaults: data });
const valid = validate(store.store);
if (!valid) {
validate.errors.forEach((e) => {
logger.debug(e);
let property;
if (e.instancePath) {
property = e.instancePath.split('/').pop();
} else if (e.keyword === 'required') {
property = e.params.missingProperty;
} else {
// something is invalid that we're not prepared to fix
// so just reset the whole store to defaults.
logger.debug(e);
store.reset();
}
logger.debug(`resetting value for setting ${property}`);
store.set(property, defaults[property]);
});
}
return store;
}
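// --- Illustrative sketch (not part of the diff) ---
// Demonstrates the reset-on-invalid behavior documented above. The literal
// 'VERBOSE' is a made-up invalid value; on a fresh profile the validator
// should replace it with the schema default ('INFO').
// const exampleStore = initStore({ ...defaults, loggingLevel: 'VERBOSE' });
// exampleStore.get('loggingLevel'); // expected: 'INFO'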
export const settingsStore = initStore();
export function setupSettingsHandlers() {
ipcMain.handle(
ipcMainChannels.GET_SETTING,
(event, key) => settingsStore.get(key)
);
ipcMain.on(
ipcMainChannels.SET_SETTING,
(event, key, value) => settingsStore.set(key, value)
);
// language is stored in the same store, but has special
// needs for getting & setting because we need to get
// the value synchronously during preload, and trigger
// an app restart on change.
ipcMain.on(ipcMainChannels.GET_LANGUAGE, (event) => {
event.returnValue = settingsStore.get('language');
});
ipcMain.handle(
ipcMainChannels.CHANGE_LANGUAGE,
(e, languageCode) => {
logger.debug('changing language to', languageCode);
settingsStore.set('language', languageCode);
app.relaunch();
app.quit();
}
);
}
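// --- Illustrative renderer-side sketch (not part of the diff) ---
// How a renderer component (e.g. SettingsModal, later in this commit) can talk
// to the handlers above; `ipcRenderer` here is the preload-exposed wrapper from
// window.Workbench.electron.
// const level = await ipcRenderer.invoke(ipcMainChannels.GET_SETTING, 'loggingLevel');
// ipcRenderer.send(ipcMainChannels.SET_SETTING, 'nWorkers', 4);
// const lang = ipcRenderer.sendSync(ipcMainChannels.GET_LANGUAGE); // synchronous, usable in preload
// await ipcRenderer.invoke(ipcMainChannels.CHANGE_LANGUAGE, 'es'); // persists, then relaunches the app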

View File

@ -1,16 +1,25 @@
import i18n from 'i18next';
import { ipcMain } from 'electron';
import Store from 'electron-store';
import { app, ipcMain } from 'electron';
import { getLogger } from './logger';
import { ipcMainChannels } from './ipcMainChannels';
const logger = getLogger(__filename.split('/').slice(-1)[0]);
const store = new Store();
export default function setupChangeLanguage() {
ipcMain.on(ipcMainChannels.GET_LANGUAGE, (event) => {
// default to en if no language setting exists
event.returnValue = store.get('language', 'en');
});
ipcMain.handle(
ipcMainChannels.CHANGE_LANGUAGE,
(e, languageCode) => {
logger.debug('changing language to', languageCode);
i18n.changeLanguage(languageCode);
store.set('language', languageCode);
app.relaunch();
app.quit();
}
);
}

View File

@ -11,6 +11,7 @@ import ELECTRON_DEV_MODE from './isDevMode';
import investUsageLogger from './investUsageLogger';
import markupMessage from './investLogMarkup';
import writeInvestParameters from './writeInvestParameters';
import { settingsStore } from './settingsStore';
const logger = getLogger(__filename.split('/').slice(-1)[0]);
@ -45,12 +46,16 @@ export function setupInvestRunHandlers(investExe) {
});
ipcMain.on(ipcMainChannels.INVEST_RUN, async (
event, modelRunName, pyModuleName, args, loggingLevel, taskgraphLoggingLevel, language, tabID
event, modelRunName, pyModuleName, args, tabID
) => {
let investRun;
let investStarted = false;
let investStdErr = '';
const usageLogger = investUsageLogger();
const loggingLevel = settingsStore.get('loggingLevel');
const taskgraphLoggingLevel = settingsStore.get('taskgraphLoggingLevel');
const language = settingsStore.get('language');
const nWorkers = settingsStore.get('nWorkers');
// Write a temporary datastack json for passing to invest CLI
try {
@ -64,7 +69,10 @@ export function setupInvestRunHandlers(investExe) {
filepath: datastackPath,
moduleName: pyModuleName,
relativePaths: false,
args: JSON.stringify(args),
args: JSON.stringify({
...args,
n_workers: nWorkers,
}),
};
await writeInvestParameters(payload);

View File

@ -11,6 +11,7 @@ export default function setupOpenLocalHtml(parentWindow, isDevMode) {
ipcMainChannels.OPEN_LOCAL_HTML, (event, url) => {
const [width, height] = parentWindow.getSize();
const child = new BrowserWindow({
parent: parentWindow,
width: width > 1000 ? 1000 : width, // UG content is never wider
height: height,
frame: true,

View File

@ -35,6 +35,7 @@ export default {
PORT: PORT, // where the flask app is running
ELECTRON_LOG_PATH: electronLogPath,
USERGUIDE_PATH: userguidePath,
LANGUAGE: ipcRenderer.sendSync(ipcMainChannels.GET_LANGUAGE),
logger: {
debug: (message) => ipcRenderer.send(ipcMainChannels.LOGGER, 'debug', message),
info: (message) => ipcRenderer.send(ipcMainChannels.LOGGER, 'info', message),

View File

@ -58,8 +58,8 @@ export default class InvestJob {
const lastKey = sortedJobHashes.pop();
investJobStore.removeItem(lastKey);
}
await investJobStore.setItem(HASH_ARRAY_KEY, sortedJobHashes);
await investJobStore.setItem(job.hash, job);
await investJobStore.setItem(HASH_ARRAY_KEY, sortedJobHashes);
return InvestJob.getJobStore();
}

View File

@ -1,5 +1,6 @@
import React from 'react';
import PropTypes from 'prop-types';
import i18n from 'i18next';
import TabPane from 'react-bootstrap/TabPane';
import TabContent from 'react-bootstrap/TabContent';
@ -19,14 +20,9 @@ import InvestTab from './components/InvestTab';
import SettingsModal from './components/SettingsModal';
import DataDownloadModal from './components/DataDownloadModal';
import DownloadProgressBar from './components/DownloadProgressBar';
import {
saveSettingsStore, getAllSettings,
} from './components/SettingsModal/SettingsStorage';
import { getInvestModelNames } from './server_requests';
import InvestJob from './InvestJob';
import { dragOverHandlerNone } from './utils';
import { ipcMainChannels } from '../main/ipcMainChannels';
import i18n from 'i18next';
const { ipcRenderer } = window.Workbench.electron;
@ -43,18 +39,15 @@ export default class App extends React.Component {
openJobs: {},
investList: null,
recentJobs: [],
investSettings: null,
showDownloadModal: false,
downloadedNofN: null,
};
this.saveSettings = this.saveSettings.bind(this);
this.switchTabs = this.switchTabs.bind(this);
this.openInvestModel = this.openInvestModel.bind(this);
this.closeInvestModel = this.closeInvestModel.bind(this);
this.updateJobProperties = this.updateJobProperties.bind(this);
this.saveJob = this.saveJob.bind(this);
this.clearRecentJobs = this.clearRecentJobs.bind(this);
this.storeDownloadDir = this.storeDownloadDir.bind(this);
this.showDownloadModal = this.showDownloadModal.bind(this);
}
@ -62,17 +55,17 @@ export default class App extends React.Component {
async componentDidMount() {
const investList = await getInvestModelNames();
const recentJobs = await InvestJob.getJobStore();
const investSettings = await getAllSettings();
this.setState({
investList: investList,
recentJobs: recentJobs,
investSettings: investSettings,
// filter out models that do not exist in current version of invest
recentJobs: recentJobs.filter((job) => (
Object.values(investList)
.map((m) => m.model_name)
.includes(job.modelRunName)
)),
showDownloadModal: this.props.isFirstRun,
});
await i18n.changeLanguage(investSettings.language);
await ipcRenderer.invoke(
ipcMainChannels.CHANGE_LANGUAGE, investSettings.language
);
await i18n.changeLanguage(window.Workbench.LANGUAGE);
ipcRenderer.on('download-status', (downloadedNofN) => {
this.setState({
downloadedNofN: downloadedNofN,
@ -94,33 +87,6 @@ export default class App extends React.Component {
);
}
async saveSettings(settings) {
const { investSettings } = this.state;
await saveSettingsStore(settings);
this.setState({ investSettings: settings });
// if language has changed, refresh the app
if (settings.language !== investSettings.language) {
// change language in the renderer process
await i18n.changeLanguage(settings.language);
// change language in the main process
await ipcRenderer.invoke(
ipcMainChannels.CHANGE_LANGUAGE, settings.language
);
// rerender for changes to take effect
window.location.reload();
}
}
/** Store a sampledata filepath in localforage.
*
* @param {string} dir - the path to the user-selected dir
*/
storeDownloadDir(dir) {
const { investSettings } = this.state;
investSettings.sampleDataDir = dir;
this.saveSettings(investSettings);
}
showDownloadModal(shouldShow) {
this.setState({
showDownloadModal: shouldShow,
@ -211,7 +177,6 @@ export default class App extends React.Component {
render() {
const {
investList,
investSettings,
recentJobs,
openJobs,
openTabIDs,
@ -260,7 +225,17 @@ export default class App extends React.Component {
key={id}
className={id === activeTab ? 'active' : ''}
>
<Nav.Link eventKey={id}>
<Nav.Link
eventKey={id}
onAuxClick={(event) => {
event.stopPropagation();
event.preventDefault();
if (event.button === 1) {
// middle mouse button clicked, close tab
this.closeInvestModel(id);
}
}}
>
{statusSymbol}
{` ${job.modelHumanName}`}
</Nav.Link>
@ -288,7 +263,6 @@ export default class App extends React.Component {
<InvestTab
job={job}
tabID={id}
investSettings={investSettings}
saveJob={this.saveJob}
updateJobProperties={this.updateJobProperties}
/>
@ -301,7 +275,6 @@ export default class App extends React.Component {
<DataDownloadModal
show={showDownloadModal}
closeModal={() => this.showDownloadModal(false)}
storeDownloadDir={this.storeDownloadDir}
/>
<TabContainer activeKey={activeTab}>
<Navbar
@ -343,21 +316,12 @@ export default class App extends React.Component {
)
: <div />
}
{
// don't render until after we fetched the data
(investSettings)
? (
<SettingsModal
className="mx-3"
saveSettings={this.saveSettings}
investSettings={investSettings}
clearJobsStorage={this.clearRecentJobs}
showDownloadModal={() => this.showDownloadModal(true)}
nCPU={this.props.nCPU}
/>
)
: <div />
}
<SettingsModal
className="mx-3"
clearJobsStorage={this.clearRecentJobs}
showDownloadModal={() => this.showDownloadModal(true)}
nCPU={this.props.nCPU}
/>
</Col>
</Row>
</Navbar>

View File

@ -109,7 +109,6 @@ class DataDownloadModal extends React.Component {
this.state.selectedLinksArray,
data.filePaths[0]
);
this.props.storeDownloadDir(data.filePaths[0]);
this.closeDialog();
}
}
@ -283,7 +282,6 @@ class DataDownloadModal extends React.Component {
DataDownloadModal.propTypes = {
show: PropTypes.bool.isRequired,
closeModal: PropTypes.func.isRequired,
storeDownloadDir: PropTypes.func.isRequired,
};
export default withTranslation()(DataDownloadModal)
export default withTranslation()(DataDownloadModal);

View File

@ -11,6 +11,8 @@ import { useTranslation } from 'react-i18next';
import OpenButton from '../OpenButton';
import InvestJob from '../../InvestJob';
const { logger } = window.Workbench;
/**
* Renders a table of buttons for each invest model and
* a list of cards for each cached invest job.
@ -110,46 +112,51 @@ HomeTab.propTypes = {
*/
function RecentInvestJobs(props) {
const { recentJobs, openInvestModel } = props;
const handleClick = (jobMetadata) => {
openInvestModel(new InvestJob(jobMetadata));
}
const { t, i18n } = useTranslation();
// Buttons to load each recently saved state
const handleClick = (jobMetadata) => {
try {
openInvestModel(new InvestJob(jobMetadata));
} catch (error) {
logger.debug(error);
}
};
const recentButtons = [];
recentJobs.forEach((job) => {
if (!job.argsValues) { return; }
recentButtons.push(
<Card
className="text-left recent-job-card"
as="button"
key={job.hash}
onClick={() => handleClick(job)}
>
<Card.Body>
<Card.Header>
<span className="header-title">{job.modelHumanName}</span>
</Card.Header>
<Card.Title>
<span className="text-heading">{'Workspace: '}</span>
<span className="text-mono">{job.argsValues.workspace_dir}</span>
</Card.Title>
<Card.Title>
<span className="text-heading">{'Suffix: '}</span>
<span className="text-mono">{job.argsValues.results_suffix}</span>
</Card.Title>
<Card.Footer className="text-muted">
<span className="timestamp">{job.humanTime}</span>
<span className="status">
{(job.status === 'success'
? <span className="status-success">{t('Model Complete')}</span>
: <span className="status-error">{job.status}</span>
)}
</span>
</Card.Footer>
</Card.Body>
</Card>
);
if (job && job.argsValues && job.modelHumanName) {
recentButtons.push(
<Card
className="text-left recent-job-card"
as="button"
key={job.hash}
onClick={() => handleClick(job)}
>
<Card.Body>
<Card.Header>
<span className="header-title">{job.modelHumanName}</span>
</Card.Header>
<Card.Title>
<span className="text-heading">{'Workspace: '}</span>
<span className="text-mono">{job.argsValues.workspace_dir}</span>
</Card.Title>
<Card.Title>
<span className="text-heading">{'Suffix: '}</span>
<span className="text-mono">{job.argsValues.results_suffix}</span>
</Card.Title>
<Card.Footer className="text-muted">
<span className="timestamp">{job.humanTime}</span>
<span className="status">
{(job.status === 'success'
? <span className="status-success">{t('Model Complete')}</span>
: <span className="status-error">{job.status}</span>
)}
</span>
</Card.Footer>
</Card.Body>
</Card>
);
}
});
return (

View File

@ -147,7 +147,6 @@ class InvestTab extends React.Component {
const {
job,
tabID,
investSettings,
updateJobProperties,
} = this.props;
const args = { ...argsValues };
@ -162,9 +161,6 @@ class InvestTab extends React.Component {
job.modelRunName,
this.state.modelSpec.pyname,
args,
investSettings.loggingLevel,
investSettings.taskgraphLoggingLevel,
investSettings.language,
tabID
);
this.switchTabs('log');
@ -205,7 +201,7 @@ class InvestTab extends React.Component {
logfile,
} = this.props.job;
const { tabID, investSettings, t } = this.props;
const { tabID, t } = this.props;
// Don't render the model setup & log until data has been fetched.
if (!modelSpec) {
@ -279,7 +275,6 @@ class InvestTab extends React.Component {
uiSpec={uiSpec}
argsInitValues={argsValues}
investExecute={this.investExecute}
nWorkers={investSettings.nWorkers}
sidebarSetupElementId={sidebarSetupElementId}
sidebarFooterElementId={sidebarFooterElementId}
executeClicked={executeClicked}
@ -313,12 +308,6 @@ InvestTab.propTypes = {
status: PropTypes.string,
}).isRequired,
tabID: PropTypes.string.isRequired,
investSettings: PropTypes.shape({
nWorkers: PropTypes.string,
taskgraphLoggingLevel: PropTypes.string,
loggingLevel: PropTypes.string,
language: PropTypes.string,
}).isRequired,
saveJob: PropTypes.func.isRequired,
updateJobProperties: PropTypes.func.isRequired,
};

View File

@ -11,6 +11,7 @@ import { fetchDatastackFromFile } from '../../server_requests';
import { ipcMainChannels } from '../../../main/ipcMainChannels';
const { ipcRenderer } = window.Workbench.electron;
const { logger } = window.Workbench;
/**
* Render a button that loads args from a datastack, parameterset, or logfile.
@ -23,9 +24,22 @@ class OpenButton extends React.Component {
}
async browseFile() {
const { t } = this.props;
const data = await ipcRenderer.invoke(ipcMainChannels.SHOW_OPEN_DIALOG);
if (!data.canceled) {
const datastack = await fetchDatastackFromFile(data.filePaths[0]);
let datastack;
try {
datastack = await fetchDatastackFromFile(data.filePaths[0]);
} catch (error) {
logger.error(error);
alert(
t(
'No InVEST model data can be parsed from the file:\n {{filepath}}',
{ filepath: data.filePaths[0] }
)
);
return;
}
const job = new InvestJob({
modelRunName: datastack.model_run_name,
modelHumanName: datastack.model_human_name,

View File

@ -75,7 +75,7 @@ export default function ResourcesTab(props) {
}
const { t, i18n } = useTranslation();
const userGuideURL = `${window.Workbench.USERGUIDE_PATH}/${i18n.language}/${docs}`;
const userGuideURL = `${window.Workbench.USERGUIDE_PATH}/${window.Workbench.LANGUAGE}/${docs}`;
return (
<React.Fragment>

View File

@ -1,83 +0,0 @@
import localforage from 'localforage';
const { logger } = window.Workbench;
const investSettingsStore = localforage.createInstance({
name: 'InvestSettings',
});
/** Getter function for global default settings.
*
* @returns {object} to destructure into:
* {String} nWorkers - TaskGraph number of workers
* {String} taskgraphLoggingLevel - InVEST taskgraph logging level
* {String} loggingLevel - InVEST model logging level
* {String} sampleDataDir - default location for sample datastack downloads
*/
export function getDefaultSettings() {
const defaultSettings = {
nWorkers: '-1',
taskgraphLoggingLevel: 'INFO',
loggingLevel: 'INFO',
sampleDataDir: '',
language: 'en'
};
return defaultSettings;
}
/** Getter function for settings store value.
*
* @param {object} obj.argsValues - an invest "args dict" with initial values
* @param {string} key - setting key to get value
*
* @returns {string} - value of the setting key.
*/
export async function getSettingsValue(key) {
const value = await investSettingsStore.getItem(key);
if (!value) {
return getDefaultSettings()[key];
}
return value;
}
/** Getter function for entire contents of store.
*
* @returns {Object} - key: value pairs of settings
*/
export async function getAllSettings() {
try {
const promises = [];
const keys = Object.keys(getDefaultSettings());
keys.forEach((key) => {
promises.push(getSettingsValue(key));
});
const values = await Promise.all(promises);
const settings = Object.fromEntries(keys.map(
(_, i) => [keys[i], values[i]]
));
return settings;
} catch (err) {
logger.error(err.message);
return getDefaultSettings();
}
}
/** Clear the settings store. */
export async function clearSettingsStore() {
await investSettingsStore.clear();
}
/** Setter function for saving store values.
*
* @param {object} settingsObj - object with one or more key:value pairs
*
*/
export async function saveSettingsStore(settingsObj) {
try {
for (const [setting, value] of Object.entries(settingsObj)) {
await investSettingsStore.setItem(setting, value);
}
} catch (err) {
logger.error(`Error saving settings: ${err}`);
}
}

View File

@ -16,7 +16,6 @@ import {
import { BsChevronExpand } from 'react-icons/bs';
import { withTranslation } from 'react-i18next';
import { getDefaultSettings } from './SettingsStorage';
import { ipcMainChannels } from '../../../main/ipcMainChannels';
import { getSupportedLanguages } from '../../server_requests';
@ -29,11 +28,18 @@ class SettingsModal extends React.Component {
this.state = {
show: false,
languageOptions: null,
loggingLevel: null,
taskgraphLoggingLevel: null,
nWorkers: null,
language: window.Workbench.LANGUAGE,
showConfirmLanguageChange: false,
};
this.handleShow = this.handleShow.bind(this);
this.handleClose = this.handleClose.bind(this);
this.handleChange = this.handleChange.bind(this);
this.handleReset = this.handleReset.bind(this);
this.handleChangeNumber = this.handleChangeNumber.bind(this);
this.loadSettings = this.loadSettings.bind(this);
this.handleChangeLanguage = this.handleChangeLanguage.bind(this);
this.switchToDownloadModal = this.switchToDownloadModal.bind(this);
}
@ -42,6 +48,7 @@ class SettingsModal extends React.Component {
this.setState({
languageOptions: languageOptions,
});
this.loadSettings();
}
handleClose() {
@ -54,17 +61,40 @@ class SettingsModal extends React.Component {
this.setState({ show: true });
}
handleReset(event) {
event.preventDefault();
const resetSettings = getDefaultSettings();
this.props.saveSettings(resetSettings);
handleChange(event) {
const { name, value } = event.currentTarget;
this.setState({ [name]: value });
ipcRenderer.send(ipcMainChannels.SET_SETTING, name, value);
}
handleChange(event) {
const newSettings = { ...this.props.investSettings };
handleChangeNumber(event) {
const { name, value } = event.currentTarget;
newSettings[name] = value;
this.props.saveSettings(newSettings);
const numeral = Number(value);
this.setState({ [name]: numeral });
ipcRenderer.send(ipcMainChannels.SET_SETTING, name, numeral);
}
async loadSettings() {
const loggingLevel = await ipcRenderer
.invoke(ipcMainChannels.GET_SETTING, 'loggingLevel');
const taskgraphLoggingLevel = await ipcRenderer
.invoke(ipcMainChannels.GET_SETTING, 'taskgraphLoggingLevel');
const nWorkers = await ipcRenderer
.invoke(ipcMainChannels.GET_SETTING, 'nWorkers');
this.setState({
loggingLevel: loggingLevel,
taskgraphLoggingLevel: taskgraphLoggingLevel,
nWorkers: nWorkers
});
}
handleChangeLanguage() {
// if language has changed, refresh the app
if (this.state.language !== window.Workbench.LANGUAGE) {
// tell the main process to update the language setting in storage
// and then relaunch the app
ipcRenderer.invoke(ipcMainChannels.CHANGE_LANGUAGE, this.state.language);
}
}
switchToDownloadModal() {
@ -73,21 +103,29 @@ class SettingsModal extends React.Component {
}
render() {
const { show, languageOptions } = this.state;
const { investSettings, clearJobsStorage, nCPU, t } = this.props;
const {
show,
languageOptions,
language,
loggingLevel,
taskgraphLoggingLevel,
nWorkers,
showConfirmLanguageChange,
} = this.state;
const { clearJobsStorage, nCPU, t } = this.props;
const nWorkersOptions = [
[-1, `${t('Synchronous')} (-1)`],
[0, `${t('Threaded task management')} (0)`]
[0, `${t('Threaded task management')} (0)`],
];
for (let i = 1; i <= nCPU; i += 1) {
nWorkersOptions.push([i, `${i} ${t('CPUs')}`]);
}
const logLevelOptions = { // map value to display name
'DEBUG': t('DEBUG'),
'INFO': t('INFO'),
'WARNING': t('WARNING'),
'ERROR': t('ERROR')
const logLevelOptions = { // map value to display name
DEBUG: t('DEBUG'),
INFO: t('INFO'),
WARNING: t('WARNING'),
ERROR: t('ERROR'),
};
return (
<React.Fragment>
@ -124,18 +162,18 @@ class SettingsModal extends React.Component {
<Form.Label column sm="8" htmlFor="language-select">
<MdTranslate className="language-icon" />
{t('Language')}
<Form.Text className="text-nowrap" muted>
<MdWarningAmber className="align-text-bottom ml-3" />
{t('Changing this setting will refresh the app and close all tabs')}
</Form.Text>
</Form.Label>
<Col sm="4">
<Form.Control
id="language-select"
as="select"
name="language"
value={investSettings.language}
onChange={this.handleChange}
value={window.Workbench.LANGUAGE}
onChange={
(event) => this.setState({
showConfirmLanguageChange: true,
language: event.target.value
})}
>
{Object.entries(languageOptions).map((entry) => {
const [value, displayName] = entry;
@ -155,7 +193,7 @@ class SettingsModal extends React.Component {
id="logging-select"
as="select"
name="loggingLevel"
value={investSettings.loggingLevel}
value={loggingLevel}
onChange={this.handleChange}
>
{Object.entries(logLevelOptions).map(
@ -173,7 +211,7 @@ class SettingsModal extends React.Component {
id="taskgraph-logging-select"
as="select"
name="taskgraphLoggingLevel"
value={investSettings.taskgraphLoggingLevel}
value={taskgraphLoggingLevel}
onChange={this.handleChange}
>
{Object.entries(logLevelOptions).map(
@ -197,8 +235,8 @@ class SettingsModal extends React.Component {
as="select"
name="nWorkers"
type="text"
value={investSettings.nWorkers}
onChange={this.handleChange}
value={nWorkers}
onChange={this.handleChangeNumber}
>
{nWorkersOptions.map(
(opt) => <option value={opt[0]} key={opt[0]}>{opt[1]}</option>
@ -233,18 +271,6 @@ class SettingsModal extends React.Component {
)
: <div />
}
<Row className="justify-content-end">
<Col sm="5">
<Button
variant="secondary"
onClick={this.handleReset}
type="button"
className="w-100"
>
{t('Reset to Defaults')}
</Button>
</Col>
</Row>
<hr />
<Button
variant="primary"
@ -264,21 +290,37 @@ class SettingsModal extends React.Component {
<span>{t('no invest workspaces will be deleted')}</span>
</Modal.Body>
</Modal>
{
(languageOptions) ? (
<Modal show={showConfirmLanguageChange} className="confirm-modal">
<Modal.Header>
<Modal.Title as="h5">{t('Warning')}</Modal.Title>
</Modal.Header>
<Modal.Body>
<p>
{t('Changing this setting will close your tabs and relaunch the app.')}
</p>
</Modal.Body>
<Modal.Footer>
<Button
variant="secondary"
onClick={() => this.setState({ showConfirmLanguageChange: false })}
>{t('Cancel')}</Button>
<Button
variant="primary"
onClick={this.handleChangeLanguage}
>{t('Change to ') + languageOptions[language]}</Button>
</Modal.Footer>
</Modal>
) : <React.Fragment />
}
</React.Fragment>
);
}
}
SettingsModal.propTypes = {
saveSettings: PropTypes.func.isRequired,
clearJobsStorage: PropTypes.func.isRequired,
investSettings: PropTypes.shape({
nWorkers: PropTypes.string,
taskgraphLoggingLevel: PropTypes.string,
loggingLevel: PropTypes.string,
sampleDataDir: PropTypes.string,
language: PropTypes.string,
}).isRequired,
showDownloadModal: PropTypes.func.isRequired,
nCPU: PropTypes.number.isRequired,
};
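The confirm-language-change modal above hands off to handleChangeLanguage, whose body is not shown in this hunk. Based on the test later in this diff that spies on ipcRenderer.invoke with the CHANGE_LANGUAGE channel, a plausible sketch looks like this (assuming ipcRenderer and ipcMainChannels are imported as in the other components in this diff; the setState call is an assumption):

handleChangeLanguage() {
  const { language } = this.state;
  // hand the new language code to the main process, which relaunches the app
  ipcRenderer.invoke(ipcMainChannels.CHANGE_LANGUAGE, language);
  // closing the confirm dialog here is an assumption; the real method may simply
  // rely on the relaunch to tear the component down
  this.setState({ showConfirmLanguageChange: false });
}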

View File

@ -30,7 +30,8 @@ const { ipcRenderer } = window.Workbench.electron;
* @returns {string} - the filtered and formatted part of the message
*/
function filterSpatialOverlapFeedback(message, filepath) {
const newPrefix = i18n.t('Bounding box does not intersect at least one other:');
const newPrefix = i18n.t(
'Not all of the spatial layers overlap each other. Bounding box:');
const bbox = message.split(`${filepath}:`).pop().split('|')[0];
const bboxFormatted = bbox.split(' ').map(
(str) => str.padEnd(22, ' ')
@ -167,7 +168,7 @@ export default function ArgInput(props) {
// Messages with this pattern include validation feedback about
// multiple inputs, but the whole message is repeated for each input.
// It's more readable if filtered on the individual input.
const pattern = 'Bounding boxes do not intersect';
const pattern = 'Not all of the spatial layers overlap each other';
if (validationMessage.startsWith(pattern)) {
validationMessage = filterSpatialOverlapFeedback(
validationMessage, value
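With hypothetical file paths and bounding boxes, the filtering above turns the repeated multi-input message into a single-input one (values are for illustration only; the exact padding of the output is not shown in this hunk):

// Hypothetical validation message, shaped like the one exercised in the tests below:
const message = 'Not all of the spatial layers overlap each other. '
  + 'All bounding boxes must intersect: ./vector.shp: [0, 0, 1, 1] | ./raster.tif: [5, 5, 6, 6]';
// For the input whose value is './vector.shp',
// filterSpatialOverlapFeedback(message, './vector.shp') keeps only that file's
// bounding box, yielding roughly:
// "Not all of the spatial layers overlap each other. Bounding box: [0, 0, 1, 1]"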
@ -362,7 +363,7 @@ function AboutModal(props) {
// create link to users guide entry for this arg
// anchor name is the arg name, with underscores replaced with hyphens
const userguideURL = `
${window.Workbench.USERGUIDE_PATH}/${i18n.language}/${userguide}#${argkey.replace(/_/g, '-')}`;
${window.Workbench.USERGUIDE_PATH}/${window.Workbench.LANGUAGE}/${userguide}#${argkey.replace(/_/g, '-')}`;
return (
<React.Fragment>
<Button

View File

@ -1,5 +1,6 @@
import React from 'react';
import PropTypes from 'prop-types';
import { withTranslation } from 'react-i18next';
import Alert from 'react-bootstrap/Alert';
import Container from 'react-bootstrap/Container';
@ -23,9 +24,9 @@ import {
} from '../../server_requests';
import { argsDictFromObject } from '../../utils';
import { ipcMainChannels } from '../../../main/ipcMainChannels';
import { withTranslation } from 'react-i18next';
const { ipcRenderer } = window.Workbench.electron;
const { logger } = window.Workbench;
/** Initialize values of InVEST args based on the model's UI Spec.
*
@ -54,7 +55,7 @@ function initializeArgValues(argsSpec, uiSpec, argsDict) {
if (argsSpec[argkey].type === 'boolean') {
value = argsDict[argkey] || false;
} else if (argsSpec[argkey].type === 'option_string') {
if (argsDict[argkey]) {
if (argsDict[argkey]) {
value = argsDict[argkey];
} else { // default to first
if (Array.isArray(argsSpec[argkey].options)) {
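The defaulting rules in initializeArgValues can be summarized in a standalone sketch (simplified; the object-keyed options branch and the empty-string fallback for other types are assumptions inferred from the Array.isArray check above):

function defaultArgValue(argSpec, savedValue) {
  if (argSpec.type === 'boolean') {
    return savedValue || false; // booleans default to false
  }
  if (argSpec.type === 'option_string') {
    if (savedValue) { return savedValue; }
    // otherwise fall back to the first available option
    return Array.isArray(argSpec.options)
      ? argSpec.options[0]
      : Object.keys(argSpec.options)[0];
  }
  return savedValue || ''; // assumption: other types default to an empty string
}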
@ -105,7 +106,6 @@ class SetupTab extends React.Component {
this.updateArgTouched = this.updateArgTouched.bind(this);
this.updateArgValues = this.updateArgValues.bind(this);
this.batchUpdateArgs = this.batchUpdateArgs.bind(this);
this.insertNWorkers = this.insertNWorkers.bind(this);
this.callUISpecFunctions = this.callUISpecFunctions.bind(this);
this.browseForDatastack = this.browseForDatastack.bind(this);
this.loadParametersFromFile = this.loadParametersFromFile.bind(this);
@ -204,19 +204,6 @@ class SetupTab extends React.Component {
}
}
/**
* n_workers is a special invest arg stored in global settings
*
* @param {object} argsValues - of the shape returned by `initializeArgValues`.
* @returns {object} copy of original argsValues with an n_workers property.
*/
insertNWorkers(argsValues) {
return {
...argsValues,
n_workers: { value: this.props.nWorkers },
};
}
/** Save the current invest arguments to a python script via datastack.py API.
*
* @param {string} filepath - desired path to the python script
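With insertNWorkers removed, the renderer no longer injects n_workers into the args dictionary; presumably the value now travels through the main-process settings store when a model is launched. A hedged sketch of that wiring (the helper name, file path, and call site are assumptions, not shown in this diff):

import { settingsStore } from '../main/settingsStore';

// hypothetical main-process helper that merges the stored n_workers value
// into the args handed to the invest CLI
function withNWorkers(args) {
  return { ...args, n_workers: settingsStore.get('nWorkers') };
}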
@ -226,9 +213,7 @@ class SetupTab extends React.Component {
const {
modelName,
} = this.props;
const args = argsDictFromObject(
this.insertNWorkers(this.state.argsValues)
);
const args = argsDictFromObject(this.state.argsValues);
const payload = {
filepath: filepath,
modelname: modelName,
@ -242,9 +227,7 @@ class SetupTab extends React.Component {
const {
pyModuleName,
} = this.props;
const args = argsDictFromObject(
this.insertNWorkers(this.state.argsValues)
);
const args = argsDictFromObject(this.state.argsValues);
const payload = {
filepath: datastackPath,
moduleName: pyModuleName,
@ -292,8 +275,20 @@ class SetupTab extends React.Component {
}
async loadParametersFromFile(filepath) {
const datastack = await fetchDatastackFromFile(filepath);
const { pyModuleName, switchTabs, t } = this.props;
let datastack;
try {
datastack = await fetchDatastackFromFile(filepath);
} catch (error) {
logger.error(error);
alert( // eslint-disable-line no-alert
t(
'No InVEST model data can be parsed from the file:\n {{filepath}}',
{ filepath: filepath }
)
);
return;
}
if (datastack.module_name === pyModuleName) {
this.batchUpdateArgs(datastack.args);
switchTabs('setup');
@ -317,7 +312,7 @@ class SetupTab extends React.Component {
wrapInvestExecute() {
this.props.investExecute(
argsDictFromObject(this.insertNWorkers(this.state.argsValues))
argsDictFromObject(this.state.argsValues)
);
}
@ -598,7 +593,6 @@ SetupTab.propTypes = {
}).isRequired,
argsInitValues: PropTypes.objectOf(PropTypes.oneOfType([PropTypes.string, PropTypes.bool])),
investExecute: PropTypes.func.isRequired,
nWorkers: PropTypes.string.isRequired,
sidebarSetupElementId: PropTypes.string.isRequired,
sidebarFooterElementId: PropTypes.string.isRequired,
executeClicked: PropTypes.bool.isRequired,

View File

@ -4,7 +4,6 @@ import { Translation } from 'react-i18next';
import i18n from '../i18n/i18n';
import { handleClickExternalURL } from './handlers';
import { getSettingsValue } from '../components/SettingsModal/SettingsStorage';
import { ipcMainChannels } from '../../main/ipcMainChannels';
import investLogo from '../static/invest-logo.png';
@ -15,8 +14,7 @@ async function getInvestVersion() {
return investVersion;
}
const language = await getSettingsValue('language');
await i18n.changeLanguage(language);
await i18n.changeLanguage(window.Workbench.LANGUAGE);
const investVersion = await getInvestVersion();
ReactDom.render(
<Translation>

View File

@ -7,12 +7,10 @@ import {
handleClickExternalURL,
handleClickFindLogfiles
} from './handlers';
import { getSettingsValue } from '../components/SettingsModal/SettingsStorage';
import investLogo from '../static/invest-logo.png';
import natcapLogo from '../static/NatCapLogo.jpg';
const language = await getSettingsValue('language');
await i18n.changeLanguage(language);
await i18n.changeLanguage(window.Workbench.LANGUAGE);
ReactDom.render(
<Translation>
{(t, { i18n }) => (

View File

@ -1,7 +1,5 @@
import { getSettingsValue } from './components/SettingsModal/SettingsStorage';
const HOSTNAME = 'http://127.0.0.1';
const { logger, PORT } = window.Workbench;
const { logger, PORT, LANGUAGE } = window.Workbench;
const PREFIX = 'api';
// The Flask server sends UTF-8 encoded responses by default
@ -16,13 +14,12 @@ const PREFIX = 'api';
* @returns {Promise} resolves object
*/
export async function getInvestModelNames() {
const language = await getSettingsValue('language');
return (
window.fetch(`${HOSTNAME}:${PORT}/${PREFIX}/models?language=${language}`, {
window.fetch(`${HOSTNAME}:${PORT}/${PREFIX}/models?language=${LANGUAGE}`, {
method: 'get',
})
.then((response) => response.json())
.catch((error) => { logger.error(`${error.stack}`) })
.catch((error) => { logger.error(`${error.stack}`); })
);
}
@ -33,9 +30,8 @@ export async function getInvestModelNames() {
* @returns {Promise} resolves object
*/
export async function getSpec(payload) {
const language = await getSettingsValue('language');
return (
window.fetch(`${HOSTNAME}:${PORT}/${PREFIX}/getspec?language=${language}`, {
window.fetch(`${HOSTNAME}:${PORT}/${PREFIX}/getspec?language=${LANGUAGE}`, {
method: 'post',
body: JSON.stringify(payload),
headers: { 'Content-Type': 'application/json' },
@ -55,9 +51,8 @@ export async function getSpec(payload) {
* @returns {Promise} resolves array
*/
export async function fetchValidation(payload) {
const language = await getSettingsValue('language');
return (
window.fetch(`${HOSTNAME}:${PORT}/${PREFIX}/validate?language=${language}`, {
window.fetch(`${HOSTNAME}:${PORT}/${PREFIX}/validate?language=${LANGUAGE}`, {
method: 'post',
body: JSON.stringify(payload),
headers: { 'Content-Type': 'application/json' },
@ -87,7 +82,6 @@ export function fetchDatastackFromFile(payload) {
headers: { 'Content-Type': 'application/json' },
})
.then((response) => response.json())
.catch((error) => logger.error(error.stack))
);
}
@ -101,7 +95,7 @@ export function getVectorColumnNames(payload) {
return (
window.fetch(`${HOSTNAME}:${PORT}/${PREFIX}/colnames`, {
method: 'post',
body: JSON.stringify({vector_path: payload}),
body: JSON.stringify({ vector_path: payload }),
headers: { 'Content-Type': 'application/json' },
})
.then((response) => response.json())
@ -188,7 +182,6 @@ export function writeParametersToFile(payload) {
);
}
/**
* Get the mapping of supported language codes to display names.
*
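The truncated doc comment above belongs to the helper that fetches the language options; a minimal sketch following the same fetch pattern as the other requests in this file (the endpoint name 'languages' is an assumption):

export function getSupportedLanguages() {
  return (
    window.fetch(`${HOSTNAME}:${PORT}/${PREFIX}/languages`, {
      method: 'get',
    })
      .then((response) => response.json())
      .catch((error) => { logger.error(`${error.stack}`); })
  );
}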

View File

@ -572,6 +572,11 @@ input[type=text]::placeholder {
margin-bottom: 0.2rem;
}
.confirm-modal .modal-content {
background-color: papayawhip;
margin-top: 100px;
}
.error-boundary {
max-width:600px;
margin: 0 auto;

View File

@ -16,6 +16,7 @@ import { APP_HAS_RUN_TOKEN } from '../../src/main/setupCheckFirstRun';
jest.setTimeout(240000);
const PORT = 9009;
const WAIT_TO_CLICK = 300; // ms
let ELECTRON_PROCESS;
let BROWSER;
@ -133,13 +134,15 @@ beforeEach(() => {
afterEach(async () => {
try {
const pages = await BROWSER.pages();
await Promise.all(pages.map(page => page.close()));
await BROWSER.close();
} catch (error) {
console.log(BINARY_PATH);
console.error(error);
// Normally BROWSER.close() will kill this process
ELECTRON_PROCESS.kill();
}
ELECTRON_PROCESS.removeAllListeners();
ELECTRON_PROCESS.kill();
});
test('Run a real invest model', async () => {
@ -164,6 +167,7 @@ test('Run a real invest model', async () => {
const downloadModal = await page.waitForSelector('.modal-dialog');
const downloadModalCancel = await downloadModal.waitForSelector(
'aria/[name="Cancel"][role="button"]');
await page.waitForTimeout(WAIT_TO_CLICK); // waiting for click handler to be ready
await downloadModalCancel.click();
// We need to get the modelButton from w/in this list-group because there
// are buttons with the same name in the Recent Jobs container.
@ -232,15 +236,17 @@ test('Check local userguide links', async () => {
const downloadModal = await page.waitForSelector('.modal-dialog');
const downloadModalCancel = await downloadModal.waitForSelector(
'aria/[name="Cancel"][role="button"]');
await page.waitForTimeout(WAIT_TO_CLICK); // waiting for click handler to be ready
await downloadModalCancel.click();
const investList = await page.waitForSelector('.invest-list-group');
const modelButtons = await investList.$$('aria/[role="button"]');
await page.waitForTimeout(WAIT_TO_CLICK); // first btn click does not register w/o this pause
for (const btn of modelButtons) {
await btn.click();
const link = await page.waitForSelector('text/User\'s Guide');
await page.waitForTimeout(300); // link.click() not working w/o this pause
await page.waitForTimeout(WAIT_TO_CLICK); // link.click() not working w/o this pause
const hrefHandle = await link.getProperty('href');
const hrefValue = await hrefHandle.jsonValue();
await link.click();
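The repeated waitForTimeout-then-click pairs above could be captured by one small helper; a sketch, not part of the diff:

async function clickWhenReady(page, elementHandle, pause = WAIT_TO_CLICK) {
  // give the element's click handler time to attach before clicking
  await page.waitForTimeout(pause);
  await elementHandle.click();
}

// usage, replacing the two-line pattern above:
// await clickWhenReady(page, downloadModalCancel);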

View File

@ -12,6 +12,7 @@ if (!process.env.ELECTRON_LOG_LEVEL) {
if (global.window) {
// mock the work of preload.js here:
const api = require('../src/preload/api').default;
api.LANGUAGE = 'en';
global.window.Workbench = api;
// normally electron main passes port to preload.

View File

@ -183,6 +183,7 @@ describe('createWindow', () => {
ipcMainChannels.CHANGE_LANGUAGE,
ipcMainChannels.CHECK_STORAGE_TOKEN,
ipcMainChannels.CHECK_FILE_PERMISSIONS,
ipcMainChannels.GET_SETTING,
ipcMainChannels.GET_N_CPUS,
ipcMainChannels.INVEST_VERSION,
ipcMainChannels.IS_FIRST_RUN,
@ -192,6 +193,7 @@ describe('createWindow', () => {
const expectedOnChannels = [
ipcMainChannels.DOWNLOAD_URL,
ipcMainChannels.GET_ELECTRON_PATHS,
ipcMainChannels.GET_LANGUAGE,
ipcMainChannels.INVEST_RUN,
ipcMainChannels.INVEST_KILL,
ipcMainChannels.INVEST_READ_LOG,
@ -199,6 +201,7 @@ describe('createWindow', () => {
ipcMainChannels.SHOW_ITEM_IN_FOLDER,
ipcMainChannels.OPEN_EXTERNAL_URL,
ipcMainChannels.OPEN_LOCAL_HTML,
ipcMainChannels.SET_SETTING,
];
// Even with mocking, the 'on' method is a real event handler,
// so we can get its registered events from the EventEmitter.
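Reading the registered channels back out of the EventEmitter could look like this (a sketch; ipcMain here is the mocked electron ipcMain these tests already use):

const registeredOnChannels = ipcMain.eventNames();
expectedOnChannels.forEach((channel) => {
  expect(registeredOnChannels).toContain(channel);
});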

View File

@ -0,0 +1,52 @@
import {
defaults,
settingsStore,
initStore,
} from '../../src/main/settingsStore';
afterEach(() => {
settingsStore.reset();
});
test('an empty store initializes to defaults', () => {
const store = initStore();
expect(store.store).toEqual(defaults);
});
test('invalid items are reset, valid items are unchanged', () => {
const data = { ...defaults };
data.nWorkers = 5; // valid, but not default
data.taskgraphLoggingLevel = 'ERROR'; // valid, but not default
data.loggingLevel = 'FOO'; // wrong value
data.language = 1; // wrong type
const store = initStore(data);
// invalid: should be reset to defaults
expect(store.get('loggingLevel')).toBe(defaults.loggingLevel);
expect(store.get('language')).toBe(defaults.language);
// valid: should not be reset to defaults
expect(store.get('taskgraphLoggingLevel')).toBe(data.taskgraphLoggingLevel);
expect(store.get('nWorkers')).toBe(data.nWorkers);
});
test('properties not present in schema are untouched during validation', () => {
const data = { ...defaults };
data.foo = 'bar';
const store = initStore(data);
expect(store.get('foo')).toEqual(data.foo);
});
test('missing properties are added with default value', () => {
const data = { ...defaults };
delete data.loggingLevel;
delete data.language;
const store = initStore(data);
expect(store.get('loggingLevel')).toEqual(defaults.loggingLevel);
expect(store.get('language')).toEqual(defaults.language);
});
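Taken together, these tests describe the validation performed by initStore: known keys are checked against the defaults, invalid or missing values fall back to the default, and unknown keys pass through untouched. A hedged sketch of that rule (not the actual implementation, which may use a JSON schema):

function validateSettings(data, defaults, allowedValues = {}) {
  const result = { ...data };
  Object.keys(defaults).forEach((key) => {
    const value = result[key];
    const wrongType = typeof value !== typeof defaults[key];
    const wrongValue = allowedValues[key] && !allowedValues[key].includes(value);
    if (value === undefined || wrongType || wrongValue) {
      result[key] = defaults[key]; // reset invalid or missing entries
    }
  });
  return result; // keys not present in defaults are left untouched
}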

View File

@ -3,7 +3,6 @@ import { ipcRenderer } from 'electron';
import {
render, waitFor, within
} from '@testing-library/react';
import { act } from 'react-dom/test-utils';
import userEvent from '@testing-library/user-event';
import '@testing-library/jest-dom';
@ -17,13 +16,12 @@ import {
} from '../../src/renderer/server_requests';
import InvestJob from '../../src/renderer/InvestJob';
import {
getSettingsValue,
saveSettingsStore,
clearSettingsStore,
} from '../../src/renderer/components/SettingsModal/SettingsStorage';
settingsStore,
setupSettingsHandlers
} from '../../src/main/settingsStore';
import { ipcMainChannels } from '../../src/main/ipcMainChannels';
import { removeIpcMainListeners } from '../../src/main/main';
import { mockUISpec } from './utils';
// It's quite a pain to dynamically mock a const from a module;
// here we do it by importing it as another object, so that
// we can overwrite the object we want to mock later.
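The comment above refers to a pattern like the following (module path and constant name are hypothetical; the in-place patch works because jest/babel transpiles these imports to mutable CommonJS objects):

import * as uiConfig from '../../src/renderer/ui_config';

// later, inside a test, the namespace object imported above is patched in place:
// uiConfig.UI_SPEC = mockUISpec(spec);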
@ -270,7 +268,7 @@ describe('Various ways to open and close InVEST models', () => {
describe('Display recently executed InVEST jobs on Home tab', () => {
beforeEach(() => {
getInvestModelNames.mockResolvedValue({});
getInvestModelNames.mockResolvedValue(MOCK_INVEST_LIST);
});
afterEach(async () => {
@ -279,7 +277,7 @@ describe('Display recently executed InVEST jobs on Home tab', () => {
test('Recent Jobs: each has a button', async () => {
const job1 = new InvestJob({
modelRunName: 'carbon',
modelRunName: MOCK_MODEL_RUN_NAME,
modelHumanName: 'Carbon Sequestration',
argsValues: {
workspace_dir: 'work1',
@ -288,7 +286,7 @@ describe('Display recently executed InVEST jobs on Home tab', () => {
});
await InvestJob.saveJob(job1);
const job2 = new InvestJob({
modelRunName: 'sdr',
modelRunName: MOCK_MODEL_RUN_NAME,
modelHumanName: 'Sediment Ratio Delivery',
argsValues: {
workspace_dir: 'work2',
@ -330,7 +328,7 @@ describe('Display recently executed InVEST jobs on Home tab', () => {
test('Recent Jobs: a job with incomplete data is skipped', async () => {
const job1 = new InvestJob({
modelRunName: 'carbon',
modelRunName: MOCK_MODEL_RUN_NAME,
modelHumanName: 'invest A',
argsValues: {
workspace_dir: 'dir',
@ -339,7 +337,7 @@ describe('Display recently executed InVEST jobs on Home tab', () => {
});
const job2 = new InvestJob({
// argsValues is missing
modelRunName: 'sdr',
modelRunName: MOCK_MODEL_RUN_NAME,
modelHumanName: 'invest B',
status: 'success',
});
@ -352,6 +350,23 @@ describe('Display recently executed InVEST jobs on Home tab', () => {
expect(queryByText(job2.modelHumanName)).toBeNull();
});
test('Recent Jobs: a job from a deprecated model is not displayed', async () => {
const job1 = new InvestJob({
modelRunName: 'does not exist',
modelHumanName: 'invest A',
argsValues: {
workspace_dir: 'dir',
},
status: 'success',
});
await InvestJob.saveJob(job1);
const { findByText, queryByText } = render(<App />);
expect(queryByText(job1.modelHumanName)).toBeNull();
expect(await findByText(/Set up a model from a sample datastack file/))
.toBeInTheDocument();
});
test('Recent Jobs: placeholder if there are no recent jobs', async () => {
const { findByText } = render(
<App />
@ -363,7 +378,7 @@ describe('Display recently executed InVEST jobs on Home tab', () => {
test('Recent Jobs: cleared by button', async () => {
const job1 = new InvestJob({
modelRunName: 'carbon',
modelRunName: MOCK_MODEL_RUN_NAME,
modelHumanName: 'Carbon Sequestration',
argsValues: {
workspace_dir: 'work1',
@ -393,42 +408,29 @@ describe('InVEST global settings: dialog interactions', () => {
const tgLoggingLabelText = 'Taskgraph logging threshold';
const languageLabelText = 'Language';
const { location } = global.window;
beforeAll(() => {
// window.location.reload is not implemented in jsdom
delete global.window.location;
Object.defineProperty(global.window, 'location', {
configurable: true,
value: { reload: jest.fn() },
});
setupSettingsHandlers();
});
afterAll(() => {
Object.defineProperty(global.window, 'location', {
configurable: true,
value: location,
});
removeIpcMainListeners();
});
beforeEach(async () => {
getInvestModelNames.mockResolvedValue({});
getSupportedLanguages.mockResolvedValue({ en: 'english', es: 'spanish' });
ipcRenderer.invoke.mockImplementation(() => Promise.resolve());
});
afterEach(async () => {
await clearSettingsStore();
});
test('Invest settings save on change', async () => {
const nWorkersLabel = 'Threaded task management (0)';
const nWorkersValue = '0';
const nWorkersValue = 0;
const loggingLevel = 'DEBUG';
const tgLoggingLevel = 'DEBUG';
const languageValue = 'es';
const spyInvoke = jest.spyOn(ipcRenderer, 'invoke');
const {
getByText, getByRole, getByLabelText, findByRole,
getByText, getByLabelText, findByRole, findByText,
} = render(
<App />
);
@ -437,74 +439,25 @@ describe('InVEST global settings: dialog interactions', () => {
const nWorkersInput = getByLabelText(nWorkersLabelText, { exact: false });
const loggingInput = getByLabelText(loggingLabelText);
const tgLoggingInput = getByLabelText(tgLoggingLabelText);
const languageInput = getByLabelText(languageLabelText, { exact: false });
await userEvent.selectOptions(nWorkersInput, [getByText(nWorkersLabel)]);
await waitFor(() => { expect(nWorkersInput).toHaveValue(nWorkersValue); });
await waitFor(() => { expect(nWorkersInput).toHaveValue(nWorkersValue.toString()); });
await userEvent.selectOptions(loggingInput, [loggingLevel]);
await waitFor(() => { expect(loggingInput).toHaveValue(loggingLevel); });
await userEvent.selectOptions(tgLoggingInput, [tgLoggingLevel]);
await waitFor(() => { expect(tgLoggingInput).toHaveValue(tgLoggingLevel); });
await userEvent.selectOptions(languageInput, [languageValue]);
await waitFor(() => { expect(languageInput).toHaveValue(languageValue); });
await userEvent.click(getByRole('button', { name: 'close settings' }));
// Check values were saved in app and in store
await userEvent.click(await findByRole('button', { name: 'settings' }));
await waitFor(() => {
expect(nWorkersInput).toHaveValue(nWorkersValue);
expect(loggingInput).toHaveValue(loggingLevel);
expect(tgLoggingInput).toHaveValue(tgLoggingLevel);
expect(languageInput).toHaveValue(languageValue);
});
expect(await getSettingsValue('nWorkers')).toBe(nWorkersValue);
expect(await getSettingsValue('loggingLevel')).toBe(loggingLevel);
expect(await getSettingsValue('taskgraphLoggingLevel')).toBe(tgLoggingLevel);
expect(await getSettingsValue('language')).toBe(languageValue);
});
// Check values were saved
expect(settingsStore.get('nWorkers')).toBe(nWorkersValue);
expect(settingsStore.get('loggingLevel')).toBe(loggingLevel);
expect(settingsStore.get('taskgraphLoggingLevel')).toBe(tgLoggingLevel);
test('Load invest settings from storage and test Reset', async () => {
const defaultSettings = {
nWorkers: '-1',
loggingLevel: 'INFO',
taskgraphLoggingLevel: 'ERROR',
language: 'en',
};
const expectedSettings = {
nWorkers: '0',
loggingLevel: 'ERROR',
taskgraphLoggingLevel: 'INFO',
language: 'en',
};
await saveSettingsStore(expectedSettings);
const {
getByText, getByLabelText, findByRole,
} = render(<App />);
await userEvent.click(await findByRole('button', { name: 'settings' }));
const nWorkersInput = getByLabelText(nWorkersLabelText, { exact: false });
const loggingInput = getByLabelText(loggingLabelText);
const tgLoggingInput = getByLabelText(tgLoggingLabelText);
// language is handled differently; changing it triggers electron to restart
const languageInput = getByLabelText(languageLabelText, { exact: false });
// Test that the invest settings were loaded in from store.
await waitFor(() => {
expect(nWorkersInput).toHaveValue(expectedSettings.nWorkers);
expect(loggingInput).toHaveValue(expectedSettings.loggingLevel);
expect(tgLoggingInput).toHaveValue(expectedSettings.tgLoggingLevel);
expect(languageInput).toHaveValue(expectedSettings.language);
});
// Test Reset sets values to default
await userEvent.click(getByText('Reset to Defaults'));
await waitFor(() => {
expect(nWorkersInput).toHaveValue(defaultSettings.nWorkers);
expect(loggingInput).toHaveValue(defaultSettings.loggingLevel);
expect(tgLoggingInput).toHaveValue(defaultSettings.tgLoggingLevel);
expect(languageInput).toHaveValue(defaultSettings.language);
});
await userEvent.selectOptions(languageInput, [languageValue]);
await userEvent.click(await findByText('Change to spanish'));
expect(spyInvoke)
.toHaveBeenCalledWith(ipcMainChannels.CHANGE_LANGUAGE, languageValue);
});
test('Access sampledata download Modal from settings', async () => {
@ -525,40 +478,3 @@ describe('InVEST global settings: dialog interactions', () => {
expect(queryByText('Settings')).toBeNull();
});
});
describe('Translation', () => {
const { location } = global.window;
beforeAll(async () => {
getInvestModelNames.mockResolvedValue({});
getSupportedLanguages.mockResolvedValue({ en: 'english', ll: 'foo' });
delete global.window.location;
Object.defineProperty(global.window, 'location', {
configurable: true,
value: { reload: jest.fn() },
});
});
afterAll(() => {
Object.defineProperty(global.window, 'location', {
configurable: true,
value: location,
});
});
test('Text rerenders in new language when language setting changes', async () => {
const { findByLabelText } = render(<App />);
await userEvent.click(await findByLabelText('settings'));
const languageInput = await findByLabelText('Language', { exact: false });
expect(languageInput).toHaveValue('en');
await userEvent.selectOptions(languageInput, 'll');
await waitFor(() => {
expect(global.window.location.reload).toHaveBeenCalled();
});
// because we can't reload the window in the test environment,
// components won't actually rerender in the new language
expect(languageInput).toHaveValue('ll');
});
});

View File

@ -12,10 +12,6 @@ import DownloadProgressBar from '../../src/renderer/components/DownloadProgressB
import sampledata_registry from '../../src/renderer/components/DataDownloadModal/sampledata_registry.json';
import { getInvestModelNames } from '../../src/renderer/server_requests';
import App from '../../src/renderer/app';
import {
clearSettingsStore,
getSettingsValue,
} from '../../src/renderer/components/SettingsModal/SettingsStorage';
import setupDownloadHandlers from '../../src/main/setupDownloadHandlers';
import { removeIpcMainListeners } from '../../src/main/main';
import { ipcMainChannels } from '../../src/main/ipcMainChannels';
@ -173,7 +169,6 @@ describe('Integration tests with main process', () => {
afterEach(async () => {
removeIpcMainListeners();
await clearSettingsStore();
});
test('Download: starts, updates progress, & stores location', async () => {
@ -200,29 +195,12 @@ describe('Integration tests with main process', () => {
const downloadButton = await findByRole('button', { name: 'Download' });
await userEvent.click(downloadButton);
const nURLs = allCheckBoxes.length - 1; // all except Select All
await waitFor(async () => {
expect(await getSettingsValue('sampleDataDir'))
.toBe(dialogData.filePaths[0]);
});
const progressBar = await findByRole('progressbar');
expect(progressBar).toHaveTextContent(`Downloading 1 of ${nURLs}`);
// The electron window's downloadURL function is mocked, so we don't
// expect the progress bar to update further in this test.
});
test('Cancel: does not store a sampleDataDir value', async () => {
const { findByRole } = render(<App isFirstRun />);
const existingValue = await getSettingsValue('sampleDataDir');
const cancelButton = await findByRole('button', { name: 'Cancel' });
await userEvent.click(cancelButton);
await waitFor(async () => {
const value = await getSettingsValue('sampleDataDir');
expect(value).toBe(existingValue);
});
});
test('Alert when download location is not writeable', async () => {
const dialogData = {
filePaths: ['foo/directory'],

View File

@ -179,7 +179,7 @@ describe('Sidebar Buttons', () => {
const args = JSON.parse(payload.args);
const argKeys = Object.keys(args);
expect(argKeys).toEqual(
expect.arrayContaining(Object.keys(spec.args).concat('n_workers'))
expect.arrayContaining(Object.keys(spec.args))
);
argKeys.forEach((key) => {
expect(typeof args[key]).toBe('string');
@ -191,7 +191,7 @@ describe('Sidebar Buttons', () => {
const response = 'saved';
saveToPython.mockResolvedValue(response);
const mockDialogData = { canceled: false, filePath: 'foo.py' };
ipcRenderer.invoke.mockResolvedValue(mockDialogData);
ipcRenderer.invoke.mockResolvedValueOnce(mockDialogData);
const { findByText, findByLabelText, findByRole } = renderInvestTab();
const saveAsButton = await findByText('Save as...');
@ -215,7 +215,7 @@ describe('Sidebar Buttons', () => {
const args = JSON.parse(payload.args);
const argKeys = Object.keys(args);
expect(argKeys).toEqual(
expect.arrayContaining(Object.keys(spec.args).concat('n_workers'))
expect.arrayContaining(Object.keys(spec.args))
);
argKeys.forEach((key) => {
expect(typeof args[key]).toBe('string');

View File

@ -163,6 +163,21 @@ describe('Arguments form input types', () => {
expect(input).toHaveValue('a');
expect(input).not.toHaveValue('b');
});
test('initial arg values can contain extra args', async () => {
const spec = baseArgsSpec('number');
const displayedValue = '1';
const missingValue = '0';
const initArgs = {
[Object.keys(spec.args)[0]]: displayedValue,
paramZ: missingValue, // paramZ is not in the ARGS_SPEC or UI_SPEC
};
const { findByLabelText, queryByText } = renderSetupFromSpec(spec, UI_SPEC, initArgs);
const input = await findByLabelText(`${spec.args.arg.name} (${spec.args.arg.units})`);
await waitFor(() => expect(input).toHaveValue(displayedValue));
expect(queryByText(missingValue)).toBeNull();
});
});
describe('Arguments form interactions', () => {
@ -577,8 +592,8 @@ describe('Misc form validation stuff', () => {
const rasterValue = './raster.tif';
const expectedVal2 = '-79.0198012081401';
const rasterBox = `[${expectedVal2}, 26.481559513537064, -78.37173806200593, 27.268061760228512]`;
const message = `Bounding boxes do not intersect: ${vectorValue}: ${vectorBox} | ${rasterValue}: ${rasterBox}`;
const newPrefix = 'Bounding box does not intersect at least one other:';
const message = `Not all of the spatial layers overlap each other. All bounding boxes must intersect: ${vectorValue}: ${vectorBox} | ${rasterValue}: ${rasterBox}`;
const newPrefix = 'Not all of the spatial layers overlap each other. Bounding box:';
const vectorMessage = new RegExp(`${newPrefix}\\s*\\[${expectedVal1}`);
const rasterMessage = new RegExp(`${newPrefix}\\s*\\[${expectedVal2}`);

File diff suppressed because it is too large