merge upstream release branch and resolve history conflict. #1662

davemfish 2025-02-11 10:13:09 -05:00
commit 515942389d
23 changed files with 1666 additions and 32 deletions


@ -430,21 +430,20 @@ jobs:
WORKBENCH_BINARY=$(find "$(pwd)/workbench/dist" -type f -name 'invest_*.dmg' | head -n 1)
make WORKBENCH_BIN_TO_SIGN="$WORKBENCH_BINARY" codesign_mac
#- name: Sign binaries (Windows)
# if: github.event_name != 'pull_request' && matrix.os == 'windows-latest' # secrets not available in PR
# env:
# CERT_FILE: Stanford-natcap-code-signing-cert-expires-2024-01-26.p12
# CERT_PASS: ${{ secrets.WINDOWS_CODESIGN_CERT_PASS }}
# run: |
# # figure out the path to signtool.exe (it keeps changing with SDK updates)
# SIGNTOOL_PATH=$(find 'C:\\Program Files (x86)\\Windows Kits\\10' -type f -name 'signtool.exe*' | head -n 1)
# WORKBENCH_BINARY=$(find "$(pwd)/workbench/dist" -type f -name 'invest_*.exe' | head -n 1)
# make WORKBENCH_BIN_TO_SIGN="$WORKBENCH_BINARY" SIGNTOOL="$SIGNTOOL_PATH" codesign_windows
- name: Deploy artifacts to GCS
if: github.event_name != 'pull_request'
run: make deploy
# This relies on the file existing on GCP, so it must be run after `make
# deploy` is called.
- name: Queue windows binaries for signing
if: github.event_name != 'pull_request' && matrix.os == 'windows-latest' # secrets not available in PR
env:
ACCESS_TOKEN: ${{ secrets.CODESIGN_QUEUE_ACCESS_TOKEN }}
run: |
cd codesigning
bash enqueue-current-windows-installer.sh
- name: Upload workbench binary artifact
if: always()
uses: actions/upload-artifact@v4


@ -36,6 +36,7 @@ jobs:
- uses: actions/checkout@v4
if: env.RUN == 'true'
with:
fetch-depth: 0 # fetch entire history, for versioning
token: ${{ secrets.AUTORELEASE_BOT_PAT }}
- name: Install dependencies


@ -72,14 +72,16 @@ jobs:
rm -rf artifacts/Wheel*
# download each artifact separately so that the command will fail if any is missing
for artifact in Workbench-Windows-binary \
Workbench-macOS-binary \
for artifact in Workbench-macOS-binary \
InVEST-sample-data \
InVEST-user-guide
do
gh run download $RUN_ID --dir artifacts --name "$artifact"
done
# download the signed windows workbench file from GCS
wget --directory-prefix=artifacts https://storage.googleapis.com/releases.naturalcapitalproject.org/invest/${{ env.VERSION }}/workbench/invest_${{ env.VERSION }}_workbench_win32_x64.exe
# We build one sdist per combination of OS and python version, so just
# download and unzip all of them into an sdists directory so we can
# just grab the first one. This approach is more flexible to changes


@ -39,16 +39,28 @@
Unreleased Changes
------------------
* General
* Fixed an issue where a user's PROJ_DATA environment variable could
trigger a RuntimeError about a missing proj.db file.
https://github.com/natcap/invest/issues/1742
* Now testing and building against Python 3.13.
No longer testing and building with Python 3.8, which reached EOL.
https://github.com/natcap/invest/issues/1755
* All InVEST model output data now include metadata sidecar files.
These are '.yml' files with the same basename as the dataset they
describe. https://github.com/natcap/invest/issues/1662
* InVEST's Windows binaries are once again distributed with a valid
signature, signed by Stanford University.
https://github.com/natcap/invest/issues/1580
* Workbench
* Auto-scrolling of log output is halted on user-initiated scrolling,
enabling easier inspection of log output while a model is running
(`InVEST #1533 <https://github.com/natcap/invest/issues/1533>`_).
* Annual Water Yield
* Fixed an issue where the model would crash if the valuation table was
provided, but the demand table was not. Validation will now warn about
this, and the ``MODEL_SPEC`` has been improved to reflect that this table
is now required when doing valuation.
https://github.com/natcap/invest/issues/1769
* Carbon
* Updated styling of the HTML report generated by the carbon model, for
visual consistency with the Workbench (`InVEST #1732


@ -356,10 +356,8 @@ codesign_mac:
codesign --timestamp --verbose --sign Stanford $(WORKBENCH_BIN_TO_SIGN)
codesign_windows:
$(GSUTIL) cp gs://stanford_cert/$(CERT_FILE) $(BUILD_DIR)/$(CERT_FILE)
"$(SIGNTOOL)" sign -fd SHA256 -f $(BUILD_DIR)/$(CERT_FILE) -p $(CERT_PASS) $(WORKBENCH_BIN_TO_SIGN)
"$(SIGNTOOL)" timestamp -tr http://timestamp.sectigo.com -td SHA256 $(WORKBENCH_BIN_TO_SIGN)
$(RM) $(BUILD_DIR)/$(CERT_FILE)
@echo "Installer was signed with signtool"
deploy:

codesigning/Makefile (new file)

@ -0,0 +1,21 @@
.PHONY: deploy-cloudfunction deploy-worker
deploy-cloudfunction:
gcloud functions deploy \
--project natcap-servers \
codesigning-queue \
--memory=256Mi \
--trigger-http \
--gen2 \
--region us-west1 \
--allow-unauthenticated \
--entry-point main \
--runtime python312 \
--source gcp-cloudfunc/
# NOTE: This must be executed from a computer that has SSH access to ncp-inkwell.
deploy-worker:
cd signing-worker && ansible-playbook \
--ask-become-pass \
--inventory-file inventory.ini \
playbook.yml

codesigning/README.md (new file)

@ -0,0 +1,74 @@
# InVEST Codesigning Service
This directory contains all of the functional code and configuration (minus a
few secrets) that are needed to deploy our code-signing service. There are
three key components to this service:
1. A cloud function (`gcp-cloudfunc/`) that operates as a Google Cloud Storage-backed, high-latency queue.
2. A script (`enqueue-binary.py`) that will enqueue a binary that already
exists on one of our GCS buckets.
3. A `systemd` service that runs on a debian:bookworm machine and periodically
polls the cloud function to dequeue the next item to sign.
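For reference, a minimal enqueue request looks roughly like the sketch below;
it mirrors `enqueue-binary.py` in this directory, and the binary URL shown is a
placeholder.
```python
# Sketch of an enqueue request against the codesigning queue cloud function.
import os
import requests

response = requests.post(
    'https://us-west1-natcap-servers.cloudfunctions.net/codesigning-queue',
    json={
        'token': os.environ['ACCESS_TOKEN'],  # shared secret (see Secrets below)
        'action': 'enqueue',
        # Placeholder URL: any .exe in the releases or dev-builds bucket.
        'url': ('https://storage.googleapis.com/releases.naturalcapitalproject.org/'
                'invest/X.Y.Z/workbench/invest_X.Y.Z_workbench_win32_x64.exe'),
    })
response.raise_for_status()
```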
## Deploying the Cloud Function
The necessary `gcloud` deployment command can be run with:
```bash
$ make deploy-cloudfunction
```
### Secrets
The current deployment process requires you to manually create an environment
variable, `ACCESS_TOKEN`, in the cloud function's configuration. It contains the
secret token shared by the cloud function, the systemd service, and the enqueue
script.
## Deploying the Systemd Service
To deploy the systemd service, you will need to be on a computer with SSH
access to `ncp-inkwell`, a machine with a YubiKey installed that is assumed to
run debian:bookworm at this time. To deploy
(non-secret) changes to ncp-inkwell, run this in an environment where
`ansible-playbook` is available (`pip install ansible` to install):
```bash
$ make deploy-worker
```
### Secrets
The systemd service requires several secrets to be available in the codesigning
workspace, which is located at `/opt/natcap-codesign`:
* `/opt/natcap-codesign/pass.txt` is a plain text file containing only the PIN
for the yubikey
* `/opt/natcap-codesign/access_token.txt` is a plain text file containing the
access token shared with the cloud function, systemd service and enqueue script.
* `/opt/natcap-codesign/slack_token.txt` is a plain text file containing the
slack token used to post messages to our slack workspace.
* `/opt/natcap-codesign/natcap-servers-1732552f0202.json` is a GCP service
account key used to authenticate to google cloud storage. This file must be
available in the `gcp-cloudfunc/` directory at the time of deployment.
## Future Work
### Authenticate to the function with Identity Federation
Access to the cloud function is controlled by a shared secret token, which is
not ideal. Instead, we should use GitHub/GCP identity federation to control
access.
### Trigger the function with GCS Events
GCP Cloud Functions have the ability to subscribe to bucket events, which
should allow us to subscribe very specifically to just those `finalize` events
that apply to the Windows workbench binaries. Doing so will require reworking this cloud function into 2 cloud functions:
1. An endpoint for ncp-inkwell to poll for the next binary to sign
2. A cloud function that subscribes to GCS bucket events and enqueues the binary to sign.
Relevant docs include:
* https://cloud.google.com/functions/docs/writing/write-event-driven-functions#cloudevent-example-python
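A rough sketch of the event-driven half (function 2 above) might look like the
following; the `enqueue_url()` helper is hypothetical and would wrap the
existing queue-update logic.
```python
# Hypothetical sketch only: enqueue Windows workbench binaries when their
# upload to GCS finalizes. `enqueue_url` is an assumed helper.
import functions_framework


@functions_framework.cloud_event
def enqueue_on_finalize(cloud_event):
    gcs_object = cloud_event.data  # object metadata from the finalize event
    name = gcs_object['name']
    if name.endswith('.exe'):
        enqueue_url(
            f"https://storage.googleapis.com/{gcs_object['bucket']}/{name}")
```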


@ -0,0 +1,28 @@
"""Enqueue a windows binary for signing.
To call this script, you need to set the ACCESS_TOKEN environment variable from
the software team secrets store.
Example invocation:
$ ACCESS_TOKEN=abcs1234 python3 enqueue-binary.py <gs:// uri to binary on gcs>
"""
import os
import sys
import requests
DATA = {
'token': os.environ['ACCESS_TOKEN'],
'action': 'enqueue',
'url': sys.argv[1].replace(
'gs://', 'https://storage.googleapis.com/'),
}
response = requests.post(
'https://us-west1-natcap-servers.cloudfunctions.net/codesigning-queue',
json=DATA
)
if response.status_code >= 400:
print(response.text)
sys.exit(1)


@ -0,0 +1,13 @@
#!/usr/bin/env sh
#
# Run this script to enqueue the current version of the InVEST Windows
# workbench installer for code signing.
#
# NOTE: this script must be run from the directory containing this script.
version=$(python -m setuptools_scm)
url_base=$(make -C .. --no-print-directory print-DIST_URL_BASE | awk ' { print $3 } ')
url="${url_base}/workbench/invest_${version}_workbench_win32_x64.exe"
echo "Enqueuing URL ${url}"
python enqueue-binary.py "${url}"


@ -0,0 +1,180 @@
import contextlib
import datetime
import json
import logging
import os
import time
from urllib.parse import unquote
import functions_framework
import google.cloud.logging # pip install google-cloud-logging
import requests
from flask import jsonify
from google.cloud import storage # pip install google-cloud-storage
GOOGLE_PREFIX = 'https://storage.googleapis.com'
CODESIGN_DATA_BUCKET = 'natcap-codesigning'
LOG_CLIENT = google.cloud.logging.Client()
LOG_CLIENT.setup_logging()
@contextlib.contextmanager
def get_lock():
"""Acquire a GCS-based mutex.
This requires that the bucket we are using has versioning.
"""
storage_client = storage.Client()
bucket = storage_client.bucket(CODESIGN_DATA_BUCKET)
lock_obtained = False
n_tries = 100
for i in range(n_tries):
lockfile = bucket.blob('mutex.lock')
if not lockfile.generation:
lockfile.upload_from_string(
f"Lock acquired {datetime.datetime.now().isoformat()}")
lock_obtained = True
break
else:
time.sleep(0.1)
if not lock_obtained:
raise RuntimeError(f'Could not obtain lock after {n_tries} tries')
try:
yield
finally:
lockfile.delete()
@functions_framework.http
def main(request):
"""Handle requests to this GCP Cloud Function.
All requests must be POST requests and have a JSON body with the following
attributes:
* token: a secret token that matches the ACCESS_TOKEN environment
variable that is defined in the cloud function configuration.
* action: either 'enqueue' or 'dequeue'
If the action is 'enqueue', the request must also have a 'url' attribute.
The 'url' attribute, when provided, must be a URL to a file that meets
these requirements:
* The URL must be a publicly accessible URL
* The URL must be a file that ends in '.exe'
* The URL must be located in either the releases bucket, or else
in the dev builds bucket. It doesn't necessarily have to be an
InVEST binary.
* The URL must be a file that is not older than June 1, 2024
* The URL must be a file that is not already in the queue
* The URL should be a file that is not already signed (if the file has
already been signed, its signature will be overwritten)
"""
data = request.get_json()
if data['token'] != os.environ['ACCESS_TOKEN']:
logging.info('Rejecting request due to invalid token')
return jsonify('Invalid token'), 403
if request.method != 'POST':
logging.info('Rejecting request due to invalid HTTP method')
return jsonify('Invalid request method'), 405
storage_client = storage.Client()
bucket = storage_client.bucket(CODESIGN_DATA_BUCKET)
logging.debug(f'Data POSTed: {data}')
if data['action'] == 'dequeue':
with get_lock():
queuefile = bucket.blob('queue.json')
queue_dict = json.loads(queuefile.download_as_string())
try:
next_file_url = queue_dict['queue'].pop(0)
except IndexError:
# No items in the queue!
logging.info('No binaries are currently queued for signing')
return jsonify('No items in the queue'), 204
queuefile.upload_from_string(json.dumps(queue_dict))
data = {
'https-url': next_file_url,
'basename': os.path.basename(next_file_url),
'gs-uri': unquote(next_file_url.replace(
f'{GOOGLE_PREFIX}/', 'gs://')),
}
logging.info(f'Dequeued {next_file_url}')
return jsonify(data)
elif data['action'] == 'enqueue':
url = data['url']
logging.info(f'Attempting to enqueue url {url}')
if not url.endswith('.exe'):
logging.info("Rejecting URL because it doesn't end in .exe")
return jsonify('Invalid URL to sign'), 400
if not url.startswith(GOOGLE_PREFIX):
logging.info(f'Rejecting URL because it does not start with {GOOGLE_PREFIX}')
return jsonify('Invalid host'), 400
if not url.startswith((
f'{GOOGLE_PREFIX}/releases.naturalcapitalproject.org/',
f'{GOOGLE_PREFIX}/natcap-dev-build-artifacts/')):
logging.info('Rejecting URL because the bucket is incorrect')
return jsonify("Invalid target bucket"), 400
# Remove http character quoting
url = unquote(url)
binary_bucket_name, *binary_obj_paths = url.replace(
GOOGLE_PREFIX + '/', '').split('/')
codesign_bucket = storage_client.bucket(CODESIGN_DATA_BUCKET)
# If the file does not exist at this URL, reject it.
response = requests.head(url)
if response.status_code >= 400:
logging.info('Rejecting URL because it does not exist')
return jsonify('Requested file does not exist'), 403
# If the file is too old, reject it. Trying to avoid a
# denial-of-service by invoking the service with very old files.
# I just pulled June 1 out of thin air as a date that is a little while
# ago, but not so long ago that we could suddenly have many files
# enqueued.
mday, mmonth, myear = response.headers['Last-Modified'].split(' ')[1:4]
modified_time = datetime.datetime.strptime(
' '.join((mday, mmonth, myear)), '%d %b %Y')
if modified_time < datetime.datetime(year=2024, month=6, day=1):
logging.info('Rejecting URL because it is too old')
return jsonify('File is too old'), 400
response = requests.head(f'{url}.signature')
if response.status_code != 404:
logging.info('Rejecting URL because it has already been signed.')
return jsonify('File has already been signed'), 204
with get_lock():
# Since the file has not already been signed, add the file to the
# queue
queuefile = codesign_bucket.blob('queue.json')
if not queuefile.exists():
queue_dict = {'queue': []}
else:
queue_dict = json.loads(queuefile.download_as_string())
if url not in queue_dict['queue']:
queue_dict['queue'].append(url)
else:
return jsonify('File is already in the queue'), 200
queuefile.upload_from_string(json.dumps(queue_dict))
logging.info(f'Enqueued {url}')
return jsonify("OK"), 200
else:
return jsonify('Invalid action request'), 405


@ -0,0 +1,5 @@
google-cloud-storage
google-cloud-logging
functions-framework==3.*
flask
requests

codesigning/signing-worker/.gitignore (new file)

@ -0,0 +1,5 @@
# This key is copied from GCP. I've added it to the .gitignore to try to
# prevent it from getting committed and pushed to git, while still allowing it
# to be where ansible expects the key to be so ansible can copy the file to the
# remote server.
natcap-servers-1732552f0202.json


@ -0,0 +1,5 @@
# This is an ansible inventory file. If we had more hostnames to list here, we
# could group them into functional groups (e.g. codesign-workers).
[ncp-inkwell]
ncp-inkwell


@ -0,0 +1,265 @@
#!/usr/bin/env python3
"""Service script to sign InVEST windows binaries."""
import logging
import os
import shutil
import subprocess
import sys
import textwrap
import time
import traceback
import pexpect # apt install python3-pexpect
import requests # apt install python3-requests
LOGGER = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
CERTIFICATE = sys.argv[1]
FILE_DIR = os.path.dirname(__file__)
QUEUE_TOKEN_FILE = os.path.join(FILE_DIR, "access_token.txt")
with open(QUEUE_TOKEN_FILE) as token_file:
ACCESS_TOKEN = token_file.read().strip()
SLACK_TOKEN_FILE = os.path.join(FILE_DIR, "slack_token.txt")
with open(SLACK_TOKEN_FILE) as token_file:
SLACK_ACCESS_TOKEN = token_file.read().strip()
SLACK_NOTIFICATION_SUCCESS = textwrap.dedent(
"""\
:lower_left_fountain_pen: Successfully signed and uploaded `{filename}` to
<{url}|google cloud>
""")
SLACK_NOTIFICATION_ALREADY_SIGNED = textwrap.dedent(
"""\
:lower_left_fountain_pen: `{filename}` is already signed!
<{url}|google cloud>
""")
SLACK_NOTIFICATION_FAILURE = textwrap.dedent(
"""\
:red-flag: Something went wrong while signing {filename}:
```
{traceback}
```
Please investigate on ncp-inkwell using:
```
sudo journalctl -u natcap-codesign.service
```
""")
def post_to_slack(message):
"""Post a message to the slack channel.
Args:
message (str): The message to post.
Returns:
``None``
"""
resp = requests.post(
"https://slack.com/api/chat.postMessage",
headers={
"Authorization": f"Bearer {SLACK_ACCESS_TOKEN}",
"Content-Type": "application/json; charset=utf-8"
},
json={
"channel": "CESG428BH", # sw-invest
"text": message
})
resp.raise_for_status()
def get_from_queue():
"""Get an item to sign from the queue.
Returns:
``None`` if there are no items in the queue, the JSON response dict
otherwise.
"""
response = requests.post(
"https://us-west1-natcap-servers.cloudfunctions.net/codesigning-queue",
headers={"Content-Type": "application/json"},
json={
"token": ACCESS_TOKEN,
"action": "dequeue"
})
if response.status_code == 204:
return None
else:
return response.json()
def download_file(url):
"""Download an arbitrarily large file.
Adapted from https://stackoverflow.com/a/16696317
Args:
url (str): The URL to download.
Returns:
``None``
"""
local_filename = url.split('/')[-1]
with requests.get(url, stream=True) as r:
r.raise_for_status()
with open(local_filename, 'wb') as f:
for chunk in r.iter_content(chunk_size=8192):
f.write(chunk)
return local_filename
def upload_to_bucket(filename, path_on_bucket):
"""Upload a file to a GCS bucket.
Args:
filename (str): The local file to upload.
path_on_bucket (str): The path to the file on the GCS bucket, including
the ``gs://`` prefix.
Returns:
``None``
"""
subprocess.run(['gsutil', 'cp', filename, path_on_bucket], check=True)
def sign_file(file_to_sign):
"""Sign a local .exe file.
Uses ``osslsigncode`` to sign the file using the private key stored on a
Yubikey, and the corresponding certificate that has been exported from the
PIV slot 9c.
Args:
file_to_sign (str): The local filepath to the file to sign.
Returns:
``None``
"""
signed_file = f"{file_to_sign}.signed"
pass_file = os.path.join(FILE_DIR, 'pass.txt')
signcode_command = textwrap.dedent(f"""\
osslsigncode sign \
-pkcs11engine /usr/lib/x86_64-linux-gnu/engines-3/pkcs11.so \
-pkcs11module /usr/lib/x86_64-linux-gnu/libykcs11.so \
-key "pkcs11:id=%02;type=private" \
-certs {CERTIFICATE} \
-h sha256 \
-ts http://timestamp.sectigo.com \
-readpass {pass_file} \
-verbose \
-in {file_to_sign} \
-out {signed_file}""")
process = pexpect.spawnu(signcode_command)
process.expect('Enter PKCS#11 key PIN for Private key for Digital Signature:')
with open(pass_file) as passfile:
process.sendline(passfile.read().strip())
# print remainder of program output for our logging.
print(process.read())
shutil.move(signed_file, file_to_sign)
def note_signature_complete(local_filepath, target_gs_uri):
"""Create a small file next to the signed file to indicate signature.
Args:
local_filepath (str): The local filepath of the signed file.
target_gs_uri (str): The GCS URI of the signed file; a ``.signature``
note is uploaded alongside it.
"""
# Using osslsigncode to verify the output always fails for me, even though
# the signature is clearly valid when checked on Windows.
process = subprocess.run(
['osslsigncode', 'verify', '-in', local_filepath], check=False,
capture_output=True)
temp_filepath = f'/tmp/{os.path.basename(local_filepath)}.signed'
with open(temp_filepath, 'w') as signature_file:
signature_file.write(process.stdout.decode())
try:
# Upload alongside the original file
subprocess.run(
['gsutil', 'cp', temp_filepath, f'{target_gs_uri}.signature'],
check=True)
finally:
os.remove(temp_filepath)
def has_signature(filename):
"""Check if a file is already signed.
Args:
filename (str): The local filepath to the file to check.
Returns:
``True`` if the file is signed, ``False`` otherwise.
"""
process = subprocess.run(
['osslsigncode', 'verify', '-in', filename], capture_output=True,
check=False)
# Handle the case where it's possible there might not be any stdout or
# stderr to decode.
process_output = ""
for output in (process.stdout, process.stderr):
if output is not None:
process_output += output.decode()
if 'No signature found' in process_output:
return False
return True
def main():
while True:
try:
file_to_sign = get_from_queue()
if file_to_sign is None:
LOGGER.info('No items in the queue')
else:
LOGGER.info(f"Dequeued and downloading {file_to_sign['https-url']}")
filename = download_file(file_to_sign['https-url'])
LOGGER.info(f"Checking if {filename} is already signed")
if has_signature(filename):
LOGGER.info(f"{filename} is already signed, skipping")
post_to_slack(
SLACK_NOTIFICATION_ALREADY_SIGNED.format(
filename=filename,
url=file_to_sign['https-url']))
note_signature_complete(filename, file_to_sign['gs-uri'])
else:
LOGGER.info(f"Signing {filename}")
sign_file(filename)
LOGGER.info(f"Uploading signed file to {file_to_sign['gs-uri']}")
upload_to_bucket(filename, file_to_sign['gs-uri'])
LOGGER.info(
f"Adding {file_to_sign['https-url']} to signed files list")
note_signature_complete(filename, file_to_sign['gs-uri'])
LOGGER.info(f"Removing {filename}")
post_to_slack(
SLACK_NOTIFICATION_SUCCESS.format(
filename=filename,
url=file_to_sign['https-url']))
LOGGER.info("Signing complete.")
os.remove(filename)
except Exception as e:
LOGGER.exception(f"Unexpected error signing file: {e}")
post_to_slack(
SLACK_NOTIFICATION_FAILURE.format(
filename=file_to_sign['https-url'],
traceback=traceback.format_exc()))
time.sleep(60)
if __name__ == '__main__':
main()


@ -0,0 +1,33 @@
# Systemd service for debian:bookworm for signing InVEST windows binaries.
#
# To install this service, copy this onto the host as /etc/systemd/system/natcap-codesign.service
#
# To use, run (for example):
# # On modifying the service file, run:
# $ sudo systemctl daemon-reload
#
# # enable the service
# $ sudo systemctl enable natcap-codesign.service
#
# # start the service
# $ sudo systemctl start natcap-codesign
#
# # check the service status
# $ sudo systemctl status natcap-codesign
#
# This service is built to run in the foreground.
#
# See https://wiki.debian.org/systemd/Services for background info about systemd services.
[Unit]
Description=NatCap Code Signing for Windows EXE Binaries
[Service]
User=natcap-codesign
Group=natcap-codesign
WorkingDirectory=/tmp
# Run in the foreground
Type=simple
Restart=always
ExecStart=python3 /opt/natcap-codesign/natcap-codesign.py /opt/natcap-codesign/codesign-cert-chain.pem


@ -0,0 +1,112 @@
---
- name: Set up everything needed on NCP-Inkwell
hosts: all
become: true
become_method: sudo
tasks:
- name: Install GCP SDK dependencies
ansible.builtin.apt:
update_cache: true
pkg:
- apt-transport-https
- ca-certificates
- gnupg
- curl
- name: Download the Google Cloud SDK package repository signing key
ansible.builtin.shell:
cmd: curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg
creates: /usr/share/keyrings/cloud.google.gpg
- name: Add Google Cloud SDK package repository source
ansible.builtin.apt_repository:
update_cache: true
filename: google-cloud-sdk.list
repo: "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main"
- name: Install packages
ansible.builtin.apt:
update_cache: true
pkg:
- python3
- python3-pexpect
- python3-requests
- wget
- vim-nox
- yubico-piv-tool
- libengine-pkcs11-openssl
- ykcs11
- libssl-dev
- google-cloud-sdk
- google-cloud-cli
- yubikey-manager
- name: Add bookworm-backports repository
ansible.builtin.apt_repository:
update_cache: true
repo: "deb http://deb.debian.org/debian {{ ansible_distribution_release }}-backports main"
filename: bookworm-backports.list
- name: Install osslsigncode from backports
ansible.builtin.apt:
update_cache: true
default_release: "{{ ansible_distribution_release }}-backports"
pkg:
# The normal debian:bookworm repos have osslsigncode 2.5, which has a
# bug in it that prevents it from signing our binaries. This was
# fixed in osslsigncode 2.6. The version available in
# bookworm-backports is 2.9. The issue (and solution) was similar to
# https://stackoverflow.com/a/78308879
- osslsigncode
- name: Create the codesign directory
ansible.builtin.file:
state: directory
path: /opt/natcap-codesign
- name: Install the certificate
ansible.builtin.shell:
cmd: ykman piv certificates export 9c /opt/natcap-codesign/codesign-cert-chain.pem
creates: /opt/natcap-codesign/codesign-cert-chain.pem
- name: Create codesigning group
ansible.builtin.group:
name: natcap-codesign
state: present
- name: Create codesigning user
ansible.builtin.user:
name: natcap-codesign
group: natcap-codesign
shell: /bin/bash
createhome: true
- name: Install the service account key
ansible.builtin.copy:
src: natcap-servers-1732552f0202.json
dest: /opt/natcap-codesign/natcap-servers-1732552f0202.json
mode: 0600
- name: Set up application credentials
ansible.builtin.shell:
cmd: gcloud auth activate-service-account --key-file=/opt/natcap-codesign/natcap-servers-1732552f0202.json
- name: Install codesigning python script
ansible.builtin.copy:
src: natcap-codesign.py
dest: /opt/natcap-codesign/natcap-codesign.py
mode: 0755
- name: Install the codesign service
ansible.builtin.copy:
src: natcap-codesign.service
dest: /etc/systemd/system/natcap-codesign.service
mode: 0644
- name: Enable the natcap-codesign service
ansible.builtin.systemd_service:
name: natcap-codesign
daemon_reload: true # reload in case there are any config changes
state: restarted
enabled: true


@ -3,6 +3,7 @@ import platform
import sys
os.environ['PROJ_LIB'] = os.path.join(sys._MEIPASS, 'proj')
os.environ['PROJ_DATA'] = os.path.join(sys._MEIPASS, 'proj')
if platform.system() == 'Darwin':
# Rtree will look in this directory first for libspatialindex_c.dylib.


@ -251,11 +251,11 @@ MODEL_SPEC = {
}
},
"index_col": "lucode",
"required": False,
"required": "valuation_table_path",
"about": gettext(
"A table of water demand for each LULC class. Each LULC code "
"in the LULC raster must have a corresponding row in this "
"table."),
"table. Required if 'valuation_table_path' is provided."),
"name": gettext("water demand table")
},
"valuation_table_path": {
@ -502,14 +502,13 @@ def execute(args):
a path to an input CSV
table of LULC classes, showing consumptive water use for each
landuse / land-cover type (cubic meters per year) to calculate
water scarcity.
water scarcity. Required if ``valuation_table_path`` is provided.
args['valuation_table_path'] (string): (optional) if a non-empty
string, a path to an input CSV table of
hydropower stations with the following fields to calculate
valuation: 'ws_id', 'time_span', 'discount', 'efficiency',
'fraction', 'cost', 'height', 'kw_price'
Required if ``calculate_valuation`` is True.
args['n_workers'] (int): (optional) The number of worker processes to
use for processing this model. If omitted, computation will take


@ -1,20 +1,53 @@
"""Module for Regression Testing the InVEST Annual Water Yield module."""
import unittest
import tempfile
import shutil
import os
import shutil
import tempfile
import unittest
import numpy
from shapely.geometry import Polygon
import pandas
import numpy
from osgeo import gdal
import pygeoprocessing
from osgeo import gdal, ogr, osr
REGRESSION_DATA = os.path.join(
os.path.dirname(__file__), '..', 'data', 'invest-test-data', 'annual_water_yield')
SAMPLE_DATA = os.path.join(REGRESSION_DATA, 'input')
gdal.UseExceptions()
def make_watershed_vector(path_to_shp):
"""
Generate watershed results shapefile with two polygons
Args:
path_to_shp (str): path to store watershed results vector
Outputs:
None
"""
shapely_geometry_list = [
Polygon([(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)]),
Polygon([(2, 2), (3, 2), (3, 3), (2, 3), (2, 2)])
]
projection_wkt = osr.GetUserInputAsWKT("EPSG:4326")
vector_format = "ESRI Shapefile"
fields = {"hp_energy": ogr.OFTReal, "hp_val": ogr.OFTReal,
"ws_id": ogr.OFTReal, "rsupply_vl": ogr.OFTReal,
"wyield_mn": ogr.OFTReal, "wyield_vol": ogr.OFTReal,
"consum_mn": ogr.OFTReal, "consum_vol": ogr.OFTReal}
attribute_list = [
{"hp_energy": 1, "hp_val": 1, "ws_id": 0, "rsupply_vl": 2},
{"hp_energy": 11, "hp_val": 3, "ws_id": 1, "rsupply_vl": 52}
]
pygeoprocessing.shapely_geometry_to_vector(shapely_geometry_list,
path_to_shp, projection_wkt,
vector_format, fields,
attribute_list)
class AnnualWaterYieldTests(unittest.TestCase):
"""Regression Tests for Annual Water Yield Model."""
@ -247,7 +280,8 @@ class AnnualWaterYieldTests(unittest.TestCase):
def test_validation(self):
"""Hydro: test failure cases on the validation function."""
from natcap.invest import annual_water_yield, validation
from natcap.invest import annual_water_yield
from natcap.invest import validation
args = AnnualWaterYieldTests.generate_base_args(self.workspace_dir)
@ -367,3 +401,124 @@ class AnnualWaterYieldTests(unittest.TestCase):
self.assertTrue(
'but are not found in the valuation table' in
actual_message, actual_message)
# if the demand table is missing but the valuation table is present,
# make sure we have a validation error.
args_missing_demand_table = args.copy()
args_missing_demand_table['demand_table_path'] = ''
args_missing_demand_table['valuation_table_path'] = (
os.path.join(SAMPLE_DATA, 'hydropower_valuation_table.csv'))
validation_warnings = annual_water_yield.validate(
args_missing_demand_table)
self.assertEqual(len(validation_warnings), 1)
self.assertEqual(
validation_warnings[0],
(['demand_table_path'], 'Input is required but has no value'))
def test_fractp_op(self):
"""Test `fractp_op`"""
from natcap.invest.annual_water_yield import fractp_op
# generate fake data
kc = numpy.array([[1, .1, .1], [.6, .6, .1]])
eto = numpy.array([[1000, 900, 900], [1100, 1005, 1000]])
precip = numpy.array([[100, 1000, 10], [500, 800, 1100]])
root = numpy.array([[99, 300, 400], [5, 500, 800]])
soil = numpy.array([[600, 700, 700], [800, 900, 600]])
pawc = numpy.array([[.11, .11, .12], [.55, .55, .19]])
veg = numpy.array([[1, 1, 0], [0, 1, 0]])
nodata_dict = {'eto': None, 'precip': None, 'depth_root': None,
'pawc': None, 'out_nodata': None}
seasonality_constant = 6
actual_fractp = fractp_op(kc, eto, precip, root, soil, pawc, veg,
nodata_dict, seasonality_constant)
# generated by running fractp_op
expected_fractp = numpy.array([[0.9345682, 0.06896508, 1.],
[1., 0.6487423, 0.09090909]],
dtype=numpy.float32)
numpy.testing.assert_allclose(actual_fractp, expected_fractp,
err_msg="Fractp does not match expected")
def test_compute_watershed_valuation(self):
"""Test `compute_watershed_valuation`, `compute_rsupply_volume`
and `compute_water_yield_volume`"""
from natcap.invest import annual_water_yield
def _create_watershed_results_vector(path_to_shp):
"""Generate a fake watershed results vector file."""
shapely_geometry_list = [
Polygon([(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)]),
Polygon([(2, 2), (3, 2), (3, 3), (2, 3), (2, 2)])
]
projection_wkt = osr.GetUserInputAsWKT("EPSG:4326")
vector_format = "ESRI Shapefile"
fields = {"ws_id": ogr.OFTReal, "wyield_mn": ogr.OFTReal,
"consum_mn": ogr.OFTReal, "consum_vol": ogr.OFTReal}
attribute_list = [{"ws_id": 0, "wyield_mn": 990000,
"consum_mn": 500, "consum_vol": 50},
{"ws_id": 1, "wyield_mn": 800000,
"consum_mn": 600, "consum_vol": 70}]
pygeoprocessing.shapely_geometry_to_vector(shapely_geometry_list,
path_to_shp,
projection_wkt,
vector_format, fields,
attribute_list)
def _validate_fields(vector_path, field_name, expected_values, error_msg):
"""
Validate a specific field in the watershed results vector
by comparing actual to expected values. Expected values generated
by running the function.
Args:
vector_path (str): path to watershed shapefile
field_name (str): attribute field to check
expected_values (list): list of expected values for the field
error_msg (str): what to print if assertion fails
Returns:
None
"""
with gdal.OpenEx(vector_path, gdal.OF_VECTOR | gdal.GA_Update) as ws_ds:
ws_layer = ws_ds.GetLayer()
actual_values = [ws_feat.GetField(field_name)
for ws_feat in ws_layer]
self.assertEqual(actual_values, expected_values, msg=error_msg)
# generate fake watershed results vector
watershed_results_vector_path = os.path.join(self.workspace_dir,
"watershed_results.shp")
_create_watershed_results_vector(watershed_results_vector_path)
# generate fake val_df
val_df = pandas.DataFrame({'efficiency': [.7, .8], 'height': [12, 50],
'fraction': [.9, .7], 'discount': [60, 20],
'time_span': [10, 10], 'cost': [100, 200],
'kw_price': [15, 20]})
# test water yield volume
annual_water_yield.compute_water_yield_volume(
watershed_results_vector_path)
_validate_fields(watershed_results_vector_path, "wyield_vol",
[990.0, 800.0],
"Error with water yield volume calculation.")
# test rsupply volume
annual_water_yield.compute_rsupply_volume(
watershed_results_vector_path)
_validate_fields(watershed_results_vector_path, "rsupply_vl",
[940.0, 730.0],
"Error calculating total realized water supply volume.")
# test compute watershed valuation
annual_water_yield.compute_watershed_valuation(
watershed_results_vector_path, val_df)
_validate_fields(watershed_results_vector_path, "hp_energy",
[19.329408, 55.5968],
"Error calculating energy.")
_validate_fields(watershed_results_vector_path, "hp_val",
[501.9029748723, 4587.91946857059],
"Error calculating net present value.")


@ -268,6 +268,69 @@ class CarbonTests(unittest.TestCase):
assert_raster_equal_value(
os.path.join(args['workspace_dir'], 'npv_redd.tif'), -0.4602106)
def test_generate_carbon_map(self):
"""Test `_generate_carbon_map`"""
from natcap.invest.carbon import _generate_carbon_map
def _make_simple_lulc_raster(base_raster_path):
"""Create a raster on designated path with arbitrary values.
Args:
base_raster_path (str): the raster path for making the new raster.
Returns:
None.
"""
array = numpy.array([[1, 1], [2, 3]], dtype=numpy.int32)
# UTM Zone 10N
srs = osr.SpatialReference()
srs.ImportFromEPSG(26910)
projection_wkt = srs.ExportToWkt()
origin = (461251, 4923245)
pixel_size = (1, 1)
no_data = -999
pygeoprocessing.numpy_array_to_raster(
array, no_data, pixel_size, origin, projection_wkt,
base_raster_path)
# generate a fake lulc raster
lulc_path = os.path.join(self.workspace_dir, "lulc.tif")
_make_simple_lulc_raster(lulc_path)
# make fake carbon pool dict
carbon_pool_by_type = {1: 5000, 2: 60, 3: 120}
out_carbon_stock_path = os.path.join(self.workspace_dir,
"carbon_stock.tif")
_generate_carbon_map(lulc_path, carbon_pool_by_type,
out_carbon_stock_path)
# open output carbon stock raster and check values
actual_carbon_stock = gdal.Open(out_carbon_stock_path)
band = actual_carbon_stock.GetRasterBand(1)
actual_carbon_stock = band.ReadAsArray()
expected_carbon_stock = numpy.array([[0.5, 0.5], [0.006, 0.012]],
dtype=numpy.float32)
numpy.testing.assert_array_equal(actual_carbon_stock,
expected_carbon_stock)
def test_calculate_valuation_constant(self):
"""Test `_calculate_valuation_constant`"""
from natcap.invest.carbon import _calculate_valuation_constant
valuation_constant = _calculate_valuation_constant(lulc_cur_year=2010,
lulc_fut_year=2012,
discount_rate=50,
rate_change=5,
price_per_metric_ton_of_c=50)
expected_valuation = 40.87302
self.assertEqual(round(valuation_constant, 5), expected_valuation)
class CarbonValidationTests(unittest.TestCase):
"""Tests for the Carbon Model MODEL_SPEC and validation."""


@ -11,6 +11,7 @@ import unittest
import numpy
import pandas
from scipy.sparse import dok_matrix
import pygeoprocessing
from natcap.invest import utils
from natcap.invest import validation
@ -24,6 +25,26 @@ REGRESSION_DATA = os.path.join(
LOGGER = logging.getLogger(__name__)
def make_raster_from_array(base_raster_path, array):
"""Create a raster on designated path with arbitrary values.
Args:
base_raster_path (str): the raster path for making the new raster.
array (numpy.ndarray): the 2D array to write into the new raster.
Returns:
None.
"""
# UTM Zone 10N
srs = osr.SpatialReference()
srs.ImportFromEPSG(26910)
projection_wkt = srs.ExportToWkt()
origin = (461261, 4923265)
pixel_size = (1, 1)
no_data = -1
pygeoprocessing.numpy_array_to_raster(
array, no_data, pixel_size, origin, projection_wkt,
base_raster_path)
class TestPreprocessor(unittest.TestCase):
"""Test Coastal Blue Carbon preprocessor functions."""
@ -1060,3 +1081,149 @@ class TestCBC2(unittest.TestCase):
[(['analysis_year'],
coastal_blue_carbon.INVALID_ANALYSIS_YEAR_MSG.format(
analysis_year=2000, latest_year=2000))])
def test_calculate_npv(self):
"""Test `_calculate_npv`"""
from natcap.invest.coastal_blue_carbon import coastal_blue_carbon
# make fake data
net_sequestration_rasters = {
2010: os.path.join(self.workspace_dir, "carbon_seq_2010.tif"),
2011: os.path.join(self.workspace_dir, "carbon_seq_2011.tif"),
2012: os.path.join(self.workspace_dir, "carbon_seq_2012.tif")
}
for year, path in net_sequestration_rasters.items():
array = numpy.array([[year*.5, year*.25], [year-1, 50]]) # random array
make_raster_from_array(path, array)
prices_by_year = {
2010: 50,
2011: 80,
2012: 95
}
discount_rate = 0.1
baseline_year = 2010
target_raster_years_and_paths = {
2010: os.path.join(self.workspace_dir, "tgt_carbon_seq_2010.tif"),
2011: os.path.join(self.workspace_dir, "tgt_carbon_seq_2011.tif"),
2012: os.path.join(self.workspace_dir, "tgt_carbon_seq_2012.tif")
}
coastal_blue_carbon._calculate_npv(net_sequestration_rasters,
prices_by_year, discount_rate,
baseline_year,
target_raster_years_and_paths)
# read in the created target rasters
actual_2011 = gdal.Open(target_raster_years_and_paths[2011])
band = actual_2011.GetRasterBand(1)
actual_2011 = band.ReadAsArray()
actual_2012 = gdal.Open(target_raster_years_and_paths[2012])
band = actual_2012.GetRasterBand(1)
actual_2012 = band.ReadAsArray()
# compare actual rasters to expected (based on running `_calculate_npv`)
expected_2011 = numpy.array([[100525, 50262.5], [200950, 5000]])
expected_2012 = numpy.array([[370206.818182, 185103.409091],
[740045.454545, 18409.090909]])
numpy.testing.assert_allclose(actual_2011, expected_2011)
numpy.testing.assert_allclose(actual_2012, expected_2012)
def test_calculate_accumulation_over_time(self):
"""Test `_calculate_accumulation_over_time`"""
from natcap.invest.coastal_blue_carbon.coastal_blue_carbon import \
_calculate_accumulation_over_time
# generate fake data with nodata values
nodata = float(numpy.finfo(numpy.float32).min)
annual_biomass_matrix = numpy.array([[1, 2], [3, nodata]])
annual_soil_matrix = numpy.array([[11, 12], [13, 14]])
annual_litter_matrix = numpy.array([[.5, .9], [4, .9]])
n_years = 3
actual_accumulation = _calculate_accumulation_over_time(
annual_biomass_matrix, annual_soil_matrix, annual_litter_matrix,
n_years)
expected_accumulation = numpy.array([[37.5, 44.7], [60, nodata]])
numpy.testing.assert_allclose(actual_accumulation, expected_accumulation)
def test_calculate_net_sequestration(self):
"""test `_calculate_net_sequestration`"""
from natcap.invest.coastal_blue_carbon.coastal_blue_carbon import \
_calculate_net_sequestration
# make fake rasters that contain nodata pixels (-1)
accumulation_raster_path = os.path.join(self.workspace_dir,
"accumulation_raster.tif")
accumulation_array = numpy.array([[40, -1], [70, -1]])
make_raster_from_array(accumulation_raster_path, accumulation_array)
emissions_raster_path = os.path.join(self.workspace_dir,
"emissions_raster.tif")
emissions_array = numpy.array([[-1, 8], [7, -1]])
make_raster_from_array(emissions_raster_path, emissions_array)
target_raster_path = os.path.join(self.workspace_dir,
"target_raster.tif")
# run `_calculate_net_sequestration`
_calculate_net_sequestration(accumulation_raster_path,
emissions_raster_path, target_raster_path)
# compare actual to expected output net sequestration raster
actual_sequestration = gdal.Open(target_raster_path)
band = actual_sequestration.GetRasterBand(1)
actual_sequestration = band.ReadAsArray()
# calculated by running `_calculate_net_sequestration`
nodata = float(numpy.finfo(numpy.float32).min)
expected_sequestration = numpy.array([[40, -8], [-7, nodata]])
numpy.testing.assert_allclose(actual_sequestration,
expected_sequestration)
def test_reclassify_accumulation_transition(self):
"""Test `_reclassify_accumulation_transition`"""
from natcap.invest.coastal_blue_carbon.coastal_blue_carbon import \
_reclassify_accumulation_transition, _reclassify_disturbance_magnitude
# make fake raster data
landuse_transition_from_raster = os.path.join(self.workspace_dir,
"landuse_transition_from.tif")
landuse_transition_from_array = numpy.array([[1, 2], [3, 2]])
make_raster_from_array(landuse_transition_from_raster,
landuse_transition_from_array)
landuse_transition_to_raster = os.path.join(self.workspace_dir,
"landuse_transition_to.tif")
landuse_transition_to_array = numpy.array([[1, 1], [2, 3]])
make_raster_from_array(landuse_transition_to_raster,
landuse_transition_to_array)
# make fake accumulation_rate_matrix
accumulation_rate_matrix = dok_matrix((4, 4), dtype=numpy.float32)
accumulation_rate_matrix[1, 2] = 0.5 # Forest -> Grassland
accumulation_rate_matrix[1, 3] = 0.3 # Forest -> Agriculture
accumulation_rate_matrix[2, 1] = 0.2 # Grassland -> Forest
accumulation_rate_matrix[2, 3] = 0.4 # Grassland -> Agriculture
accumulation_rate_matrix[3, 1] = 0.1 # Agriculture -> Forest
accumulation_rate_matrix[3, 2] = 0.3 # Agriculture -> Grassland
target_raster_path = os.path.join(self.workspace_dir, "output.tif")
_reclassify_accumulation_transition(
landuse_transition_from_raster, landuse_transition_to_raster,
accumulation_rate_matrix, target_raster_path)
# compare actual and expected target_raster
actual_accumulation = gdal.Open(target_raster_path)
band = actual_accumulation.GetRasterBand(1)
actual_accumulation = band.ReadAsArray()
expected_accumulation = numpy.array([[0, .2], [.3, .4]])
numpy.testing.assert_allclose(actual_accumulation, expected_accumulation)


@ -8,6 +8,7 @@ import unittest
import numpy.testing
import pandas.testing
import pandas
import pygeoprocessing
import shapely.wkb
import taskgraph
@ -1553,6 +1554,100 @@ class CoastalVulnerabilityTests(unittest.TestCase):
# Polygon has 4 sides on exterior, 3 on interior, expect 7 lines
self.assertTrue(len(line_list) == 7)
def test_assemble_results_and_calculate_exposure(self):
"""Test that assemble_results_and_calculate_exposure correctly
calculates exposure"""
from natcap.invest.coastal_vulnerability import \
assemble_results_and_calculate_exposure
def _make_shore_points_vector(shore_points_path):
# create 4 points, each with a unique 'shore_id' in [0..3].
shore_geometries = [Point(0, 0), Point(1, 0), Point(2, 1), Point(3, 2)]
shore_fields = {'shore_id': ogr.OFTInteger}
shore_attributes = [{'shore_id': i} for i in range(len(shore_geometries))]
# Create a spatial reference (projected or geographic)
srs = osr.SpatialReference()
srs.ImportFromEPSG(26910) # e.g. "NAD83 / UTM zone 10N"
pygeoprocessing.shapely_geometry_to_vector(
shore_geometries, shore_points_path, srs.ExportToWkt(),
vector_format='GPKG',
fields=shore_fields,
attribute_list=shore_attributes,
ogr_geom_type=ogr.wkbPoint
)
def _make_habitat_csv(habitat_csv_path):
# Example: one habitat column named 'kelp', plus 'R_hab'
# We have 4 shore IDs, so we add 4 rows. Values are arbitrary.
habitat_df = pandas.DataFrame(
{'shore_id': [0, 1, 2, 3], 'kelp': [5, 3, 5, 4],
'seagrass': [4, 1, 2, 4], 'R_hab': [5, 2, 5, 3]})
habitat_df.to_csv(habitat_csv_path, index=False)
def _make_risk_id_path_list():
# Create pickles for risk data
relief_pkl = os.path.join(self.workspace_dir, 'relief.pickle')
slr_pkl = os.path.join(self.workspace_dir, 'slr.pickle')
population_pkl = os.path.join(self.workspace_dir, 'population.pickle')
relief_data = {0: 10.0, 1: 50.0, 2: 30.0, 3: 80.0} # arbitrary data
slr_data = {0: 0.1, 1: 0.2, 2: 0.9, 3: 0.5}
population_data = {0: 123.0, 1: 999.0, 2: 55.0, 3: 0.0}
for file_path, data_dict in zip([relief_pkl, slr_pkl, population_pkl],
[relief_data, slr_data, population_data]):
with open(file_path, 'wb') as f:
pickle.dump(data_dict, f)
risk_id_path_list = [
(relief_pkl, True, "R_relief"), # "True" => bin to 1..5
(slr_pkl, True, "R_slr"),
(population_pkl, False, "population")
]
return risk_id_path_list
shore_points_path = os.path.join(self.workspace_dir, "shore_points.gpkg")
_make_shore_points_vector(shore_points_path)
habitat_csv_path = os.path.join(self.workspace_dir, 'habitat_protection.csv')
_make_habitat_csv(habitat_csv_path)
risk_id_path_list = _make_risk_id_path_list()
intermediate_vector_path = os.path.join(self.workspace_dir,
'intermediate_exposure.gpkg')
intermediate_csv_path = os.path.join(self.workspace_dir,
'intermediate_exposure.csv')
output_vector_path = os.path.join(self.workspace_dir,
'coastal_exposure.gpkg')
output_csv_path = os.path.join(self.workspace_dir,
'coastal_exposure.csv')
# call function
assemble_results_and_calculate_exposure(
risk_id_path_list,
habitat_csv_path,
shore_points_path,
intermediate_vector_path,
intermediate_csv_path,
output_vector_path,
output_csv_path
)
# read field values in output vector and compare
actual_df = pandas.read_csv(
output_csv_path,
usecols=["exposure", "habitat_role", "exposure_no_habitats"])
expected_df = pandas.DataFrame({
"exposure": [2.924018, 2.0, 4.641589, 2.289428],
"habitat_role": [0, 0.714418, 0, 0.424989],
"exposure_no_habitats": [2.924018, 2.714418, 4.641589, 2.714418]})
pandas.testing.assert_frame_equal(
actual_df, expected_df, check_dtype=False)
def assert_pickled_arrays_almost_equal(
actual_values_pickle_path, expected_values_json_path):


@ -5,9 +5,10 @@ import shutil
import os
import numpy
from osgeo import gdal
from osgeo import gdal, ogr, osr
import pandas
import pygeoprocessing
from shapely.geometry import Polygon
gdal.UseExceptions()
MODEL_DATA_PATH = os.path.join(
@ -21,6 +22,108 @@ TEST_DATA_PATH = os.path.join(
'crop_production_model')
def make_aggregate_vector(path_to_shp):
"""
Generate shapefile with two overlapping polygons
Args:
path_to_shp (str): path to store the aggregate vector
Outputs:
None
"""
# (xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)
shapely_geometry_list = [
Polygon([(461151, 4923265-50), (461261+50, 4923265-50),
(461261+50, 4923265), (461151, 4923265)]),
Polygon([(461261, 4923265-35), (461261+60, 4923265-35),
(461261+60, 4923265+50), (461261, 4923265+50)])
]
srs = osr.SpatialReference()
srs.ImportFromEPSG(26910)
projection_wkt = srs.ExportToWkt()
vector_format = "ESRI Shapefile"
fields = {"id": ogr.OFTReal}
attribute_list = [
{"id": 0},
{"id": 1},
]
pygeoprocessing.shapely_geometry_to_vector(shapely_geometry_list,
path_to_shp, projection_wkt,
vector_format, fields,
attribute_list)
def make_simple_raster(base_raster_path, array):
"""Create a raster on designated path with arbitrary values.
Args:
base_raster_path (str): the raster path for making the new raster.
array (numpy.ndarray): the 2D array to write into the new raster.
Returns:
None.
"""
# UTM Zone 10N
srs = osr.SpatialReference()
srs.ImportFromEPSG(26910)
projection_wkt = srs.ExportToWkt()
origin = (461251, 4923245)
pixel_size = (30, 30)
no_data = -1
pygeoprocessing.numpy_array_to_raster(
array, no_data, pixel_size, origin, projection_wkt,
base_raster_path)
def create_nutrient_df():
"""Creates a nutrient DataFrame for testing."""
return pandas.DataFrame([
{'crop': 'corn', 'area (ha)': 21.0, 'production_observed': 0.2,
'percentrefuse': 7, 'protein': 42., 'lipid': 8, 'energy': 476.,
'ca': 27.0, 'fe': 15.7, 'mg': 280.0, 'ph': 704.0, 'k': 1727.0,
'na': 2.0, 'zn': 4.9, 'cu': 1.9, 'fl': 8, 'mn': 2.9, 'se': 0.1,
'vita': 3.0, 'betac': 16.0, 'alphac': 2.30, 'vite': 0.8,
'crypto': 1.6, 'lycopene': 0.36, 'lutein': 63.0, 'betat': 0.5,
'gammat': 2.1, 'deltat': 1.9, 'vitc': 6.8, 'thiamin': 0.4,
'riboflavin': 1.8, 'niacin': 8.2, 'pantothenic': 0.9,
'vitb6': 1.4, 'folate': 385.0, 'vitb12': 2.0, 'vitk': 41.0},
{'crop': 'soybean', 'area (ha)': 5., 'production_observed': 4.,
'percentrefuse': 9, 'protein': 33., 'lipid': 2., 'energy': 99.,
'ca': 257., 'fe': 15.7, 'mg': 280., 'ph': 704.0, 'k': 197.0,
'na': 2., 'zn': 4.9, 'cu': 1.6, 'fl': 3., 'mn': 5.2, 'se': 0.3,
'vita': 3.0, 'betac': 16.0, 'alphac': 1.0, 'vite': 0.8,
'crypto': 0.6, 'lycopene': 0.3, 'lutein': 61.0, 'betat': 0.5,
'gammat': 2.3, 'deltat': 1.2, 'vitc': 3.0, 'thiamin': 0.42,
'riboflavin': 0.82, 'niacin': 12.2, 'pantothenic': 0.92,
'vitb6': 5.4, 'folate': 305., 'vitb12': 3., 'vitk': 42.},
]).set_index('crop')
def _create_crop_rasters(output_dir, crop_names, file_suffix):
"""Creates raster files for test setup."""
_OBSERVED_PRODUCTION_FILE_PATTERN = os.path.join(
'.', '%s_observed_production%s.tif')
_CROP_PRODUCTION_FILE_PATTERN = os.path.join(
'.', '%s_regression_production%s.tif')
for i, crop in enumerate(crop_names):
observed_yield_path = os.path.join(
output_dir,
_OBSERVED_PRODUCTION_FILE_PATTERN % (crop, file_suffix))
crop_production_raster_path = os.path.join(
output_dir,
_CROP_PRODUCTION_FILE_PATTERN % (crop, file_suffix))
# Create arbitrary raster arrays
observed_array = numpy.array([[4, i], [i*3, 4]], dtype=numpy.int16)
crop_array = numpy.array([[i, 1], [i*2, 3]], dtype=numpy.int16)
make_simple_raster(observed_yield_path, observed_array)
make_simple_raster(crop_production_raster_path, crop_array)
class CropProductionTests(unittest.TestCase):
"""Tests for the Crop Production model."""
@ -390,6 +493,304 @@ class CropProductionTests(unittest.TestCase):
pandas.testing.assert_frame_equal(
expected_result_table, result_table, check_dtype=False)
def test_x_yield_op(self):
"""Test `_x_yield_op"""
from natcap.invest.crop_production_regression import _x_yield_op
# make fake data
y_max = numpy.array([[-1, 3, 2], [4, 5, 3]])
b_x = numpy.array([[4, 3, 2], [2, 0, 3]])
c_x = numpy.array([[4, 1, 2], [3, 0, 3]])
lulc_array = numpy.array([[3, 3, 2], [3, -1, 3]])
fert_rate = 0.6
crop_lucode = 3
pixel_area_ha = 10
actual_result = _x_yield_op(y_max, b_x, c_x, lulc_array, fert_rate,
crop_lucode, pixel_area_ha)
expected_result = numpy.array([[-1, -19.393047, -1],
[26.776089, -1, 15.1231]])
numpy.testing.assert_allclose(actual_result, expected_result)
def test_zero_observed_yield_op(self):
"""Test `_zero_observed_yield_op`"""
from natcap.invest.crop_production_regression import \
_zero_observed_yield_op
# make fake data
observed_yield_array = numpy.array([[0, 1, -1], [5, 6, -1]])
observed_yield_nodata = -1
actual_result = _zero_observed_yield_op(observed_yield_array,
observed_yield_nodata)
expected_result = numpy.array([[0, 1, 0], [5, 6, 0]])
numpy.testing.assert_allclose(actual_result, expected_result)
def test_mask_observed_yield_op(self):
"""Test `_mask_observed_yield_op`"""
from natcap.invest.crop_production_regression import \
_mask_observed_yield_op
# make fake data
lulc_array = numpy.array([[3, 5, -9999], [3, 3, -1]])
observed_yield_array = numpy.array([[-1, 5, 4], [8, -9999, 91]])
observed_yield_nodata = -1
# note: this observed_yield_nodata value becomes the nodata value in
# the output array but the values in the observed_yield_array with
# this value are NOT treated as no data within this function
landcover_nodata = -9999
crop_lucode = 3
pixel_area_ha = 10
actual_result = _mask_observed_yield_op(
lulc_array, observed_yield_array, observed_yield_nodata,
landcover_nodata, crop_lucode, pixel_area_ha)
expected_result = numpy.array([[-10, 0, -1], [80, -99990, 0]])
numpy.testing.assert_allclose(actual_result, expected_result)
def test_tabulate_regression_results(self):
"""Test `tabulate_regression_results`"""
from natcap.invest.crop_production_regression import \
tabulate_regression_results
def _create_expected_results():
"""Creates the expected results DataFrame."""
return pandas.DataFrame([
{'crop': 'corn', 'area (ha)': 20.0,
'production_observed': 8.0, 'production_modeled': 4.0,
'protein_modeled': 1562400.0, 'protein_observed': 3124800.0,
'lipid_modeled': 297600.0, 'lipid_observed': 595200.0,
'energy_modeled': 17707200.0, 'energy_observed': 35414400.0,
'ca_modeled': 1004400.0, 'ca_observed': 2008800.0,
'fe_modeled': 584040.0, 'fe_observed': 1168080.0,
'mg_modeled': 10416000.0, 'mg_observed': 20832000.0,
'ph_modeled': 26188800.0, 'ph_observed': 52377600.0,
'k_modeled': 64244400.0, 'k_observed': 128488800.0,
'na_modeled': 74400.0, 'na_observed': 148800.0,
'zn_modeled': 182280.0, 'zn_observed': 364560.0,
'cu_modeled': 70680.0, 'cu_observed': 141360.0,
'fl_modeled': 297600.0, 'fl_observed': 595200.0,
'mn_modeled': 107880.0, 'mn_observed': 215760.0,
'se_modeled': 3720.0, 'se_observed': 7440.0,
'vita_modeled': 111600.0, 'vita_observed': 223200.0,
'betac_modeled': 595200.0, 'betac_observed': 1190400.0,
'alphac_modeled': 85560.0, 'alphac_observed': 171120.0,
'vite_modeled': 29760.0, 'vite_observed': 59520.0,
'crypto_modeled': 59520.0, 'crypto_observed': 119040.0,
'lycopene_modeled': 13392.0, 'lycopene_observed': 26784.0,
'lutein_modeled': 2343600.0, 'lutein_observed': 4687200.0,
'betat_modeled': 18600.0, 'betat_observed': 37200.0,
'gammat_modeled': 78120.0, 'gammat_observed': 156240.0,
'deltat_modeled': 70680.0, 'deltat_observed': 141360.0,
'vitc_modeled': 252960.0, 'vitc_observed': 505920.0,
'thiamin_modeled': 14880.0, 'thiamin_observed': 29760.0,
'riboflavin_modeled': 66960.0, 'riboflavin_observed': 133920.0,
'niacin_modeled': 305040.0, 'niacin_observed': 610080.0,
'pantothenic_modeled': 33480.0, 'pantothenic_observed': 66960.0,
'vitb6_modeled': 52080.0, 'vitb6_observed': 104160.0,
'folate_modeled': 14322000.0, 'folate_observed': 28644000.0,
'vitb12_modeled': 74400.0, 'vitb12_observed': 148800.0,
'vitk_modeled': 1525200.0, 'vitk_observed': 3050400.0},
{'crop': 'soybean', 'area (ha)': 40.0,
'production_observed': 12.0, 'production_modeled': 7.0,
'protein_modeled': 2102100.0, 'protein_observed': 3603600.0,
'lipid_modeled': 127400.0, 'lipid_observed': 218400.0,
'energy_modeled': 6306300.0, 'energy_observed': 10810800.0,
'ca_modeled': 16370900.0, 'ca_observed': 28064400.0,
'fe_modeled': 1000090.0, 'fe_observed': 1714440.0,
'mg_modeled': 17836000.0, 'mg_observed': 30576000.0,
'ph_modeled': 44844800.0, 'ph_observed': 76876800.0,
'k_modeled': 12548900.0, 'k_observed': 21512400.0,
'na_modeled': 127400.0, 'na_observed': 218400.0,
'zn_modeled': 312130.0, 'zn_observed': 535080.0,
'cu_modeled': 101920.0, 'cu_observed': 174720.0,
'fl_modeled': 191100.0, 'fl_observed': 327600.0,
'mn_modeled': 331240.0, 'mn_observed': 567840.0,
'se_modeled': 19110.0, 'se_observed': 32760.0,
'vita_modeled': 191100.0, 'vita_observed': 327600.0,
'betac_modeled': 1019200.0, 'betac_observed': 1747200.0,
'alphac_modeled': 63700.0, 'alphac_observed': 109200.0,
'vite_modeled': 50960.0, 'vite_observed': 87360.0,
'crypto_modeled': 38220.0, 'crypto_observed': 65520.0,
'lycopene_modeled': 19110.0, 'lycopene_observed': 32760.0,
'lutein_modeled': 3885700.0, 'lutein_observed': 6661200.0,
'betat_modeled': 31850.0, 'betat_observed': 54600.0,
'gammat_modeled': 146510.0, 'gammat_observed': 251160.0,
'deltat_modeled': 76440.0, 'deltat_observed': 131040.0,
'vitc_modeled': 191100.0, 'vitc_observed': 327600.0,
'thiamin_modeled': 26754.0, 'thiamin_observed': 45864.0,
'riboflavin_modeled': 52234.0, 'riboflavin_observed': 89544.0,
'niacin_modeled': 777140.0, 'niacin_observed': 1332240.0,
'pantothenic_modeled': 58604.0, 'pantothenic_observed': 100464.0,
'vitb6_modeled': 343980.0, 'vitb6_observed': 589680.0,
'folate_modeled': 19428500.0, 'folate_observed': 33306000.0,
'vitb12_modeled': 191100.0, 'vitb12_observed': 327600.0,
'vitk_modeled': 2675400.0, 'vitk_observed': 4586400.0}])
nutrient_df = create_nutrient_df()
pixel_area_ha = 10
workspace_dir = self.workspace_dir
output_dir = os.path.join(workspace_dir, "OUTPUT")
os.makedirs(output_dir, exist_ok=True)
landcover_raster_path = os.path.join(workspace_dir, "landcover.tif")
landcover_nodata = -1
make_simple_raster(landcover_raster_path,
numpy.array([[2, 1], [2, 3]], dtype=numpy.int16))
file_suffix = "v1"
target_table_path = os.path.join(workspace_dir, "output_table.csv")
crop_names = ["corn", "soybean"]
_create_crop_rasters(output_dir, crop_names, file_suffix)
tabulate_regression_results(
nutrient_df, crop_names, pixel_area_ha,
landcover_raster_path, landcover_nodata,
output_dir, file_suffix, target_table_path
)
# Read only the first 2 crops' data (skipping total area)
actual_result_table = pandas.read_csv(target_table_path, nrows=2,
header=0)
expected_result_table = _create_expected_results()
# Compare expected vs actual
pandas.testing.assert_frame_equal(actual_result_table,
expected_result_table)
def test_aggregate_regression_results_to_polygons(self):
"""Test `aggregate_regression_results_to_polygons`"""
from natcap.invest.crop_production_regression import \
aggregate_regression_results_to_polygons
def _create_expected_agg_table():
"""Create expected output results"""
# Define the new values manually
return pandas.DataFrame([
{"FID": 0, "corn_modeled": 1, "corn_observed": 4,
"soybean_modeled": 2, "soybean_observed": 5,
"protein_modeled": 991200, "protein_observed": 3063900,
"lipid_modeled": 110800, "lipid_observed": 388600,
"energy_modeled": 6228600, "energy_observed": 22211700,
"ca_modeled": 4928500, "ca_observed": 12697900,
"fe_modeled": 431750, "fe_observed": 1298390,
"mg_modeled": 7700000, "mg_observed": 23156000,
"ph_modeled": 19360000, "ph_observed": 58220800,
"k_modeled": 19646500, "k_observed": 73207900,
"na_modeled": 55000, "na_observed": 165400,
"zn_modeled": 134750, "zn_observed": 405230,
"cu_modeled": 46790, "cu_observed": 143480,
"fl_modeled": 129000, "fl_observed": 434100,
"mn_modeled": 121610, "mn_observed": 344480,
"se_modeled": 6390, "se_observed": 17370,
"vita_modeled": 82500, "vita_observed": 248100,
"betac_modeled": 440000, "betac_observed": 1323200,
"alphac_modeled": 39590, "alphac_observed": 131060,
"vite_modeled": 22000, "vite_observed": 66160,
"crypto_modeled": 25800, "crypto_observed": 86820,
"lycopene_modeled": 8808, "lycopene_observed": 27042,
"lutein_modeled": 1696100, "lutein_observed": 5119100,
"betat_modeled": 13750, "betat_observed": 41350,
"gammat_modeled": 61390, "gammat_observed": 182770,
"deltat_modeled": 39510, "deltat_observed": 125280,
"vitc_modeled": 117840, "vitc_observed": 389460,
"thiamin_modeled": 11364, "thiamin_observed": 33990,
"riboflavin_modeled": 31664, "riboflavin_observed": 104270,
"niacin_modeled": 298300, "niacin_observed": 860140,
"pantothenic_modeled": 25114, "pantothenic_observed": 75340,
"vitb6_modeled": 111300, "vitb6_observed": 297780,
"folate_modeled": 9131500, "folate_observed": 28199500,
"vitb12_modeled": 73200, "vitb12_observed": 210900,
"vitk_modeled": 1145700, "vitk_observed": 3436200},
{"FID": 1, "corn_modeled": 4, "corn_observed": 8,
"soybean_modeled": 7, "soybean_observed": 12,
"protein_modeled": 3664500, "protein_observed": 6728400,
"lipid_modeled": 425000, "lipid_observed": 813600,
"energy_modeled": 24013500, "energy_observed": 46225200,
"ca_modeled": 17375300, "ca_observed": 30073200,
"fe_modeled": 1584130, "fe_observed": 2882520,
"mg_modeled": 28252000, "mg_observed": 51408000,
"ph_modeled": 71033600, "ph_observed": 129254400,
"k_modeled": 76793300, "k_observed": 150001200,
"na_modeled": 201800, "na_observed": 367200,
"zn_modeled": 494410, "zn_observed": 899640,
"cu_modeled": 172600, "cu_observed": 316080,
"fl_modeled": 488700, "fl_observed": 922800,
"mn_modeled": 439120, "mn_observed": 783600,
"se_modeled": 22830, "se_observed": 40200,
"vita_modeled": 302700, "vita_observed": 550800,
"betac_modeled": 1614400, "betac_observed": 2937600,
"alphac_modeled": 149260, "alphac_observed": 280320,
"vite_modeled": 80720, "vite_observed": 146880,
"crypto_modeled": 97740, "crypto_observed": 184560,
"lycopene_modeled": 32502, "lycopene_observed": 59544,
"lutein_modeled": 6229300, "lutein_observed": 11348400,
"betat_modeled": 50450, "betat_observed": 91800,
"gammat_modeled": 224630, "gammat_observed": 407400,
"deltat_modeled": 147120, "deltat_observed": 272400,
"vitc_modeled": 444060, "vitc_observed": 833520,
"thiamin_modeled": 41634, "thiamin_observed": 75624,
"riboflavin_modeled": 119194, "riboflavin_observed": 223464,
"niacin_modeled": 1082180, "niacin_observed": 1942320,
"pantothenic_modeled": 92084, "pantothenic_observed": 167424,
"vitb6_modeled": 396060, "vitb6_observed": 693840,
"folate_modeled": 33750500, "folate_observed": 61950000,
"vitb12_modeled": 265500, "vitb12_observed": 476400,
"vitk_modeled": 4200600, "vitk_observed": 7636800}
], dtype=float)
workspace = self.workspace_dir
base_aggregate_vector_path = os.path.join(workspace,
"agg_vector.shp")
make_aggregate_vector(base_aggregate_vector_path)
target_aggregate_vector_path = os.path.join(workspace,
"agg_vector_prj.shp")
spatial_ref = osr.SpatialReference()
spatial_ref.ImportFromEPSG(26910)  # NAD83 / UTM zone 10N
landcover_raster_projection = spatial_ref.ExportToWkt()
crop_names = ['corn', 'soybean']
nutrient_df = create_nutrient_df()
output_dir = os.path.join(workspace, "OUTPUT")
os.makedirs(output_dir, exist_ok=True)
file_suffix = 'test'
target_aggregate_table_path = '' # unused
_create_crop_rasters(output_dir, crop_names, file_suffix)
aggregate_regression_results_to_polygons(
base_aggregate_vector_path, target_aggregate_vector_path,
landcover_raster_projection, crop_names,
nutrient_df, output_dir, file_suffix,
target_aggregate_table_path)
_AGGREGATE_TABLE_FILE_PATTERN = os.path.join(
'.', 'aggregate_results%s.csv')
aggregate_table_path = os.path.join(
output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix)
actual_aggregate_table = pandas.read_csv(aggregate_table_path,
dtype=float)
print(actual_aggregate_table)
expected_aggregate_table = _create_expected_agg_table()
pandas.testing.assert_frame_equal(
actual_aggregate_table, expected_aggregate_table)
class CropValidationTests(unittest.TestCase):
"""Tests for the Crop Productions' MODEL_SPEC and validation."""