diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index dae58cae9..fda2ccf4c 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -430,21 +430,20 @@ jobs: WORKBENCH_BINARY=$(find "$(pwd)/workbench/dist" -type f -name 'invest_*.dmg' | head -n 1) make WORKBENCH_BIN_TO_SIGN="$WORKBENCH_BINARY" codesign_mac - #- name: Sign binaries (Windows) - # if: github.event_name != 'pull_request' && matrix.os == 'windows-latest' # secrets not available in PR - # env: - # CERT_FILE: Stanford-natcap-code-signing-cert-expires-2024-01-26.p12 - # CERT_PASS: ${{ secrets.WINDOWS_CODESIGN_CERT_PASS }} - # run: | - # # figure out the path to signtool.exe (it keeps changing with SDK updates) - # SIGNTOOL_PATH=$(find 'C:\\Program Files (x86)\\Windows Kits\\10' -type f -name 'signtool.exe*' | head -n 1) - # WORKBENCH_BINARY=$(find "$(pwd)/workbench/dist" -type f -name 'invest_*.exe' | head -n 1) - # make WORKBENCH_BIN_TO_SIGN="$WORKBENCH_BINARY" SIGNTOOL="$SIGNTOOL_PATH" codesign_windows - - name: Deploy artifacts to GCS if: github.event_name != 'pull_request' run: make deploy + # This relies on the file existing on GCP, so it must be run after `make + # deploy` is called. + - name: Queue windows binaries for signing + if: github.event_name != 'pull_request' && matrix.os == 'windows-latest' # secrets not available in PR + env: + ACCESS_TOKEN: ${{ secrets.CODESIGN_QUEUE_ACCESS_TOKEN }} + run: | + cd codesigning + bash enqueue-current-windows-installer.sh + - name: Upload workbench binary artifact if: always() uses: actions/upload-artifact@v4 diff --git a/.github/workflows/release-part-1.yml b/.github/workflows/release-part-1.yml index 94ddd1a2a..b106bcec2 100644 --- a/.github/workflows/release-part-1.yml +++ b/.github/workflows/release-part-1.yml @@ -36,6 +36,7 @@ jobs: - uses: actions/checkout@v4 if: env.RUN == 'true' with: + fetch-depth: 0 # fetch entire history, for versioning token: ${{ secrets.AUTORELEASE_BOT_PAT }} - name: Install dependencies diff --git a/.github/workflows/release-part-2.yml b/.github/workflows/release-part-2.yml index 0275f1a89..ad394dc37 100644 --- a/.github/workflows/release-part-2.yml +++ b/.github/workflows/release-part-2.yml @@ -72,14 +72,16 @@ jobs: rm -rf artifacts/Wheel* # download each artifact separately so that the command will fail if any is missing - for artifact in Workbench-Windows-binary \ - Workbench-macOS-binary \ + for artifact in Workbench-macOS-binary \ InVEST-sample-data \ InVEST-user-guide do gh run download $RUN_ID --dir artifacts --name "$artifact" done + # download the signed windows workbench file from GCS + wget --directory-prefix=artifacts https://storage.googleapis.com/releases.naturalcapitalproject.org/invest/${{ env.VERSION }}/workbench/invest_${{ env.VERSION }}_workbench_win32_x64.exe + # We build one sdist per combination of OS and python version, so just # download and unzip all of them into an sdists directory so we can # just grab the first one. This approach is more flexible to changes diff --git a/HISTORY.rst b/HISTORY.rst index 7cb16008d..514f5cadd 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -39,16 +39,28 @@ Unreleased Changes ------------------ * General + * Fixed an issue where a user's PROJ_DATA environment variable could + trigger a RuntimeError about a missing proj.db file. + https://github.com/natcap/invest/issues/1742 * Now testing and building against Python 3.13. No longer testing and building with Python 3.8, which reached EOL. 
https://github.com/natcap/invest/issues/1755 * All InVEST model output data now include metadata sidecar files. These are '.yml' files with the same basename as the dataset they describe. https://github.com/natcap/invest/issues/1662 + * InVEST's Windows binaries are now distributed once again with a valid + signature, signed by Stanford University. + https://github.com/natcap/invest/issues/1580 * Workbench * Auto-scrolling of log output is halted on user-initiated scrolling, enabling easier inspection of log output while a model is running (`InVEST #1533 `_). +* Annual Water Yield + * Fixed an issue where the model would crash if the valuation table was + provided, but the demand table was not. Validation will now warn about + this, and the ``MODEL_SPEC`` has been improved to reflect that this table + is now required when doing valuation. + https://github.com/natcap/invest/issues/1769 * Carbon * Updated styling of the HTML report generated by the carbon model, for visual consistency with the Workbench (`InVEST #1732 diff --git a/Makefile b/Makefile index 4b11c3da2..472e2b363 100644 --- a/Makefile +++ b/Makefile @@ -356,10 +356,8 @@ codesign_mac: codesign --timestamp --verbose --sign Stanford $(WORKBENCH_BIN_TO_SIGN) codesign_windows: - $(GSUTIL) cp gs://stanford_cert/$(CERT_FILE) $(BUILD_DIR)/$(CERT_FILE) "$(SIGNTOOL)" sign -fd SHA256 -f $(BUILD_DIR)/$(CERT_FILE) -p $(CERT_PASS) $(WORKBENCH_BIN_TO_SIGN) "$(SIGNTOOL)" timestamp -tr http://timestamp.sectigo.com -td SHA256 $(WORKBENCH_BIN_TO_SIGN) - $(RM) $(BUILD_DIR)/$(CERT_FILE) @echo "Installer was signed with signtool" deploy: diff --git a/codesigning/Makefile b/codesigning/Makefile new file mode 100644 index 000000000..8fa17b6d1 --- /dev/null +++ b/codesigning/Makefile @@ -0,0 +1,21 @@ +.PHONY: deploy-cloudfunction deploy-worker + +deploy-cloudfunction: + gcloud functions deploy \ + --project natcap-servers \ + codesigning-queue \ + --memory=256Mi \ + --trigger-http \ + --gen2 \ + --region us-west1 \ + --allow-unauthenticated \ + --entry-point main \ + --runtime python312 \ + --source gcp-cloudfunc/ + +# NOTE: This must be executed from a computer that has SSH access to ncp-inkwell. deploy-worker: + cd signing-worker && ansible-playbook \ + --ask-become-pass \ + --inventory-file inventory.ini \ + playbook.yml diff --git a/codesigning/README.md b/codesigning/README.md new file mode 100644 index 000000000..71763ad83 --- /dev/null +++ b/codesigning/README.md @@ -0,0 +1,74 @@ +# InVEST Codesigning Service + +This directory contains all of the functional code and configuration (minus a +few secrets) that are needed to deploy our code-signing service. There are +three key components to this service: + +1. A cloud function (`gcp-cloudfunc/`), backed by Google Cloud Storage, that + operates as a high-latency queue. +2. A script (`enqueue-binary.py`) that will enqueue a binary that already + exists on one of our GCS buckets. +3. A `systemd` service that runs on a debian:bookworm machine and periodically + polls the cloud function to dequeue the next item to sign. + +## Deploying the Cloud Function + +The necessary `gcloud` deployment command can be executed with + +```bash +$ make deploy-cloudfunction +``` + +### Secrets + +The current deployment process requires you to manually create an environment +variable, `ACCESS_TOKEN`, that contains the secret token shared by the cloud +function, systemd service, and enqueue script.
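+
+For example, one way to provide this variable is at deploy time, by extending the
+`gcloud functions deploy` call from `codesigning/Makefile`. This is only a sketch:
+it assumes the standard `--set-env-vars` flag and uses a placeholder token value;
+the real token should come from the software team secrets store rather than being
+typed directly on the command line:
+
+```bash
+$ gcloud functions deploy codesigning-queue \
+    --project natcap-servers \
+    --region us-west1 \
+    --memory=256Mi \
+    --trigger-http \
+    --gen2 \
+    --allow-unauthenticated \
+    --entry-point main \
+    --runtime python312 \
+    --source gcp-cloudfunc/ \
+    --set-env-vars ACCESS_TOKEN=<token-from-secrets-store>
+```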
+ +## Deploying the Systemd Service + +To deploy the systemd service, you will need to be on a computer that has SSH +access to `ncp-inkwell`, the machine with the signing Yubikey installed. This +machine is assumed to run debian:bookworm at this time. To deploy +(non-secret) changes to ncp-inkwell, run this in an environment where +`ansible-playbook` is available (`pip install ansible` to install): + +```bash +$ make deploy-worker +``` + +### Secrets + +The systemd service requires several secrets to be available in the codesigning +workspace, which is located at `/opt/natcap-codesign`: + +* `/opt/natcap-codesign/pass.txt` is a plain text file containing only the PIN + for the Yubikey +* `/opt/natcap-codesign/access_token.txt` is a plain text file containing the + access token shared with the cloud function, systemd service, and enqueue script. +* `/opt/natcap-codesign/slack_token.txt` is a plain text file containing the + Slack token used to post messages to our Slack workspace. +* `/opt/natcap-codesign/natcap-servers-1732552f0202.json` is a GCP service + account key used to authenticate to Google Cloud Storage. This file must be + available in the `gcp-cloudfunc/` directory at the time of deployment. + + +## Future Work + +### Authenticate to the function with Identity Federation + +Access to the cloud function is controlled by a shared secret token, which is +not ideal. Instead, we should be using GitHub/GCP identity federation to control access. + +### Trigger the function with GCS Events + +GCP Cloud Functions have the ability to subscribe to bucket events, which +should allow us to subscribe very specifically to just those `finalize` events +that apply to the Windows workbench binaries. Doing so will require reworking this cloud function into two cloud functions: + +1. An endpoint for ncp-inkwell to poll for the next binary to sign +2. A cloud function that subscribes to GCS bucket events and enqueues the binary to sign. + +Relevant docs include: +* https://cloud.google.com/functions/docs/writing/write-event-driven-functions#cloudevent-example-python + diff --git a/codesigning/enqueue-binary.py b/codesigning/enqueue-binary.py new file mode 100644 index 000000000..2c8e4e1ff --- /dev/null +++ b/codesigning/enqueue-binary.py @@ -0,0 +1,28 @@ +"""Enqueue a Windows binary for signing. + +To call this script, you need to set the ACCESS_TOKEN environment variable from +the software team secrets store. + +Example invocation: + + $ ACCESS_TOKEN=abcs1234 python3 enqueue-binary.py <gs:// or https:// URL of the .exe to sign> +""" + +import os +import sys + +import requests + +DATA = { + 'token': os.environ['ACCESS_TOKEN'], + 'action': 'enqueue', + 'url': sys.argv[1].replace( + 'gs://', 'https://storage.googleapis.com/'), +} +response = requests.post( + 'https://us-west1-natcap-servers.cloudfunctions.net/codesigning-queue', + json=DATA +) +if response.status_code >= 400: + print(response.text) + sys.exit(1) diff --git a/codesigning/enqueue-current-windows-installer.sh b/codesigning/enqueue-current-windows-installer.sh new file mode 100644 index 000000000..6b5c14bc7 --- /dev/null +++ b/codesigning/enqueue-current-windows-installer.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env sh +# +# Run this script to enqueue the current version of the InVEST Windows +# workbench installer for code signing. +# +# NOTE: this script must be run from the directory that contains it. + +version=$(python -m setuptools_scm) +url_base=$(make -C ..
--no-print-directory print-DIST_URL_BASE | awk ' { print $3 } ') +url="${url_base}/workbench/invest_${version}_workbench_win32_x64.exe" + +echo "Enqueuing URL ${url}" +python enqueue-binary.py "${url}" diff --git a/codesigning/gcp-cloudfunc/main.py b/codesigning/gcp-cloudfunc/main.py new file mode 100644 index 000000000..f6278dc2e --- /dev/null +++ b/codesigning/gcp-cloudfunc/main.py @@ -0,0 +1,180 @@ +import contextlib +import datetime +import json +import logging +import os +import time +from urllib.parse import unquote + +import functions_framework +import google.cloud.logging # pip install google-cloud-logging +import requests +from flask import jsonify +from google.cloud import storage # pip install google-cloud-storage + +GOOGLE_PREFIX = 'https://storage.googleapis.com' +CODESIGN_DATA_BUCKET = 'natcap-codesigning' +LOG_CLIENT = google.cloud.logging.Client() +LOG_CLIENT.setup_logging() + + +@contextlib.contextmanager +def get_lock(): + """Acquire a GCS-based mutex. + + This requires that the bucket we are using has versioning. + """ + storage_client = storage.Client() + bucket = storage_client.bucket(CODESIGN_DATA_BUCKET) + + lock_obtained = False + n_tries = 100 + for i in range(n_tries): + lockfile = bucket.blob('mutex.lock') + if not lockfile.generation: + lockfile.upload_from_string( + f"Lock acquired {datetime.datetime.now().isoformat()}") + lock_obtained = True + break + else: + time.sleep(0.1) + + if not lock_obtained: + raise RuntimeError(f'Could not obtain lock after {n_tries} tries') + + try: + yield + finally: + lockfile.delete() + + +@functions_framework.http +def main(request): + """Handle requests to this GCP Cloud Function. + + All requests must be POST requests and have a JSON body with the following + attributes: + + * token: a secret token that matches the ACCESS_TOKEN environment + variable that is defined in the cloud function configuration. + * action: either 'enqueue' or 'dequeue' + + If the action is 'enqueue', the request must also have a 'url' attribute. + The 'url' attribute, when provided, must be a URL to a file that meets + these requirements: + * The URL must be a publicly accessible URL + * The URL must be a file that ends in '.exe' + * The URL must be located in either the releases bucket, or else + in the dev builds bucket. It doesn't necessarily have to be an + InVEST binary. + * The URL must be a file that is not older than June 1, 2024 + * The URL must be a file that is not already in the queue + * The URL should be a file that is not already signed (if the file has + already been signed, its signature will be overwritten) + """ + data = request.get_json() + if data['token'] != os.environ['ACCESS_TOKEN']: + logging.info('Rejecting request due to invalid token') + return jsonify('Invalid token'), 403 + + if request.method != 'POST': + logging.info('Rejecting request due to invalid HTTP method') + return jsonify('Invalid request method'), 405 + + storage_client = storage.Client() + bucket = storage_client.bucket(CODESIGN_DATA_BUCKET) + + logging.debug(f'Data POSTed: {data}') + + if data['action'] == 'dequeue': + with get_lock(): + queuefile = bucket.blob('queue.json') + queue_dict = json.loads(queuefile.download_as_string()) + try: + next_file_url = queue_dict['queue'].pop(0) + except IndexError: + # No items in the queue! 
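+                # Return 204 (No Content) here so the polling worker on ncp-inkwell can tell an empty queue apart from an error response.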
+ logging.info('No binaries are currently queued for signing') + return jsonify('No items in the queue'), 204 + + queuefile.upload_from_string(json.dumps(queue_dict)) + + data = { + 'https-url': next_file_url, + 'basename': os.path.basename(next_file_url), + 'gs-uri': unquote(next_file_url.replace( + f'{GOOGLE_PREFIX}/', 'gs://')), + } + logging.info(f'Dequeued {next_file_url}') + return jsonify(data) + + elif data['action'] == 'enqueue': + url = data['url'] + logging.info(f'Attempting to enqueue url {url}') + + if not url.endswith('.exe'): + logging.info("Rejecting URL because it doesn't end in .exe") + return jsonify('Invalid URL to sign'), 400 + + if not url.startswith(GOOGLE_PREFIX): + logging.info(f'Rejecting URL because it does not start with {GOOGLE_PREFIX}') + return jsonify('Invalid host'), 400 + + if not url.startswith(( + f'{GOOGLE_PREFIX}/releases.naturalcapitalproject.org/', + f'{GOOGLE_PREFIX}/natcap-dev-build-artifacts/')): + logging.info('Rejecting URL because the bucket is incorrect') + return jsonify("Invalid target bucket"), 400 + + # Remove http character quoting + url = unquote(url) + + binary_bucket_name, *binary_obj_paths = url.replace( + GOOGLE_PREFIX + '/', '').split('/') + codesign_bucket = storage_client.bucket(CODESIGN_DATA_BUCKET) + + # If the file does not exist at this URL, reject it. + response = requests.head(url) + if response.status_code >= 400: + logging.info('Rejecting URL because it does not exist') + return jsonify('Requested file does not exist'), 403 + + # If the file is too old, reject it. Trying to avoid a + # denial-of-service by invoking the service with very old files. + # I just pulled June 1 out of thin air as a date that is a little while + # ago, but not so long ago that we could suddenly have many files + # enqueued. + mday, mmonth, myear = response.headers['Last-Modified'].split(' ')[1:4] + modified_time = datetime.datetime.strptime( + ' '.join((mday, mmonth, myear)), '%d %b %Y') + if modified_time < datetime.datetime(year=2024, month=6, day=1): + logging.info('Rejecting URL because it is too old') + return jsonify('File is too old'), 400 + + response = requests.head(f'{url}.signature') + if response.status_code != 404: + logging.info('Rejecting URL because it has already been signed.') + return jsonify('File has already been signed'), 204 + + with get_lock(): + # Since the file has not already been signed, add the file to the + # queue + queuefile = codesign_bucket.blob('queue.json') + if not queuefile.exists(): + queue_dict = {'queue': []} + else: + queue_dict = json.loads(queuefile.download_as_string()) + + if url not in queue_dict['queue']: + queue_dict['queue'].append(url) + else: + return jsonify( + 'File is already in the queue', 200, 'application/json') + + queuefile.upload_from_string(json.dumps(queue_dict)) + + logging.info(f'Enqueued {url}') + return jsonify("OK"), 200 + + else: + return jsonify('Invalid action request'), 405 diff --git a/codesigning/gcp-cloudfunc/requirements.txt b/codesigning/gcp-cloudfunc/requirements.txt new file mode 100644 index 000000000..5296a1506 --- /dev/null +++ b/codesigning/gcp-cloudfunc/requirements.txt @@ -0,0 +1,5 @@ +google-cloud-storage +google-cloud-logging +functions-framework==3.* +flask +requests diff --git a/codesigning/signing-worker/.gitignore b/codesigning/signing-worker/.gitignore new file mode 100644 index 000000000..7ee4051be --- /dev/null +++ b/codesigning/signing-worker/.gitignore @@ -0,0 +1,5 @@ +# This key is copied from GCP. 
I've added it to the .gitignore to try to +# prevent it from getting committed and pushed to git, while still allowing it +# to be where ansible expects the key to be so ansible can copy the file to the +# remote server. +natcap-servers-1732552f0202.json diff --git a/codesigning/signing-worker/inventory.ini b/codesigning/signing-worker/inventory.ini new file mode 100644 index 000000000..e20b26aa9 --- /dev/null +++ b/codesigning/signing-worker/inventory.ini @@ -0,0 +1,5 @@ +# This is an ansible inventory file. If we had more hostnames to list here, we +# could group them into functional groups (e.g. codesign-workers). + +[ncp-inkwell] +ncp-inkwell diff --git a/codesigning/signing-worker/natcap-codesign.py b/codesigning/signing-worker/natcap-codesign.py new file mode 100755 index 000000000..084857e45 --- /dev/null +++ b/codesigning/signing-worker/natcap-codesign.py @@ -0,0 +1,265 @@ +#!/usr/bin/env python3 +"""Service script to sign InVEST windows binaries.""" + +import logging +import os +import shutil +import subprocess +import sys +import textwrap +import time +import traceback + +import pexpect # apt install python3-pexpect +import requests # apt install python3-requests + +LOGGER = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) +CERTIFICATE = sys.argv[1] + +FILE_DIR = os.path.dirname(__file__) +QUEUE_TOKEN_FILE = os.path.join(FILE_DIR, "access_token.txt") +with open(QUEUE_TOKEN_FILE) as token_file: + ACCESS_TOKEN = token_file.read().strip() + +SLACK_TOKEN_FILE = os.path.join(FILE_DIR, "slack_token.txt") +with open(SLACK_TOKEN_FILE) as token_file: + SLACK_ACCESS_TOKEN = token_file.read().strip() + + +SLACK_NOTIFICATION_SUCCESS = textwrap.dedent( + """\ + :lower_left_fountain_pen: Successfully signed and uploaded `{filename}` to + <{url}|google cloud> + """) + +SLACK_NOTIFICATION_ALREADY_SIGNED = textwrap.dedent( + """\ + :lower_left_fountain_pen: `{filename}` is already signed! + <{url}|google cloud> + """) + + +SLACK_NOTIFICATION_FAILURE = textwrap.dedent( + """\ + :red-flag: Something went wrong while signing {filename}: + ``` + {traceback} + ``` + Please investigate on ncp-inkwell using: + ``` + sudo journalctl -u natcap-codesign.service + ``` + """) + + +def post_to_slack(message): + """Post a message to the slack channel. + + Args: + message (str): The message to post. + + Returns: + ``None`` + """ + resp = requests.post( + "https://slack.com/api/chat.postMessage", + headers={ + "Authorization": f"Bearer {SLACK_ACCESS_TOKEN}", + "Content-Type": "application/json; charset=utf-8" + }, + json={ + "channel": "CESG428BH", # sw-invest + "text": message + }) + resp.raise_for_status() + + +def get_from_queue(): + """Get an item to sign from the queue. + + Returns: + ``None`` if there are no items in the queue, the JSON response dict + otherwise. + """ + response = requests.post( + "https://us-west1-natcap-servers.cloudfunctions.net/codesigning-queue", + headers={"Content-Type": "application/json"}, + json={ + "token": ACCESS_TOKEN, + "action": "dequeue" + }) + if response.status_code == 204: + return None + else: + return response.json() + + +def download_file(url): + """Download an arbitrarily large file. + + Adapted from https://stackoverflow.com/a/16696317 + + Args: + url (str): The URL to download. 
+ + Returns: + ``None`` + """ + local_filename = url.split('/')[-1] + with requests.get(url, stream=True) as r: + r.raise_for_status() + with open(local_filename, 'wb') as f: + for chunk in r.iter_content(chunk_size=8192): + f.write(chunk) + return local_filename + + +def upload_to_bucket(filename, path_on_bucket): + """Upload a file to a GCS bucket. + + Args: + filename (str): The local file to upload. + path_on_bucket (str): The path to the file on the GCS bucket, including + the ``gs://`` prefix. + + Returns: + ``None`` + """ + subprocess.run(['gsutil', 'cp', filename, path_on_bucket], check=True) + + +def sign_file(file_to_sign): + """Sign a local .exe file. + + Uses ``osslsigncode`` to sign the file using the private key stored on a + Yubikey, and the corresponding certificate that has been exported from the + PIV slot 9c. + + Args: + file_to_sign (str): The local filepath to the file to sign. + + Returns: + ``None`` + """ + signed_file = f"{file_to_sign}.signed" + pass_file = os.path.join(FILE_DIR, 'pass.txt') + + signcode_command = textwrap.dedent(f"""\ + osslsigncode sign \ + -pkcs11engine /usr/lib/x86_64-linux-gnu/engines-3/pkcs11.so \ + -pkcs11module /usr/lib/x86_64-linux-gnu/libykcs11.so \ + -key "pkcs11:id=%02;type=private" \ + -certs {CERTIFICATE} \ + -h sha256 \ + -ts http://timestamp.sectigo.com \ + -readpass {pass_file} \ + -verbose \ + -in {file_to_sign} \ + -out {signed_file}""") + + process = pexpect.spawnu(signcode_command) + process.expect('Enter PKCS#11 key PIN for Private key for Digital Signature:') + with open(pass_file) as passfile: + process.sendline(passfile.read().strip()) + + # print remainder of program output for our logging. + print(process.read()) + + shutil.move(signed_file, file_to_sign) + + +def note_signature_complete(local_filepath, target_gs_uri): + """Create a small file next to the signed file to indicate signature. + + Args: + gs_uri (str): The GCS URI of the signed file. + """ + # Using osslsigncode to verify the output always fails for me, even though + # the signature is clearly valid when checked on Windows. + process = subprocess.run( + ['osslsigncode', 'verify', '-in', local_filepath], check=False, + capture_output=True) + + temp_filepath = f'/tmp/{os.path.basename(local_filepath)}.signed' + with open(temp_filepath, 'w') as signature_file: + signature_file.write(process.stdout.decode()) + + try: + # Upload alongside the original file + subprocess.run( + ['gsutil', 'cp', temp_filepath, f'{target_gs_uri}.signature'], + check=True) + finally: + os.remove(temp_filepath) + + +def has_signature(filename): + """Check if a file is already signed. + + Args: + filename (str): The local filepath to the file to check. + + Returns: + ``True`` if the file is signed, ``False`` otherwise. + """ + process = subprocess.run( + ['osslsigncode', 'verify', '-in', filename], capture_output=True, + check=False) + + # Handle the case where it's possible there might not be any stdout or + # stderr to decode. 
+ process_output = "" + for output in (process.stdout, process.stderr): + if output is not None: + process_output += output.decode() + + if 'No signature found' in process_output: + return False + return True + + +def main(): + while True: + try: + file_to_sign = get_from_queue() + if file_to_sign is None: + LOGGER.info('No items in the queue') + else: + LOGGER.info(f"Dequeued and downloading {file_to_sign['https-url']}") + filename = download_file(file_to_sign['https-url']) + + LOGGER.info(f"Checking if {filename} is already signed") + if has_signature(filename): + LOGGER.info(f"{filename} is already signed, skipping") + post_to_slack( + SLACK_NOTIFICATION_ALREADY_SIGNED.format( + filename=filename, + url=file_to_sign['https-url'])) + note_signature_complete(filename, file_to_sign['gs-uri']) + else: + LOGGER.info(f"Signing {filename}") + sign_file(filename) + LOGGER.info(f"Uploading signed file to {file_to_sign['gs-uri']}") + upload_to_bucket(filename, file_to_sign['gs-uri']) + LOGGER.info( + f"Adding {file_to_sign['https-url']} to signed files list") + note_signature_complete(filename, file_to_sign['gs-uri']) + LOGGER.info(f"Removing {filename}") + post_to_slack( + SLACK_NOTIFICATION_SUCCESS.format( + filename=filename, + url=file_to_sign['https-url'])) + LOGGER.info("Signing complete.") + os.remove(filename) + except Exception as e: + LOGGER.exception(f"Unexpected error signing file: {e}") + post_to_slack( + SLACK_NOTIFICATION_FAILURE.format( + filename=file_to_sign['https-url'], + traceback=traceback.format_exc())) + time.sleep(60) + + +if __name__ == '__main__': + main() diff --git a/codesigning/signing-worker/natcap-codesign.service b/codesigning/signing-worker/natcap-codesign.service new file mode 100644 index 000000000..afc728541 --- /dev/null +++ b/codesigning/signing-worker/natcap-codesign.service @@ -0,0 +1,33 @@ +# Systemd service for debian:bookworm for signing InVEST windows binaries. +# +# To install this service, copy this onto the host as /etc/systemd/system/natcap-codesign.service +# +# To use, run (for example): +# # On modifying the service file, run: +# $ sudo systemctl daemon-reload +# +# # enable the service +# $ sudo systemctl enable natcap-codesign.service +# +# # start the service +# $ sudo systemctl start natcap-codesign +# +# # check the service status +# $ sudo systemctl status natcap-codesign +# +# This service is built to run in the foreground. +# +# See https://wiki.debian.org/systemd/Services for background info about systemd services. 
+ +[Unit] +Description=NatCap Code Signing for Windows EXE Binaries +User=natcap-codesign +Group=natcap-codesign +WorkingDirectory=/tmp + + +[Service] +# Run in the foreground +Type=simple +Restart=always +ExecStart=python3 /opt/natcap-codesign/natcap-codesign.py /opt/natcap-codesign/codesign-cert-chain.pem diff --git a/codesigning/signing-worker/playbook.yml b/codesigning/signing-worker/playbook.yml new file mode 100644 index 000000000..319180a0a --- /dev/null +++ b/codesigning/signing-worker/playbook.yml @@ -0,0 +1,112 @@ +--- + +- name: Set up everything needed on NCP-Inkwell + hosts: all + become: true + become_method: sudo + tasks: + - name: Install GCP SDK dependencies + ansible.builtin.apt: + update_cache: true + pkg: + - apt-transport-https + - ca-certificates + - gnupg + - curl + + - name: Download the Google Cloud SDK package repository signing key + ansible.builtin.shell: + cmd: curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg + creates: /usr/share/keyrings/cloud.google.gpg + + - name: Add Google Cloud SDK package repository source + ansible.builtin.apt_repository: + update_cache: true + filename: google-cloud-sdk.list + repo: "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" + + - name: Install packages + ansible.builtin.apt: + update_cache: true + pkg: + - python3 + - python3-pexpect + - python3-requests + - wget + - vim-nox + - yubico-piv-tool + - libengine-pkcs11-openssl + - ykcs11 + - libssl-dev + - google-cloud-sdk + - google-cloud-cli + - yubikey-manager + + - name: Add bookworm-backports repository + ansible.builtin.apt_repository: + update_cache: true + repo: "deb http://deb.debian.org/debian {{ ansible_distribution_release }}-backports main" + filename: bookworm-backports.list + + - name: Install osslsigncode from backports + ansible.builtin.apt: + update_cache: true + default_release: "{{ ansible_distribution_release }}-backports" + pkg: + # The normal debian:bookworm repos have osslsigncode 2.5, which has a + # bug in it that prevents it from signing our binaries. This was + # fixed in osslsigncode 2.6. The version available in + # bookworm-backports is 2.9. 
The issue (and solution) was similar to + # https://stackoverflow.com/a/78308879 + - osslsigncode + + - name: Create the codesign directory + ansible.builtin.file: + state: directory + path: /opt/natcap-codesign + + - name: Install the certificate + ansible.builtin.shell: + cmd: ykman piv certificates export 9c /opt/natcap-codesign/codesign-cert-chain.pem + creates: /opt/natcap-codesign/codesign-cert-chain.pem + + - name: Create codesigning group + ansible.builtin.group: + name: natcap-codesign + state: present + + - name: Create codesigning user + ansible.builtin.user: + name: natcap-codesign + group: natcap-codesign + shell: /bin/bash + createhome: true + + - name: Install the service account key + ansible.builtin.copy: + src: natcap-servers-1732552f0202.json + dest: /opt/natcap-codesign/natcap-servers-1732552f0202.json + mode: 0600 + + - name: Set up application credentials + ansible.builtin.shell: + cmd: gcloud auth activate-service-account --key-file=/opt/natcap-codesign/natcap-servers-1732552f0202.json + + - name: Install codesigning python script + ansible.builtin.copy: + src: natcap-codesign.py + dest: /opt/natcap-codesign/natcap-codesign.py + mode: 0755 + + - name: Install the codesign service + ansible.builtin.copy: + src: natcap-codesign.service + dest: /etc/systemd/system/natcap-codesign.service + mode: 0644 + + - name: Enable the natcap-codesign service + ansible.builtin.systemd_service: + name: natcap-codesign + daemon_reload: true # reload in case there are any config changes + state: restarted + enabled: true diff --git a/exe/hooks/rthook.py b/exe/hooks/rthook.py index 492344124..c007eef05 100644 --- a/exe/hooks/rthook.py +++ b/exe/hooks/rthook.py @@ -3,6 +3,7 @@ import platform import sys os.environ['PROJ_LIB'] = os.path.join(sys._MEIPASS, 'proj') +os.environ['PROJ_DATA'] = os.path.join(sys._MEIPASS, 'proj') if platform.system() == 'Darwin': # Rtree will look in this directory first for libspatialindex_c.dylib. diff --git a/src/natcap/invest/annual_water_yield.py b/src/natcap/invest/annual_water_yield.py index eccea65f6..77dfea8db 100644 --- a/src/natcap/invest/annual_water_yield.py +++ b/src/natcap/invest/annual_water_yield.py @@ -251,11 +251,11 @@ MODEL_SPEC = { } }, "index_col": "lucode", - "required": False, + "required": "valuation_table_path", "about": gettext( "A table of water demand for each LULC class. Each LULC code " "in the LULC raster must have a corresponding row in this " - "table."), + "table. Required if 'valuation_table_path' is provided."), "name": gettext("water demand table") }, "valuation_table_path": { @@ -502,14 +502,13 @@ def execute(args): a path to an input CSV table of LULC classes, showing consumptive water use for each landuse / land-cover type (cubic meters per year) to calculate - water scarcity. + water scarcity. Required if ``valuation_table_path`` is provided. args['valuation_table_path'] (string): (optional) if a non-empty string, a path to an input CSV table of hydropower stations with the following fields to calculate valuation: 'ws_id', 'time_span', 'discount', 'efficiency', 'fraction', 'cost', 'height', 'kw_price' - Required if ``calculate_valuation`` is True. args['n_workers'] (int): (optional) The number of worker processes to use for processing this model. 
If omitted, computation will take diff --git a/tests/test_annual_water_yield.py b/tests/test_annual_water_yield.py index e450f4b7e..ad51661b1 100644 --- a/tests/test_annual_water_yield.py +++ b/tests/test_annual_water_yield.py @@ -1,20 +1,53 @@ """Module for Regression Testing the InVEST Annual Water Yield module.""" -import unittest -import tempfile -import shutil import os +import shutil +import tempfile +import unittest + +import numpy +from shapely.geometry import Polygon import pandas -import numpy -from osgeo import gdal import pygeoprocessing - +from osgeo import gdal, ogr, osr REGRESSION_DATA = os.path.join( os.path.dirname(__file__), '..', 'data', 'invest-test-data', 'annual_water_yield') SAMPLE_DATA = os.path.join(REGRESSION_DATA, 'input') gdal.UseExceptions() + +def make_watershed_vector(path_to_shp): + """ + Generate watershed results shapefile with two polygons + + Args: + path_to_shp (str): path to store watershed results vector + + Outputs: + None + """ + shapely_geometry_list = [ + Polygon([(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)]), + Polygon([(2, 2), (3, 2), (3, 3), (2, 3), (2, 2)]) + ] + projection_wkt = osr.GetUserInputAsWKT("EPSG:4326") + vector_format = "ESRI Shapefile" + fields = {"hp_energy": ogr.OFTReal, "hp_val": ogr.OFTReal, + "ws_id": ogr.OFTReal, "rsupply_vl": ogr.OFTReal, + "wyield_mn": ogr.OFTReal, "wyield_vol": ogr.OFTReal, + "consum_mn": ogr.OFTReal, "consum_vol": ogr.OFTReal} + attribute_list = [ + {"hp_energy": 1, "hp_val": 1, "ws_id": 0, "rsupply_vl": 2}, + {"hp_energy": 11, "hp_val": 3, "ws_id": 1, "rsupply_vl": 52} + ] + + pygeoprocessing.shapely_geometry_to_vector(shapely_geometry_list, + path_to_shp, projection_wkt, + vector_format, fields, + attribute_list) + + class AnnualWaterYieldTests(unittest.TestCase): """Regression Tests for Annual Water Yield Model.""" @@ -74,7 +107,7 @@ class AnnualWaterYieldTests(unittest.TestCase): with self.assertRaises(ValueError) as cm: annual_water_yield.execute(args) self.assertTrue('veg value must be either 1 or 0' in str(cm.exception)) - + def test_missing_lulc_value(self): """Hydro: catching missing LULC value in Biophysical table.""" from natcap.invest import annual_water_yield @@ -89,7 +122,7 @@ class AnnualWaterYieldTests(unittest.TestCase): bio_df = bio_df[bio_df['lucode'] != 2] bio_df.to_csv(bad_biophysical_path) bio_df = None - + args['biophysical_table_path'] = bad_biophysical_path with self.assertRaises(ValueError) as cm: @@ -97,13 +130,13 @@ class AnnualWaterYieldTests(unittest.TestCase): self.assertTrue( "The missing values found in the LULC raster but not the table" " are: [2]" in str(cm.exception)) - + def test_missing_lulc_demand_value(self): """Hydro: catching missing LULC value in Demand table.""" from natcap.invest import annual_water_yield args = AnnualWaterYieldTests.generate_base_args(self.workspace_dir) - + args['demand_table_path'] = os.path.join( SAMPLE_DATA, 'water_demand_table.csv') args['sub_watersheds_path'] = os.path.join( @@ -117,7 +150,7 @@ class AnnualWaterYieldTests(unittest.TestCase): demand_df = demand_df[demand_df['lucode'] != 2] demand_df.to_csv(bad_demand_path) demand_df = None - + args['demand_table_path'] = bad_demand_path with self.assertRaises(ValueError) as cm: @@ -247,7 +280,8 @@ class AnnualWaterYieldTests(unittest.TestCase): def test_validation(self): """Hydro: test failure cases on the validation function.""" - from natcap.invest import annual_water_yield, validation + from natcap.invest import annual_water_yield + from natcap.invest import validation args = 
AnnualWaterYieldTests.generate_base_args(self.workspace_dir) @@ -367,3 +401,124 @@ class AnnualWaterYieldTests(unittest.TestCase): self.assertTrue( 'but are not found in the valuation table' in actual_message, actual_message) + # if the demand table is missing but the valuation table is present, + # make sure we have a validation error. + args_missing_demand_table = args.copy() + args_missing_demand_table['demand_table_path'] = '' + args_missing_demand_table['valuation_table_path'] = ( + os.path.join(SAMPLE_DATA, 'hydropower_valuation_table.csv')) + validation_warnings = annual_water_yield.validate( + args_missing_demand_table) + self.assertEqual(len(validation_warnings), 1) + self.assertEqual( + validation_warnings[0], + (['demand_table_path'], 'Input is required but has no value')) + + def test_fractp_op(self): + """Test `fractp_op`""" + from natcap.invest.annual_water_yield import fractp_op + + # generate fake data + kc = numpy.array([[1, .1, .1], [.6, .6, .1]]) + eto = numpy.array([[1000, 900, 900], [1100, 1005, 1000]]) + precip = numpy.array([[100, 1000, 10], [500, 800, 1100]]) + root = numpy.array([[99, 300, 400], [5, 500, 800]]) + soil = numpy.array([[600, 700, 700], [800, 900, 600]]) + pawc = numpy.array([[.11, .11, .12], [.55, .55, .19]]) + veg = numpy.array([[1, 1, 0], [0, 1, 0]]) + nodata_dict = {'eto': None, 'precip': None, 'depth_root': None, + 'pawc': None, 'out_nodata': None} + seasonality_constant = 6 + + actual_fractp = fractp_op(kc, eto, precip, root, soil, pawc, veg, + nodata_dict, seasonality_constant) + + # generated by running fractp_op + expected_fractp = numpy.array([[0.9345682, 0.06896508, 1.], + [1., 0.6487423, 0.09090909]], + dtype=numpy.float32) + + numpy.testing.assert_allclose(actual_fractp, expected_fractp, + err_msg="Fractp does not match expected") + + def test_compute_watershed_valuation(self): + """Test `compute_watershed_valuation`, `compute_rsupply_volume` + and `compute_water_yield_volume`""" + from natcap.invest import annual_water_yield + + def _create_watershed_results_vector(path_to_shp): + """Generate a fake watershed results vector file.""" + shapely_geometry_list = [ + Polygon([(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)]), + Polygon([(2, 2), (3, 2), (3, 3), (2, 3), (2, 2)]) + ] + projection_wkt = osr.GetUserInputAsWKT("EPSG:4326") + vector_format = "ESRI Shapefile" + fields = {"ws_id": ogr.OFTReal, "wyield_mn": ogr.OFTReal, + "consum_mn": ogr.OFTReal, "consum_vol": ogr.OFTReal} + attribute_list = [{"ws_id": 0, "wyield_mn": 990000, + "consum_mn": 500, "consum_vol": 50}, + {"ws_id": 1, "wyield_mn": 800000, + "consum_mn": 600, "consum_vol": 70}] + + pygeoprocessing.shapely_geometry_to_vector(shapely_geometry_list, + path_to_shp, + projection_wkt, + vector_format, fields, + attribute_list) + + def _validate_fields(vector_path, field_name, expected_values, error_msg): + """ + Validate a specific field in the watershed results vector + by comparing actual to expected values. Expected values generated + by running the function. 
+ + Args: + vector path (str): path to watershed shapefile + field_name (str): attribute field to check + expected values (list): list of expected values for field + error_msg (str): what to print if assertion fails + + Returns: + None + """ + with gdal.OpenEx(vector_path, gdal.OF_VECTOR | gdal.GA_Update) as ws_ds: + ws_layer = ws_ds.GetLayer() + actual_values = [ws_feat.GetField(field_name) + for ws_feat in ws_layer] + self.assertEqual(actual_values, expected_values, msg=error_msg) + + # generate fake watershed results vector + watershed_results_vector_path = os.path.join(self.workspace_dir, + "watershed_results.shp") + _create_watershed_results_vector(watershed_results_vector_path) + + # generate fake val_df + val_df = pandas.DataFrame({'efficiency': [.7, .8], 'height': [12, 50], + 'fraction': [.9, .7], 'discount': [60, 20], + 'time_span': [10, 10], 'cost': [100, 200], + 'kw_price': [15, 20]}) + + # test water yield volume + annual_water_yield.compute_water_yield_volume( + watershed_results_vector_path) + _validate_fields(watershed_results_vector_path, "wyield_vol", + [990.0, 800.0], + "Error with water yield volume calculation.") + + # test rsupply volume + annual_water_yield.compute_rsupply_volume( + watershed_results_vector_path) + _validate_fields(watershed_results_vector_path, "rsupply_vl", + [940.0, 730.0], + "Error calculating total realized water supply volume.") + + # test compute watershed valuation + annual_water_yield.compute_watershed_valuation( + watershed_results_vector_path, val_df) + _validate_fields(watershed_results_vector_path, "hp_energy", + [19.329408, 55.5968], + "Error calculating energy.") + _validate_fields(watershed_results_vector_path, "hp_val", + [501.9029748723, 4587.91946857059], + "Error calculating net present value.") diff --git a/tests/test_carbon.py b/tests/test_carbon.py index e6c3486ca..ed0e11109 100644 --- a/tests/test_carbon.py +++ b/tests/test_carbon.py @@ -268,6 +268,69 @@ class CarbonTests(unittest.TestCase): assert_raster_equal_value( os.path.join(args['workspace_dir'], 'npv_redd.tif'), -0.4602106) + def test_generate_carbon_map(self): + """Test `_generate_carbon_map`""" + from natcap.invest.carbon import _generate_carbon_map + + def _make_simple_lulc_raster(base_raster_path): + """Create a raster on designated path with arbitrary values. + Args: + base_raster_path (str): the raster path for making the new raster. + Returns: + None. 
+ """ + + array = numpy.array([[1, 1], [2, 3]], dtype=numpy.int32) + + # UTM Zone 10N + srs = osr.SpatialReference() + srs.ImportFromEPSG(26910) + projection_wkt = srs.ExportToWkt() + + origin = (461251, 4923245) + pixel_size = (1, 1) + no_data = -999 + + pygeoprocessing.numpy_array_to_raster( + array, no_data, pixel_size, origin, projection_wkt, + base_raster_path) + + # generate a fake lulc raster + lulc_path = os.path.join(self.workspace_dir, "lulc.tif") + _make_simple_lulc_raster(lulc_path) + + # make fake carbon pool dict + carbon_pool_by_type = {1: 5000, 2: 60, 3: 120} + + out_carbon_stock_path = os.path.join(self.workspace_dir, + "carbon_stock.tif") + + _generate_carbon_map(lulc_path, carbon_pool_by_type, + out_carbon_stock_path) + + # open output carbon stock raster and check values + actual_carbon_stock = gdal.Open(out_carbon_stock_path) + band = actual_carbon_stock.GetRasterBand(1) + actual_carbon_stock = band.ReadAsArray() + + expected_carbon_stock = numpy.array([[0.5, 0.5], [0.006, 0.012]], + dtype=numpy.float32) + + numpy.testing.assert_array_equal(actual_carbon_stock, + expected_carbon_stock) + + def test_calculate_valuation_constant(self): + """Test `_calculate_valuation_constant`""" + from natcap.invest.carbon import _calculate_valuation_constant + + valuation_constant = _calculate_valuation_constant(lulc_cur_year=2010, + lulc_fut_year=2012, + discount_rate=50, + rate_change=5, + price_per_metric_ton_of_c=50) + expected_valuation = 40.87302 + self.assertEqual(round(valuation_constant, 5), expected_valuation) + class CarbonValidationTests(unittest.TestCase): """Tests for the Carbon Model MODEL_SPEC and validation.""" diff --git a/tests/test_coastal_blue_carbon.py b/tests/test_coastal_blue_carbon.py index 18b062811..93df53aaa 100644 --- a/tests/test_coastal_blue_carbon.py +++ b/tests/test_coastal_blue_carbon.py @@ -11,6 +11,7 @@ import unittest import numpy import pandas +from scipy.sparse import dok_matrix import pygeoprocessing from natcap.invest import utils from natcap.invest import validation @@ -24,6 +25,26 @@ REGRESSION_DATA = os.path.join( LOGGER = logging.getLogger(__name__) +def make_raster_from_array(base_raster_path, array): + """Create a raster on designated path with arbitrary values. + Args: + base_raster_path (str): the raster path for making the new raster. + Returns: + None. 
+ """ + # UTM Zone 10N + srs = osr.SpatialReference() + srs.ImportFromEPSG(26910) + projection_wkt = srs.ExportToWkt() + + origin = (461261, 4923265) + pixel_size = (1, 1) + no_data = -1 + + pygeoprocessing.numpy_array_to_raster( + array, no_data, pixel_size, origin, projection_wkt, + base_raster_path) + class TestPreprocessor(unittest.TestCase): """Test Coastal Blue Carbon preprocessor functions.""" @@ -1060,3 +1081,149 @@ class TestCBC2(unittest.TestCase): [(['analysis_year'], coastal_blue_carbon.INVALID_ANALYSIS_YEAR_MSG.format( analysis_year=2000, latest_year=2000))]) + + def test_calculate_npv(self): + """Test `_calculate_npv`""" + from natcap.invest.coastal_blue_carbon import coastal_blue_carbon + + # make fake data + net_sequestration_rasters = { + 2010: os.path.join(self.workspace_dir, "carbon_seq_2010.tif"), + 2011: os.path.join(self.workspace_dir, "carbon_seq_2011.tif"), + 2012: os.path.join(self.workspace_dir, "carbon_seq_2012.tif") + } + + for year, path in net_sequestration_rasters.items(): + array = numpy.array([[year*.5, year*.25], [year-1, 50]]) # random array + make_raster_from_array(path, array) + + prices_by_year = { + 2010: 50, + 2011: 80, + 2012: 95 + } + discount_rate = 0.1 + baseline_year = 2010 + target_raster_years_and_paths = { + 2010: os.path.join(self.workspace_dir, "tgt_carbon_seq_2010.tif"), + 2011: os.path.join(self.workspace_dir, "tgt_carbon_seq_2011.tif"), + 2012: os.path.join(self.workspace_dir, "tgt_carbon_seq_2012.tif") + } + + coastal_blue_carbon._calculate_npv(net_sequestration_rasters, + prices_by_year, discount_rate, + baseline_year, + target_raster_years_and_paths) + + # read in the created target rasters + actual_2011 = gdal.Open(target_raster_years_and_paths[2011]) + band = actual_2011.GetRasterBand(1) + actual_2011 = band.ReadAsArray() + + actual_2012 = gdal.Open(target_raster_years_and_paths[2012]) + band = actual_2012.GetRasterBand(1) + actual_2012 = band.ReadAsArray() + + # compare actual rasters to expected (based on running `_calculate_npv`) + expected_2011 = numpy.array([[100525, 50262.5], [200950, 5000]]) + expected_2012 = numpy.array([[370206.818182, 185103.409091], + [740045.454545, 18409.090909]]) + numpy.testing.assert_allclose(actual_2011, expected_2011) + numpy.testing.assert_allclose(actual_2012, expected_2012) + + def test_calculate_accumulation_over_time(self): + """Test `_calculate_accumulation_over_time`""" + from natcap.invest.coastal_blue_carbon.coastal_blue_carbon import \ + _calculate_accumulation_over_time + + # generate fake data with nodata values + nodata = float(numpy.finfo(numpy.float32).min) + annual_biomass_matrix = numpy.array([[1, 2], [3, nodata]]) + annual_soil_matrix = numpy.array([[11, 12], [13, 14]]) + annual_litter_matrix = numpy.array([[.5, .9], [4, .9]]) + n_years = 3 + + actual_accumulation = _calculate_accumulation_over_time( + annual_biomass_matrix, annual_soil_matrix, annual_litter_matrix, + n_years) + + expected_accumulation = numpy.array([[37.5, 44.7], [60, nodata]]) + numpy.testing.assert_allclose(actual_accumulation, expected_accumulation) + + def test_calculate_net_sequestration(self): + """test `_calculate_net_sequestration`""" + from natcap.invest.coastal_blue_carbon.coastal_blue_carbon import \ + _calculate_net_sequestration + + # make fake rasters that contain nodata pixels (-1) + accumulation_raster_path = os.path.join(self.workspace_dir, + "accumulation_raster.tif") + accumulation_array = numpy.array([[40, -1], [70, -1]]) + make_raster_from_array(accumulation_raster_path, accumulation_array) + 
+ emissions_raster_path = os.path.join(self.workspace_dir, + "emissions_raster.tif") + emissions_array = numpy.array([[-1, 8], [7, -1]]) + make_raster_from_array(emissions_raster_path, emissions_array) + + target_raster_path = os.path.join(self.workspace_dir, + "target_raster.tif") + + # run `_calculate_net_sequestration` + _calculate_net_sequestration(accumulation_raster_path, + emissions_raster_path, target_raster_path) + + # compare actual to expected output net sequestration raster + actual_sequestration = gdal.Open(target_raster_path) + band = actual_sequestration.GetRasterBand(1) + actual_sequestration = band.ReadAsArray() + + # calculated by running `_calculate_net_sequestration` + nodata = float(numpy.finfo(numpy.float32).min) + expected_sequestration = numpy.array([[40, -8], [-7, nodata]]) + + numpy.testing.assert_allclose(actual_sequestration, + expected_sequestration) + + def test_reclassify_accumulation_transition(self): + """Test `_reclassify_accumulation_transition`""" + from natcap.invest.coastal_blue_carbon.coastal_blue_carbon import \ + _reclassify_accumulation_transition, _reclassify_disturbance_magnitude + + # make fake raster data + landuse_transition_from_raster = os.path.join(self.workspace_dir, + "landuse_transition_from.tif") + landuse_transition_from_array = numpy.array([[1, 2], [3, 2]]) + make_raster_from_array(landuse_transition_from_raster, + landuse_transition_from_array) + + landuse_transition_to_raster = os.path.join(self.workspace_dir, + "landuse_transition_to.tif") + landuse_transition_to_array = numpy.array([[1, 1], [2, 3]]) + make_raster_from_array(landuse_transition_to_raster, + landuse_transition_to_array) + + #make fake accumulation_rate_matrix + accumulation_rate_matrix = dok_matrix((4, 4), dtype=numpy.float32) + accumulation_rate_matrix[1, 2] = 0.5 # Forest -> Grassland + accumulation_rate_matrix[1, 3] = 0.3 # Forest -> Agriculture + + accumulation_rate_matrix[2, 1] = 0.2 # Grassland -> Forest + accumulation_rate_matrix[2, 3] = 0.4 # Grassland -> Agriculture + + accumulation_rate_matrix[3, 1] = 0.1 # Agriculture -> Forest + accumulation_rate_matrix[3, 2] = 0.3 # Agriculture -> Grassland + + target_raster_path = os.path.join(self.workspace_dir, "output.tif") + _reclassify_accumulation_transition( + landuse_transition_from_raster, landuse_transition_to_raster, + accumulation_rate_matrix, target_raster_path) + + # compare actual and expected target_raster + actual_accumulation = gdal.Open(target_raster_path) + band = actual_accumulation.GetRasterBand(1) + actual_accumulation = band.ReadAsArray() + + expected_accumulation = numpy.array([[0, .2], [.3, .4]]) + + numpy.testing.assert_allclose(actual_accumulation, expected_accumulation) diff --git a/tests/test_coastal_vulnerability.py b/tests/test_coastal_vulnerability.py index 518f916c5..208456e86 100644 --- a/tests/test_coastal_vulnerability.py +++ b/tests/test_coastal_vulnerability.py @@ -8,6 +8,7 @@ import unittest import numpy.testing import pandas.testing +import pandas import pygeoprocessing import shapely.wkb import taskgraph @@ -1553,6 +1554,100 @@ class CoastalVulnerabilityTests(unittest.TestCase): # Polygon has 4 sides on exterior, 3 on interior, expect 7 lines self.assertTrue(len(line_list) == 7) + def test_assemble_results_and_calculate_exposure(self): + """Test that assemble_results_and_calculate_exposure correctly + calculates exposure""" + from natcap.invest.coastal_vulnerability import \ + assemble_results_and_calculate_exposure + + def _make_shore_points_vector(shore_points_path): + # 
create 4 points, each with a unique 'shore_id' in [0..3]. + shore_geometries = [Point(0, 0), Point(1, 0), Point(2, 1), Point(3, 2)] + shore_fields = {'shore_id': ogr.OFTInteger} + shore_attributes = [{'shore_id': i} for i in range(len(shore_geometries))] + + # Create a spatial reference (projected or geographic) + srs = osr.SpatialReference() + srs.ImportFromEPSG(26910) # e.g. "NAD83 / UTM zone 10N" + pygeoprocessing.shapely_geometry_to_vector( + shore_geometries, shore_points_path, srs.ExportToWkt(), + vector_format='GPKG', + fields=shore_fields, + attribute_list=shore_attributes, + ogr_geom_type=ogr.wkbPoint + ) + + def _make_habitat_csv(habitat_csv_path): + # Example: one habitat column named 'kelp', plus 'R_hab' + # We have 4 shore IDs, so we add 4 rows. Values are arbitrary. + habitat_df = pandas.DataFrame( + {'shore_id': [0, 1, 2, 3], 'kelp': [5, 3, 5, 4], + 'seagrass': [4, 1, 2, 4], 'R_hab': [5, 2, 5, 3]}) + habitat_df.to_csv(habitat_csv_path, index=False) + + def _make_risk_id_path_list(): + # Create pickles for risk data + relief_pkl = os.path.join(self.workspace_dir, 'relief.pickle') + slr_pkl = os.path.join(self.workspace_dir, 'slr.pickle') + population_pkl = os.path.join(self.workspace_dir, 'population.pickle') + + relief_data = {0: 10.0, 1: 50.0, 2: 30.0, 3: 80.0} # arbitrary data + slr_data = {0: 0.1, 1: 0.2, 2: 0.9, 3: 0.5} + population_data = {0: 123.0, 1: 999.0, 2: 55.0, 3: 0.0} + + for file_path, data_dict in zip([relief_pkl, slr_pkl, population_pkl], + [relief_data, slr_data, population_data]): + with open(file_path, 'wb') as f: + pickle.dump(data_dict, f) + + risk_id_path_list = [ + (relief_pkl, True, "R_relief"), # "True" => bin to 1..5 + (slr_pkl, True, "R_slr"), + (population_pkl, False, "population") + ] + return risk_id_path_list + + shore_points_path = os.path.join(self.workspace_dir, "shore_points.gpkg") + _make_shore_points_vector(shore_points_path) + + habitat_csv_path = os.path.join(self.workspace_dir, 'habitat_protection.csv') + _make_habitat_csv(habitat_csv_path) + + risk_id_path_list = _make_risk_id_path_list() + + intermediate_vector_path = os.path.join(self.workspace_dir, + 'intermediate_exposure.gpkg') + intermediate_csv_path = os.path.join(self.workspace_dir, + 'intermediate_exposure.csv') + output_vector_path = os.path.join(self.workspace_dir, + 'coastal_exposure.gpkg') + output_csv_path = os.path.join(self.workspace_dir, + 'coastal_exposure.csv') + + # call function + assemble_results_and_calculate_exposure( + risk_id_path_list, + habitat_csv_path, + shore_points_path, + intermediate_vector_path, + intermediate_csv_path, + output_vector_path, + output_csv_path + ) + + # read field values in output vector and compare + actual_df = pandas.read_csv( + output_csv_path, + usecols=["exposure", "habitat_role", "exposure_no_habitats"]) + + expected_df = pandas.DataFrame({ + "exposure": [2.924018, 2.0, 4.641589, 2.289428], + "habitat_role": [0, 0.714418, 0, 0.424989], + "exposure_no_habitats": [2.924018, 2.714418, 4.641589, 2.714418]}) + + pandas.testing.assert_frame_equal( + actual_df, expected_df, check_dtype=False) + def assert_pickled_arrays_almost_equal( actual_values_pickle_path, expected_values_json_path): diff --git a/tests/test_crop_production.py b/tests/test_crop_production.py index 71ed3170a..dc9b340c8 100644 --- a/tests/test_crop_production.py +++ b/tests/test_crop_production.py @@ -5,9 +5,10 @@ import shutil import os import numpy -from osgeo import gdal +from osgeo import gdal, ogr, osr import pandas import pygeoprocessing +from shapely.geometry 
import Polygon gdal.UseExceptions() MODEL_DATA_PATH = os.path.join( @@ -21,6 +22,108 @@ TEST_DATA_PATH = os.path.join( 'crop_production_model') +def make_aggregate_vector(path_to_shp): + """ + Generate shapefile with two overlapping polygons + Args: + path_to_shp (str): path to store watershed results vector + Outputs: + None + """ + # (xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax) + shapely_geometry_list = [ + Polygon([(461151, 4923265-50), (461261+50, 4923265-50), + (461261+50, 4923265), (461151, 4923265)]), + Polygon([(461261, 4923265-35), (461261+60, 4923265-35), + (461261+60, 4923265+50), (461261, 4923265+50)]) + ] + + srs = osr.SpatialReference() + srs.ImportFromEPSG(26910) + projection_wkt = srs.ExportToWkt() + + vector_format = "ESRI Shapefile" + fields = {"id": ogr.OFTReal} + attribute_list = [ + {"id": 0}, + {"id": 1}, + ] + + pygeoprocessing.shapely_geometry_to_vector(shapely_geometry_list, + path_to_shp, projection_wkt, + vector_format, fields, + attribute_list) + + +def make_simple_raster(base_raster_path, array): + """Create a raster on designated path with arbitrary values. + Args: + base_raster_path (str): the raster path for making the new raster. + Returns: + None. + """ + # UTM Zone 10N + srs = osr.SpatialReference() + srs.ImportFromEPSG(26910) + projection_wkt = srs.ExportToWkt() + + origin = (461251, 4923245) + pixel_size = (30, 30) + no_data = -1 + + pygeoprocessing.numpy_array_to_raster( + array, no_data, pixel_size, origin, projection_wkt, + base_raster_path) + + +def create_nutrient_df(): + """Creates a nutrient DataFrame for testing.""" + return pandas.DataFrame([ + {'crop': 'corn', 'area (ha)': 21.0, 'production_observed': 0.2, + 'percentrefuse': 7, 'protein': 42., 'lipid': 8, 'energy': 476., + 'ca': 27.0, 'fe': 15.7, 'mg': 280.0, 'ph': 704.0, 'k': 1727.0, + 'na': 2.0, 'zn': 4.9, 'cu': 1.9, 'fl': 8, 'mn': 2.9, 'se': 0.1, + 'vita': 3.0, 'betac': 16.0, 'alphac': 2.30, 'vite': 0.8, + 'crypto': 1.6, 'lycopene': 0.36, 'lutein': 63.0, 'betat': 0.5, + 'gammat': 2.1, 'deltat': 1.9, 'vitc': 6.8, 'thiamin': 0.4, + 'riboflavin': 1.8, 'niacin': 8.2, 'pantothenic': 0.9, + 'vitb6': 1.4, 'folate': 385.0, 'vitb12': 2.0, 'vitk': 41.0}, + + {'crop': 'soybean', 'area (ha)': 5., 'production_observed': 4., + 'percentrefuse': 9, 'protein': 33., 'lipid': 2., 'energy': 99., + 'ca': 257., 'fe': 15.7, 'mg': 280., 'ph': 704.0, 'k': 197.0, + 'na': 2., 'zn': 4.9, 'cu': 1.6, 'fl': 3., 'mn': 5.2, 'se': 0.3, + 'vita': 3.0, 'betac': 16.0, 'alphac': 1.0, 'vite': 0.8, + 'crypto': 0.6, 'lycopene': 0.3, 'lutein': 61.0, 'betat': 0.5, + 'gammat': 2.3, 'deltat': 1.2, 'vitc': 3.0, 'thiamin': 0.42, + 'riboflavin': 0.82, 'niacin': 12.2, 'pantothenic': 0.92, + 'vitb6': 5.4, 'folate': 305., 'vitb12': 3., 'vitk': 42.}, + ]).set_index('crop') + + +def _create_crop_rasters(output_dir, crop_names, file_suffix): + """Creates raster files for test setup.""" + _OBSERVED_PRODUCTION_FILE_PATTERN = os.path.join( + '.', '%s_observed_production%s.tif') + _CROP_PRODUCTION_FILE_PATTERN = os.path.join( + '.', '%s_regression_production%s.tif') + + for i, crop in enumerate(crop_names): + observed_yield_path = os.path.join( + output_dir, + _OBSERVED_PRODUCTION_FILE_PATTERN % (crop, file_suffix)) + crop_production_raster_path = os.path.join( + output_dir, + _CROP_PRODUCTION_FILE_PATTERN % (crop, file_suffix)) + + # Create arbitrary raster arrays + observed_array = numpy.array([[4, i], [i*3, 4]], dtype=numpy.int16) + crop_array = numpy.array([[i, 1], [i*2, 3]], dtype=numpy.int16) + + 
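+        # Write the observed and modeled production arrays to disk as small rasters.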
+        make_simple_raster(observed_yield_path, observed_array)
+        make_simple_raster(crop_production_raster_path, crop_array)
+
+
 class CropProductionTests(unittest.TestCase):
     """Tests for the Crop Production model."""
 
@@ -390,6 +493,304 @@ class CropProductionTests(unittest.TestCase):
         pandas.testing.assert_frame_equal(
             expected_result_table, result_table, check_dtype=False)
 
+    def test_x_yield_op(self):
+        """Test `_x_yield_op`"""
+        from natcap.invest.crop_production_regression import _x_yield_op
+
+        # make fake data
+        y_max = numpy.array([[-1, 3, 2], [4, 5, 3]])
+        b_x = numpy.array([[4, 3, 2], [2, 0, 3]])
+        c_x = numpy.array([[4, 1, 2], [3, 0, 3]])
+        lulc_array = numpy.array([[3, 3, 2], [3, -1, 3]])
+        fert_rate = 0.6
+        crop_lucode = 3
+        pixel_area_ha = 10
+
+        actual_result = _x_yield_op(y_max, b_x, c_x, lulc_array, fert_rate,
+                                    crop_lucode, pixel_area_ha)
+        # Expected values correspond to
+        # pixel_area_ha * y_max * (1 - b_x * exp(-c_x * fert_rate))
+        # on pixels where lulc_array matches crop_lucode and y_max is valid,
+        # and nodata (-1) everywhere else. For example, at [0][1]:
+        # 10 * 3 * (1 - 3 * exp(-1 * 0.6)) ~= -19.393047.
+        expected_result = numpy.array([[-1, -19.393047, -1],
+                                       [26.776089, -1, 15.1231]])
+
+        numpy.testing.assert_allclose(actual_result, expected_result)
+
+    def test_zero_observed_yield_op(self):
+        """Test `_zero_observed_yield_op`"""
+        from natcap.invest.crop_production_regression import \
+            _zero_observed_yield_op
+
+        # make fake data
+        observed_yield_array = numpy.array([[0, 1, -1], [5, 6, -1]])
+        observed_yield_nodata = -1
+
+        actual_result = _zero_observed_yield_op(observed_yield_array,
+                                                observed_yield_nodata)
+
+        # nodata pixels are replaced with 0; all other values pass through
+        expected_result = numpy.array([[0, 1, 0], [5, 6, 0]])
+
+        numpy.testing.assert_allclose(actual_result, expected_result)
+
+    def test_mask_observed_yield_op(self):
+        """Test `_mask_observed_yield_op`"""
+        from natcap.invest.crop_production_regression import \
+            _mask_observed_yield_op
+
+        # make fake data
+        lulc_array = numpy.array([[3, 5, -9999], [3, 3, -1]])
+        observed_yield_array = numpy.array([[-1, 5, 4], [8, -9999, 91]])
+        observed_yield_nodata = -1
+        # note: this observed_yield_nodata value becomes the nodata value in
+        # the output array, but values in the observed_yield_array equal to
+        # it are NOT treated as nodata within this function
+
+        landcover_nodata = -9999
+        crop_lucode = 3
+        pixel_area_ha = 10
+
+        actual_result = _mask_observed_yield_op(
+            lulc_array, observed_yield_array, observed_yield_nodata,
+            landcover_nodata, crop_lucode, pixel_area_ha)
+
+        # Expected values: observed * pixel_area_ha where lulc_array matches
+        # crop_lucode, observed_yield_nodata (-1) where the landcover is
+        # nodata, and 0 for any other landcover code.
+        expected_result = numpy.array([[-10, 0, -1], [80, -99990, 0]])
+
+        numpy.testing.assert_allclose(actual_result, expected_result)
+
+    def test_tabulate_regression_results(self):
+        """Test `tabulate_regression_results`"""
+        from natcap.invest.crop_production_regression import \
+            tabulate_regression_results
+
+        def _create_expected_results():
+            """Creates the expected results DataFrame."""
+            return pandas.DataFrame([
+                {'crop': 'corn', 'area (ha)': 20.0,
+                 'production_observed': 8.0, 'production_modeled': 4.0,
+                 'protein_modeled': 1562400.0, 'protein_observed': 3124800.0,
+                 'lipid_modeled': 297600.0, 'lipid_observed': 595200.0,
+                 'energy_modeled': 17707200.0, 'energy_observed': 35414400.0,
+                 'ca_modeled': 1004400.0, 'ca_observed': 2008800.0,
+                 'fe_modeled': 584040.0, 'fe_observed': 1168080.0,
+                 'mg_modeled': 10416000.0, 'mg_observed': 20832000.0,
+                 'ph_modeled': 26188800.0, 'ph_observed': 52377600.0,
+                 'k_modeled': 64244400.0, 'k_observed': 128488800.0,
+                 'na_modeled': 74400.0, 'na_observed': 148800.0,
+                 'zn_modeled': 182280.0, 'zn_observed': 364560.0,
+                 'cu_modeled': 70680.0, 'cu_observed': 141360.0,
+                 'fl_modeled': 297600.0, 'fl_observed': 595200.0,
+                 'mn_modeled': 107880.0, 'mn_observed': 215760.0,
+                 'se_modeled': 3720.0, 'se_observed': 7440.0,
+                 'vita_modeled':
111600.0, 'vita_observed': 223200.0, + 'betac_modeled': 595200.0, 'betac_observed': 1190400.0, + 'alphac_modeled': 85560.0, 'alphac_observed': 171120.0, + 'vite_modeled': 29760.0, 'vite_observed': 59520.0, + 'crypto_modeled': 59520.0, 'crypto_observed': 119040.0, + 'lycopene_modeled': 13392.0, 'lycopene_observed': 26784.0, + 'lutein_modeled': 2343600.0, 'lutein_observed': 4687200.0, + 'betat_modeled': 18600.0, 'betat_observed': 37200.0, + 'gammat_modeled': 78120.0, 'gammat_observed': 156240.0, + 'deltat_modeled': 70680.0, 'deltat_observed': 141360.0, + 'vitc_modeled': 252960.0, 'vitc_observed': 505920.0, + 'thiamin_modeled': 14880.0, 'thiamin_observed': 29760.0, + 'riboflavin_modeled': 66960.0, 'riboflavin_observed': 133920.0, + 'niacin_modeled': 305040.0, 'niacin_observed': 610080.0, + 'pantothenic_modeled': 33480.0, 'pantothenic_observed': 66960.0, + 'vitb6_modeled': 52080.0, 'vitb6_observed': 104160.0, + 'folate_modeled': 14322000.0, 'folate_observed': 28644000.0, + 'vitb12_modeled': 74400.0, 'vitb12_observed': 148800.0, + 'vitk_modeled': 1525200.0, 'vitk_observed': 3050400.0}, + {'crop': 'soybean', 'area (ha)': 40.0, + 'production_observed': 12.0, 'production_modeled': 7.0, + 'protein_modeled': 2102100.0, 'protein_observed': 3603600.0, + 'lipid_modeled': 127400.0, 'lipid_observed': 218400.0, + 'energy_modeled': 6306300.0, 'energy_observed': 10810800.0, + 'ca_modeled': 16370900.0, 'ca_observed': 28064400.0, + 'fe_modeled': 1000090.0, 'fe_observed': 1714440.0, + 'mg_modeled': 17836000.0, 'mg_observed': 30576000.0, + 'ph_modeled': 44844800.0, 'ph_observed': 76876800.0, + 'k_modeled': 12548900.0, 'k_observed': 21512400.0, + 'na_modeled': 127400.0, 'na_observed': 218400.0, + 'zn_modeled': 312130.0, 'zn_observed': 535080.0, + 'cu_modeled': 101920.0, 'cu_observed': 174720.0, + 'fl_modeled': 191100.0, 'fl_observed': 327600.0, + 'mn_modeled': 331240.0, 'mn_observed': 567840.0, + 'se_modeled': 19110.0, 'se_observed': 32760.0, + 'vita_modeled': 191100.0, 'vita_observed': 327600.0, + 'betac_modeled': 1019200.0, 'betac_observed': 1747200.0, + 'alphac_modeled': 63700.0, 'alphac_observed': 109200.0, + 'vite_modeled': 50960.0, 'vite_observed': 87360.0, + 'crypto_modeled': 38220.0, 'crypto_observed': 65520.0, + 'lycopene_modeled': 19110.0, 'lycopene_observed': 32760.0, + 'lutein_modeled': 3885700.0, 'lutein_observed': 6661200.0, + 'betat_modeled': 31850.0, 'betat_observed': 54600.0, + 'gammat_modeled': 146510.0, 'gammat_observed': 251160.0, + 'deltat_modeled': 76440.0, 'deltat_observed': 131040.0, + 'vitc_modeled': 191100.0, 'vitc_observed': 327600.0, + 'thiamin_modeled': 26754.0, 'thiamin_observed': 45864.0, + 'riboflavin_modeled': 52234.0, 'riboflavin_observed': 89544.0, + 'niacin_modeled': 777140.0, 'niacin_observed': 1332240.0, + 'pantothenic_modeled': 58604.0, 'pantothenic_observed': 100464.0, + 'vitb6_modeled': 343980.0, 'vitb6_observed': 589680.0, + 'folate_modeled': 19428500.0, 'folate_observed': 33306000.0, + 'vitb12_modeled': 191100.0, 'vitb12_observed': 327600.0, + 'vitk_modeled': 2675400.0, 'vitk_observed': 4586400.0}]) + + nutrient_df = create_nutrient_df() + + pixel_area_ha = 10 + workspace_dir = self.workspace_dir + output_dir = os.path.join(workspace_dir, "OUTPUT") + os.makedirs(output_dir, exist_ok=True) + + landcover_raster_path = os.path.join(workspace_dir, "landcover.tif") + landcover_nodata = -1 + make_simple_raster(landcover_raster_path, + numpy.array([[2, 1], [2, 3]], dtype=numpy.int16)) + + file_suffix = "v1" + target_table_path = os.path.join(workspace_dir, "output_table.csv") 
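+        # The expected values in _create_expected_results are consistent
+        # with the synthetic inputs above: summing each crop's rasters from
+        # _create_crop_rasters gives production_modeled / production_observed
+        # (corn: 4 and 8, soybean: 7 and 12), and each nutrient column equals
+        # production * nutrient content * (1 - percentrefuse / 100) * 1e4,
+        # e.g. corn protein_modeled = 4 * 42 * 0.93 * 1e4 = 1562400.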
+ crop_names = ["corn", "soybean"] + + _create_crop_rasters(output_dir, crop_names, file_suffix) + + tabulate_regression_results( + nutrient_df, crop_names, pixel_area_ha, + landcover_raster_path, landcover_nodata, + output_dir, file_suffix, target_table_path + ) + + # Read only the first 2 crop's data (skipping total area) + actual_result_table = pandas.read_csv(target_table_path, nrows=2, + header=0) + expected_result_table = _create_expected_results() + + # Compare expected vs actual + pandas.testing.assert_frame_equal(actual_result_table, + expected_result_table) + + def test_aggregate_regression_results_to_polygons(self): + """Test `aggregate_regression_results_to_polygons`""" + from natcap.invest.crop_production_regression import \ + aggregate_regression_results_to_polygons + + def _create_expected_agg_table(): + """Create expected output results""" + # Define the new values manually + return pandas.DataFrame([ + {"FID": 0, "corn_modeled": 1, "corn_observed": 4, + "soybean_modeled": 2, "soybean_observed": 5, + "protein_modeled": 991200, "protein_observed": 3063900, + "lipid_modeled": 110800, "lipid_observed": 388600, + "energy_modeled": 6228600, "energy_observed": 22211700, + "ca_modeled": 4928500, "ca_observed": 12697900, + "fe_modeled": 431750, "fe_observed": 1298390, + "mg_modeled": 7700000, "mg_observed": 23156000, + "ph_modeled": 19360000, "ph_observed": 58220800, + "k_modeled": 19646500, "k_observed": 73207900, + "na_modeled": 55000, "na_observed": 165400, + "zn_modeled": 134750, "zn_observed": 405230, + "cu_modeled": 46790, "cu_observed": 143480, + "fl_modeled": 129000, "fl_observed": 434100, + "mn_modeled": 121610, "mn_observed": 344480, + "se_modeled": 6390, "se_observed": 17370, + "vita_modeled": 82500, "vita_observed": 248100, + "betac_modeled": 440000, "betac_observed": 1323200, + "alphac_modeled": 39590, "alphac_observed": 131060, + "vite_modeled": 22000, "vite_observed": 66160, + "crypto_modeled": 25800, "crypto_observed": 86820, + "lycopene_modeled": 8808, "lycopene_observed": 27042, + "lutein_modeled": 1696100, "lutein_observed": 5119100, + "betat_modeled": 13750, "betat_observed": 41350, + "gammat_modeled": 61390, "gammat_observed": 182770, + "deltat_modeled": 39510, "deltat_observed": 125280, + "vitc_modeled": 117840, "vitc_observed": 389460, + "thiamin_modeled": 11364, "thiamin_observed": 33990, + "riboflavin_modeled": 31664, "riboflavin_observed": 104270, + "niacin_modeled": 298300, "niacin_observed": 860140, + "pantothenic_modeled": 25114, "pantothenic_observed": 75340, + "vitb6_modeled": 111300, "vitb6_observed": 297780, + "folate_modeled": 9131500, "folate_observed": 28199500, + "vitb12_modeled": 73200, "vitb12_observed": 210900, + "vitk_modeled": 1145700, "vitk_observed": 3436200}, + {"FID": 1, "corn_modeled": 4, "corn_observed": 8, + "soybean_modeled": 7, "soybean_observed": 12, + "protein_modeled": 3664500, "protein_observed": 6728400, + "lipid_modeled": 425000, "lipid_observed": 813600, + "energy_modeled": 24013500, "energy_observed": 46225200, + "ca_modeled": 17375300, "ca_observed": 30073200, + "fe_modeled": 1584130, "fe_observed": 2882520, + "mg_modeled": 28252000, "mg_observed": 51408000, + "ph_modeled": 71033600, "ph_observed": 129254400, + "k_modeled": 76793300, "k_observed": 150001200, + "na_modeled": 201800, "na_observed": 367200, + "zn_modeled": 494410, "zn_observed": 899640, + "cu_modeled": 172600, "cu_observed": 316080, + "fl_modeled": 488700, "fl_observed": 922800, + "mn_modeled": 439120, "mn_observed": 783600, + "se_modeled": 22830, 
"se_observed": 40200, + "vita_modeled": 302700, "vita_observed": 550800, + "betac_modeled": 1614400, "betac_observed": 2937600, + "alphac_modeled": 149260, "alphac_observed": 280320, + "vite_modeled": 80720, "vite_observed": 146880, + "crypto_modeled": 97740, "crypto_observed": 184560, + "lycopene_modeled": 32502, "lycopene_observed": 59544, + "lutein_modeled": 6229300, "lutein_observed": 11348400, + "betat_modeled": 50450, "betat_observed": 91800, + "gammat_modeled": 224630, "gammat_observed": 407400, + "deltat_modeled": 147120, "deltat_observed": 272400, + "vitc_modeled": 444060, "vitc_observed": 833520, + "thiamin_modeled": 41634, "thiamin_observed": 75624, + "riboflavin_modeled": 119194, "riboflavin_observed": 223464, + "niacin_modeled": 1082180, "niacin_observed": 1942320, + "pantothenic_modeled": 92084, "pantothenic_observed": 167424, + "vitb6_modeled": 396060, "vitb6_observed": 693840, + "folate_modeled": 33750500, "folate_observed": 61950000, + "vitb12_modeled": 265500, "vitb12_observed": 476400, + "vitk_modeled": 4200600, "vitk_observed": 7636800} + ], dtype=float) + + workspace = self.workspace_dir + + base_aggregate_vector_path = os.path.join(workspace, + "agg_vector.shp") + make_aggregate_vector(base_aggregate_vector_path) + + target_aggregate_vector_path = os.path.join(workspace, + "agg_vector_prj.shp") + + spatial_ref = osr.SpatialReference() + spatial_ref.ImportFromEPSG(26910) # EPSG:4326 for WGS84 + landcover_raster_projection = spatial_ref.ExportToWkt() + + crop_names = ['corn', 'soybean'] + nutrient_df = create_nutrient_df() + output_dir = os.path.join(workspace, "OUTPUT") + os.makedirs(output_dir, exist_ok=True) + file_suffix = 'test' + target_aggregate_table_path = '' # unused + + _create_crop_rasters(output_dir, crop_names, file_suffix) + + aggregate_regression_results_to_polygons( + base_aggregate_vector_path, target_aggregate_vector_path, + landcover_raster_projection, crop_names, + nutrient_df, output_dir, file_suffix, + target_aggregate_table_path) + + _AGGREGATE_TABLE_FILE_PATTERN = os.path.join( + '.','aggregate_results%s.csv') + + aggregate_table_path = os.path.join( + output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix) + + actual_aggregate_table = pandas.read_csv(aggregate_table_path, + dtype=float) + print(actual_aggregate_table) + + expected_aggregate_table = _create_expected_agg_table() + + pandas.testing.assert_frame_equal( + actual_aggregate_table, expected_aggregate_table) + + class CropValidationTests(unittest.TestCase): """Tests for the Crop Productions' MODEL_SPEC and validation."""