From b13e90ef52a763fc90eb4c9942f058169d8eb8e2 Mon Sep 17 00:00:00 2001 From: davemfish Date: Wed, 22 Jan 2025 09:29:12 -0500 Subject: [PATCH 01/80] set PROJ_LIB and PROJ_DATA variables in the exe runtime hook. #1742 --- exe/hooks/rthook.py | 1 + 1 file changed, 1 insertion(+) diff --git a/exe/hooks/rthook.py b/exe/hooks/rthook.py index 492344124..c007eef05 100644 --- a/exe/hooks/rthook.py +++ b/exe/hooks/rthook.py @@ -3,6 +3,7 @@ import platform import sys os.environ['PROJ_LIB'] = os.path.join(sys._MEIPASS, 'proj') +os.environ['PROJ_DATA'] = os.path.join(sys._MEIPASS, 'proj') if platform.system() == 'Darwin': # Rtree will look in this directory first for libspatialindex_c.dylib. From 33d50c5d671e41ab5459aa13bc16133d9f7d58ef Mon Sep 17 00:00:00 2001 From: davemfish Date: Wed, 22 Jan 2025 15:47:37 -0500 Subject: [PATCH 02/80] a note for HISTORY. #1742 --- HISTORY.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/HISTORY.rst b/HISTORY.rst index 8419211bd..42637cdf4 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -38,6 +38,9 @@ .. Unreleased Changes ------------------ +* General + * Fixed an issue where a user's PROJ_DATA environment variable could + trigger a RuntimeError about a missing proj.db file. 3.14.3 (2024-12-19) ------------------- From 76006437f0b9bc9e5cc8c0ff9494c0f1c0e9a476 Mon Sep 17 00:00:00 2001 From: davemfish Date: Wed, 22 Jan 2025 15:54:39 -0500 Subject: [PATCH 03/80] fix RST markup. #1742 --- HISTORY.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index 42637cdf4..30ea51436 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -35,12 +35,12 @@ .. :changelog: -.. - Unreleased Changes - ------------------ + +Unreleased Changes +------------------ * General * Fixed an issue where a user's PROJ_DATA environment variable could - trigger a RuntimeError about a missing proj.db file. + trigger a RuntimeError about a missing proj.db file. 
3.14.3 (2024-12-19) ------------------- From 3f8a172ab7441162f5f2559b5ebdcceec7feaa05 Mon Sep 17 00:00:00 2001 From: Claire Simpson Date: Tue, 28 Jan 2025 17:28:17 -0700 Subject: [PATCH 04/80] Add unittests for AWY --- tests/test_annual_water_yield.py | 79 +++++++++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/tests/test_annual_water_yield.py b/tests/test_annual_water_yield.py index e450f4b7e..5ec1ad433 100644 --- a/tests/test_annual_water_yield.py +++ b/tests/test_annual_water_yield.py @@ -6,8 +6,9 @@ import os import pandas import numpy -from osgeo import gdal +from osgeo import gdal, ogr, osr import pygeoprocessing +from shapely.geometry import Polygon REGRESSION_DATA = os.path.join( @@ -367,3 +368,79 @@ class AnnualWaterYieldTests(unittest.TestCase): self.assertTrue( 'but are not found in the valuation table' in actual_message, actual_message) + + def test_fractp_op(self): + """Test `fractp_op`""" + from natcap.invest.annual_water_yield import fractp_op + + # generate fake data + kc = numpy.array([[1, .1, .1], [.6, .6, .1]]) + eto = numpy.array([[1000, 900, 900], [1100, 1005, 1000]]) + precip = numpy.array([[100, 1000, 10], [500, 800, 1100]]) + root = numpy.array([[99, 300, 400], [5, 500, 800]]) + soil = numpy.array([[600, 700, 700], [800, 900, 600]]) + pawc = numpy.array([[.11, .11, .12], [.55, .55, .19]]) + veg = numpy.array([[1, 1, 0], [0, 1, 0]]) + nodata_dict = {'eto': None, 'precip': None, 'depth_root': None, + 'pawc': None, 'out_nodata': None} + seasonality_constant = 6 + + actual_fractp = fractp_op(kc, eto, precip, root, soil, pawc, veg, + nodata_dict, seasonality_constant) + + # generated by running fractp_op + expected_fractp = numpy.array([[0.9345682, 0.06896508, 1.], + [1., 0.6487423, 0.09090909]], + dtype=numpy.float32) + + numpy.testing.assert_allclose(actual_fractp, expected_fractp, + err_msg="Fractp does not match expected") + + def test_compute_watershed_valuation(self): + """Test `compute_watershed_valuation`""" + from natcap.invest.annual_water_yield import compute_watershed_valuation + + # generate fake watershed results vector + watershed_results_vector_path = os.path.join(self.workspace_dir, + "watershed_results.shp") + shapely_geometry_list = [ + Polygon([(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)]), + Polygon([(2, 2), (3, 2), (3, 3), (2, 3), (2, 2)]) + ] + projection_wkt = osr.GetUserInputAsWKT("EPSG:4326") + vector_format = "ESRI Shapefile" + fields = {"hp_energy": ogr.OFTReal, "hp_val": ogr.OFTReal, + "ws_id": ogr.OFTReal, "rsupply_vl": ogr.OFTReal} + attribute_list = [ + {"hp_energy": 1, "hp_val": 1, "ws_id": 0, "rsupply_vl": 2}, + {"hp_energy": 11, "hp_val": 3, "ws_id": 1, "rsupply_vl": 52} + ] + + pygeoprocessing.shapely_geometry_to_vector(shapely_geometry_list, + watershed_results_vector_path, + projection_wkt, + vector_format, fields, + attribute_list) + + # generate fake val_df + val_df = pandas.DataFrame({'efficiency': [.7, .8], 'height': [6, 5], + 'fraction': [.1, .2], 'discount': [10, 20], + 'time_span': [10, 10], 'cost': [1000, 2000], + 'kw_price': [10, 20]}) + + compute_watershed_valuation(watershed_results_vector_path, val_df) + + ws_ds = gdal.OpenEx(watershed_results_vector_path, + gdal.OF_VECTOR | gdal.GA_Update) + ws_layer = ws_ds.GetLayer() + + # calculated by running `compute_watershed_valuation` + expected_hp_energy = [0.0022848, 0.113152] + expected_npv = [-6758.869386098996, -10050.547726887671] + + # compare expected to actual values + for row, ws_feat in enumerate(ws_layer): + actual_hp_energy = 
ws_feat.GetField('hp_energy')
+            actual_npv = ws_feat.GetField('hp_val')
+            self.assertEqual(actual_hp_energy, expected_hp_energy[row])
+            self.assertEqual(actual_npv, expected_npv[row])

From fa56af48493bc84f72b28ccaf1c7cd8b1b93c29d Mon Sep 17 00:00:00 2001
From: James Douglass
Date: Wed, 29 Jan 2025 11:41:01 -0800
Subject: [PATCH 05/80] Starting a readme for codesigning.

RE:#1580
---
 codesigning/Makefile  | 21 +++++++++++++++++++++
 codesigning/README.md | 17 +++++++++++++++++
 2 files changed, 38 insertions(+)
 create mode 100644 codesigning/Makefile
 create mode 100644 codesigning/README.md

diff --git a/codesigning/Makefile b/codesigning/Makefile
new file mode 100644
index 000000000..2e04d1403
--- /dev/null
+++ b/codesigning/Makefile
@@ -0,0 +1,21 @@
+.PHONY: deploy-cloudfunction deploy-worker
+
+deploy-cloudfunction:
+	gcloud functions deploy \
+		codesigning-queue \
+		--trigger-http \
+		--region us-west1 \
+		--allow-unauthenticated \
+		--entry-point main \
+		--runtime python312 \
+		--run-service-account codesigning-service@natcap-servers.iam.gserviceaccount.com \
+		--source gcp-cloudfunc/
+
+
+# NOTE: This must be executed from a computer that has SSH access to ncp-inkwell.
+deploy-worker:
+	cd signing-worker
+	ansible-playbook \
+		--ask-become-pass \
+		--inventory-file inventory.ini \
+		playbook.yml

diff --git a/codesigning/README.md b/codesigning/README.md
new file mode 100644
index 000000000..ecb678e0f
--- /dev/null
+++ b/codesigning/README.md
@@ -0,0 +1,17 @@
+# InVEST Codesigning Service
+
+
+## Future Work
+
+### Subscribe to GCS events
+
+GCP Cloud Functions have the ability to subscribe to bucket events, which
+should allow us to subscribe very specifically to just those `finalize` events
+that apply to the Windows workbench binaries. Doing so will require reworking
+this cloud function into two cloud functions:
+
+1. An endpoint for ncp-inkwell to poll for the next binary to sign
+2. A cloud function that subscribes to GCS bucket events and enqueues the binary to sign.
+
+Relevant docs include:
+* https://cloud.google.com/functions/docs/writing/write-event-driven-functions#cloudevent-example-python
+

From 64f9d398e3b97f7e183acf9d2d10d1bc49bae79d Mon Sep 17 00:00:00 2001
From: James Douglass
Date: Wed, 29 Jan 2025 11:42:52 -0800
Subject: [PATCH 06/80] Starting a readme for the cloud function.

RE:#1580
---
 codesigning/gcp-cloudfunc/README.md | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 codesigning/gcp-cloudfunc/README.md

diff --git a/codesigning/gcp-cloudfunc/README.md b/codesigning/gcp-cloudfunc/README.md
new file mode 100644
index 000000000..c26f38617
--- /dev/null
+++ b/codesigning/gcp-cloudfunc/README.md
@@ -0,0 +1,9 @@
+# GCP Cloud Function Source
+
+The files in this directory are used during deployment to the GCP Cloud Function.
+
+See the Makefile in the parent directory for the `gcloud` invocation to deploy the function.
+
+## NOTES:
+
+* After deploying the function for the first time, make sure the correct secret is defined as an environment variable.

From a8448c3e9a1ce7de82994681fd5518b834cf09a3 Mon Sep 17 00:00:00 2001
From: James Douglass
Date: Wed, 29 Jan 2025 11:43:10 -0800
Subject: [PATCH 07/80] Adding python code for the cloud function.
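
The function implements a small HTTP queue protocol: a GET dequeues the
next binary to sign, and a POST with an access token and a URL enqueues
one. As a rough sketch of the enqueue side (the endpoint is the one this
series deploys; the token value and artifact path below are placeholders
for illustration only):

    import requests

    ENDPOINT = ('https://us-west1-natcap-servers.cloudfunctions.net/'
                'codesigning-queue')
    response = requests.post(ENDPOINT, json={
        'token': 'shared-access-token',  # placeholder; checked against ACCESS_TOKEN
        'url': ('https://storage.googleapis.com/natcap-dev-build-artifacts/'
                'example/workbench.exe'),  # placeholder; must be an .exe on an allowed bucket
    })
    response.raise_for_status()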
RE:#1580 --- codesigning/gcp-cloudfunc/main.py | 142 +++++++++++++++++++++ codesigning/gcp-cloudfunc/requirements.txt | 5 + 2 files changed, 147 insertions(+) create mode 100644 codesigning/gcp-cloudfunc/main.py create mode 100644 codesigning/gcp-cloudfunc/requirements.txt diff --git a/codesigning/gcp-cloudfunc/main.py b/codesigning/gcp-cloudfunc/main.py new file mode 100644 index 000000000..e9cf3f7cd --- /dev/null +++ b/codesigning/gcp-cloudfunc/main.py @@ -0,0 +1,142 @@ +import contextlib +import datetime +import json +import logging +import os +import time +from urllib.parse import unquote + +import functions_framework +import google.cloud.logging # pip install google-cloud-logging +import requests +from flask import jsonify +from google.cloud import storage # pip install google-cloud-storage + +GOOGLE_PREFIX = 'https://storage.googleapis.com' +CODESIGN_DATA_BUCKET = 'natcap-codesigning' +LOG_CLIENT = google.cloud.logging.Client() +LOG_CLIENT.setup_logging() + + +@contextlib.contextmanager +def get_lock(): + """Acquire a GCS-based mutex. + + This requires that the bucket we are using has versioning + + """ + storage_client = storage.Client() + bucket = storage_client.bucket(CODESIGN_DATA_BUCKET) + + lock_obtained = False + n_tries = 100 + for i in range(n_tries): + lockfile = bucket.blob('mutex.lock') + if not lockfile.generation: + lockfile.upload_from_string( + f"Lock acquired {datetime.datetime.now().isoformat()}") + lock_obtained = True + break + else: + time.sleep(0.1) + + if not lock_obtained: + raise RuntimeError(f'Could not obtain lock after {n_tries} tries') + + try: + yield + finally: + lockfile.delete() + + +@functions_framework.http +def main(request): + request_method = request.method + + storage_client = storage.Client() + bucket = storage_client.bucket(CODESIGN_DATA_BUCKET) + + if request_method == 'GET': + with get_lock(): + queuefile = bucket.blob('queue.json') + queue_dict = json.loads(queuefile.download_as_string()) + next_file_url = queue_dict['queue'].pop(0) + queuefile.upload_from_string(json.dumps(queue_dict)) + + data = { + 'https-url': next_file_url, + 'basename': os.path.basename(next_file_url), + 'gs-uri': unquote(next_file_url.replace( + f'{GOOGLE_PREFIX}/', 'gs://')), + } + logging.info(f'Dequeued {next_file_url}') + return jsonify(data) + + elif request_method == 'POST': + data = request.get_json() + + if data['token'] != os.environ['ACCESS_TOKEN']: + return jsonify('Invalid token'), 403 + + url = data['url'] + + if not url.endswith('.exe'): + return jsonify('Invalid URL to sign'), 400 + + if not url.startswith(GOOGLE_PREFIX): + return jsonify('Invalid host'), 400 + + if not url.startswith(( + f'{GOOGLE_PREFIX}/releases.naturalcapitalproject.org/', + f'{GOOGLE_PREFIX}/natcap-dev-build-artifacts/')): + return jsonify("Invalid target bucket"), 400 + + # Remove http character quoting + url = unquote(url) + + binary_bucket_name, *binary_obj_paths = url.replace( + GOOGLE_PREFIX + '/', '').split('/') + codesign_bucket = storage_client.bucket(CODESIGN_DATA_BUCKET) + + # If the file is too old, reject it. Trying to avoid a + # denial-of-service by invoking the service with very old files. 
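+        # NOTE: the manual 'Last-Modified' parsing below assumes the RFC 1123
+        # date format; email.utils.parsedate_to_datetime() in the stdlib parses
+        # that format directly and would be a sturdier alternative (a suggested
+        # hardening, not part of this function as written).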
+ response = requests.head(url) + mday, mmonth, myear = response.headers['Last-Modified'].split(' ')[1:4] + modified_time = datetime.datetime.strptime( + ' '.join((mday, mmonth, myear)), '%d %b %Y') + if modified_time < datetime.datetime(year=2024, month=6, day=1): + return jsonify('File is too old'), 400 + + with get_lock(): + # first, check to see if the file has already been signed. + signed_files_list = codesign_bucket.blob('signed_files.json') + if not signed_files_list.exists(): + signed_files_dict = {'signed_files': []} + else: + signed_files_dict = json.loads( + signed_files_list.download_as_string()) + + if url in signed_files_dict['signed_files']: + return jsonify('File has already been signed'), 400 + + # Since the file has not already been signed, add the file to the + # queue + queuefile = codesign_bucket.blob('queue.json') + if not queuefile.exists(): + queue_dict = {'queue': []} + else: + queue_dict = json.loads(queuefile.download_as_string()) + + if url not in queue_dict['queue']: + queue_dict['queue'].append(url) + else: + return jsonify( + 'File is already in the queue', 200, 'application/json') + + queuefile.upload_from_string(json.dumps(queue_dict)) + + logging.info(f'Enqueued {url}') + return jsonify("OK"), 200 + + else: + return jsonify('Invalid request method'), 405 diff --git a/codesigning/gcp-cloudfunc/requirements.txt b/codesigning/gcp-cloudfunc/requirements.txt new file mode 100644 index 000000000..b34742a98 --- /dev/null +++ b/codesigning/gcp-cloudfunc/requirements.txt @@ -0,0 +1,5 @@ +google-cloud-storage +google-cloud-logging +function-framework==3.* +flask +requests From 50c661291e1ae3579e9a5c9a86b38cd42bc65e14 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 29 Jan 2025 11:50:14 -0800 Subject: [PATCH 08/80] Correcting package name. 
RE:#1580 --- codesigning/gcp-cloudfunc/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesigning/gcp-cloudfunc/requirements.txt b/codesigning/gcp-cloudfunc/requirements.txt index b34742a98..5296a1506 100644 --- a/codesigning/gcp-cloudfunc/requirements.txt +++ b/codesigning/gcp-cloudfunc/requirements.txt @@ -1,5 +1,5 @@ google-cloud-storage google-cloud-logging -function-framework==3.* +functions-framework==3.* flask requests From 2ebeb7cd4aa63d914a9fc5cf1036f2e4c11f1274 Mon Sep 17 00:00:00 2001 From: Claire Simpson Date: Wed, 29 Jan 2025 13:35:38 -0700 Subject: [PATCH 09/80] updated AWY tests --- tests/test_annual_water_yield.py | 141 ++++++++++++++++++++++--------- 1 file changed, 103 insertions(+), 38 deletions(-) diff --git a/tests/test_annual_water_yield.py b/tests/test_annual_water_yield.py index 5ec1ad433..8f6a651b7 100644 --- a/tests/test_annual_water_yield.py +++ b/tests/test_annual_water_yield.py @@ -16,6 +16,38 @@ REGRESSION_DATA = os.path.join( SAMPLE_DATA = os.path.join(REGRESSION_DATA, 'input') gdal.UseExceptions() + +def make_watershed_vector(path_to_shp): + """ + Generate watershed results shapefile with two polygons + + Args: + path_to_shp (str): path to store watershed results vector + + Outputs: + None + """ + shapely_geometry_list = [ + Polygon([(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)]), + Polygon([(2, 2), (3, 2), (3, 3), (2, 3), (2, 2)]) + ] + projection_wkt = osr.GetUserInputAsWKT("EPSG:4326") + vector_format = "ESRI Shapefile" + fields = {"hp_energy": ogr.OFTReal, "hp_val": ogr.OFTReal, + "ws_id": ogr.OFTReal, "rsupply_vl": ogr.OFTReal, + "wyield_mn": ogr.OFTReal, "wyield_vol": ogr.OFTReal, + "consum_mn": ogr.OFTReal, "consum_vol": ogr.OFTReal} + attribute_list = [ + {"hp_energy": 1, "hp_val": 1, "ws_id": 0, "rsupply_vl": 2}, + {"hp_energy": 11, "hp_val": 3, "ws_id": 1, "rsupply_vl": 52} + ] + + pygeoprocessing.shapely_geometry_to_vector(shapely_geometry_list, + path_to_shp, projection_wkt, + vector_format, fields, + attribute_list) + + class AnnualWaterYieldTests(unittest.TestCase): """Regression Tests for Annual Water Yield Model.""" @@ -397,50 +429,83 @@ class AnnualWaterYieldTests(unittest.TestCase): err_msg="Fractp does not match expected") def test_compute_watershed_valuation(self): - """Test `compute_watershed_valuation`""" - from natcap.invest.annual_water_yield import compute_watershed_valuation + """Test `compute_watershed_valuation`, `compute_rsupply_volume` + and `compute_water_yield_volume`""" + from natcap.invest import annual_water_yield + + def create_watershed_results_vector(path_to_shp): + """Generate a fake watershed results vector file.""" + shapely_geometry_list = [ + Polygon([(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)]), + Polygon([(2, 2), (3, 2), (3, 3), (2, 3), (2, 2)]) + ] + projection_wkt = osr.GetUserInputAsWKT("EPSG:4326") + vector_format = "ESRI Shapefile" + fields = {"ws_id": ogr.OFTReal, "wyield_mn": ogr.OFTReal, + "consum_mn": ogr.OFTReal, "consum_vol": ogr.OFTReal} + attribute_list = [{"ws_id": 0, "wyield_mn": 990000, + "consum_mn": 500, "consum_vol": 50}, + {"ws_id": 1, "wyield_mn": 800000, + "consum_mn": 600, "consum_vol": 70}] + + pygeoprocessing.shapely_geometry_to_vector(shapely_geometry_list, + path_to_shp, + projection_wkt, + vector_format, fields, + attribute_list) + + def validate_fields(vector_path, field_name, expected_values, error_msg): + """ + Validate a specific field in the watershed results vector + by comparing actual to expected values. 
Expected values generated + by running the function. + + Args: + vector path (str): path to watershed shapefile + field_name (str): attribute field to check + expected values (list): list of expected values for field + error_msg (str): what to print if assertion fails + + Returns: + None + """ + with gdal.OpenEx(vector_path, gdal.OF_VECTOR | gdal.GA_Update) as ws_ds: + ws_layer = ws_ds.GetLayer() + actual_values = [ws_feat.GetField(field_name) + for ws_feat in ws_layer] + self.assertEqual(actual_values, expected_values, msg=error_msg) # generate fake watershed results vector watershed_results_vector_path = os.path.join(self.workspace_dir, "watershed_results.shp") - shapely_geometry_list = [ - Polygon([(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)]), - Polygon([(2, 2), (3, 2), (3, 3), (2, 3), (2, 2)]) - ] - projection_wkt = osr.GetUserInputAsWKT("EPSG:4326") - vector_format = "ESRI Shapefile" - fields = {"hp_energy": ogr.OFTReal, "hp_val": ogr.OFTReal, - "ws_id": ogr.OFTReal, "rsupply_vl": ogr.OFTReal} - attribute_list = [ - {"hp_energy": 1, "hp_val": 1, "ws_id": 0, "rsupply_vl": 2}, - {"hp_energy": 11, "hp_val": 3, "ws_id": 1, "rsupply_vl": 52} - ] - - pygeoprocessing.shapely_geometry_to_vector(shapely_geometry_list, - watershed_results_vector_path, - projection_wkt, - vector_format, fields, - attribute_list) + create_watershed_results_vector(watershed_results_vector_path) # generate fake val_df - val_df = pandas.DataFrame({'efficiency': [.7, .8], 'height': [6, 5], - 'fraction': [.1, .2], 'discount': [10, 20], - 'time_span': [10, 10], 'cost': [1000, 2000], - 'kw_price': [10, 20]}) + val_df = pandas.DataFrame({'efficiency': [.7, .8], 'height': [12, 50], + 'fraction': [.9, .7], 'discount': [60, 20], + 'time_span': [10, 10], 'cost': [100, 200], + 'kw_price': [15, 20]}) - compute_watershed_valuation(watershed_results_vector_path, val_df) + # test water yield volume + annual_water_yield.compute_water_yield_volume( + watershed_results_vector_path) + validate_fields(watershed_results_vector_path, "wyield_vol", + [990.0, 800.0], + "Error with water yield volume calculation.") - ws_ds = gdal.OpenEx(watershed_results_vector_path, - gdal.OF_VECTOR | gdal.GA_Update) - ws_layer = ws_ds.GetLayer() + # test rsupply volume + annual_water_yield.compute_rsupply_volume( + watershed_results_vector_path) + validate_fields(watershed_results_vector_path, "rsupply_vl", + [940.0, 730.0], + "Error calculating total realized water supply volume.") - # calculated by running `compute_watershed_valuation` - expected_hp_energy = [0.0022848, 0.113152] - expected_npv = [-6758.869386098996, -10050.547726887671] - - # compare expected to actual values - for row, ws_feat in enumerate(ws_layer): - actual_hp_energy = ws_feat.GetField('hp_energy') - actual_npv = ws_feat.GetField('hp_val') - self.assertEqual(actual_hp_energy, expected_hp_energy[row]) - self.assertEqual(actual_npv, expected_npv[row]) + # test compute watershed valuation + annual_water_yield.compute_watershed_valuation( + watershed_results_vector_path, val_df) + validate_fields(watershed_results_vector_path, "hp_energy", + [19.329408, 55.5968], + "Error calculating energy.") + validate_fields(watershed_results_vector_path, "hp_val", + [501.9029748723, 4587.91946857059], + "Error calculating net present value.") From 20cf2c492513de476a7bdce85ae3d3206cfa9fdf Mon Sep 17 00:00:00 2001 From: Claire Simpson Date: Wed, 29 Jan 2025 15:17:38 -0700 Subject: [PATCH 10/80] Added unittests for carbon --- tests/test_carbon.py | 72 ++++++++++++++++++++++++++++++++++++++++++++ 1 file 
changed, 72 insertions(+) diff --git a/tests/test_carbon.py b/tests/test_carbon.py index e6c3486ca..2c1e29ef8 100644 --- a/tests/test_carbon.py +++ b/tests/test_carbon.py @@ -51,6 +51,38 @@ def make_simple_raster(base_raster_path, fill_val, nodata_val): new_raster = None +def make_simple_lulc_raster(base_raster_path): + """Create a 2x2 raster on designated path with arbitrary lulc codes. + + Args: + base_raster_path (str): the raster path for making the new raster. + + Returns: + None. + """ + array = numpy.array([[1, 1], [2, 3]], dtype=int) + + srs = osr.SpatialReference() + srs.ImportFromEPSG(26910) # UTM Zone 10N + projection_wkt = srs.ExportToWkt() + # origin hand-picked for this epsg: + geotransform = [461261, 1.0, 0.0, 4923265, 0.0, -1.0] + + n = 2 + gtiff_driver = gdal.GetDriverByName('GTiff') + new_raster = gtiff_driver.Create( + base_raster_path, n, n, 1, gdal.GDT_Int32, options=[ + 'TILED=YES', 'BIGTIFF=YES', 'COMPRESS=LZW', + 'BLOCKXSIZE=16', 'BLOCKYSIZE=16']) + new_raster.SetProjection(projection_wkt) + new_raster.SetGeoTransform(geotransform) + new_band = new_raster.GetRasterBand(1) + new_band.WriteArray(array) + new_raster.FlushCache() + new_band = None + new_raster = None + + def assert_raster_equal_value(base_raster_path, val_to_compare): """Assert that the entire output raster has the same value as specified. @@ -268,6 +300,46 @@ class CarbonTests(unittest.TestCase): assert_raster_equal_value( os.path.join(args['workspace_dir'], 'npv_redd.tif'), -0.4602106) + def test_generate_carbon_map(self): + """Test `_generate_carbon_map`""" + from natcap.invest.carbon import _generate_carbon_map + + # generate a fake lulc raster + lulc_path = os.path.join(self.workspace_dir, "lulc.tif") + make_simple_lulc_raster(lulc_path) + + # make fake carbon pool dict + carbon_pool_by_type = {1: 5000, 2: 60, 3: 120} + + out_carbon_stock_path = os.path.join(self.workspace_dir, + "carbon_stock.tif") + + _generate_carbon_map(lulc_path, carbon_pool_by_type, + out_carbon_stock_path) + + # open output carbon stock raster and check values + actual_carbon_stock = gdal.Open(out_carbon_stock_path) + band = actual_carbon_stock.GetRasterBand(1) + actual_carbon_stock = band.ReadAsArray() + + expected_carbon_stock = numpy.array([[0.5, 0.5], [0.006, 0.012]], + dtype=numpy.float32) + + numpy.testing.assert_array_equal(actual_carbon_stock, + expected_carbon_stock) + + def test_calculate_valuation_constant(self): + """Test `_calculate_valuation_constant`""" + from natcap.invest.carbon import _calculate_valuation_constant + + valuation_constant = _calculate_valuation_constant(lulc_cur_year=2010, + lulc_fut_year=2012, + discount_rate=50, + rate_change=5, + price_per_metric_ton_of_c=50) + expected_valuation = 40.87302 + self.assertEqual(round(valuation_constant, 5), expected_valuation) + class CarbonValidationTests(unittest.TestCase): """Tests for the Carbon Model MODEL_SPEC and validation.""" From d19e9062ab94aedb54e6ebab107e350401123325 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 29 Jan 2025 14:45:39 -0800 Subject: [PATCH 11/80] Adding codesigning worker config. 
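
In outline: ncp-inkwell polls the queue endpoint for work, downloads each
queued binary, signs it with osslsigncode through the YubiKey PKCS#11
module, and copies the signed file back to its bucket. The Ansible
playbook installs that toolchain and registers the signing loop as a
systemd service.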
RE:#1580 --- codesigning/Makefile | 6 +- codesigning/signing-worker/.gitignore | 1 + codesigning/signing-worker/inventory.ini | 2 + codesigning/signing-worker/natcap-codesign.py | 32 +++++ .../signing-worker/natcap-codesign.service | 33 +++++ codesigning/signing-worker/natcap-codesign.sh | 15 +++ codesigning/signing-worker/playbook.yml | 118 ++++++++++++++++++ 7 files changed, 203 insertions(+), 4 deletions(-) create mode 100644 codesigning/signing-worker/.gitignore create mode 100644 codesigning/signing-worker/inventory.ini create mode 100644 codesigning/signing-worker/natcap-codesign.py create mode 100644 codesigning/signing-worker/natcap-codesign.service create mode 100644 codesigning/signing-worker/natcap-codesign.sh create mode 100644 codesigning/signing-worker/playbook.yml diff --git a/codesigning/Makefile b/codesigning/Makefile index 2e04d1403..63d51de01 100644 --- a/codesigning/Makefile +++ b/codesigning/Makefile @@ -4,18 +4,16 @@ deploy-cloudfunction: gcloud functions deploy \ codesigning-queue \ --trigger-http \ + --gen2 \ --region us-west1 \ --allow-unauthenticated \ --entry-point main \ --runtime python312 \ - --run-service-account codesigning-service@natcap-servers.iam.gserviceaccount.com \ --source gcp-cloudfunc/ - # NOTE: This must be executed from a computer that has SSH access to ncp-inkwell. deploy-worker: - cd signing-worker - ansible-playbook \ + cd signing-worker && ansible-playbook \ --ask-become-pass \ --inventory-file inventory.ini \ playbook.yml diff --git a/codesigning/signing-worker/.gitignore b/codesigning/signing-worker/.gitignore new file mode 100644 index 000000000..a2a720a80 --- /dev/null +++ b/codesigning/signing-worker/.gitignore @@ -0,0 +1 @@ +natcap-servers-1732552f0202.json diff --git a/codesigning/signing-worker/inventory.ini b/codesigning/signing-worker/inventory.ini new file mode 100644 index 000000000..c2653461e --- /dev/null +++ b/codesigning/signing-worker/inventory.ini @@ -0,0 +1,2 @@ +[ncp-inkwell] +ncp-inkwell diff --git a/codesigning/signing-worker/natcap-codesign.py b/codesigning/signing-worker/natcap-codesign.py new file mode 100644 index 000000000..9f6f2e73e --- /dev/null +++ b/codesigning/signing-worker/natcap-codesign.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 + +import sys +import textwrap + +import pexpect # apt install python3-pexpect + +CERTIFICATE = sys.argv[1] +FILETOSIGN = sys.argv[2] +SIGNED = sys.argv[3] + +SIGNCODE_COMMAND = textwrap.dedent(f"""\ + osslsigncode sign \ + -pkcs11engine /usr/lib/aarch64-linux-gnu/engines-3/pkcs11.so \ + -pkcs11module /usr/lib/aarch64-linux-gnu/libykcs11.so.2 \ + -key "pkcs11:id=%02;type=private" \ + -certs {CERTIFICATE} \ + -h sha256 \ + -ts http://timestamp.sectigo.com \ + -readpass pass.txt \ + -verbose \ + -in {FILETOSIGN} \ + -out {SIGNED}""") + + +process = pexpect.spawnu(SIGNCODE_COMMAND) +process.expect('Enter PKCS#11 key PIN for Private key for Digital Signature:') +with open('pass.txt') as passfile: + process.sendline(passfile.read().strip()) + +# print remainder of program output for our logging. +print(process.read()) diff --git a/codesigning/signing-worker/natcap-codesign.service b/codesigning/signing-worker/natcap-codesign.service new file mode 100644 index 000000000..286da3936 --- /dev/null +++ b/codesigning/signing-worker/natcap-codesign.service @@ -0,0 +1,33 @@ +# Systemd service for debian:bookworm for signing InVEST windows binaries. 
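+#
+# NOTE: systemd treats User=, Group= and WorkingDirectory= as [Service]
+# directives; it warns about and ignores them if they appear under [Unit].
+# It also reads everything after '=' as the value, so trailing inline
+# '#' comments on directive lines are not supported.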
+# +# To install this service, copy this onto the host as /etc/systemd/system/natcap-codesign.service +# +# To use, run (for example): +# # On modifying the service file, run: +# $ sudo systemctl daemon-reload +# +# # enable the service +# $ sudo systemctl enable natcap-codesign.service +# +# # start the service +# $ sudo systemctl start natcap-codesign +# +# # check the service status +# $ sudo systemctl status natcap-codesign +# +# This service is built to run in the foreground. +# +# See https://wiki.debian.org/systemd/Services for background info about systemd services. + +[Unit] +Description=NatCap Code Signing for Windows EXE Binaries +User=natcap-codesign +Group=natcap-codesign +WorkingDirectory=/tmp + + +[Service] +# Run in the foreground +Type=simple +Restart=always +ExecStart=/opt/natcap-codesign/natcap-codesign.py diff --git a/codesigning/signing-worker/natcap-codesign.sh b/codesigning/signing-worker/natcap-codesign.sh new file mode 100644 index 000000000..68ce32c52 --- /dev/null +++ b/codesigning/signing-worker/natcap-codesign.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# TODO: write a cron job to verify that the service is still running (heartbeat) +# TODO: incorporate slack updates to let us know when something was signed, or +# if the service crashed + +while true +do + $DATA=$(get from service) # This is a json object with the https url, gs url, etc. + $BASENAME=$(jq ".basename" $DATA) + wget -O $BASENAME $(jq ".https-url" $DATA) + python3 natcap_codesign.py /opt/natcap-codesign/codesign-cert-chain.pem "$BASENAME" + gcloud storage upload $BASENAME $(jq ".gs-uri" $DATA) + sleep 30 +done diff --git a/codesigning/signing-worker/playbook.yml b/codesigning/signing-worker/playbook.yml new file mode 100644 index 000000000..ffce2325a --- /dev/null +++ b/codesigning/signing-worker/playbook.yml @@ -0,0 +1,118 @@ +--- + +- name: Set up everything needed on NCP-Inkwell + hosts: all + become: true + become_method: sudo + tasks: + - name: Install GCP SDK dependencies + ansible.builtin.apt: + update_cache: true + pkg: + - apt-transport-https + - ca-certificates + - gnupg + - curl + + - name: Download the Google Cloud SDK package repository signing key + ansible.builtin.shell: + cmd: curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg + creates: /usr/share/keyrings/cloud.google.gpg + + - name: Add Google Cloud SDK package repository source + ansible.builtin.apt_repository: + update_cache: true + filename: google-cloud-sdk.list + repo: "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" + + - name: Install packages + ansible.builtin.apt: + update_cache: true + pkg: + - python3 + - python3-pexpect + - wget + - vim-nox + - yubico-piv-tool + - libengine-pkcs11-openssl + - ykcs11 + - libssl-dev + - google-cloud-sdk + - google-cloud-cli + - yubikey-manager + + - name: Add bookworm-backports repository + ansible.builtin.apt_repository: + update_cache: true + repo: "deb http://deb.debian.org/debian {{ ansible_distribution_release }}-backports main" + filename: bookworm-backports.list + + - name: Install osslsigncode from backports + ansible.builtin.apt: + update_cache: true + default_release: "{{ ansible_distribution_release }}-backports" + pkg: + # The backports version is needed because the version in bookworm has + # a critical bug in it that prevents it from working with our + # certificate. 
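+        # Once something has been signed, `osslsigncode verify -in <file.exe>`
+        # offers a quick local check of the signature (suggested usage; not
+        # invoked by this playbook).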
+ - osslsigncode + + - name: Create the codesign directory + ansible.builtin.file: + state: directory + path: /opt/natcap-codesign + + - name: Install the certificate + ansible.builtin.shell: + cmd: ykman piv certificates export 9c /opt/natcap-codesign/codesign-cert-chain.pem + creates: /opt/natcap-codesign/codesign-cert-chain.pem + + - name: Create codesigning group + ansible.builtin.group: + name: natcap-codesign + state: present + + - name: Create codesigning user + ansible.builtin.user: + name: natcap-codesign + group: natcap-codesign + shell: /bin/bash + createhome: true + + - name: Install the service account key + ansible.builtin.copy: + src: natcap-servers-1732552f0202.json + dest: /opt/natcap-codesign/natcap-servers-1732552f0202.json + mode: 0600 + + - name: Set up application credentials + ansible.builtin.shell: + cmd: gcloud auth activate-service-account --key-file=/opt/natcap-codesign/natcap-servers-1732552f0202.json + + - name: Install codesigning python script + ansible.builtin.copy: + src: natcap-codesign.py + dest: /opt/natcap-codesign/natcap-codesign.py + mode: 0755 + + - name: Install codesigning shell script + ansible.builtin.copy: + src: natcap-codesign.sh + dest: /opt/natcap-codesign/natcap-codesign.sh + mode: 0755 + + - name: Install the codesign service + ansible.builtin.copy: + src: natcap-codesign.service + dest: /etc/systemd/system/natcap-codesign.service + mode: 0644 + + - name: Enable the natcap-codesign service + ansible.builtin.systemd_service: + name: natcap-codesign + state: started + enabled: true + + #- install a service account key to write to GCS later + + From 63c1b1886ffc5569369ca44111589ce77618d0ac Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 29 Jan 2025 14:57:22 -0800 Subject: [PATCH 12/80] Bumping down memory, specifying project. RE:#1580 --- codesigning/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/codesigning/Makefile b/codesigning/Makefile index 63d51de01..dbf358781 100644 --- a/codesigning/Makefile +++ b/codesigning/Makefile @@ -2,7 +2,9 @@ deploy-cloudfunction: gcloud functions deploy \ + --project natcap-servers \ codesigning-queue \ + --memory=128Mi \ --trigger-http \ --gen2 \ --region us-west1 \ From 7eeffa47a23cd330d9ff8081f54ef31e25536d36 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 29 Jan 2025 15:03:27 -0800 Subject: [PATCH 13/80] Handling the case where there are no items in the queue. RE:#1580 --- codesigning/gcp-cloudfunc/main.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/codesigning/gcp-cloudfunc/main.py b/codesigning/gcp-cloudfunc/main.py index e9cf3f7cd..2c576889e 100644 --- a/codesigning/gcp-cloudfunc/main.py +++ b/codesigning/gcp-cloudfunc/main.py @@ -60,7 +60,12 @@ def main(request): with get_lock(): queuefile = bucket.blob('queue.json') queue_dict = json.loads(queuefile.download_as_string()) - next_file_url = queue_dict['queue'].pop(0) + try: + next_file_url = queue_dict['queue'].pop(0) + except IndexError: + # No items in the queue! + return jsonify('No items in the queue'), 204 + queuefile.upload_from_string(json.dumps(queue_dict)) data = { From c432fafc00411c2e96c0b219914c807b400806f8 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 29 Jan 2025 15:23:21 -0800 Subject: [PATCH 14/80] Updating bash service script. 
RE:#1580 --- codesigning/signing-worker/natcap-codesign.sh | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/codesigning/signing-worker/natcap-codesign.sh b/codesigning/signing-worker/natcap-codesign.sh index 68ce32c52..89718ec5f 100644 --- a/codesigning/signing-worker/natcap-codesign.sh +++ b/codesigning/signing-worker/natcap-codesign.sh @@ -6,10 +6,15 @@ while true do - $DATA=$(get from service) # This is a json object with the https url, gs url, etc. - $BASENAME=$(jq ".basename" $DATA) - wget -O $BASENAME $(jq ".https-url" $DATA) + DATA=$(curl -sb -H "Accept: application/json" "https://us-west1-natcap-servers.cloudfunctions.net/codesigning-queue") + # The response body will be empty when there is nothing in the queue. + if [ -z "$DATA" ]; then + sleep 30 + continue + fi + BASENAME=$(jq ".basename" <<< $DATA) + wget -O $BASENAME $(jq ".https-url" <<< $DATA) python3 natcap_codesign.py /opt/natcap-codesign/codesign-cert-chain.pem "$BASENAME" - gcloud storage upload $BASENAME $(jq ".gs-uri" $DATA) + gcloud storage upload $BASENAME $(jq ".gs-uri" <<< $DATA) sleep 30 done From 5805b4729c9356c75909964a599c1e7c8b3d576a Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 29 Jan 2025 15:28:41 -0800 Subject: [PATCH 15/80] Correcting filepaths. RE:#1580 --- codesigning/signing-worker/natcap-codesign.service | 2 +- codesigning/signing-worker/natcap-codesign.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/codesigning/signing-worker/natcap-codesign.service b/codesigning/signing-worker/natcap-codesign.service index 286da3936..678aa1087 100644 --- a/codesigning/signing-worker/natcap-codesign.service +++ b/codesigning/signing-worker/natcap-codesign.service @@ -30,4 +30,4 @@ WorkingDirectory=/tmp # Run in the foreground Type=simple Restart=always -ExecStart=/opt/natcap-codesign/natcap-codesign.py +ExecStart=/opt/natcap-codesign/natcap-codesign.sh diff --git a/codesigning/signing-worker/natcap-codesign.sh b/codesigning/signing-worker/natcap-codesign.sh index 89718ec5f..c2455d137 100644 --- a/codesigning/signing-worker/natcap-codesign.sh +++ b/codesigning/signing-worker/natcap-codesign.sh @@ -14,7 +14,7 @@ do fi BASENAME=$(jq ".basename" <<< $DATA) wget -O $BASENAME $(jq ".https-url" <<< $DATA) - python3 natcap_codesign.py /opt/natcap-codesign/codesign-cert-chain.pem "$BASENAME" + python3 opt/natcap-codesign/natcap_codesign.py /opt/natcap-codesign/codesign-cert-chain.pem "$BASENAME" gcloud storage upload $BASENAME $(jq ".gs-uri" <<< $DATA) sleep 30 done From 2ab96b74fbe6da4fdf24347bbd2f096d9a303953 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 29 Jan 2025 16:03:51 -0800 Subject: [PATCH 16/80] Upping memory per GCP suggestion. RE#1580 --- codesigning/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesigning/Makefile b/codesigning/Makefile index dbf358781..8fa17b6d1 100644 --- a/codesigning/Makefile +++ b/codesigning/Makefile @@ -4,7 +4,7 @@ deploy-cloudfunction: gcloud functions deploy \ --project natcap-servers \ codesigning-queue \ - --memory=128Mi \ + --memory=256Mi \ --trigger-http \ --gen2 \ --region us-west1 \ From f6b27156eba56f900860bdd6f60ab80c55a09b18 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 29 Jan 2025 16:04:23 -0800 Subject: [PATCH 17/80] Confirming access token on all requests. 
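+    # NOTE: with the combined flags `-sb`, curl's `-b` (cookie) option consumes
+    # the following `-H` as its argument; `-s -H` is presumably what is meant
+    # here (an observation from curl's documented flag semantics).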
RE#1580 --- codesigning/gcp-cloudfunc/main.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/codesigning/gcp-cloudfunc/main.py b/codesigning/gcp-cloudfunc/main.py index 2c576889e..9567b89f4 100644 --- a/codesigning/gcp-cloudfunc/main.py +++ b/codesigning/gcp-cloudfunc/main.py @@ -51,6 +51,10 @@ def get_lock(): @functions_framework.http def main(request): + data = request.get_json() + if data['token'] != os.environ['ACCESS_TOKEN']: + return jsonify('Invalid token'), 403 + request_method = request.method storage_client = storage.Client() @@ -78,11 +82,6 @@ def main(request): return jsonify(data) elif request_method == 'POST': - data = request.get_json() - - if data['token'] != os.environ['ACCESS_TOKEN']: - return jsonify('Invalid token'), 403 - url = data['url'] if not url.endswith('.exe'): From 2212f799cd108b40e9e86a3ce40126a199847572 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 29 Jan 2025 16:05:01 -0800 Subject: [PATCH 18/80] Always reloading service. RE:#1580 --- codesigning/signing-worker/playbook.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/codesigning/signing-worker/playbook.yml b/codesigning/signing-worker/playbook.yml index ffce2325a..5719a05e5 100644 --- a/codesigning/signing-worker/playbook.yml +++ b/codesigning/signing-worker/playbook.yml @@ -110,6 +110,7 @@ - name: Enable the natcap-codesign service ansible.builtin.systemd_service: name: natcap-codesign + daemon_reload: true # reload in case there are any config changes state: started enabled: true From 595254b98458733d997bc08849e3ed6eadc7a519 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 29 Jan 2025 16:05:51 -0800 Subject: [PATCH 19/80] Adding json body to request. RE:#1580 --- codesigning/signing-worker/natcap-codesign.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesigning/signing-worker/natcap-codesign.sh b/codesigning/signing-worker/natcap-codesign.sh index c2455d137..b4cc155b0 100644 --- a/codesigning/signing-worker/natcap-codesign.sh +++ b/codesigning/signing-worker/natcap-codesign.sh @@ -6,7 +6,7 @@ while true do - DATA=$(curl -sb -H "Accept: application/json" "https://us-west1-natcap-servers.cloudfunctions.net/codesigning-queue") + DATA=$(curl -i -H "Accept: application/json" "https://us-west1-natcap-servers.cloudfunctions.net/codesigning-queue{\"token\": \"$ACCESS_TOKEN\"}") # The response body will be empty when there is nothing in the queue. if [ -z "$DATA" ]; then sleep 30 From ab1eb56ca4a3f716e2eac828652b257a5d3da858 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 29 Jan 2025 16:22:51 -0800 Subject: [PATCH 20/80] Restructuring the shell script. RE:#1580 --- codesigning/signing-worker/natcap-codesign.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/codesigning/signing-worker/natcap-codesign.sh b/codesigning/signing-worker/natcap-codesign.sh index b4cc155b0..5d98f5383 100644 --- a/codesigning/signing-worker/natcap-codesign.sh +++ b/codesigning/signing-worker/natcap-codesign.sh @@ -9,12 +9,12 @@ do DATA=$(curl -i -H "Accept: application/json" "https://us-west1-natcap-servers.cloudfunctions.net/codesigning-queue{\"token\": \"$ACCESS_TOKEN\"}") # The response body will be empty when there is nothing in the queue. 
if [ -z "$DATA" ]; then - sleep 30 continue + else + BASENAME=$(jq ".basename" <<< $DATA) + wget -O $BASENAME $(jq ".https-url" <<< $DATA) + python3 opt/natcap-codesign/natcap_codesign.py /opt/natcap-codesign/codesign-cert-chain.pem "$BASENAME" + gcloud storage upload $BASENAME $(jq ".gs-uri" <<< $DATA) fi - BASENAME=$(jq ".basename" <<< $DATA) - wget -O $BASENAME $(jq ".https-url" <<< $DATA) - python3 opt/natcap-codesign/natcap_codesign.py /opt/natcap-codesign/codesign-cert-chain.pem "$BASENAME" - gcloud storage upload $BASENAME $(jq ".gs-uri" <<< $DATA) sleep 30 done From d9ba157403484d4df7088a4a9619e9f270a0a21f Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 29 Jan 2025 16:27:37 -0800 Subject: [PATCH 21/80] Trying a different approach to trimming whitespace. RE:#1580 --- codesigning/signing-worker/natcap-codesign.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesigning/signing-worker/natcap-codesign.sh b/codesigning/signing-worker/natcap-codesign.sh index 5d98f5383..4223e1713 100644 --- a/codesigning/signing-worker/natcap-codesign.sh +++ b/codesigning/signing-worker/natcap-codesign.sh @@ -8,7 +8,7 @@ while true do DATA=$(curl -i -H "Accept: application/json" "https://us-west1-natcap-servers.cloudfunctions.net/codesigning-queue{\"token\": \"$ACCESS_TOKEN\"}") # The response body will be empty when there is nothing in the queue. - if [ -z "$DATA" ]; then + if [ -z $(echo "$DATA" | xargs) ]; then # echo | xargs will trim all whitespace. continue else BASENAME=$(jq ".basename" <<< $DATA) From 673e16da76e298493dd98e65b339527862e2f49e Mon Sep 17 00:00:00 2001 From: James Douglass Date: Wed, 29 Jan 2025 16:32:10 -0800 Subject: [PATCH 22/80] Trying another way to trim whitespace. RE:#1580 --- codesigning/signing-worker/natcap-codesign.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/codesigning/signing-worker/natcap-codesign.sh b/codesigning/signing-worker/natcap-codesign.sh index 4223e1713..a601da483 100644 --- a/codesigning/signing-worker/natcap-codesign.sh +++ b/codesigning/signing-worker/natcap-codesign.sh @@ -8,7 +8,8 @@ while true do DATA=$(curl -i -H "Accept: application/json" "https://us-west1-natcap-servers.cloudfunctions.net/codesigning-queue{\"token\": \"$ACCESS_TOKEN\"}") # The response body will be empty when there is nothing in the queue. - if [ -z $(echo "$DATA" | xargs) ]; then # echo | xargs will trim all whitespace. + if [ -z $(echo -e "$DATA" | tr -d '[:space:]') ]; then + echo "No queued requests, waiting 30 seconds..." continue else BASENAME=$(jq ".basename" <<< $DATA) From 943c507d7567a6a66c9d29cc437c6435b338e709 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 30 Jan 2025 14:15:03 -0800 Subject: [PATCH 23/80] Reworking codesigning service to be mostly python. 
RE:#1580 --- codesigning/signing-worker/natcap-codesign.py | 98 +++++++++++++++---- .../signing-worker/natcap-codesign.service | 2 +- codesigning/signing-worker/playbook.yml | 1 + 3 files changed, 79 insertions(+), 22 deletions(-) mode change 100644 => 100755 codesigning/signing-worker/natcap-codesign.py diff --git a/codesigning/signing-worker/natcap-codesign.py b/codesigning/signing-worker/natcap-codesign.py old mode 100644 new mode 100755 index 9f6f2e73e..229ec8e11 --- a/codesigning/signing-worker/natcap-codesign.py +++ b/codesigning/signing-worker/natcap-codesign.py @@ -1,32 +1,88 @@ #!/usr/bin/env python3 +import logging +import os +import shutil +import subprocess import sys import textwrap +import time import pexpect # apt install python3-pexpect +import requests # apt install python3-requests +LOGGER = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) CERTIFICATE = sys.argv[1] -FILETOSIGN = sys.argv[2] -SIGNED = sys.argv[3] - -SIGNCODE_COMMAND = textwrap.dedent(f"""\ - osslsigncode sign \ - -pkcs11engine /usr/lib/aarch64-linux-gnu/engines-3/pkcs11.so \ - -pkcs11module /usr/lib/aarch64-linux-gnu/libykcs11.so.2 \ - -key "pkcs11:id=%02;type=private" \ - -certs {CERTIFICATE} \ - -h sha256 \ - -ts http://timestamp.sectigo.com \ - -readpass pass.txt \ - -verbose \ - -in {FILETOSIGN} \ - -out {SIGNED}""") -process = pexpect.spawnu(SIGNCODE_COMMAND) -process.expect('Enter PKCS#11 key PIN for Private key for Digital Signature:') -with open('pass.txt') as passfile: - process.sendline(passfile.read().strip()) +def get_from_queue(url): + response = requests.get( + "https://us-west1-natcap-servers.cloudfunctions.net/codesigning-queue", + data={"token": str(os.environ['ACCESS_TOKEN'])}) + if response.status_code == 204: + return None + else: + return response.json() -# print remainder of program output for our logging. -print(process.read()) + +# See https://stackoverflow.com/a/16696317 +def download_file(url): + local_filename = url.split('/')[-1] + with requests.get(url, stream=True) as r: + r.raise_for_status() + with open(local_filename, 'wb') as f: + for chunk in r.iter_content(chunk_size=8192): + f.write(chunk) + return local_filename + + +def upload_to_bucket(filename, path_on_bucket): + subprocess.run(['gsutil', 'cp', filename, path_on_bucket], check=True) + + +def sign_file(file_to_sign): + signed_file = f"{file_to_sign}.signed" + + signcode_command = textwrap.dedent(f"""\ + osslsigncode sign \ + -pkcs11engine /usr/lib/aarch64-linux-gnu/engines-3/pkcs11.so \ + -pkcs11module /usr/lib/aarch64-linux-gnu/libykcs11.so.2 \ + -key "pkcs11:id=%02;type=private" \ + -certs {CERTIFICATE} \ + -h sha256 \ + -ts http://timestamp.sectigo.com \ + -readpass pass.txt \ + -verbose \ + -in {file_to_sign} \ + -out {signed_file}""") + + process = pexpect.spawnu(signcode_command) + process.expect('Enter PKCS#11 key PIN for Private key for Digital Signature:') + with open('pass.txt') as passfile: + process.sendline(passfile.read().strip()) + + # print remainder of program output for our logging. 
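+    # A suggested follow-up, not part of this change: pexpect does not raise on
+    # a non-zero exit, so calling process.close() and checking
+    # process.exitstatus here would surface osslsigncode failures rather than
+    # shipping a file that never got signed.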
+ print(process.read()) + + shutil.move(signed_file, file_to_sign) + + +def main(): + while True: + try: + file_to_sign = get_from_queue() + if file_to_sign is None: + LOGGER.info('No items in the queue') + else: + filename = download_file(file_to_sign['https-url']) + sign_file(filename) + upload_to_bucket(filename, file_to_sign['gs-uri']) + os.remove(filename) + except Exception: + LOGGER.exception("Unexpected error signing file") + time.sleep(15) + + +if __name__ == '__main__': + main() diff --git a/codesigning/signing-worker/natcap-codesign.service b/codesigning/signing-worker/natcap-codesign.service index 678aa1087..46cc62934 100644 --- a/codesigning/signing-worker/natcap-codesign.service +++ b/codesigning/signing-worker/natcap-codesign.service @@ -30,4 +30,4 @@ WorkingDirectory=/tmp # Run in the foreground Type=simple Restart=always -ExecStart=/opt/natcap-codesign/natcap-codesign.sh +ExecStart=python3 /opt/natcap-codesign/natcap-codesign.py diff --git a/codesigning/signing-worker/playbook.yml b/codesigning/signing-worker/playbook.yml index 5719a05e5..e6154654e 100644 --- a/codesigning/signing-worker/playbook.yml +++ b/codesigning/signing-worker/playbook.yml @@ -31,6 +31,7 @@ pkg: - python3 - python3-pexpect + - python3-requests - wget - vim-nox - yubico-piv-tool From 5f2f3e560a99a30297435c7a91c40a7e7b0afad4 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 30 Jan 2025 14:18:22 -0800 Subject: [PATCH 24/80] Forgot the certificate argument. RE:#1580 --- codesigning/signing-worker/natcap-codesign.service | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesigning/signing-worker/natcap-codesign.service b/codesigning/signing-worker/natcap-codesign.service index 46cc62934..afc728541 100644 --- a/codesigning/signing-worker/natcap-codesign.service +++ b/codesigning/signing-worker/natcap-codesign.service @@ -30,4 +30,4 @@ WorkingDirectory=/tmp # Run in the foreground Type=simple Restart=always -ExecStart=python3 /opt/natcap-codesign/natcap-codesign.py +ExecStart=python3 /opt/natcap-codesign/natcap-codesign.py /opt/natcap-codesign/codesign-cert-chain.pem From 1c123dcf285ad0d04c770902cb5199a68166a021 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 30 Jan 2025 14:20:32 -0800 Subject: [PATCH 25/80] Removing unnecessary function parameter. RE:#1580 --- codesigning/signing-worker/natcap-codesign.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesigning/signing-worker/natcap-codesign.py b/codesigning/signing-worker/natcap-codesign.py index 229ec8e11..9081e8541 100755 --- a/codesigning/signing-worker/natcap-codesign.py +++ b/codesigning/signing-worker/natcap-codesign.py @@ -16,7 +16,7 @@ logging.basicConfig(level=logging.INFO) CERTIFICATE = sys.argv[1] -def get_from_queue(url): +def get_from_queue(): response = requests.get( "https://us-west1-natcap-servers.cloudfunctions.net/codesigning-queue", data={"token": str(os.environ['ACCESS_TOKEN'])}) From adaf945708d3e1fb2a1a7a9586c4977309c4631f Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 30 Jan 2025 14:27:19 -0800 Subject: [PATCH 26/80] Loading access token from a file. 
RE:#1580 --- codesigning/signing-worker/natcap-codesign.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/codesigning/signing-worker/natcap-codesign.py b/codesigning/signing-worker/natcap-codesign.py index 9081e8541..a4f2df21a 100755 --- a/codesigning/signing-worker/natcap-codesign.py +++ b/codesigning/signing-worker/natcap-codesign.py @@ -15,11 +15,14 @@ LOGGER = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) CERTIFICATE = sys.argv[1] +with open("access_token.txt") as token_file: + ACCESS_TOKEN = token_file.read().strip() + def get_from_queue(): response = requests.get( "https://us-west1-natcap-servers.cloudfunctions.net/codesigning-queue", - data={"token": str(os.environ['ACCESS_TOKEN'])}) + data={"token": ACCESS_TOKEN}) if response.status_code == 204: return None else: From 0a622479cdabb3ff562f694e697158ebbb1818b8 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 30 Jan 2025 14:30:32 -0800 Subject: [PATCH 27/80] Clarifying path to the token file. RE:#1580 --- codesigning/signing-worker/natcap-codesign.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/codesigning/signing-worker/natcap-codesign.py b/codesigning/signing-worker/natcap-codesign.py index a4f2df21a..c91180db7 100755 --- a/codesigning/signing-worker/natcap-codesign.py +++ b/codesigning/signing-worker/natcap-codesign.py @@ -15,7 +15,8 @@ LOGGER = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) CERTIFICATE = sys.argv[1] -with open("access_token.txt") as token_file: +TOKEN_FILE = os.path.join(os.path.dirname(__file__), "access_token.txt") +with open(TOKEN_FILE) as token_file: ACCESS_TOKEN = token_file.read().strip() From bf6e595677bb775921c611d78755ac0b8d229a41 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 30 Jan 2025 14:33:36 -0800 Subject: [PATCH 28/80] Raising errors when we find them. RE:#1580 --- codesigning/signing-worker/natcap-codesign.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/codesigning/signing-worker/natcap-codesign.py b/codesigning/signing-worker/natcap-codesign.py index c91180db7..fa1fdc83e 100755 --- a/codesigning/signing-worker/natcap-codesign.py +++ b/codesigning/signing-worker/natcap-codesign.py @@ -83,8 +83,9 @@ def main(): sign_file(filename) upload_to_bucket(filename, file_to_sign['gs-uri']) os.remove(filename) - except Exception: + except Exception as e: LOGGER.exception("Unexpected error signing file") + raise e time.sleep(15) From e6656f71fb26dd96f33eb398633c7179aec745e6 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 30 Jan 2025 14:46:42 -0800 Subject: [PATCH 29/80] Using POST method always. 
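
With this change, every call is a POST whose JSON body names the action.
Roughly, the two request shapes this function now handles are (values are
placeholders):

    # Worker pulls the next file to sign:
    payload = {'token': ACCESS_TOKEN, 'action': 'dequeue'}

    # Build pipeline submits a binary; 'url' must be an .exe on an
    # allowed bucket:
    payload = {'token': ACCESS_TOKEN, 'action': 'enqueue', 'url': exe_url}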
RE:#1580 --- codesigning/gcp-cloudfunc/main.py | 9 +++++---- codesigning/signing-worker/natcap-codesign.py | 7 +++++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/codesigning/gcp-cloudfunc/main.py b/codesigning/gcp-cloudfunc/main.py index 9567b89f4..ac011d846 100644 --- a/codesigning/gcp-cloudfunc/main.py +++ b/codesigning/gcp-cloudfunc/main.py @@ -55,12 +55,13 @@ def main(request): if data['token'] != os.environ['ACCESS_TOKEN']: return jsonify('Invalid token'), 403 - request_method = request.method + if request.method != 'POST': + return jsonify('Invalid request method'), 405 storage_client = storage.Client() bucket = storage_client.bucket(CODESIGN_DATA_BUCKET) - if request_method == 'GET': + if data['action'] == 'dequeue': with get_lock(): queuefile = bucket.blob('queue.json') queue_dict = json.loads(queuefile.download_as_string()) @@ -81,7 +82,7 @@ def main(request): logging.info(f'Dequeued {next_file_url}') return jsonify(data) - elif request_method == 'POST': + elif data['action'] == 'enqueue': url = data['url'] if not url.endswith('.exe'): @@ -143,4 +144,4 @@ def main(request): return jsonify("OK"), 200 else: - return jsonify('Invalid request method'), 405 + return jsonify('Invalid action request'), 405 diff --git a/codesigning/signing-worker/natcap-codesign.py b/codesigning/signing-worker/natcap-codesign.py index fa1fdc83e..4be2e8580 100755 --- a/codesigning/signing-worker/natcap-codesign.py +++ b/codesigning/signing-worker/natcap-codesign.py @@ -21,9 +21,12 @@ with open(TOKEN_FILE) as token_file: def get_from_queue(): - response = requests.get( + response = requests.post( "https://us-west1-natcap-servers.cloudfunctions.net/codesigning-queue", - data={"token": ACCESS_TOKEN}) + data={ + "token": ACCESS_TOKEN, + "action": "dequeue" + }) if response.status_code == 204: return None else: From e5119574fed59a3546124c480e407ade6e1573ae Mon Sep 17 00:00:00 2001 From: Claire Simpson Date: Thu, 30 Jan 2025 15:53:52 -0700 Subject: [PATCH 30/80] coastal blue carbon tests added --- tests/test_coastal_blue_carbon.py | 183 ++++++++++++++++++++++++++++++ 1 file changed, 183 insertions(+) diff --git a/tests/test_coastal_blue_carbon.py b/tests/test_coastal_blue_carbon.py index 27999bbb8..ddd4081dc 100644 --- a/tests/test_coastal_blue_carbon.py +++ b/tests/test_coastal_blue_carbon.py @@ -11,6 +11,7 @@ import unittest import numpy import pandas +from scipy.sparse import dok_matrix import pygeoprocessing from natcap.invest import utils from natcap.invest import validation @@ -24,6 +25,38 @@ REGRESSION_DATA = os.path.join( LOGGER = logging.getLogger(__name__) +def make_raster_from_array(base_raster_path, array): + """Create a raster on designated path with arbitrary lulc codes. + + Args: + base_raster_path (str): the raster path for making the new raster. + array (array): array to save as raster + + Returns: + None. 
+ """ + + srs = osr.SpatialReference() + srs.ImportFromEPSG(26910) # UTM Zone 10N + projection_wkt = srs.ExportToWkt() + # origin hand-picked for this epsg: + geotransform = [461261, 1.0, 0.0, 4923265, 0.0, -1.0] + + gtiff_driver = gdal.GetDriverByName('GTiff') + new_raster = gtiff_driver.Create( + base_raster_path, array.shape[0], array.shape[1], 1, + gdal.GDT_Int32, options=[ + 'TILED=YES', 'BIGTIFF=YES', 'COMPRESS=LZW', + 'BLOCKXSIZE=16', 'BLOCKYSIZE=16']) + new_raster.SetProjection(projection_wkt) + new_raster.SetGeoTransform(geotransform) + new_band = new_raster.GetRasterBand(1) + new_band.SetNoDataValue(-1) + new_band.WriteArray(array) + new_raster.FlushCache() + new_band = None + new_raster = None + class TestPreprocessor(unittest.TestCase): """Test Coastal Blue Carbon preprocessor functions.""" @@ -1060,3 +1093,153 @@ class TestCBC2(unittest.TestCase): [(['analysis_year'], coastal_blue_carbon.INVALID_ANALYSIS_YEAR_MSG.format( analysis_year=2000, latest_year=2000))]) + + def test_calculate_npv(self): + """Test `_calculate_npv`""" + from natcap.invest.coastal_blue_carbon import coastal_blue_carbon + + def make_carbon_seq_raster(out_path): + """make a carbon sequestration raster and save it to out_path""" + + #make fake data + net_sequestration_rasters = { + 2010: os.path.join(self.workspace_dir, "carbon_seq_2010.tif"), + 2011: os.path.join(self.workspace_dir, "carbon_seq_2011.tif"), + 2012: os.path.join(self.workspace_dir, "carbon_seq_2012.tif") + } + + for year, path in net_sequestration_rasters.items(): + array = numpy.array([[year*.5, year*.25], [year-1, 50]]) # random array + make_raster_from_array(path, array) + + prices_by_year = { + 2010: 50, + 2011: 80, + 2012: 95 + } + discount_rate = 0.1 + baseline_year = 2010 + target_raster_years_and_paths = { + 2010: os.path.join(self.workspace_dir, "tgt_carbon_seq_2010.tif"), + 2011: os.path.join(self.workspace_dir, "tgt_carbon_seq_2011.tif"), + 2012: os.path.join(self.workspace_dir, "tgt_carbon_seq_2012.tif") + } + + coastal_blue_carbon._calculate_npv(net_sequestration_rasters, + prices_by_year, discount_rate, + baseline_year, + target_raster_years_and_paths) + + # read in the created target rasters + actual_2011 = gdal.Open(target_raster_years_and_paths[2011]) + band = actual_2011.GetRasterBand(1) + actual_2011 = band.ReadAsArray() + + actual_2012 = gdal.Open(target_raster_years_and_paths[2012]) + band = actual_2012.GetRasterBand(1) + actual_2012 = band.ReadAsArray() + + # compare actual rasters to expected (based on running `_calculate_npv`) + expected_2011 = numpy.array([[100550, 50300], [200950, 5000]], + dtype=int) + expected_2012 = numpy.array([[370268, 185195], [740045, 18409]], + dtype=int) + numpy.testing.assert_allclose(actual_2011, expected_2011) + numpy.testing.assert_allclose(actual_2012, expected_2012) + + def test_calculate_accumulation_over_time(self): + """Test `_calculate_accumulation_over_time`""" + from natcap.invest.coastal_blue_carbon.coastal_blue_carbon import \ + _calculate_accumulation_over_time + + # generate fake data with nodata values + nodata = float(numpy.finfo(numpy.float32).min) + annual_biomass_matrix = numpy.array([[1, 2], [3, nodata]]) + annual_soil_matrix = numpy.array([[11, 12], [13, 14]]) + annual_litter_matrix = numpy.array([[.5, .9], [4, .9]]) + n_years = 3 + + actual_accumulation = _calculate_accumulation_over_time( + annual_biomass_matrix, annual_soil_matrix, annual_litter_matrix, + n_years) + + expected_accumulation = numpy.array([[37.5, 44.7], [60, nodata]]) + 
numpy.testing.assert_allclose(actual_accumulation, expected_accumulation) + + def test_calculate_net_sequestration(self): + """test `_calculate_net_sequestration`""" + from natcap.invest.coastal_blue_carbon.coastal_blue_carbon import \ + _calculate_net_sequestration + + # make fake rasters that contain nodata pixels (-1) + accumulation_raster_path = os.path.join(self.workspace_dir, + "accumulation_raster.tif") + accumulation_array = numpy.array([[40, -1], [70, -1]]) + make_raster_from_array(accumulation_raster_path, accumulation_array) + + emissions_raster_path = os.path.join(self.workspace_dir, + "emissions_raster.tif") + emissions_array = numpy.array([[-1, 8], [7, -1]]) + make_raster_from_array(emissions_raster_path, emissions_array) + + target_raster_path = os.path.join(self.workspace_dir, + "target_raster.tif") + + # run `_calculate_net_sequestration` + _calculate_net_sequestration(accumulation_raster_path, + emissions_raster_path, target_raster_path) + + # compare actual to expected output net sequestration raster + actual_sequestration = gdal.Open(target_raster_path) + band = actual_sequestration.GetRasterBand(1) + actual_sequestration = band.ReadAsArray() + + # calculated by running `_calculate_net_sequestration` + nodata = float(numpy.finfo(numpy.float32).min) + expected_sequestration = numpy.array([[40, -8], [-7, nodata]]) + + numpy.testing.assert_allclose(actual_sequestration, + expected_sequestration) + + def test_reclassify_accumulation_transition(self): + """Test `_reclassify_accumulation_transition`""" + from natcap.invest.coastal_blue_carbon.coastal_blue_carbon import \ + _reclassify_accumulation_transition, _reclassify_disturbance_magnitude + + # make fake raster data + landuse_transition_from_raster = os.path.join(self.workspace_dir, + "landuse_transition_from.tif") + landuse_transition_from_array = numpy.array([[1, 2], [3, 2]]) + make_raster_from_array(landuse_transition_from_raster, + landuse_transition_from_array) + + landuse_transition_to_raster = os.path.join(self.workspace_dir, + "landuse_transition_to.tif") + landuse_transition_to_array = numpy.array([[1, 1], [2, 3]]) + make_raster_from_array(landuse_transition_to_raster, + landuse_transition_to_array) + + #make fake accumulation_rate_matrix + accumulation_rate_matrix = dok_matrix((4, 4), dtype=numpy.float32) + accumulation_rate_matrix[1, 2] = 0.5 # Forest -> Grassland + accumulation_rate_matrix[1, 3] = 0.3 # Forest -> Agriculture + + accumulation_rate_matrix[2, 1] = 0.2 # Grassland -> Forest + accumulation_rate_matrix[2, 3] = 0.4 # Grassland -> Agriculture + + accumulation_rate_matrix[3, 1] = 0.1 # Agriculture -> Forest + accumulation_rate_matrix[3, 2] = 0.3 # Agriculture -> Grassland + + target_raster_path = os.path.join(self.workspace_dir, "output.tif") + _reclassify_accumulation_transition( + landuse_transition_from_raster, landuse_transition_to_raster, + accumulation_rate_matrix, target_raster_path) + + # compare actual and expected target_raster + actual_accumulation = gdal.Open(target_raster_path) + band = actual_accumulation.GetRasterBand(1) + actual_accumulation = band.ReadAsArray() + + expected_accumulation = numpy.array([[0, .2], [.3, .4]]) + + numpy.testing.assert_allclose(actual_accumulation, expected_accumulation) From f8ebe382f8a97d287fd0b076feb933fee86ee81b Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 30 Jan 2025 15:00:20 -0800 Subject: [PATCH 31/80] Fixing how I make my request. 
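An aside on the sparse lookup the coastal blue carbon test above exercises: unset entries of a `scipy.sparse.dok_matrix` read back as the dtype's zero, which is why transitions with no accumulation rate fall out as 0 without being stored. A minimal sketch of the (from, to) lookup semantics only; the model function itself reclassifies whole rasters blockwise:

```python
import numpy
from scipy.sparse import dok_matrix

# Rates are keyed by (from_lulc, to_lulc); unset entries read back as 0.
rates = dok_matrix((4, 4), dtype=numpy.float32)
rates[2, 1] = 0.2  # Grassland -> Forest
rates[3, 2] = 0.3  # Agriculture -> Grassland
rates[2, 3] = 0.4  # Grassland -> Agriculture

lulc_from = numpy.array([[1, 2], [3, 2]])
lulc_to = numpy.array([[1, 1], [2, 3]])

accumulation = numpy.array(
    [rates[f, t] for f, t in zip(lulc_from.flat, lulc_to.flat)],
    dtype=numpy.float32).reshape(lulc_from.shape)
print(accumulation)  # [[0., 0.2], [0.3, 0.4]], the test's expectation
```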
RE:#1580 --- codesigning/signing-worker/natcap-codesign.py | 3 ++- codesigning/signing-worker/natcap-codesign.service | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/codesigning/signing-worker/natcap-codesign.py b/codesigning/signing-worker/natcap-codesign.py index 4be2e8580..89c00b864 100755 --- a/codesigning/signing-worker/natcap-codesign.py +++ b/codesigning/signing-worker/natcap-codesign.py @@ -23,7 +23,8 @@ with open(TOKEN_FILE) as token_file: def get_from_queue(): response = requests.post( "https://us-west1-natcap-servers.cloudfunctions.net/codesigning-queue", - data={ + headers={"Content-Type": "application/json"}, + json={ "token": ACCESS_TOKEN, "action": "dequeue" }) diff --git a/codesigning/signing-worker/natcap-codesign.service b/codesigning/signing-worker/natcap-codesign.service index afc728541..69c807525 100644 --- a/codesigning/signing-worker/natcap-codesign.service +++ b/codesigning/signing-worker/natcap-codesign.service @@ -29,5 +29,5 @@ WorkingDirectory=/tmp [Service] # Run in the foreground Type=simple -Restart=always +Restart=no # change back to always ExecStart=python3 /opt/natcap-codesign/natcap-codesign.py /opt/natcap-codesign/codesign-cert-chain.pem From 6d680ac969d5e348af8957299184c5ad9990f6a9 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 30 Jan 2025 15:00:57 -0800 Subject: [PATCH 32/80] Polling every 60s. RE:#1580 --- codesigning/signing-worker/natcap-codesign.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesigning/signing-worker/natcap-codesign.py b/codesigning/signing-worker/natcap-codesign.py index 89c00b864..37bce1861 100755 --- a/codesigning/signing-worker/natcap-codesign.py +++ b/codesigning/signing-worker/natcap-codesign.py @@ -90,7 +90,7 @@ def main(): except Exception as e: LOGGER.exception("Unexpected error signing file") raise e - time.sleep(15) + time.sleep(60) if __name__ == '__main__': From 0abd959c39132c7faccf60953cec3b8a4c9385af Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 30 Jan 2025 15:32:14 -0800 Subject: [PATCH 33/80] Correcting a filepath. 
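On the fix in patch 31 above: the cloud function reads its payload with `request.get_json()`, which does not parse form-encoded bodies. A side-by-side sketch, with `QUEUE_URL` and `ACCESS_TOKEN` standing in for the endpoint and token:

```python
import requests

payload = {'token': ACCESS_TOKEN, 'action': 'dequeue'}

# data= sends application/x-www-form-urlencoded; Flask's
# request.get_json() returns None for that body, so the function's
# data['token'] lookup fails.
requests.post(QUEUE_URL, data=payload)

# json= serializes the dict to a JSON body and sets
# Content-Type: application/json, which request.get_json() parses.
requests.post(QUEUE_URL, json=payload)
```

Since `requests` sets the `Content-Type` header itself when `json=` is used, the explicit header in the patch is belt-and-suspenders rather than strictly required.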
RE:#1580 --- codesigning/signing-worker/natcap-codesign.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/codesigning/signing-worker/natcap-codesign.py b/codesigning/signing-worker/natcap-codesign.py index 37bce1861..60d28e7bb 100755 --- a/codesigning/signing-worker/natcap-codesign.py +++ b/codesigning/signing-worker/natcap-codesign.py @@ -15,7 +15,8 @@ LOGGER = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) CERTIFICATE = sys.argv[1] -TOKEN_FILE = os.path.join(os.path.dirname(__file__), "access_token.txt") +FILE_DIR = os.path.dirname(__file__) +TOKEN_FILE = os.path.join(FILE_DIR, "access_token.txt") with open(TOKEN_FILE) as token_file: ACCESS_TOKEN = token_file.read().strip() @@ -51,6 +52,7 @@ def upload_to_bucket(filename, path_on_bucket): def sign_file(file_to_sign): signed_file = f"{file_to_sign}.signed" + pass_file = os.path.join(FILE_DIR, 'pass.txt') signcode_command = textwrap.dedent(f"""\ osslsigncode sign \ @@ -60,14 +62,14 @@ def sign_file(file_to_sign): -certs {CERTIFICATE} \ -h sha256 \ -ts http://timestamp.sectigo.com \ - -readpass pass.txt \ + -readpass {pass_file} \ -verbose \ -in {file_to_sign} \ -out {signed_file}""") process = pexpect.spawnu(signcode_command) process.expect('Enter PKCS#11 key PIN for Private key for Digital Signature:') - with open('pass.txt') as passfile: + with open(pass_file) as passfile: process.sendline(passfile.read().strip()) # print remainder of program output for our logging. From 840dfaa91680f574b8000a6e326afb7ef3e013a3 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 30 Jan 2025 15:34:30 -0800 Subject: [PATCH 34/80] restarting the systemd service. RE:#1580 --- codesigning/signing-worker/playbook.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesigning/signing-worker/playbook.yml b/codesigning/signing-worker/playbook.yml index e6154654e..f7ea17e6a 100644 --- a/codesigning/signing-worker/playbook.yml +++ b/codesigning/signing-worker/playbook.yml @@ -112,7 +112,7 @@ ansible.builtin.systemd_service: name: natcap-codesign daemon_reload: true # reload in case there are any config changes - state: started + state: restarted enabled: true #- install a service account key to write to GCS later From 1bef7da2e35cdc876c391411a036ad2001469a51 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 30 Jan 2025 15:39:06 -0800 Subject: [PATCH 35/80] Updating lib paths to match our VM. RE:#1580 --- codesigning/signing-worker/natcap-codesign.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/codesigning/signing-worker/natcap-codesign.py b/codesigning/signing-worker/natcap-codesign.py index 60d28e7bb..673e184ec 100755 --- a/codesigning/signing-worker/natcap-codesign.py +++ b/codesigning/signing-worker/natcap-codesign.py @@ -56,8 +56,8 @@ def sign_file(file_to_sign): signcode_command = textwrap.dedent(f"""\ osslsigncode sign \ - -pkcs11engine /usr/lib/aarch64-linux-gnu/engines-3/pkcs11.so \ - -pkcs11module /usr/lib/aarch64-linux-gnu/libykcs11.so.2 \ + -pkcs11engine /usr/lib/x86_64-linux-gnu/engines-3/pkcs11.so \ + -pkcs11module /usr/lib/x86_64-linux-gnu/libykcs11.so \ -key "pkcs11:id=%02;type=private" \ -certs {CERTIFICATE} \ -h sha256 \ From 4641cf05c9104376c3317b2a12a94e82ff17bb87 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 30 Jan 2025 15:45:00 -0800 Subject: [PATCH 36/80] Adding more helpful logging when signing. 
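The filepath fix in patch 33 above matters because of how the service is launched: the unit file sets `WorkingDirectory=/tmp`, so the bare `pass.txt` the old code opened resolved against `/tmp`, not against the script's own directory. A sketch of the difference:

```python
import os

# With WorkingDirectory=/tmp, a bare relative path resolves against /tmp
# at runtime:
os.path.abspath('pass.txt')  # -> '/tmp/pass.txt'

# Anchoring on __file__ is stable no matter where systemd starts the
# process:
FILE_DIR = os.path.dirname(__file__)
pass_file = os.path.join(FILE_DIR, 'pass.txt')
# -> '/opt/natcap-codesign/pass.txt' when run from that directory
```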
RE:#1580 --- codesigning/signing-worker/natcap-codesign.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/codesigning/signing-worker/natcap-codesign.py b/codesigning/signing-worker/natcap-codesign.py index 673e184ec..b5927698d 100755 --- a/codesigning/signing-worker/natcap-codesign.py +++ b/codesigning/signing-worker/natcap-codesign.py @@ -85,10 +85,15 @@ def main(): if file_to_sign is None: LOGGER.info('No items in the queue') else: + LOGGER.info(f"Dequeued and downloading {file_to_sign['https-url']}") filename = download_file(file_to_sign['https-url']) + LOGGER.info(f"Signing {filename}") sign_file(filename) + LOGGER.info(f"Uploading signed file to {file_to_sign['gs-uri']}") upload_to_bucket(filename, file_to_sign['gs-uri']) + LOGGER.info(f"Removing {filename}") os.remove(filename) + LOGGER.info("Signing complete.") except Exception as e: LOGGER.exception("Unexpected error signing file") raise e From 884ccc464642b93e938b16bdd903019a00448ec9 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 30 Jan 2025 16:05:37 -0800 Subject: [PATCH 37/80] Tracking the file in the signed files list. RE#1580 --- codesigning/signing-worker/natcap-codesign.py | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/codesigning/signing-worker/natcap-codesign.py b/codesigning/signing-worker/natcap-codesign.py index b5927698d..5438106ca 100755 --- a/codesigning/signing-worker/natcap-codesign.py +++ b/codesigning/signing-worker/natcap-codesign.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +import json import logging import os import shutil @@ -78,6 +79,35 @@ def sign_file(file_to_sign): shutil.move(signed_file, file_to_sign) +def add_file_to_signed_list(url): + # Since this process is the only one that should be writing to this file, we + # don't need to worry about race conditions. + remote_signed_files_path = 'gs://natcap-codesigning/signed_files.json' + local_signed_files_path = os.path.join(FILE_DIR, 'signed_files.json') + + # Test to see if the signed files json file exists in the bucket; create it + # if not. + exists_proc = subprocess.run( + ['gsutil', '-q', 'stat', remote_signed_files_path], check=False) + if exists_proc.returncode != 0: + signed_files_dict = {'signed_files': []} + else: + subprocess.run( + ['gsutil', 'cp', remote_signed_files_path, + local_signed_files_path], check=True) + with open(local_signed_files_path, 'r') as signed_files: + signed_files_dict = json.load(signed_files) + + with open(local_signed_files_path, 'w') as signed_files: + signed_files_dict['signed_files'].append(url) + json.dump(signed_files_dict, signed_files) + + subprocess.run( + ['gsutil', 'cp', local_signed_files_path, + remote_signed_files_path], check=True) + LOGGER.info(f"Added {url} to {remote_signed_files_path}") + + def main(): while True: try: @@ -91,6 +121,9 @@ def main(): sign_file(filename) LOGGER.info(f"Uploading signed file to {file_to_sign['gs-uri']}") upload_to_bucket(filename, file_to_sign['gs-uri']) + LOGGER.info( + f"Adding {file_to_sign['https-url']} to signed files list") + add_file_to_signed_list(file_to_sign['https-url']) LOGGER.info(f"Removing {filename}") os.remove(filename) LOGGER.info("Signing complete.") From 21f08a9c0c90fc2f8d0b390a354cec6ca5f59fe7 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 30 Jan 2025 16:17:47 -0800 Subject: [PATCH 38/80] Adding a script to enqueue a binary by its url. 
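For reference, the implicit contract between the cloud function's dequeue branch and the worker loop above, written out as a sketch (the field values are whatever was enqueued; only the two keys used by `main()` are shown):

```python
item = get_from_queue()  # None when the function answers 204 (empty queue)
if item is not None:
    https_url = item['https-url']  # public download URL for the .exe
    gs_uri = item['gs-uri']        # matching gs:// path for the re-upload
```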
RE:#1580 --- codesigning/enqueue-binary.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 codesigning/enqueue-binary.py diff --git a/codesigning/enqueue-binary.py b/codesigning/enqueue-binary.py new file mode 100644 index 000000000..7caa940a5 --- /dev/null +++ b/codesigning/enqueue-binary.py @@ -0,0 +1,25 @@ +"""Enqueue a windows binary for signing. + +To call this script, you need to set the ACCESS_TOKEN environment variable from +the software team secrets store. + +Example invocation: + + $ ACCESS_TOKEN=abcs1234 python3 enqueue-binary.py +""" + +import os +import sys +from urllib import parse +from urllib import request + +DATA = parse.urlencode({ + 'token': os.environ['ACCESS_TOKEN'], + "url": sys.argv[1], + "action": "enqueue", +}).encode() + +req = request.Request( + 'https://us-west1-natcap-servers.cloudfunctions.net/codesigning-queue', + data=DATA) +response = request.urlopen(req) From 9f772ce35a55868ac7a1609a0cc4daa0d3c2ea0b Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 30 Jan 2025 16:26:22 -0800 Subject: [PATCH 39/80] Checking for whether an existing file was provided. RE:#1580 --- codesigning/gcp-cloudfunc/main.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/codesigning/gcp-cloudfunc/main.py b/codesigning/gcp-cloudfunc/main.py index ac011d846..39a0d8014 100644 --- a/codesigning/gcp-cloudfunc/main.py +++ b/codesigning/gcp-cloudfunc/main.py @@ -109,6 +109,9 @@ def main(request): mday, mmonth, myear = response.headers['Last-Modified'].split(' ')[1:4] modified_time = datetime.datetime.strptime( ' '.join((mday, mmonth, myear)), '%d %b %Y') + if response.status_code > 400: + return jsonify('Requested file does not exist'), 403 + if modified_time < datetime.datetime(year=2024, month=6, day=1): return jsonify('File is too old'), 400 From af9fc47560ee92e236ae7fa7b1be24f922105580 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 30 Jan 2025 16:31:51 -0800 Subject: [PATCH 40/80] Adding a shell script to enqueue the target binary. RE:#1580 --- codesigning/enqueue-current-windows-installer.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 codesigning/enqueue-current-windows-installer.sh diff --git a/codesigning/enqueue-current-windows-installer.sh b/codesigning/enqueue-current-windows-installer.sh new file mode 100644 index 000000000..f418f2340 --- /dev/null +++ b/codesigning/enqueue-current-windows-installer.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env sh +# +# Run this script to enqueue the windows binary for this current version of the +# InVEST windows workbench installer for code signing. +# +# NOTE: this script must be run from the directory containing this script. + +version=$(python -m setuptools_scm) +url_base=$(make -C .. print-DIST_URL_BASE | awk ' { print $3 } ') +url="${url_base}/workbench/invest_${version}_workbench_win32_x64.exe" + +python enqueue-binary.py "${url}" From ad9a35cba044cb04b4d107967eb649ec9146e2b0 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 30 Jan 2025 16:36:13 -0800 Subject: [PATCH 41/80] Adding codesigning step to binary actions workflow. 
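A note on the `Last-Modified` handling that patch 39 builds on: the header is an RFC 1123 date, and the standard library can parse it whole rather than splitting fields by hand. An alternative sketch, not what the function actually does:

```python
import datetime
from email.utils import parsedate_to_datetime

# e.g. response.headers['Last-Modified'] == 'Fri, 31 Jan 2025 17:00:00 GMT'
modified_time = parsedate_to_datetime(response.headers['Last-Modified'])
cutoff = datetime.datetime(2024, 6, 1, tzinfo=datetime.timezone.utc)
if modified_time < cutoff:
    pass  # too old: the function answers ('File is too old', 400)
```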
RE:#1580 --- .github/workflows/build-and-test.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 2094d9881..26a810a39 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -441,6 +441,14 @@ jobs: # WORKBENCH_BINARY=$(find "$(pwd)/workbench/dist" -type f -name 'invest_*.exe' | head -n 1) # make WORKBENCH_BIN_TO_SIGN="$WORKBENCH_BINARY" SIGNTOOL="$SIGNTOOL_PATH" codesign_windows + - name: Queue windows binaries for signing + if: github.event_name != 'pull_request' && matrix.os == 'windows-latest' # secrets not available in PR + env: + ACCESS_TOKEN: ${{ secrets.CODESIGN_QUEUE_ACCESS_TOKEN }} + run: | + cd codesign + bash enqueue-current-windows-installer.sh + - name: Deploy artifacts to GCS if: github.event_name != 'pull_request' run: make deploy From 3cf4a19b756152fa98caaf62ba7299df847d97e5 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 30 Jan 2025 16:51:21 -0800 Subject: [PATCH 42/80] Correcting directory name. RE:#1580 --- .github/workflows/build-and-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 26a810a39..796df59d4 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -446,7 +446,7 @@ jobs: env: ACCESS_TOKEN: ${{ secrets.CODESIGN_QUEUE_ACCESS_TOKEN }} run: | - cd codesign + cd codesigning bash enqueue-current-windows-installer.sh - name: Deploy artifacts to GCS From 59ba67aff67d9f9beff8ec02fdb341cc8069429b Mon Sep 17 00:00:00 2001 From: James Douglass Date: Fri, 31 Jan 2025 09:03:30 -0800 Subject: [PATCH 43/80] Correcting order of operations on a missing file. RE:#1580 --- codesigning/gcp-cloudfunc/main.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/codesigning/gcp-cloudfunc/main.py b/codesigning/gcp-cloudfunc/main.py index 39a0d8014..27ebc8ae5 100644 --- a/codesigning/gcp-cloudfunc/main.py +++ b/codesigning/gcp-cloudfunc/main.py @@ -103,15 +103,16 @@ def main(request): GOOGLE_PREFIX + '/', '').split('/') codesign_bucket = storage_client.bucket(CODESIGN_DATA_BUCKET) - # If the file is too old, reject it. Trying to avoid a - # denial-of-service by invoking the service with very old files. + # If the file does not exist at this URL, reject it. response = requests.head(url) - mday, mmonth, myear = response.headers['Last-Modified'].split(' ')[1:4] - modified_time = datetime.datetime.strptime( - ' '.join((mday, mmonth, myear)), '%d %b %Y') if response.status_code > 400: return jsonify('Requested file does not exist'), 403 + # If the file is too old, reject it. Trying to avoid a + # denial-of-service by invoking the service with very old files. + mday, mmonth, myear = response.headers['Last-Modified'].split(' ')[1:4] + modified_time = datetime.datetime.strptime( + ' '.join((mday, mmonth, myear)), '%d %b %Y') if modified_time < datetime.datetime(year=2024, month=6, day=1): return jsonify('File is too old'), 400 From a69804b3b6bf04fc942e860a4f2c2b957b1a708b Mon Sep 17 00:00:00 2001 From: James Douglass Date: Fri, 31 Jan 2025 09:09:23 -0800 Subject: [PATCH 44/80] Reqorking enqueue script to use requests. 
RE:#1580 --- codesigning/enqueue-binary.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/codesigning/enqueue-binary.py b/codesigning/enqueue-binary.py index 7caa940a5..2c8e4e1ff 100644 --- a/codesigning/enqueue-binary.py +++ b/codesigning/enqueue-binary.py @@ -5,21 +5,24 @@ the software team secrets store. Example invocation: - $ ACCESS_TOKEN=abcs1234 python3 enqueue-binary.py + $ ACCESS_TOKEN=abcs1234 python3 enqueue-binary.py """ import os import sys -from urllib import parse -from urllib import request -DATA = parse.urlencode({ +import requests + +DATA = { 'token': os.environ['ACCESS_TOKEN'], - "url": sys.argv[1], - "action": "enqueue", -}).encode() - -req = request.Request( + 'action': 'enqueue', + 'url': sys.argv[1].replace( + 'gs://', 'https://storage.googleapis.com/'), +} +response = requests.post( 'https://us-west1-natcap-servers.cloudfunctions.net/codesigning-queue', - data=DATA) -response = request.urlopen(req) + json=DATA +) +if response.status_code >= 400: + print(response.text) + sys.exit(1) From 6e948bebf23be2e660d0a5cd4074ed4fdd44ed84 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Fri, 31 Jan 2025 09:29:39 -0800 Subject: [PATCH 45/80] Improving GCP cloud logging in function. RE:#1580 --- codesigning/gcp-cloudfunc/main.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/codesigning/gcp-cloudfunc/main.py b/codesigning/gcp-cloudfunc/main.py index 27ebc8ae5..6c3f0f292 100644 --- a/codesigning/gcp-cloudfunc/main.py +++ b/codesigning/gcp-cloudfunc/main.py @@ -53,9 +53,11 @@ def get_lock(): def main(request): data = request.get_json() if data['token'] != os.environ['ACCESS_TOKEN']: + logging.info('Rejecting request due to invalid token') return jsonify('Invalid token'), 403 if request.method != 'POST': + logging.info('Rejecting request due to invalid HTTP method') return jsonify('Invalid request method'), 405 storage_client = storage.Client() @@ -69,6 +71,7 @@ def main(request): next_file_url = queue_dict['queue'].pop(0) except IndexError: # No items in the queue! + logging.info('No binaries are currently queued for signing') return jsonify('No items in the queue'), 204 queuefile.upload_from_string(json.dumps(queue_dict)) @@ -84,16 +87,21 @@ def main(request): elif data['action'] == 'enqueue': url = data['url'] + logging.info('Attempting to enqueue url %s', url) if not url.endswith('.exe'): + logging.info("Rejecting URL because it doesn't end in .exe"') return jsonify('Invalid URL to sign'), 400 if not url.startswith(GOOGLE_PREFIX): + logging.info('Rejecting URL because it does not start with %s', + GOOGLE_PREFIX) return jsonify('Invalid host'), 400 if not url.startswith(( f'{GOOGLE_PREFIX}/releases.naturalcapitalproject.org/', f'{GOOGLE_PREFIX}/natcap-dev-build-artifacts/')): + logging.info('Rejecting URL because the bucket is incorrect') return jsonify("Invalid target bucket"), 400 # Remove http character quoting @@ -106,6 +114,7 @@ def main(request): # If the file does not exist at this URL, reject it. response = requests.head(url) if response.status_code > 400: + logging.info('Rejecting URL because it does not exist') return jsonify('Requested file does not exist'), 403 # If the file is too old, reject it. 
Trying to avoid a @@ -114,6 +123,7 @@ def main(request): modified_time = datetime.datetime.strptime( ' '.join((mday, mmonth, myear)), '%d %b %Y') if modified_time < datetime.datetime(year=2024, month=6, day=1): + logging.info('Rejecting URL because it is too old') return jsonify('File is too old'), 400 with get_lock(): @@ -126,6 +136,8 @@ def main(request): signed_files_list.download_as_string()) if url in signed_files_dict['signed_files']: + logging.info( + 'Rejecting URL because it has already been signed') return jsonify('File has already been signed'), 400 # Since the file has not already been signed, add the file to the From 96c3e8620d669e28ae1a9c0c337561b298137842 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Fri, 31 Jan 2025 09:31:01 -0800 Subject: [PATCH 46/80] Fixing syntaxerror. RE:#1580 --- codesigning/gcp-cloudfunc/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesigning/gcp-cloudfunc/main.py b/codesigning/gcp-cloudfunc/main.py index 6c3f0f292..e201f090e 100644 --- a/codesigning/gcp-cloudfunc/main.py +++ b/codesigning/gcp-cloudfunc/main.py @@ -90,7 +90,7 @@ def main(request): logging.info('Attempting to enqueue url %s', url) if not url.endswith('.exe'): - logging.info("Rejecting URL because it doesn't end in .exe"') + logging.info("Rejecting URL because it doesn't end in .exe") return jsonify('Invalid URL to sign'), 400 if not url.startswith(GOOGLE_PREFIX): From 279bc4fd5b74a14b57fd78811cf6cdb41dc052a5 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Fri, 31 Jan 2025 09:36:25 -0800 Subject: [PATCH 47/80] Queueing binary for codesigning after make deploy. RE:#1580 --- .github/workflows/build-and-test.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 796df59d4..1a8786335 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -441,6 +441,12 @@ jobs: # WORKBENCH_BINARY=$(find "$(pwd)/workbench/dist" -type f -name 'invest_*.exe' | head -n 1) # make WORKBENCH_BIN_TO_SIGN="$WORKBENCH_BINARY" SIGNTOOL="$SIGNTOOL_PATH" codesign_windows + - name: Deploy artifacts to GCS + if: github.event_name != 'pull_request' + run: make deploy + + # This relies on the file existing on GCP, so it must be run after `make + # deploy` is called. - name: Queue windows binaries for signing if: github.event_name != 'pull_request' && matrix.os == 'windows-latest' # secrets not available in PR env: @@ -449,10 +455,6 @@ jobs: cd codesigning bash enqueue-current-windows-installer.sh - - name: Deploy artifacts to GCS - if: github.event_name != 'pull_request' - run: make deploy - - name: Upload workbench binary artifact if: always() uses: actions/upload-artifact@v4 From dde4a29018ab535381950691ffb91db2bb9d0b9b Mon Sep 17 00:00:00 2001 From: James Douglass Date: Fri, 31 Jan 2025 10:02:40 -0800 Subject: [PATCH 48/80] Attempting to improve logging for debugging. RE:#1580 --- codesigning/enqueue-current-windows-installer.sh | 1 + codesigning/gcp-cloudfunc/main.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/codesigning/enqueue-current-windows-installer.sh b/codesigning/enqueue-current-windows-installer.sh index f418f2340..c812ed3fd 100644 --- a/codesigning/enqueue-current-windows-installer.sh +++ b/codesigning/enqueue-current-windows-installer.sh @@ -9,4 +9,5 @@ version=$(python -m setuptools_scm) url_base=$(make -C .. 
print-DIST_URL_BASE | awk ' { print $3 } ') url="${url_base}/workbench/invest_${version}_workbench_win32_x64.exe" +echo "Enqueuing URL ${url}" python enqueue-binary.py "${url}" diff --git a/codesigning/gcp-cloudfunc/main.py b/codesigning/gcp-cloudfunc/main.py index e201f090e..ec01d7ea8 100644 --- a/codesigning/gcp-cloudfunc/main.py +++ b/codesigning/gcp-cloudfunc/main.py @@ -63,6 +63,8 @@ def main(request): storage_client = storage.Client() bucket = storage_client.bucket(CODESIGN_DATA_BUCKET) + logging.debug('Data POSTed: %s', data) + if data['action'] == 'dequeue': with get_lock(): queuefile = bucket.blob('queue.json') @@ -87,7 +89,7 @@ def main(request): elif data['action'] == 'enqueue': url = data['url'] - logging.info('Attempting to enqueue url %s', url) + logging.info('Attempting to enqueue url" %s', url) if not url.endswith('.exe'): logging.info("Rejecting URL because it doesn't end in .exe") From 7ff35670c08aede86e455ab3913fbeadf01eac5e Mon Sep 17 00:00:00 2001 From: James Douglass Date: Fri, 31 Jan 2025 10:36:14 -0800 Subject: [PATCH 49/80] Fixing multi-line issue in make invocation. RE:#1580 --- codesigning/enqueue-current-windows-installer.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesigning/enqueue-current-windows-installer.sh b/codesigning/enqueue-current-windows-installer.sh index c812ed3fd..6b5c14bc7 100644 --- a/codesigning/enqueue-current-windows-installer.sh +++ b/codesigning/enqueue-current-windows-installer.sh @@ -6,7 +6,7 @@ # NOTE: this script must be run from the directory containing this script. version=$(python -m setuptools_scm) -url_base=$(make -C .. print-DIST_URL_BASE | awk ' { print $3 } ') +url_base=$(make -C .. --no-print-directory print-DIST_URL_BASE | awk ' { print $3 } ') url="${url_base}/workbench/invest_${version}_workbench_win32_x64.exe" echo "Enqueuing URL ${url}" From 19c7909f2cad6a1ba00e1b36fed9c5253c3cb819 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Fri, 31 Jan 2025 11:42:37 -0800 Subject: [PATCH 50/80] Fleshing out docs and cleaning up. RE:#1580 --- codesigning/README.md | 57 ++++++++++++++++++++++++- codesigning/signing-worker/playbook.yml | 4 -- 2 files changed, 56 insertions(+), 5 deletions(-) diff --git a/codesigning/README.md b/codesigning/README.md index ecb678e0f..5cdbd80ef 100644 --- a/codesigning/README.md +++ b/codesigning/README.md @@ -1,9 +1,64 @@ # InVEST Codesigning Service +This directory contains all of the functional code and configuration (minus a +few secrets) that are needed to deploy our code-signing service. There are +three key components to this service: + +1. A cloud function (`gcp-cloudfunc/') that handles a google cloud + storage-backed cloud function that operates as a high-latency queue. +2. A script (`enqueue-binary.py`) that will enqueue a binary that already + exists on one of our GCS buckets. +3. A `systemd` service that runs on a debian:bookwork machine and periodically + polls the cloud function to dequeue the next item to sign. + +## Deploying the Cloud Function + +The necessary `gcloud` deployment configuration can be executed with + +```bash +$ make deploy-cloudfunction +``` + +### Secrets + +The current deployment process requires you to manually create an environment +variable, ``ACCESS_TOKEN``, that contains the secret token shared by the cloud +function, systemd service and enqueue script. 
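The token itself is an opaque shared secret; nothing in this series prescribes how it is minted. One reasonable sketch using the standard library (an assumption, not the documented provisioning process):

```python
import secrets

# ~256 bits of URL-safe randomness; one copy goes into the cloud
# function's ACCESS_TOKEN environment variable, the other onto the
# signing worker as access_token.txt.
print(secrets.token_urlsafe(32))
```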
+ +## Deploying the Systemd Service + +To deploy the systemd service, you will need to be on a computer that has ssh +access to `ncp-inkwell`, which is a computer that has a yubikey installed in +it. This computer is assumed to run debian:bookworm at this time. To deploy +(non-secret) changes to ncp-inkwell, run this in an environment where +`ansible-playbook` is available (`pip install ansible` to install): + +```bash +$ make deploy-worker +``` + +### Secrets + +The systemd service requires several secrets to be available in the codesigning +workspace, which is located at `/opt/natcap-codesign': + +* `/opt/natcap-codesign/pass.txt` is a plain text file containing only the PIN + for the yubikey +* `/opt/natcap-codesign/access_token.txt` is a plain text file containing the + access token shared with the cloud function, systemd service and enqueue script. +* `/opt/natcap-codesign/natcap-servers-1732552f0202.json` is a GCP service + account key used to authenticate to google cloud storage. This file must be + available in the `gcp-cloudfunc/` directory at the time of deployment. + ## Future Work -### Subscribe to GCS events +### Authenticate to the function with Identity Federation + +The cloud function has access controlled by a secret token, which is not ideal. +Instead, we should be using github/GCP identity federation to control access. + +### Trigger the function with GCS Events GCP Cloud Functions have the ability to subscribe to bucket events, which should allow us to subscribe very specifically to just those `finalize` events diff --git a/codesigning/signing-worker/playbook.yml b/codesigning/signing-worker/playbook.yml index f7ea17e6a..8d852a2b1 100644 --- a/codesigning/signing-worker/playbook.yml +++ b/codesigning/signing-worker/playbook.yml @@ -114,7 +114,3 @@ daemon_reload: true # reload in case there are any config changes state: restarted enabled: true - - #- install a service account key to write to GCS later - - From 651e715be6e7e2033254ac4b4062e2bb8002af1e Mon Sep 17 00:00:00 2001 From: James Douglass Date: Fri, 31 Jan 2025 11:45:05 -0800 Subject: [PATCH 51/80] Adding docstrings. RE:#1580 --- codesigning/gcp-cloudfunc/main.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/codesigning/gcp-cloudfunc/main.py b/codesigning/gcp-cloudfunc/main.py index ec01d7ea8..03da8c7c4 100644 --- a/codesigning/gcp-cloudfunc/main.py +++ b/codesigning/gcp-cloudfunc/main.py @@ -22,8 +22,7 @@ LOG_CLIENT.setup_logging() def get_lock(): """Acquire a GCS-based mutex. - This requires that the bucket we are using has versioning - + This requires that the bucket we are using has versioning. """ storage_client = storage.Client() bucket = storage_client.bucket(CODESIGN_DATA_BUCKET) @@ -51,6 +50,17 @@ def get_lock(): @functions_framework.http def main(request): + """Handle requests to this GCP Cloud Function. + + All requests must be POST requests and have a JSON body with the following + attributes: + + * token: a secret token that matches the ACCESS_TOKEN environment + variable that is defined in the cloud function configuration. + * action: either 'enqueue' or 'dequeue' + + If the action is 'enqueue', the request must also have a 'url' attribute. + """ data = request.get_json() if data['token'] != os.environ['ACCESS_TOKEN']: logging.info('Rejecting request due to invalid token') From 8b5e7f810451d73bdcf657f8a7122e7b0662690a Mon Sep 17 00:00:00 2001 From: James Douglass Date: Fri, 31 Jan 2025 11:50:21 -0800 Subject: [PATCH 52/80] Removing old windows codesigning stuff. 
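Patch 51's `get_lock` docstring above notes that the mutex requires object versioning on the bucket; the function body itself is not shown in this series. The canonical GCS test-and-set looks roughly like the following sketch (the blob name is an assumption, and the real implementation's retry and release logic are omitted):

```python
from google.api_core import exceptions
from google.cloud import storage

def try_acquire_lock(bucket_name, lock_blob_name='lock'):
    """Atomically create a lock object; True on success, False if held.

    if_generation_match=0 tells GCS to accept the upload only when no
    live generation of the object exists, making creation an atomic
    test-and-set.
    """
    bucket = storage.Client().bucket(bucket_name)
    try:
        bucket.blob(lock_blob_name).upload_from_string(
            b'locked', if_generation_match=0)
        return True
    except exceptions.PreconditionFailed:
        return False
```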
RE:#1580 --- .github/workflows/build-and-test.yml | 11 ----------- Makefile | 2 -- 2 files changed, 13 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 1a8786335..39df73e67 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -430,17 +430,6 @@ jobs: WORKBENCH_BINARY=$(find "$(pwd)/workbench/dist" -type f -name 'invest_*.dmg' | head -n 1) make WORKBENCH_BIN_TO_SIGN="$WORKBENCH_BINARY" codesign_mac - #- name: Sign binaries (Windows) - # if: github.event_name != 'pull_request' && matrix.os == 'windows-latest' # secrets not available in PR - # env: - # CERT_FILE: Stanford-natcap-code-signing-cert-expires-2024-01-26.p12 - # CERT_PASS: ${{ secrets.WINDOWS_CODESIGN_CERT_PASS }} - # run: | - # # figure out the path to signtool.exe (it keeps changing with SDK updates) - # SIGNTOOL_PATH=$(find 'C:\\Program Files (x86)\\Windows Kits\\10' -type f -name 'signtool.exe*' | head -n 1) - # WORKBENCH_BINARY=$(find "$(pwd)/workbench/dist" -type f -name 'invest_*.exe' | head -n 1) - # make WORKBENCH_BIN_TO_SIGN="$WORKBENCH_BINARY" SIGNTOOL="$SIGNTOOL_PATH" codesign_windows - - name: Deploy artifacts to GCS if: github.event_name != 'pull_request' run: make deploy diff --git a/Makefile b/Makefile index a4af63ba6..a400384fe 100644 --- a/Makefile +++ b/Makefile @@ -356,10 +356,8 @@ codesign_mac: codesign --timestamp --verbose --sign Stanford $(WORKBENCH_BIN_TO_SIGN) codesign_windows: - $(GSUTIL) cp gs://stanford_cert/$(CERT_FILE) $(BUILD_DIR)/$(CERT_FILE) "$(SIGNTOOL)" sign -fd SHA256 -f $(BUILD_DIR)/$(CERT_FILE) -p $(CERT_PASS) $(WORKBENCH_BIN_TO_SIGN) "$(SIGNTOOL)" timestamp -tr http://timestamp.sectigo.com -td SHA256 $(WORKBENCH_BIN_TO_SIGN) - $(RM) $(BUILD_DIR)/$(CERT_FILE) @echo "Installer was signed with signtool" deploy: From a93d9183d065391f6259c256134a856c8f06b0ee Mon Sep 17 00:00:00 2001 From: James Douglass Date: Fri, 31 Jan 2025 12:01:52 -0800 Subject: [PATCH 53/80] Adding docstrings. RE:#1580 --- codesigning/gcp-cloudfunc/README.md | 9 ---- codesigning/signing-worker/inventory.ini | 3 ++ codesigning/signing-worker/natcap-codesign.py | 48 ++++++++++++++++++- 3 files changed, 50 insertions(+), 10 deletions(-) delete mode 100644 codesigning/gcp-cloudfunc/README.md diff --git a/codesigning/gcp-cloudfunc/README.md b/codesigning/gcp-cloudfunc/README.md deleted file mode 100644 index c26f38617..000000000 --- a/codesigning/gcp-cloudfunc/README.md +++ /dev/null @@ -1,9 +0,0 @@ -# GCP Cloud Function Source - -The files in this directory are used during deployment to the GCP Cloud Function. - -See the Makefile in the parent directory for the `gcloud` invokation to deploy the function. - -## NOTES: - -* After deploying the function for the first time, make sure the correct secret is defined as an environment variable. diff --git a/codesigning/signing-worker/inventory.ini b/codesigning/signing-worker/inventory.ini index c2653461e..e20b26aa9 100644 --- a/codesigning/signing-worker/inventory.ini +++ b/codesigning/signing-worker/inventory.ini @@ -1,2 +1,5 @@ +# This is an ansible inventory file. If we had more hostnames to list here, we +# could group them into functional groups (e.g. codesign-workers). 
+ [ncp-inkwell] ncp-inkwell diff --git a/codesigning/signing-worker/natcap-codesign.py b/codesigning/signing-worker/natcap-codesign.py index 5438106ca..173b5b37f 100755 --- a/codesigning/signing-worker/natcap-codesign.py +++ b/codesigning/signing-worker/natcap-codesign.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +"""Service script to sign InVEST windows binaries.""" import json import logging @@ -23,6 +24,12 @@ with open(TOKEN_FILE) as token_file: def get_from_queue(): + """Get an item to sign from the queue. + + Returns: + ``None`` if there are no items in the queue, the JSON response dict + otherwise. + """ response = requests.post( "https://us-west1-natcap-servers.cloudfunctions.net/codesigning-queue", headers={"Content-Type": "application/json"}, @@ -36,8 +43,17 @@ def get_from_queue(): return response.json() -# See https://stackoverflow.com/a/16696317 def download_file(url): + """Download an arbitrarily large file. + + Adapted from https://stackoverflow.com/a/16696317 + + Args: + url (str): The URL to download. + + Returns: + ``None`` + """ local_filename = url.split('/')[-1] with requests.get(url, stream=True) as r: r.raise_for_status() @@ -48,10 +64,32 @@ def download_file(url): def upload_to_bucket(filename, path_on_bucket): + """Upload a file to a GCS bucket. + + Args: + filename (str): The local file to upload. + path_on_bucket (str): The path to the file on the GCS bucket, including + the ``gs://`` prefix. + + Returns: + ``None`` + """ subprocess.run(['gsutil', 'cp', filename, path_on_bucket], check=True) def sign_file(file_to_sign): + """Sign a local .exe file. + + Uses ``osslsigncode`` to sign the file using the private key stored on a + Yubikey, and the corresponding certificate that has been exported from the + PIV slot 9c. + + Args: + file_to_sign (str): The local filepath to the file to sign. + + Returns: + ``None`` + """ signed_file = f"{file_to_sign}.signed" pass_file = os.path.join(FILE_DIR, 'pass.txt') @@ -80,6 +118,14 @@ def sign_file(file_to_sign): def add_file_to_signed_list(url): + """Add a file to the list of signed files on GCS. + + Args: + url (str): The public HTTPS URL of the file to add to the list. + + Returns: + ``None`` + """ # Since this process is the only one that should be writing to this file, we # don't need to worry about race conditions. 
remote_signed_files_path = 'gs://natcap-codesigning/signed_files.json' From 83279d651bb0d59b057b2dd9f425d1421ba1eda9 Mon Sep 17 00:00:00 2001 From: Claire Simpson Date: Fri, 31 Jan 2025 14:49:55 -0700 Subject: [PATCH 54/80] Add coastal vulnerability unit tests --- tests/test_coastal_vulnerability.py | 95 +++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/tests/test_coastal_vulnerability.py b/tests/test_coastal_vulnerability.py index 518f916c5..40e3f2c93 100644 --- a/tests/test_coastal_vulnerability.py +++ b/tests/test_coastal_vulnerability.py @@ -8,6 +8,7 @@ import unittest import numpy.testing import pandas.testing +import pandas import pygeoprocessing import shapely.wkb import taskgraph @@ -1553,6 +1554,100 @@ class CoastalVulnerabilityTests(unittest.TestCase): # Polygon has 4 sides on exterior, 3 on interior, expect 7 lines self.assertTrue(len(line_list) == 7) + def test_assemble_results_and_calculate_exposure(self): + """Test that assemble_results_and_calculate_exposure correctly + calculates exposure""" + from natcap.invest.coastal_vulnerability import \ + assemble_results_and_calculate_exposure + + def make_shore_points_vector(shore_points_path): + # create 4 points, each with a unique 'shore_id' in [0..3]. + shore_geometries = [Point(0, 0), Point(1, 0), Point(2, 1), Point(3, 2)] + shore_fields = {'shore_id': ogr.OFTInteger} + shore_attributes = [{'shore_id': i} for i in range(len(shore_geometries))] + + # Create a spatial reference (projected or geographic) + srs = osr.SpatialReference() + srs.ImportFromEPSG(26910) # e.g. "NAD83 / UTM zone 10N" + pygeoprocessing.shapely_geometry_to_vector( + shore_geometries, shore_points_path, srs.ExportToWkt(), + vector_format='GPKG', + fields=shore_fields, + attribute_list=shore_attributes, + ogr_geom_type=ogr.wkbPoint + ) + + def make_habitat_csv(habitat_csv_path): + # Example: one habitat column named 'kelp', plus 'R_hab' + # We have 4 shore IDs, so we add 4 rows. Values are arbitrary. 
+ habitat_df = pandas.DataFrame( + {'shore_id': [0, 1, 2, 3], 'kelp': [5, 3, 5, 4], + 'seagrass': [4, 1, 2, 4], 'R_hab': [5, 2, 5, 3]}) + habitat_df.to_csv(habitat_csv_path, index=False) + + def make_risk_id_path_list(): + # Create pickles for risk data + relief_pkl = os.path.join(self.workspace_dir, 'relief.pickle') + slr_pkl = os.path.join(self.workspace_dir, 'slr.pickle') + population_pkl = os.path.join(self.workspace_dir, 'population.pickle') + + relief_data = {0: 10.0, 1: 50.0, 2: 30.0, 3: 80.0} # arbitrary data + slr_data = {0: 0.1, 1: 0.2, 2: 0.9, 3: 0.5} + population_data = {0: 123.0, 1: 999.0, 2: 55.0, 3: 0.0} + + for file_path, data_dict in zip([relief_pkl, slr_pkl, population_pkl], + [relief_data, slr_data, population_data]): + with open(file_path, 'wb') as f: + pickle.dump(data_dict, f) + + risk_id_path_list = [ + (relief_pkl, True, "R_relief"), # "True" => bin to 1..5 + (slr_pkl, True, "R_slr"), + (population_pkl, False, "population") + ] + return risk_id_path_list + + shore_points_path = os.path.join(self.workspace_dir, "shore_points.gpkg") + make_shore_points_vector(shore_points_path) + + habitat_csv_path = os.path.join(self.workspace_dir, 'habitat_protection.csv') + make_habitat_csv(habitat_csv_path) + + risk_id_path_list = make_risk_id_path_list() + + intermediate_vector_path = os.path.join(self.workspace_dir, + 'intermediate_exposure.gpkg') + intermediate_csv_path = os.path.join(self.workspace_dir, + 'intermediate_exposure.csv') + output_vector_path = os.path.join(self.workspace_dir, + 'coastal_exposure.gpkg') + output_csv_path = os.path.join(self.workspace_dir, + 'coastal_exposure.csv') + + # call function + assemble_results_and_calculate_exposure( + risk_id_path_list, + habitat_csv_path, + shore_points_path, + intermediate_vector_path, + intermediate_csv_path, + output_vector_path, + output_csv_path + ) + + # read field values in output vector and compare + actual_df = pandas.read_csv( + output_csv_path, + usecols=["exposure", "habitat_role", "exposure_no_habitats"]) + + expected_df = pandas.DataFrame({ + "exposure": [2.924018, 2.0, 4.641589, 2.289428], + "habitat_role": [0, 0.714418, 0, 0.424989], + "exposure_no_habitats": [2.924018, 2.714418, 4.641589, 2.714418]}) + + pandas.testing.assert_frame_equal( + actual_df, expected_df, check_dtype=False) + def assert_pickled_arrays_almost_equal( actual_values_pickle_path, expected_values_json_path): From 38cfd2356c9115f8d8079d2c587bad15efdcd455 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Fri, 31 Jan 2025 14:50:12 -0800 Subject: [PATCH 55/80] Noting change in HISTORY. RE:#1580 --- HISTORY.rst | 4 ++++ codesigning/signing-worker/natcap-codesign.sh | 21 ------------------- 2 files changed, 4 insertions(+), 21 deletions(-) delete mode 100644 codesigning/signing-worker/natcap-codesign.sh diff --git a/HISTORY.rst b/HISTORY.rst index 070f973eb..c4a41b08a 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -38,6 +38,10 @@ Unreleased Changes ------------------ +* General + * InVEST's windows binaries are now distributed once again with a valid + signature, signed by Stanford University. 
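Before the HISTORY patch, a quick check on the coastal vulnerability expectations above: they are consistent with exposure computed as the geometric mean of the ranks at each shore point, with `exposure_no_habitats` substituting the maximum habitat rank (5). The binned ranks here are inferred from the expected output, so treat this as a worked check rather than model documentation:

```python
import numpy

# Shore point 1: R_hab == 2, and the inferred ranks are
# R_relief == 2 and R_slr == 2.
exposure = numpy.prod([2, 2, 2]) ** (1 / 3)              # 2.0
# "No habitats" substitutes the maximum habitat rank, 5:
exposure_no_habitats = numpy.prod([5, 2, 2]) ** (1 / 3)  # 2.714418...
habitat_role = exposure_no_habitats - exposure           # 0.714418...
```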
+ https://github.com/natcap/invest/issues/1580 * Carbon * Updated styling of the HTML report generated by the carbon model, for visual consistency with the Workbench (`InVEST #1732 diff --git a/codesigning/signing-worker/natcap-codesign.sh b/codesigning/signing-worker/natcap-codesign.sh deleted file mode 100644 index a601da483..000000000 --- a/codesigning/signing-worker/natcap-codesign.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash - -# TODO: write a cron job to verify that the service is still running (heartbeat) -# TODO: incorporate slack updates to let us know when something was signed, or -# if the service crashed - -while true -do - DATA=$(curl -i -H "Accept: application/json" "https://us-west1-natcap-servers.cloudfunctions.net/codesigning-queue{\"token\": \"$ACCESS_TOKEN\"}") - # The response body will be empty when there is nothing in the queue. - if [ -z $(echo -e "$DATA" | tr -d '[:space:]') ]; then - echo "No queued requests, waiting 30 seconds..." - continue - else - BASENAME=$(jq ".basename" <<< $DATA) - wget -O $BASENAME $(jq ".https-url" <<< $DATA) - python3 opt/natcap-codesign/natcap_codesign.py /opt/natcap-codesign/codesign-cert-chain.pem "$BASENAME" - gcloud storage upload $BASENAME $(jq ".gs-uri" <<< $DATA) - fi - sleep 30 -done From a57aeba986514097e585176b58c761c087a4ada8 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Fri, 31 Jan 2025 18:14:45 -0800 Subject: [PATCH 56/80] Update codesigning/gcp-cloudfunc/main.py Co-authored-by: Emily Soth <43770515+emlys@users.noreply.github.com> --- codesigning/gcp-cloudfunc/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesigning/gcp-cloudfunc/main.py b/codesigning/gcp-cloudfunc/main.py index 03da8c7c4..16ce91a70 100644 --- a/codesigning/gcp-cloudfunc/main.py +++ b/codesigning/gcp-cloudfunc/main.py @@ -73,7 +73,7 @@ def main(request): storage_client = storage.Client() bucket = storage_client.bucket(CODESIGN_DATA_BUCKET) - logging.debug('Data POSTed: %s', data) + logging.debug(f'Data POSTed: {data}') if data['action'] == 'dequeue': with get_lock(): From 35922ee5f3cd41ed7c24a2042f12950ce65683c9 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Fri, 31 Jan 2025 18:14:56 -0800 Subject: [PATCH 57/80] Update codesigning/gcp-cloudfunc/main.py Co-authored-by: Emily Soth <43770515+emlys@users.noreply.github.com> --- codesigning/gcp-cloudfunc/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesigning/gcp-cloudfunc/main.py b/codesigning/gcp-cloudfunc/main.py index 16ce91a70..cb85d7682 100644 --- a/codesigning/gcp-cloudfunc/main.py +++ b/codesigning/gcp-cloudfunc/main.py @@ -99,7 +99,7 @@ def main(request): elif data['action'] == 'enqueue': url = data['url'] - logging.info('Attempting to enqueue url" %s', url) + logging.info(f'Attempting to enqueue url {url}') if not url.endswith('.exe'): logging.info("Rejecting URL because it doesn't end in .exe") From 787533953a543afd882d5c3f7b54ede8973f8b05 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Fri, 31 Jan 2025 18:15:05 -0800 Subject: [PATCH 58/80] Update codesigning/gcp-cloudfunc/main.py Co-authored-by: Emily Soth <43770515+emlys@users.noreply.github.com> --- codesigning/gcp-cloudfunc/main.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/codesigning/gcp-cloudfunc/main.py b/codesigning/gcp-cloudfunc/main.py index cb85d7682..243e12fd6 100644 --- a/codesigning/gcp-cloudfunc/main.py +++ b/codesigning/gcp-cloudfunc/main.py @@ -106,8 +106,7 @@ def main(request): return jsonify('Invalid URL to sign'), 400 if not 
url.startswith(GOOGLE_PREFIX): - logging.info('Rejecting URL because it does not start with %s', - GOOGLE_PREFIX) + logging.info(f'Rejecting URL because it does not start with {GOOGLE_PREFIX}') return jsonify('Invalid host'), 400 if not url.startswith(( From 0e643894c14d50a1e1f3673fcbd0060c5acab1e5 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Fri, 31 Jan 2025 18:21:46 -0800 Subject: [PATCH 59/80] Updating the cloud function docstring. RE:#1580 --- codesigning/gcp-cloudfunc/main.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/codesigning/gcp-cloudfunc/main.py b/codesigning/gcp-cloudfunc/main.py index 243e12fd6..21b98a31a 100644 --- a/codesigning/gcp-cloudfunc/main.py +++ b/codesigning/gcp-cloudfunc/main.py @@ -60,6 +60,17 @@ def main(request): * action: either 'enqueue' or 'dequeue' If the action is 'enqueue', the request must also have a 'url' attribute. + The 'url' attribute, when provided, must be a URL to a file that meets + these requirements: + * The URL must be a publicly accessible URL + * The URL must be a file that ends in '.exe' + * The URL must be located in either the releases bucket, or else + in the dev builds bucket. It doesn't necessarily have to be an + InVEST binary. + * The URL must be a file that is not older than June 1, 2024 + * The URL must be a file that is not already in the queue + * The URL should be a file that is not already signed (if the file has + already been signed, its signature will be overwritten) """ data = request.get_json() if data['token'] != os.environ['ACCESS_TOKEN']: @@ -130,6 +141,9 @@ def main(request): # If the file is too old, reject it. Trying to avoid a # denial-of-service by invoking the service with very old files. + # I just pulled June 1 out of thin air as a date that is a little while + # ago, but not so long ago that we could suddenly have many files + # enqueued. mday, mmonth, myear = response.headers['Last-Modified'].split(' ')[1:4] modified_time = datetime.datetime.strptime( ' '.join((mday, mmonth, myear)), '%d %b %Y') From 33a60e539187c064a5eda8c94ca4ea88933a0995 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Fri, 31 Jan 2025 18:23:00 -0800 Subject: [PATCH 60/80] Always restarting the service. RE:#1580 --- codesigning/signing-worker/natcap-codesign.service | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesigning/signing-worker/natcap-codesign.service b/codesigning/signing-worker/natcap-codesign.service index 69c807525..afc728541 100644 --- a/codesigning/signing-worker/natcap-codesign.service +++ b/codesigning/signing-worker/natcap-codesign.service @@ -29,5 +29,5 @@ WorkingDirectory=/tmp [Service] # Run in the foreground Type=simple -Restart=no # change back to always +Restart=always ExecStart=python3 /opt/natcap-codesign/natcap-codesign.py /opt/natcap-codesign/codesign-cert-chain.pem From dfc275ae123b915032662ea2f3b6709fd34a5bdc Mon Sep 17 00:00:00 2001 From: James Douglass Date: Fri, 31 Jan 2025 18:38:23 -0800 Subject: [PATCH 61/80] Improving osslsigncode comments. 
RE:#1580 --- codesigning/signing-worker/playbook.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/codesigning/signing-worker/playbook.yml b/codesigning/signing-worker/playbook.yml index 8d852a2b1..727295e65 100644 --- a/codesigning/signing-worker/playbook.yml +++ b/codesigning/signing-worker/playbook.yml @@ -53,9 +53,11 @@ update_cache: true default_release: "{{ ansible_distribution_release }}-backports" pkg: - # The backports version is needed because the version in bookworm has - # a critical bug in it that prevents it from working with our - # certificate. + # The normal debian:bookworm repos have osslsigncode 2.5, which has a + # bug in it that prevents it from signing our binaries. This was + # fixed in osslsigncode 2.6. The version available in + # bookworm-backports is 2.9. The issue (and solution) was similar to + # https://stackoverflow.com/a/78308879 - osslsigncode - name: Create the codesign directory From 3801a351f639995f493a24885061b8e2a14e973b Mon Sep 17 00:00:00 2001 From: James Douglass Date: Fri, 31 Jan 2025 18:41:07 -0800 Subject: [PATCH 62/80] Commenting the .gitignore. RE#1580 --- codesigning/signing-worker/.gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/codesigning/signing-worker/.gitignore b/codesigning/signing-worker/.gitignore index a2a720a80..7ee4051be 100644 --- a/codesigning/signing-worker/.gitignore +++ b/codesigning/signing-worker/.gitignore @@ -1 +1,5 @@ +# This key is copied from GCP. I've added it to the .gitignore to try to +# prevent it from getting committed and pushed to git, while still allowing it +# to be where ansible expects the key to be so ansible can copy the file to the +# remote server. natcap-servers-1732552f0202.json From 7c54e2ea9f2fd03dc5c8273613a785a040ff8a10 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Mon, 3 Feb 2025 09:47:48 -0800 Subject: [PATCH 63/80] Apply suggestions from code review Co-authored-by: Doug --- codesigning/README.md | 2 +- codesigning/gcp-cloudfunc/main.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/codesigning/README.md b/codesigning/README.md index 5cdbd80ef..07cee22b6 100644 --- a/codesigning/README.md +++ b/codesigning/README.md @@ -8,7 +8,7 @@ three key components to this service: storage-backed cloud function that operates as a high-latency queue. 2. A script (`enqueue-binary.py`) that will enqueue a binary that already exists on one of our GCS buckets. -3. A `systemd` service that runs on a debian:bookwork machine and periodically +3. A `systemd` service that runs on a debian:bookworm machine and periodically polls the cloud function to dequeue the next item to sign. ## Deploying the Cloud Function diff --git a/codesigning/gcp-cloudfunc/main.py b/codesigning/gcp-cloudfunc/main.py index 21b98a31a..58a426220 100644 --- a/codesigning/gcp-cloudfunc/main.py +++ b/codesigning/gcp-cloudfunc/main.py @@ -152,7 +152,7 @@ def main(request): return jsonify('File is too old'), 400 with get_lock(): - # first, check to see if the file has already been signed. + # First, check to see if the file has already been signed. signed_files_list = codesign_bucket.blob('signed_files.json') if not signed_files_list.exists(): signed_files_dict = {'signed_files': []} From 782dbd141c442c0ab5786a0882415de9b8ba15d0 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Mon, 3 Feb 2025 10:00:16 -0800 Subject: [PATCH 64/80] Using the signed binary in the github release. 
RE:#1580 --- .github/workflows/release-part-2.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release-part-2.yml b/.github/workflows/release-part-2.yml index 0275f1a89..ad394dc37 100644 --- a/.github/workflows/release-part-2.yml +++ b/.github/workflows/release-part-2.yml @@ -72,14 +72,16 @@ jobs: rm -rf artifacts/Wheel* # download each artifact separately so that the command will fail if any is missing - for artifact in Workbench-Windows-binary \ - Workbench-macOS-binary \ + for artifact in Workbench-macOS-binary \ InVEST-sample-data \ InVEST-user-guide do gh run download $RUN_ID --dir artifacts --name "$artifact" done + # download the signed windows workbench file from GCS + wget --directory-prefix=artifacts https://storage.googleapis.com/releases.naturalcapitalproject.org/invest/${{ env.VERSION }}/workbench/invest_${{ env.VERSION }}_workbench_win32_x64.exe + # We build one sdist per combination of OS and python version, so just # download and unzip all of them into an sdists directory so we can # just grab the first one. This approach is more flexible to changes From de9ee0ca315f923e152d8cb496b2b8290ad6a672 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 4 Feb 2025 12:41:56 -0800 Subject: [PATCH 65/80] Adding posting to slack to the signing worker. RE:#1580 --- codesigning/README.md | 2 + codesigning/signing-worker/natcap-codesign.py | 61 +++++++++++++++++-- 2 files changed, 59 insertions(+), 4 deletions(-) diff --git a/codesigning/README.md b/codesigning/README.md index 07cee22b6..71763ad83 100644 --- a/codesigning/README.md +++ b/codesigning/README.md @@ -46,6 +46,8 @@ workspace, which is located at `/opt/natcap-codesign': for the yubikey * `/opt/natcap-codesign/access_token.txt` is a plain text file containing the access token shared with the cloud function, systemd service and enqueue script. +* `/opt/natcap-codesign/slack_token.txt` is a plain text file containing the + slack token used to post messages to our slack workspace. * `/opt/natcap-codesign/natcap-servers-1732552f0202.json` is a GCP service account key used to authenticate to google cloud storage. This file must be available in the `gcp-cloudfunc/` directory at the time of deployment. 
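Two URL conventions run through patch 64's `wget` step and the enqueue script: the versioned release path, and the `gs://` to public-HTTPS mapping. A sketch, with the version string assumed (in CI it comes from `python -m setuptools_scm`):

```python
def to_public_url(gs_uri):
    """Map a gs:// URI to its public HTTPS form, as enqueue-binary.py does."""
    return gs_uri.replace('gs://', 'https://storage.googleapis.com/')

version = '3.14.3'  # assumed for illustration
gs_uri = (f'gs://releases.naturalcapitalproject.org/invest/{version}/'
          f'workbench/invest_{version}_workbench_win32_x64.exe')
https_url = to_public_url(gs_uri)  # what the wget step downloads
```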
diff --git a/codesigning/signing-worker/natcap-codesign.py b/codesigning/signing-worker/natcap-codesign.py index 173b5b37f..4fffea83d 100755 --- a/codesigning/signing-worker/natcap-codesign.py +++ b/codesigning/signing-worker/natcap-codesign.py @@ -9,6 +9,7 @@ import subprocess import sys import textwrap import time +import traceback import pexpect # apt install python3-pexpect import requests # apt install python3-requests @@ -18,10 +19,55 @@ logging.basicConfig(level=logging.INFO) CERTIFICATE = sys.argv[1] FILE_DIR = os.path.dirname(__file__) -TOKEN_FILE = os.path.join(FILE_DIR, "access_token.txt") -with open(TOKEN_FILE) as token_file: +QUEUE_TOKEN_FILE = os.path.join(FILE_DIR, "access_token.txt") +with open(QUEUE_TOKEN_FILE) as token_file: ACCESS_TOKEN = token_file.read().strip() +SLACK_TOKEN_FILE = os.path.join(FILE_DIR, "slack_token.txt") +with open(SLACK_TOKEN_FILE) as token_file: + SLACK_ACCESS_TOKEN = token_file.read().strip() + + +SLACK_NOTIFICATION_SUCCESS = textwrap.dedent( + """\ + :lower_left_fountain_pen: Successfully signed and uploaded `{filename}` to + [google cloud]({url}) + """) + +SLACK_NOTIFICATION_FAILURE = textwrap.dedent( + """\ + :red-flag: Something went wrong while signing {filename}: + ``` + {traceback} + ``` + Please investigate on ncp-inkwell using: + ``` + sudo journalctl -u natcap-codesign.service + ``` + """) + + +def post_to_slack(message): + """Post a message to the slack channel. + + Args: + message (str): The message to post. + + Returns: + ``None`` + """ + resp = requests.post( + "https://slack.com/api/chat.postMessage", + headers={ + "Authorization": f"Bearer {SLACK_ACCESS_TOKEN}", + "Content-Type": "application/json; charset=utf-8" + }, + json={ + "channel": "CESG428BH", # sw-invest + "text": message + }) + resp.raise_for_status() + def get_from_queue(): """Get an item to sign from the queue. @@ -171,11 +217,18 @@ def main(): f"Adding {file_to_sign['https-url']} to signed files list") add_file_to_signed_list(file_to_sign['https-url']) LOGGER.info(f"Removing {filename}") + post_to_slack( + SLACK_NOTIFICATION_SUCCESS.format( + filename=filename, + url=file_to_sign['https-url'])) os.remove(filename) LOGGER.info("Signing complete.") except Exception as e: - LOGGER.exception("Unexpected error signing file") - raise e + LOGGER.exception(f"Unexpected error signing file: {e}") + post_to_slack( + SLACK_NOTIFICATION_FAILURE.format( + filename=file_to_sign['https-url'], + traceback=traceback.format_exc())) time.sleep(60) From 4077f026cf1030c473e2a2ffc0886ff163eb32e5 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 4 Feb 2025 12:48:39 -0800 Subject: [PATCH 66/80] Not using the shell script any longer. RE:#1580 --- codesigning/signing-worker/playbook.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/codesigning/signing-worker/playbook.yml b/codesigning/signing-worker/playbook.yml index 727295e65..319180a0a 100644 --- a/codesigning/signing-worker/playbook.yml +++ b/codesigning/signing-worker/playbook.yml @@ -98,12 +98,6 @@ dest: /opt/natcap-codesign/natcap-codesign.py mode: 0755 - - name: Install codesigning shell script - ansible.builtin.copy: - src: natcap-codesign.sh - dest: /opt/natcap-codesign/natcap-codesign.sh - mode: 0755 - - name: Install the codesign service ansible.builtin.copy: src: natcap-codesign.service From 8464659e595dac2d474012e3cf4653de9a842ab9 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 4 Feb 2025 13:47:40 -0800 Subject: [PATCH 67/80] Writing out the signature information to a separate file. 
RE:#1580 --- codesigning/signing-worker/natcap-codesign.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/codesigning/signing-worker/natcap-codesign.py b/codesigning/signing-worker/natcap-codesign.py index 4fffea83d..1567f5603 100755 --- a/codesigning/signing-worker/natcap-codesign.py +++ b/codesigning/signing-worker/natcap-codesign.py @@ -200,6 +200,29 @@ def add_file_to_signed_list(url): LOGGER.info(f"Added {url} to {remote_signed_files_path}") +def note_signature_complete(local_filepath, gs_uri): + """Create a small file next to the signed file to indicate signature. + + Args: + gs_uri (str): The GCS URI of the signed file. + """ + # Using osslsigncode to verify the output always fails for me, even though + # the signature is clearly valid when checked on Windows. + process = subprocess.run( + ['osslsigncode', 'verify', '-in', local_filepath], check=False, + capture_output=True) + + temp_filepath = f'/tmp/{os.path.basename(local_filepath)}.signed' + with open(temp_filepath, 'w') as signature_file: + signature_file.write(process.stdout.decode()) + + try: + subprocess.run( + ['gsutil', 'cp', temp_filepath, f'{gs_uri}.signature'], check=True) + finally: + os.remove(temp_filepath) + + def main(): while True: try: @@ -216,6 +239,7 @@ def main(): LOGGER.info( f"Adding {file_to_sign['https-url']} to signed files list") add_file_to_signed_list(file_to_sign['https-url']) + note_signature_complete(filename, file_to_sign['gs-uri']) LOGGER.info(f"Removing {filename}") post_to_slack( SLACK_NOTIFICATION_SUCCESS.format( From 1cfc9c225d9dba3d7603ff5f5115cab8710a874a Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 4 Feb 2025 13:59:07 -0800 Subject: [PATCH 68/80] Correcting slack markdown link syntax. RE:#1580 --- codesigning/signing-worker/natcap-codesign.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesigning/signing-worker/natcap-codesign.py b/codesigning/signing-worker/natcap-codesign.py index 1567f5603..80a430663 100755 --- a/codesigning/signing-worker/natcap-codesign.py +++ b/codesigning/signing-worker/natcap-codesign.py @@ -31,7 +31,7 @@ with open(SLACK_TOKEN_FILE) as token_file: SLACK_NOTIFICATION_SUCCESS = textwrap.dedent( """\ :lower_left_fountain_pen: Successfully signed and uploaded `{filename}` to - [google cloud]({url}) + <{url}|google cloud> """) SLACK_NOTIFICATION_FAILURE = textwrap.dedent( From 8277d5fee89e8f6033e9a016383738101f63ac8b Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 4 Feb 2025 14:15:41 -0800 Subject: [PATCH 69/80] Refactoring to handle signature files. Signature files are now pushed to the bucket alongside the .exe file, and we check whether the file exists before enqueueing the exe. RE:#1580 --- codesigning/gcp-cloudfunc/main.py | 20 +--- codesigning/signing-worker/natcap-codesign.py | 107 +++++++++--------- 2 files changed, 58 insertions(+), 69 deletions(-) diff --git a/codesigning/gcp-cloudfunc/main.py b/codesigning/gcp-cloudfunc/main.py index 58a426220..247ad9841 100644 --- a/codesigning/gcp-cloudfunc/main.py +++ b/codesigning/gcp-cloudfunc/main.py @@ -135,7 +135,7 @@ def main(request): # If the file does not exist at this URL, reject it. 
response = requests.head(url) - if response.status_code > 400: + if response.status_code >= 400: logging.info('Rejecting URL because it does not exist') return jsonify('Requested file does not exist'), 403 @@ -151,20 +151,12 @@ def main(request): logging.info('Rejecting URL because it is too old') return jsonify('File is too old'), 400 + response = requests.head(f'{url}.signature') + if response.status_code >= 400: + logging.info('Rejecting URL because it has already been signed.') + return jsonify('File has already been signed'), 400 + with get_lock(): - # First, check to see if the file has already been signed. - signed_files_list = codesign_bucket.blob('signed_files.json') - if not signed_files_list.exists(): - signed_files_dict = {'signed_files': []} - else: - signed_files_dict = json.loads( - signed_files_list.download_as_string()) - - if url in signed_files_dict['signed_files']: - logging.info( - 'Rejecting URL because it has already been signed') - return jsonify('File has already been signed'), 400 - # Since the file has not already been signed, add the file to the # queue queuefile = codesign_bucket.blob('queue.json') diff --git a/codesigning/signing-worker/natcap-codesign.py b/codesigning/signing-worker/natcap-codesign.py index 80a430663..7fca83784 100755 --- a/codesigning/signing-worker/natcap-codesign.py +++ b/codesigning/signing-worker/natcap-codesign.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 """Service script to sign InVEST windows binaries.""" -import json import logging import os import shutil @@ -31,9 +30,16 @@ with open(SLACK_TOKEN_FILE) as token_file: SLACK_NOTIFICATION_SUCCESS = textwrap.dedent( """\ :lower_left_fountain_pen: Successfully signed and uploaded `{filename}` to - <{url}|google cloud> + <{url}|google cloud> """) +SLACK_NOTIFICATION_ALREADY_SIGNED = textwrap.dedent( + """\ + :lower_left_fountain_pen: `{filename}` is already signed! + <{url}|google cloud> + """) + + SLACK_NOTIFICATION_FAILURE = textwrap.dedent( """\ :red-flag: Something went wrong while signing {filename}: @@ -163,44 +169,7 @@ def sign_file(file_to_sign): shutil.move(signed_file, file_to_sign) -def add_file_to_signed_list(url): - """Add a file to the list of signed files on GCS. - - Args: - url (str): The public HTTPS URL of the file to add to the list. - - Returns: - ``None`` - """ - # Since this process is the only one that should be writing to this file, we - # don't need to worry about race conditions. - remote_signed_files_path = 'gs://natcap-codesigning/signed_files.json' - local_signed_files_path = os.path.join(FILE_DIR, 'signed_files.json') - - # Test to see if the signed files json file exists in the bucket; create it - # if not. 
- exists_proc = subprocess.run( - ['gsutil', '-q', 'stat', remote_signed_files_path], check=False) - if exists_proc.returncode != 0: - signed_files_dict = {'signed_files': []} - else: - subprocess.run( - ['gsutil', 'cp', remote_signed_files_path, - local_signed_files_path], check=True) - with open(local_signed_files_path, 'r') as signed_files: - signed_files_dict = json.load(signed_files) - - with open(local_signed_files_path, 'w') as signed_files: - signed_files_dict['signed_files'].append(url) - json.dump(signed_files_dict, signed_files) - - subprocess.run( - ['gsutil', 'cp', local_signed_files_path, - remote_signed_files_path], check=True) - LOGGER.info(f"Added {url} to {remote_signed_files_path}") - - -def note_signature_complete(local_filepath, gs_uri): +def note_signature_complete(local_filepath, target_gs_uri): """Create a small file next to the signed file to indicate signature. Args: @@ -217,12 +186,31 @@ def note_signature_complete(local_filepath, gs_uri): signature_file.write(process.stdout.decode()) try: + # Upload alongside the original file subprocess.run( - ['gsutil', 'cp', temp_filepath, f'{gs_uri}.signature'], check=True) + ['gsutil', 'cp', temp_filepath, f'{target_gs_uri}.signature'], + check=True) finally: os.remove(temp_filepath) +def has_signature(filename): + """Check if a file is already signed. + + Args: + filename (str): The local filepath to the file to check. + + Returns: + ``True`` if the file is signed, ``False`` otherwise. + """ + process = subprocess.run( + ['osslsigncode', 'verify', '-in', filename, '2>&1', '|', 'grep', + 'No signature found'], check=False) + if process.returncode == 0: + return False + return True + + def main(): while True: try: @@ -232,21 +220,30 @@ def main(): else: LOGGER.info(f"Dequeued and downloading {file_to_sign['https-url']}") filename = download_file(file_to_sign['https-url']) - LOGGER.info(f"Signing {filename}") - sign_file(filename) - LOGGER.info(f"Uploading signed file to {file_to_sign['gs-uri']}") - upload_to_bucket(filename, file_to_sign['gs-uri']) - LOGGER.info( - f"Adding {file_to_sign['https-url']} to signed files list") - add_file_to_signed_list(file_to_sign['https-url']) - note_signature_complete(filename, file_to_sign['gs-uri']) - LOGGER.info(f"Removing {filename}") - post_to_slack( - SLACK_NOTIFICATION_SUCCESS.format( - filename=filename, - url=file_to_sign['https-url'])) + + LOGGER.info(f"Checking if {filename} is already signed") + if has_signature(filename): + LOGGER.info(f"{filename} is already signed, skipping") + post_to_slack( + SLACK_NOTIFICATION_ALREADY_SIGNED.format( + filename=filename, + url=file_to_sign['https-url'])) + note_signature_complete(filename, file_to_sign['gs-uri']) + else: + LOGGER.info(f"Signing {filename}") + sign_file(filename) + LOGGER.info(f"Uploading signed file to {file_to_sign['gs-uri']}") + upload_to_bucket(filename, file_to_sign['gs-uri']) + LOGGER.info( + f"Adding {file_to_sign['https-url']} to signed files list") + note_signature_complete(filename, file_to_sign['gs-uri']) + LOGGER.info(f"Removing {filename}") + post_to_slack( + SLACK_NOTIFICATION_SUCCESS.format( + filename=filename, + url=file_to_sign['https-url'])) + LOGGER.info("Signing complete.") os.remove(filename) - LOGGER.info("Signing complete.") except Exception as e: LOGGER.exception(f"Unexpected error signing file: {e}") post_to_slack( From 334bc1dc53aaf4d6329ae09ff8ff71adb5c087be Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 4 Feb 2025 14:39:22 -0800 Subject: [PATCH 70/80] Changing status code to 204 from 
400. RE:#1580 --- codesigning/gcp-cloudfunc/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesigning/gcp-cloudfunc/main.py b/codesigning/gcp-cloudfunc/main.py index 247ad9841..8d326264c 100644 --- a/codesigning/gcp-cloudfunc/main.py +++ b/codesigning/gcp-cloudfunc/main.py @@ -154,7 +154,7 @@ def main(request): response = requests.head(f'{url}.signature') if response.status_code >= 400: logging.info('Rejecting URL because it has already been signed.') - return jsonify('File has already been signed'), 400 + return jsonify('File has already been signed'), 204 with get_lock(): # Since the file has not already been signed, add the file to the From 951f93cb51ba98ae0a43a9b1b7e234d93764f368 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 4 Feb 2025 14:41:44 -0800 Subject: [PATCH 71/80] Correcting http error code checking. RE:#1580 --- codesigning/gcp-cloudfunc/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesigning/gcp-cloudfunc/main.py b/codesigning/gcp-cloudfunc/main.py index 8d326264c..f6278dc2e 100644 --- a/codesigning/gcp-cloudfunc/main.py +++ b/codesigning/gcp-cloudfunc/main.py @@ -152,7 +152,7 @@ def main(request): return jsonify('File is too old'), 400 response = requests.head(f'{url}.signature') - if response.status_code >= 400: + if response.status_code != 404: logging.info('Rejecting URL because it has already been signed.') return jsonify('File has already been signed'), 204 From f2eef429eda246c341e9c226837244d7d7b3426c Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 4 Feb 2025 15:02:20 -0800 Subject: [PATCH 72/80] Correcting how we do text searching. RE:#1580 --- codesigning/signing-worker/natcap-codesign.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/codesigning/signing-worker/natcap-codesign.py b/codesigning/signing-worker/natcap-codesign.py index 7fca83784..8e242569d 100755 --- a/codesigning/signing-worker/natcap-codesign.py +++ b/codesigning/signing-worker/natcap-codesign.py @@ -204,9 +204,11 @@ def has_signature(filename): ``True`` if the file is signed, ``False`` otherwise. """ process = subprocess.run( - ['osslsigncode', 'verify', '-in', filename, '2>&1', '|', 'grep', - 'No signature found'], check=False) - if process.returncode == 0: + ['osslsigncode', 'verify', '-in', filename], check=False) + + process_stderr = process.stderr.decode() + + if 'No signature found' in process_stderr: return False return True From 9784a269a88e30af98aa10134997d4fb2fdb73ca Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 4 Feb 2025 15:24:36 -0800 Subject: [PATCH 73/80] Correcting how we check for signatures. RE:#1580 --- codesigning/signing-worker/natcap-codesign.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/codesigning/signing-worker/natcap-codesign.py b/codesigning/signing-worker/natcap-codesign.py index 8e242569d..084857e45 100755 --- a/codesigning/signing-worker/natcap-codesign.py +++ b/codesigning/signing-worker/natcap-codesign.py @@ -204,11 +204,17 @@ def has_signature(filename): ``True`` if the file is signed, ``False`` otherwise. """ process = subprocess.run( - ['osslsigncode', 'verify', '-in', filename], check=False) + ['osslsigncode', 'verify', '-in', filename], capture_output=True, + check=False) - process_stderr = process.stderr.decode() + # Handle the case where it's possible there might not be any stdout or + # stderr to decode. 
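+    # (subprocess.run() only populates these attributes when
+    # capture_output=True or explicit pipes are given; otherwise both
+    # process.stdout and process.stderr are None.)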
+ process_output = "" + for output in (process.stdout, process.stderr): + if output is not None: + process_output += output.decode() - if 'No signature found' in process_stderr: + if 'No signature found' in process_output: return False return True From ec5d2e7c4661d54e1e0dcf3ddc229d37eeb0ab74 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 4 Feb 2025 16:15:50 -0800 Subject: [PATCH 74/80] Updating fetch depth to fix versioning. RE:#1767 --- .github/workflows/release-part-1.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/release-part-1.yml b/.github/workflows/release-part-1.yml index 94ddd1a2a..b106bcec2 100644 --- a/.github/workflows/release-part-1.yml +++ b/.github/workflows/release-part-1.yml @@ -36,6 +36,7 @@ jobs: - uses: actions/checkout@v4 if: env.RUN == 'true' with: + fetch-depth: 0 # fetch entire history, for versioning token: ${{ secrets.AUTORELEASE_BOT_PAT }} - name: Install dependencies From 49a07a2358830e9cf3ed6a45570cb85b3e84898c Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 4 Feb 2025 16:32:24 -0800 Subject: [PATCH 75/80] Updating and testing validation. The demand table is now required when the valuation table is provided. RE:#1769 --- src/natcap/invest/annual_water_yield.py | 7 ++--- tests/test_annual_water_yield.py | 37 +++++++++++++++++-------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/src/natcap/invest/annual_water_yield.py b/src/natcap/invest/annual_water_yield.py index fdaeaa862..501f1ce6b 100644 --- a/src/natcap/invest/annual_water_yield.py +++ b/src/natcap/invest/annual_water_yield.py @@ -250,11 +250,11 @@ MODEL_SPEC = { } }, "index_col": "lucode", - "required": False, + "required": "valuation_table_path", "about": gettext( "A table of water demand for each LULC class. Each LULC code " "in the LULC raster must have a corresponding row in this " - "table."), + "table. Required if 'valuation_table_path' is provided."), "name": gettext("water demand table") }, "valuation_table_path": { @@ -501,14 +501,13 @@ def execute(args): a path to an input CSV table of LULC classes, showing consumptive water use for each landuse / land-cover type (cubic meters per year) to calculate - water scarcity. + water scarcity. Required if ``valuation_table_path`` is provided. args['valuation_table_path'] (string): (optional) if a non-empty string, a path to an input CSV table of hydropower stations with the following fields to calculate valuation: 'ws_id', 'time_span', 'discount', 'efficiency', 'fraction', 'cost', 'height', 'kw_price' - Required if ``calculate_valuation`` is True. args['n_workers'] (int): (optional) The number of worker processes to use for processing this model. 
If omitted, computation will take diff --git a/tests/test_annual_water_yield.py b/tests/test_annual_water_yield.py index e450f4b7e..6a1a4c7ff 100644 --- a/tests/test_annual_water_yield.py +++ b/tests/test_annual_water_yield.py @@ -1,14 +1,13 @@ """Module for Regression Testing the InVEST Annual Water Yield module.""" -import unittest -import tempfile -import shutil import os +import shutil +import tempfile +import unittest -import pandas import numpy -from osgeo import gdal +import pandas import pygeoprocessing - +from osgeo import gdal REGRESSION_DATA = os.path.join( os.path.dirname(__file__), '..', 'data', 'invest-test-data', 'annual_water_yield') @@ -74,7 +73,7 @@ class AnnualWaterYieldTests(unittest.TestCase): with self.assertRaises(ValueError) as cm: annual_water_yield.execute(args) self.assertTrue('veg value must be either 1 or 0' in str(cm.exception)) - + def test_missing_lulc_value(self): """Hydro: catching missing LULC value in Biophysical table.""" from natcap.invest import annual_water_yield @@ -89,7 +88,7 @@ class AnnualWaterYieldTests(unittest.TestCase): bio_df = bio_df[bio_df['lucode'] != 2] bio_df.to_csv(bad_biophysical_path) bio_df = None - + args['biophysical_table_path'] = bad_biophysical_path with self.assertRaises(ValueError) as cm: @@ -97,13 +96,13 @@ class AnnualWaterYieldTests(unittest.TestCase): self.assertTrue( "The missing values found in the LULC raster but not the table" " are: [2]" in str(cm.exception)) - + def test_missing_lulc_demand_value(self): """Hydro: catching missing LULC value in Demand table.""" from natcap.invest import annual_water_yield args = AnnualWaterYieldTests.generate_base_args(self.workspace_dir) - + args['demand_table_path'] = os.path.join( SAMPLE_DATA, 'water_demand_table.csv') args['sub_watersheds_path'] = os.path.join( @@ -117,7 +116,7 @@ class AnnualWaterYieldTests(unittest.TestCase): demand_df = demand_df[demand_df['lucode'] != 2] demand_df.to_csv(bad_demand_path) demand_df = None - + args['demand_table_path'] = bad_demand_path with self.assertRaises(ValueError) as cm: @@ -247,7 +246,8 @@ class AnnualWaterYieldTests(unittest.TestCase): def test_validation(self): """Hydro: test failure cases on the validation function.""" - from natcap.invest import annual_water_yield, validation + from natcap.invest import annual_water_yield + from natcap.invest import validation args = AnnualWaterYieldTests.generate_base_args(self.workspace_dir) @@ -367,3 +367,16 @@ class AnnualWaterYieldTests(unittest.TestCase): self.assertTrue( 'but are not found in the valuation table' in actual_message, actual_message) + + # if the demand table is missing but the valuation table is present, + # make sure we have a validation error. + args_missing_demand_table = args.copy() + args_missing_demand_table['demand_table_path'] = '' + args_missing_demand_table['valuation_table_path'] = ( + os.path.join(SAMPLE_DATA, 'hydropower_valuation_table.csv')) + validation_warnings = annual_water_yield.validate( + args_missing_demand_table) + self.assertEqual(len(validation_warnings), 1) + self.assertEqual( + validation_warnings[0], + (['demand_table_path'], 'Input is required but has no value')) From b45b26d52c240807bdb14bbfc464cc95d515a656 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 4 Feb 2025 16:34:42 -0800 Subject: [PATCH 76/80] Noting change in HISTORY. 
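The string-valued ``required`` introduced in the ``MODEL_SPEC`` above is a conditional requirement: the string names another arg, and the input becomes required exactly when that arg has a value. A simplified sketch of the idea (this mirrors how such conditions behave, not natcap.invest's exact implementation):

```python
def is_required(required, args):
    """Evaluate a MODEL_SPEC-style 'required' value against user args."""
    if isinstance(required, bool):
        return required
    # Treat the string as a boolean expression over which args have values.
    sufficiency = {key: bool(value) for key, value in args.items()}
    return bool(eval(required, {'__builtins__': None}, sufficiency))

# demand_table_path is required exactly when a valuation table is provided.
assert is_required('valuation_table_path', {'valuation_table_path': 'v.csv'})
assert not is_required('valuation_table_path', {'valuation_table_path': ''})
```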
RE:#1769
---
 HISTORY.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/HISTORY.rst b/HISTORY.rst
index a4139ec97..0b29b76b1 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -45,6 +45,12 @@ Unreleased Changes
 * Now testing and building against Python 3.13. No longer testing and
   building with Python 3.8, which reached EOL.
   https://github.com/natcap/invest/issues/1755
+* Annual Water Yield
+    * Fixed an issue where the model would crash if the valuation table was
+      provided, but the demand table was not. Validation will now warn about
+      this, and the ``MODEL_SPEC`` has been improved to reflect that this table
+      is now required when doing valuation.
+      https://github.com/natcap/invest/issues/1769
 * Carbon
     * Updated styling of the HTML report generated by the carbon model, for
       visual consistency with the Workbench (`InVEST #1732

From 570ecbf2a54c4bd217a83fa378a8c6463464cd49 Mon Sep 17 00:00:00 2001
From: Claire Simpson
Date: Wed, 5 Feb 2025 17:38:11 -0700
Subject: [PATCH 77/80] add unit tests to crop production (regression)

---
 tests/test_crop_production.py | 403 +++++++++++++++++++++++++++++++++-
 1 file changed, 402 insertions(+), 1 deletion(-)

diff --git a/tests/test_crop_production.py b/tests/test_crop_production.py
index 71ed3170a..b8fc88e21 100644
--- a/tests/test_crop_production.py
+++ b/tests/test_crop_production.py
@@ -5,9 +5,10 @@ import shutil
 import os

 import numpy
-from osgeo import gdal
+from osgeo import gdal, ogr, osr
 import pandas
 import pygeoprocessing
+from shapely.geometry import Polygon

 gdal.UseExceptions()
 MODEL_DATA_PATH = os.path.join(
@@ -21,6 +22,108 @@ TEST_DATA_PATH = os.path.join(
     'crop_production_model')


+def make_aggregate_vector(path_to_shp):
+    """Generate a shapefile with two overlapping polygons.
+
+    Args:
+        path_to_shp (str): path at which to save the aggregate polygon vector
+
+    Returns:
+        None
+    """
+    # (xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)
+    shapely_geometry_list = [
+        Polygon([(461151, 4923265-50), (461261+50, 4923265-50),
+                 (461261+50, 4923265), (461151, 4923265)]),
+        Polygon([(461261, 4923265-35), (461261+60, 4923265-35),
+                 (461261+60, 4923265+50), (461261, 4923265+50)])
+    ]
+
+    srs = osr.SpatialReference()
+    srs.ImportFromEPSG(26910)
+    projection_wkt = srs.ExportToWkt()
+
+    vector_format = "ESRI Shapefile"
+    fields = {"id": ogr.OFTReal}
+    attribute_list = [
+        {"id": 0},
+        {"id": 1},
+    ]
+
+    pygeoprocessing.shapely_geometry_to_vector(shapely_geometry_list,
+                                               path_to_shp, projection_wkt,
+                                               vector_format, fields,
+                                               attribute_list)
+
+
+def make_simple_raster(base_raster_path, array):
+    """Create a raster on designated path with arbitrary values.
+    Args:
+        base_raster_path (str): the raster path for making the new raster.
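+        array (numpy.ndarray): the values to write to the new raster.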
+ Returns: + Non + """ + # UTM Zone 10N + srs = osr.SpatialReference() + srs.ImportFromEPSG(26910) + projection_wkt = srs.ExportToWkt() + + origin = (461251, 4923245) + pixel_size = (30, 30) + no_data = -1 + + pygeoprocessing.numpy_array_to_raster( + array, no_data, pixel_size, origin, projection_wkt, + base_raster_path) + + +def create_nutrient_df(): + """Creates a nutrient DataFrame for testing.""" + return pandas.DataFrame([ + {'crop': 'corn', 'area (ha)': 21.0, 'production_observed': 0.2, + 'percentrefuse': 7, 'protein': 42., 'lipid': 8, 'energy': 476., + 'ca': 27.0, 'fe': 15.7, 'mg': 280.0, 'ph': 704.0, 'k': 1727.0, + 'na': 2.0, 'zn': 4.9, 'cu': 1.9, 'fl': 8, 'mn': 2.9, 'se': 0.1, + 'vita': 3.0, 'betac': 16.0, 'alphac': 2.30, 'vite': 0.8, + 'crypto': 1.6, 'lycopene': 0.36, 'lutein': 63.0, 'betat': 0.5, + 'gammat': 2.1, 'deltat': 1.9, 'vitc': 6.8, 'thiamin': 0.4, + 'riboflavin': 1.8, 'niacin': 8.2, 'pantothenic': 0.9, + 'vitb6': 1.4, 'folate': 385.0, 'vitb12': 2.0, 'vitk': 41.0}, + + {'crop': 'soybean', 'area (ha)': 5., 'production_observed': 4., + 'percentrefuse': 9, 'protein': 33., 'lipid': 2., 'energy': 99., + 'ca': 257., 'fe': 15.7, 'mg': 280., 'ph': 704.0, 'k': 197.0, + 'na': 2., 'zn': 4.9, 'cu': 1.6, 'fl': 3., 'mn': 5.2, 'se': 0.3, + 'vita': 3.0, 'betac': 16.0, 'alphac': 1.0, 'vite': 0.8, + 'crypto': 0.6, 'lycopene': 0.3, 'lutein': 61.0, 'betat': 0.5, + 'gammat': 2.3, 'deltat': 1.2, 'vitc': 3.0, 'thiamin': 0.42, + 'riboflavin': 0.82, 'niacin': 12.2, 'pantothenic': 0.92, + 'vitb6': 5.4, 'folate': 305., 'vitb12': 3., 'vitk': 42.}, + ]).set_index('crop') + + +def _create_crop_rasters(output_dir, crop_names, file_suffix): + """Creates raster files for test setup.""" + _OBSERVED_PRODUCTION_FILE_PATTERN = os.path.join( + '.', '%s_observed_production%s.tif') + _CROP_PRODUCTION_FILE_PATTERN = os.path.join( + '.', '%s_regression_production%s.tif') + + for i, crop in enumerate(crop_names): + observed_yield_path = os.path.join( + output_dir, + _OBSERVED_PRODUCTION_FILE_PATTERN % (crop, file_suffix)) + crop_production_raster_path = os.path.join( + output_dir, + _CROP_PRODUCTION_FILE_PATTERN % (crop, file_suffix)) + + # Create arbitrary raster arrays + observed_array = numpy.array([[4, i], [i*3, 4]], dtype=numpy.int16) + crop_array = numpy.array([[i, 1], [i*2, 3]], dtype=numpy.int16) + + make_simple_raster(observed_yield_path, observed_array) + make_simple_raster(crop_production_raster_path, crop_array) + + class CropProductionTests(unittest.TestCase): """Tests for the Crop Production model.""" @@ -390,6 +493,304 @@ class CropProductionTests(unittest.TestCase): pandas.testing.assert_frame_equal( expected_result_table, result_table, check_dtype=False) + def test_x_yield_op(self): + """Test `_x_yield_op""" + from natcap.invest.crop_production_regression import _x_yield_op + + # make fake data + y_max = numpy.array([[-1, 3, 2], [4, 5, 3]]) + b_x = numpy.array([[4, 3, 2], [2, 0, 3]]) + c_x = numpy.array([[4, 1, 2], [3, 0, 3]]) + lulc_array = numpy.array([[3, 3, 2], [3, -1, 3]]) + fert_rate = 0.6 + crop_lucode = 3 + pixel_area_ha = 10 + + actual_result = _x_yield_op(y_max, b_x, c_x, lulc_array, fert_rate, + crop_lucode, pixel_area_ha) + expected_result = numpy.array([[-1, -19.393047, -1], + [26.776089, -1, 15.1231]]) + + numpy.testing.assert_allclose(actual_result, expected_result) + + def test_zero_observed_yield_op(self): + """Test `_zero_observed_yield_op`""" + from natcap.invest.crop_production_regression import \ + _zero_observed_yield_op + + # make fake data + observed_yield_array = 
numpy.array([[0, 1, -1], [5, 6, -1]]) + observed_yield_nodata = -1 + + actual_result = _zero_observed_yield_op(observed_yield_array, + observed_yield_nodata) + + expected_result = numpy.array([[0, 1, 0], [5, 6, 0]]) + + numpy.testing.assert_allclose(actual_result, expected_result) + + def test_mask_observed_yield_op(self): + """Test `_mask_observed_yield_op`""" + from natcap.invest.crop_production_regression import \ + _mask_observed_yield_op + + # make fake data + lulc_array = numpy.array([[3, 5, -9999], [3, 3, -1]]) + observed_yield_array = numpy.array([[-1, 5, 4], [8, -9999, 91]]) + observed_yield_nodata = -1 + # note: this observed_yield_nodata value becomes the nodata value in + # the output array but the values in the observed_yield_array with + # this value are NOT treated as no data within this function + + landcover_nodata = -9999 + crop_lucode = 3 + pixel_area_ha = 10 + + actual_result = _mask_observed_yield_op( + lulc_array, observed_yield_array, observed_yield_nodata, + landcover_nodata, crop_lucode, pixel_area_ha) + + expected_result = numpy.array([[-10, 0, -1], [80, -99990, 0]]) + + numpy.testing.assert_allclose(actual_result, expected_result) + + def test_tabulate_regression_results(self): + """Test `tabulate_regression_results`""" + from natcap.invest.crop_production_regression import \ + tabulate_regression_results + + def _create_expected_results(): + """Creates the expected results DataFrame.""" + return pandas.DataFrame([ + {'crop': 'corn', 'area (ha)': 20.0, + 'production_observed': 8.0, 'production_modeled': 4.0, + 'protein_modeled': 1562400.0, 'protein_observed': 3124800.0, + 'lipid_modeled': 297600.0, 'lipid_observed': 595200.0, + 'energy_modeled': 17707200.0, 'energy_observed': 35414400.0, + 'ca_modeled': 1004400.0, 'ca_observed': 2008800.0, + 'fe_modeled': 584040.0, 'fe_observed': 1168080.0, + 'mg_modeled': 10416000.0, 'mg_observed': 20832000.0, + 'ph_modeled': 26188800.0, 'ph_observed': 52377600.0, + 'k_modeled': 64244400.0, 'k_observed': 128488800.0, + 'na_modeled': 74400.0, 'na_observed': 148800.0, + 'zn_modeled': 182280.0, 'zn_observed': 364560.0, + 'cu_modeled': 70680.0, 'cu_observed': 141360.0, + 'fl_modeled': 297600.0, 'fl_observed': 595200.0, + 'mn_modeled': 107880.0, 'mn_observed': 215760.0, + 'se_modeled': 3720.0, 'se_observed': 7440.0, + 'vita_modeled': 111600.0, 'vita_observed': 223200.0, + 'betac_modeled': 595200.0, 'betac_observed': 1190400.0, + 'alphac_modeled': 85560.0, 'alphac_observed': 171120.0, + 'vite_modeled': 29760.0, 'vite_observed': 59520.0, + 'crypto_modeled': 59520.0, 'crypto_observed': 119040.0, + 'lycopene_modeled': 13392.0, 'lycopene_observed': 26784.0, + 'lutein_modeled': 2343600.0, 'lutein_observed': 4687200.0, + 'betat_modeled': 18600.0, 'betat_observed': 37200.0, + 'gammat_modeled': 78120.0, 'gammat_observed': 156240.0, + 'deltat_modeled': 70680.0, 'deltat_observed': 141360.0, + 'vitc_modeled': 252960.0, 'vitc_observed': 505920.0, + 'thiamin_modeled': 14880.0, 'thiamin_observed': 29760.0, + 'riboflavin_modeled': 66960.0, 'riboflavin_observed': 133920.0, + 'niacin_modeled': 305040.0, 'niacin_observed': 610080.0, + 'pantothenic_modeled': 33480.0, 'pantothenic_observed': 66960.0, + 'vitb6_modeled': 52080.0, 'vitb6_observed': 104160.0, + 'folate_modeled': 14322000.0, 'folate_observed': 28644000.0, + 'vitb12_modeled': 74400.0, 'vitb12_observed': 148800.0, + 'vitk_modeled': 1525200.0, 'vitk_observed': 3050400.0}, + {'crop': 'soybean', 'area (ha)': 40.0, + 'production_observed': 12.0, 'production_modeled': 7.0, + 'protein_modeled': 
2102100.0, 'protein_observed': 3603600.0, + 'lipid_modeled': 127400.0, 'lipid_observed': 218400.0, + 'energy_modeled': 6306300.0, 'energy_observed': 10810800.0, + 'ca_modeled': 16370900.0, 'ca_observed': 28064400.0, + 'fe_modeled': 1000090.0, 'fe_observed': 1714440.0, + 'mg_modeled': 17836000.0, 'mg_observed': 30576000.0, + 'ph_modeled': 44844800.0, 'ph_observed': 76876800.0, + 'k_modeled': 12548900.0, 'k_observed': 21512400.0, + 'na_modeled': 127400.0, 'na_observed': 218400.0, + 'zn_modeled': 312130.0, 'zn_observed': 535080.0, + 'cu_modeled': 101920.0, 'cu_observed': 174720.0, + 'fl_modeled': 191100.0, 'fl_observed': 327600.0, + 'mn_modeled': 331240.0, 'mn_observed': 567840.0, + 'se_modeled': 19110.0, 'se_observed': 32760.0, + 'vita_modeled': 191100.0, 'vita_observed': 327600.0, + 'betac_modeled': 1019200.0, 'betac_observed': 1747200.0, + 'alphac_modeled': 63700.0, 'alphac_observed': 109200.0, + 'vite_modeled': 50960.0, 'vite_observed': 87360.0, + 'crypto_modeled': 38220.0, 'crypto_observed': 65520.0, + 'lycopene_modeled': 19110.0, 'lycopene_observed': 32760.0, + 'lutein_modeled': 3885700.0, 'lutein_observed': 6661200.0, + 'betat_modeled': 31850.0, 'betat_observed': 54600.0, + 'gammat_modeled': 146510.0, 'gammat_observed': 251160.0, + 'deltat_modeled': 76440.0, 'deltat_observed': 131040.0, + 'vitc_modeled': 191100.0, 'vitc_observed': 327600.0, + 'thiamin_modeled': 26754.0, 'thiamin_observed': 45864.0, + 'riboflavin_modeled': 52234.0, 'riboflavin_observed': 89544.0, + 'niacin_modeled': 777140.0, 'niacin_observed': 1332240.0, + 'pantothenic_modeled': 58604.0, 'pantothenic_observed': 100464.0, + 'vitb6_modeled': 343980.0, 'vitb6_observed': 589680.0, + 'folate_modeled': 19428500.0, 'folate_observed': 33306000.0, + 'vitb12_modeled': 191100.0, 'vitb12_observed': 327600.0, + 'vitk_modeled': 2675400.0, 'vitk_observed': 4586400.0}]) + + nutrient_df = create_nutrient_df() + + pixel_area_ha = 10 + workspace_dir = self.workspace_dir + output_dir = os.path.join(workspace_dir, "OUTPUT") + os.makedirs(output_dir, exist_ok=True) + + landcover_raster_path = os.path.join(workspace_dir, "landcover.tif") + landcover_nodata = -1 + make_simple_raster(landcover_raster_path, + numpy.array([[2, 1], [2, 3]], dtype=numpy.int16)) + + file_suffix = "v1" + target_table_path = os.path.join(workspace_dir, "output_table.csv") + crop_names = ["corn", "soybean"] + + _create_crop_rasters(output_dir, crop_names, file_suffix) + + tabulate_regression_results( + nutrient_df, crop_names, pixel_area_ha, + landcover_raster_path, landcover_nodata, + output_dir, file_suffix, target_table_path + ) + + # Read only the first 2 crop's data (skipping total area) + actual_result_table = pandas.read_csv(target_table_path, nrows=2, + header=0) + expected_result_table = _create_expected_results() + + # Compare expected vs actual + pandas.testing.assert_frame_equal(actual_result_table, + expected_result_table) + + def test_aggregate_regression_results_to_polygons(self): + """Test `aggregate_regression_results_to_polygons`""" + from natcap.invest.crop_production_regression import \ + aggregate_regression_results_to_polygons + + def _create_expected_agg_table(): + """Create expected output results""" + # Define the new values manually + return pandas.DataFrame([ + {"FID": 0, "corn_modeled": 1, "corn_observed": 4, + "soybean_modeled": 2, "soybean_observed": 5, + "protein_modeled": 991200, "protein_observed": 3063900, + "lipid_modeled": 110800, "lipid_observed": 388600, + "energy_modeled": 6228600, "energy_observed": 22211700, + "ca_modeled": 
4928500, "ca_observed": 12697900, + "fe_modeled": 431750, "fe_observed": 1298390, + "mg_modeled": 7700000, "mg_observed": 23156000, + "ph_modeled": 19360000, "ph_observed": 58220800, + "k_modeled": 19646500, "k_observed": 73207900, + "na_modeled": 55000, "na_observed": 165400, + "zn_modeled": 134750, "zn_observed": 405230, + "cu_modeled": 46790, "cu_observed": 143480, + "fl_modeled": 129000, "fl_observed": 434100, + "mn_modeled": 121610, "mn_observed": 344480, + "se_modeled": 6390, "se_observed": 17370, + "vita_modeled": 82500, "vita_observed": 248100, + "betac_modeled": 440000, "betac_observed": 1323200, + "alphac_modeled": 39590, "alphac_observed": 131060, + "vite_modeled": 22000, "vite_observed": 66160, + "crypto_modeled": 25800, "crypto_observed": 86820, + "lycopene_modeled": 8808, "lycopene_observed": 27042, + "lutein_modeled": 1696100, "lutein_observed": 5119100, + "betat_modeled": 13750, "betat_observed": 41350, + "gammat_modeled": 61390, "gammat_observed": 182770, + "deltat_modeled": 39510, "deltat_observed": 125280, + "vitc_modeled": 117840, "vitc_observed": 389460, + "thiamin_modeled": 11364, "thiamin_observed": 33990, + "riboflavin_modeled": 31664, "riboflavin_observed": 104270, + "niacin_modeled": 298300, "niacin_observed": 860140, + "pantothenic_modeled": 25114, "pantothenic_observed": 75340, + "vitb6_modeled": 111300, "vitb6_observed": 297780, + "folate_modeled": 9131500, "folate_observed": 28199500, + "vitb12_modeled": 73200, "vitb12_observed": 210900, + "vitk_modeled": 1145700, "vitk_observed": 3436200}, + {"FID": 1, "corn_modeled": 4, "corn_observed": 8, + "soybean_modeled": 7, "soybean_observed": 12, + "protein_modeled": 3664500, "protein_observed": 6728400, + "lipid_modeled": 425000, "lipid_observed": 813600, + "energy_modeled": 24013500, "energy_observed": 46225200, + "ca_modeled": 17375300, "ca_observed": 30073200, + "fe_modeled": 1584130, "fe_observed": 2882520, + "mg_modeled": 28252000, "mg_observed": 51408000, + "ph_modeled": 71033600, "ph_observed": 129254400, + "k_modeled": 76793300, "k_observed": 150001200, + "na_modeled": 201800, "na_observed": 367200, + "zn_modeled": 494410, "zn_observed": 899640, + "cu_modeled": 172600, "cu_observed": 316080, + "fl_modeled": 488700, "fl_observed": 922800, + "mn_modeled": 439120, "mn_observed": 783600, + "se_modeled": 22830, "se_observed": 40200, + "vita_modeled": 302700, "vita_observed": 550800, + "betac_modeled": 1614400, "betac_observed": 2937600, + "alphac_modeled": 149260, "alphac_observed": 280320, + "vite_modeled": 80720, "vite_observed": 146880, + "crypto_modeled": 97740, "crypto_observed": 184560, + "lycopene_modeled": 32502, "lycopene_observed": 59544, + "lutein_modeled": 6229300, "lutein_observed": 11348400, + "betat_modeled": 50450, "betat_observed": 91800, + "gammat_modeled": 224630, "gammat_observed": 407400, + "deltat_modeled": 147120, "deltat_observed": 272400, + "vitc_modeled": 444060, "vitc_observed": 833520, + "thiamin_modeled": 41634, "thiamin_observed": 75624, + "riboflavin_modeled": 119194, "riboflavin_observed": 223464, + "niacin_modeled": 1082180, "niacin_observed": 1942320, + "pantothenic_modeled": 92084, "pantothenic_observed": 167424, + "vitb6_modeled": 396060, "vitb6_observed": 693840, + "folate_modeled": 33750500, "folate_observed": 61950000, + "vitb12_modeled": 265500, "vitb12_observed": 476400, + "vitk_modeled": 4200600, "vitk_observed": 7636800} + ], dtype=float) + + workspace = "/Users/simpson2/Desktop/output_test_cp" + + base_aggregate_vector_path = os.path.join(workspace, + "agg_vector.shp") + 
make_aggregate_vector(base_aggregate_vector_path) + + target_aggregate_vector_path = os.path.join(workspace, + "agg_vector_prj.shp") + + spatial_ref = osr.SpatialReference() + spatial_ref.ImportFromEPSG(26910) # EPSG:4326 for WGS84 + landcover_raster_projection = spatial_ref.ExportToWkt() + + crop_names = ['corn', 'soybean'] + nutrient_df = create_nutrient_df() + output_dir = os.path.join(workspace, "OUTPUT") + os.makedirs(output_dir, exist_ok=True) + file_suffix = 'test' + target_aggregate_table_path = '' # unused + + _create_crop_rasters(output_dir, crop_names, file_suffix) + + aggregate_regression_results_to_polygons( + base_aggregate_vector_path, target_aggregate_vector_path, + landcover_raster_projection, crop_names, + nutrient_df, output_dir, file_suffix, + target_aggregate_table_path) + + _AGGREGATE_TABLE_FILE_PATTERN = os.path.join( + '.','aggregate_results%s.csv') + + aggregate_table_path = os.path.join( + output_dir, _AGGREGATE_TABLE_FILE_PATTERN % file_suffix) + + actual_aggregate_table = pandas.read_csv(aggregate_table_path, + dtype=float) + print(actual_aggregate_table) + + expected_aggregate_table = _create_expected_agg_table() + + pandas.testing.assert_frame_equal( + actual_aggregate_table, expected_aggregate_table) + + class CropValidationTests(unittest.TestCase): """Tests for the Crop Productions' MODEL_SPEC and validation.""" From 242ed8c5b943537fb760a62cce3bae450db00020 Mon Sep 17 00:00:00 2001 From: Claire Simpson Date: Wed, 5 Feb 2025 17:39:06 -0700 Subject: [PATCH 78/80] fixed formatting --- tests/test_crop_production.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/tests/test_crop_production.py b/tests/test_crop_production.py index b8fc88e21..0e80d9d79 100644 --- a/tests/test_crop_production.py +++ b/tests/test_crop_production.py @@ -80,25 +80,25 @@ def create_nutrient_df(): """Creates a nutrient DataFrame for testing.""" return pandas.DataFrame([ {'crop': 'corn', 'area (ha)': 21.0, 'production_observed': 0.2, - 'percentrefuse': 7, 'protein': 42., 'lipid': 8, 'energy': 476., - 'ca': 27.0, 'fe': 15.7, 'mg': 280.0, 'ph': 704.0, 'k': 1727.0, - 'na': 2.0, 'zn': 4.9, 'cu': 1.9, 'fl': 8, 'mn': 2.9, 'se': 0.1, - 'vita': 3.0, 'betac': 16.0, 'alphac': 2.30, 'vite': 0.8, - 'crypto': 1.6, 'lycopene': 0.36, 'lutein': 63.0, 'betat': 0.5, - 'gammat': 2.1, 'deltat': 1.9, 'vitc': 6.8, 'thiamin': 0.4, - 'riboflavin': 1.8, 'niacin': 8.2, 'pantothenic': 0.9, - 'vitb6': 1.4, 'folate': 385.0, 'vitb12': 2.0, 'vitk': 41.0}, + 'percentrefuse': 7, 'protein': 42., 'lipid': 8, 'energy': 476., + 'ca': 27.0, 'fe': 15.7, 'mg': 280.0, 'ph': 704.0, 'k': 1727.0, + 'na': 2.0, 'zn': 4.9, 'cu': 1.9, 'fl': 8, 'mn': 2.9, 'se': 0.1, + 'vita': 3.0, 'betac': 16.0, 'alphac': 2.30, 'vite': 0.8, + 'crypto': 1.6, 'lycopene': 0.36, 'lutein': 63.0, 'betat': 0.5, + 'gammat': 2.1, 'deltat': 1.9, 'vitc': 6.8, 'thiamin': 0.4, + 'riboflavin': 1.8, 'niacin': 8.2, 'pantothenic': 0.9, + 'vitb6': 1.4, 'folate': 385.0, 'vitb12': 2.0, 'vitk': 41.0}, {'crop': 'soybean', 'area (ha)': 5., 'production_observed': 4., - 'percentrefuse': 9, 'protein': 33., 'lipid': 2., 'energy': 99., - 'ca': 257., 'fe': 15.7, 'mg': 280., 'ph': 704.0, 'k': 197.0, - 'na': 2., 'zn': 4.9, 'cu': 1.6, 'fl': 3., 'mn': 5.2, 'se': 0.3, - 'vita': 3.0, 'betac': 16.0, 'alphac': 1.0, 'vite': 0.8, - 'crypto': 0.6, 'lycopene': 0.3, 'lutein': 61.0, 'betat': 0.5, - 'gammat': 2.3, 'deltat': 1.2, 'vitc': 3.0, 'thiamin': 0.42, - 'riboflavin': 0.82, 'niacin': 12.2, 'pantothenic': 0.92, - 'vitb6': 5.4, 'folate': 
305., 'vitb12': 3., 'vitk': 42.}, - ]).set_index('crop') + 'percentrefuse': 9, 'protein': 33., 'lipid': 2., 'energy': 99., + 'ca': 257., 'fe': 15.7, 'mg': 280., 'ph': 704.0, 'k': 197.0, + 'na': 2., 'zn': 4.9, 'cu': 1.6, 'fl': 3., 'mn': 5.2, 'se': 0.3, + 'vita': 3.0, 'betac': 16.0, 'alphac': 1.0, 'vite': 0.8, + 'crypto': 0.6, 'lycopene': 0.3, 'lutein': 61.0, 'betat': 0.5, + 'gammat': 2.3, 'deltat': 1.2, 'vitc': 3.0, 'thiamin': 0.42, + 'riboflavin': 0.82, 'niacin': 12.2, 'pantothenic': 0.92, + 'vitb6': 5.4, 'folate': 305., 'vitb12': 3., 'vitk': 42.}, + ]).set_index('crop') def _create_crop_rasters(output_dir, crop_names, file_suffix): From 7e0eb0d0df96e2c5e3d95d625a8f8ca556b960d7 Mon Sep 17 00:00:00 2001 From: Claire Simpson Date: Thu, 6 Feb 2025 12:38:05 -0700 Subject: [PATCH 79/80] reorganization, linting, cleanup --- tests/test_annual_water_yield.py | 32 ++++++++-------- tests/test_carbon.py | 57 ++++++++++++----------------- tests/test_coastal_blue_carbon.py | 44 +++++++--------------- tests/test_coastal_vulnerability.py | 12 +++--- tests/test_crop_production.py | 2 +- 5 files changed, 60 insertions(+), 87 deletions(-) diff --git a/tests/test_annual_water_yield.py b/tests/test_annual_water_yield.py index 1e8ca88df..ad51661b1 100644 --- a/tests/test_annual_water_yield.py +++ b/tests/test_annual_water_yield.py @@ -414,7 +414,6 @@ class AnnualWaterYieldTests(unittest.TestCase): validation_warnings[0], (['demand_table_path'], 'Input is required but has no value')) - def test_fractp_op(self): """Test `fractp_op`""" from natcap.invest.annual_water_yield import fractp_op @@ -447,7 +446,7 @@ class AnnualWaterYieldTests(unittest.TestCase): and `compute_water_yield_volume`""" from natcap.invest import annual_water_yield - def create_watershed_results_vector(path_to_shp): + def _create_watershed_results_vector(path_to_shp): """Generate a fake watershed results vector file.""" shapely_geometry_list = [ Polygon([(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)]), @@ -468,7 +467,7 @@ class AnnualWaterYieldTests(unittest.TestCase): vector_format, fields, attribute_list) - def validate_fields(vector_path, field_name, expected_values, error_msg): + def _validate_fields(vector_path, field_name, expected_values, error_msg): """ Validate a specific field in the watershed results vector by comparing actual to expected values. 
Expected values generated @@ -492,7 +491,7 @@ class AnnualWaterYieldTests(unittest.TestCase): # generate fake watershed results vector watershed_results_vector_path = os.path.join(self.workspace_dir, "watershed_results.shp") - create_watershed_results_vector(watershed_results_vector_path) + _create_watershed_results_vector(watershed_results_vector_path) # generate fake val_df val_df = pandas.DataFrame({'efficiency': [.7, .8], 'height': [12, 50], @@ -503,24 +502,23 @@ class AnnualWaterYieldTests(unittest.TestCase): # test water yield volume annual_water_yield.compute_water_yield_volume( watershed_results_vector_path) - validate_fields(watershed_results_vector_path, "wyield_vol", - [990.0, 800.0], - "Error with water yield volume calculation.") + _validate_fields(watershed_results_vector_path, "wyield_vol", + [990.0, 800.0], + "Error with water yield volume calculation.") # test rsupply volume annual_water_yield.compute_rsupply_volume( watershed_results_vector_path) - validate_fields(watershed_results_vector_path, "rsupply_vl", - [940.0, 730.0], - "Error calculating total realized water supply volume.") + _validate_fields(watershed_results_vector_path, "rsupply_vl", + [940.0, 730.0], + "Error calculating total realized water supply volume.") # test compute watershed valuation annual_water_yield.compute_watershed_valuation( watershed_results_vector_path, val_df) - validate_fields(watershed_results_vector_path, "hp_energy", - [19.329408, 55.5968], - "Error calculating energy.") - validate_fields(watershed_results_vector_path, "hp_val", - [501.9029748723, 4587.91946857059], - "Error calculating net present value.") - \ No newline at end of file + _validate_fields(watershed_results_vector_path, "hp_energy", + [19.329408, 55.5968], + "Error calculating energy.") + _validate_fields(watershed_results_vector_path, "hp_val", + [501.9029748723, 4587.91946857059], + "Error calculating net present value.") diff --git a/tests/test_carbon.py b/tests/test_carbon.py index 2c1e29ef8..ed0e11109 100644 --- a/tests/test_carbon.py +++ b/tests/test_carbon.py @@ -51,38 +51,6 @@ def make_simple_raster(base_raster_path, fill_val, nodata_val): new_raster = None -def make_simple_lulc_raster(base_raster_path): - """Create a 2x2 raster on designated path with arbitrary lulc codes. - - Args: - base_raster_path (str): the raster path for making the new raster. - - Returns: - None. - """ - array = numpy.array([[1, 1], [2, 3]], dtype=int) - - srs = osr.SpatialReference() - srs.ImportFromEPSG(26910) # UTM Zone 10N - projection_wkt = srs.ExportToWkt() - # origin hand-picked for this epsg: - geotransform = [461261, 1.0, 0.0, 4923265, 0.0, -1.0] - - n = 2 - gtiff_driver = gdal.GetDriverByName('GTiff') - new_raster = gtiff_driver.Create( - base_raster_path, n, n, 1, gdal.GDT_Int32, options=[ - 'TILED=YES', 'BIGTIFF=YES', 'COMPRESS=LZW', - 'BLOCKXSIZE=16', 'BLOCKYSIZE=16']) - new_raster.SetProjection(projection_wkt) - new_raster.SetGeoTransform(geotransform) - new_band = new_raster.GetRasterBand(1) - new_band.WriteArray(array) - new_raster.FlushCache() - new_band = None - new_raster = None - - def assert_raster_equal_value(base_raster_path, val_to_compare): """Assert that the entire output raster has the same value as specified. @@ -304,9 +272,32 @@ class CarbonTests(unittest.TestCase): """Test `_generate_carbon_map`""" from natcap.invest.carbon import _generate_carbon_map + def _make_simple_lulc_raster(base_raster_path): + """Create a raster on designated path with arbitrary values. 
+ Args: + base_raster_path (str): the raster path for making the new raster. + Returns: + None. + """ + + array = numpy.array([[1, 1], [2, 3]], dtype=numpy.int32) + + # UTM Zone 10N + srs = osr.SpatialReference() + srs.ImportFromEPSG(26910) + projection_wkt = srs.ExportToWkt() + + origin = (461251, 4923245) + pixel_size = (1, 1) + no_data = -999 + + pygeoprocessing.numpy_array_to_raster( + array, no_data, pixel_size, origin, projection_wkt, + base_raster_path) + # generate a fake lulc raster lulc_path = os.path.join(self.workspace_dir, "lulc.tif") - make_simple_lulc_raster(lulc_path) + _make_simple_lulc_raster(lulc_path) # make fake carbon pool dict carbon_pool_by_type = {1: 5000, 2: 60, 3: 120} diff --git a/tests/test_coastal_blue_carbon.py b/tests/test_coastal_blue_carbon.py index ddd4081dc..e5e2b6d2c 100644 --- a/tests/test_coastal_blue_carbon.py +++ b/tests/test_coastal_blue_carbon.py @@ -26,36 +26,24 @@ LOGGER = logging.getLogger(__name__) def make_raster_from_array(base_raster_path, array): - """Create a raster on designated path with arbitrary lulc codes. - + """Create a raster on designated path with arbitrary values. Args: base_raster_path (str): the raster path for making the new raster. - array (array): array to save as raster - Returns: None. """ - + # UTM Zone 10N srs = osr.SpatialReference() - srs.ImportFromEPSG(26910) # UTM Zone 10N + srs.ImportFromEPSG(26910) projection_wkt = srs.ExportToWkt() - # origin hand-picked for this epsg: - geotransform = [461261, 1.0, 0.0, 4923265, 0.0, -1.0] - gtiff_driver = gdal.GetDriverByName('GTiff') - new_raster = gtiff_driver.Create( - base_raster_path, array.shape[0], array.shape[1], 1, - gdal.GDT_Int32, options=[ - 'TILED=YES', 'BIGTIFF=YES', 'COMPRESS=LZW', - 'BLOCKXSIZE=16', 'BLOCKYSIZE=16']) - new_raster.SetProjection(projection_wkt) - new_raster.SetGeoTransform(geotransform) - new_band = new_raster.GetRasterBand(1) - new_band.SetNoDataValue(-1) - new_band.WriteArray(array) - new_raster.FlushCache() - new_band = None - new_raster = None + origin = (461261, 4923265) + pixel_size = (1, 1) + no_data = -1 + + pygeoprocessing.numpy_array_to_raster( + array, no_data, pixel_size, origin, projection_wkt, + base_raster_path) class TestPreprocessor(unittest.TestCase): """Test Coastal Blue Carbon preprocessor functions.""" @@ -1098,10 +1086,7 @@ class TestCBC2(unittest.TestCase): """Test `_calculate_npv`""" from natcap.invest.coastal_blue_carbon import coastal_blue_carbon - def make_carbon_seq_raster(out_path): - """make a carbon sequestration raster and save it to out_path""" - - #make fake data + # make fake data net_sequestration_rasters = { 2010: os.path.join(self.workspace_dir, "carbon_seq_2010.tif"), 2011: os.path.join(self.workspace_dir, "carbon_seq_2011.tif"), @@ -1140,10 +1125,9 @@ class TestCBC2(unittest.TestCase): actual_2012 = band.ReadAsArray() # compare actual rasters to expected (based on running `_calculate_npv`) - expected_2011 = numpy.array([[100550, 50300], [200950, 5000]], - dtype=int) - expected_2012 = numpy.array([[370268, 185195], [740045, 18409]], - dtype=int) + expected_2011 = numpy.array([[100525, 50262.5], [200950, 5000]]) + expected_2012 = numpy.array([[370206.818182, 185103.409091], + [740045.454545, 18409.090909]]) numpy.testing.assert_allclose(actual_2011, expected_2011) numpy.testing.assert_allclose(actual_2012, expected_2012) diff --git a/tests/test_coastal_vulnerability.py b/tests/test_coastal_vulnerability.py index 40e3f2c93..208456e86 100644 --- a/tests/test_coastal_vulnerability.py +++ 
b/tests/test_coastal_vulnerability.py @@ -1560,7 +1560,7 @@ class CoastalVulnerabilityTests(unittest.TestCase): from natcap.invest.coastal_vulnerability import \ assemble_results_and_calculate_exposure - def make_shore_points_vector(shore_points_path): + def _make_shore_points_vector(shore_points_path): # create 4 points, each with a unique 'shore_id' in [0..3]. shore_geometries = [Point(0, 0), Point(1, 0), Point(2, 1), Point(3, 2)] shore_fields = {'shore_id': ogr.OFTInteger} @@ -1577,7 +1577,7 @@ class CoastalVulnerabilityTests(unittest.TestCase): ogr_geom_type=ogr.wkbPoint ) - def make_habitat_csv(habitat_csv_path): + def _make_habitat_csv(habitat_csv_path): # Example: one habitat column named 'kelp', plus 'R_hab' # We have 4 shore IDs, so we add 4 rows. Values are arbitrary. habitat_df = pandas.DataFrame( @@ -1585,7 +1585,7 @@ class CoastalVulnerabilityTests(unittest.TestCase): 'seagrass': [4, 1, 2, 4], 'R_hab': [5, 2, 5, 3]}) habitat_df.to_csv(habitat_csv_path, index=False) - def make_risk_id_path_list(): + def _make_risk_id_path_list(): # Create pickles for risk data relief_pkl = os.path.join(self.workspace_dir, 'relief.pickle') slr_pkl = os.path.join(self.workspace_dir, 'slr.pickle') @@ -1608,12 +1608,12 @@ class CoastalVulnerabilityTests(unittest.TestCase): return risk_id_path_list shore_points_path = os.path.join(self.workspace_dir, "shore_points.gpkg") - make_shore_points_vector(shore_points_path) + _make_shore_points_vector(shore_points_path) habitat_csv_path = os.path.join(self.workspace_dir, 'habitat_protection.csv') - make_habitat_csv(habitat_csv_path) + _make_habitat_csv(habitat_csv_path) - risk_id_path_list = make_risk_id_path_list() + risk_id_path_list = _make_risk_id_path_list() intermediate_vector_path = os.path.join(self.workspace_dir, 'intermediate_exposure.gpkg') diff --git a/tests/test_crop_production.py b/tests/test_crop_production.py index 0e80d9d79..f5a4acdff 100644 --- a/tests/test_crop_production.py +++ b/tests/test_crop_production.py @@ -60,7 +60,7 @@ def make_simple_raster(base_raster_path, array): Args: base_raster_path (str): the raster path for making the new raster. Returns: - Non + None. """ # UTM Zone 10N srs = osr.SpatialReference() From 952ef9f1099ae82f02bffee267ceaa0aaa30217a Mon Sep 17 00:00:00 2001 From: Claire Simpson Date: Thu, 6 Feb 2025 13:08:52 -0700 Subject: [PATCH 80/80] fixed workspace --- tests/test_crop_production.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_crop_production.py b/tests/test_crop_production.py index f5a4acdff..dc9b340c8 100644 --- a/tests/test_crop_production.py +++ b/tests/test_crop_production.py @@ -747,7 +747,7 @@ class CropProductionTests(unittest.TestCase): "vitk_modeled": 4200600, "vitk_observed": 7636800} ], dtype=float) - workspace = "/Users/simpson2/Desktop/output_test_cp" + workspace = self.workspace_dir base_aggregate_vector_path = os.path.join(workspace, "agg_vector.shp")
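For reference, the per-test workspace that ``self.workspace_dir`` refers to follows the pattern used throughout these test modules; a minimal sketch, assuming the suite's usual ``setUp``/``tearDown`` arrangement:

```python
import shutil
import tempfile
import unittest

class CropProductionTests(unittest.TestCase):
    def setUp(self):
        # A fresh scratch directory per test, unlike a hard-coded local path.
        self.workspace_dir = tempfile.mkdtemp()

    def tearDown(self):
        shutil.rmtree(self.workspace_dir)
```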