Merge branch 'main' of https://github.com/natcap/invest into bugfix/1350-una-cryptic-gdal-typeerror

Conflicts:
	HISTORY.rst
	tests/test_urban_nature_access.py
James Douglass 2023-08-29 11:11:21 -07:00
commit 87c945748d
86 changed files with 4559 additions and 4134 deletions


@ -60,13 +60,14 @@ runs:
cat environment.yml
- name: Setup conda environment
uses: mamba-org/provision-with-micromamba@main
uses: mamba-org/setup-micromamba@v1
with:
environment-file: environment.yml
environment-name: env
channels: conda-forge
cache-env: true
cache-env-key: ${{ runner.os }}${{ runner.arch }}-${{ env.WEEK }}-${{ hashFiles('environment.yml') }}
init-shell: bash
cache-environment: true
cache-environment-key: ${{ runner.os }}${{ runner.arch }}-${{ env.WEEK }}-${{ hashFiles('environment.yml') }}
- name: List conda environment
shell: bash -l {0}


@ -4,4 +4,4 @@ Fixes #
## Checklist
- [ ] Updated HISTORY.rst and link to any relevant issue (if these changes are user-facing)
- [ ] Updated the user's guide (if needed)
- [ ] Tested the affected models' UIs (if relevant)
- [ ] Tested the Workbench UI (if relevant)


@ -317,7 +317,7 @@ jobs:
run: make userguide
- name: Build binaries
run: make CONDA=micromamba binaries
run: make CONDA="$MAMBA_EXE" binaries
- name: Run invest-autotest with binaries
if : |
@ -344,20 +344,6 @@ jobs:
yarn config set network-timeout 600000 -g
yarn install
- name: Build Workbench
working-directory: workbench
env:
GH_TOKEN: env.GITHUB_TOKEN
DEBUG: electron-builder
CSC_IDENTITY_AUTO_DISCOVERY: false # disable electron-builder code signing
run: |
yarn run build
yarn run dist
- name: Test electron app with puppeteer
working-directory: workbench
run: npx cross-env CI=true yarn run test-electron-app
- name: Authenticate GCP
if: github.event_name != 'pull_request'
uses: google-github-actions/auth@v0
@ -368,6 +354,47 @@ jobs:
if: github.event_name != 'pull_request'
uses: google-github-actions/setup-gcloud@v0
- name: Build Workbench (PRs)
if: github.event_name == 'pull_request'
working-directory: workbench
env:
GH_TOKEN: env.GITHUB_TOKEN
DEBUG: electron-builder
CSC_IDENTITY_AUTO_DISCOVERY: false # disable electron-builder code signing
run: |
yarn run build
yarn run dist
- name: Build Workbench (macOS)
if: github.event_name != 'pull_request' && matrix.os == 'macos-latest' # secrets not available in PR
working-directory: workbench
env:
GH_TOKEN: env.GITHUB_TOKEN
DEBUG: electron-builder
CSC_LINK: 2025-01-16-Expiry-AppStore-App.p12
CSC_KEY_PASSWORD: ${{ secrets.MACOS_CODESIGN_CERT_PASS }}
run: |
gsutil cp gs://stanford_cert/$CSC_LINK $CSC_LINK
yarn run build
yarn run dist
- name: Build Workbench (Windows)
if: github.event_name != 'pull_request' && matrix.os == 'windows-latest' # secrets not available in PR
working-directory: workbench
env:
GH_TOKEN: env.GITHUB_TOKEN
DEBUG: electron-builder
CSC_LINK: Stanford-natcap-code-signing-cert-expires-2024-01-26.p12
CSC_KEY_PASSWORD: ${{ secrets.WINDOWS_CODESIGN_CERT_PASS }}
run: |
gsutil cp gs://stanford_cert/$CSC_LINK $CSC_LINK
yarn run build
yarn run dist
- name: Test electron app with puppeteer
working-directory: workbench
run: npx cross-env CI=true yarn run test-electron-app
- name: Sign binaries (macOS)
if: github.event_name != 'pull_request' && matrix.os == 'macos-latest' # secrets not available in PR
env:


@ -35,12 +35,34 @@
.. :changelog:
3.14.0 (YYYY-MM-DD)
-------------------
* SDR
* We implemented two major functional changes to the InVEST LS Factor
that significantly affect most outputs of SDR and will bring the LS
factor output more in line with the outputs of SAGA-GIS's LS Factor.
A discussion of differences between these two implementations can be
viewed at https://github.com/natcap/invest/tree/main/doc/decision-records/ADR-0001-Update-SDR-LS-Factor.md.
The two specific changes implemented are:
* The LS Factor's on-pixel aspect length is now calculated as
``abs(sin(slope)) + abs(cos(slope))``.
* The LS Factor's upstream contributing area is now calculated as
an estimate for the specific catchment area, calculated by
``sqrt(n_pixels_upstream * pixel_area)``.
Unreleased Changes
------------------
* General
* Fixed a bug in the CLI where ``invest getspec --json`` failed on
non-json-serializable objects such as ``pint.Unit``.
https://github.com/natcap/invest/issues/1280
* A new directory at ``./doc/decision-records`` has been created for
"Architecture/Any Decision Records", which will serve as a record of
nontrivial decisions made about InVEST and why they were made. This is
intended for reference by our science and software teams, as well as by
the community at large when inquiring about a nontrivial change.
https://github.com/natcap/invest/issues/1079
* Updated the package installation instructions in the API docs for clarity
and also to highlight the ease of installation through ``conda-forge``.
https://github.com/natcap/invest/issues/1256
@ -48,10 +70,33 @@ Unreleased Changes
has been merged into ``utils.read_csv_to_dataframe``; a sketch of the new
usage pattern follows this section
(`#1319 <https://github.com/natcap/invest/issues/1319>`_),
(`#1327 <https://github.com/natcap/invest/issues/1327>`_)
* Improved the validation message that is returned when not all spatial
inputs overlap (`#502 <https://github.com/natcap/invest/issues/502>`_)
* Standardized the name and location of the taskgraph cache directory for
all models. It is now called ``taskgraph_cache`` and located in the top
level of the workspace directory.
(`#1230 <https://github.com/natcap/invest/issues/1230>`_)
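The following is a minimal, schematic sketch of the new
``read_csv_to_dataframe`` usage pattern, drawn from the code changes later in
this commit. The table path variable and the Annual Water Yield spec key and
column names are illustrative, and the snippet assumes the surrounding
``natcap.invest`` module context rather than being standalone::

    # Old pattern: a dict of dicts keyed on an index column.
    bio_dict = utils.read_csv_to_dataframe(
        biophysical_table_path, 'lucode').to_dict(orient='index')
    kc_value = bio_dict[lucode]['kc']

    # New pattern: a pandas.DataFrame indexed according to the
    # ``index_col`` declared in the model's MODEL_SPEC entry for the table.
    bio_df = utils.read_csv_to_dataframe(
        biophysical_table_path, MODEL_SPEC['args']['biophysical_table_path'])
    kc_value = bio_df['kc'][lucode]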
* Workbench
* Fixed a bug where sampledata downloads failed silently (and the progress
bar became inaccurate) if the Workbench did not have write permission to
the download location. https://github.com/natcap/invest/issues/1070
* The workbench app is now distributed with a valid code signature
(`#727 <https://github.com/natcap/invest/issues/727>`_)
* Changing the language setting will now cause the app to relaunch
(`#1168 <https://github.com/natcap/invest/issues/1168>`_)
* Closing the main window will now close any user's guide windows that are
open. Fixed a bug where the app could not be reopened after closing.
(`#1258 <https://github.com/natcap/invest/issues/1258>`_)
* Fixed a bug where invalid metadata for a recent run would result
in an uncaught exception.
(`#1286 <https://github.com/natcap/invest/issues/1286>`_)
* Middle clicking an InVEST model tab was opening a blank window. Now
middle clicking will close that tab as expected.
(`#1261 <https://github.com/natcap/invest/issues/1261>`_)
* Coastal Blue Carbon
* Added validation for the transition table, raising a validation error if
unexpected values are encountered.
(`#729 <https://github.com/natcap/invest/issues/729>`_)
* Forest Carbon
* The biophysical table is now case-insensitive.
* HRA
@ -59,7 +104,17 @@ Unreleased Changes
consequence criteria were skipped for a single habitat. The model now
correctly handles this case. https://github.com/natcap/invest/issues/1250
* Tables in the .xls format are no longer supported. This format was
deprecated by ``pandas``. (`#1271 <https://github.com/natcap/invest/issues/1271>`_)
deprecated by ``pandas``.
(`#1271 <https://github.com/natcap/invest/issues/1271>`_)
* Fixed a bug where vector inputs could be rasterized onto a grid that is
not exactly aligned with other raster inputs.
(`#1312 <https://github.com/natcap/invest/issues/1312>`_)
* NDR
* The contents of the output ``cache_dir`` have been consolidated into
``intermediate_outputs``.
* Fixed a bug where results were calculated incorrectly if the runoff proxy
raster (or the DEM or LULC) had no nodata value
(`#1005 <https://github.com/natcap/invest/issues/1005>`_)
* Pollination
* Several exceptions have been tidied up so that only fieldnames are
printed instead of the python data structures representing the whole
@ -85,6 +140,8 @@ Unreleased Changes
* Fixed an issue with sediment deposition progress logging that was
causing the "percent complete" indicator to not progress linearly.
https://github.com/natcap/invest/issues/1262
* The contents of the output ``churn_dir_not_for_humans`` have been
consolidated into ``intermediate_outputs``.
* Seasonal Water Yield
* Fixed a bug where monthly quickflow nodata pixels were not being passed
on to the total quickflow raster, which could result in negative values
@ -96,18 +153,76 @@ Unreleased Changes
set to 0. The old behavior was not well documented and caused some
confusion when nodata pixels did not line up. It's safer not to fill in
unknown data. (`#1317 <https://github.com/natcap/invest/issues/1317>`_)
* Negative monthly quickflow values will now be set to 0. This is because
very small negative values occasionally result from valid data, but they
should be interpreted as 0.
(`#1318 <https://github.com/natcap/invest/issues/1318>`_)
* In the monthly quickflow calculation, QF_im will be set to 0 on any pixel
where s_i / a_im > 100. This is done to avoid overflow errors when
calculating edge cases where the result would round down to 0 anyway; see
the sketch after this section.
(`#1318 <https://github.com/natcap/invest/issues/1318>`_)
* The contents of the output ``cache_dir`` have been consolidated into
``intermediate_outputs``.
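As a rough numpy illustration of the two quickflow clamping rules above (the
array values are made up and nodata handling is omitted; the real model
operates on monthly rasters)::

    import numpy

    s_i = numpy.array([5.0, 900.0, 12.0])    # potential retention term
    a_im = numpy.array([2.5, 3.0, 40.0])     # mean rain depth on a rainy day
    qf_im = numpy.array([-1e-9, 0.7, 2.3])   # raw monthly quickflow values

    # Very small negative values arising from valid data are treated as 0.
    qf_im[qf_im < 0] = 0
    # Where s_i / a_im > 100 the result would round down to 0 anyway, so set
    # it to 0 directly instead of evaluating the overflow-prone terms.
    qf_im[(s_i / a_im) > 100] = 0
    print(qf_im)  # [0.  0.  2.3]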
* Urban Flood Risk
* Fixed a bug where the model incorrectly raised an error if the
biophysical table contained a row of all 0s.
(`#1123 <https://github.com/natcap/invest/issues/1123>`_)
* The contents of the output ``temp_working_dir_not_for_humans`` have been
consolidated into ``intermediate_files``.
* Workbench validation of the biophysical table now warns if a curve number
value is missing.
(`#1346 <https://github.com/natcap/invest/issues/1346>`_)
* Urban Nature Access
* Urban nature supply outputs have been renamed to add ``percapita`` to the
filename.
* In uniform search radius mode, ``urban_nature_supply.tif`` has been
renamed to ``urban_nature_supply_percapita.tif``.
* When defining search radii by urban nature class,
``urban_nature_supply_lucode_[LUCODE].tif`` has been renamed to
``urban_nature_supply_percapita_lucode_[LUCODE].tif``.
* When defining search radii by population groups,
``urban_nature_supply_to_[POP_GROUP].tif`` has been renamed to
``urban_nature_supply_percapita_to_[POP_GROUP].tif``.
* A new output for "Accessible Urban Nature" is created, indicating the
area of accessible greenspace available to people within the search
radius, weighted by the selected decay function. The outputs vary
slightly depending on the selected execution mode.
* In uniform search radius mode, a single new output is created,
``accessible_urban_nature.tif``.
* When defining search radii by urban nature class, one new
output raster is created for each class of urban nature. These files
are named ``accessible_urban_nature_lucode_[LUCODE].tif``.
* When defining search radii for population groups, one new output
raster is created for each population group. These files are named
``accessible_urban_nature_to_[POP_GROUP].tif``.
* Urban nature classes can now be defined to occupy a proportion of a
pixel, such as a park that is semi-developed. This proportion is
provided through user input as a proportion (0-1) in the
``urban_nature`` column of the LULC Attribute Table. A value of ``0``
indicates that there is no urban nature in this class, ``0.333``
indicates that a third of the area of this LULC class is urban nature,
and ``1`` would indicate that the entire LULC class's area is urban
nature. https://github.com/natcap/invest/issues/1180
* Fixed an issue where, under certain circumstances, the model would raise
a cryptic ``TypeError`` when creating the summary vector.
https://github.com/natcap/invest/issues/1350
* Visitation: Recreation and Tourism
* Fixed a bug where overlapping predictor polygons would be double-counted
in ``polygon_area_coverage`` and ``polygon_percent_coverage`` calculations.
(`#1310 <https://github.com/natcap/invest/issues/1310>`_)
in ``polygon_area_coverage`` and ``polygon_percent_coverage``
calculations. (`#1310 <https://github.com/natcap/invest/issues/1310>`_)
* Changed the calculation of ``point_nearest_distance`` metric to match
the description in the User's Guide. Values are now the distance to the
centroid of the AOI polygon instead of the distance to the nearest
edge of the AOI polygon.
(`#1347 <https://github.com/natcap/invest/issues/1347>`_)
* Wind Energy
* Updated a misleading error message that is raised when the AOI does
not spatially overlap another input.
(`#1054 <https://github.com/natcap/invest/issues/1054>`_)
3.13.0 (2023-03-17)
-------------------


@ -2,11 +2,11 @@
DATA_DIR := data
GIT_SAMPLE_DATA_REPO := https://bitbucket.org/natcap/invest-sample-data.git
GIT_SAMPLE_DATA_REPO_PATH := $(DATA_DIR)/invest-sample-data
GIT_SAMPLE_DATA_REPO_REV := a58b9c7bdd8a31cab469ea919fe0ebf23a6c668e
GIT_SAMPLE_DATA_REPO_REV := 2e7cd618c661ec3f3b2a3bddfd2ce7d4704abc05
GIT_TEST_DATA_REPO := https://bitbucket.org/natcap/invest-test-data.git
GIT_TEST_DATA_REPO_PATH := $(DATA_DIR)/invest-test-data
GIT_TEST_DATA_REPO_REV := a89253d83d5f70a8ea2d8a951b2d47d603505f14
GIT_TEST_DATA_REPO_REV := e7d32d65612f4f3578a4fb57824af4e297c65283
GIT_UG_REPO := https://github.com/natcap/invest.users-guide
GIT_UG_REPO_PATH := doc/users-guide


@ -0,0 +1,94 @@
# ADR-0001: Update the InVEST SDR LS Factor
Author: James
Science Lead: Rafa
## Context
Since we released the updated InVEST SDR model in InVEST 3.1.0, we have seen a
common refrain of users and NatCap science staff noticing that the LS factor
output of SDR did not produce realistic results and that the LS factor produced
by SAGA was much more realistic. Over the years we have made several notable
changes to the model and to the LS factor that have altered the output, including:
1. The SDR model's underlying routing model was changed from d-infinity to MFD in 3.5.0
2. The $x$ parameter was changed in InVEST 3.8.1 from the true on-pixel aspect
$|\sin \theta|+|\cos \theta|$ (described in Zevenbergen & Thorne 1987 and repeated
in Desmet & Govers 1996) to the weighted mean of proportional flow from the
current pixel to its neighbors.
3. A typo in a constant value in the LS factor was corrected in InVEST 3.9.1
4. An `l_max` parameter was exposed to the user in InVEST 3.9.1
Despite these changes to the LS factor, we still received occasional reports
that the LS factor output of SDR was unrealistic and that SAGA's LS factor
was much more realistic.
After diving into the SAGA source code, we found that there are several
important differences between the two implementations, despite both using Desmet & Govers (1996)
for their LS factor equations:
1. The contributing area $A_{i,j-in}$ is not strictly defined in Desmet &
Govers (1996); it is only referred to as "the contributing area at the inlet
of a grid cell with coordinates (i, j) (m^2)".
InVEST assumes that "contributing area" is $area_{pixel} \cdot n\\_upstream\\_pixels$.
SAGA refers to this as "specific catchment area" and allows the user to choose their
specific catchment area equation, where the available options are
"contour length simply as cell size", "contour length dependent on aspect", "square
root of catchment area" and "effective flow length".
2. SAGA uses on-pixel aspect, $|\sin \theta|+|\cos \theta|$, and does not consider
flow direction derived from a routing model when calculating the LS factor.
3. The length exponent $m$ differs between the implementations. In SAGA,
$m = \beta / (1 + \beta)$. In InVEST, we have a discontinuous function where
$m$ is dependent on the slope of the current pixel and described as "classical USLE"
in the user's guide and discussed in Oliveira et al (2013).
4. SAGA's flow accumulation function [`Get_Flow()`](https://github.com/saga-gis/saga-gis/blob/master/saga-gis/src/tools/terrain_analysis/ta_hydrology/Erosion_LS_Fields.cpp#L394)
considers a pixel to be downstream only if its elevation is strictly less
than the current pixel's elevation, which implies that flow accumulation will
not navigate plateaus. InVEST's flow accumulation handles plateaus well,
which can lead to larger flow accumulation values on the same DEM.
5. SAGA's flow accumulation function `Get_Flow()` uses D8, InVEST's flow
accumulation uses MFD.
It is important to note that when evaluating differences between the SAGA and InVEST
LS Factor implementations, it is _critical_ to use a hydrologically conditioned DEM, such
as one conditioned with the Wang & Liu method, so that we control for differences in
output due to the presence of plateaus.
Once we finally understood these discrepancies, James implemented several of the
contributing area functions available in SAGA to see what might be most comparable
to the real world. Source code and a docker container for these experiments are
available at
https://github.com/phargogh/invest-ls-factor-vs-saga/blob/main/src/natcap/invest/sdr/sdr.py#L901.
Some additional discussion and notes can be viewed in the related github issue:
https://github.com/natcap/invest/issues/915.
## Decision
After inspecting the results, Rafa decided that we should make these changes to
the LS Factor calculation:
1. We will revert to using the on-pixel aspect, $|\sin \theta|+|\cos \theta|$.
This is in line with the published literature.
2. We will convert the "contributing area" portion of the LS Factor to be
$\sqrt{ n\\_upstream\\_pixels \cdot area\_{pixel} }$. Rafa's opinion on this
is that the LS factor equations were designed for a 1-dimensional situation,
so our specific catchment area number should reflect this.
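
A minimal Python sketch of the two modified terms (the function and variable
names here are illustrative, not the actual SDR implementation):

```python
import math


def ls_factor_terms(aspect_radians, n_upstream_pixels, pixel_area_m2):
    """Sketch of the two LS Factor terms changed by this decision.

    This is not the full Desmet & Govers (1996) LS equation, only the two
    pieces that the decision above modifies.
    """
    # 1. On-pixel aspect term, |sin(theta)| + |cos(theta)|, replacing the
    #    MFD-weighted mean of proportional flow to neighboring pixels.
    x = abs(math.sin(aspect_radians)) + abs(math.cos(aspect_radians))

    # 2. "Contributing area" treated as an estimate of the specific catchment
    #    area: sqrt(n_upstream_pixels * pixel_area).
    specific_catchment_area = math.sqrt(n_upstream_pixels * pixel_area_m2)

    return x, specific_catchment_area
```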
## Status
## Consequences
Once implemented and released, the LS factor outputs of SDR will be
significantly different, but they should more closely match reality.
We hope that there will be fewer support requests about this once the change is
released.
## References
Zevenbergen & Thorne (1987): https://searchworks.stanford.edu/articles/edb__89861226
Desmet & Govers (1996): https://searchworks.stanford.edu/articles/edsgac__edsgac.A18832564
Oliveira et al (2013): http://dx.doi.org/10.5772/54439


@ -0,0 +1,12 @@
# Architecture/Any Decision Records
An ADR is a way to track decisions and their rationale in a form that is tied to
the source code, easy to digest, and written so that future us will
understand it. An ADR consists of several sections:
1. The title and ADR number (for easier sorting)
2. Context about the problem
3. The decision that was made and why
4. The status of implementation
5. Consequences of the implementation
6. Any references (especially if describing a science/software issue)


@ -87,10 +87,19 @@ VALUATION_OUTPUT_FIELDS = {
}
}
SUBWATERSHED_OUTPUT_FIELDS = {
"subws_id": {
"type": "integer",
"about": gettext("Unique identifier for each subwatershed.")
},
**BASE_OUTPUT_FIELDS,
**SCARCITY_OUTPUT_FIELDS
**SCARCITY_OUTPUT_FIELDS,
}
WATERSHED_OUTPUT_FIELDS = {
"ws_id": {
"type": "integer",
"about": gettext("Unique identifier for each watershed.")
},
**BASE_OUTPUT_FIELDS,
**SCARCITY_OUTPUT_FIELDS,
**VALUATION_OUTPUT_FIELDS
@ -209,6 +218,7 @@ MODEL_SPEC = {
"units": u.none,
"about": gettext("Crop coefficient for this LULC class.")}
},
"index_col": "lucode",
"about": gettext(
"Table of biophysical parameters for each LULC class. All "
"values in the LULC raster must have corresponding entries "
@ -239,6 +249,7 @@ MODEL_SPEC = {
"units": u.meter**3/u.year/u.pixel
}
},
"index_col": "lucode",
"required": False,
"about": gettext(
"A table of water demand for each LULC class. Each LULC code "
@ -310,6 +321,7 @@ MODEL_SPEC = {
"the time span.")
}
},
"index_col": "ws_id",
"required": False,
"about": gettext(
"A table mapping each watershed to the associated valuation "
@ -328,6 +340,7 @@ MODEL_SPEC = {
},
"watershed_results_wyield.csv": {
"columns": {**WATERSHED_OUTPUT_FIELDS},
"index_col": "ws_id",
"about": "Table containing biophysical output values per watershed."
},
"subwatershed_results_wyield.shp": {
@ -337,6 +350,7 @@ MODEL_SPEC = {
},
"subwatershed_results_wyield.csv": {
"columns": {**SUBWATERSHED_OUTPUT_FIELDS},
"index_col": "subws_id",
"about": "Table containing biophysical output values per subwatershed."
},
"per_pixel": {
@ -415,12 +429,12 @@ MODEL_SPEC = {
"veg.tif": {
"about": "Map of vegetated state.",
"bands": {1: {"type": "integer"}},
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
}
}
},
"taskgraph_dir": spec_utils.TASKGRAPH_DIR
}
}
@ -509,23 +523,23 @@ def execute(args):
if invalid_parameters:
raise ValueError(f'Invalid parameters passed: {invalid_parameters}')
# valuation_params is passed to create_vector_output()
# which computes valuation if valuation_params is not None.
valuation_params = None
# valuation_df is passed to create_vector_output()
# which computes valuation if valuation_df is not None.
valuation_df = None
if 'valuation_table_path' in args and args['valuation_table_path'] != '':
LOGGER.info(
'Checking that watersheds have entries for every `ws_id` in the '
'valuation table.')
# Open/read in valuation parameters from CSV file
valuation_params = utils.read_csv_to_dataframe(
args['valuation_table_path'], 'ws_id').to_dict(orient='index')
valuation_df = utils.read_csv_to_dataframe(
args['valuation_table_path'], MODEL_SPEC['args']['valuation_table_path'])
watershed_vector = gdal.OpenEx(
args['watersheds_path'], gdal.OF_VECTOR)
watershed_layer = watershed_vector.GetLayer()
missing_ws_ids = []
for watershed_feature in watershed_layer:
watershed_ws_id = watershed_feature.GetField('ws_id')
if watershed_ws_id not in valuation_params:
if watershed_ws_id not in valuation_df.index:
missing_ws_ids.append(watershed_ws_id)
watershed_feature = None
watershed_layer = None
@ -587,7 +601,6 @@ def execute(args):
seasonality_constant = float(args['seasonality_constant'])
# Initialize a TaskGraph
work_token_dir = os.path.join(intermediate_dir, '_taskgraph_working_dir')
try:
n_workers = int(args['n_workers'])
except (KeyError, ValueError, TypeError):
@ -595,7 +608,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # single process mode.
graph = taskgraph.TaskGraph(work_token_dir, n_workers)
graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
base_raster_path_list = [
args['eto_path'],
@ -636,48 +650,43 @@ def execute(args):
'lulc': pygeoprocessing.get_raster_info(clipped_lulc_path)['nodata'][0]}
# Open/read in the csv file into a dictionary and add to arguments
bio_dict = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'lucode').to_dict(orient='index')
bio_lucodes = set(bio_dict.keys())
bio_df = utils.read_csv_to_dataframe(args['biophysical_table_path'],
MODEL_SPEC['args']['biophysical_table_path'])
bio_lucodes = set(bio_df.index.values)
bio_lucodes.add(nodata_dict['lulc'])
LOGGER.debug(f'bio_lucodes: {bio_lucodes}')
if 'demand_table_path' in args and args['demand_table_path'] != '':
demand_dict = utils.read_csv_to_dataframe(
args['demand_table_path'], 'lucode').to_dict(orient='index')
demand_df = utils.read_csv_to_dataframe(
args['demand_table_path'], MODEL_SPEC['args']['demand_table_path'])
demand_reclassify_dict = dict(
[(lucode, demand_dict[lucode]['demand'])
for lucode in demand_dict])
demand_lucodes = set(demand_dict.keys())
[(lucode, row['demand']) for lucode, row in demand_df.iterrows()])
demand_lucodes = set(demand_df.index.values)
demand_lucodes.add(nodata_dict['lulc'])
LOGGER.debug(f'demand_lucodes: {demand_lucodes}', )
else:
demand_lucodes = None
# Break the bio_dict into three separate dictionaries based on
# Break the bio_df into three separate dictionaries based on
# Kc, root_depth, and LULC_veg fields to use for reclassifying
Kc_dict = {}
root_dict = {}
vegetated_dict = {}
for lulc_code in bio_dict:
Kc_dict[lulc_code] = bio_dict[lulc_code]['kc']
for lulc_code, row in bio_df.iterrows():
Kc_dict[lulc_code] = row['kc']
# Catch invalid LULC_veg values with an informative error.
lulc_veg_value = bio_dict[lulc_code]['lulc_veg']
try:
vegetated_dict[lulc_code] = int(lulc_veg_value)
if vegetated_dict[lulc_code] not in set([0, 1]):
raise ValueError()
except ValueError:
if row['lulc_veg'] not in set([0, 1]):
# If the user provided an invalid LULC_veg value, raise an
# informative error.
raise ValueError(
f'LULC_veg value must be either 1 or 0, not {lulc_veg_value}')
f'LULC_veg value must be either 1 or 0, not {row["lulc_veg"]}')
vegetated_dict[lulc_code] = row['lulc_veg']
# If LULC_veg value is 1 get root depth value
if vegetated_dict[lulc_code] == 1:
root_dict[lulc_code] = bio_dict[lulc_code]['root_depth']
root_dict[lulc_code] = row['root_depth']
# If LULC_veg value is 0 then we do not care about root
# depth value so will just substitute in a 1. This
# value will not end up being used.
@ -843,7 +852,7 @@ def execute(args):
write_output_vector_attributes_task = graph.add_task(
func=write_output_vector_attributes,
args=(target_ws_path, ws_id_name, zonal_stats_pickle_list,
valuation_params),
valuation_df),
target_path_list=[target_ws_path],
dependent_task_list=[
*zonal_stats_task_list, copy_watersheds_vector_task],
@ -879,7 +888,7 @@ def copy_vector(base_vector_path, target_vector_path):
def write_output_vector_attributes(target_vector_path, ws_id_name,
stats_path_list, valuation_params):
stats_path_list, valuation_df):
"""Add data attributes to the vector outputs of this model.
Join results of zonal stats to copies of the watershed shapefiles.
@ -893,7 +902,7 @@ def write_output_vector_attributes(target_vector_path, ws_id_name,
represent watersheds or subwatersheds.
stats_path_list (list): List of file paths to pickles storing the zonal
stats results.
valuation_params (dict): The dictionary built from
valuation_df (pandas.DataFrame): dataframe built from
args['valuation_table_path']. Or None if valuation table was not
provided.
@ -929,10 +938,10 @@ def write_output_vector_attributes(target_vector_path, ws_id_name,
_add_zonal_stats_dict_to_shape(
target_vector_path, ws_stats_dict, key_name, 'mean')
if valuation_params:
if valuation_df is not None:
# only do valuation for watersheds, not subwatersheds
if ws_id_name == 'ws_id':
compute_watershed_valuation(target_vector_path, valuation_params)
compute_watershed_valuation(target_vector_path, valuation_df)
def convert_vector_to_csv(base_vector_path, target_csv_path):
@ -1141,14 +1150,14 @@ def pet_op(eto_pix, Kc_pix, eto_nodata, output_nodata):
return result
def compute_watershed_valuation(watershed_results_vector_path, val_dict):
def compute_watershed_valuation(watershed_results_vector_path, val_df):
"""Compute net present value and energy for the watersheds.
Args:
watershed_results_vector_path (string):
Path to an OGR shapefile for the watershed results.
Where the results will be added.
val_dict (dict): a python dictionary that has all the valuation
val_df (pandas.DataFrame): a dataframe that has all the valuation
parameters for each watershed.
Returns:
@ -1183,26 +1192,23 @@ def compute_watershed_valuation(watershed_results_vector_path, val_dict):
# there won't be a rsupply_vl value if the polygon feature only
# covers nodata raster values, so check before doing math.
if rsupply_vl is not None:
# Get the valuation parameters for watershed 'ws_id'
val_row = val_dict[ws_id]
# Compute hydropower energy production (KWH)
# This is from the equation given in the Users' Guide
energy = (
val_row['efficiency'] * val_row['fraction'] *
val_row['height'] * rsupply_vl * 0.00272)
val_df['efficiency'][ws_id] * val_df['fraction'][ws_id] *
val_df['height'][ws_id] * rsupply_vl * 0.00272)
dsum = 0
# Divide by 100 because it is input at a percent and we need
# decimal value
disc = val_row['discount'] / 100
disc = val_df['discount'][ws_id] / 100
# To calculate the summation of the discount rate term over the life
# span of the dam we can use a geometric series
ratio = 1 / (1 + disc)
if ratio != 1:
dsum = (1 - math.pow(ratio, val_row['time_span'])) / (1 - ratio)
dsum = (1 - math.pow(ratio, val_df['time_span'][ws_id])) / (1 - ratio)
npv = ((val_row['kw_price'] * energy) - val_row['cost']) * dsum
npv = ((val_df['kw_price'][ws_id] * energy) - val_df['cost'][ws_id]) * dsum
# Get the volume field index and add value
ws_feat.SetField(energy_field, energy)


@ -130,6 +130,7 @@ MODEL_SPEC = {
"units": u.metric_ton/u.hectare,
"about": gettext("Carbon density of dead matter.")}
},
"index_col": "lucode",
"about": gettext(
"A table that maps each LULC code to carbon pool data for "
"that LULC type."),
@ -254,10 +255,10 @@ MODEL_SPEC = {
"intermediate": {
"type": "directory",
"contents": {
**CARBON_OUTPUTS,
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
**CARBON_OUTPUTS
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -366,11 +367,9 @@ def execute(args):
(_INTERMEDIATE_BASE_FILES, intermediate_output_dir),
(_TMP_BASE_FILES, output_dir)], file_suffix)
carbon_pool_table = utils.read_csv_to_dataframe(
args['carbon_pools_path'], 'lucode').to_dict(orient='index')
carbon_pool_df = utils.read_csv_to_dataframe(
args['carbon_pools_path'], MODEL_SPEC['args']['carbon_pools_path'])
work_token_dir = os.path.join(
intermediate_output_dir, '_taskgraph_working_dir')
try:
n_workers = int(args['n_workers'])
except (KeyError, ValueError, TypeError):
@ -378,7 +377,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # Synchronous mode.
graph = taskgraph.TaskGraph(work_token_dir, n_workers)
graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
cell_size_set = set()
raster_size_set = set()
@ -413,9 +413,7 @@ def execute(args):
carbon_map_task_lookup[scenario_type] = []
storage_path_list = []
for pool_type in ['c_above', 'c_below', 'c_soil', 'c_dead']:
carbon_pool_by_type = dict([
(lucode, float(carbon_pool_table[lucode][pool_type]))
for lucode in carbon_pool_table])
carbon_pool_by_type = carbon_pool_df[pool_type].to_dict()
lulc_key = 'lulc_%s_path' % scenario_type
storage_key = '%s_%s' % (pool_type, scenario_type)


@ -97,6 +97,7 @@ import time
import shutil
import numpy
import pandas
import pygeoprocessing
import scipy.sparse
import taskgraph
@ -117,6 +118,9 @@ INVALID_ANALYSIS_YEAR_MSG = gettext(
"({latest_year})")
INVALID_SNAPSHOT_RASTER_MSG = gettext(
"Raster for snapshot {snapshot_year} could not be validated.")
INVALID_TRANSITION_VALUES_MSG = gettext(
"The transition table expects values of {model_transitions} but found "
"values of {transition_values}.")
POOL_SOIL = 'soil'
POOL_BIOMASS = 'biomass'
@ -154,7 +158,6 @@ NET_PRESENT_VALUE_RASTER_PATTERN = 'net-present-value-at-{year}{suffix}.tif'
CARBON_STOCK_AT_YEAR_RASTER_PATTERN = 'carbon-stock-at-{year}{suffix}.tif'
INTERMEDIATE_DIR_NAME = 'intermediate'
TASKGRAPH_CACHE_DIR_NAME = 'task_cache'
OUTPUT_DIR_NAME = 'output'
MODEL_SPEC = {
@ -167,10 +170,10 @@ MODEL_SPEC = {
"n_workers": spec_utils.N_WORKERS,
"landcover_snapshot_csv": {
"type": "csv",
"index_col": "snapshot_year",
"columns": {
"snapshot_year": {
"type": "number",
"units": u.year_AD,
"type": "integer",
"about": gettext(
"The snapshot year that this row's LULC raster "
"represents. Each year in this table must be unique.")
@ -204,6 +207,7 @@ MODEL_SPEC = {
"biophysical_table_path": {
"name": gettext("biophysical table"),
"type": "csv",
"index_col": "code",
"columns": {
"code": {
"type": "integer",
@ -300,11 +304,12 @@ MODEL_SPEC = {
"landcover_transitions_table": {
"name": gettext("landcover transitions table"),
"type": "csv",
"index_col": "lulc-class",
"columns": {
"lulc-class": {
"type": "integer",
"type": "freestyle_string",
"about": gettext(
"LULC codes matching the codes in the biophysical "
"LULC class names matching those in the biophysical "
"table.")},
"[LULC CODE]": {
"type": "option_string",
@ -382,6 +387,7 @@ MODEL_SPEC = {
"name": gettext("price table"),
"type": "csv",
"required": "use_price_table",
"index_col": "year",
"columns": {
"year": {
"type": "number",
@ -517,7 +523,7 @@ MODEL_SPEC = {
}
}
},
"task_cache": spec_utils.TASKGRAPH_DIR
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -564,7 +570,10 @@ def execute(args):
task_graph, n_workers, intermediate_dir, output_dir, suffix = (
_set_up_workspace(args))
snapshots = _extract_snapshots_from_table(args['landcover_snapshot_csv'])
snapshots = utils.read_csv_to_dataframe(
args['landcover_snapshot_csv'],
MODEL_SPEC['args']['landcover_snapshot_csv']
)['raster_path'].to_dict()
# Phase 1: alignment and preparation of inputs
baseline_lulc_year = min(snapshots.keys())
@ -584,15 +593,14 @@ def execute(args):
# We're assuming that the LULC initial variables and the carbon pool
# transient table are combined into a single lookup table.
biophysical_parameters = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'code').to_dict(orient='index')
biophysical_df = utils.read_csv_to_dataframe(
args['biophysical_table_path'],
MODEL_SPEC['args']['biophysical_table_path'])
# LULC Classnames are critical to the transition mapping, so they must be
# unique. This check is here in ``execute`` because it's possible that
# someone might have a LOT of classes in their biophysical table.
unique_lulc_classnames = set(
params['lulc-class'] for params in biophysical_parameters.values())
if len(unique_lulc_classnames) != len(biophysical_parameters):
if not biophysical_df['lulc-class'].is_unique:
raise ValueError(
"All values in `lulc-class` column must be unique, but "
"duplicates were found.")
@ -630,7 +638,7 @@ def execute(args):
task_name='Align input landcover rasters.')
(disturbance_matrices, accumulation_matrices) = _read_transition_matrix(
args['landcover_transitions_table'], biophysical_parameters)
args['landcover_transitions_table'], biophysical_df)
# Baseline stocks are simply reclassified.
# Baseline accumulation are simply reclassified
@ -664,8 +672,7 @@ def execute(args):
func=pygeoprocessing.reclassify_raster,
args=(
(aligned_lulc_paths[baseline_lulc_year], 1),
{lucode: values[f'{pool}-initial'] for (lucode, values)
in biophysical_parameters.items()},
biophysical_df[f'{pool}-initial'].to_dict(),
stock_rasters[baseline_lulc_year][pool],
gdal.GDT_Float32,
NODATA_FLOAT32_MIN),
@ -682,9 +689,7 @@ def execute(args):
func=pygeoprocessing.reclassify_raster,
args=(
(aligned_lulc_paths[baseline_lulc_year], 1),
{lucode: values[f'{pool}-yearly-accumulation']
for (lucode, values)
in biophysical_parameters.items()},
biophysical_df[f'{pool}-yearly-accumulation'].to_dict(),
yearly_accum_rasters[baseline_lulc_year][pool],
gdal.GDT_Float32,
NODATA_FLOAT32_MIN),
@ -805,9 +810,7 @@ def execute(args):
func=pygeoprocessing.reclassify_raster,
args=(
(aligned_lulc_paths[prior_transition_year], 1),
{lucode: values[f'{pool}-half-life']
for (lucode, values)
in biophysical_parameters.items()},
biophysical_df[f'{pool}-half-life'].to_dict(),
halflife_rasters[current_transition_year][pool],
gdal.GDT_Float32,
NODATA_FLOAT32_MIN),
@ -868,9 +871,7 @@ def execute(args):
yearly_accum_tasks[current_transition_year][POOL_LITTER] = task_graph.add_task(
func=pygeoprocessing.reclassify_raster,
args=((aligned_lulc_paths[current_transition_year], 1),
{lucode: values[f'{POOL_LITTER}-yearly-accumulation']
for (lucode, values) in
biophysical_parameters.items()},
biophysical_df[f'{POOL_LITTER}-yearly-accumulation'].to_dict(),
yearly_accum_rasters[current_transition_year][POOL_LITTER],
gdal.GDT_Float32,
NODATA_FLOAT32_MIN),
@ -962,11 +963,10 @@ def execute(args):
prices = None
if args.get('do_economic_analysis', False): # Do if truthy
if args.get('use_price_table', False):
prices = {
year: values['price'] for (year, values) in
utils.read_csv_to_dataframe(
args['price_table_path'], 'year'
).to_dict(orient='index').items()}
prices = utils.read_csv_to_dataframe(
args['price_table_path'],
MODEL_SPEC['args']['price_table_path']
)['price'].to_dict()
else:
inflation_rate = float(args['inflation_rate']) * 0.01
annual_price = float(args['price'])
@ -1068,10 +1068,9 @@ def _set_up_workspace(args):
# TypeError when n_workers is None.
n_workers = -1 # Synchronous mode.
taskgraph_cache_dir = os.path.join(
args['workspace_dir'], TASKGRAPH_CACHE_DIR_NAME)
task_graph = taskgraph.TaskGraph(
taskgraph_cache_dir, n_workers, reporting_interval=5.0)
os.path.join(args['workspace_dir'], 'taskgraph_cache'),
n_workers, reporting_interval=5.0)
suffix = utils.make_suffix_string(args, 'results_suffix')
intermediate_dir = os.path.join(
@ -1079,7 +1078,7 @@ def _set_up_workspace(args):
output_dir = os.path.join(
args['workspace_dir'], OUTPUT_DIR_NAME)
utils.make_directories([output_dir, intermediate_dir, taskgraph_cache_dir])
utils.make_directories([output_dir, intermediate_dir])
return task_graph, n_workers, intermediate_dir, output_dir, suffix
@ -1957,7 +1956,7 @@ def _sum_n_rasters(
target_raster = None
def _read_transition_matrix(transition_csv_path, biophysical_dict):
def _read_transition_matrix(transition_csv_path, biophysical_df):
"""Read a transition CSV table in to a series of sparse matrices.
Args:
@ -1975,7 +1974,7 @@ def _read_transition_matrix(transition_csv_path, biophysical_dict):
* ``'high-impact-disturb'`` indicating a
high-impact disturbance
* ``''`` (blank), which is equivalent to no carbon change.
biophysical_dict (dict): A ``dict`` mapping of integer landcover codes
biophysical_df (pandas.DataFrame): A table mapping integer landcover codes
to biophysical values for disturbance and accumulation values for
soil and biomass carbon pools.
@ -1987,14 +1986,13 @@ def _read_transition_matrix(transition_csv_path, biophysical_dict):
the pool for the landcover transition.
"""
table = utils.read_csv_to_dataframe(
transition_csv_path, convert_cols_to_lower=False, convert_vals_to_lower=False)
transition_csv_path, MODEL_SPEC['args']['landcover_transitions_table']
).reset_index()
lulc_class_to_lucode = {}
max_lucode = 0
for (lucode, values) in biophysical_dict.items():
lulc_class_to_lucode[
str(values['lulc-class']).strip().lower()] = lucode
max_lucode = max(max_lucode, lucode)
max_lucode = biophysical_df.index.max()
for lucode, row in biophysical_df.iterrows():
lulc_class_to_lucode[row['lulc-class']] = lucode
# Load up a sparse matrix with the transitions to save on memory usage.
# The number of possible rows/cols is the value of the maximum possible
@ -2029,24 +2027,19 @@ def _read_transition_matrix(transition_csv_path, biophysical_dict):
"blank line encountered.")
break
# Strip any whitespace to eliminate leading/trailing whitespace
row = row.str.strip()
# skip rows starting with a blank cell, these are part of the legend
if not row['lulc-class']:
if pandas.isna(row['lulc-class']):
continue
try:
from_colname = str(row['lulc-class']).lower()
from_lucode = lulc_class_to_lucode[from_colname]
from_lucode = lulc_class_to_lucode[row['lulc-class']]
except KeyError:
raise ValueError("The transition table's 'lulc-class' column has "
f"a value, '{from_colname}', that was expected "
f"a value, '{row['lulc-class']}', that was expected "
"in the biophysical table but could not be "
"found.")
for colname, field_value in row.items():
to_colname = str(colname).strip().lower()
for to_colname, field_value in row.items():
# Skip the top row, only contains headers.
if to_colname == 'lulc-class':
@ -2062,27 +2055,24 @@ def _read_transition_matrix(transition_csv_path, biophysical_dict):
# Only set values where the transition HAS a value.
# Takes advantage of the sparse characteristic of the model.
if (isinstance(field_value, float) and
numpy.isnan(field_value)):
if pandas.isna(field_value):
continue
# When transition is a disturbance, we use the source landcover's
# disturbance values.
if field_value.endswith('disturb'):
soil_disturbance_matrix[from_lucode, to_lucode] = (
biophysical_dict[from_lucode][f'soil-{field_value}'])
biophysical_df[f'soil-{field_value}'][from_lucode])
biomass_disturbance_matrix[from_lucode, to_lucode] = (
biophysical_dict[from_lucode][f'biomass-{field_value}'])
biophysical_df[f'biomass-{field_value}'][from_lucode])
# When we're transitioning to a landcover that accumulates, use the
# target landcover's accumulation value.
elif field_value == 'accum':
soil_accumulation_matrix[from_lucode, to_lucode] = (
biophysical_dict[to_lucode][
'soil-yearly-accumulation'])
biophysical_df['soil-yearly-accumulation'][to_lucode])
biomass_accumulation_matrix[from_lucode, to_lucode] = (
biophysical_dict[to_lucode][
'biomass-yearly-accumulation'])
biophysical_df['biomass-yearly-accumulation'][to_lucode])
disturbance_matrices = {
'soil': soil_disturbance_matrix,
@ -2224,37 +2214,6 @@ def _reclassify_disturbance_magnitude(
target_raster_path, gdal.GDT_Float32, NODATA_FLOAT32_MIN)
def _extract_snapshots_from_table(csv_path):
"""Extract the year/raster snapshot mapping from a CSV.
No validation is performed on the years or raster paths.
Args:
csv_path (string): The path to a CSV on disk containing snapshot
years and a corresponding transition raster path. Snapshot years
may be in any order in the CSV, but must be integers and no two
years may be the same. Snapshot raster paths must refer to a
raster file located on disk representing the landcover at that
transition. If the path is absolute, the path will be used as
given. If the path is relative, the path will be interpreted as
relative to the parent directory of this CSV file.
Returns:
A ``dict`` mapping int snapshot years to their corresponding raster
paths. These raster paths will be absolute paths.
"""
table = utils.read_csv_to_dataframe(
csv_path, convert_vals_to_lower=False, expand_path_cols=['raster_path'])
output_dict = {}
table.set_index("snapshot_year", drop=False, inplace=True)
for index, row in table.iterrows():
output_dict[int(index)] = row['raster_path']
return output_dict
@validation.invest_validator
def validate(args, limit_to=None):
"""Validate an input dictionary for Coastal Blue Carbon.
@ -2277,8 +2236,10 @@ def validate(args, limit_to=None):
if ("landcover_snapshot_csv" not in invalid_keys and
"landcover_snapshot_csv" in sufficient_keys):
snapshots = _extract_snapshots_from_table(
args['landcover_snapshot_csv'])
snapshots = utils.read_csv_to_dataframe(
args['landcover_snapshot_csv'],
MODEL_SPEC['args']['landcover_snapshot_csv']
)['raster_path'].to_dict()
for snapshot_year, snapshot_raster_path in snapshots.items():
raster_error_message = validation.check_raster(
@ -2299,4 +2260,26 @@ def validate(args, limit_to=None):
analysis_year=args['analysis_year'],
latest_year=max(snapshots.keys()))))
# check for invalid options in the translation table
if ("landcover_transitions_table" not in invalid_keys and
"landcover_transitions_table" in sufficient_keys):
transitions_spec = MODEL_SPEC['args']['landcover_transitions_table']
transition_options = list(
transitions_spec['columns']['[LULC CODE]']['options'].keys())
# lowercase options since utils call will lowercase table values
transition_options = [x.lower() for x in transition_options]
transitions_df = utils.read_csv_to_dataframe(
args['landcover_transitions_table'], transitions_spec)
transitions_mask = ~transitions_df.isin(transition_options) & ~transitions_df.isna()
if transitions_mask.any(axis=None):
transition_numpy_mask = transitions_mask.values
transition_numpy_values = transitions_df.to_numpy()
bad_transition_values = list(
numpy.unique(transition_numpy_values[transition_numpy_mask]))
validation_warnings.append((
['landcover_transitions_table'],
INVALID_TRANSITION_VALUES_MSG.format(
model_transitions=(transition_options),
transition_values=bad_transition_values)))
return validation_warnings


@ -36,6 +36,7 @@ MODEL_SPEC = {
"A table mapping LULC codes from the snapshot rasters to the "
"corresponding LULC class names, and whether or not the "
"class is a coastal blue carbon habitat."),
"index_col": "code",
"columns": {
"code": {
"type": "integer",
@ -55,10 +56,10 @@ MODEL_SPEC = {
},
"landcover_snapshot_csv": {
"type": "csv",
"index_col": "snapshot_year",
"columns": {
"snapshot_year": {
"type": "number",
"units": u.year_AD,
"type": "integer",
"about": gettext("Year to snapshot.")},
"raster_path": {
"type": "raster",
@ -82,6 +83,7 @@ MODEL_SPEC = {
"source LULC class, and the first row represents the "
"destination LULC classes. Cells are populated with "
"transition states, or left empty if no such transition occurs."),
"index_col": "lulc-class",
"columns": {
"lulc-class": {
"type": "integer",
@ -112,6 +114,7 @@ MODEL_SPEC = {
"Table mapping each LULC type to impact and accumulation "
"information. This is a template that you will fill out to "
"create the biophysical table input to the main model."),
"index_col": "code",
"columns": {
**BIOPHYSICAL_COLUMNS_SPEC,
# remove "expression" property which doesn't go in output spec
@ -131,7 +134,7 @@ MODEL_SPEC = {
"to match all the other LULC maps."),
"bands": {1: {"type": "integer"}}
},
"task_cache": spec_utils.TASKGRAPH_DIR
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -164,8 +167,7 @@ def execute(args):
"""
suffix = utils.make_suffix_string(args, 'results_suffix')
output_dir = os.path.join(args['workspace_dir'], 'outputs_preprocessor')
taskgraph_cache_dir = os.path.join(args['workspace_dir'], 'task_cache')
utils.make_directories([output_dir, taskgraph_cache_dir])
utils.make_directories([output_dir])
try:
n_workers = int(args['n_workers'])
@ -175,11 +177,13 @@ def execute(args):
# TypeError when n_workers is None.
n_workers = -1 # Synchronous mode.
task_graph = taskgraph.TaskGraph(
taskgraph_cache_dir, n_workers, reporting_interval=5.0)
os.path.join(args['workspace_dir'], 'taskgraph_cache'),
n_workers, reporting_interval=5.0)
snapshots_dict = (
coastal_blue_carbon._extract_snapshots_from_table(
args['landcover_snapshot_csv']))
snapshots_dict = utils.read_csv_to_dataframe(
args['landcover_snapshot_csv'],
MODEL_SPEC['args']['landcover_snapshot_csv']
)['raster_path'].to_dict()
# Align the raster stack for analyzing the various transitions.
min_pixel_size = float('inf')
@ -209,14 +213,15 @@ def execute(args):
target_path_list=aligned_snapshot_paths,
task_name='Align input landcover rasters')
landcover_table = utils.read_csv_to_dataframe(
args['lulc_lookup_table_path'], 'code').to_dict(orient='index')
landcover_df = utils.read_csv_to_dataframe(
args['lulc_lookup_table_path'],
MODEL_SPEC['args']['lulc_lookup_table_path'])
target_transition_table = os.path.join(
output_dir, TRANSITION_TABLE.format(suffix=suffix))
_ = task_graph.add_task(
func=_create_transition_table,
args=(landcover_table,
args=(landcover_df,
aligned_snapshot_paths,
target_transition_table),
target_path_list=[target_transition_table],
@ -227,7 +232,7 @@ def execute(args):
output_dir, BIOPHYSICAL_TABLE.format(suffix=suffix))
_ = task_graph.add_task(
func=_create_biophysical_table,
args=(landcover_table, target_biophysical_table_path),
args=(landcover_df, target_biophysical_table_path),
target_path_list=[target_biophysical_table_path],
task_name='Write biophysical table template')
@ -235,20 +240,20 @@ def execute(args):
task_graph.join()
def _create_transition_table(landcover_table, lulc_snapshot_list,
def _create_transition_table(landcover_df, lulc_snapshot_list,
target_table_path):
"""Create the transition table from a series of landcover snapshots.
Args:
landcover_table (dict): A dict mapping integer landcover codes to dict
values indicating the landcover class name in the ``lulc-class``
field and ``True`` or ``False`` under the
``is_coastal_blue_carbon_habitat`` key.
landcover_df (pandas.DataFrame): A table mapping integer landcover
codes to values indicating the landcover class name in the
``lulc-class`` column and ``True`` or ``False`` under the
``is_coastal_blue_carbon_habitat`` column.
lulc_snapshot_list (list): A list of string paths to GDAL rasters on
disk. All rasters must have the same spatial reference, pixel size
and dimensions and must also all be integer rasters, where all
non-nodata pixel values must be represented in the
``landcover_table`` dict.
``landcover_df`` dataframe.
target_table_path (string): A string path to where the target
transition table should be written.
@ -317,13 +322,13 @@ def _create_transition_table(landcover_table, lulc_snapshot_list,
sparse_transition_table = {}
for from_lucode, to_lucode in transition_pairs:
try:
from_is_cbc = landcover_table[
from_lucode]['is_coastal_blue_carbon_habitat']
to_is_cbc = landcover_table[
to_lucode]['is_coastal_blue_carbon_habitat']
from_is_cbc = landcover_df[
'is_coastal_blue_carbon_habitat'][from_lucode]
to_is_cbc = landcover_df[
'is_coastal_blue_carbon_habitat'][to_lucode]
except KeyError:
for variable in (from_lucode, to_lucode):
if variable not in landcover_table:
if variable not in landcover_df.index:
raise ValueError(
'The landcover table is missing a row with the '
f'landuse code {variable}.')
@ -331,14 +336,14 @@ def _create_transition_table(landcover_table, lulc_snapshot_list,
sparse_transition_table[(from_lucode, to_lucode)] = (
transition_types[(from_is_cbc, to_is_cbc)])
code_list = sorted([code for code in landcover_table.keys()])
code_list = sorted(landcover_df.index)
lulc_class_list_sorted = [
landcover_table[code]['lulc-class'] for code in code_list]
landcover_df['lulc-class'][code] for code in code_list]
with open(target_table_path, 'w') as csv_file:
fieldnames = ['lulc-class'] + lulc_class_list_sorted
csv_file.write(f"{','.join(fieldnames)}\n")
for row_code in code_list:
class_name = landcover_table[row_code]['lulc-class']
class_name = landcover_df['lulc-class'][row_code]
row = [class_name]
for col_code in code_list:
try:
@ -361,7 +366,7 @@ def _create_transition_table(landcover_table, lulc_snapshot_list,
csv_file.write("\n,NCC (no-carbon-change)")
def _create_biophysical_table(landcover_table, target_biophysical_table_path):
def _create_biophysical_table(landcover_df, target_biophysical_table_path):
"""Write the biophysical table template to disk.
The biophysical table templates contains all of the fields required by the
@ -370,8 +375,8 @@ def _create_biophysical_table(landcover_table, target_biophysical_table_path):
table.
Args:
landcover_table (dict): A dict mapping int landcover codes to a dict
with string keys that map to numeric or string column values.
landcover_df (pandas.DataFrame): A table mapping int landcover codes
to biophysical data
target_biophysical_table_path (string): The path to where the
biophysical table template will be stored on disk.
@ -384,16 +389,19 @@ def _create_biophysical_table(landcover_table, target_biophysical_table_path):
with open(target_biophysical_table_path, 'w') as bio_table:
bio_table.write(f"{','.join(target_column_names)}\n")
for lulc_code in sorted(landcover_table.keys()):
for lulc_code, row in landcover_df.sort_index().iterrows():
# 2 columns are defined below, and we need 1 less comma to only
# have commas between fields.
row = []
for colname in target_column_names:
try:
# Use the user's defined value if it exists
row.append(str(landcover_table[lulc_code][colname]))
except KeyError:
row.append('')
if colname == 'code':
row.append(str(lulc_code))
else:
try:
# Use the user's defined value if it exists
row.append(str(landcover_df[colname][lulc_code]))
except KeyError:
row.append('')
bio_table.write(f"{','.join(row)}\n")


@ -247,17 +247,11 @@ MODEL_SPEC = {
"represented by any value and absence of the habitat "
"can be represented by 0 and nodata values.")},
"rank": {
"type": "option_string",
"options": {
"1": {"description": gettext("very high protection")},
"2": {"description": gettext("high protection")},
"3": {"description": gettext("moderate protection")},
"4": {"description": gettext("low protection")},
"5": {"description": gettext("very low protection")}
},
"type": "integer",
"about": gettext(
"Relative amount of coastline protection this habitat "
"provides.")
"provides, from 1 (very high protection) to 5 "
"(very low protection.")
},
"protection distance (m)": {
"type": "number",
@ -268,6 +262,7 @@ MODEL_SPEC = {
"no protection to the coastline.")
},
},
"index_col": "id",
"about": gettext(
"Table that specifies spatial habitat data and parameters."),
"name": gettext("habitats table")
@ -365,6 +360,7 @@ MODEL_SPEC = {
},
"coastal_exposure.csv": {
"about": "This is an identical copy of the attribute table of coastal_exposure.gpkg provided in csv format for convenience. Users may wish to modify or add to the columns of this table in order to calculate exposure indices for custom scenarios.",
"index_col": "shore_id",
"columns": FINAL_OUTPUT_FIELDS
},
"intermediate": {
@ -460,6 +456,7 @@ MODEL_SPEC = {
"habitat_protection.csv": {
"about": (
"Shore points with associated habitat data"),
"index_col": "shore_id",
"columns": {
"shore_id": {
"type": "integer",
@ -685,10 +682,10 @@ MODEL_SPEC = {
"fields": WWIII_FIELDS
}
}
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -800,8 +797,6 @@ def execute(args):
geomorph_dir, wind_wave_dir, surge_dir, population_dir, slr_dir])
file_suffix = utils.make_suffix_string(args, 'results_suffix')
taskgraph_cache_dir = os.path.join(
intermediate_dir, '_taskgraph_working_dir')
try:
n_workers = int(args['n_workers'])
except (KeyError, ValueError, TypeError):
@ -809,7 +804,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # Single process mode.
task_graph = taskgraph.TaskGraph(taskgraph_cache_dir, n_workers)
task_graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
model_resolution = float(args['model_resolution'])
max_fetch_distance = float(args['max_fetch_distance'])
@ -2315,42 +2311,41 @@ def _schedule_habitat_tasks(
"""
habitat_dataframe = utils.read_csv_to_dataframe(
habitat_table_path, convert_vals_to_lower=False, expand_path_cols=['path'])
habitat_dataframe = habitat_dataframe.rename(
columns={'protection distance (m)': 'distance'})
habitat_table_path, MODEL_SPEC['args']['habitat_table_path']
).rename(columns={'protection distance (m)': 'distance'})
habitat_task_list = []
habitat_pickles_list = []
for habitat_row in habitat_dataframe.itertuples():
for _id, habitat_row in habitat_dataframe.iterrows():
target_habitat_pickle_path = os.path.join(
working_dir, f'{habitat_row.id}{file_suffix}.pickle')
working_dir, f'{_id}{file_suffix}.pickle')
habitat_pickles_list.append(target_habitat_pickle_path)
gis_type = pygeoprocessing.get_gis_type(habitat_row.path)
if gis_type == 2:
habitat_task_list.append(task_graph.add_task(
func=search_for_vector_habitat,
args=(base_shore_point_vector_path,
habitat_row.distance,
habitat_row.rank,
habitat_row.id,
habitat_row.path,
habitat_row['distance'],
habitat_row['rank'],
_id,
habitat_row['path'],
target_habitat_pickle_path),
target_path_list=[target_habitat_pickle_path],
task_name=f'searching for {habitat_row.id}'))
task_name=f'searching for {_id}'))
continue
if gis_type == 1:
habitat_task_list.append(task_graph.add_task(
func=search_for_raster_habitat,
args=(base_shore_point_vector_path,
habitat_row.distance,
habitat_row.rank,
habitat_row.id,
habitat_row.path,
habitat_row['distance'],
habitat_row['rank'],
_id,
habitat_row['path'],
target_habitat_pickle_path,
model_resolution,
file_suffix),
target_path_list=[target_habitat_pickle_path],
task_name=f'searching for {habitat_row.id}'))
task_name=f'searching for {_id}'))
return habitat_task_list, habitat_pickles_list
@ -2835,12 +2830,14 @@ def assemble_results_and_calculate_exposure(
final_values_dict[var_name] = pickle.load(file)
habitat_df = utils.read_csv_to_dataframe(
habitat_protection_path, convert_cols_to_lower=False, convert_vals_to_lower=False)
habitat_protection_path, MODEL_SPEC['outputs']['intermediate'][
'contents']['habitats']['contents']['habitat_protection.csv']
).rename(columns={'r_hab': 'R_hab'})
output_layer.StartTransaction()
for feature in output_layer:
shore_id = feature.GetField(SHORE_ID_FIELD)
# The R_hab ranks were stored in a CSV, now this dataframe:
rank = habitat_df[habitat_df[SHORE_ID_FIELD] == shore_id][R_hab_name]
rank = habitat_df.loc[shore_id, R_hab_name]
feature.SetField(str(R_hab_name), float(rank))
# The other variables were stored in pickles, now this dict:
for fieldname in final_values_dict:
@ -3235,7 +3232,6 @@ def _aggregate_raster_values_in_radius(
kernel_mask &= ~utils.array_equals_nodata(array, nodata)
result[shore_id] = aggregation_op(array, kernel_mask)
with open(target_pickle_path, 'wb') as pickle_file:
pickle.dump(result, pickle_file)
@ -3465,8 +3461,7 @@ def _validate_habitat_table_paths(habitat_table_path):
ValueError if any vector in the ``path`` column cannot be opened.
"""
habitat_dataframe = utils.read_csv_to_dataframe(
habitat_table_path, convert_cols_to_lower=False, convert_vals_to_lower=False,
expand_path_cols=['path'])
habitat_table_path, MODEL_SPEC['args']['habitat_table_path'])
bad_paths = []
for habitat_row in habitat_dataframe.itertuples():
try:


@ -22,6 +22,87 @@ from .crop_production_regression import NUTRIENTS
LOGGER = logging.getLogger(__name__)
CROP_OPTIONS = {
# TODO: use human-readable translatable crop names (#614)
crop: {"description": crop} for crop in [
"abaca", "agave", "alfalfa", "almond", "aniseetc",
"apple", "apricot", "areca", "artichoke", "asparagus",
"avocado", "bambara", "banana", "barley", "bean",
"beetfor", "berrynes", "blueberry", "brazil",
"canaryseed", "carob", "carrot", "carrotfor", "cashew",
"broadbean", "buckwheat", "cabbage", "cabbagefor",
"cashewapple", "cassava", "castor", "cauliflower",
"cerealnes", "cherry", "chestnut", "chickpea",
"chicory", "chilleetc", "cinnamon", "citrusnes",
"clove", "clover", "cocoa", "coconut", "coffee",
"cotton", "cowpea", "cranberry", "cucumberetc",
"currant", "date", "eggplant", "fibrenes", "fig",
"flax", "fonio", "fornes", "fruitnes", "garlic",
"ginger", "gooseberry", "grape", "grapefruitetc",
"grassnes", "greenbean", "greenbroadbean", "greencorn",
"greenonion", "greenpea", "groundnut", "hazelnut",
"hemp", "hempseed", "hop", "jute", "jutelikefiber",
"kapokfiber", "kapokseed", "karite", "kiwi", "kolanut",
"legumenes", "lemonlime", "lentil", "lettuce",
"linseed", "lupin", "maize", "maizefor", "mango",
"mate", "melonetc", "melonseed", "millet",
"mixedgrain", "mixedgrass", "mushroom", "mustard",
"nutmeg", "nutnes", "oats", "oilpalm", "oilseedfor",
"oilseednes", "okra", "olive", "onion", "orange",
"papaya", "pea", "peachetc", "pear", "pepper",
"peppermint", "persimmon", "pigeonpea", "pimento",
"pineapple", "pistachio", "plantain", "plum", "poppy",
"potato", "pulsenes", "pumpkinetc", "pyrethrum",
"quince", "quinoa", "ramie", "rapeseed", "rasberry",
"rice", "rootnes", "rubber", "rye", "ryefor",
"safflower", "sesame", "sisal", "sorghum",
"sorghumfor", "sourcherry, soybean", "spicenes",
"spinach", "stonefruitnes", "strawberry", "stringbean",
"sugarbeet", "sugarcane", "sugarnes", "sunflower",
"swedefor", "sweetpotato", "tangetc", "taro", "tea",
"tobacco", "tomato", "triticale", "tropicalnes",
"tung", "turnipfor", "vanilla", "vegetablenes",
"vegfor", "vetch", "walnut", "watermelon", "wheat",
"yam", "yautia"
]
}
nutrient_units = {
"protein": u.gram/u.hectogram,
"lipid": u.gram/u.hectogram, # total lipid
"energy": u.kilojoule/u.hectogram,
"ca": u.milligram/u.hectogram, # calcium
"fe": u.milligram/u.hectogram, # iron
"mg": u.milligram/u.hectogram, # magnesium
"ph": u.milligram/u.hectogram, # phosphorus
"k": u.milligram/u.hectogram, # potassium
"na": u.milligram/u.hectogram, # sodium
"zn": u.milligram/u.hectogram, # zinc
"cu": u.milligram/u.hectogram, # copper
"fl": u.microgram/u.hectogram, # fluoride
"mn": u.milligram/u.hectogram, # manganese
"se": u.microgram/u.hectogram, # selenium
"vita": u.IU/u.hectogram, # vitamin A
"betac": u.microgram/u.hectogram, # beta carotene
"alphac": u.microgram/u.hectogram, # alpha carotene
"vite": u.milligram/u.hectogram, # vitamin e
"crypto": u.microgram/u.hectogram, # cryptoxanthin
"lycopene": u.microgram/u.hectogram, # lycopene
"lutein": u.microgram/u.hectogram, # lutein + zeaxanthin
"betat": u.milligram/u.hectogram, # beta tocopherol
"gammat": u.milligram/u.hectogram, # gamma tocopherol
"deltat": u.milligram/u.hectogram, # delta tocopherol
"vitc": u.milligram/u.hectogram, # vitamin C
"thiamin": u.milligram/u.hectogram,
"riboflavin": u.milligram/u.hectogram,
"niacin": u.milligram/u.hectogram,
"pantothenic": u.milligram/u.hectogram, # pantothenic acid
"vitb6": u.milligram/u.hectogram, # vitamin B6
"folate": u.microgram/u.hectogram,
"vitb12": u.microgram/u.hectogram, # vitamin B12
"vitk": u.microgram/u.hectogram, # vitamin K
}
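The unit expressions above come from a pint-style unit registry; nutrient data is reported per 100 g of crop, hence the hectogram denominators. A small sketch of how such an expression composes and converts, using a plain pint.UnitRegistry (the real code imports `u` from the package rather than building one here) and made-up numbers:

import pint

# Stand-in for the package's shared registry; pint accepts SI prefixes,
# so hectogram, i.e. "per 100 g of crop", works out of the box.
u = pint.UnitRegistry()

nutrient_per_100g = 2.5 * (u.milligram / u.hectogram)  # made-up nutrient density
crop_mass = 4 * u.metric_ton                           # made-up harvest

# pint keeps the dimensions straight when scaling up to a harvest total.
total_nutrient = (crop_mass * nutrient_per_100g).to(u.gram)
print(total_nutrient)  # 100.0 gram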
MODEL_SPEC = {
"model_name": MODEL_METADATA["crop_production_percentile"].model_title,
"pyname": MODEL_METADATA["crop_production_percentile"].pyname,
@ -44,54 +125,12 @@ MODEL_SPEC = {
},
"landcover_to_crop_table_path": {
"type": "csv",
"index_col": "crop_name",
"columns": {
"lucode": {"type": "integer"},
"crop_name": {
"type": "option_string",
"options": {
# TODO: use human-readable translatable crop names (#614)
crop: {"description": crop} for crop in [
"abaca", "agave", "alfalfa", "almond", "aniseetc",
"apple", "apricot", "areca", "artichoke", "asparagus",
"avocado", "bambara", "banana", "barley", "bean",
"beetfor", "berrynes", "blueberry", "brazil",
"canaryseed", "carob", "carrot", "carrotfor", "cashew",
"broadbean", "buckwheat", "cabbage", "cabbagefor",
"cashewapple", "cassava", "castor", "cauliflower",
"cerealnes", "cherry", "chestnut", "chickpea",
"chicory", "chilleetc", "cinnamon", "citrusnes",
"clove", "clover", "cocoa", "coconut", "coffee",
"cotton", "cowpea", "cranberry", "cucumberetc",
"currant", "date", "eggplant", "fibrenes", "fig",
"flax", "fonio", "fornes", "fruitnes", "garlic",
"ginger", "gooseberry", "grape", "grapefruitetc",
"grassnes", "greenbean", "greenbroadbean", "greencorn",
"greenonion", "greenpea", "groundnut", "hazelnut",
"hemp", "hempseed", "hop", "jute", "jutelikefiber",
"kapokfiber", "kapokseed", "karite", "kiwi", "kolanut",
"legumenes", "lemonlime", "lentil", "lettuce",
"linseed", "lupin", "maize", "maizefor", "mango",
"mate", "melonetc", "melonseed", "millet",
"mixedgrain", "mixedgrass", "mushroom", "mustard",
"nutmeg", "nutnes", "oats", "oilpalm", "oilseedfor",
"oilseednes", "okra", "olive", "onion", "orange",
"papaya", "pea", "peachetc", "pear", "pepper",
"peppermint", "persimmon", "pigeonpea", "pimento",
"pineapple", "pistachio", "plantain", "plum", "poppy",
"potato", "pulsenes", "pumpkinetc", "pyrethrum",
"quince", "quinoa", "ramie", "rapeseed", "rasberry",
"rice", "rootnes", "rubber", "rye", "ryefor",
"safflower", "sesame", "sisal", "sorghum",
"sorghumfor", "sourcherry, soybean", "spicenes",
"spinach", "stonefruitnes", "strawberry", "stringbean",
"sugarbeet", "sugarcane", "sugarnes", "sunflower",
"swedefor", "sweetpotato", "tangetc", "taro", "tea",
"tobacco", "tomato", "triticale", "tropicalnes",
"tung", "turnipfor", "vanilla", "vegetablenes",
"vegfor", "vetch", "walnut", "watermelon", "wheat",
"yam", "yautia"
]
}
"options": CROP_OPTIONS
}
},
"about": gettext(
@ -116,6 +155,7 @@ MODEL_SPEC = {
"contents": {
"[CROP]_percentile_yield_table.csv": {
"type": "csv",
"index_col": "climate_bin",
"columns": {
"climate_bin": {"type": "integer"},
"yield_25th": {
@ -163,45 +203,19 @@ MODEL_SPEC = {
},
"crop_nutrient.csv": {
"type": "csv",
"index_col": "crop",
"columns": {
nutrient: {
"crop": {
"type": "option_string",
"options": CROP_OPTIONS
},
"percentrefuse": {
"type": "percent"
},
**{nutrient: {
"type": "number",
"units": units
} for nutrient, units in {
"protein": u.gram/u.hectogram,
"lipid": u.gram/u.hectogram, # total lipid
"energy": u.kilojoule/u.hectogram,
"ca": u.milligram/u.hectogram, # calcium
"fe": u.milligram/u.hectogram, # iron
"mg": u.milligram/u.hectogram, # magnesium
"ph": u.milligram/u.hectogram, # phosphorus
"k": u.milligram/u.hectogram, # potassium
"na": u.milligram/u.hectogram, # sodium
"zn": u.milligram/u.hectogram, # zinc
"cu": u.milligram/u.hectogram, # copper
"fl": u.microgram/u.hectogram, # fluoride
"mn": u.milligram/u.hectogram, # manganese
"se": u.microgram/u.hectogram, # selenium
"vita": u.IU/u.hectogram, # vitamin A
"betac": u.microgram/u.hectogram, # beta carotene
"alphac": u.microgram/u.hectogram, # alpha carotene
"vite": u.milligram/u.hectogram, # vitamin e
"crypto": u.microgram/u.hectogram, # cryptoxanthin
"lycopene": u.microgram/u.hectogram, # lycopene
"lutein": u.microgram/u.hectogram, # lutein + zeaxanthin
"betaT": u.milligram/u.hectogram, # beta tocopherol
"gammaT": u.milligram/u.hectogram, # gamma tocopherol
"deltaT": u.milligram/u.hectogram, # delta tocopherol
"vitc": u.milligram/u.hectogram, # vitamin C
"thiamin": u.milligram/u.hectogram,
"riboflavin": u.milligram/u.hectogram,
"niacin": u.milligram/u.hectogram,
"pantothenic": u.milligram/u.hectogram, # pantothenic acid
"vitb6": u.milligram/u.hectogram, # vitamin B6
"folate": u.microgram/u.hectogram,
"vitb12": u.microgram/u.hectogram, # vitamin B12
"vitk": u.microgram/u.hectogram, # vitamin K
}.items()
} for nutrient, units in nutrient_units.items()}
}
}
},
@ -213,6 +227,7 @@ MODEL_SPEC = {
"aggregate_results.csv": {
"created_if": "aggregate_polygon_path",
"about": "Model results aggregated to AOI polygons",
"index_col": "FID",
"columns": {
"FID": {
"type": "integer",
@ -251,6 +266,7 @@ MODEL_SPEC = {
},
"result_table.csv": {
"about": "Model results aggregated by crop",
"index_col": "crop",
"columns": {
"crop": {
"type": "freestyle_string",
@ -346,10 +362,10 @@ MODEL_SPEC = {
"bands": {1: {
"type": "number", "units": u.metric_ton/u.hectare
}}
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -405,12 +421,7 @@ _AGGREGATE_VECTOR_FILE_PATTERN = os.path.join(
_AGGREGATE_TABLE_FILE_PATTERN = os.path.join(
'.', 'aggregate_results%s.csv')
_EXPECTED_NUTRIENT_TABLE_HEADERS = [
'Protein', 'Lipid', 'Energy', 'Ca', 'Fe', 'Mg', 'Ph', 'K', 'Na', 'Zn',
'Cu', 'Fl', 'Mn', 'Se', 'VitA', 'betaC', 'alphaC', 'VitE', 'Crypto',
'Lycopene', 'Lutein', 'betaT', 'gammaT', 'deltaT', 'VitC', 'Thiamin',
'Riboflavin', 'Niacin', 'Pantothenic', 'VitB6', 'Folate', 'VitB12',
'VitK']
_EXPECTED_NUTRIENT_TABLE_HEADERS = list(nutrient_units.keys())
_EXPECTED_LUCODE_TABLE_HEADER = 'lucode'
_NODATA_YIELD = -1
@ -458,10 +469,11 @@ def execute(args):
None.
"""
crop_to_landcover_table = utils.read_csv_to_dataframe(
args['landcover_to_crop_table_path'], 'crop_name').to_dict(orient='index')
crop_to_landcover_df = utils.read_csv_to_dataframe(
args['landcover_to_crop_table_path'],
MODEL_SPEC['args']['landcover_to_crop_table_path'])
bad_crop_name_list = []
for crop_name in crop_to_landcover_table:
for crop_name in crop_to_landcover_df.index:
crop_climate_bin_raster_path = os.path.join(
args['model_data_path'],
_EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)
@ -498,8 +510,6 @@ def execute(args):
edge_samples=11)
# Initialize a TaskGraph
work_token_dir = os.path.join(
output_dir, _INTERMEDIATE_OUTPUT_DIR, '_taskgraph_working_dir')
try:
n_workers = int(args['n_workers'])
except (KeyError, ValueError, TypeError):
@ -507,14 +517,14 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # Single process mode.
task_graph = taskgraph.TaskGraph(work_token_dir, n_workers)
task_graph = taskgraph.TaskGraph(
os.path.join(output_dir, 'taskgraph_cache'), n_workers)
dependent_task_list = []
crop_lucode = None
observed_yield_nodata = None
for crop_name in crop_to_landcover_table:
crop_lucode = crop_to_landcover_table[crop_name][
_EXPECTED_LUCODE_TABLE_HEADER]
for crop_name, row in crop_to_landcover_df.iterrows():
crop_lucode = row[_EXPECTED_LUCODE_TABLE_HEADER]
LOGGER.info("Processing crop %s", crop_name)
crop_climate_bin_raster_path = os.path.join(
args['model_data_path'],
@ -540,11 +550,13 @@ def execute(args):
climate_percentile_yield_table_path = os.path.join(
args['model_data_path'],
_CLIMATE_PERCENTILE_TABLE_PATTERN % crop_name)
crop_climate_percentile_table = utils.read_csv_to_dataframe(
climate_percentile_yield_table_path, 'climate_bin').to_dict(orient='index')
crop_climate_percentile_df = utils.read_csv_to_dataframe(
climate_percentile_yield_table_path,
MODEL_SPEC['args']['model_data_path']['contents'][
'climate_percentile_yield_tables']['contents'][
'[CROP]_percentile_yield_table.csv'])
yield_percentile_headers = [
x for x in list(crop_climate_percentile_table.values())[0]
if x != 'climate_bin']
x for x in crop_climate_percentile_df.columns if x != 'climate_bin']
reclassify_error_details = {
'raster_name': f'{crop_name} Climate Bin',
@ -556,10 +568,8 @@ def execute(args):
output_dir,
_INTERPOLATED_YIELD_PERCENTILE_FILE_PATTERN % (
crop_name, yield_percentile_id, file_suffix))
bin_to_percentile_yield = dict([
(bin_id,
crop_climate_percentile_table[bin_id][yield_percentile_id])
for bin_id in crop_climate_percentile_table])
bin_to_percentile_yield = (
crop_climate_percentile_df[yield_percentile_id].to_dict())
# reclassify nodata to a valid value of 0
# we're assuming that the crop doesn't exist where there is no data
# this is more likely than assuming the crop does exist, esp.
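Because the percentile yield table is now read with climate_bin as the index, each yield-percentile column collapses straight into the {bin: yield} mapping that the reclassification step needs. A pandas-only sketch with a hypothetical three-bin table:

import io

import pandas

# Hypothetical percentile yield table for one crop, indexed by climate bin
# (the real table is read via utils.read_csv_to_dataframe and MODEL_SPEC).
csv_text = io.StringIO(
    "climate_bin,yield_25th,yield_50th,yield_75th\n"
    "1,0.9,1.4,2.1\n"
    "2,1.2,1.9,2.8\n"
    "3,0.4,0.7,1.1\n")
percentile_df = pandas.read_csv(csv_text, index_col='climate_bin')

# Mirrors the filter above; with climate_bin already in the index this
# simply copies the remaining column names.
yield_percentile_headers = [
    col for col in percentile_df.columns if col != 'climate_bin']

# One column -> the {bin: yield} value map used to reclassify the
# climate bin raster.
bin_to_percentile_yield = percentile_df['yield_50th'].to_dict()
print(yield_percentile_headers)    # ['yield_25th', 'yield_50th', 'yield_75th']
print(bin_to_percentile_yield)     # {1: 1.4, 2: 1.9, 3: 0.7}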
@ -698,17 +708,17 @@ def execute(args):
# both 'crop_nutrient.csv' and 'crop' are known data/header values for
# this model data.
nutrient_table = utils.read_csv_to_dataframe(
nutrient_df = utils.read_csv_to_dataframe(
os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
'crop', convert_cols_to_lower=False, convert_vals_to_lower=False
).to_dict(orient='index')
MODEL_SPEC['args']['model_data_path']['contents']['crop_nutrient.csv'])
result_table_path = os.path.join(
output_dir, 'result_table%s.csv' % file_suffix)
crop_names = crop_to_landcover_df.index.to_list()
tabulate_results_task = task_graph.add_task(
func=tabulate_results,
args=(nutrient_table, yield_percentile_headers,
crop_to_landcover_table, pixel_area_ha,
args=(nutrient_df, yield_percentile_headers,
crop_names, pixel_area_ha,
args['landcover_raster_path'], landcover_nodata,
output_dir, file_suffix, result_table_path),
target_path_list=[result_table_path],
@ -727,7 +737,7 @@ def execute(args):
args=(args['aggregate_polygon_path'],
target_aggregate_vector_path,
landcover_raster_info['projection_wkt'],
crop_to_landcover_table, nutrient_table,
crop_names, nutrient_df,
yield_percentile_headers, output_dir, file_suffix,
aggregate_results_table_path),
target_path_list=[target_aggregate_vector_path,
@ -851,19 +861,18 @@ def _mask_observed_yield_op(
def tabulate_results(
nutrient_table, yield_percentile_headers,
crop_to_landcover_table, pixel_area_ha, landcover_raster_path,
nutrient_df, yield_percentile_headers,
crop_names, pixel_area_ha, landcover_raster_path,
landcover_nodata, output_dir, file_suffix, target_table_path):
"""Write table with total yield and nutrient results by crop.
This function includes all the operations that write to results_table.csv.
Args:
nutrient_table (dict): a lookup of nutrient values by crop in the
form of nutrient_table[<crop>][<nutrient>].
nutrient_df (pandas.DataFrame): a table of nutrient values by crop
yield_percentile_headers (list): list of strings indicating percentiles
at which yield was calculated.
crop_to_landcover_table (dict): landcover codes keyed by crop names
crop_names (list): list of crop names
pixel_area_ha (float): area of lulc raster cells (hectares)
landcover_raster_path (string): path to landcover raster
landcover_nodata (float): landcover raster nodata value
@ -894,7 +903,7 @@ def tabulate_results(
'crop,area (ha),' + 'production_observed,' +
','.join(production_percentile_headers) + ',' + ','.join(
nutrient_headers) + '\n')
for crop_name in sorted(crop_to_landcover_table):
for crop_name in sorted(crop_names):
result_table.write(crop_name)
production_lookup = {}
production_pixel_count = 0
@ -942,19 +951,19 @@ def tabulate_results(
# convert 100g to Mg and fraction left over from refuse
nutrient_factor = 1e4 * (
1 - nutrient_table[crop_name]['Percentrefuse'] / 100)
1 - nutrient_df['percentrefuse'][crop_name] / 100)
for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
for yield_percentile_id in sorted(yield_percentile_headers):
total_nutrient = (
nutrient_factor *
production_lookup[yield_percentile_id] *
nutrient_table[crop_name][nutrient_id])
nutrient_df[nutrient_id][crop_name])
result_table.write(",%f" % (total_nutrient))
result_table.write(
",%f" % (
nutrient_factor *
production_lookup['observed'] *
nutrient_table[crop_name][nutrient_id]))
nutrient_df[nutrient_id][crop_name]))
result_table.write('\n')
total_area = 0
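The nutrient_factor above folds two conversions into one constant: nutrient values are reported per 100 g (hectogram) of crop while production is in metric tons (1 Mg = 10,000 hg), and only the non-refused fraction of the harvest counts. A worked example with made-up numbers:

# Hypothetical values: 12 Mg of production, 20% refuse,
# 2.5 mg of some nutrient per 100 g of edible crop.
production_mg = 12.0          # metric tons (Mg) of crop
percentrefuse = 20.0          # percent of harvested mass discarded
nutrient_per_100g = 2.5       # nutrient units per hectogram of crop

# 1e4 converts Mg -> hectograms; (1 - refuse/100) keeps the edible fraction.
nutrient_factor = 1e4 * (1 - percentrefuse / 100)   # 8000.0

total_nutrient = nutrient_factor * production_mg * nutrient_per_100g
print(total_nutrient)   # 240000.0 nutrient units (here, mg)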
@ -972,8 +981,8 @@ def tabulate_results(
def aggregate_to_polygons(
base_aggregate_vector_path, target_aggregate_vector_path,
landcover_raster_projection, crop_to_landcover_table,
nutrient_table, yield_percentile_headers, output_dir, file_suffix,
landcover_raster_projection, crop_names,
nutrient_df, yield_percentile_headers, output_dir, file_suffix,
target_aggregate_table_path):
"""Write table with aggregate results of yield and nutrient values.
@ -986,9 +995,8 @@ def aggregate_to_polygons(
target_aggregate_vector_path (string):
path to re-projected copy of polygon vector
landcover_raster_projection (string): a WKT projection string
crop_to_landcover_table (dict): landcover codes keyed by crop names
nutrient_table (dict): a lookup of nutrient values by crop in the
form of nutrient_table[<crop>][<nutrient>].
crop_names (list): list of crop names
nutrient_df (pandas.DataFrame): a table of nutrient values by crop
yield_percentile_headers (list): list of strings indicating percentiles
at which yield was calculated.
output_dir (string): the file path to the output workspace.
@ -1012,10 +1020,10 @@ def aggregate_to_polygons(
total_nutrient_table = collections.defaultdict(
lambda: collections.defaultdict(lambda: collections.defaultdict(
float)))
for crop_name in crop_to_landcover_table:
for crop_name in crop_names:
# convert 100g to Mg and fraction left over from refuse
nutrient_factor = 1e4 * (
1 - nutrient_table[crop_name]['Percentrefuse'] / 100)
1 - nutrient_df['percentrefuse'][crop_name] / 100)
# loop over percentiles
for yield_percentile_id in yield_percentile_headers:
percentile_crop_production_raster_path = os.path.join(
@ -1040,24 +1048,24 @@ def aggregate_to_polygons(
total_yield_lookup['%s_%s' % (
crop_name, yield_percentile_id)][
id_index]['sum'] *
nutrient_table[crop_name][nutrient_id])
nutrient_df[nutrient_id][crop_name])
# process observed
observed_yield_path = os.path.join(
output_dir, _OBSERVED_PRODUCTION_FILE_PATTERN % (
crop_name, file_suffix))
total_yield_lookup['%s_observed' % crop_name] = (
total_yield_lookup[f'{crop_name}_observed'] = (
pygeoprocessing.zonal_statistics(
(observed_yield_path, 1),
target_aggregate_vector_path))
for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
for id_index in total_yield_lookup['%s_observed' % crop_name]:
for id_index in total_yield_lookup[f'{crop_name}_observed']:
total_nutrient_table[
nutrient_id]['observed'][id_index] += (
nutrient_factor *
total_yield_lookup[
'%s_observed' % crop_name][id_index]['sum'] *
nutrient_table[crop_name][nutrient_id])
f'{crop_name}_observed'][id_index]['sum'] *
nutrient_df[nutrient_id][crop_name])
# report everything to a table
with open(target_aggregate_table_path, 'w') as aggregate_table:

View File

@ -86,6 +86,7 @@ MODEL_SPEC = {
},
"landcover_to_crop_table_path": {
"type": "csv",
"index_col": "crop_name",
"columns": {
"lucode": {"type": "integer"},
"crop_name": {
@ -101,6 +102,7 @@ MODEL_SPEC = {
},
"fertilization_rate_table_path": {
"type": "csv",
"index_col": "crop_name",
"columns": {
"crop_name": {
"type": "option_string",
@ -129,29 +131,38 @@ MODEL_SPEC = {
"contents": {
"[CROP]_regression_yield_table.csv": {
"type": "csv",
"index_col": "climate_bin",
"columns": {
'climate_bin': {"type": "integer"},
'yield_ceiling': {
"climate_bin": {"type": "integer"},
"yield_ceiling": {
"type": "number",
"units": u.metric_ton/u.hectare
},
'b_nut': {"type": "number", "units": u.none},
'b_k2o': {"type": "number", "units": u.none},
'c_n': {"type": "number", "units": u.none},
'c_p2o5': {"type": "number", "units": u.none},
'c_k2o': {"type": "number", "units": u.none}
"b_nut": {"type": "number", "units": u.none},
"b_k2o": {"type": "number", "units": u.none},
"c_n": {"type": "number", "units": u.none},
"c_p2o5": {"type": "number", "units": u.none},
"c_k2o": {"type": "number", "units": u.none}
}
}
}
},
"crop_nutrient.csv": {
"type": "csv",
"index_col": "crop",
"columns": {
nutrient: {
"crop": {
"type": "option_string",
"options": CROPS
},
"percentrefuse": {
"type": "percent"
},
**{nutrient: {
"about": about,
"type": "number",
"units": units
} for nutrient, about, units in NUTRIENTS
} for nutrient, about, units in NUTRIENTS}
}
},
"extended_climate_bin_maps": {
@ -186,6 +197,7 @@ MODEL_SPEC = {
"aggregate_results.csv": {
"created_if": "aggregate_polygon_path",
"about": "Table of results aggregated by ",
"index_col": "FID",
"columns": {
"FID": {
"type": "integer",
@ -213,6 +225,7 @@ MODEL_SPEC = {
},
"result_table.csv": {
"about": "Table of results aggregated by crop",
"index_col": "crop",
"columns": {
"crop": {
"type": "freestyle_string",
@ -306,10 +319,10 @@ MODEL_SPEC = {
"bands": {1: {
"type": "number", "units": u.metric_ton/u.hectare
}}
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -319,7 +332,7 @@ _REGRESSION_TABLE_PATTERN = os.path.join(
'climate_regression_yield_tables', '%s_regression_yield_table.csv')
_EXPECTED_REGRESSION_TABLE_HEADERS = [
'climate_bin', 'yield_ceiling', 'b_nut', 'b_k2o', 'c_n', 'c_p2o5', 'c_k2o']
'yield_ceiling', 'b_nut', 'b_k2o', 'c_n', 'c_p2o5', 'c_k2o']
# crop_name, yield_regression_id, file_suffix
_COARSE_YIELD_REGRESSION_PARAMETER_FILE_PATTERN = os.path.join(
@ -409,11 +422,11 @@ _AGGREGATE_TABLE_FILE_PATTERN = os.path.join(
'.', 'aggregate_results%s.csv')
_EXPECTED_NUTRIENT_TABLE_HEADERS = [
'Protein', 'Lipid', 'Energy', 'Ca', 'Fe', 'Mg', 'Ph', 'K', 'Na', 'Zn',
'Cu', 'Fl', 'Mn', 'Se', 'VitA', 'betaC', 'alphaC', 'VitE', 'Crypto',
'Lycopene', 'Lutein', 'betaT', 'gammaT', 'deltaT', 'VitC', 'Thiamin',
'Riboflavin', 'Niacin', 'Pantothenic', 'VitB6', 'Folate', 'VitB12',
'VitK']
'protein', 'lipid', 'energy', 'ca', 'fe', 'mg', 'ph', 'k', 'na', 'zn',
'cu', 'fl', 'mn', 'se', 'vita', 'betac', 'alphac', 'vite', 'crypto',
'lycopene', 'lutein', 'betat', 'gammat', 'deltat', 'vitc', 'thiamin',
'riboflavin', 'niacin', 'pantothenic', 'vitb6', 'folate', 'vitb12',
'vitk']
_EXPECTED_LUCODE_TABLE_HEADER = 'lucode'
_NODATA_YIELD = -1
@ -470,8 +483,6 @@ def execute(args):
output_dir, os.path.join(output_dir, _INTERMEDIATE_OUTPUT_DIR)])
# Initialize a TaskGraph
work_token_dir = os.path.join(
output_dir, _INTERMEDIATE_OUTPUT_DIR, '_taskgraph_working_dir')
try:
n_workers = int(args['n_workers'])
except (KeyError, ValueError, TypeError):
@ -479,20 +490,21 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # Single process mode.
task_graph = taskgraph.TaskGraph(work_token_dir, n_workers)
task_graph = taskgraph.TaskGraph(
os.path.join(output_dir, 'taskgraph_cache'), n_workers)
dependent_task_list = []
LOGGER.info(
"Checking if the landcover raster is missing lucodes")
crop_to_landcover_table = utils.read_csv_to_dataframe(
args['landcover_to_crop_table_path'], 'crop_name').to_dict(orient='index')
crop_to_landcover_df = utils.read_csv_to_dataframe(
args['landcover_to_crop_table_path'],
MODEL_SPEC['args']['landcover_to_crop_table_path'])
crop_to_fertlization_rate_table = utils.read_csv_to_dataframe(
args['fertilization_rate_table_path'], 'crop_name').to_dict(orient='index')
crop_to_fertilization_rate_df = utils.read_csv_to_dataframe(
args['fertilization_rate_table_path'],
MODEL_SPEC['args']['fertilization_rate_table_path'])
crop_lucodes = [
x[_EXPECTED_LUCODE_TABLE_HEADER]
for x in crop_to_landcover_table.values()]
crop_lucodes = list(crop_to_landcover_df[_EXPECTED_LUCODE_TABLE_HEADER])
unique_lucodes = numpy.array([])
for _, lu_band_data in pygeoprocessing.iterblocks(
@ -509,9 +521,7 @@ def execute(args):
"aren't in the landcover raster: %s", missing_lucodes)
LOGGER.info("Checking that crops correspond to known types.")
for crop_name in crop_to_landcover_table:
crop_lucode = crop_to_landcover_table[crop_name][
_EXPECTED_LUCODE_TABLE_HEADER]
for crop_name in crop_to_landcover_df.index:
crop_climate_bin_raster_path = os.path.join(
args['model_data_path'],
_EXTENDED_CLIMATE_BIN_FILE_PATTERN % crop_name)
@ -543,9 +553,8 @@ def execute(args):
crop_lucode = None
observed_yield_nodata = None
for crop_name in crop_to_landcover_table:
crop_lucode = crop_to_landcover_table[crop_name][
_EXPECTED_LUCODE_TABLE_HEADER]
for crop_name, row in crop_to_landcover_df.iterrows():
crop_lucode = row[_EXPECTED_LUCODE_TABLE_HEADER]
LOGGER.info("Processing crop %s", crop_name)
crop_climate_bin_raster_path = os.path.join(
args['model_data_path'],
@ -568,19 +577,19 @@ def execute(args):
task_name='crop_climate_bin')
dependent_task_list.append(crop_climate_bin_task)
crop_regression_table_path = os.path.join(
args['model_data_path'], _REGRESSION_TABLE_PATTERN % crop_name)
crop_regression_table = utils.read_csv_to_dataframe(
crop_regression_table_path, 'climate_bin').to_dict(orient='index')
for bin_id in crop_regression_table:
crop_regression_df = utils.read_csv_to_dataframe(
os.path.join(args['model_data_path'],
_REGRESSION_TABLE_PATTERN % crop_name),
MODEL_SPEC['args']['model_data_path']['contents'][
'climate_regression_yield_tables']['contents'][
'[CROP]_regression_yield_table.csv'])
for _, row in crop_regression_df.iterrows():
for header in _EXPECTED_REGRESSION_TABLE_HEADERS:
if crop_regression_table[bin_id][header.lower()] == '':
crop_regression_table[bin_id][header.lower()] = 0
if numpy.isnan(row[header]):
row[header] = 0
yield_regression_headers = [
x for x in list(crop_regression_table.values())[0]
if x != 'climate_bin']
x for x in crop_regression_df.columns if x != 'climate_bin']
reclassify_error_details = {
'raster_name': f'{crop_name} Climate Bin',
@ -597,10 +606,7 @@ def execute(args):
output_dir,
_INTERPOLATED_YIELD_REGRESSION_FILE_PATTERN % (
crop_name, yield_regression_id, file_suffix)))
bin_to_regression_value = dict([
(bin_id,
crop_regression_table[bin_id][yield_regression_id])
for bin_id in crop_regression_table])
bin_to_regression_value = crop_regression_df[yield_regression_id].to_dict()
# reclassify nodata to a valid value of 0
# we're assuming that the crop doesn't exist where there is no data
# this is more likely than assuming the crop does exist, esp.
@ -653,8 +659,8 @@ def execute(args):
(regression_parameter_raster_path_lookup['b_nut'], 1),
(regression_parameter_raster_path_lookup['c_n'], 1),
(args['landcover_raster_path'], 1),
(crop_to_fertlization_rate_table[crop_name]
['nitrogen_rate'], 'raw'),
(crop_to_fertilization_rate_df['nitrogen_rate'][crop_name],
'raw'),
(crop_lucode, 'raw'), (pixel_area_ha, 'raw')],
_x_yield_op,
nitrogen_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD),
@ -672,8 +678,8 @@ def execute(args):
(regression_parameter_raster_path_lookup['b_nut'], 1),
(regression_parameter_raster_path_lookup['c_p2o5'], 1),
(args['landcover_raster_path'], 1),
(crop_to_fertlization_rate_table[crop_name]
['phosphorus_rate'], 'raw'),
(crop_to_fertilization_rate_df['phosphorus_rate'][crop_name],
'raw'),
(crop_lucode, 'raw'), (pixel_area_ha, 'raw')],
_x_yield_op,
phosphorus_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD),
@ -691,8 +697,8 @@ def execute(args):
(regression_parameter_raster_path_lookup['b_k2o'], 1),
(regression_parameter_raster_path_lookup['c_k2o'], 1),
(args['landcover_raster_path'], 1),
(crop_to_fertlization_rate_table[crop_name]
['potassium_rate'], 'raw'),
(crop_to_fertilization_rate_df['potassium_rate'][crop_name],
'raw'),
(crop_lucode, 'raw'), (pixel_area_ha, 'raw')],
_x_yield_op,
potassium_yield_raster_path, gdal.GDT_Float32, _NODATA_YIELD),
@ -796,18 +802,18 @@ def execute(args):
# both 'crop_nutrient.csv' and 'crop' are known data/header values for
# this model data.
nutrient_table = utils.read_csv_to_dataframe(
nutrient_df = utils.read_csv_to_dataframe(
os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
'crop', convert_cols_to_lower=False, convert_vals_to_lower=False
).to_dict(orient='index')
MODEL_SPEC['args']['model_data_path']['contents']['crop_nutrient.csv'])
LOGGER.info("Generating report table")
crop_names = list(crop_to_landcover_df.index)
result_table_path = os.path.join(
output_dir, 'result_table%s.csv' % file_suffix)
_ = task_graph.add_task(
func=tabulate_regression_results,
args=(nutrient_table,
crop_to_landcover_table, pixel_area_ha,
args=(nutrient_df,
crop_names, pixel_area_ha,
args['landcover_raster_path'], landcover_nodata,
output_dir, file_suffix, result_table_path),
target_path_list=[result_table_path],
@ -827,7 +833,7 @@ def execute(args):
args=(args['aggregate_polygon_path'],
target_aggregate_vector_path,
landcover_raster_info['projection_wkt'],
crop_to_landcover_table, nutrient_table,
crop_names, nutrient_df,
output_dir, file_suffix,
aggregate_results_table_path),
target_path_list=[target_aggregate_vector_path,
@ -929,17 +935,16 @@ def _mask_observed_yield_op(
def tabulate_regression_results(
nutrient_table,
crop_to_landcover_table, pixel_area_ha, landcover_raster_path,
nutrient_df,
crop_names, pixel_area_ha, landcover_raster_path,
landcover_nodata, output_dir, file_suffix, target_table_path):
"""Write table with total yield and nutrient results by crop.
This function includes all the operations that write to results_table.csv.
Args:
nutrient_table (dict): a lookup of nutrient values by crop in the
form of nutrient_table[<crop>][<nutrient>].
crop_to_landcover_table (dict): landcover codes keyed by crop names
nutrient_df (pandas.DataFrame): a table of nutrient values by crop
crop_names (list): list of crop names
pixel_area_ha (float): area of lulc raster cells (hectares)
landcover_raster_path (string): path to landcover raster
landcover_nodata (float): landcover raster nodata value
@ -960,7 +965,7 @@ def tabulate_regression_results(
result_table.write(
'crop,area (ha),' + 'production_observed,production_modeled,' +
','.join(nutrient_headers) + '\n')
for crop_name in sorted(crop_to_landcover_table):
for crop_name in sorted(crop_names):
result_table.write(crop_name)
production_lookup = {}
production_pixel_count = 0
@ -1006,18 +1011,18 @@ def tabulate_regression_results(
# convert 100g to Mg and fraction left over from refuse
nutrient_factor = 1e4 * (
1 - nutrient_table[crop_name]['Percentrefuse'] / 100)
1 - nutrient_df['percentrefuse'][crop_name] / 100)
for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
total_nutrient = (
nutrient_factor *
production_lookup['modeled'] *
nutrient_table[crop_name][nutrient_id])
nutrient_df[nutrient_id][crop_name])
result_table.write(",%f" % (total_nutrient))
result_table.write(
",%f" % (
nutrient_factor *
production_lookup['observed'] *
nutrient_table[crop_name][nutrient_id]))
nutrient_df[nutrient_id][crop_name]))
result_table.write('\n')
total_area = 0
@ -1035,8 +1040,8 @@ def tabulate_regression_results(
def aggregate_regression_results_to_polygons(
base_aggregate_vector_path, target_aggregate_vector_path,
landcover_raster_projection, crop_to_landcover_table,
nutrient_table, output_dir, file_suffix,
landcover_raster_projection, crop_names,
nutrient_df, output_dir, file_suffix,
target_aggregate_table_path):
"""Write table with aggregate results of yield and nutrient values.
@ -1049,9 +1054,8 @@ def aggregate_regression_results_to_polygons(
target_aggregate_vector_path (string):
path to re-projected copy of polygon vector
landcover_raster_projection (string): a WKT projection string
crop_to_landcover_table (dict): landcover codes keyed by crop names
nutrient_table (dict): a lookup of nutrient values by crop in the
form of nutrient_table[<crop>][<nutrient>].
crop_names (list): list of crop names
nutrient_df (pandas.DataFrame): a table of nutrient values by crop
output_dir (string): the file path to the output workspace.
file_suffix (string): string to append to any output filenames.
target_aggregate_table_path (string): path to 'aggregate_results.csv'
@ -1072,10 +1076,10 @@ def aggregate_regression_results_to_polygons(
total_nutrient_table = collections.defaultdict(
lambda: collections.defaultdict(lambda: collections.defaultdict(
float)))
for crop_name in crop_to_landcover_table:
for crop_name in crop_names:
# convert 100g to Mg and fraction left over from refuse
nutrient_factor = 1e4 * (
1 - nutrient_table[crop_name]['Percentrefuse'] / 100)
1 - nutrient_df['percentrefuse'][crop_name] / 100)
LOGGER.info(
"Calculating zonal stats for %s", crop_name)
crop_production_raster_path = os.path.join(
@ -1093,7 +1097,7 @@ def aggregate_regression_results_to_polygons(
nutrient_factor *
total_yield_lookup['%s_modeled' % crop_name][
fid_index]['sum'] *
nutrient_table[crop_name][nutrient_id])
nutrient_df[nutrient_id][crop_name])
# process observed
observed_yield_path = os.path.join(
@ -1111,7 +1115,7 @@ def aggregate_regression_results_to_polygons(
nutrient_factor * # percent crop used * 1000 [100g per Mg]
total_yield_lookup[
'%s_observed' % crop_name][fid_index]['sum'] *
nutrient_table[crop_name][nutrient_id]) # nutrient unit per 100g crop
nutrient_df[nutrient_id][crop_name]) # nutrient unit per 100g crop
# report everything to a table
aggregate_table_path = os.path.join(

View File

@ -336,7 +336,7 @@ def build_datastack_archive(args, model_name, datastack_path):
data_dir, f'{key}_csv_data')
dataframe = utils.read_csv_to_dataframe(
source_path, convert_vals_to_lower=False)
source_path, args_spec[key])
csv_source_dir = os.path.abspath(os.path.dirname(source_path))
for spatial_column_name in spatial_columns:
# Iterate through the spatial columns, identify the set of

View File

@ -137,7 +137,7 @@ MODEL_SPEC = {
"geometries": spec_utils.POINT,
"fields": {}
},
"_work_tokens": spec_utils.TASKGRAPH_DIR
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -221,8 +221,6 @@ def execute(args):
file_registry = utils.build_file_registry(
[(_OUTPUT_FILES, output_directory)], file_suffix)
work_token_dir = os.path.join(output_directory, '_work_tokens')
# Manually setting n_workers to be -1 so that everything happens in the
# same thread.
try:
@ -232,7 +230,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1
graph = taskgraph.TaskGraph(work_token_dir, n_workers=n_workers)
graph = taskgraph.TaskGraph(
os.path.join(output_directory, '_work_tokens'), n_workers=n_workers)
fill_pits_task = graph.add_task(
pygeoprocessing.routing.fill_pits,

View File

@ -10,6 +10,7 @@ import time
import uuid
import numpy
import pandas
import pygeoprocessing
import scipy.spatial
import taskgraph
@ -64,6 +65,7 @@ MODEL_SPEC = {
},
"biophysical_table_path": {
"type": "csv",
"index_col": "lucode",
"columns": {
"lucode": spec_utils.LULC_TABLE_COLUMN,
"is_tropical_forest": {
@ -249,10 +251,10 @@ MODEL_SPEC = {
"bands": {1: {
"type": "number", "units": u.metric_ton/u.hectare
}}
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -375,8 +377,6 @@ def execute(args):
file_suffix = utils.make_suffix_string(args, 'results_suffix')
# Initialize a TaskGraph
taskgraph_working_dir = os.path.join(
intermediate_dir, '_taskgraph_working_dir')
try:
n_workers = int(args['n_workers'])
except (KeyError, ValueError, TypeError):
@ -384,7 +384,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # single process mode.
task_graph = taskgraph.TaskGraph(taskgraph_working_dir, n_workers)
task_graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
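This is the same refactor applied across the models in this commit: the TaskGraph token directory moves from a hidden intermediate folder to a taskgraph_cache directory at the top of the workspace. A minimal, self-contained sketch of the pattern, assuming the natcap taskgraph package and a throwaway temp workspace:

import os
import tempfile

import taskgraph


def write_greeting(target_path):
    # Trivial stand-in for a model step that produces a file.
    with open(target_path, 'w') as target_file:
        target_file.write('hello\n')


workspace_dir = tempfile.mkdtemp()
target_path = os.path.join(workspace_dir, 'greeting.txt')

# The token/cache directory now sits directly in the workspace;
# n_workers=-1 runs every task synchronously in this process.
task_graph = taskgraph.TaskGraph(
    os.path.join(workspace_dir, 'taskgraph_cache'), n_workers=-1)
task_graph.add_task(
    func=write_greeting,
    args=(target_path,),
    target_path_list=[target_path],
    task_name='write greeting')
task_graph.join()
task_graph.close()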
# used to keep track of files generated by this module
output_file_registry = {
@ -418,16 +419,15 @@ def execute(args):
# Map non-forest landcover codes to carbon biomasses
LOGGER.info('Calculating direct mapped carbon stocks')
carbon_maps = []
biophysical_table = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'lucode').to_dict(orient='index')
biophysical_keys = [
x.lower() for x in list(biophysical_table.values())[0].keys()]
biophysical_df = utils.read_csv_to_dataframe(
args['biophysical_table_path'],
MODEL_SPEC['args']['biophysical_table_path'])
pool_list = [('c_above', True)]
if args['pools_to_calculate'] == 'all':
pool_list.extend([
('c_below', False), ('c_soil', False), ('c_dead', False)])
for carbon_pool_type, ignore_tropical_type in pool_list:
if carbon_pool_type in biophysical_keys:
if carbon_pool_type in biophysical_df.columns:
carbon_maps.append(
output_file_registry[carbon_pool_type+'_map'])
task_graph.add_task(
@ -630,8 +630,8 @@ def _calculate_lulc_carbon_map(
"""
# classify forest pixels from lulc
biophysical_table = utils.read_csv_to_dataframe(
biophysical_table_path, 'lucode').to_dict(orient='index')
biophysical_df = utils.read_csv_to_dataframe(
biophysical_table_path, MODEL_SPEC['args']['biophysical_table_path'])
lucode_to_per_cell_carbon = {}
cell_size = pygeoprocessing.get_raster_info(
@ -639,24 +639,22 @@ def _calculate_lulc_carbon_map(
cell_area_ha = abs(cell_size[0]) * abs(cell_size[1]) / 10000
# Build a lookup table
for lucode in biophysical_table:
for lucode, row in biophysical_df.iterrows():
if compute_forest_edge_effects:
is_tropical_forest = (
int(biophysical_table[int(lucode)]['is_tropical_forest']))
is_tropical_forest = row['is_tropical_forest']
else:
is_tropical_forest = 0
if ignore_tropical_type and is_tropical_forest == 1:
is_tropical_forest = False
if ignore_tropical_type and is_tropical_forest:
# if tropical forest above ground, lookup table is nodata
lucode_to_per_cell_carbon[int(lucode)] = NODATA_VALUE
lucode_to_per_cell_carbon[lucode] = NODATA_VALUE
else:
try:
lucode_to_per_cell_carbon[int(lucode)] = float(
biophysical_table[lucode][carbon_pool_type]) * cell_area_ha
except ValueError:
if pandas.isna(row[carbon_pool_type]):
raise ValueError(
"Could not interpret carbon pool value as a number. "
f"lucode: {lucode}, pool_type: {carbon_pool_type}, "
f"value: {biophysical_table[lucode][carbon_pool_type]}")
f"value: {row[carbon_pool_type]}")
lucode_to_per_cell_carbon[lucode] = row[carbon_pool_type] * cell_area_ha
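cell_area_ha above converts the raster's pixel dimensions (projected units, typically meters) into hectares so that per-hectare carbon densities from the biophysical table become per-pixel stocks. A worked example for a hypothetical 30 m pixel:

# Hypothetical pixel size, as returned in
# pygeoprocessing.get_raster_info(...)['pixel_size']; the y size is
# negative for north-up rasters, hence the abs() calls.
cell_size = (30.0, -30.0)                                      # meters
cell_area_ha = abs(cell_size[0]) * abs(cell_size[1]) / 10000   # 0.09 ha

c_above_density = 120.0         # made-up Mg C per hectare for one lucode
per_cell_carbon = c_above_density * cell_area_ha
print(per_cell_carbon)          # 10.8 Mg C per pixel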
# map aboveground carbon from table to lulc that is not forest
reclass_error_details = {
@ -696,11 +694,9 @@ def _map_distance_from_tropical_forest_edge(
"""
# Build a list of forest lucodes
biophysical_table = utils.read_csv_to_dataframe(
biophysical_table_path, 'lucode').to_dict(orient='index')
forest_codes = [
lucode for (lucode, ludata) in biophysical_table.items()
if int(ludata['is_tropical_forest']) == 1]
biophysical_df = utils.read_csv_to_dataframe(
biophysical_table_path, MODEL_SPEC['args']['biophysical_table_path'])
forest_codes = biophysical_df[biophysical_df['is_tropical_forest']].index.values
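forest_codes is now pulled straight from the lucode index by masking on the is_tropical_forest column. A small sketch of that idiom with a hypothetical four-row table (the real column comes in as a boolean-like type via MODEL_SPEC; the explicit == 1 below keeps the mask boolean either way):

import pandas

# Hypothetical biophysical table indexed by lucode, as read via MODEL_SPEC.
biophysical_df = pandas.DataFrame(
    {'is_tropical_forest': [0, 1, 1, 0], 'c_above': [20.0, 150.0, 90.0, 5.0]},
    index=pandas.Index([1, 2, 3, 4], name='lucode'))

# Mask the rows, then take the index values: the forest landcover codes.
forest_codes = biophysical_df[
    biophysical_df['is_tropical_forest'] == 1].index.values
print(forest_codes)  # [2 3]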
# Make a raster where 1 is non-forest landcover types and 0 is forest
lulc_nodata = pygeoprocessing.get_raster_info(

View File

@ -77,6 +77,7 @@ MODEL_SPEC = {
},
"threats_table_path": {
"type": "csv",
"index_col": "threat",
"columns": {
"threat": {
"type": "freestyle_string",
@ -170,8 +171,13 @@ MODEL_SPEC = {
},
"sensitivity_table_path": {
"type": "csv",
"index_col": "lulc",
"columns": {
"lulc": spec_utils.LULC_TABLE_COLUMN,
"name": {
"type": "freestyle_string",
"required": False
},
"habitat": {
"type": "ratio",
"about": gettext(
@ -303,10 +309,10 @@ MODEL_SPEC = {
"bands": {1: {"type": "integer"}}
}
}
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
# All out rasters besides rarity should be gte to 0. Set nodata accordingly.
@ -371,28 +377,23 @@ def execute(args):
args['workspace_dir'], 'intermediate')
utils.make_directories([intermediate_output_dir, output_dir])
taskgraph_working_dir = os.path.join(
intermediate_output_dir, '_taskgraph_working_dir')
n_workers = int(args.get('n_workers', -1))
task_graph = taskgraph.TaskGraph(taskgraph_working_dir, n_workers)
task_graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
LOGGER.info("Checking Threat and Sensitivity tables for compliance")
# Get CSVs as dictionaries and ensure the key is a string for threats.
threat_dict = {
str(key): value for key, value in utils.read_csv_to_dataframe(
args['threats_table_path'], 'THREAT',
expand_path_cols=['cur_path', 'fut_path', 'base_path']
).to_dict(orient='index').items()}
sensitivity_dict = utils.read_csv_to_dataframe(
args['sensitivity_table_path'], 'LULC').to_dict(orient='index')
threat_df = utils.read_csv_to_dataframe(
args['threats_table_path'], MODEL_SPEC['args']['threats_table_path']
).fillna('')
sensitivity_df = utils.read_csv_to_dataframe(
args['sensitivity_table_path'],
MODEL_SPEC['args']['sensitivity_table_path'])
half_saturation_constant = float(args['half_saturation_constant'])
# Dictionary for reclassing habitat values
sensitivity_reclassify_habitat_dict = {
int(key): float(val['habitat']) for key, val in
sensitivity_dict.items()}
sensitivity_reclassify_habitat_dict = sensitivity_df['habitat'].to_dict()
# declare dictionaries to store the land cover and the threat rasters
# pertaining to the different threats
@ -419,13 +420,12 @@ def execute(args):
# for each threat given in the CSV file try opening the associated
# raster which should be found relative to the Threat CSV
for threat in threat_dict:
for threat, row in threat_df.iterrows():
LOGGER.debug(f"Validating path for threat: {threat}")
threat_table_path_col = _THREAT_SCENARIO_MAP[lulc_key]
threat_path = threat_dict[threat][threat_table_path_col]
threat_validate_result = _validate_threat_path(
threat_path, lulc_key)
row[threat_table_path_col], lulc_key)
if threat_validate_result == 'error':
raise ValueError(
'There was an Error locating a threat raster from '
@ -516,7 +516,7 @@ def execute(args):
intermediate_output_dir,
(f'{os.path.splitext(os.path.basename(lulc_path))[0]}'
f'_aligned{file_suffix}.tif'))
for threat in threat_dict:
for threat in threat_df.index.values:
threat_path = threat_path_dict['threat' + lulc_key][threat]
if threat_path in lulc_and_threat_raster_list:
aligned_threat_path = os.path.join(
@ -578,10 +578,7 @@ def execute(args):
access_task_list.append(rasterize_access_task)
# calculate the weight sum which is the sum of all the threats' weights
weight_sum = 0.0
for threat_data in threat_dict.values():
# Sum weight of threats
weight_sum = weight_sum + threat_data['weight']
weight_sum = threat_df['weight'].sum()
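threat_df['weight'].sum() replaces the accumulation loop; each threat's weight is later divided by this sum (weight_avg) so the weights across all threats total one. A small sketch with hypothetical threats:

import pandas

# Hypothetical threats table, already indexed by threat name as the model
# now reads it through MODEL_SPEC.
threat_df = pandas.DataFrame(
    {'weight': [1.0, 0.5, 0.5], 'max_dist': [8.0, 5.0, 3.0]},
    index=['agriculture', 'roads', 'urban'])

weight_sum = threat_df['weight'].sum()          # 2.0
weight_avg = threat_df['weight'] / weight_sum   # 0.5, 0.25, 0.25
print(weight_avg.to_dict())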
# for each land cover raster provided compute habitat quality
for lulc_key, lulc_path in lulc_path_dict.items():
@ -618,9 +615,9 @@ def execute(args):
exit_landcover = False
# adjust each threat/threat raster for distance, weight, and access
for threat, threat_data in threat_dict.items():
for threat, row in threat_df.iterrows():
LOGGER.debug(
f'Calculating threat: {threat}.\nThreat data: {threat_data}')
f'Calculating threat: {threat}.\nThreat data: {row}')
# get the threat raster for the specific threat
threat_raster_path = threat_path_dict['threat' + lulc_key][threat]
@ -634,7 +631,7 @@ def execute(args):
exit_landcover = True
break
# Check to make sure max_dist is greater than 0
if threat_data['max_dist'] <= 0.0:
if row['max_dist'] <= 0:
raise ValueError(
f"The max distance for threat: '{threat}' is less than"
" or equal to 0. MAX_DIST should be a positive value.")
@ -650,17 +647,15 @@ def execute(args):
dependent_task_list=[align_task],
task_name=f'distance edt {lulc_key} {threat}')
decay_type = threat_data['decay']
filtered_threat_raster_path = os.path.join(
intermediate_output_dir,
f'filtered_{decay_type}_{threat}{lulc_key}{file_suffix}.tif')
f'filtered_{row["decay"]}_{threat}{lulc_key}{file_suffix}.tif')
dist_decay_task = task_graph.add_task(
func=_decay_distance,
args=(
distance_raster_path, threat_data['max_dist'],
decay_type, filtered_threat_raster_path),
distance_raster_path, row['max_dist'],
row['decay'], filtered_threat_raster_path),
target_path_list=[filtered_threat_raster_path],
dependent_task_list=[dist_edt_task],
task_name=f'distance decay {lulc_key} {threat}')
@ -672,9 +667,7 @@ def execute(args):
f'sens_{threat}{lulc_key}{file_suffix}.tif')
# Dictionary for reclassing threat sensitivity values
sensitivity_reclassify_threat_dict = {
int(key): float(val[threat]) for key, val in
sensitivity_dict.items()}
sensitivity_reclassify_threat_dict = sensitivity_df[threat].to_dict()
reclass_error_details = {
'raster_name': 'LULC', 'column_name': 'lucode',
@ -686,11 +679,11 @@ def execute(args):
reclass_error_details),
target_path_list=[sens_raster_path],
dependent_task_list=[align_task],
task_name=f'sens_raster_{decay_type}{lulc_key}_{threat}')
task_name=f'sens_raster_{row["decay"]}{lulc_key}_{threat}')
sensitivity_task_list.append(sens_threat_task)
# get the normalized weight for each threat
weight_avg = threat_data['weight'] / weight_sum
weight_avg = row['weight'] / weight_sum
# add the threat raster adjusted by distance and the raster
# representing sensitivity to the list to be past to
@ -724,7 +717,7 @@ def execute(args):
dependent_task_list=[
*threat_decay_task_list, *sensitivity_task_list,
*access_task_list],
task_name=f'tot_degradation_{decay_type}{lulc_key}_{threat}')
task_name=f'tot_degradation_{row["decay"]}{lulc_key}_{threat}')
# Compute habitat quality
# ksq: a term used below to compute habitat quality
@ -1154,20 +1147,18 @@ def validate(args, limit_to=None):
if ("threats_table_path" not in invalid_keys and
"sensitivity_table_path" not in invalid_keys and
"threat_raster_folder" not in invalid_keys):
# Get CSVs as dictionaries and ensure the key is a string for threats.
threat_dict = {
str(key): value for key, value in utils.read_csv_to_dataframe(
args['threats_table_path'], 'THREAT',
expand_path_cols=['cur_path', 'fut_path', 'base_path']
).to_dict(orient='index').items()}
sensitivity_dict = utils.read_csv_to_dataframe(
args['sensitivity_table_path'], 'LULC').to_dict(orient='index')
threat_df = utils.read_csv_to_dataframe(
args['threats_table_path'],
MODEL_SPEC['args']['threats_table_path']).fillna('')
sensitivity_df = utils.read_csv_to_dataframe(
args['sensitivity_table_path'],
MODEL_SPEC['args']['sensitivity_table_path'])
# check that the threat names in the threats table match with the
# threats columns in the sensitivity table.
sens_header_set = set(list(sensitivity_dict.values())[0])
threat_set = {threat for threat in threat_dict}
sens_header_set = set(sensitivity_df.columns)
threat_set = set(threat_df.index.values)
missing_sens_header_set = threat_set.difference(sens_header_set)
if missing_sens_header_set:
@ -1191,14 +1182,14 @@ def validate(args, limit_to=None):
# for each threat given in the CSV file try opening the
# associated raster which should be found in
# threat_raster_folder
for threat in threat_dict:
for threat, row in threat_df.iterrows():
threat_table_path_col = _THREAT_SCENARIO_MAP[lulc_key]
if threat_table_path_col not in threat_dict[threat]:
if threat_table_path_col not in row:
bad_threat_columns.append(threat_table_path_col)
break
# Threat path from threat CSV is relative to CSV
threat_path = threat_dict[threat][threat_table_path_col]
threat_path = row[threat_table_path_col]
threat_validate_result = _validate_threat_path(
threat_path, lulc_key)

View File

@ -66,6 +66,7 @@ MODEL_SPEC = {
"name": gettext("habitat stressor table"),
"about": gettext("A table describing each habitat and stressor."),
"type": "csv",
"index_col": "name",
"columns": {
"name": {
"type": "freestyle_string",
@ -437,7 +438,7 @@ MODEL_SPEC = {
}
}
},
".taskgraph": spec_utils.TASKGRAPH_DIR
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -496,7 +497,6 @@ def execute(args):
intermediate_dir = os.path.join(args['workspace_dir'],
'intermediate_outputs')
output_dir = os.path.join(args['workspace_dir'], 'outputs')
taskgraph_working_dir = os.path.join(args['workspace_dir'], '.taskgraph')
utils.make_directories([intermediate_dir, output_dir])
suffix = utils.make_suffix_string(args, 'results_suffix')
@ -526,7 +526,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # single process mode.
graph = taskgraph.TaskGraph(taskgraph_working_dir, n_workers)
graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
# parse the info table and get info dicts for habitats, stressors.
habitats_info, stressors_info = _parse_info_table(args['info_table_path'])
@ -1584,7 +1585,7 @@ def _align(raster_path_map, vector_path_map, target_pixel_size,
layer = None
vector = None
_create_raster_from_bounding_box(
pygeoprocessing.create_raster_from_bounding_box(
target_raster_path=target_raster_path,
target_bounding_box=target_bounding_box,
target_pixel_size=target_pixel_size,
@ -1599,74 +1600,6 @@ def _align(raster_path_map, vector_path_map, target_pixel_size,
burn_values=burn_values, option_list=rasterize_option_list)
def _create_raster_from_bounding_box(
target_raster_path, target_bounding_box, target_pixel_size,
target_pixel_type, target_srs_wkt, target_nodata=None,
fill_value=None):
"""Create a raster from a given bounding box.
Args:
target_raster_path (string): The path to where the new raster should be
created on disk.
target_bounding_box (tuple): a 4-element iterable of (minx, miny,
maxx, maxy) in projected units matching the SRS of
``target_srs_wkt``.
target_pixel_size (tuple): A 2-element tuple of the (x, y) pixel size
of the target raster. Elements are in units of the target SRS.
target_pixel_type (int): The GDAL GDT_* type of the target raster.
target_srs_wkt (string): The SRS of the target raster, in Well-Known
Text format.
target_nodata (float): If provided, the nodata value of the target
raster.
fill_value=None (number): If provided, the value that the target raster
should be filled with.
Returns:
``None``
"""
bbox_minx, bbox_miny, bbox_maxx, bbox_maxy = target_bounding_box
driver = gdal.GetDriverByName('GTiff')
n_bands = 1
n_cols = int(numpy.ceil(
abs((bbox_maxx - bbox_minx) / target_pixel_size[0])))
n_rows = int(numpy.ceil(
abs((bbox_maxy - bbox_miny) / target_pixel_size[1])))
raster = driver.Create(
target_raster_path, n_cols, n_rows, n_bands, target_pixel_type,
options=['TILED=YES', 'BIGTIFF=YES', 'COMPRESS=DEFLATE',
'BLOCKXSIZE=256', 'BLOCKYSIZE=256'])
raster.SetProjection(target_srs_wkt)
# Set the transform based on the upper left corner and given pixel
# dimensions. Bounding box is in format [minx, miny, maxx, maxy]
if target_pixel_size[0] < 0:
x_source = bbox_maxx
else:
x_source = bbox_minx
if target_pixel_size[1] < 0:
y_source = bbox_maxy
else:
y_source = bbox_miny
raster_transform = [
x_source, target_pixel_size[0], 0.0,
y_source, 0.0, target_pixel_size[1]]
raster.SetGeoTransform(raster_transform)
# Fill the band if requested.
band = raster.GetRasterBand(1)
if fill_value is not None:
band.Fill(fill_value)
# Set the nodata value.
if target_nodata is not None:
band.SetNoDataValue(float(target_nodata))
band = None
raster = None
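The helper removed above (its job now handled by pygeoprocessing.create_raster_from_bounding_box) sizes the raster by dividing the bounding box extent by the pixel size and anchors the geotransform at whichever corner makes rows and columns advance in the pixel-size direction. The same arithmetic on a hypothetical bounding box:

import numpy

# Hypothetical inputs: a 1 km square box with 30 m pixels (negative y for
# a north-up raster).
bbox_minx, bbox_miny, bbox_maxx, bbox_maxy = 0.0, 0.0, 1000.0, 1000.0
target_pixel_size = (30.0, -30.0)

n_cols = int(numpy.ceil(abs((bbox_maxx - bbox_minx) / target_pixel_size[0])))  # 34
n_rows = int(numpy.ceil(abs((bbox_maxy - bbox_miny) / target_pixel_size[1])))  # 34

# The origin goes at the maximum y (the top edge) because the y pixel size
# is negative; same logic for x.
x_source = bbox_maxx if target_pixel_size[0] < 0 else bbox_minx   # 0.0
y_source = bbox_maxy if target_pixel_size[1] < 0 else bbox_miny   # 1000.0
raster_transform = [
    x_source, target_pixel_size[0], 0.0,
    y_source, 0.0, target_pixel_size[1]]
print(n_cols, n_rows, raster_transform)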
def _simplify(source_vector_path, tolerance, target_vector_path,
preserve_columns=None):
"""Simplify a geometry to a given tolerance.
@ -1841,12 +1774,15 @@ def _open_table_as_dataframe(table_path, **kwargs):
excel_df = pandas.read_excel(table_path, **kwargs)
excel_df.columns = excel_df.columns.str.lower()
excel_df['path'] = excel_df['path'].apply(
lambda p: utils.expand_path(p, table_path))
lambda p: utils.expand_path(p, table_path)).astype('string')
excel_df['name'] = excel_df['name'].astype('string')
excel_df['type'] = excel_df['type'].astype('string')
excel_df['stressor buffer (meters)'] = excel_df['stressor buffer (meters)'].astype(float)
excel_df = excel_df.set_index('name')
return excel_df
else:
return utils.read_csv_to_dataframe(
table_path, convert_vals_to_lower=False,
expand_path_cols=['path'], **kwargs)
table_path, MODEL_SPEC['args']['info_table_path'], **kwargs)
def _parse_info_table(info_table_path):
@ -1871,8 +1807,12 @@ def _parse_info_table(info_table_path):
"""
info_table_path = os.path.abspath(info_table_path)
table = _open_table_as_dataframe(info_table_path)
table = table.set_index('name')
try:
table = _open_table_as_dataframe(info_table_path)
except ValueError as err:
if 'Index has duplicate keys' in str(err):
            raise ValueError("Habitat and stressor names may not overlap.")
        raise
table = table.rename(columns={'stressor buffer (meters)': 'buffer'})
# Drop the buffer column from the habitats list; we don't need it.
@ -1883,15 +1823,6 @@ def _parse_info_table(info_table_path):
stressors = table.loc[table['type'] == 'stressor'].drop(
columns=['type']).to_dict(orient='index')
# habitats and stressors must be nonoverlapping sets.
repeated_habitats_stressors = set(
habitats.keys()).intersection(stressors.keys())
if repeated_habitats_stressors:
raise ValueError(
"Habitat and stressor names may not overlap. These names are "
"both habitats and stressors: "
f"{', '.join(repeated_habitats_stressors)}")
return (habitats, stressors)

View File

@ -73,6 +73,7 @@ MODEL_SPEC = {
},
"biophysical_table_path": {
"type": "csv",
"index_col": "lucode",
"columns": {
"lucode": spec_utils.LULC_TABLE_COLUMN,
"load_[NUTRIENT]": { # nitrogen or phosphorus nutrient loads
@ -373,53 +374,60 @@ MODEL_SPEC = {
"type": "integer"
}}
},
"cache_dir": {
"type": "directory",
"contents": {
"aligned_dem.tif": {
"about": "Copy of the DEM clipped to the extent of the other inputs",
"bands": {1: {"type": "number", "units": u.meter}}
},
"aligned_lulc.tif": {
"about": (
"Copy of the LULC clipped to the extent of the other inputs "
"and reprojected to the DEM projection"),
"bands": {1: {"type": "integer"}}
},
"aligned_runoff_proxy.tif": {
"about": (
"Copy of the runoff proxy clipped to the extent of the other inputs "
"and reprojected to the DEM projection"),
"bands": {1: {"type": "number", "units": u.none}}
},
"filled_dem.tif": spec_utils.FILLED_DEM,
"slope.tif": spec_utils.SLOPE,
"subsurface_export_n.pickle": {
"about": "Pickled zonal statistics of nitrogen subsurface export"
},
"subsurface_load_n.pickle": {
"about": "Pickled zonal statistics of nitrogen subsurface load"
},
"surface_export_n.pickle": {
"about": "Pickled zonal statistics of nitrogen surface export"
},
"surface_export_p.pickle": {
"about": "Pickled zonal statistics of phosphorus surface export"
},
"surface_load_n.pickle": {
"about": "Pickled zonal statistics of nitrogen surface load"
},
"surface_load_p.pickle": {
"about": "Pickled zonal statistics of phosphorus surface load"
},
"total_export_n.pickle": {
"about": "Pickled zonal statistics of total nitrogen export"
},
"taskgraph.db": {}
}
"aligned_dem.tif": {
"about": "Copy of the DEM clipped to the extent of the other inputs",
"bands": {1: {"type": "number", "units": u.meter}}
},
"aligned_lulc.tif": {
"about": (
"Copy of the LULC clipped to the extent of the other inputs "
"and reprojected to the DEM projection"),
"bands": {1: {"type": "integer"}}
},
"aligned_runoff_proxy.tif": {
"about": (
"Copy of the runoff proxy clipped to the extent of the other inputs "
"and reprojected to the DEM projection"),
"bands": {1: {"type": "number", "units": u.none}}
},
"masked_dem.tif": {
"about": "DEM input masked to exclude pixels outside the watershed",
"bands": {1: {"type": "number", "units": u.meter}}
},
"masked_lulc.tif": {
"about": "LULC input masked to exclude pixels outside the watershed",
"bands": {1: {"type": "integer"}}
},
"masked_runoff_proxy.tif": {
"about": "Runoff proxy input masked to exclude pixels outside the watershed",
"bands": {1: {"type": "number", "units": u.none}}
},
"filled_dem.tif": spec_utils.FILLED_DEM,
"slope.tif": spec_utils.SLOPE,
"subsurface_export_n.pickle": {
"about": "Pickled zonal statistics of nitrogen subsurface export"
},
"subsurface_load_n.pickle": {
"about": "Pickled zonal statistics of nitrogen subsurface load"
},
"surface_export_n.pickle": {
"about": "Pickled zonal statistics of nitrogen surface export"
},
"surface_export_p.pickle": {
"about": "Pickled zonal statistics of phosphorus surface export"
},
"surface_load_n.pickle": {
"about": "Pickled zonal statistics of nitrogen surface load"
},
"surface_load_p.pickle": {
"about": "Pickled zonal statistics of phosphorus surface load"
},
"total_export_n.pickle": {
"about": "Pickled zonal statistics of total nitrogen export"
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -463,14 +471,14 @@ _INTERMEDIATE_BASE_FILES = {
'thresholded_slope_path': 'thresholded_slope.tif',
'dist_to_channel_path': 'dist_to_channel.tif',
'drainage_mask': 'what_drains_to_stream.tif',
}
_CACHE_BASE_FILES = {
'filled_dem_path': 'filled_dem.tif',
'aligned_dem_path': 'aligned_dem.tif',
'masked_dem_path': 'masked_dem.tif',
'slope_path': 'slope.tif',
'aligned_lulc_path': 'aligned_lulc.tif',
'masked_lulc_path': 'masked_lulc.tif',
'aligned_runoff_proxy_path': 'aligned_runoff_proxy.tif',
'masked_runoff_proxy_path': 'masked_runoff_proxy.tif',
'surface_load_n_pickle_path': 'surface_load_n.pickle',
'surface_load_p_pickle_path': 'surface_load_p.pickle',
'subsurface_load_n_pickle_path': 'subsurface_load_n.pickle',
@ -537,14 +545,14 @@ def execute(args):
None
"""
def _validate_inputs(nutrients_to_process, lucode_to_parameters):
def _validate_inputs(nutrients_to_process, biophysical_df):
"""Validate common errors in inputs.
Args:
nutrients_to_process (list): list of 'n' and/or 'p'
lucode_to_parameters (dictionary): biophysical input table mapping
lucode to dictionary of table parameters. Used to validate
the correct columns are input
biophysical_df (pandas.DataFrame): dataframe representation of
the input biophysical table. Used to validate the correct
columns are input
Returns:
None
@ -564,7 +572,7 @@ def execute(args):
# is missing.
row_header_table_list = []
lu_parameter_row = list(lucode_to_parameters.values())[0]
lu_parameter_row = biophysical_df.columns.to_list()
row_header_table_list.append(
(lu_parameter_row, ['load_', 'eff_', 'crit_len_'],
args['biophysical_table_path']))
@ -594,8 +602,7 @@ def execute(args):
output_dir = os.path.join(args['workspace_dir'])
intermediate_output_dir = os.path.join(
args['workspace_dir'], INTERMEDIATE_DIR_NAME)
cache_dir = os.path.join(intermediate_output_dir, 'cache_dir')
utils.make_directories([output_dir, intermediate_output_dir, cache_dir])
utils.make_directories([output_dir, intermediate_output_dir])
try:
n_workers = int(args['n_workers'])
@ -605,13 +612,13 @@ def execute(args):
# TypeError when n_workers is None.
n_workers = -1 # Synchronous mode.
task_graph = taskgraph.TaskGraph(
cache_dir, n_workers, reporting_interval=5.0)
os.path.join(args['workspace_dir'], 'taskgraph_cache'),
n_workers, reporting_interval=5.0)
file_suffix = utils.make_suffix_string(args, 'results_suffix')
f_reg = utils.build_file_registry(
[(_OUTPUT_BASE_FILES, output_dir),
(_INTERMEDIATE_BASE_FILES, intermediate_output_dir),
(_CACHE_BASE_FILES, cache_dir)], file_suffix)
(_INTERMEDIATE_BASE_FILES, intermediate_output_dir)], file_suffix)
# Build up a list of nutrients to process based on what's checked on
nutrients_to_process = []
@ -619,10 +626,11 @@ def execute(args):
if args['calc_' + nutrient_id]:
nutrients_to_process.append(nutrient_id)
lucode_to_parameters = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'lucode').to_dict(orient='index')
biophysical_df = utils.read_csv_to_dataframe(
args['biophysical_table_path'],
MODEL_SPEC['args']['biophysical_table_path'])
_validate_inputs(nutrients_to_process, lucode_to_parameters)
_validate_inputs(nutrients_to_process, biophysical_df)
# these are used for aggregation in the last step
field_pickle_map = {}
@ -646,18 +654,64 @@ def execute(args):
base_raster_list, aligned_raster_list,
['near']*len(base_raster_list), dem_info['pixel_size'],
'intersection'),
kwargs={
'base_vector_path_list': [args['watersheds_path']],
'vector_mask_options': {
'mask_vector_path': args['watersheds_path']}},
kwargs={'base_vector_path_list': [args['watersheds_path']]},
target_path_list=aligned_raster_list,
task_name='align rasters')
# Use the cutline feature of gdal.Warp to mask pixels outside the watershed.
# It's possible that the DEM, LULC, or runoff proxy inputs might have an
# undefined nodata value. Since we're introducing nodata pixels, set a nodata
# value if one is not already defined.
rp_nodata = pygeoprocessing.get_raster_info(
f_reg['aligned_runoff_proxy_path'])['nodata'][0]
mask_runoff_proxy_task = task_graph.add_task(
func=gdal.Warp,
kwargs={
'destNameOrDestDS': f_reg['masked_runoff_proxy_path'],
'srcDSOrSrcDSTab': f_reg['aligned_runoff_proxy_path'],
'dstNodata': _TARGET_NODATA if rp_nodata is None else rp_nodata,
'cutlineDSName': args['watersheds_path']},
dependent_task_list=[align_raster_task],
target_path_list=[f_reg['masked_runoff_proxy_path']],
task_name='mask runoff proxy raster')
dem_nodata = pygeoprocessing.get_raster_info(
f_reg['aligned_dem_path'])['nodata'][0]
dem_target_nodata = float( # GDAL expects a python float, not numpy.float32
numpy.finfo(numpy.float32).min if dem_nodata is None else dem_nodata)
mask_dem_task = task_graph.add_task(
func=gdal.Warp,
kwargs={
'destNameOrDestDS': f_reg['masked_dem_path'],
'srcDSOrSrcDSTab': f_reg['aligned_dem_path'],
'outputType': gdal.GDT_Float32,
'dstNodata': dem_target_nodata,
'cutlineDSName': args['watersheds_path']},
dependent_task_list=[align_raster_task],
target_path_list=[f_reg['masked_dem_path']],
task_name='mask dem raster')
lulc_nodata = pygeoprocessing.get_raster_info(
f_reg['aligned_lulc_path'])['nodata'][0]
lulc_target_nodata = (
numpy.iinfo(numpy.int32).min if lulc_nodata is None else lulc_nodata)
mask_lulc_task = task_graph.add_task(
func=gdal.Warp,
kwargs={
'destNameOrDestDS': f_reg['masked_lulc_path'],
'srcDSOrSrcDSTab': f_reg['aligned_lulc_path'],
'outputType': gdal.GDT_Int32,
'dstNodata': lulc_target_nodata,
'cutlineDSName': args['watersheds_path']},
dependent_task_list=[align_raster_task],
target_path_list=[f_reg['masked_lulc_path']],
task_name='mask lulc raster')
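The three mask tasks above rely on the same gdal.Warp behavior: the cutline burns nodata into every pixel that falls outside the watershed polygons, which is why a destination nodata value has to be guaranteed first. A minimal standalone sketch of that call, with hypothetical file names:
from osgeo import gdal
# Hypothetical paths; any aligned raster plus a polygon vector will do.
gdal.Warp(
    'masked_lulc.tif',                # destNameOrDestDS
    'aligned_lulc.tif',               # srcDSOrSrcDSTab
    cutlineDSName='watersheds.gpkg',  # pixels outside these polygons become nodata
    dstNodata=-2147483648,            # set explicitly so masked pixels are recognizable
    outputType=gdal.GDT_Int32)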
fill_pits_task = task_graph.add_task(
func=pygeoprocessing.routing.fill_pits,
args=(
(f_reg['aligned_dem_path'], 1), f_reg['filled_dem_path']),
kwargs={'working_dir': cache_dir},
(f_reg['masked_dem_path'], 1), f_reg['filled_dem_path']),
kwargs={'working_dir': intermediate_output_dir},
dependent_task_list=[align_raster_task],
target_path_list=[f_reg['filled_dem_path']],
task_name='fill pits')
@ -666,7 +720,7 @@ def execute(args):
func=pygeoprocessing.routing.flow_dir_mfd,
args=(
(f_reg['filled_dem_path'], 1), f_reg['flow_direction_path']),
kwargs={'working_dir': cache_dir},
kwargs={'working_dir': intermediate_output_dir},
dependent_task_list=[fill_pits_task],
target_path_list=[f_reg['flow_direction_path']],
task_name='flow dir')
@ -707,7 +761,7 @@ def execute(args):
runoff_proxy_index_task = task_graph.add_task(
func=_normalize_raster,
args=((f_reg['aligned_runoff_proxy_path'], 1),
args=((f_reg['masked_runoff_proxy_path'], 1),
f_reg['runoff_proxy_index_path']),
target_path_list=[f_reg['runoff_proxy_index_path']],
dependent_task_list=[align_raster_task],
@ -790,14 +844,16 @@ def execute(args):
# Perrine says that 'n' is the only case where we could consider a
# prop subsurface component. So there's a special case for that.
if nutrient == 'n':
subsurface_proportion_type = 'proportion_subsurface_n'
subsurface_proportion_map = (
biophysical_df['proportion_subsurface_n'].to_dict())
else:
subsurface_proportion_type = None
subsurface_proportion_map = None
load_task = task_graph.add_task(
func=_calculate_load,
args=(
f_reg['aligned_lulc_path'], lucode_to_parameters,
f'load_{nutrient}', load_path),
f_reg['masked_lulc_path'],
biophysical_df[f'load_{nutrient}'],
load_path),
dependent_task_list=[align_raster_task],
target_path_list=[load_path],
task_name=f'{nutrient} load')
@ -813,9 +869,8 @@ def execute(args):
surface_load_path = f_reg[f'surface_load_{nutrient}_path']
surface_load_task = task_graph.add_task(
func=_map_surface_load,
args=(modified_load_path, f_reg['aligned_lulc_path'],
lucode_to_parameters, subsurface_proportion_type,
surface_load_path),
args=(modified_load_path, f_reg['masked_lulc_path'],
subsurface_proportion_map, surface_load_path),
target_path_list=[surface_load_path],
dependent_task_list=[modified_load_task, align_raster_task],
task_name=f'map surface load {nutrient}')
@ -824,8 +879,8 @@ def execute(args):
eff_task = task_graph.add_task(
func=_map_lulc_to_val_mask_stream,
args=(
f_reg['aligned_lulc_path'], f_reg['stream_path'],
lucode_to_parameters, f'eff_{nutrient}', eff_path),
f_reg['masked_lulc_path'], f_reg['stream_path'],
biophysical_df[f'eff_{nutrient}'].to_dict(), eff_path),
target_path_list=[eff_path],
dependent_task_list=[align_raster_task, stream_extraction_task],
task_name=f'ret eff {nutrient}')
@ -834,8 +889,9 @@ def execute(args):
crit_len_task = task_graph.add_task(
func=_map_lulc_to_val_mask_stream,
args=(
f_reg['aligned_lulc_path'], f_reg['stream_path'],
lucode_to_parameters, f'crit_len_{nutrient}', crit_len_path),
f_reg['masked_lulc_path'], f_reg['stream_path'],
biophysical_df[f'crit_len_{nutrient}'].to_dict(),
crit_len_path),
target_path_list=[crit_len_path],
dependent_task_list=[align_raster_task, stream_extraction_task],
task_name=f'ret eff {nutrient}')
@ -879,12 +935,11 @@ def execute(args):
# only calculate subsurface things for nitrogen
if nutrient == 'n':
proportion_subsurface_map = {
lucode: params['proportion_subsurface_n']
for lucode, params in lucode_to_parameters.items()}
proportion_subsurface_map = (
biophysical_df['proportion_subsurface_n'].to_dict())
subsurface_load_task = task_graph.add_task(
func=_map_subsurface_load,
args=(modified_load_path, f_reg['aligned_lulc_path'],
args=(modified_load_path, f_reg['masked_lulc_path'],
proportion_subsurface_map, f_reg['sub_load_n_path']),
target_path_list=[f_reg['sub_load_n_path']],
dependent_task_list=[modified_load_task, align_raster_task],
@ -1174,18 +1229,13 @@ def _normalize_raster(base_raster_path_band, target_normalized_raster_path):
target_nodata)
def _calculate_load(
lulc_raster_path, lucode_to_parameters, load_type,
target_load_raster):
def _calculate_load(lulc_raster_path, lucode_to_load, target_load_raster):
"""Calculate load raster by mapping landcover and multiplying by area.
Args:
lulc_raster_path (string): path to integer landcover raster.
lucode_to_parameters (dict): a mapping of landcover IDs to a
dictionary indexed by the value of `load_{load_type}` that
represents a per-area nutrient load.
load_type (string): represent nutrient to map, either 'load_n' or
'load_p'.
lucode_to_load (dict): a mapping of landcover IDs to per-area
nutrient load.
target_load_raster (string): path to target raster that will have
total load per pixel.
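For example (hypothetical numbers): on a 30 m x 30 m pixel, cell_area_ha is 0.09, so a landcover class with a load of 10 kg/ha maps to 10 * 0.09 = 0.9 kg for that pixel.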
@ -1205,8 +1255,7 @@ def _calculate_load(
if lucode != nodata_landuse:
try:
result[lucode_array == lucode] = (
lucode_to_parameters[lucode][load_type] *
cell_area_ha)
lucode_to_load[lucode] * cell_area_ha)
except KeyError:
raise KeyError(
'lucode: %d is present in the landuse raster but '
@ -1290,18 +1339,17 @@ def _sum_rasters(raster_path_list, target_nodata, target_result_path):
def _map_surface_load(
modified_load_path, lulc_raster_path, lucode_to_parameters,
subsurface_proportion_type, target_surface_load_path):
modified_load_path, lulc_raster_path, lucode_to_subsurface_proportion,
target_surface_load_path):
"""Calculate surface load from landcover raster.
Args:
modified_load_path (string): path to modified load raster with units
of kg/pixel.
lulc_raster_path (string): path to landcover raster.
lucode_to_parameters (dict): maps landcover codes to a dictionary that
can be indexed by `subsurface_proportion_type`.
subsurface_proportion_type (string): if None no subsurface transfer
is mapped. Otherwise indexed from lucode_to_parameters.
lucode_to_subsurface_proportion (dict): maps landcover codes to
subsurface proportion values. Or if None, no subsurface transfer
is mapped.
target_surface_load_path (string): path to target raster.
Returns:
@ -1311,16 +1359,15 @@ def _map_surface_load(
lulc_raster_info = pygeoprocessing.get_raster_info(lulc_raster_path)
nodata_landuse = lulc_raster_info['nodata'][0]
keys = sorted(numpy.array(list(lucode_to_parameters)))
if subsurface_proportion_type is not None:
if lucode_to_subsurface_proportion is not None:
keys = sorted(lucode_to_subsurface_proportion.keys())
subsurface_values = numpy.array(
[lucode_to_parameters[x][subsurface_proportion_type]
for x in keys])
[lucode_to_subsurface_proportion[x] for x in keys])
def _map_surface_load_op(lucode_array, modified_load_array):
"""Convert unit load to total load & handle nodata."""
# If we don't have subsurface, just return 0.0.
if subsurface_proportion_type is None:
if lucode_to_subsurface_proportion is None:
return numpy.where(
~utils.array_equals_nodata(lucode_array, nodata_landuse),
modified_load_array, _TARGET_NODATA)
@ -1382,17 +1429,13 @@ def _map_subsurface_load(
def _map_lulc_to_val_mask_stream(
lulc_raster_path, stream_path, lucode_to_parameters, map_id,
target_eff_path):
lulc_raster_path, stream_path, lucodes_to_vals, target_eff_path):
"""Make retention efficiency raster from landcover.
Args:
lulc_raster_path (string): path to landcover raster.
stream_path (string): path to stream raster, where 0 means no stream and 1 means stream.
lucode_to_parameters (dict) mapping of landcover code to a dictionary
that contains the key in `map_id`
map_id (string): the id in the lookup table with values to map
landcover to efficiency.
lucodes_to_vals (dict): mapping of landcover codes to values.
target_eff_path (string): target raster that contains the mapping of
landcover codes to retention efficiency values except where there
is a stream in which case the retention efficiency is 0.
@ -1401,9 +1444,8 @@ def _map_lulc_to_val_mask_stream(
None.
"""
keys = sorted(numpy.array(list(lucode_to_parameters)))
values = numpy.array(
[lucode_to_parameters[x][map_id] for x in keys])
lucodes = sorted(lucodes_to_vals.keys())
values = numpy.array([lucodes_to_vals[x] for x in lucodes])
nodata_landuse = pygeoprocessing.get_raster_info(
lulc_raster_path)['nodata'][0]
@ -1417,7 +1459,7 @@ def _map_lulc_to_val_mask_stream(
result = numpy.empty(valid_mask.shape, dtype=numpy.float32)
result[:] = _TARGET_NODATA
index = numpy.digitize(
lucode_array[valid_mask].ravel(), keys, right=True)
lucode_array[valid_mask].ravel(), lucodes, right=True)
result[valid_mask] = (
values[index] * (1 - stream_array[valid_mask]))
return result
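A minimal sketch of the sorted-lucode / numpy.digitize lookup performed above, with made-up codes and values:
import numpy
lucodes = [1, 2, 5]                      # sorted landcover codes (hypothetical)
values = numpy.array([0.3, 0.6, 0.9])    # e.g. retention efficiencies
lucode_array = numpy.array([5, 1, 2, 2])
index = numpy.digitize(lucode_array, lucodes, right=True)
print(values[index])                     # [0.9 0.3 0.6 0.6]
Note that a code larger than the largest key in lucodes would index past the end of values and raise an IndexError.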

View File

@ -39,6 +39,7 @@ MODEL_SPEC = {
},
"guild_table_path": {
"type": "csv",
"index_col": "species",
"columns": {
"species": {
"type": "freestyle_string",
@ -87,6 +88,7 @@ MODEL_SPEC = {
},
"landcover_biophysical_table_path": {
"type": "csv",
"index_col": "lucode",
"columns": {
"lucode": spec_utils.LULC_TABLE_COLUMN,
"nesting_[SUBSTRATE]_availability_index": {
@ -309,10 +311,10 @@ MODEL_SPEC = {
"about": "Farm vector reprojected to the LULC projection",
"fields": {},
"geometries": spec_utils.POLYGONS
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -322,7 +324,7 @@ _INDEX_NODATA = -1
_NESTING_SUBSTRATE_PATTERN = 'nesting_([^_]+)_availability_index'
_FLORAL_RESOURCES_AVAILABLE_PATTERN = 'floral_resources_([^_]+)_index'
_EXPECTED_BIOPHYSICAL_HEADERS = [
'lucode', _NESTING_SUBSTRATE_PATTERN, _FLORAL_RESOURCES_AVAILABLE_PATTERN]
_NESTING_SUBSTRATE_PATTERN, _FLORAL_RESOURCES_AVAILABLE_PATTERN]
# These are patterns expected in the guilds table
_NESTING_SUITABILITY_PATTERN = 'nesting_suitability_([^_]+)_index'
@ -332,7 +334,7 @@ _FORAGING_ACTIVITY_RE_PATTERN = _FORAGING_ACTIVITY_PATTERN % '([^_]+)'
_RELATIVE_SPECIES_ABUNDANCE_FIELD = 'relative_abundance'
_ALPHA_HEADER = 'alpha'
_EXPECTED_GUILD_HEADERS = [
'species', _NESTING_SUITABILITY_PATTERN, _FORAGING_ACTIVITY_RE_PATTERN,
_NESTING_SUITABILITY_PATTERN, _FORAGING_ACTIVITY_RE_PATTERN,
_ALPHA_HEADER, _RELATIVE_SPECIES_ABUNDANCE_FIELD]
_NESTING_SUBSTRATE_INDEX_FILEPATTERN = 'nesting_substrate_index_%s%s.tif'
@ -502,8 +504,6 @@ def execute(args):
# create initial working directories and determine file suffixes
intermediate_output_dir = os.path.join(
args['workspace_dir'], 'intermediate_outputs')
work_token_dir = os.path.join(
intermediate_output_dir, '_taskgraph_working_dir')
output_dir = os.path.join(args['workspace_dir'])
utils.make_directories(
[output_dir, intermediate_output_dir])
@ -532,7 +532,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # Synchronous mode.
task_graph = taskgraph.TaskGraph(work_token_dir, n_workers)
task_graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
if farm_vector_path is not None:
# ensure farm vector is in the same projection as the landcover map
@ -718,6 +719,7 @@ def execute(args):
pollinator_abundance_task_map = {}
floral_resources_index_path_map = {}
floral_resources_index_task_map = {}
alpha_kernel_map = {}
for species in scenario_variables['species_list']:
# calculate foraging_effectiveness[species]
# FE(x, s) = sum_j [RA(l(x), j) * fa(s, j)]
@ -762,11 +764,17 @@ def execute(args):
intermediate_output_dir, _KERNEL_FILE_PATTERN % (
alpha, file_suffix))
alpha_kernel_raster_task = task_graph.add_task(
task_name=f'decay_kernel_raster_{alpha}',
func=utils.exponential_decay_kernel_raster,
args=(alpha, kernel_path),
target_path_list=[kernel_path])
# To avoid creating duplicate kernel rasters, check whether an
# equivalent kernel task has already been submitted.
try:
alpha_kernel_raster_task = alpha_kernel_map[kernel_path]
except KeyError:
alpha_kernel_raster_task = task_graph.add_task(
task_name=f'decay_kernel_raster_{alpha}',
func=utils.exponential_decay_kernel_raster,
args=(alpha, kernel_path),
target_path_list=[kernel_path])
alpha_kernel_map[kernel_path] = alpha_kernel_raster_task
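A tiny sketch of the de-duplication pattern above with made-up alpha values; species that share an alpha reuse the same kernel task rather than building a second identical raster:
alpha_kernel_map = {}
for alpha in [500.0, 500.0, 1200.0]:
    kernel_path = f'kernel_{alpha}.tif'  # hypothetical naming
    if kernel_path not in alpha_kernel_map:
        # stands in for task_graph.add_task(...)
        alpha_kernel_map[kernel_path] = f'task for {kernel_path}'
print(len(alpha_kernel_map))  # 2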
# convolve FE with alpha_s
floral_resources_index_path = os.path.join(
@ -1179,23 +1187,22 @@ def _parse_scenario_variables(args):
else:
farm_vector_path = None
guild_table = utils.read_csv_to_dataframe(
guild_table_path, 'species').to_dict(orient='index')
guild_df = utils.read_csv_to_dataframe(
guild_table_path, MODEL_SPEC['args']['guild_table_path'])
LOGGER.info('Checking to make sure guild table has all expected headers')
guild_headers = list(guild_table.values())[0].keys()
for header in _EXPECTED_GUILD_HEADERS:
matches = re.findall(header, " ".join(guild_headers))
matches = re.findall(header, " ".join(guild_df.columns))
if len(matches) == 0:
raise ValueError(
"Expected a header in guild table that matched the pattern "
f"'{header}' but was unable to find one. Here are all the "
f"headers from {guild_table_path}: {', '.join(guild_headers)}")
f"headers from {guild_table_path}: {', '.join(guild_df.columns)}")
landcover_biophysical_table = utils.read_csv_to_dataframe(
landcover_biophysical_table_path, 'lucode').to_dict(orient='index')
biophysical_table_headers = (
list(landcover_biophysical_table.values())[0].keys())
landcover_biophysical_df = utils.read_csv_to_dataframe(
landcover_biophysical_table_path,
MODEL_SPEC['args']['landcover_biophysical_table_path'])
biophysical_table_headers = landcover_biophysical_df.columns
for header in _EXPECTED_BIOPHYSICAL_HEADERS:
matches = re.findall(header, " ".join(biophysical_table_headers))
if len(matches) == 0:
@ -1211,7 +1218,7 @@ def _parse_scenario_variables(args):
# this dict to dict will map substrate types to guild/biophysical headers
# ex substrate_to_header['cavity']['biophysical']
substrate_to_header = collections.defaultdict(dict)
for header in guild_headers:
for header in guild_df.columns:
match = re.match(_FORAGING_ACTIVITY_RE_PATTERN, header)
if match:
season = match.group(1)
@ -1297,55 +1304,48 @@ def _parse_scenario_variables(args):
# * substrate_list (list of string)
result['substrate_list'] = sorted(substrate_to_header)
# * species_list (list of string)
result['species_list'] = sorted(guild_table)
result['species_list'] = sorted(guild_df.index)
result['alpha_value'] = dict()
for species in result['species_list']:
result['alpha_value'][species] = float(
guild_table[species][_ALPHA_HEADER])
result['alpha_value'][species] = guild_df[_ALPHA_HEADER][species]
# * species_abundance[species] (string->float)
total_relative_abundance = numpy.sum([
guild_table[species][_RELATIVE_SPECIES_ABUNDANCE_FIELD]
for species in result['species_list']])
total_relative_abundance = guild_df[_RELATIVE_SPECIES_ABUNDANCE_FIELD].sum()
result['species_abundance'] = {}
for species in result['species_list']:
result['species_abundance'][species] = (
guild_table[species][_RELATIVE_SPECIES_ABUNDANCE_FIELD] /
float(total_relative_abundance))
guild_df[_RELATIVE_SPECIES_ABUNDANCE_FIELD][species] /
total_relative_abundance)
# map the relative foraging activity of a species during a certain season
# (species, season)
result['species_foraging_activity'] = dict()
for species in result['species_list']:
total_activity = numpy.sum([
guild_table[species][_FORAGING_ACTIVITY_PATTERN % season]
guild_df[_FORAGING_ACTIVITY_PATTERN % season][species]
for season in result['season_list']])
for season in result['season_list']:
result['species_foraging_activity'][(species, season)] = (
guild_table[species][_FORAGING_ACTIVITY_PATTERN % season] /
float(total_activity))
guild_df[_FORAGING_ACTIVITY_PATTERN % season][species] /
total_activity)
# * landcover_substrate_index[substrate][landcover] (float)
result['landcover_substrate_index'] = collections.defaultdict(dict)
for raw_landcover_id in landcover_biophysical_table:
landcover_id = int(raw_landcover_id)
for landcover_id, row in landcover_biophysical_df.iterrows():
for substrate in result['substrate_list']:
substrate_biophysical_header = (
substrate_to_header[substrate]['biophysical'])
result['landcover_substrate_index'][substrate][landcover_id] = (
landcover_biophysical_table[landcover_id][
substrate_biophysical_header])
row[substrate_biophysical_header])
# * landcover_floral_resources[season][landcover] (float)
result['landcover_floral_resources'] = collections.defaultdict(dict)
for raw_landcover_id in landcover_biophysical_table:
landcover_id = int(raw_landcover_id)
for landcover_id, row in landcover_biophysical_df.iterrows():
for season in result['season_list']:
floral_rources_header = season_to_header[season]['biophysical']
result['landcover_floral_resources'][season][landcover_id] = (
landcover_biophysical_table[landcover_id][
floral_rources_header])
row[floral_rources_header])
# * species_substrate_index[(species, substrate)] (tuple->float)
result['species_substrate_index'] = collections.defaultdict(dict)
@ -1353,7 +1353,7 @@ def _parse_scenario_variables(args):
for substrate in result['substrate_list']:
substrate_guild_header = substrate_to_header[substrate]['guild']
result['species_substrate_index'][species][substrate] = (
guild_table[species][substrate_guild_header])
guild_df[substrate_guild_header][species])
# * foraging_activity_index[(species, season)] (tuple->float)
result['foraging_activity_index'] = {}
@ -1362,7 +1362,7 @@ def _parse_scenario_variables(args):
key = (species, season)
foraging_biophyiscal_header = season_to_header[season]['guild']
result['foraging_activity_index'][key] = (
guild_table[species][foraging_biophyiscal_header])
guild_df[foraging_biophyiscal_header][species])
return result

View File

@ -77,7 +77,7 @@ predictor_table_columns = {
"point_nearest_distance": {
"description": gettext(
"Predictor is a point vector. Metric is the Euclidean "
"distance between the center of each AOI grid cell and "
"distance between the centroid of each AOI grid cell and "
"the nearest point in this layer.")},
"line_intersect_length": {
"description": gettext(
@ -192,6 +192,7 @@ MODEL_SPEC = {
},
"predictor_table_path": {
"type": "csv",
"index_col": "id",
"columns": predictor_table_columns,
"required": "compute_regression",
"about": gettext(
@ -202,6 +203,7 @@ MODEL_SPEC = {
},
"scenario_predictor_table_path": {
"type": "csv",
"index_col": "id",
"columns": predictor_table_columns,
"required": False,
"about": gettext(
@ -233,7 +235,12 @@ MODEL_SPEC = {
},
"monthly_table.csv": {
"about": gettext("Table of monthly photo-user-days."),
"index_col": "poly_id",
"columns": {
"poly_id": {
"type": "integer",
"about": gettext("Polygon ID")
},
"[YEAR]-[MONTH]": {
"about": gettext(
"Total photo-user-days counted in each cell in the "
@ -324,10 +331,10 @@ MODEL_SPEC = {
},
"server_version.pickle": {
"about": gettext("Server version info")
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -410,7 +417,7 @@ def execute(args):
* 'point_count': count of the points contained in the
response polygon
* 'point_nearest_distance': distance to the nearest point
from the response polygon
from the centroid of the response polygon
* 'line_intersect_length': length of lines that intersect
with the response polygon in projected units of AOI
* 'polygon_area': area of the polygon contained within
@ -472,7 +479,6 @@ def execute(args):
(_INTERMEDIATE_BASE_FILES, intermediate_dir)], file_suffix)
# Initialize a TaskGraph
taskgraph_db_dir = os.path.join(intermediate_dir, '_taskgraph_working_dir')
try:
n_workers = int(args['n_workers'])
except (KeyError, ValueError, TypeError):
@ -480,7 +486,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # single process mode.
task_graph = taskgraph.TaskGraph(taskgraph_db_dir, n_workers)
task_graph = taskgraph.TaskGraph(
os.path.join(output_dir, 'taskgraph_cache'), n_workers)
if args['grid_aoi']:
prep_aoi_task = task_graph.add_task(
@ -853,16 +860,14 @@ def _schedule_predictor_data_processing(
'line_intersect_length': _line_intersect_length,
}
predictor_table = utils.read_csv_to_dataframe(
predictor_table_path, 'id', expand_path_cols=['path']
).to_dict(orient='index')
predictor_df = utils.read_csv_to_dataframe(
predictor_table_path, MODEL_SPEC['args']['predictor_table_path'])
predictor_task_list = []
predictor_json_list = [] # tracks predictor files to add to shp
for predictor_id in predictor_table:
for predictor_id, row in predictor_df.iterrows():
LOGGER.info(f"Building predictor {predictor_id}")
predictor_type = predictor_table[predictor_id]['type'].strip()
predictor_type = row['type']
if predictor_type.startswith('raster'):
# type must be one of raster_sum or raster_mean
raster_op_mode = predictor_type.split('_')[1]
@ -871,7 +876,7 @@ def _schedule_predictor_data_processing(
predictor_json_list.append(predictor_target_path)
predictor_task_list.append(task_graph.add_task(
func=_raster_sum_mean,
args=(predictor_table[predictor_id]['path'], raster_op_mode,
args=(row['path'], raster_op_mode,
response_vector_path, predictor_target_path),
target_path_list=[predictor_target_path],
task_name=f'predictor {predictor_id}'))
@ -884,8 +889,7 @@ def _schedule_predictor_data_processing(
predictor_task_list.append(task_graph.add_task(
func=_polygon_area,
args=(predictor_type, response_polygons_pickle_path,
predictor_table[predictor_id]['path'],
predictor_target_path),
row['path'], predictor_target_path),
target_path_list=[predictor_target_path],
dependent_task_list=[prepare_response_polygons_task],
task_name=f'predictor {predictor_id}'))
@ -896,8 +900,7 @@ def _schedule_predictor_data_processing(
predictor_task_list.append(task_graph.add_task(
func=predictor_functions[predictor_type],
args=(response_polygons_pickle_path,
predictor_table[predictor_id]['path'],
predictor_target_path),
row['path'], predictor_target_path),
target_path_list=[predictor_target_path],
dependent_task_list=[prepare_response_polygons_task],
task_name=f'predictor {predictor_id}'))
@ -1167,7 +1170,7 @@ def _line_intersect_length(
def _point_nearest_distance(
response_polygons_pickle_path, point_vector_path,
predictor_target_path):
"""Calculate distance to nearest point for all polygons.
"""Calculate distance to nearest point for the centroid of all polygons.
Args:
response_polygons_pickle_path (str): path to a pickled dictionary which
@ -1197,7 +1200,7 @@ def _point_nearest_distance(
f"{(100*index)/len(response_polygons_lookup):.2f}% complete"))
point_distance_lookup[str(feature_id)] = min([
geometry.distance(point) for point in points])
geometry.centroid.distance(point) for point in points])
LOGGER.info(f"{os.path.basename(point_vector_path)} point distance: "
"100.00% complete")
with open(predictor_target_path, 'w') as jsonfile:
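A minimal sketch of the centroid-to-nearest-point metric computed above, assuming shapely geometries (which the centroid and distance calls suggest) and hypothetical coordinates:
from shapely.geometry import Point, Polygon
aoi_cell = Polygon([(0, 0), (4, 0), (4, 4), (0, 4)])  # centroid at (2, 2)
points = [Point(10, 2), Point(3, 7)]
nearest = min(aoi_cell.centroid.distance(p) for p in points)
print(round(nearest, 2))  # 5.1, the distance to the point at (3, 7)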
@ -1546,10 +1549,10 @@ def _validate_same_id_lengths(table_path):
tables.
"""
predictor_table = utils.read_csv_to_dataframe(
table_path, 'id').to_dict(orient='index')
predictor_df = utils.read_csv_to_dataframe(
table_path, MODEL_SPEC['args']['predictor_table_path'])
too_long = set()
for p_id in predictor_table:
for p_id in predictor_df.index:
if len(p_id) > 10:
too_long.add(p_id)
if len(too_long) > 0:
@ -1580,21 +1583,21 @@ def _validate_same_ids_and_types(
tables.
"""
predictor_table = utils.read_csv_to_dataframe(
predictor_table_path, 'id').to_dict(orient='index')
predictor_df = utils.read_csv_to_dataframe(
predictor_table_path, MODEL_SPEC['args']['predictor_table_path'])
scenario_predictor_table = utils.read_csv_to_dataframe(
scenario_predictor_table_path, 'id').to_dict(orient='index')
scenario_predictor_df = utils.read_csv_to_dataframe(
scenario_predictor_table_path,
MODEL_SPEC['args']['scenario_predictor_table_path'])
predictor_table_pairs = set([
(p_id, predictor_table[p_id]['type'].strip()) for p_id in predictor_table])
scenario_predictor_table_pairs = set([
(p_id, scenario_predictor_table[p_id]['type'].strip()) for p_id in
scenario_predictor_table])
if predictor_table_pairs != scenario_predictor_table_pairs:
predictor_pairs = set([
(p_id, row['type']) for p_id, row in predictor_df.iterrows()])
scenario_predictor_pairs = set([
(p_id, row['type']) for p_id, row in scenario_predictor_df.iterrows()])
if predictor_pairs != scenario_predictor_pairs:
raise ValueError('table pairs unequal.\n\t'
f'predictor: {predictor_table_pairs}\n\t'
f'scenario:{scenario_predictor_table_pairs}')
f'predictor: {predictor_pairs}\n\t'
f'scenario:{scenario_predictor_pairs}')
LOGGER.info('tables validate correctly')
@ -1617,8 +1620,8 @@ def _validate_same_projection(base_vector_path, table_path):
# This will load the table as a list of paths which we can iterate through
# without bothering the rest of the table structure
data_paths = utils.read_csv_to_dataframe(
table_path, convert_vals_to_lower=False, expand_path_cols=['path']
).squeeze('columns')['path'].tolist()
table_path, MODEL_SPEC['args']['predictor_table_path']
)['path'].tolist()
base_vector = gdal.OpenEx(base_vector_path, gdal.OF_VECTOR)
base_layer = base_vector.GetLayer()
@ -1674,14 +1677,14 @@ def _validate_predictor_types(table_path):
ValueError if any value in the ``type`` column does not match a valid
type, ignoring leading/trailing whitespace.
"""
df = utils.read_csv_to_dataframe(table_path, convert_vals_to_lower=False)
df = utils.read_csv_to_dataframe(
table_path, MODEL_SPEC['args']['predictor_table_path'])
# ignore leading/trailing whitespace because it will be removed
# when the type values are used
type_list = set([type.strip() for type in df['type']])
valid_types = set({'raster_mean', 'raster_sum', 'point_count',
'point_nearest_distance', 'line_intersect_length',
'polygon_area_coverage', 'polygon_percent_coverage'})
difference = type_list.difference(valid_types)
difference = set(df['type']).difference(valid_types)
if difference:
raise ValueError('The table contains invalid type value(s): '
f'{difference}. The allowed types are: {valid_types}')

View File

@ -107,7 +107,7 @@ MODEL_SPEC = {
},
},
"outputs": {
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR,
"taskgraph_cache": spec_utils.TASKGRAPH_DIR,
"filled.tif": spec_utils.FILLED_DEM,
"flow_accumulation.tif": spec_utils.FLOW_ACCUMULATION,
"flow_direction.tif": spec_utils.FLOW_DIRECTION,
@ -341,8 +341,7 @@ def execute(args):
``None``
"""
file_suffix = utils.make_suffix_string(args, 'results_suffix')
task_cache_dir = os.path.join(args['workspace_dir'], '_taskgraph_working_dir')
utils.make_directories([args['workspace_dir'], task_cache_dir])
utils.make_directories([args['workspace_dir']])
if ('calculate_flow_direction' in args and
bool(args['calculate_flow_direction'])):
@ -373,7 +372,8 @@ def execute(args):
# TypeError when n_workers is None.
n_workers = -1 # Synchronous mode.
graph = taskgraph.TaskGraph(task_cache_dir, n_workers=n_workers)
graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers=n_workers)
# Calculate slope. This is intentionally on the original DEM, not
# on the pitfilled DEM. If the user really wants the slope of the filled

View File

@ -121,6 +121,7 @@ MODEL_SPEC = {
"nearest_to_edge.csv": {
"about": gettext(
"Table of land cover classes and the amount of each that was converted for the nearest-to-edge conversion scenario."),
"index_col": "lucode",
"columns": {
"lucode": {
"type": "integer",
@ -140,6 +141,7 @@ MODEL_SPEC = {
"farthest_from_edge.csv": {
"about": gettext(
"Table of land cover classes and the amount of each that was converted for the farthest-from-edge conversion scenario."),
"index_col": "lucode",
"columns": {
"lucode": {
"type": "integer",
@ -175,10 +177,10 @@ MODEL_SPEC = {
"Map of the distance from each pixel to the nearest "
"edge of the focal landcover."),
"bands": {1: {"type": "number", "units": u.pixel}}
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -251,8 +253,6 @@ def execute(args):
utils.make_directories(
[output_dir, intermediate_output_dir, tmp_dir])
work_token_dir = os.path.join(
intermediate_output_dir, '_taskgraph_working_dir')
try:
n_workers = int(args['n_workers'])
except (KeyError, ValueError, TypeError):
@ -260,7 +260,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # Single process mode.
task_graph = taskgraph.TaskGraph(work_token_dir, n_workers)
task_graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
area_to_convert = float(args['area_to_convert'])
replacement_lucode = int(args['replacement_lucode'])

View File

@ -209,10 +209,10 @@ MODEL_SPEC = {
"visibility_[FEATURE_ID].tif": {
"about": gettext("Map of visibility for a given structure's viewpoint. This raster has pixel values of 0 (not visible), 1 (visible), or nodata (where the DEM is nodata)."),
"bands": {1: {"type": "integer"}}
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -290,7 +290,6 @@ def execute(args):
(_INTERMEDIATE_BASE_FILES, intermediate_dir)],
file_suffix)
work_token_dir = os.path.join(intermediate_dir, '_taskgraph_working_dir')
try:
n_workers = int(args['n_workers'])
except (KeyError, ValueError, TypeError):
@ -298,7 +297,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # Synchronous execution
graph = taskgraph.TaskGraph(work_token_dir, n_workers)
graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
reprojected_aoi_task = graph.add_task(
pygeoprocessing.reproject_vector,
@ -549,10 +549,10 @@ def _determine_valid_viewpoints(dem_path, structures_path):
# Coordinates in map units to pass to viewshed algorithm
geometry = point.GetGeometryRef()
if geometry.GetGeometryType() != ogr.wkbPoint:
if geometry.GetGeometryName() != 'POINT':
raise AssertionError(
f"Feature {point.GetFID()} is not a Point geometry. "
"Features must be a Point.")
f"Feature {point.GetFID()} must be a POINT geometry, "
f"not {geometry.GetGeometryName()}")
viewpoint = (geometry.GetX(), geometry.GetY())

View File

@ -87,6 +87,7 @@ MODEL_SPEC = {
},
"biophysical_table_path": {
"type": "csv",
"index_col": "lucode",
"columns": {
"lucode": spec_utils.LULC_TABLE_COLUMN,
"usle_c": {
@ -351,57 +352,52 @@ MODEL_SPEC = {
"times the thresholded slope (in eq. (74))"),
"bands": {1: {"type": "ratio"}}
},
"churn_dir_not_for_humans": {
"type": "directory",
"contents": {
"aligned_dem.tif": {
"about": gettext(
"Copy of the input DEM, clipped to the extent "
"of the other raster inputs."),
"bands": {1: {
"type": "number",
"units": u.meter
}}
},
"aligned_drainage.tif": {
"about": gettext(
"Copy of the input drainage map, clipped to "
"the extent of the other raster inputs and "
"aligned to the DEM."),
"bands": {1: {"type": "integer"}},
},
"aligned_erodibility.tif": {
"about": gettext(
"Copy of the input erodibility map, clipped to "
"the extent of the other raster inputs and "
"aligned to the DEM."),
"bands": {1: {
"type": "number",
"units": u.metric_ton*u.hectare*u.hour/(u.hectare*u.megajoule*u.millimeter)
}}
},
"aligned_erosivity.tif": {
"about": gettext(
"Copy of the input erosivity map, clipped to "
"the extent of the other raster inputs and "
"aligned to the DEM."),
"bands": {1: {
"type": "number",
"units": u.megajoule*u.millimeter/(u.hectare*u.hour*u.year)
}}
},
"aligned_lulc.tif": {
"about": gettext(
"Copy of the input drainage map, clipped to "
"the extent of the other raster inputs and "
"aligned to the DEM."),
"bands": {1: {"type": "integer"}},
},
"taskgraph.db": {}
}
"aligned_dem.tif": {
"about": gettext(
"Copy of the input DEM, clipped to the extent "
"of the other raster inputs."),
"bands": {1: {
"type": "number",
"units": u.meter
}}
},
"aligned_drainage.tif": {
"about": gettext(
"Copy of the input drainage map, clipped to "
"the extent of the other raster inputs and "
"aligned to the DEM."),
"bands": {1: {"type": "integer"}},
},
"aligned_erodibility.tif": {
"about": gettext(
"Copy of the input erodibility map, clipped to "
"the extent of the other raster inputs and "
"aligned to the DEM."),
"bands": {1: {
"type": "number",
"units": u.metric_ton*u.hectare*u.hour/(u.hectare*u.megajoule*u.millimeter)
}}
},
"aligned_erosivity.tif": {
"about": gettext(
"Copy of the input erosivity map, clipped to "
"the extent of the other raster inputs and "
"aligned to the DEM."),
"bands": {1: {
"type": "number",
"units": u.megajoule*u.millimeter/(u.hectare*u.hour*u.year)
}}
},
"aligned_lulc.tif": {
"about": gettext(
"Copy of the input LULC map, clipped to "
"the extent of the other raster inputs and "
"aligned to the DEM."),
"bands": {1: {"type": "integer"}},
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -420,6 +416,11 @@ _OUTPUT_BASE_FILES = {
INTERMEDIATE_DIR_NAME = 'intermediate_outputs'
_INTERMEDIATE_BASE_FILES = {
'aligned_dem_path': 'aligned_dem.tif',
'aligned_drainage_path': 'aligned_drainage.tif',
'aligned_erodibility_path': 'aligned_erodibility.tif',
'aligned_erosivity_path': 'aligned_erosivity.tif',
'aligned_lulc_path': 'aligned_lulc.tif',
'cp_factor_path': 'cp.tif',
'd_dn_path': 'd_dn.tif',
'd_up_path': 'd_up.tif',
@ -440,17 +441,9 @@ _INTERMEDIATE_BASE_FILES = {
'w_path': 'w.tif',
'ws_inverse_path': 'ws_inverse.tif',
'e_prime_path': 'e_prime.tif',
'weighted_avg_aspect_path': 'weighted_avg_aspect.tif',
'drainage_mask': 'what_drains_to_stream.tif',
}
_TMP_BASE_FILES = {
'aligned_dem_path': 'aligned_dem.tif',
'aligned_drainage_path': 'aligned_drainage.tif',
'aligned_erodibility_path': 'aligned_erodibility.tif',
'aligned_erosivity_path': 'aligned_erosivity.tif',
'aligned_lulc_path': 'aligned_lulc.tif',
}
# Target nodata is for general rasters that are positive, and _IC_NODATA are
# for rasters that are any range
@ -501,42 +494,27 @@ def execute(args):
"""
file_suffix = utils.make_suffix_string(args, 'results_suffix')
biophysical_table = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'lucode').to_dict(orient='index')
biophysical_df = utils.read_csv_to_dataframe(
args['biophysical_table_path'], MODEL_SPEC['args']['biophysical_table_path'])
# Test to see if c or p values are outside of 0..1
for table_key in ['usle_c', 'usle_p']:
for (lulc_code, table) in biophysical_table.items():
try:
float(lulc_code)
except ValueError:
raise ValueError(
f'Value "{lulc_code}" from the "lucode" column of the '
f'biophysical table is not a number. Please check the '
f'formatting of {args["biophysical_table_path"]}')
try:
float_value = float(table[table_key])
if float_value < 0 or float_value > 1:
raise ValueError(
f'{float_value} is not within range 0..1')
except ValueError:
for key in ['usle_c', 'usle_p']:
for lulc_code, row in biophysical_df.iterrows():
if row[key] < 0 or row[key] > 1:
raise ValueError(
f'A value in the biophysical table is not a number '
f'within range 0..1. The offending value is in '
f'column "{table_key}", lucode row "{lulc_code}", '
f'and has value "{table[table_key]}"')
f'column "{key}", lucode row "{lulc_code}", '
f'and has value "{row[key]}"')
intermediate_output_dir = os.path.join(
args['workspace_dir'], INTERMEDIATE_DIR_NAME)
output_dir = os.path.join(args['workspace_dir'])
churn_dir = os.path.join(
intermediate_output_dir, 'churn_dir_not_for_humans')
utils.make_directories([output_dir, intermediate_output_dir, churn_dir])
utils.make_directories([output_dir, intermediate_output_dir])
f_reg = utils.build_file_registry(
[(_OUTPUT_BASE_FILES, output_dir),
(_INTERMEDIATE_BASE_FILES, intermediate_output_dir),
(_TMP_BASE_FILES, churn_dir)], file_suffix)
(_INTERMEDIATE_BASE_FILES, intermediate_output_dir)], file_suffix)
try:
n_workers = int(args['n_workers'])
@ -546,7 +524,8 @@ def execute(args):
# TypeError when n_workers is None.
n_workers = -1 # Synchronous mode.
task_graph = taskgraph.TaskGraph(
churn_dir, n_workers, reporting_interval=5.0)
os.path.join(output_dir, 'taskgraph_cache'),
n_workers, reporting_interval=5.0)
base_list = []
aligned_list = []
@ -617,14 +596,6 @@ def execute(args):
dependent_task_list=[pit_fill_task],
task_name='flow direction calculation')
weighted_avg_aspect_task = task_graph.add_task(
func=sdr_core.calculate_average_aspect,
args=(f_reg['flow_direction_path'],
f_reg['weighted_avg_aspect_path']),
target_path_list=[f_reg['weighted_avg_aspect_path']],
dependent_task_list=[flow_dir_task],
task_name='weighted average of multiple-flow aspects')
flow_accumulation_task = task_graph.add_task(
func=pygeoprocessing.routing.flow_accumulation_mfd,
args=(
@ -639,13 +610,11 @@ def execute(args):
args=(
f_reg['flow_accumulation_path'],
f_reg['slope_path'],
f_reg['weighted_avg_aspect_path'],
float(args['l_max']),
f_reg['ls_path']),
target_path_list=[f_reg['ls_path']],
dependent_task_list=[
flow_accumulation_task, slope_task,
weighted_avg_aspect_task],
flow_accumulation_task, slope_task],
task_name='ls factor calculation')
stream_task = task_graph.add_task(
@ -675,19 +644,21 @@ def execute(args):
drainage_raster_path_task = (
f_reg['stream_path'], stream_task)
lulc_to_c = biophysical_df['usle_c'].to_dict()
threshold_w_task = task_graph.add_task(
func=_calculate_w,
args=(
biophysical_table, f_reg['aligned_lulc_path'], f_reg['w_path'],
lulc_to_c, f_reg['aligned_lulc_path'], f_reg['w_path'],
f_reg['thresholded_w_path']),
target_path_list=[f_reg['w_path'], f_reg['thresholded_w_path']],
dependent_task_list=[align_task],
task_name='calculate W')
lulc_to_cp = (biophysical_df['usle_c'] * biophysical_df['usle_p']).to_dict()
cp_task = task_graph.add_task(
func=_calculate_cp,
args=(
biophysical_table, f_reg['aligned_lulc_path'],
lulc_to_cp, f_reg['aligned_lulc_path'],
f_reg['cp_factor_path']),
target_path_list=[f_reg['cp_factor_path']],
dependent_task_list=[align_task],
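The lulc_to_c and lulc_to_cp lookups handed to the W and CP tasks above come straight from the biophysical DataFrame; a small pandas sketch with made-up values:
import pandas as pd
biophysical_df = pd.DataFrame(
    {'usle_c': [0.1, 0.4], 'usle_p': [1.0, 0.5]},
    index=pd.Index([1, 2], name='lucode'))
lulc_to_c = biophysical_df['usle_c'].to_dict()
# lucode 1 -> 0.1, lucode 2 -> 0.4
lulc_to_cp = (biophysical_df['usle_c'] * biophysical_df['usle_p']).to_dict()
# lucode 1 -> 0.1, lucode 2 -> 0.2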
@ -1029,26 +1000,61 @@ def _calculate_what_drains_to_stream(
def _calculate_ls_factor(
flow_accumulation_path, slope_path, avg_aspect_path, l_max,
target_ls_prime_factor_path):
flow_accumulation_path, slope_path, l_max,
target_ls_factor_path):
"""Calculate LS factor.
Calculates a modified LS factor as Equation 3 from "Extension and
Calculates the LS factor using Equation 3 from "Extension and
validation of a geographic information system-based method for calculating
the Revised Universal Soil Loss Equation length-slope factor for erosion
risk assessments in large watersheds" where the ``x`` term is the average
aspect ratio weighted by proportional flow to account for multiple flow
direction.
risk assessments in large watersheds".
The equation for this is::
(upstream_area + pixel_area)^(m+1) - upstream_area^(m+1)
LS = S * --------------------------------------------------------
(pixel_size^(m+2)) * (aspect_dir^m) * 22.13^m
Where
* ``S`` is the slope factor defined in equation 4 from the same paper,
calculated by the following where ``b`` is the slope in radians:
* ``S = 10.8 * sin(b) + 0.03`` where slope < 9%
* ``S = 16.8 * sin(b) - 0.50`` where slope >= 9%
* ``upstream_area`` is interpreted as the square root of the
catchment area, to match SAGA-GIS's method for calculating LS
Factor.
* ``pixel_area`` is the area of the pixel in square meters, and
``pixel_size`` is its side length in meters.
* ``m`` is the slope-length exponent of the RUSLE LS-factor,
which, as discussed in Oliveira et al. 2013 is a function of the
on-pixel slope theta:
* ``m = 0.2`` when ``theta <= 1%``
* ``m = 0.3`` when ``1% < theta <= 3.5%``
* ``m = 0.4`` when ``3.5% < theta <= 5%``
* ``m = 0.5`` when ``5% < theta <= 9%``
* ``m = beta / (1+beta)`` when ``theta > 9%``, where
``beta = (sin(theta) / 0.0896) / (3*sin(theta)^0.8 + 0.56)``
* ``aspect_dir`` is calculated by ``|sin(alpha)| + |cos(alpha)|``
for the given pixel.
Oliveira et al. can be found at:
Oliveira, A.H., Silva, M.A. da, Silva, M.L.N., Curi, N., Neto, G.K.,
Freitas, D.A.F. de, 2013. Development of Topographic Factor Modeling
for Application in Soil Erosion Models, in: Intechopen (Ed.), Soil
Processes and Current Trends in Quality Assessment. p. 28.
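As a worked example of the S and m pieces (hypothetical pixel): for a 6% slope, b = arctan(0.06), sin(b) is about 0.0599, so S is about 10.8 * 0.0599 + 0.03 = 0.68, and because 5% < 6% <= 9% the exponent m is 0.5.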
Args:
flow_accumulation_path (string): path to raster, pixel values are the
contributing upslope area at that cell. Pixel size is square.
slope_path (string): path to slope raster as a percent
avg_aspect_path (string): The path to to raster of the weighted average
of aspects based on proportional flow.
l_max (float): if the calculated value of L exceeds this value
it is clamped to this value.
target_ls_prime_factor_path (string): path to output ls_prime_factor
target_ls_factor_path (string): path to output LS factor
raster
Returns:
@ -1056,8 +1062,6 @@ def _calculate_ls_factor(
"""
slope_nodata = pygeoprocessing.get_raster_info(slope_path)['nodata'][0]
avg_aspect_nodata = pygeoprocessing.get_raster_info(
avg_aspect_path)['nodata'][0]
flow_accumulation_info = pygeoprocessing.get_raster_info(
flow_accumulation_path)
@ -1065,14 +1069,12 @@ def _calculate_ls_factor(
cell_size = abs(flow_accumulation_info['pixel_size'][0])
cell_area = cell_size ** 2
def ls_factor_function(
percent_slope, flow_accumulation, avg_aspect, l_max):
"""Calculate the LS' factor.
def ls_factor_function(percent_slope, flow_accumulation, l_max):
"""Calculate the LS factor.
Args:
percent_slope (numpy.ndarray): slope in percent
flow_accumulation (numpy.ndarray): upslope pixels
avg_aspect (numpy.ndarray): the weighted average aspect from MFD
l_max (float): max L factor, clamp to this value if L exceeds it
Returns:
@ -1082,16 +1084,27 @@ def _calculate_ls_factor(
# avg aspect intermediate output should always have a defined
# nodata value from pygeoprocessing
valid_mask = (
(~utils.array_equals_nodata(avg_aspect, avg_aspect_nodata)) &
~utils.array_equals_nodata(percent_slope, slope_nodata) &
~utils.array_equals_nodata(
flow_accumulation, flow_accumulation_nodata))
result = numpy.empty(valid_mask.shape, dtype=numpy.float32)
result[:] = _TARGET_NODATA
contributing_area = (flow_accumulation[valid_mask]-1) * cell_area
# Although Desmet & Govers (1996) discusses "upstream contributing
# area", this is not strictly defined. We decided to use the square
# root of the upstream contributing area here as an estimate, which
# matches the SAGA LS Factor option "square root of catchment area".
# See the InVEST ADR-0001 for more information.
# We subtract 1 from the flow accumulation because FA includes itself
# in its count of pixels upstream and our LS factor equation wants only
# those pixels that are strictly upstream.
contributing_area = numpy.sqrt(
(flow_accumulation[valid_mask]-1) * cell_area)
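# For example (hypothetical numbers): with 30 m pixels
# (cell_area = 900 m^2) and flow_accumulation = 101, this
# estimate is sqrt(100 * 900) = 300.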
slope_in_radians = numpy.arctan(percent_slope[valid_mask] / 100.0)
aspect_length = (numpy.fabs(numpy.sin(slope_in_radians)) +
numpy.fabs(numpy.cos(slope_in_radians)))
# From Equation 4 in "Extension and validation of a geographic
# information system ..."
slope_factor = numpy.where(
@ -1121,7 +1134,7 @@ def _calculate_ls_factor(
l_factor = (
((contributing_area + cell_area)**(m_exp+1) -
contributing_area ** (m_exp+1)) /
((cell_size ** (m_exp + 2)) * (avg_aspect[valid_mask]**m_exp) *
((cell_size ** (m_exp + 2)) * (aspect_length**m_exp) *
(22.13**m_exp)))
# threshold L factor to l_max
@ -1130,12 +1143,10 @@ def _calculate_ls_factor(
result[valid_mask] = l_factor * slope_factor
return result
# call raster_calculator to calculate the ls_factor
pygeoprocessing.raster_calculator(
[(path, 1) for path in [
slope_path, flow_accumulation_path, avg_aspect_path]] + [
[(path, 1) for path in [slope_path, flow_accumulation_path]] + [
(l_max, 'raw')],
ls_factor_function, target_ls_prime_factor_path, gdal.GDT_Float32,
ls_factor_function, target_ls_factor_path, gdal.GDT_Float32,
_TARGET_NODATA)
@ -1277,15 +1288,14 @@ def _add_drainage(stream_path, drainage_path, out_stream_and_drainage_path):
def _calculate_w(
biophysical_table, lulc_path, w_factor_path,
lulc_to_c, lulc_path, w_factor_path,
out_thresholded_w_factor_path):
"""W factor: map C values from LULC and lower threshold to 0.001.
W is a factor in calculating d_up accumulation for SDR.
Args:
biophysical_table (dict): map of LULC codes to dictionaries that
contain at least a 'usle_c' field
lulc_to_c (dict): mapping of LULC codes to C values
lulc_path (string): path to LULC raster
w_factor_path (string): path to output raw W factor
out_thresholded_w_factor_path (string): W factor from `w_factor_path`
@ -1295,9 +1305,6 @@ def _calculate_w(
None
"""
lulc_to_c = dict(
[(lulc_code, float(table['usle_c'])) for
(lulc_code, table) in biophysical_table.items()])
if pygeoprocessing.get_raster_info(lulc_path)['nodata'][0] is None:
# will get a case where the raster might be masked but nothing to
# replace so 0 is used by default. Ensure this exists in lookup.
@ -1326,13 +1333,11 @@ def _calculate_w(
gdal.GDT_Float32, _TARGET_NODATA)
def _calculate_cp(biophysical_table, lulc_path, cp_factor_path):
def _calculate_cp(lulc_to_cp, lulc_path, cp_factor_path):
"""Map LULC to C*P value.
Args:
biophysical_table (dict): map of lulc codes to dictionaries that
contain at least the entry 'usle_c" and 'usle_p' corresponding to
those USLE components.
lulc_to_cp (dict): mapping of lulc codes to CP values
lulc_path (string): path to LULC raster
cp_factor_path (string): path to output raster of LULC mapped to C*P
values
@ -1341,9 +1346,6 @@ def _calculate_cp(biophysical_table, lulc_path, cp_factor_path):
None
"""
lulc_to_cp = dict(
[(lulc_code, float(table['usle_c']) * float(table['usle_p'])) for
(lulc_code, table) in biophysical_table.items()])
if pygeoprocessing.get_raster_info(lulc_path)['nodata'][0] is None:
# will get a case where the raster might be masked but nothing to
# replace so 0 is used by default. Ensure this exists in lookup.

View File

@ -675,127 +675,3 @@ def calculate_sediment_deposition(
LOGGER.info('Sediment deposition 100% complete')
sediment_deposition_raster.close()
def calculate_average_aspect(
mfd_flow_direction_path, target_average_aspect_path):
"""Calculate the Weighted Average Aspect Ratio from MFD.
Calculates the average aspect ratio weighted by proportional flow
direction.
Args:
mfd_flow_direction_path (string): The path to an MFD flow direction
raster.
target_average_aspect_path (string): The path to where the calculated
weighted average aspect raster should be written.
Returns:
``None``.
"""
LOGGER.info('Calculating average aspect')
cdef float average_aspect_nodata = -1
pygeoprocessing.new_raster_from_base(
mfd_flow_direction_path, target_average_aspect_path,
gdal.GDT_Float32, [average_aspect_nodata], [average_aspect_nodata])
flow_direction_info = pygeoprocessing.get_raster_info(
mfd_flow_direction_path)
cdef int mfd_flow_direction_nodata = flow_direction_info['nodata'][0]
cdef int n_cols, n_rows
n_cols, n_rows = flow_direction_info['raster_size']
cdef _ManagedRaster mfd_flow_direction_raster = _ManagedRaster(
mfd_flow_direction_path, 1, False)
cdef _ManagedRaster average_aspect_raster = _ManagedRaster(
target_average_aspect_path, 1, True)
cdef int seed_row = 0
cdef int seed_col = 0
cdef int n_pixels_visited = 0
cdef int win_xsize, win_ysize, xoff, yoff
cdef int row_index, col_index, neighbor_index
cdef int flow_weight_in_direction
cdef int weight_sum
cdef int seed_flow_value
cdef float aspect_weighted_average, aspect_weighted_sum
# the flow_lengths array is the functional equivalent
# of calculating |sin(alpha)| + |cos(alpha)|.
cdef float* flow_lengths = [
1, <float>SQRT2,
1, <float>SQRT2,
1, <float>SQRT2,
1, <float>SQRT2
]
# Loop over iterblocks to maintain cache locality
# Find each non-nodata pixel and calculate proportional flow
# Multiply proportional flow times the flow length x_d
# write the final value to the raster.
for offset_dict in pygeoprocessing.iterblocks(
(mfd_flow_direction_path, 1), offset_only=True, largest_block=0):
win_xsize = offset_dict['win_xsize']
win_ysize = offset_dict['win_ysize']
xoff = offset_dict['xoff']
yoff = offset_dict['yoff']
LOGGER.info('Average aspect %.2f%% complete', 100 * (
n_pixels_visited / float(n_cols * n_rows)))
for row_index in range(win_ysize):
seed_row = yoff + row_index
for col_index in range(win_xsize):
seed_col = xoff + col_index
seed_flow_value = <int>mfd_flow_direction_raster.get(
seed_col, seed_row)
# Skip this seed if it's nodata (Currently expected to be 0).
# No need to set the nodata value here since we have already
# filled the raster with nodata values at creation time.
if seed_flow_value == mfd_flow_direction_nodata:
continue
weight_sum = 0
aspect_weighted_sum = 0
for neighbor_index in range(8):
neighbor_row = seed_row + ROW_OFFSETS[neighbor_index]
if neighbor_row == -1 or neighbor_row == n_rows:
continue
neighbor_col = seed_col + COL_OFFSETS[neighbor_index]
if neighbor_col == -1 or neighbor_col == n_cols:
continue
flow_weight_in_direction = (seed_flow_value >> (
neighbor_index * 4) & 0xF)
weight_sum += flow_weight_in_direction
aspect_weighted_sum += (
flow_lengths[neighbor_index] *
flow_weight_in_direction)
# Weight sum should never be less than 0.
# Since it's an int, we can compare it directly against the
# value of 0.
if weight_sum == 0:
aspect_weighted_average = average_aspect_nodata
else:
# We already know that weight_sum will be > 0 because we
# check for it in the condition above.
with cython.cdivision(True):
aspect_weighted_average = (
aspect_weighted_sum / <float>weight_sum)
average_aspect_raster.set(
seed_col, seed_row, aspect_weighted_average)
n_pixels_visited += win_xsize * win_ysize
LOGGER.info('Average aspect 100.00% complete')
mfd_flow_direction_raster.close()
average_aspect_raster.close()

View File

@ -107,6 +107,7 @@ MODEL_SPEC = {
},
"biophysical_table_path": {
"type": "csv",
"index_col": "lucode",
"columns": {
"lucode": spec_utils.LULC_TABLE_COLUMN,
"cn_[SOIL_GROUP]": {
@ -137,6 +138,7 @@ MODEL_SPEC = {
},
"rain_events_table_path": {
"type": "csv",
"index_col": "month",
"columns": {
"month": {
"type": "number",
@ -212,6 +214,7 @@ MODEL_SPEC = {
},
"climate_zone_table_path": {
"type": "csv",
"index_col": "cz_id",
"columns": {
"cz_id": {
"type": "integer",
@ -253,6 +256,7 @@ MODEL_SPEC = {
},
"monthly_alpha_path": {
"type": "csv",
"index_col": "month",
"columns": {
"month": {
"type": "number",
@ -409,10 +413,62 @@ MODEL_SPEC = {
"bands": {1: {
"type": "integer"
}}
},
'Si.tif': {
"about": gettext("Map of the S_i factor derived from CN"),
"bands": {1: {"type": "number", "units": u.inch}}
},
'lulc_aligned.tif': {
"about": gettext("Copy of LULC input, aligned and clipped "
"to match the other spatial inputs"),
"bands": {1: {"type": "integer"}}
},
'dem_aligned.tif': {
"about": gettext("Copy of DEM input, aligned and clipped "
"to match the other spatial inputs"),
"bands": {1: {"type": "number", "units": u.meter}}
},
'pit_filled_dem.tif': {
"about": gettext("Pit filled DEM"),
"bands": {1: {"type": "number", "units": u.meter}}
},
'soil_group_aligned.tif': {
"about": gettext("Copy of soil groups input, aligned and "
"clipped to match the other spatial inputs"),
"bands": {1: {"type": "integer"}}
},
'flow_accum.tif': spec_utils.FLOW_ACCUMULATION,
'prcp_a[MONTH].tif': {
"bands": {1: {"type": "number", "units": u.millimeter/u.year}},
"about": gettext("Monthly precipitation rasters, aligned and "
"clipped to match the other spatial inputs")
},
'n_events[MONTH].tif': {
"about": gettext("Map of monthly rain events"),
"bands": {1: {"type": "integer"}}
},
'et0_a[MONTH].tif': {
"bands": {1: {"type": "number", "units": u.millimeter}},
"about": gettext("Monthly ET0 rasters, aligned and "
"clipped to match the other spatial inputs")
},
'kc_[MONTH].tif': {
"about": gettext("Map of monthly KC values"),
"bands": {1: {"type": "number", "units": u.none}}
},
'l_aligned.tif': {
"about": gettext("Copy of user-defined local recharge input, "
"aligned and clipped to match the other spatial inputs"),
"bands": {1: {"type": "number", "units": u.millimeter}}
},
'cz_aligned.tif': {
"about": gettext("Copy of user-defined climate zones raster, "
"aligned and clipped to match the other spatial inputs"),
"bands": {1: {"type": "integer"}}
}
}
},
"cache_dir": spec_utils.TASKGRAPH_DIR
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -437,18 +493,10 @@ _INTERMEDIATE_BASE_FILES = {
'flow_dir_mfd_path': 'flow_dir_mfd.tif',
'qfm_path_list': ['qf_%d.tif' % (x+1) for x in range(N_MONTHS)],
'stream_path': 'stream.tif',
}
_TMP_BASE_FILES = {
'outflow_direction_path': 'outflow_direction.tif',
'outflow_weights_path': 'outflow_weights.tif',
'kc_path': 'kc.tif',
'si_path': 'Si.tif',
'lulc_aligned_path': 'lulc_aligned.tif',
'dem_aligned_path': 'dem_aligned.tif',
'dem_pit_filled_path': 'pit_filled_dem.tif',
'loss_path': 'loss.tif',
'zero_absorption_source_path': 'zero_absorption.tif',
'soil_group_aligned_path': 'soil_group_aligned.tif',
'flow_accum_path': 'flow_accum.tif',
'precip_path_aligned_list': ['prcp_a%d.tif' % x for x in range(N_MONTHS)],
@ -457,7 +505,6 @@ _TMP_BASE_FILES = {
'kc_path_list': ['kc_%d.tif' % x for x in range(N_MONTHS)],
'l_aligned_path': 'l_aligned.tif',
'cz_aligned_raster_path': 'cz_aligned.tif',
'l_sum_pre_clamp': 'l_sum_pre_clamp.tif'
}
@ -561,41 +608,20 @@ def _execute(args):
# fail early on a missing required rain events table
if (not args['user_defined_local_recharge'] and
not args['user_defined_climate_zones']):
rain_events_lookup = (
utils.read_csv_to_dataframe(
args['rain_events_table_path'], 'month'
).to_dict(orient='index'))
rain_events_df = utils.read_csv_to_dataframe(
args['rain_events_table_path'],
MODEL_SPEC['args']['rain_events_table_path'])
biophysical_table = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'lucode').to_dict(orient='index')
bad_value_list = []
for lucode, value in biophysical_table.items():
for biophysical_id in ['cn_a', 'cn_b', 'cn_c', 'cn_d'] + [
'kc_%d' % (month_index+1) for month_index in range(N_MONTHS)]:
try:
_ = float(value[biophysical_id])
except ValueError:
bad_value_list.append(
(biophysical_id, lucode, value[biophysical_id]))
if bad_value_list:
raise ValueError(
'biophysical_table at %s seems to have the following incorrect '
'values (expecting all floating point numbers): %s' % (
args['biophysical_table_path'], ','.join(
['%s(lucode %d): "%s"' % (
lucode, biophysical_id, bad_value)
for lucode, biophysical_id, bad_value in
bad_value_list])))
biophysical_df = utils.read_csv_to_dataframe(
args['biophysical_table_path'],
MODEL_SPEC['args']['biophysical_table_path'])
if args['monthly_alpha']:
# parse out the alpha lookup table of the form (month_id: alpha_val)
alpha_month_map = dict(
(key, val['alpha']) for key, val in
utils.read_csv_to_dataframe(
args['monthly_alpha_path'], 'month'
).to_dict(orient='index').items())
alpha_month_map = utils.read_csv_to_dataframe(
args['monthly_alpha_path'],
MODEL_SPEC['args']['monthly_alpha_path']
)['alpha'].to_dict()
else:
# make all 12 entries equal to args['alpha_m']
alpha_m = float(fractions.Fraction(args['alpha_m']))
@ -610,9 +636,8 @@ def _execute(args):
file_suffix = utils.make_suffix_string(args, 'results_suffix')
intermediate_output_dir = os.path.join(
args['workspace_dir'], 'intermediate_outputs')
cache_dir = os.path.join(args['workspace_dir'], 'cache_dir')
output_dir = args['workspace_dir']
utils.make_directories([intermediate_output_dir, cache_dir, output_dir])
utils.make_directories([intermediate_output_dir, output_dir])
try:
n_workers = int(args['n_workers'])
@ -622,13 +647,13 @@ def _execute(args):
# TypeError when n_workers is None.
n_workers = -1 # Synchronous mode.
task_graph = taskgraph.TaskGraph(
cache_dir, n_workers, reporting_interval=5.0)
os.path.join(args['workspace_dir'], 'taskgraph_cache'),
n_workers, reporting_interval=5)
LOGGER.info('Building file registry')
file_registry = utils.build_file_registry(
[(_OUTPUT_BASE_FILES, output_dir),
(_INTERMEDIATE_BASE_FILES, intermediate_output_dir),
(_TMP_BASE_FILES, cache_dir)], file_suffix)
(_INTERMEDIATE_BASE_FILES, intermediate_output_dir)], file_suffix)
LOGGER.info('Checking that the AOI is not the output aggregate vector')
if (os.path.normpath(args['aoi_path']) ==
@ -706,7 +731,7 @@ def _execute(args):
args=(
(file_registry['dem_aligned_path'], 1),
file_registry['dem_pit_filled_path']),
kwargs={'working_dir': cache_dir},
kwargs={'working_dir': intermediate_output_dir},
target_path_list=[file_registry['dem_pit_filled_path']],
dependent_task_list=[align_task],
task_name='fill dem pits')
@ -716,7 +741,7 @@ def _execute(args):
args=(
(file_registry['dem_pit_filled_path'], 1),
file_registry['flow_dir_mfd_path']),
kwargs={'working_dir': cache_dir},
kwargs={'working_dir': intermediate_output_dir},
target_path_list=[file_registry['flow_dir_mfd_path']],
dependent_task_list=[fill_pit_task],
task_name='flow dir mfd')
@ -762,22 +787,18 @@ def _execute(args):
'table_name': 'Climate Zone'}
for month_id in range(N_MONTHS):
if args['user_defined_climate_zones']:
cz_rain_events_lookup = (
utils.read_csv_to_dataframe(
args['climate_zone_table_path'], 'cz_id'
).to_dict(orient='index'))
month_label = MONTH_ID_TO_LABEL[month_id]
climate_zone_rain_events_month = dict([
(cz_id, cz_rain_events_lookup[cz_id][month_label]) for
cz_id in cz_rain_events_lookup])
n_events_nodata = -1
cz_rain_events_df = utils.read_csv_to_dataframe(
args['climate_zone_table_path'],
MODEL_SPEC['args']['climate_zone_table_path'])
climate_zone_rain_events_month = (
cz_rain_events_df[MONTH_ID_TO_LABEL[month_id]].to_dict())
n_events_task = task_graph.add_task(
func=utils.reclassify_raster,
args=(
(file_registry['cz_aligned_raster_path'], 1),
climate_zone_rain_events_month,
file_registry['n_events_path_list'][month_id],
gdal.GDT_Float32, n_events_nodata,
gdal.GDT_Float32, TARGET_NODATA,
reclass_error_details),
target_path_list=[
file_registry['n_events_path_list'][month_id]],
@ -785,15 +806,14 @@ def _execute(args):
task_name='n_events for month %d' % month_id)
reclassify_n_events_task_list.append(n_events_task)
else:
# rain_events_lookup defined near entry point of execute
n_events = rain_events_lookup[month_id+1]['events']
n_events_task = task_graph.add_task(
func=pygeoprocessing.new_raster_from_base,
args=(
file_registry['dem_aligned_path'],
file_registry['n_events_path_list'][month_id],
gdal.GDT_Float32, [TARGET_NODATA]),
kwargs={'fill_value_list': (n_events,)},
kwargs={'fill_value_list': (
rain_events_df['events'][month_id+1],)},
target_path_list=[
file_registry['n_events_path_list'][month_id]],
dependent_task_list=[align_task],
@ -806,7 +826,8 @@ def _execute(args):
args=(
file_registry['lulc_aligned_path'],
file_registry['soil_group_aligned_path'],
biophysical_table, file_registry['cn_path']),
biophysical_df,
file_registry['cn_path']),
target_path_list=[file_registry['cn_path']],
dependent_task_list=[align_task],
task_name='calculate curve number')
@ -827,8 +848,6 @@ def _execute(args):
func=_calculate_monthly_quick_flow,
args=(
file_registry['precip_path_aligned_list'][month_index],
file_registry['lulc_aligned_path'],
file_registry['cn_path'],
file_registry['n_events_path_list'][month_index],
file_registry['stream_path'],
file_registry['si_path'],
@ -855,16 +874,13 @@ def _execute(args):
'raster_name': 'LULC', 'column_name': 'lucode',
'table_name': 'Biophysical'}
for month_index in range(N_MONTHS):
kc_lookup = dict([
(lucode, biophysical_table[lucode]['kc_%d' % (month_index+1)])
for lucode in biophysical_table])
kc_nodata = -1 # a reasonable nodata value
kc_lookup = biophysical_df['kc_%d' % (month_index+1)].to_dict()
kc_task = task_graph.add_task(
func=utils.reclassify_raster,
args=(
(file_registry['lulc_aligned_path'], 1), kc_lookup,
file_registry['kc_path_list'][month_index],
gdal.GDT_Float32, kc_nodata, reclass_error_details),
gdal.GDT_Float32, TARGET_NODATA, reclass_error_details),
target_path_list=[file_registry['kc_path_list'][month_index]],
dependent_task_list=[align_task],
task_name='classify kc month %d' % month_index)
@ -978,7 +994,7 @@ def _calculate_vri(l_path, target_vri_path):
None.
"""
qb_sum = 0.0
qb_sum = 0
qb_valid_count = 0
l_nodata = pygeoprocessing.get_raster_info(l_path)['nodata'][0]
@ -1039,122 +1055,167 @@ def _calculate_annual_qfi(qfm_path_list, target_qf_path):
qfi_sum_op, target_qf_path, gdal.GDT_Float32, qf_nodata)
def _calculate_monthly_quick_flow(
precip_path, lulc_raster_path, cn_path, n_events_raster_path,
stream_path, si_path, qf_monthly_path):
def _calculate_monthly_quick_flow(precip_path, n_events_path, stream_path,
si_path, qf_monthly_path):
"""Calculate quick flow for a month.
Args:
precip_path (string): path to file that correspond to monthly
precipitation
lulc_raster_path (string): path to landcover raster
cn_path (string): path to curve number raster
n_events_raster_path (string): a path to a raster where each pixel
precip_path (string): path to monthly precipitation raster
n_events_path (string): a path to a raster where each pixel
indicates the number of rain events.
stream_path (string): path to stream mask raster where 1 indicates a
stream pixel, 0 is a non-stream but otherwise valid area from the
original DEM, and nodata indicates areas outside the valid DEM.
si_path (string): path to raster that has potential maximum retention
qf_monthly_path_list (list of string): list of paths to output monthly
rasters.
qf_monthly_path (string): path to output monthly QF raster.
Returns:
None
"""
p_nodata = pygeoprocessing.get_raster_info(precip_path)['nodata'][0]
n_nodata = pygeoprocessing.get_raster_info(n_events_path)['nodata'][0]
stream_nodata = pygeoprocessing.get_raster_info(stream_path)['nodata'][0]
si_nodata = pygeoprocessing.get_raster_info(si_path)['nodata'][0]
qf_nodata = -1
p_nodata = pygeoprocessing.get_raster_info(precip_path)['nodata'][0]
n_events_nodata = pygeoprocessing.get_raster_info(
n_events_raster_path)['nodata'][0]
stream_nodata = pygeoprocessing.get_raster_info(stream_path)['nodata'][0]
def qf_op(p_im, s_i, n_events, stream_array):
def qf_op(p_im, s_i, n_m, stream):
"""Calculate quick flow as in Eq [1] in user's guide.
Args:
p_im (numpy.array): precipitation at pixel i on month m
s_i (numpy.array): factor that is 1000/CN_i - 10
(Equation 1b from user's guide)
n_events (numpy.array): number of rain events on the pixel
stream_mask (numpy.array): 1 if stream, otherwise not a stream
pixel.
n_m (numpy.array): number of rain events on pixel i in month m
stream (numpy.array): 1 if stream, otherwise not a stream pixel.
Returns:
quick flow (numpy.array)
"""
# s_i is an intermediate output which will always have a defined
# nodata value
valid_mask = ((p_im != 0.0) &
(stream_array != 1) &
(n_events > 0) &
~utils.array_equals_nodata(s_i, si_nodata))
if p_nodata is not None:
valid_mask &= ~utils.array_equals_nodata(p_im, p_nodata)
if n_events_nodata is not None:
valid_mask &= ~utils.array_equals_nodata(n_events, n_events_nodata)
# stream_nodata is the only input that carry over nodata values from
valid_p_mask = ~utils.array_equals_nodata(p_im, p_nodata)
valid_n_mask = ~utils.array_equals_nodata(n_m, n_nodata)
# precip mask: both p_im and n_m are defined and greater than 0
precip_mask = valid_p_mask & valid_n_mask & (p_im > 0) & (n_m > 0)
stream_mask = stream == 1
# stream_nodata is the only input that carries over nodata values from
# the aligned DEM.
if stream_nodata is not None:
valid_mask &= ~utils.array_equals_nodata(
stream_array, stream_nodata)
valid_mask = (
valid_p_mask &
valid_n_mask &
~utils.array_equals_nodata(stream, stream_nodata) &
~utils.array_equals_nodata(s_i, si_nodata))
valid_n_events = n_events[valid_mask]
valid_si = s_i[valid_mask]
# QF is defined in terms of three cases:
#
# 1. Where there is no precipitation, QF = 0
# (even if stream or s_i is undefined)
#
# 2. Where there is precipitation and we're on a stream, QF = P
# (even if s_i is undefined)
#
# 3. Where there is precipitation and we're not on a stream, use the
# quickflow equation (only if all four inputs are defined):
# QF_im = 25.4 * n_m * (
# (a_im - s_i) * exp(-0.2 * s_i / a_im) +
# s_i^2 / a_im * exp(0.8 * s_i / a_im) * E1(s_i / a_im)
# )
#
# When evaluating the QF equation, there are a few edge cases:
#
# 3a. Where s_i = 0, you get NaN and a warning from numpy because
# E1(0 / a_im) = infinity. In this case, per conversation with
# Rafa, the final term of the equation should evaluate to 0, and
# the equation can be simplified to QF_im = P_im
# (which makes sense because if s_i = 0, no water is retained).
#
# Solution: Preemptively set QF_im equal to P_im where s_i = 0 in
# order to avoid calculations with infinity.
#
# 3b. When the ratio s_i / a_im becomes large, QF approaches 0.
# [NOTE: I don't know how to prove this mathematically, but it
# holds true when I tested with reasonable values of s_i and a_im].
# The exp() term becomes very large, while the E1() term becomes
# very small.
#
# Per conversation with Rafa and Lisa, large s_i / a_im ratios
# shouldn't happen often with real world data. But if they did, it
# would be a situation where there is very little precipitation
# spread out over relatively many rain events and the soil is very
# absorbent, so logically, QF should be effectively zero.
#
# To avoid overflow, we set a threshold of 100 for the s_i / a_im
# ratio. Where s_i / a_im > 100, we set QF to 0. 100 was chosen
# because it's a nice whole number that gets us close to the
# float32 max without surpassing it (exp(0.8*100) = 5e34). When
# s_i / a_im = 100, the actual result of the QF equation is on the
# order of 1e-6, so it should be rounded down to 0 anyway.
#
# 3c. Otherwise, evaluate the QF equation as usual.
#
# 3d. With certain inputs [for example: n_m = 10, CN = 50, p_im = 30],
# it's possible that the QF equation evaluates to a very small
# negative value. Per conversation with Lisa and Rafa, this is an
# edge case that the equation was not designed for. Negative QF
# doesn't make sense, so we set any negative QF values to 0.
# qf_im is the quickflow at pixel i on month m
qf_im = numpy.full(p_im.shape, TARGET_NODATA, dtype=numpy.float32)
# case 1: where there is no precipitation
qf_im[~precip_mask] = 0
# case 2: where there is precipitation and we're on a stream
qf_im[precip_mask & stream_mask] = p_im[precip_mask & stream_mask]
# case 3: where there is precipitation and we're not on a stream
case_3_mask = valid_mask & precip_mask & ~stream_mask
# for consistent indexing, make a_im the same shape as the other
# arrays even though we only use it in case 3
a_im = numpy.full(p_im.shape, numpy.nan, dtype=numpy.float32)
# a_im is the mean rain depth on a rainy day at pixel i on month m
# the 25.4 converts inches to mm since Si is in inches
a_im = numpy.empty(valid_n_events.shape)
a_im = p_im[valid_mask] / (valid_n_events * 25.4)
qf_im = numpy.empty(p_im.shape)
qf_im[:] = qf_nodata
# the 25.4 converts inches to mm since s_i is in inches
a_im[case_3_mask] = p_im[case_3_mask] / (n_m[case_3_mask] * 25.4)
# Precompute the last two terms in quickflow so we can handle a
# numerical instability when s_i is large and/or a_im is small
# on large valid_si/a_im this number will be zero and the latter
# exponent will also be zero because of a divide by zero. rather than
# raise that numerical warning, just handle it manually
E1 = scipy.special.expn(1, valid_si / a_im)
E1[valid_si == 0] = 0
nonzero_e1_mask = E1 != 0
exp_result = numpy.zeros(valid_si.shape)
exp_result[nonzero_e1_mask] = numpy.exp(
(0.8 * valid_si[nonzero_e1_mask]) / a_im[nonzero_e1_mask] +
numpy.log(E1[nonzero_e1_mask]))
# case 3a: when s_i = 0, qf = p
case_3a_mask = case_3_mask & (s_i == 0)
qf_im[case_3a_mask] = p_im[case_3a_mask]
# qf_im is the quickflow at pixel i on month m Eq. [1]
qf_im[valid_mask] = (25.4 * valid_n_events * (
(a_im - valid_si) * numpy.exp(-0.2 * valid_si / a_im) +
valid_si ** 2 / a_im * exp_result))
# case 3b: set quickflow to 0 when the s_i/a_im ratio is too large
case_3b_mask = case_3_mask & (s_i / a_im > 100)
qf_im[case_3b_mask] = 0
# case 3c: evaluate the equation as usual
case_3c_mask = case_3_mask & ~(case_3a_mask | case_3b_mask)
qf_im[case_3c_mask] = (
25.4 * n_m[case_3c_mask] * (
((a_im[case_3c_mask] - s_i[case_3c_mask]) *
numpy.exp(-0.2 * s_i[case_3c_mask] / a_im[case_3c_mask])) +
(s_i[case_3c_mask] ** 2 / a_im[case_3c_mask] *
numpy.exp(0.8 * s_i[case_3c_mask] / a_im[case_3c_mask]) *
scipy.special.exp1(s_i[case_3c_mask] / a_im[case_3c_mask]))
)
)
# case 3d: set any negative values to 0
qf_im[valid_mask & (qf_im < 0)] = 0
# if precip is 0, then QF should be zero
qf_im[(p_im == 0) | (n_events == 0)] = 0.0
# if we're on a stream, set quickflow to the precipitation
valid_stream_precip_mask = stream_array == 1
if p_nodata is not None:
valid_stream_precip_mask &= ~utils.array_equals_nodata(
p_im, p_nodata)
qf_im[valid_stream_precip_mask] = p_im[valid_stream_precip_mask]
return qf_im
pygeoprocessing.raster_calculator(
[(path, 1) for path in [
precip_path, si_path, n_events_raster_path, stream_path]], qf_op,
qf_monthly_path, gdal.GDT_Float32, qf_nodata)
precip_path, si_path, n_events_path, stream_path]],
qf_op, qf_monthly_path, gdal.GDT_Float32, TARGET_NODATA)
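# Illustrative sketch (not part of the module): a scalar version of the case
# analysis documented in qf_op above, for a single pixel. The function name
# and signature are hypothetical; the model's real implementation is the
# raster-based qf_op.
def _quickflow_scalar_example(p_im, s_i, n_m, on_stream):
    import numpy
    import scipy.special
    if p_im <= 0 or n_m <= 0:
        return 0.0   # case 1: no precipitation or no rain events
    if on_stream:
        return p_im  # case 2: on a stream, QF equals precipitation
    if s_i == 0:
        return p_im  # case 3a: no retention, QF equals precipitation
    a_im = p_im / (n_m * 25.4)  # mean rain depth per event, in inches
    if s_i / a_im > 100:
        return 0.0   # case 3b: large s_i / a_im, QF is effectively zero
    qf = 25.4 * n_m * (
        (a_im - s_i) * numpy.exp(-0.2 * s_i / a_im)
        + (s_i ** 2 / a_im) * numpy.exp(0.8 * s_i / a_im)
        * scipy.special.exp1(s_i / a_im))
    return max(qf, 0.0)  # case 3d: clamp tiny negative results to 0
# e.g. _quickflow_scalar_example(30.0, 2.5, 5, False) exercises case 3c.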
def _calculate_curve_number_raster(
lulc_raster_path, soil_group_path, biophysical_table, cn_path):
lulc_raster_path, soil_group_path, biophysical_df, cn_path):
"""Calculate the CN raster from the landcover and soil group rasters.
Args:
lulc_raster_path (string): path to landcover raster
soil_group_path (string): path to raster indicating soil group where
pixel values are in [1,2,3,4]
biophysical_table (dict): maps landcover IDs to dictionaries that
contain at least the keys 'cn_a', 'cn_b', 'cn_c', 'cn_d', that
map to the curve numbers for that landcover and soil type.
biophysical_df (pandas.DataFrame): table mapping landcover IDs to the
columns 'cn_a', 'cn_b', 'cn_c', and 'cn_d', which contain the
curve number values for that landcover and soil type.
cn_path (string): path to the output curve number raster, which
will have the dimensions of the intersection of
`lulc_raster_path` and `soil_group_path` and the cell size of
@ -1172,12 +1233,11 @@ def _calculate_curve_number_raster(
4: 'cn_d',
}
# curve numbers are always positive so -1 is a good nodata choice
cn_nodata = -1
lulc_to_soil = {}
lulc_nodata = pygeoprocessing.get_raster_info(
lulc_raster_path)['nodata'][0]
lucodes = list(biophysical_table)
lucodes = biophysical_df.index.to_list()
if lulc_nodata is not None:
lucodes.append(lulc_nodata)
@ -1190,12 +1250,12 @@ def _calculate_curve_number_raster(
for lucode in sorted(lucodes):
if lucode != lulc_nodata:
lulc_to_soil[soil_id]['cn_values'].append(
biophysical_table[lucode][soil_column])
biophysical_df[soil_column][lucode])
lulc_to_soil[soil_id]['lulc_values'].append(lucode)
else:
# handle the lulc nodata with cn nodata
lulc_to_soil[soil_id]['lulc_values'].append(lulc_nodata)
lulc_to_soil[soil_id]['cn_values'].append(cn_nodata)
lulc_to_soil[soil_id]['cn_values'].append(TARGET_NODATA)
# Making the landcover array a float32 in case the user provides a
# float landcover map like Kate did.
@ -1213,7 +1273,7 @@ def _calculate_curve_number_raster(
def cn_op(lulc_array, soil_group_array):
"""Map lulc code and soil to a curve number."""
cn_result = numpy.empty(lulc_array.shape)
cn_result[:] = cn_nodata
cn_result[:] = TARGET_NODATA
# if an lulc_array value is not in lulc_to_soil[soil_group_id]['lulc_values'],
# then numpy.digitize will not bin properly and will cause an IndexError
@ -1252,10 +1312,9 @@ def _calculate_curve_number_raster(
cn_result[current_soil_mask] = cn_values[current_soil_mask]
return cn_result
cn_nodata = -1
pygeoprocessing.raster_calculator(
[(lulc_raster_path, 1), (soil_group_path, 1)], cn_op, cn_path,
gdal.GDT_Float32, cn_nodata)
gdal.GDT_Float32, TARGET_NODATA)
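# Illustrative sketch (not part of the module): how the digitize-based lookup
# in cn_op above maps LULC codes to curve numbers for one soil group. The
# values are made up for demonstration.
import numpy

lucodes = numpy.array([1, 2, 3, 7])            # sorted LULC codes
cn_values = numpy.array([55., 61., 74., 82.])  # CN values for one soil group
lulc_block = numpy.array([[1, 7], [3, 2]])
# digitize with right=True returns, for each lulc value, its position in the
# sorted lucodes array, which indexes into the parallel cn_values array
index = numpy.digitize(lulc_block.ravel(), lucodes, right=True)
cn_block = cn_values[index].reshape(lulc_block.shape)
# cn_block == [[55., 82.], [74., 61.]]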
def _calculate_si_raster(cn_path, stream_path, si_path):
@ -1269,7 +1328,6 @@ def _calculate_si_raster(cn_path, stream_path, si_path):
Returns:
None
"""
si_nodata = -1
cn_nodata = pygeoprocessing.get_raster_info(cn_path)['nodata'][0]
def si_op(ci_factor, stream_mask):
@ -1278,17 +1336,17 @@ def _calculate_si_raster(cn_path, stream_path, si_path):
~utils.array_equals_nodata(ci_factor, cn_nodata) &
(ci_factor > 0))
si_array = numpy.empty(ci_factor.shape)
si_array[:] = si_nodata
si_array[:] = TARGET_NODATA
# multiply by the stream mask != 1 so we get 0s on the stream and
# unaffected results everywhere else
si_array[valid_mask] = (
(1000.0 / ci_factor[valid_mask] - 10) * (
(1000 / ci_factor[valid_mask] - 10) * (
stream_mask[valid_mask] != 1))
return si_array
pygeoprocessing.raster_calculator(
[(cn_path, 1), (stream_path, 1)], si_op, si_path, gdal.GDT_Float32,
si_nodata)
TARGET_NODATA)
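# Worked example for the S_i computation above (illustrative numbers): a
# curve number of 80 gives S_i = 1000 / 80 - 10 = 2.5 inches of potential
# retention; on stream pixels the (stream_mask != 1) factor is 0, so S_i is
# forced to 0 there (where quick flow equals precipitation, per case 2 above).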
def _aggregate_recharge(
@ -1350,7 +1408,7 @@ def _aggregate_recharge(
"no coverage for polygon %s", ', '.join(
[str(poly_feat.GetField(_)) for _ in range(
poly_feat.GetFieldCount())]))
value = 0.0
value = 0
elif op_type == 'sum':
value = aggregate_stats[poly_index]['sum']
poly_feat.SetField(aggregate_field_id, float(value))

View File

@ -46,6 +46,7 @@ MODEL_SPEC = {
"precipitation_path": spec_utils.PRECIP,
"biophysical_table": {
"type": "csv",
"index_col": "lucode",
"columns": {
"lucode": spec_utils.LULC_TABLE_COLUMN,
"emc_[POLLUTANT]": {
@ -363,10 +364,10 @@ MODEL_SPEC = {
"calculated by convolving the search kernel with the "
"retention ratio raster."),
"bands": {1: {"type": "ratio"}}
},
"cache_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -438,14 +439,14 @@ def execute(args):
suffix = utils.make_suffix_string(args, 'results_suffix')
output_dir = args['workspace_dir']
intermediate_dir = os.path.join(output_dir, 'intermediate')
cache_dir = os.path.join(intermediate_dir, 'cache_dir')
utils.make_directories(
[args['workspace_dir'], intermediate_dir, cache_dir])
utils.make_directories([args['workspace_dir'], intermediate_dir])
files = utils.build_file_registry(
[(INTERMEDIATE_OUTPUTS, intermediate_dir),
(FINAL_OUTPUTS, output_dir)], suffix)
task_graph = taskgraph.TaskGraph(cache_dir, int(args.get('n_workers', -1)))
task_graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'),
int(args.get('n_workers', -1)))
# get the necessary base raster info
source_lulc_raster_info = pygeoprocessing.get_raster_info(
@ -482,11 +483,12 @@ def execute(args):
task_name='align input rasters')
# Build a lookup dictionary mapping each LULC code to its row
biophysical_dict = utils.read_csv_to_dataframe(
args['biophysical_table'], 'lucode').to_dict(orient='index')
# sort the LULC codes upfront because we use the sorted list in multiple
# sort by the LULC codes upfront because we use the sorted list in multiple
# places. it's more efficient to do this once.
sorted_lucodes = sorted(biophysical_dict)
biophysical_df = utils.read_csv_to_dataframe(
args['biophysical_table'], MODEL_SPEC['args']['biophysical_table']
).sort_index()
sorted_lucodes = biophysical_df.index.to_list()
# convert the biophysical table into a 2D array where rows are LULC codes
# in sorted order and columns correspond to soil groups in order
@ -498,10 +500,8 @@ def execute(args):
# up with their indices in the array. this is more efficient than
# decrementing the whole soil group array by 1.
retention_ratio_array = numpy.array([
[1 - biophysical_dict[lucode][f'rc_{soil_group}']
for soil_group in ['a', 'b', 'c', 'd']
] for lucode in sorted_lucodes
], dtype=numpy.float32)
1 - biophysical_df[f'rc_{soil_group}'].to_numpy()
for soil_group in ['a', 'b', 'c', 'd']], dtype=numpy.float32).T
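# Illustrative example of the array built above (made-up values): for a
# biophysical table with index [11, 21] and columns rc_a..rc_d, stacking the
# four 1 - rc_<soil_group> columns and transposing yields shape (n_lucodes, 4):
#                       soil A  soil B  soil C  soil D
#   row 0 (lucode 11):    0.9     0.8     0.7     0.6
#   row 1 (lucode 21):    1.0     0.9     0.9     0.8
# so retention_ratio_array[i, j] is the retention ratio for the i-th sorted
# lucode under soil group j.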
# Calculate stormwater retention ratio and volume from
# LULC, soil groups, biophysical data, and precipitation
@ -522,10 +522,6 @@ def execute(args):
if args['adjust_retention_ratios']:
# in raster coord system units
radius = float(args['retention_radius'])
# boolean mapping for each LULC code whether it's connected
is_connected_map = {
lucode: 1 if biophysical_dict[lucode]['is_connected'] else 0
for lucode in biophysical_dict}
reproject_roads_task = task_graph.add_task(
func=pygeoprocessing.reproject_vector,
@ -591,7 +587,7 @@ def execute(args):
func=pygeoprocessing.reclassify_raster,
args=(
(files['lulc_aligned_path'], 1),
is_connected_map,
biophysical_df['is_connected'].astype(int).to_dict(),
files['connected_lulc_path'],
gdal.GDT_Byte,
UINT8_NODATA),
@ -706,14 +702,12 @@ def execute(args):
# (Optional) Calculate stormwater percolation ratio and volume from
# LULC, soil groups, biophysical table, and precipitation
if 'pe_a' in next(iter(biophysical_dict.values())):
if 'pe_a' in biophysical_df.columns:
LOGGER.info('percolation data detected in biophysical table. '
'Will calculate percolation ratio and volume rasters.')
percolation_ratio_array = numpy.array([
[biophysical_dict[lucode][f'pe_{soil_group}']
for soil_group in ['a', 'b', 'c', 'd']
] for lucode in sorted_lucodes
], dtype=numpy.float32)
biophysical_df[f'pe_{soil_group}'].to_numpy()
for soil_group in ['a', 'b', 'c', 'd']], dtype=numpy.float32).T
percolation_ratio_task = task_graph.add_task(
func=lookup_ratios,
args=(
@ -749,8 +743,8 @@ def execute(args):
# get the pollutant names from the 'emc_' columns of the biophysical table:
# strip the first four characters off 'emc_[POLLUTANT]' to get the pollutant name
pollutants = [key[4:] for key in next(iter(biophysical_dict.values()))
if key.startswith('emc_')]
pollutants = [
col[4:] for col in biophysical_df.columns if col.startswith('emc_')]
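# e.g. biophysical columns ['lucode', 'emc_tn', 'emc_tp', 'rc_a', ...]
# yield pollutants == ['tn', 'tp'] (column names here are illustrative)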
LOGGER.debug(f'Pollutants found in biophysical table: {pollutants}')
# Calculate avoided pollutant load for each pollutant from retention volume
@ -766,9 +760,7 @@ def execute(args):
output_dir, f'actual_pollutant_load_{pollutant}{suffix}.tif')
actual_load_paths.append(actual_pollutant_load_path)
# make an array mapping each LULC code to the pollutant EMC value
emc_array = numpy.array(
[biophysical_dict[lucode][f'emc_{pollutant}']
for lucode in sorted_lucodes], dtype=numpy.float32)
emc_array = biophysical_df[f'emc_{pollutant}'].to_numpy(dtype=numpy.float32)
# calculate avoided load from retention volume
avoided_load_task = task_graph.add_task(

View File

@ -55,6 +55,7 @@ MODEL_SPEC = {
"biophysical_table_path": {
"name": gettext("biophysical table"),
"type": "csv",
"index_col": "lucode",
"columns": {
"lucode": spec_utils.LULC_TABLE_COLUMN,
"kc": {
@ -170,6 +171,7 @@ MODEL_SPEC = {
"energy_consumption_table_path": {
"name": gettext("energy consumption table"),
"type": "csv",
"index_col": "type",
"columns": {
"type": {
"type": "integer",
@ -340,10 +342,10 @@ MODEL_SPEC = {
"reference of the LULC."),
"geometries": spec_utils.POLYGONS,
"fields": {}
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -410,8 +412,8 @@ def execute(args):
intermediate_dir = os.path.join(
args['workspace_dir'], 'intermediate')
utils.make_directories([args['workspace_dir'], intermediate_dir])
biophysical_lucode_map = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'lucode').to_dict(orient='index')
biophysical_df = utils.read_csv_to_dataframe(
args['biophysical_table_path'], MODEL_SPEC['args']['biophysical_table_path'])
# cast to float and calculate relative weights
# Use default weights for shade, albedo, eti if the user didn't provide
@ -454,7 +456,7 @@ def execute(args):
n_workers = -1 # Synchronous mode.
task_graph = taskgraph.TaskGraph(
os.path.join(intermediate_dir, '_taskgraph_working_dir'), n_workers)
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
# align all the input rasters.
aligned_lulc_raster_path = os.path.join(
@ -496,16 +498,13 @@ def execute(args):
'raster_name': 'LULC', 'column_name': 'lucode',
'table_name': 'Biophysical'}
for prop in reclassification_props:
prop_map = dict(
(lucode, x[prop])
for lucode, x in biophysical_lucode_map.items())
prop_raster_path = os.path.join(
intermediate_dir, f'{prop}{file_suffix}.tif')
prop_task = task_graph.add_task(
func=utils.reclassify_raster,
args=(
(aligned_lulc_raster_path, 1), prop_map, prop_raster_path,
(aligned_lulc_raster_path, 1),
biophysical_df[prop].to_dict(), prop_raster_path,
gdal.GDT_Float32, TARGET_NODATA, reclass_error_details),
target_path_list=[prop_raster_path],
dependent_task_list=[align_task],
@ -1079,8 +1078,9 @@ def calculate_energy_savings(
for field in target_building_layer.schema]
type_field_index = fieldnames.index('type')
energy_consumption_table = utils.read_csv_to_dataframe(
energy_consumption_table_path, 'type').to_dict(orient='index')
energy_consumption_df = utils.read_csv_to_dataframe(
energy_consumption_table_path,
MODEL_SPEC['args']['energy_consumption_table_path'])
target_building_layer.StartTransaction()
last_time = time.time()
@ -1104,7 +1104,7 @@ def calculate_energy_savings(
# Building type should be an integer and has to match the building
# types in the energy consumption table.
target_type = target_feature.GetField(int(type_field_index))
if target_type not in energy_consumption_table:
if target_type not in energy_consumption_df.index:
target_building_layer.CommitTransaction()
target_building_layer = None
target_building_vector = None
@ -1114,16 +1114,14 @@ def calculate_energy_savings(
"that has no corresponding entry in the energy consumption "
f"table at {energy_consumption_table_path}")
consumption_increase = float(
energy_consumption_table[target_type]['consumption'])
consumption_increase = energy_consumption_df['consumption'][target_type]
# Load building cost if we can, but don't adjust the value if the cost
# column is not there.
# NOTE: if the user has an empty column value but the 'cost' column
# exists, this will raise an error.
try:
building_cost = float(
energy_consumption_table[target_type]['cost'])
building_cost = energy_consumption_df['cost'][target_type]
except KeyError:
# KeyError when cost column not present.
building_cost = 1

View File

@ -57,6 +57,7 @@ MODEL_SPEC = {
},
"curve_number_table_path": {
"type": "csv",
"index_col": "lucode",
"columns": {
"lucode": {
"type": "integer",
@ -91,6 +92,7 @@ MODEL_SPEC = {
},
"infrastructure_damage_loss_table_path": {
"type": "csv",
"index_col": "type",
"columns": {
"type": {
"type": "integer",
@ -184,12 +186,7 @@ MODEL_SPEC = {
"the same spatial reference as the LULC."),
"geometries": spec_utils.POLYGONS,
"fields": {}
}
}
},
"temp_working_dir_not_for_humans": {
"type": "directory",
"contents": {
},
"aligned_lulc.tif": {
"about": "Aligned and clipped copy of the LULC.",
"bands": {1: {"type": "integer"}}
@ -205,10 +202,10 @@ MODEL_SPEC = {
"s_max.tif": {
"about": "Map of potential retention.",
"bands": {1: {"type": "number", "units": u.millimeter}}
},
"taskgraph_data.db": {}
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -259,12 +256,10 @@ def execute(args):
file_suffix = utils.make_suffix_string(args, 'results_suffix')
temporary_working_dir = os.path.join(
args['workspace_dir'], 'temp_working_dir_not_for_humans')
intermediate_dir = os.path.join(
args['workspace_dir'], 'intermediate_files')
utils.make_directories([
args['workspace_dir'], intermediate_dir, temporary_working_dir])
args['workspace_dir'], intermediate_dir])
try:
n_workers = int(args['n_workers'])
@ -273,13 +268,14 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # Synchronous mode.
task_graph = taskgraph.TaskGraph(temporary_working_dir, n_workers)
task_graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
# Align LULC with soils
aligned_lulc_path = os.path.join(
temporary_working_dir, f'aligned_lulc{file_suffix}.tif')
intermediate_dir, f'aligned_lulc{file_suffix}.tif')
aligned_soils_path = os.path.join(
temporary_working_dir,
intermediate_dir,
f'aligned_soils_hydrological_group{file_suffix}.tif')
lulc_raster_info = pygeoprocessing.get_raster_info(
@ -306,20 +302,20 @@ def execute(args):
task_name='align raster stack')
# Load CN table
cn_table = utils.read_csv_to_dataframe(
args['curve_number_table_path'], 'lucode').to_dict(orient='index')
cn_df = utils.read_csv_to_dataframe(
args['curve_number_table_path'],
MODEL_SPEC['args']['curve_number_table_path'])
# make cn_table into a 2d array where first dim is lucode, second is
# 0..3 to correspond to CN_A..CN_D
data = []
row_ind = []
col_ind = []
for lucode in cn_table:
data.extend([
cn_table[lucode][f'cn_{soil_id}']
for soil_id in ['a', 'b', 'c', 'd']])
row_ind.extend([int(lucode)] * 4)
for lucode, row in cn_df.iterrows():
data.extend([row[f'cn_{soil_id}'] for soil_id in ['a', 'b', 'c', 'd']])
row_ind.extend([lucode] * 4)
col_ind = [0, 1, 2, 3] * (len(row_ind) // 4)
lucode_to_cn_table = scipy.sparse.csr_matrix((data, (row_ind, col_ind)))
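# Illustrative example of the sparse lookup built above (made-up values for
# two lucodes, 3 and 5):
#   data    = [60, 70, 80, 85,  30, 55, 70, 77]   # CN_A..CN_D per lucode
#   row_ind = [ 3,  3,  3,  3,   5,  5,  5,  5]   # row index == lucode
#   col_ind = [ 0,  1,  2,  3,   0,  1,  2,  3]   # column == soil group A..D
# scipy.sparse.csr_matrix((data, (row_ind, col_ind)))[3, 1] returns 70
# (CN_B for lucode 3); rows for lucodes not in the table stay empty.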
cn_nodata = -1
@ -327,7 +323,7 @@ def execute(args):
soil_type_nodata = soil_raster_info['nodata'][0]
cn_raster_path = os.path.join(
temporary_working_dir, f'cn_raster{file_suffix}.tif')
intermediate_dir, f'cn_raster{file_suffix}.tif')
align_raster_stack_task.join()
cn_raster_task = task_graph.add_task(
@ -344,7 +340,7 @@ def execute(args):
# Generate S_max
s_max_nodata = -9999
s_max_raster_path = os.path.join(
temporary_working_dir, f's_max{file_suffix}.tif')
intermediate_dir, f's_max{file_suffix}.tif')
s_max_task = task_graph.add_task(
func=pygeoprocessing.raster_calculator,
args=(
@ -649,7 +645,9 @@ def _calculate_damage_to_infrastructure_in_aoi(
infrastructure_layer = infrastructure_vector.GetLayer()
damage_type_map = utils.read_csv_to_dataframe(
structures_damage_table, 'type').to_dict(orient='index')
structures_damage_table,
MODEL_SPEC['args']['infrastructure_damage_loss_table_path']
)['damage'].to_dict()
infrastructure_layer_defn = infrastructure_layer.GetLayerDefn()
type_index = -1
@ -703,8 +701,8 @@ def _calculate_damage_to_infrastructure_in_aoi(
intersection_geometry = aoi_geometry_shapely.intersection(
infrastructure_geometry)
damage_type = int(infrastructure_feature.GetField(type_index))
damage = damage_type_map[damage_type]['damage']
total_damage += intersection_geometry.area * damage
total_damage += (
intersection_geometry.area * damage_type_map[damage_type])
aoi_damage[aoi_feature.GetFID()] = total_damage
@ -939,5 +937,25 @@ def validate(args, limit_to=None):
be an empty list if validation succeeds.
"""
return validation.validate(args, MODEL_SPEC['args'],
MODEL_SPEC['args_with_spatial_overlap'])
validation_warnings = validation.validate(
args, MODEL_SPEC['args'], MODEL_SPEC['args_with_spatial_overlap'])
sufficient_keys = validation.get_sufficient_keys(args)
invalid_keys = validation.get_invalid_keys(validation_warnings)
if ("curve_number_table_path" not in invalid_keys and
"curve_number_table_path" in sufficient_keys):
# Load CN table. Resulting DF has index and CN_X columns only.
cn_df = utils.read_csv_to_dataframe(
args['curve_number_table_path'],
MODEL_SPEC['args']['curve_number_table_path'])
# Check for NaN values.
nan_mask = cn_df.isna()
if nan_mask.any(axis=None):
nan_lucodes = nan_mask[nan_mask.any(axis=1)].index
lucode_list = list(nan_lucodes.values)
validation_warnings.append((
['curve_number_table_path'],
f'Missing curve numbers for lucode(s) {lucode_list}'))
return validation_warnings
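# Example of the check above: if the curve number table row for lucode 42 has
# an empty cn_b cell, cn_df.isna() flags that cell, nan_mask.any(axis=1)
# selects the row, and the warning reads
# "Missing curve numbers for lucode(s) [42]". (The lucode is illustrative.)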

View File

@ -1,5 +1,4 @@
import collections
import functools
import logging
import math
import os
@ -76,14 +75,17 @@ MODEL_SPEC = {
"corresponding values in this table. Each row is a land use "
"land cover class."
),
'index_col': 'lucode',
'columns': {
'lucode': spec_utils.LULC_TABLE_COLUMN,
'urban_nature': {
'type': 'number',
'units': u.none,
'type': 'ratio',
'about': (
"Binary code indicating whether the LULC type is "
"(1) or is not (0) an urban nature type."
"The proportion (0-1) indicating the naturalness of "
"the land types. 0 indicates the naturalness level of "
"this LULC type is lowest (0% nature), while 1 "
"indicates that of this LULC type is the highest "
"(100% nature)"
),
},
'search_radius_m': {
@ -256,9 +258,10 @@ MODEL_SPEC = {
'name': 'population group radii table',
'type': 'csv',
'required': f'search_radius_mode == "{RADIUS_OPT_POP_GROUP}"',
'index_col': 'pop_group',
'columns': {
"pop_group": {
"type": "ratio",
"type": "freestyle_string",
"required": False,
"about": gettext(
"The name of the population group. Names must match "
@ -302,8 +305,9 @@ MODEL_SPEC = {
'output': {
"type": "directory",
"contents": {
"urban_nature_supply.tif": {
"about": "The calculated supply of urban nature.",
"urban_nature_supply_percapita.tif": {
"about": (
"The calculated supply per capita of urban nature."),
"bands": {1: {
"type": "number",
"units": u.m**2,
@ -412,13 +416,44 @@ MODEL_SPEC = {
"bands": {1: {"type": "number", "units": u.m**2/u.person}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
}
},
# when RADIUS_OPT_UNIFORM
"accessible_urban_nature.tif": {
"about": gettext(
"The area of greenspace available within the defined "
"radius, weighted by the selected decay function."),
"bands": {1: {"type": "number", "units": u.m**2}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'",
},
# When RADIUS_OPT_URBAN_NATURE
"accessible_urban_nature_lucode_[LUCODE].tif": {
"about": gettext(
"The area of greenspace available within the radius "
"associated with urban nature class LUCODE, weighted "
"by the selected decay function."),
"bands": {1: {"type": "number", "units": u.m**2}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'",
},
# When RADIUS_OPT_POP_GROUP
"accessible_urban_nature_to_[POP_GROUP].tif": {
"about": gettext(
"The area of greenspace available within the radius "
"associated with group POP_GROUP, weighted by the "
"selected decay function."),
"bands": {1: {"type": "number", "units": u.m**2}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
},
},
},
'intermediate': {
'type': 'directory',
'contents': {
'_taskgraph_working_dir': spec_utils.TASKGRAPH_DIR,
"aligned_lulc.tif": {
"about": gettext(
"A copy of the user's land use land cover raster. "
@ -445,6 +480,7 @@ MODEL_SPEC = {
),
"bands": {1: {'type': 'number', 'units': u.count}},
},
# when RADIUS_OPT_UNIFORM
"distance_weighted_population_within_[SEARCH_RADIUS].tif": {
"about": gettext(
@ -484,13 +520,13 @@ MODEL_SPEC = {
"created_if":
f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'",
},
"urban_nature_supply_lucode_[LUCODE].tif": {
"urban_nature_supply_percapita_lucode_[LUCODE].tif": {
"about": gettext(
"The urban nature supplied to populations due to the "
"land use land cover code LUCODE"),
"bands": {1: {"type": "number", "units": u.m**2/u.person}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_UNIFORM}'",
f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'",
},
"urban_nature_population_ratio_lucode_[LUCODE].tif": {
"about": gettext(
@ -501,14 +537,6 @@ MODEL_SPEC = {
"created_if":
f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'",
},
"urban_nature_supply_lucode_[LUCODE].tif": {
"about": gettext(
"The urban nature supplied to populations due to "
"the land use land cover class LUCODE."),
"bands": {1: {"type": "number", "units": u.m**2/u.person}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_URBAN_NATURE}'",
},
# When RADIUS_OPT_POP_GROUP
"population_in_[POP_GROUP].tif": {
@ -547,10 +575,10 @@ MODEL_SPEC = {
"created_if":
f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
},
"urban_nature_supply_to_[POP_GROUP].tif": {
"urban_nature_supply_percapita_to_[POP_GROUP].tif": {
"about": gettext(
"The urban nature supply to population group "
"POP_GROUP."),
"The urban nature supply per capita to population "
"group POP_GROUP."),
"bands": {1: {"type": "number", "units": u.m**2/u.person}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
@ -570,16 +598,16 @@ MODEL_SPEC = {
"bands": {1: {"type": "number", "units": u.people}},
"created_if":
f"search_radius_mode == '{RADIUS_OPT_POP_GROUP}'",
},
},
}
}
}
},
'taskgraph_cache': spec_utils.TASKGRAPH_DIR,
}
}
_OUTPUT_BASE_FILES = {
'urban_nature_supply': 'urban_nature_supply.tif',
'urban_nature_supply_percapita': 'urban_nature_supply_percapita.tif',
'admin_boundaries': 'admin_boundaries.gpkg',
'urban_nature_balance_percapita': 'urban_nature_balance_percapita.tif',
'urban_nature_balance_totalpop': 'urban_nature_balance_totalpop.tif',
@ -620,9 +648,10 @@ def execute(args):
CSV with the following columns:
* ``lucode``: (required) the integer landcover code represented.
* ``urban_nature``: (required) ``0`` or ``1`` indicating whether
this landcover code is (``1``) or is not (``0``) an urban nature
pixel.
* ``urban_nature``: (required) a proportion (0-1) representing
how much of this landcover type is urban nature. ``0``
indicates none of this type's area is urban nature, ``1``
indicates all of this type's area is urban nature.
* ``search_radius_m``: (conditionally required) the search radius
for this urban nature LULC class in meters. Required for all
urban nature LULC codes if ``args['search_radius_mode'] ==
@ -677,7 +706,6 @@ def execute(args):
(_INTERMEDIATE_BASE_FILES, intermediate_dir)],
suffix)
work_token_dir = os.path.join(intermediate_dir, '_taskgraph_working_dir')
try:
n_workers = int(args['n_workers'])
except (KeyError, ValueError, TypeError):
@ -685,7 +713,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # Synchronous execution
graph = taskgraph.TaskGraph(work_token_dir, n_workers)
graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
kernel_creation_functions = {
KERNEL_LABEL_DICHOTOMY: _kernel_dichotomy,
@ -902,14 +931,16 @@ def execute(args):
aoi_reprojection_task, lulc_mask_task]
)
attr_table = utils.read_csv_to_dataframe(args['lulc_attribute_table'])
attr_table = utils.read_csv_to_dataframe(
args['lulc_attribute_table'],
MODEL_SPEC['args']['lulc_attribute_table'])
kernel_paths = {} # search_radius, kernel path
kernel_tasks = {} # search_radius, kernel task
if args['search_radius_mode'] == RADIUS_OPT_UNIFORM:
search_radii = set([float(args['search_radius'])])
elif args['search_radius_mode'] == RADIUS_OPT_URBAN_NATURE:
urban_nature_attrs = attr_table[attr_table['urban_nature'] == 1]
urban_nature_attrs = attr_table[attr_table['urban_nature'] > 0]
try:
search_radii = set(urban_nature_attrs['search_radius_m'].unique())
except KeyError as missing_key:
@ -918,16 +949,14 @@ def execute(args):
f"attribute table {args['lulc_attribute_table']}")
# Build an iterable of plain tuples: (lucode, search_radius_m)
lucode_to_search_radii = list(
urban_nature_attrs[['lucode', 'search_radius_m']].itertuples(
index=False, name=None))
urban_nature_attrs[['search_radius_m']].itertuples(name=None))
elif args['search_radius_mode'] == RADIUS_OPT_POP_GROUP:
pop_group_table = utils.read_csv_to_dataframe(
args['population_group_radii_table'])
args['population_group_radii_table'],
MODEL_SPEC['args']['population_group_radii_table'])
search_radii = set(pop_group_table['search_radius_m'].unique())
# Build a dict of {pop_group: search_radius_m}
search_radii_by_pop_group = dict(
pop_group_table[['pop_group', 'search_radius_m']].itertuples(
index=False, name=None))
search_radii_by_pop_group = pop_group_table['search_radius_m'].to_dict()
else:
valid_options = ', '.join(
MODEL_SPEC['args']['search_radius_mode']['options'].keys())
@ -989,6 +1018,21 @@ def execute(args):
dependent_task_list=[lulc_mask_task]
)
accessible_urban_nature_path = os.path.join(
output_dir, f'accessible_urban_nature{suffix}.tif')
_ = graph.add_task(
_convolve_and_set_lower_bound,
kwargs={
"signal_path_band": (urban_nature_pixels_path, 1),
"kernel_path_band": (kernel_paths[search_radius_m], 1),
"target_path": accessible_urban_nature_path,
"working_dir": intermediate_dir,
},
task_name='Accessible urban nature',
target_path_list=[accessible_urban_nature_path],
dependent_task_list=[urban_nature_reclassification_task]
)
urban_nature_population_ratio_path = os.path.join(
intermediate_dir,
f'urban_nature_population_ratio{suffix}.tif')
@ -1005,17 +1049,17 @@ def execute(args):
urban_nature_reclassification_task, decayed_population_task,
])
urban_nature_supply_task = graph.add_task(
urban_nature_supply_percapita_task = graph.add_task(
_convolve_and_set_lower_bound,
kwargs={
'signal_path_band': (
urban_nature_population_ratio_path, 1),
'kernel_path_band': (kernel_path, 1),
'target_path': file_registry['urban_nature_supply'],
'target_path': file_registry['urban_nature_supply_percapita'],
'working_dir': intermediate_dir,
},
task_name='2SFCA - urban nature supply',
target_path_list=[file_registry['urban_nature_supply']],
target_path_list=[file_registry['urban_nature_supply_percapita']],
dependent_task_list=[
kernel_tasks[search_radius_m],
urban_nature_population_ratio_task])
@ -1044,8 +1088,8 @@ def execute(args):
dependent_task_list=[
kernel_tasks[search_radius_m], population_mask_task])
partial_urban_nature_supply_paths = []
partial_urban_nature_supply_tasks = []
partial_urban_nature_supply_percapita_paths = []
partial_urban_nature_supply_percapita_tasks = []
for lucode, search_radius_m in lucode_to_search_radii:
urban_nature_pixels_path = os.path.join(
intermediate_dir,
@ -1063,6 +1107,22 @@ def execute(args):
dependent_task_list=[lulc_mask_task]
)
accessible_urban_nature_path = os.path.join(
output_dir,
f'accessible_urban_nature_lucode_{lucode}{suffix}.tif')
_ = graph.add_task(
_convolve_and_set_lower_bound,
kwargs={
"signal_path_band": (urban_nature_pixels_path, 1),
"kernel_path_band": (kernel_paths[search_radius_m], 1),
"target_path": accessible_urban_nature_path,
"working_dir": intermediate_dir,
},
task_name='Accessible urban nature',
target_path_list=[accessible_urban_nature_path],
dependent_task_list=[urban_nature_reclassification_task]
)
urban_nature_population_ratio_path = os.path.join(
intermediate_dir,
f'urban_nature_population_ratio_lucode_{lucode}{suffix}.tif')
@ -1080,35 +1140,37 @@ def execute(args):
decayed_population_tasks[search_radius_m],
])
urban_nature_supply_path = os.path.join(
urban_nature_supply_percapita_path = os.path.join(
intermediate_dir,
f'urban_nature_supply_lucode_{lucode}{suffix}.tif')
partial_urban_nature_supply_paths.append(urban_nature_supply_path)
partial_urban_nature_supply_tasks.append(graph.add_task(
f'urban_nature_supply_percapita_lucode_{lucode}{suffix}.tif')
partial_urban_nature_supply_percapita_paths.append(
urban_nature_supply_percapita_path)
partial_urban_nature_supply_percapita_tasks.append(graph.add_task(
pygeoprocessing.convolve_2d,
kwargs={
'signal_path_band': (
urban_nature_population_ratio_path, 1),
'kernel_path_band': (kernel_paths[search_radius_m], 1),
'target_path': urban_nature_supply_path,
'target_path': urban_nature_supply_percapita_path,
'working_dir': intermediate_dir,
},
task_name=f'2SFCA - urban_nature supply for lucode {lucode}',
target_path_list=[urban_nature_supply_path],
target_path_list=[urban_nature_supply_percapita_path],
dependent_task_list=[
kernel_tasks[search_radius_m],
urban_nature_population_ratio_task]))
urban_nature_supply_task = graph.add_task(
urban_nature_supply_percapita_task = graph.add_task(
ndr._sum_rasters,
kwargs={
'raster_path_list': partial_urban_nature_supply_paths,
'raster_path_list': partial_urban_nature_supply_percapita_paths,
'target_nodata': FLOAT32_NODATA,
'target_result_path': file_registry['urban_nature_supply'],
'target_result_path':
file_registry['urban_nature_supply_percapita'],
},
task_name='2SFCA - urban nature supply total',
target_path_list=[file_registry['urban_nature_supply']],
dependent_task_list=partial_urban_nature_supply_tasks
target_path_list=[file_registry['urban_nature_supply_percapita']],
dependent_task_list=partial_urban_nature_supply_percapita_tasks
)
# Search radius mode 3: search radii are defined per population group.
@ -1133,6 +1195,23 @@ def execute(args):
decayed_population_in_group_tasks = []
for pop_group in split_population_fields:
search_radius_m = search_radii_by_pop_group[pop_group]
accessible_urban_nature_path = os.path.join(
output_dir,
f'accessible_urban_nature_to_{pop_group}{suffix}.tif')
_ = graph.add_task(
_convolve_and_set_lower_bound,
kwargs={
"signal_path_band": (urban_nature_pixels_path, 1),
"kernel_path_band": (kernel_paths[search_radius_m], 1),
"target_path": accessible_urban_nature_path,
"working_dir": intermediate_dir,
},
task_name='Accessible urban nature',
target_path_list=[accessible_urban_nature_path],
dependent_task_list=[urban_nature_reclassification_task]
)
decayed_population_in_group_path = os.path.join(
intermediate_dir,
f'distance_weighted_population_in_{pop_group}{suffix}.tif')
@ -1185,42 +1264,36 @@ def execute(args):
sum_of_decayed_population_task,
])
# Create a dict of {pop_group: search_radius_m}
group_radii_table = utils.read_csv_to_dataframe(
args['population_group_radii_table'])
search_radii = dict(
group_radii_table[['pop_group', 'search_radius_m']].itertuples(
index=False, name=None))
urban_nature_supply_by_group_paths = {}
urban_nature_supply_by_group_tasks = []
urban_nature_supply_percapita_by_group_paths = {}
urban_nature_supply_percapita_by_group_tasks = []
urban_nature_balance_totalpop_by_group_paths = {}
urban_nature_balance_totalpop_by_group_tasks = []
supply_population_paths = {'over': {}, 'under': {}}
supply_population_tasks = {'over': {}, 'under': {}}
for pop_group, proportional_pop_path in (
proportional_population_paths.items()):
search_radius_m = search_radii[pop_group]
urban_nature_supply_to_group_path = os.path.join(
search_radius_m = search_radii_by_pop_group[pop_group]
urban_nature_supply_percapita_to_group_path = os.path.join(
intermediate_dir,
f'urban_nature_supply_to_{pop_group}{suffix}.tif')
urban_nature_supply_by_group_paths[
pop_group] = urban_nature_supply_to_group_path
urban_nature_supply_by_group_task = graph.add_task(
f'urban_nature_supply_percapita_to_{pop_group}{suffix}.tif')
urban_nature_supply_percapita_by_group_paths[
pop_group] = urban_nature_supply_percapita_to_group_path
urban_nature_supply_percapita_by_group_task = graph.add_task(
_convolve_and_set_lower_bound,
kwargs={
'signal_path_band': (
file_registry['urban_nature_population_ratio'], 1),
'kernel_path_band': (kernel_paths[search_radius_m], 1),
'target_path': urban_nature_supply_to_group_path,
'target_path': urban_nature_supply_percapita_to_group_path,
'working_dir': intermediate_dir,
},
task_name=f'2SFCA - urban nature supply for {pop_group}',
target_path_list=[urban_nature_supply_to_group_path],
target_path_list=[urban_nature_supply_percapita_to_group_path],
dependent_task_list=[
kernel_tasks[search_radius_m],
urban_nature_population_ratio_task])
urban_nature_supply_by_group_tasks.append(
urban_nature_supply_by_group_task)
urban_nature_supply_percapita_by_group_tasks.append(
urban_nature_supply_percapita_by_group_task)
# Calculate SUP_DEMi_cap for each population group.
per_cap_urban_nature_balance_pop_group_path = os.path.join(
@ -1230,7 +1303,7 @@ def execute(args):
pygeoprocessing.raster_calculator,
kwargs={
'base_raster_path_band_const_list': [
(urban_nature_supply_to_group_path, 1),
(urban_nature_supply_percapita_to_group_path, 1),
(float(args['urban_nature_demand']), 'raw')
],
'local_op': _urban_nature_balance_percapita_op,
@ -1244,7 +1317,7 @@ def execute(args):
target_path_list=[
per_cap_urban_nature_balance_pop_group_path],
dependent_task_list=[
urban_nature_supply_by_group_task,
urban_nature_supply_percapita_by_group_task,
])
urban_nature_balance_totalpop_by_group_path = os.path.join(
@ -1303,21 +1376,21 @@ def execute(args):
proportional_population_tasks[pop_group],
])
urban_nature_supply_task = graph.add_task(
urban_nature_supply_percapita_task = graph.add_task(
_weighted_sum,
kwargs={
'raster_path_list':
[urban_nature_supply_by_group_paths[group] for group in
[urban_nature_supply_percapita_by_group_paths[group] for group in
sorted(split_population_fields)],
'weight_raster_list':
[pop_group_proportion_paths[group] for group in
sorted(split_population_fields)],
'target_path': file_registry['urban_nature_supply'],
'target_path': file_registry['urban_nature_supply_percapita'],
},
task_name='2SFCA - urban nature supply total',
target_path_list=[file_registry['urban_nature_supply']],
target_path_list=[file_registry['urban_nature_supply_percapita']],
dependent_task_list=[
*urban_nature_supply_by_group_tasks,
*urban_nature_supply_percapita_by_group_tasks,
*pop_group_proportion_tasks.values(),
])
@ -1325,7 +1398,7 @@ def execute(args):
pygeoprocessing.raster_calculator,
kwargs={
'base_raster_path_band_const_list': [
(file_registry['urban_nature_supply'], 1),
(file_registry['urban_nature_supply_percapita'], 1),
(float(args['urban_nature_demand']), 'raw')
],
'local_op': _urban_nature_balance_percapita_op,
@ -1337,7 +1410,7 @@ def execute(args):
task_name='Calculate per-capita urban nature balance',
target_path_list=[file_registry['urban_nature_balance_percapita']],
dependent_task_list=[
urban_nature_supply_task,
urban_nature_supply_percapita_task,
])
urban_nature_balance_totalpop_task = graph.add_task(
@ -1388,7 +1461,7 @@ def execute(args):
pygeoprocessing.raster_calculator,
kwargs={
'base_raster_path_band_const_list': [
(file_registry['urban_nature_supply'], 1),
(file_registry['urban_nature_supply_percapita'], 1),
(float(args['urban_nature_demand']), 'raw')
],
'local_op': _urban_nature_balance_percapita_op,
@ -1400,7 +1473,7 @@ def execute(args):
task_name='Calculate per-capita urban nature balance',
target_path_list=[file_registry['urban_nature_balance_percapita']],
dependent_task_list=[
urban_nature_supply_task,
urban_nature_supply_percapita_task,
])
# This is "SUP_DEMi" from the user's guide
@ -1730,13 +1803,16 @@ def _reclassify_urban_nature_area(
"""Reclassify LULC pixels into the urban nature area they represent.
After execution, urban nature pixels will have values representing the
pixel's area, while pixels that are not urban nature will have a pixel
value of 0. Nodata values will propagate to the output raster.
pixel's area of urban nature (pixel area * proportion of urban nature),
while pixels that are not urban nature will have a pixel value of 0.
Nodata values will propagate to the output raster.
Args:
lulc_raster_path (string): The path to a land-use/land-cover raster.
lulc_attribute_table (string): The path to a CSV table representing
LULC attributes. Must have "lucode" and "urban_nature" columns.
The "urban_nature" column represents a proportion 0-1 of how much
of the pixel's area represents urban nature.
target_raster_path (string): Where the reclassified urban nature raster
should be written.
only_these_urban_nature_codes=None (iterable or None): If ``None``, all
@ -1748,8 +1824,8 @@ def _reclassify_urban_nature_area(
Returns:
``None``
"""
attribute_table_dict = utils.read_csv_to_dataframe(
lulc_attribute_table, 'lucode').to_dict(orient='index')
lulc_attribute_df = utils.read_csv_to_dataframe(
lulc_attribute_table, MODEL_SPEC['args']['lulc_attribute_table'])
squared_pixel_area = abs(
numpy.multiply(*_square_off_pixels(lulc_raster_path)))
@ -1758,14 +1834,15 @@ def _reclassify_urban_nature_area(
valid_urban_nature_codes = set(only_these_urban_nature_codes)
else:
valid_urban_nature_codes = set(
lucode for lucode, attributes in attribute_table_dict.items()
if (attributes['urban_nature']) == 1)
lulc_attribute_df[lulc_attribute_df['urban_nature'] > 0].index)
urban_nature_area_map = {}
for lucode, attributes in attribute_table_dict.items():
for row in lulc_attribute_df[['urban_nature']].itertuples():
lucode = row.Index
urban_nature_proportion = row.urban_nature
urban_nature_area = 0
if lucode in valid_urban_nature_codes:
urban_nature_area = squared_pixel_area
urban_nature_area = squared_pixel_area * urban_nature_proportion
urban_nature_area_map[lucode] = urban_nature_area
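# Worked example (illustrative numbers): for a 30 m x 30 m pixel,
# squared_pixel_area is 900 m^2; an LULC class with urban_nature = 0.25 maps
# to 900 * 0.25 = 225 m^2 of urban nature per pixel, while a class with
# urban_nature = 0 maps to 0.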
lulc_raster_info = pygeoprocessing.get_raster_info(lulc_raster_path)

View File

@ -597,85 +597,119 @@ def expand_path(path, base_path):
return os.path.abspath(os.path.join(os.path.dirname(base_path), path))
def read_csv_to_dataframe(
path, index_col=False, usecols=None, convert_cols_to_lower=True,
convert_vals_to_lower=True, expand_path_cols=None, sep=None, engine='python',
encoding='utf-8-sig', **kwargs):
def read_csv_to_dataframe(path, spec, **kwargs):
"""Return a dataframe representation of the CSV.
Wrapper around ``pandas.read_csv`` that standardizes the column names by
stripping leading/trailing whitespace and optionally making all lowercase.
This helps avoid common errors caused by user-supplied CSV files with
column names that don't exactly match the specification. Strips
leading/trailing whitespace from data entries as well.
Wrapper around ``pandas.read_csv`` that performs some common data cleaning
based on information in the arg spec.
Also sets custom defaults for some kwargs passed to ``pandas.read_csv``.
Columns are filtered to just those that match a pattern in the spec.
Column names are lowercased and whitespace is stripped off. Empty rows are
dropped. Values in each column are processed and cast to an appropriate
dtype according to the type in the spec:
- Values in raster, vector, csv, file, and directory columns are cast to
str, whitespace stripped, and expanded as paths relative to the input path
- Values in freestyle_string and option_string columns are cast to str,
whitespace stripped, and converted to lowercase
- Values in number, ratio, and percent columns are cast to float
- Values in integer columns are cast to int
- Values in boolean columns are cast to bool
Empty or NA cells are returned as ``numpy.nan`` (for floats) or
``pandas.NA`` (for all other types).
Also sets custom defaults for some kwargs passed to ``pandas.read_csv``,
which you can override with kwargs:
- sep=None: lets the Python engine infer the separator
- engine='python': The 'python' engine supports the sep=None option.
- encoding='utf-8-sig': 'utf-8-sig' handles UTF-8 with or without BOM.
Args:
path (str): path to a CSV file
index_col (str): name of column to use as the dataframe index. If
``convert_cols_to_lower``, this column name and the dataframe column names
will be lowercased before they are compared. If ``usecols``
is defined, this must be included in ``usecols``.
usecols (list(str)): list of column names to subset from the dataframe.
If ``convert_cols_to_lower``, these names and the dataframe column names
will be lowercased before they are compared.
convert_cols_to_lower (bool): if True, convert all column names to lowercase
convert_vals_to_lower (bool): if True, convert all table values to lowercase
expand_path_cols (list[string])): if provided, a list of the names of
columns that contain paths to expand. Any relative paths in these
columns will be expanded to absolute paths. It is assumed that
relative paths are relative to the CSV's path.
sep: kwarg of ``pandas.read_csv``. Defaults to None, which
lets the Python engine infer the separator
engine (str): kwarg of ``pandas.read_csv``. The 'python' engine
supports the sep=None option.
encoding (str): kwarg of ``pandas.read_csv``. Using the 'utf-8-sig'
encoding handles UTF-8 with or without BOM.
spec (dict): dictionary specifying the structure of the CSV table
**kwargs: additional kwargs will be passed to ``pandas.read_csv``
Returns:
pandas.DataFrame with the contents of the given CSV
"""
# build up a list of regex patterns to match columns against. Columns from
# the table that match a pattern in this list (after stripping whitespace
# and lowercasing) will be included in the dataframe.
patterns = []
for column in spec['columns']:
column = column.lower()
match = re.match(r'(.*)\[(.+)\](.*)', column)
if match:
# for column name patterns, convert it to a regex pattern
groups = match.groups()
patterns.append(f'{groups[0]}(.+){groups[2]}')
else:
# for regular column names, use the exact name as the pattern
patterns.append(column.replace('(', r'\(').replace(')', r'\)'))
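# e.g. a hypothetical spec column pattern like 'qf_[month]' becomes the
# regex 'qf_(.+)' and matches table columns 'qf_1', 'qf_2', ...; a literal
# column name containing parentheses is escaped so it only matches itself.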
try:
# set index_col=False to force pandas not to index by any column
# this is useful in case of trailing separators
# we'll explicitly set the index column later on
dataframe = pandas.read_csv(
path, index_col=False, sep=sep, engine=engine, encoding=encoding, **kwargs)
df = pandas.read_csv(
path,
index_col=False,
usecols=lambda col: any(
re.fullmatch(pattern, col.strip().lower()) for pattern in patterns
),
**{
'sep': None,
'engine': 'python',
'encoding': 'utf-8-sig',
**kwargs
})
except UnicodeDecodeError as error:
LOGGER.error(
f'The file {path} must be encoded as UTF-8 or ASCII')
raise error
# strip whitespace from column names
# strip whitespace from column names and convert to lowercase
# this won't work on integer types, which happens if you set header=None
# however, there's little reason to use this function if there's no header
dataframe.columns = dataframe.columns.str.strip()
df.columns = df.columns.str.strip().str.lower()
# convert column names to lowercase
if convert_cols_to_lower:
dataframe.columns = dataframe.columns.str.lower()
# if ``to_lower``, case handling is done before trying to access the data.
# the columns are stripped of leading/trailing whitespace in
# ``read_csv_to_dataframe``, and also lowercased if ``to_lower`` so we only
# need to convert the rest of the table.
if index_col and isinstance(index_col, str):
index_col = index_col.lower()
# lowercase column names
if usecols:
usecols = [col.lower() for col in usecols]
# drop any empty rows
df = df.dropna(how="all")
# Subset dataframe by columns if desired
if usecols:
dataframe = dataframe[usecols]
available_cols = set(df.columns)
# Set 'index_col' as the index of the dataframe
if index_col:
for col_spec, pattern in zip(spec['columns'].values(), patterns):
matching_cols = [c for c in available_cols if re.match(pattern, c)]
available_cols -= set(matching_cols)
for col in matching_cols:
try:
if col_spec['type'] in ['csv', 'directory', 'file', 'raster', 'vector', {'vector', 'raster'}]:
df[col] = df[col].apply(
lambda p: p if pandas.isna(p) else expand_path(str(p).strip(), path))
df[col] = df[col].astype(pandas.StringDtype())
elif col_spec['type'] in {'freestyle_string', 'option_string'}:
df[col] = df[col].apply(
lambda s: s if pandas.isna(s) else str(s).strip().lower())
df[col] = df[col].astype(pandas.StringDtype())
elif col_spec['type'] in {'number', 'percent', 'ratio'}:
df[col] = df[col].astype(float)
elif col_spec['type'] == 'integer':
df[col] = df[col].astype(pandas.Int64Dtype())
elif col_spec['type'] == 'boolean':
df[col] = df[col].astype('boolean')
except ValueError as err:
raise ValueError(
f'Value(s) in the "{col}" column of the table {path} '
f'could not be interpreted as {col_spec["type"]}s. '
f'Original error: {err}')
# set the index column, if specified
if 'index_col' in spec and spec['index_col'] is not None:
index_col = spec['index_col'].lower()
try:
dataframe = dataframe.set_index(
index_col, drop=False, verify_integrity=True)
df = df.set_index(index_col, verify_integrity=True)
except KeyError:
# If 'index_col' is not a column then KeyError is raised for using
# it as the index column
@ -683,33 +717,7 @@ def read_csv_to_dataframe(
f"in the table {path}")
raise
# convert table values to lowercase
if convert_vals_to_lower:
dataframe = dataframe.applymap(
lambda x: x.lower() if isinstance(x, str) else x)
# expand paths
if expand_path_cols:
for col in expand_path_cols:
# allow for the case where a column is optional
if col in dataframe:
dataframe[col] = dataframe[col].apply(
# if the whole column is empty, cells will be parsed as NaN
# catch that before trying to expand them as paths
lambda p: '' if pandas.isna(p) else expand_path(p, path))
# drop any empty rows
dataframe = dataframe.dropna(how="all")
# fill the rest of empty or NaN values with empty string
dataframe = dataframe.fillna(value="")
# strip whitespace from table values
# Remove values with leading ('^ +') and trailing (' +$') whitespace.
# Regular expressions using 'replace' only substitute on strings.
dataframe = dataframe.replace(r"^ +| +$", r"", regex=True)
return dataframe
return df
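A minimal usage sketch of the spec-driven behavior documented above; the table
name, column names, and types below are hypothetical and only illustrate the
shape of the ``spec`` argument:
spec = {
    'index_col': 'lucode',
    'columns': {
        'lucode': {'type': 'integer'},
        'description': {'type': 'freestyle_string'},
        'value': {'type': 'number'},
        'raster_path': {'type': 'raster'},
    },
}
df = utils.read_csv_to_dataframe('biophysical_table.csv', spec)
# Columns not named in the spec are dropped, 'lucode' becomes the index,
# numeric columns are cast to float or nullable-integer dtypes, string
# columns are stripped and lowercased, and 'raster_path' values are expanded
# relative to the CSV's own directory.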
def make_directories(directory_list):

View File

@ -56,7 +56,8 @@ MESSAGES = {
'NOT_AN_INTEGER': gettext('Value "{value}" does not represent an integer'),
'NOT_BOOLEAN': gettext("Value must be either True or False, not {value}"),
'NO_PROJECTION': gettext('Spatial file {filepath} has no projection'),
'BBOX_NOT_INTERSECT': gettext("Bounding boxes do not intersect: {bboxes}"),
'BBOX_NOT_INTERSECT': gettext('Not all of the spatial layers overlap each '
'other. All bounding boxes must intersect: {bboxes}'),
'NEED_PERMISSION': gettext('You must have {permission} access to this file'),
}

View File

@ -423,6 +423,7 @@ MODEL_SPEC = {
"Table of value ranges for each captured wave energy "
"quantile group as well as the number of pixels for "
"each group."),
"index_col": "Percentile Group",
"columns": {
**PERCENTILE_TABLE_FIELDS,
"Value Range (megawatt hours per year, MWh/yr)": {
@ -459,6 +460,7 @@ MODEL_SPEC = {
"Table of value ranges for each net present value "
"quantile group as well as the number of pixels for "
"each group."),
"index_col": "Percentile Group",
"columns": {
**PERCENTILE_TABLE_FIELDS,
"Value Range (thousands of currency units, currency)": {
@ -497,6 +499,7 @@ MODEL_SPEC = {
"about": gettext(
"Table of value ranges for each wave power quantile "
"group as well as the number of pixels for each group."),
"index_col": "Percentile Group",
"columns": {
**PERCENTILE_TABLE_FIELDS,
"Value Range (wave power per unit width of wave crest length, kW/m)": {
@ -597,10 +600,10 @@ MODEL_SPEC = {
"LandPts.txt": {
"created_if": "valuation_container",
"about": "This text file logs records of the landing point coordinates."
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -722,8 +725,6 @@ def execute(args):
utils.make_directories([intermediate_dir, output_dir])
# Initialize a TaskGraph
taskgraph_working_dir = os.path.join(
intermediate_dir, '_taskgraph_working_dir')
try:
n_workers = int(args['n_workers'])
except (KeyError, ValueError, TypeError):
@ -731,7 +732,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # single process mode.
task_graph = taskgraph.TaskGraph(taskgraph_working_dir, n_workers)
task_graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
# Append a _ to the suffix if it's not empty and doesn't already have one
file_suffix = utils.make_suffix_string(args, 'results_suffix')
@ -743,7 +745,7 @@ def execute(args):
# arrays. Also store the amount of energy the machine produces
# in a certain wave period/height state as a 2D array
machine_perf_dict = {}
machine_perf_data = utils.read_csv_to_dataframe(args['machine_perf_path'])
machine_perf_data = pandas.read_csv(args['machine_perf_path'])
# Get the wave period fields, starting from the second column of the table
machine_perf_dict['periods'] = machine_perf_data.columns.values[1:]
# Build up the height field by taking the first column of the table
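# e.g. (hypothetical layout) the performance table's header row lists wave
# periods, its first column lists wave heights, and each interior cell is
# the energy the machine produces in that height/period sea state.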
@ -775,12 +777,15 @@ def execute(args):
# Check if required column fields are entered in the land grid csv file
if 'land_gridPts_path' in args:
# Create a grid_land_data dataframe for later use in valuation
grid_land_data = utils.read_csv_to_dataframe(
args['land_gridPts_path'], convert_vals_to_lower=False)
required_col_names = ['id', 'type', 'lat', 'long', 'location']
grid_land_data, missing_grid_land_fields = _get_validated_dataframe(
args['land_gridPts_path'], required_col_names)
# Create a grid_land_df dataframe for later use in valuation
grid_land_df = utils.read_csv_to_dataframe(
args['land_gridPts_path'],
MODEL_SPEC['args']['land_gridPts_path'])
missing_grid_land_fields = []
for field in ['id', 'type', 'lat', 'long', 'location']:
if field not in grid_land_df.columns:
missing_grid_land_fields.append(field)
if missing_grid_land_fields:
raise ValueError(
'The following column fields are missing from the Grid '
@ -1080,20 +1085,12 @@ def execute(args):
grid_vector_path = os.path.join(
output_dir, 'GridPts_prj%s.shp' % file_suffix)
grid_data = grid_land_data.loc[
grid_land_data['type'].str.upper() == 'GRID']
land_data = grid_land_data.loc[
grid_land_data['type'].str.upper() == 'LAND']
grid_dict = grid_data.to_dict('index')
land_dict = land_data.to_dict('index')
# Make a point shapefile for grid points
LOGGER.info('Creating Grid Points Vector.')
create_grid_points_vector_task = task_graph.add_task(
func=_dict_to_point_vector,
args=(grid_dict, grid_vector_path, 'grid_points', analysis_area_sr_wkt,
aoi_sr_wkt),
args=(grid_land_df[grid_land_df['type'] == 'grid'].to_dict('index'),
grid_vector_path, 'grid_points', analysis_area_sr_wkt, aoi_sr_wkt),
target_path_list=[grid_vector_path],
task_name='create_grid_points_vector')
@ -1101,8 +1098,8 @@ def execute(args):
LOGGER.info('Creating Landing Points Vector.')
create_land_points_vector_task = task_graph.add_task(
func=_dict_to_point_vector,
args=(land_dict, land_vector_path, 'land_points', analysis_area_sr_wkt,
aoi_sr_wkt),
args=(grid_land_df[grid_land_df['type'] == 'land'].to_dict('index'),
land_vector_path, 'land_points', analysis_area_sr_wkt, aoi_sr_wkt),
target_path_list=[land_vector_path],
task_name='create_land_points_vector')
@ -1413,26 +1410,6 @@ def _add_target_fields_to_wave_vector(
target_wave_vector = None
def _get_validated_dataframe(csv_path, field_list):
"""Return a dataframe with upper cased fields, and a list of missing fields.
Args:
csv_path (str): path to the csv to be converted to a dataframe.
field_list (list): a list of fields in string format.
Returns:
dataframe (pandas.DataFrame): from csv with upper-cased fields.
missing_fields (list): missing fields as string format in dataframe.
"""
dataframe = utils.read_csv_to_dataframe(csv_path, convert_vals_to_lower=False)
missing_fields = []
for field in field_list:
if field not in dataframe.columns:
missing_fields.append(field)
return dataframe, missing_fields
def _dict_to_point_vector(base_dict_data, target_vector_path, layer_name,
base_sr_wkt, target_sr_wkt):
"""Given a dictionary of data create a point shapefile that represents it.
@ -1493,6 +1470,7 @@ def _dict_to_point_vector(base_dict_data, target_vector_path, layer_name,
for point_dict in base_dict_data.values():
latitude = float(point_dict['lat'])
longitude = float(point_dict['long'])
point_dict['id'] = int(point_dict['id'])
# When projecting to WGS84, extents -180 to 180 are used for longitude.
# In case input longitude is from -360 to 0 convert
if longitude < -180:
@ -1505,7 +1483,7 @@ def _dict_to_point_vector(base_dict_data, target_vector_path, layer_name,
target_layer.CreateFeature(output_feature)
for field_name in point_dict:
output_feature.SetField(field_name, point_dict[field_name])
output_feature.SetField(field_name.upper(), point_dict[field_name])
output_feature.SetGeometryDirectly(geom)
target_layer.SetFeature(output_feature)
output_feature = None
@ -1670,9 +1648,13 @@ def _machine_csv_to_dict(machine_csv_path):
machine_dict = {}
# make columns and indexes lowercased and strip whitespace
machine_data = utils.read_csv_to_dataframe(
machine_csv_path, 'name', convert_vals_to_lower=False)
machine_data.index = machine_data.index.str.strip()
machine_data.index = machine_data.index.str.lower()
machine_csv_path,
{
'index_col': 'name',
'columns': {
'name': {'type': 'freestyle_string'},
'value': {'type': 'number'}
}})
# drop NaN indexed rows in dataframe
machine_data = machine_data[machine_data.index.notnull()]

View File

@ -328,6 +328,7 @@ MODEL_SPEC = {
},
"grid_points_path": {
"type": "csv",
"index_col": "id",
"columns": {
"id": {
"type": "integer",
@ -382,6 +383,7 @@ MODEL_SPEC = {
},
"wind_schedule": {
"type": "csv",
"index_col": "year",
"columns": {
"year": {
"type": "number",
@ -524,10 +526,10 @@ MODEL_SPEC = {
"about": "Wind data",
"geometries": spec_utils.POINT,
"fields": OUTPUT_WIND_DATA_FIELDS
},
"_taskgraph_working_dir": spec_utils.TASKGRAPH_DIR
}
}
}
},
"taskgraph_cache": spec_utils.TASKGRAPH_DIR
}
}
@ -654,7 +656,6 @@ def execute(args):
suffix = utils.make_suffix_string(args, 'results_suffix')
# Initialize a TaskGraph
taskgraph_working_dir = os.path.join(inter_dir, '_taskgraph_working_dir')
try:
n_workers = int(args['n_workers'])
except (KeyError, ValueError, TypeError):
@ -662,7 +663,8 @@ def execute(args):
# ValueError when n_workers is an empty string.
# TypeError when n_workers is None.
n_workers = -1 # single process mode.
task_graph = taskgraph.TaskGraph(taskgraph_working_dir, n_workers)
task_graph = taskgraph.TaskGraph(
os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers)
# Resample the bathymetry raster if it does not have square pixel size
try:
@ -754,9 +756,11 @@ def execute(args):
# If Price Table provided use that for price of energy, validate inputs
time = int(val_parameters_dict['time_period'])
if args['price_table']:
wind_price_df = utils.read_csv_to_dataframe(args['wind_schedule'])
wind_price_df = utils.read_csv_to_dataframe(
args['wind_schedule'], MODEL_SPEC['args']['wind_schedule']
).sort_index() # sort by year
year_count = len(wind_price_df['year'])
year_count = len(wind_price_df)
if year_count != time + 1:
raise ValueError(
"The 'time' argument in the Global Wind Energy Parameters "
@ -765,7 +769,6 @@ def execute(args):
# Save the price values into a list where the indices of the list
# indicate the time steps for the lifespan of the wind farm
wind_price_df.sort_values('year', inplace=True)
price_list = wind_price_df['price'].tolist()
else:
change_rate = float(args["rate_change"])
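As a quick illustrative check of the row-count rule above (numbers
hypothetical): with time_period = 20 in the global parameters table, the wind
schedule needs 21 rows, one price per year from year 0 through year 20, so
that len(wind_price_df) == time + 1.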
@ -1134,19 +1137,11 @@ def execute(args):
# Read the grid points csv, and convert it to land and grid dictionary
grid_land_df = utils.read_csv_to_dataframe(
args['grid_points_path'], convert_vals_to_lower=False)
# Make separate dataframes based on 'TYPE'
grid_df = grid_land_df.loc[(
grid_land_df['type'].str.upper() == 'GRID')]
land_df = grid_land_df.loc[(
grid_land_df['type'].str.upper() == 'LAND')]
args['grid_points_path'], MODEL_SPEC['args']['grid_points_path'])
# Convert the dataframes to dictionaries, using 'ID' (the index) as key
grid_df.set_index('id', inplace=True)
grid_dict = grid_df.to_dict('index')
land_df.set_index('id', inplace=True)
land_dict = land_df.to_dict('index')
grid_dict = grid_land_df[grid_land_df['type'] == 'grid'].to_dict('index')
land_dict = grid_land_df[grid_land_df['type'] == 'land'].to_dict('index')
grid_vector_path = os.path.join(
inter_dir, 'val_grid_points%s.shp' % suffix)
@ -1974,7 +1969,8 @@ def _read_csv_wind_data(wind_data_path, hub_height):
"""
wind_point_df = utils.read_csv_to_dataframe(
wind_data_path, convert_cols_to_lower=False, convert_vals_to_lower=False)
wind_data_path, MODEL_SPEC['args']['wind_data_path'])
wind_point_df.columns = wind_point_df.columns.str.upper()
# Calculate scale value at new hub height given reference values.
# See equation 3 in users guide
@ -2631,10 +2627,12 @@ def _clip_vector_by_vector(
shutil.rmtree(temp_dir, ignore_errors=True)
if empty_clip:
# The "clip_vector_path" is always the AOI.
raise ValueError(
f"Clipping {base_vector_path} by {clip_vector_path} returned 0"
" features. If an AOI was provided this could mean the AOI and"
" Wind Data do not intersect spatially.")
f" features. This means the AOI and {base_vector_path} do not"
" intersect spatially. Please check that the AOI has spatial"
" overlap with all input data.")
LOGGER.info('Finished _clip_vector_by_vector')

View File

@ -10,6 +10,7 @@ import textwrap
import unittest
import numpy
import pandas
import pygeoprocessing
from natcap.invest import utils
from osgeo import gdal
@ -151,10 +152,9 @@ class TestPreprocessor(unittest.TestCase):
pprint.pformat(non_suffixed_files)))
expected_landcover_codes = set(range(0, 24))
found_landcover_codes = set(utils.read_csv_to_dataframe(
os.path.join(outputs_dir,
'carbon_biophysical_table_template_150225.csv'),
'code').to_dict(orient='index').keys())
found_landcover_codes = set(pandas.read_csv(
os.path.join(outputs_dir, 'carbon_biophysical_table_template_150225.csv')
)['code'].values)
self.assertEqual(expected_landcover_codes, found_landcover_codes)
def test_transition_table(self):
@ -188,25 +188,27 @@ class TestPreprocessor(unittest.TestCase):
lulc_csv.write('0,mangrove,True\n')
lulc_csv.write('1,parking lot,False\n')
landcover_table = utils.read_csv_to_dataframe(
landcover_table_path, 'code').to_dict(orient='index')
landcover_df = utils.read_csv_to_dataframe(
landcover_table_path,
preprocessor.MODEL_SPEC['args']['lulc_lookup_table_path'])
target_table_path = os.path.join(self.workspace_dir,
'transition_table.csv')
# Remove landcover code 1 from the table; expect error.
del landcover_table[1]
landcover_df = landcover_df.drop(1)
with self.assertRaises(ValueError) as context:
preprocessor._create_transition_table(
landcover_table, [filename_a, filename_b], target_table_path)
landcover_df, [filename_a, filename_b], target_table_path)
self.assertIn('missing a row with the landuse code 1',
str(context.exception))
# Re-load the landcover table
landcover_table = utils.read_csv_to_dataframe(
landcover_table_path, 'code').to_dict(orient='index')
landcover_df = utils.read_csv_to_dataframe(
landcover_table_path,
preprocessor.MODEL_SPEC['args']['lulc_lookup_table_path'])
preprocessor._create_transition_table(
landcover_table, [filename_a, filename_b], target_table_path)
landcover_df, [filename_a, filename_b], target_table_path)
with open(target_table_path) as transition_table:
self.assertEqual(
@ -235,46 +237,13 @@ class TestCBC2(unittest.TestCase):
"""Remove workspace after each test function."""
shutil.rmtree(self.workspace_dir)
def test_extract_snapshots(self):
"""CBC: Extract snapshots from a snapshot CSV."""
from natcap.invest.coastal_blue_carbon import coastal_blue_carbon
csv_path = os.path.join(self.workspace_dir, 'snapshots.csv')
transition_years = (2000, 2010, 2020)
transition_rasters = []
with open(csv_path, 'w') as transitions_csv:
# Check that we can interpret varying case.
transitions_csv.write('snapshot_YEAR,raster_PATH\n')
for transition_year in transition_years:
# Write absolute paths.
transition_file_path = os.path.join(
self.workspace_dir, f'{transition_year}.tif)')
transition_rasters.append(transition_file_path)
transitions_csv.write(
f'{transition_year},{transition_file_path}\n')
# Make one path relative to the workspace, where the transitions
# CSV also lives.
# The expected raster path is absolute.
transitions_csv.write('2030,some_path.tif\n')
transition_years += (2030,)
transition_rasters.append(os.path.join(self.workspace_dir,
'some_path.tif'))
extracted_transitions = (
coastal_blue_carbon._extract_snapshots_from_table(csv_path))
self.assertEqual(
extracted_transitions,
dict(zip(transition_years, transition_rasters)))
def test_read_invalid_transition_matrix(self):
"""CBC: Test exceptions in invalid transition structure."""
# The full biophysical table will have much, much more information. To
# keep the test simple, I'm only tracking the columns I know I'll need
# in this function.
from natcap.invest.coastal_blue_carbon import coastal_blue_carbon
biophysical_table = {
biophysical_table = pandas.DataFrame({
1: {'lulc-class': 'a',
'soil-yearly-accumulation': 2,
'biomass-yearly-accumulation': 3,
@ -290,7 +259,7 @@ class TestCBC2(unittest.TestCase):
'biomass-yearly-accumulation': 11,
'soil-high-impact-disturb': 12,
'biomass-high-impact-disturb': 13}
}
}).T
transition_csv_path = os.path.join(self.workspace_dir,
'transitions.csv')
@ -332,7 +301,7 @@ class TestCBC2(unittest.TestCase):
# keep the test simple, I'm only tracking the columns I know I'll need
# in this function.
from natcap.invest.coastal_blue_carbon import coastal_blue_carbon
biophysical_table = {
biophysical_table = pandas.DataFrame({
1: {'lulc-class': 'a',
'soil-yearly-accumulation': 2,
'biomass-yearly-accumulation': 3,
@ -348,7 +317,7 @@ class TestCBC2(unittest.TestCase):
'biomass-yearly-accumulation': 11,
'soil-high-impact-disturb': 12,
'biomass-high-impact-disturb': 13}
}
}).T
transition_csv_path = os.path.join(self.workspace_dir,
'transitions.csv')
@ -366,14 +335,14 @@ class TestCBC2(unittest.TestCase):
expected_biomass_disturbance = numpy.zeros((4, 4), dtype=numpy.float32)
expected_biomass_disturbance[1, 3] = (
biophysical_table[1]['biomass-high-impact-disturb'])
biophysical_table['biomass-high-impact-disturb'][1])
numpy.testing.assert_allclose(
expected_biomass_disturbance,
disturbance_matrices['biomass'].toarray())
expected_soil_disturbance = numpy.zeros((4, 4), dtype=numpy.float32)
expected_soil_disturbance[1, 3] = (
biophysical_table[1]['soil-high-impact-disturb'])
biophysical_table['soil-high-impact-disturb'][1])
numpy.testing.assert_allclose(
expected_soil_disturbance,
disturbance_matrices['soil'].toarray())
@ -381,22 +350,22 @@ class TestCBC2(unittest.TestCase):
expected_biomass_accumulation = numpy.zeros(
(4, 4), dtype=numpy.float32)
expected_biomass_accumulation[3, 1] = (
biophysical_table[1]['biomass-yearly-accumulation'])
biophysical_table['biomass-yearly-accumulation'][1])
expected_biomass_accumulation[1, 2] = (
biophysical_table[2]['biomass-yearly-accumulation'])
biophysical_table['biomass-yearly-accumulation'][2])
expected_biomass_accumulation[2, 3] = (
biophysical_table[3]['biomass-yearly-accumulation'])
biophysical_table['biomass-yearly-accumulation'][3])
numpy.testing.assert_allclose(
expected_biomass_accumulation,
accumulation_matrices['biomass'].toarray())
expected_soil_accumulation = numpy.zeros((4, 4), dtype=numpy.float32)
expected_soil_accumulation[3, 1] = (
biophysical_table[1]['soil-yearly-accumulation'])
biophysical_table['soil-yearly-accumulation'][1])
expected_soil_accumulation[1, 2] = (
biophysical_table[2]['soil-yearly-accumulation'])
biophysical_table['soil-yearly-accumulation'][2])
expected_soil_accumulation[2, 3] = (
biophysical_table[3]['soil-yearly-accumulation'])
biophysical_table['soil-yearly-accumulation'][3])
numpy.testing.assert_allclose(
expected_soil_accumulation,
accumulation_matrices['soil'].toarray())
@ -649,8 +618,10 @@ class TestCBC2(unittest.TestCase):
args = TestCBC2._create_model_args(self.workspace_dir)
args['workspace_dir'] = os.path.join(self.workspace_dir, 'workspace')
prior_snapshots = coastal_blue_carbon._extract_snapshots_from_table(
args['landcover_snapshot_csv'])
prior_snapshots = utils.read_csv_to_dataframe(
args['landcover_snapshot_csv'],
coastal_blue_carbon.MODEL_SPEC['args']['landcover_snapshot_csv']
)['raster_path'].to_dict()
baseline_year = min(prior_snapshots.keys())
baseline_raster = prior_snapshots[baseline_year]
with open(args['landcover_snapshot_csv'], 'w') as snapshot_csv:
@ -825,8 +796,10 @@ class TestCBC2(unittest.TestCase):
args = TestCBC2._create_model_args(self.workspace_dir)
args['workspace_dir'] = os.path.join(self.workspace_dir, 'workspace')
prior_snapshots = coastal_blue_carbon._extract_snapshots_from_table(
args['landcover_snapshot_csv'])
prior_snapshots = utils.read_csv_to_dataframe(
args['landcover_snapshot_csv'],
coastal_blue_carbon.MODEL_SPEC['args']['landcover_snapshot_csv']
)['raster_path'].to_dict()
baseline_year = min(prior_snapshots.keys())
baseline_raster = prior_snapshots[baseline_year]
with open(args['landcover_snapshot_csv'], 'w') as snapshot_csv:
@ -889,8 +862,10 @@ class TestCBC2(unittest.TestCase):
raster.write('not a raster')
# Write over the landcover snapshot CSV
prior_snapshots = coastal_blue_carbon._extract_snapshots_from_table(
args['landcover_snapshot_csv'])
prior_snapshots = utils.read_csv_to_dataframe(
args['landcover_snapshot_csv'],
coastal_blue_carbon.MODEL_SPEC['args']['landcover_snapshot_csv']
)['raster_path'].to_dict()
baseline_year = min(prior_snapshots)
with open(args['landcover_snapshot_csv'], 'w') as snapshot_table:
snapshot_table.write('snapshot_year,raster_path\n')
@ -902,8 +877,18 @@ class TestCBC2(unittest.TestCase):
# analysis year must be >= the last transition year.
args['analysis_year'] = baseline_year
# Write invalid entries to landcover transition table
with open(args['landcover_transitions_table'], 'w') as transition_table:
transition_table.write('lulc-class,Developed,Forest,Water\n')
transition_table.write('Developed,NCC,,invalid\n')
transition_table.write('Forest,accum,disturb,low-impact-disturb\n')
transition_table.write('Water,disturb,med-impact-disturb,high-impact-disturb\n')
transition_options = [
'accum', 'high-impact-disturb', 'med-impact-disturb',
'low-impact-disturb', 'ncc']
validation_warnings = coastal_blue_carbon.validate(args)
self.assertEqual(len(validation_warnings), 2)
self.assertEqual(len(validation_warnings), 3)
self.assertIn(
coastal_blue_carbon.INVALID_SNAPSHOT_RASTER_MSG.format(
snapshot_year=baseline_year + 10),
@ -912,6 +897,11 @@ class TestCBC2(unittest.TestCase):
coastal_blue_carbon.INVALID_ANALYSIS_YEAR_MSG.format(
analysis_year=2000, latest_year=2010),
validation_warnings[1][1])
self.assertIn(
coastal_blue_carbon.INVALID_TRANSITION_VALUES_MSG.format(
model_transitions=transition_options,
transition_values=['disturb', 'invalid']),
validation_warnings[2][1])
def test_track_first_disturbance(self):
"""CBC: Track disturbances over time."""

View File

@ -63,14 +63,11 @@ class CropProductionTests(unittest.TestCase):
pandas.testing.assert_frame_equal(
expected_agg_result_table, agg_result_table, check_dtype=False)
result_table_path = os.path.join(
args['workspace_dir'], 'result_table.csv')
expected_result_table_path = os.path.join(
TEST_DATA_PATH, 'expected_result_table.csv')
expected_result_table = pandas.read_csv(
expected_result_table_path)
os.path.join(TEST_DATA_PATH, 'expected_result_table.csv')
)
result_table = pandas.read_csv(
result_table_path)
os.path.join(args['workspace_dir'], 'result_table.csv'))
pandas.testing.assert_frame_equal(
expected_result_table, result_table, check_dtype=False)
@ -314,14 +311,10 @@ class CropProductionTests(unittest.TestCase):
crop_production_regression.execute(args)
agg_result_table_path = os.path.join(
args['workspace_dir'], 'aggregate_results.csv')
expected_agg_result_table_path = os.path.join(
TEST_DATA_PATH, 'expected_regression_aggregate_results.csv')
expected_agg_result_table = pandas.read_csv(
expected_agg_result_table_path)
os.path.join(TEST_DATA_PATH, 'expected_regression_aggregate_results.csv'))
agg_result_table = pandas.read_csv(
agg_result_table_path)
os.path.join(args['workspace_dir'], 'aggregate_results.csv'))
pandas.testing.assert_frame_equal(
expected_agg_result_table, agg_result_table, check_dtype=False)
@ -387,14 +380,10 @@ class CropProductionTests(unittest.TestCase):
crop_production_regression.execute(args)
result_table_path = os.path.join(
args['workspace_dir'], 'result_table.csv')
expected_result_table_path = os.path.join(
TEST_DATA_PATH, 'expected_regression_result_table_no_nodata.csv')
expected_result_table = pandas.read_csv(
expected_result_table_path)
expected_result_table = pandas.read_csv(os.path.join(
TEST_DATA_PATH, 'expected_regression_result_table_no_nodata.csv'))
result_table = pandas.read_csv(
result_table_path)
os.path.join(args['workspace_dir'], 'result_table.csv'))
pandas.testing.assert_frame_equal(
expected_result_table, result_table, check_dtype=False)

View File

@ -378,8 +378,14 @@ class DatastackArchiveTests(unittest.TestCase):
filecmp.cmp(archive_params[key], params[key], shallow=False))
spatial_csv_dict = utils.read_csv_to_dataframe(
archive_params['spatial_table'], 'ID',
convert_cols_to_lower=True, convert_vals_to_lower=True).to_dict(orient='index')
archive_params['spatial_table'],
{
'index_col': 'id',
'columns': {
'id': {'type': 'integer'},
'path': {'type': 'file'}
}
}).to_dict(orient='index')
spatial_csv_dir = os.path.dirname(archive_params['spatial_table'])
numpy.testing.assert_allclose(
pygeoprocessing.raster_to_numpy_array(

View File

@ -1934,7 +1934,7 @@ class HabitatQualityTests(unittest.TestCase):
self.assertTrue(
validate_result,
"expected failed validations instead didn't get any.")
self.assertIn("Bounding boxes do not intersect", validate_result[0][1])
self.assertIn("bounding boxes must intersect", validate_result[0][1])
def test_habitat_quality_argspec_missing_projection(self):
"""Habitat Quality: raise error on missing projection."""

View File

@ -514,40 +514,6 @@ class HRAUnitTests(unittest.TestCase):
(source_array != nodata).astype(numpy.uint8)
)
def test_create_raster_from_bounding_box(self):
"""HRA: test creation of a raster from a bbox."""
from natcap.invest import hra
# [minx, miny, maxx, maxy]
bounding_box = [
ORIGIN[0],
ORIGIN[1] - 100, # force rounding up of pixel dimensions
ORIGIN[0] + 90, # no rounding up needed
ORIGIN[1],
]
pixel_size = (30, -30)
target_raster_path = os.path.join(self.workspace_dir, 'raster.tif')
hra._create_raster_from_bounding_box(
target_raster_path, bounding_box, pixel_size, gdal.GDT_Byte,
SRS_WKT, target_nodata=2, fill_value=2)
try:
raster = gdal.OpenEx(target_raster_path)
band = raster.GetRasterBand(1)
self.assertEqual(
raster.GetGeoTransform(),
(ORIGIN[0], pixel_size[0], 0.0, ORIGIN[1], 0.0, pixel_size[1])
)
self.assertEqual(raster.RasterXSize, 3)
self.assertEqual(raster.RasterYSize, 4)
self.assertEqual(band.GetNoDataValue(), 2)
numpy.testing.assert_array_equal(
band.ReadAsArray(),
numpy.full((4, 3), 2, dtype=numpy.uint8))
finally:
band = None
raster = None
def test_align(self):
"""HRA: test alignment function."""
from natcap.invest import hra
@ -749,21 +715,22 @@ class HRAUnitTests(unittest.TestCase):
# No matter the supported file format, make sure we have consistent
# table headings.
source_df = pandas.read_csv(io.StringIO(textwrap.dedent("""\
FOO,bar,BaZ,path
1, 2, 3,foo.tif""")))
source_df = pandas.DataFrame({
'name': pandas.Series(['1'], dtype='string'),
'type': pandas.Series(['2'], dtype='string'),
'stressor buffer (meters)': pandas.Series([3], dtype=float),
'path': pandas.Series(['foo.tif'], dtype='string')
})
expected_df = source_df.copy() # defaults to a deepcopy.
expected_df.columns = expected_df.columns.str.lower()
expected_df['path'] = [os.path.join(self.workspace_dir, 'foo.tif')]
expected_df = source_df.copy().set_index('name') # defaults to a deepcopy.
expected_df['path']['1'] = os.path.join(self.workspace_dir, 'foo.tif')
for filename, func in [('target.csv', source_df.to_csv),
('target.xlsx', source_df.to_excel)]:
full_filepath = os.path.join(self.workspace_dir, filename)
func(full_filepath, index=False)
opened_df = hra._open_table_as_dataframe(full_filepath)
pandas.testing.assert_frame_equal(expected_df, opened_df)
pandas.testing.assert_frame_equal(expected_df, opened_df, check_index_type=False)
def test_pairwise_risk(self):
"""HRA: check pairwise risk calculations."""

View File

@ -173,7 +173,11 @@ class ValidateModelSpecs(unittest.TestCase):
spec['columns'][column],
f'{key}.columns.{column}',
parent_type=t)
if 'index_col' in spec:
self.assertIn(spec['index_col'], spec['columns'])
attrs.discard('columns')
attrs.discard('index_col')
elif t == 'directory':
# directory type should have a contents property that maps each
@ -249,6 +253,7 @@ class ValidateModelSpecs(unittest.TestCase):
types = arg['type'] if isinstance(
arg['type'], set) else [arg['type']]
attrs = set(arg.keys())
for t in types:
self.assertIn(t, valid_nested_types[parent_type])
@ -395,8 +400,12 @@ class ValidateModelSpecs(unittest.TestCase):
f'{name}.{direction}.{header}',
parent_type=t)
attrs.discard('rows')
attrs.discard('columns')
if 'index_col' in arg:
self.assertIn(arg['index_col'], arg['columns'])
attrs.discard('index_col')
attrs.discard('rows')
attrs.discard('columns')
# csv type may optionally have an 'excel_ok' attribute
if 'excel_ok' in arg:

View File

@ -107,7 +107,6 @@ class NDRTests(unittest.TestCase):
# use predefined directory so test can clean up files during teardown
args = NDRTests.generate_base_args(self.workspace_dir)
# make args explicit that this is a base run of SWY
args['biophysical_table_path'] = os.path.join(
REGRESSION_DATA, 'input', 'biophysical_table_missing_headers.csv')
with self.assertRaises(ValueError):
@ -171,7 +170,6 @@ class NDRTests(unittest.TestCase):
# use predefined directory so test can clean up files during teardown
args = NDRTests.generate_base_args(self.workspace_dir)
# make args explicit that this is a base run of SWY
args['biophysical_table_path'] = os.path.join(
REGRESSION_DATA, 'input', 'biophysical_table_missing_lucode.csv')
with self.assertRaises(KeyError) as cm:
@ -187,7 +185,6 @@ class NDRTests(unittest.TestCase):
# use predefined directory so test can clean up files during teardown
args = NDRTests.generate_base_args(self.workspace_dir)
# make args explicit that this is a base run of SWY
args['calc_n'] = False
args['calc_p'] = False
with self.assertRaises(ValueError):
@ -210,8 +207,6 @@ class NDRTests(unittest.TestCase):
os.path.join(self.workspace_dir, 'watershed_results_ndr.gpkg'),
'wb') as f:
f.write(b'')
# make args explicit that this is a base run of SWY
ndr.execute(args)
result_vector = ogr.Open(os.path.join(
@ -248,6 +243,53 @@ class NDRTests(unittest.TestCase):
args['workspace_dir'], 'intermediate_outputs',
'what_drains_to_stream.tif')))
def test_regression_undefined_nodata(self):
"""NDR test when DEM, LULC and runoff proxy have undefined nodata."""
from natcap.invest.ndr import ndr
# use predefined directory so test can clean up files during teardown
args = NDRTests.generate_base_args(self.workspace_dir)
# unset nodata values for DEM, LULC, and runoff proxy
# this is ok because the test data is 100% valid
# regression test for https://github.com/natcap/invest/issues/1005
for key in ['runoff_proxy_path', 'dem_path', 'lulc_path']:
target_path = os.path.join(self.workspace_dir, f'{key}_no_nodata.tif')
source = gdal.OpenEx(args[key], gdal.OF_RASTER)
driver = gdal.GetDriverByName('GTIFF')
target = driver.CreateCopy(target_path, source)
target.GetRasterBand(1).DeleteNoDataValue()
source, target = None, None
args[key] = target_path
ndr.execute(args)
result_vector = ogr.Open(os.path.join(
args['workspace_dir'], 'watershed_results_ndr.gpkg'))
result_layer = result_vector.GetLayer()
result_feature = result_layer.GetFeature(1)
result_layer = None
result_vector = None
mismatch_list = []
# these values were generated by manual inspection of regression
# results
for field, expected_value in [
('p_surface_load', 41.921860),
('p_surface_export', 5.899117),
('n_surface_load', 2978.519775),
('n_surface_export', 289.0498),
('n_subsurface_load', 28.614094),
('n_subsurface_export', 15.61077),
('n_total_export', 304.660614)]:
val = result_feature.GetField(field)
if not numpy.isclose(val, expected_value):
mismatch_list.append(
(field, 'expected: %f' % expected_value,
'actual: %f' % val))
result_feature = None
if mismatch_list:
raise RuntimeError("results not expected: %s" % mismatch_list)
def test_validation(self):
"""NDR test argument validation."""
from natcap.invest import validation

View File

@ -581,14 +581,14 @@ class TestRecServer(unittest.TestCase):
expected_grid_vector_path = os.path.join(
REGRESSION_DATA, 'predictor_data_all_metrics.shp')
utils._assert_vectors_equal(
out_grid_vector_path, expected_grid_vector_path, 1e-3)
expected_grid_vector_path, out_grid_vector_path, 1e-3)
out_scenario_path = os.path.join(
args['workspace_dir'], 'scenario_results.shp')
expected_scenario_path = os.path.join(
REGRESSION_DATA, 'scenario_results_all_metrics.shp')
utils._assert_vectors_equal(
out_scenario_path, expected_scenario_path, 1e-3)
expected_scenario_path, out_scenario_path, 1e-3)
def test_results_suffix_on_serverside_files(self):
"""Recreation test suffix gets added to files created on server."""
@ -924,7 +924,7 @@ class RecreationRegressionTests(unittest.TestCase):
REGRESSION_DATA, 'square_grid_vector_path.shp')
utils._assert_vectors_equal(
out_grid_vector_path, expected_grid_vector_path)
expected_grid_vector_path, out_grid_vector_path)
def test_hex_grid(self):
"""Recreation hex grid regression test."""
@ -941,7 +941,7 @@ class RecreationRegressionTests(unittest.TestCase):
REGRESSION_DATA, 'hex_grid_vector_path.shp')
utils._assert_vectors_equal(
out_grid_vector_path, expected_grid_vector_path)
expected_grid_vector_path, out_grid_vector_path)
@unittest.skip("skipping to avoid remote server call (issue #3753)")
def test_no_grid_execute(self):
@ -1007,7 +1007,7 @@ class RecreationRegressionTests(unittest.TestCase):
REGRESSION_DATA, 'hex_grid_vector_path.shp')
utils._assert_vectors_equal(
out_grid_vector_path, expected_grid_vector_path)
expected_grid_vector_path, out_grid_vector_path)
def test_existing_regression_coef(self):
"""Recreation test regression coefficients handle existing output."""
@ -1030,8 +1030,9 @@ class RecreationRegressionTests(unittest.TestCase):
# make outputs to be overwritten
predictor_dict = utils.read_csv_to_dataframe(
predictor_table_path, 'id',
convert_cols_to_lower=True, convert_vals_to_lower=True).to_dict(orient='index')
predictor_table_path,
recmodel_client.MODEL_SPEC['args']['predictor_table_path']
).to_dict(orient='index')
predictor_list = predictor_dict.keys()
tmp_working_dir = tempfile.mkdtemp(dir=self.workspace_dir)
empty_json_list = [
@ -1057,7 +1058,7 @@ class RecreationRegressionTests(unittest.TestCase):
REGRESSION_DATA, 'test_regression_coefficients.shp')
utils._assert_vectors_equal(
out_coefficient_vector_path, expected_coeff_vector_path, 1e-6)
expected_coeff_vector_path, out_coefficient_vector_path, 1e-6)
def test_predictor_table_absolute_paths(self):
"""Recreation test validation from full path."""

View File

@ -126,7 +126,8 @@ class ScenicQualityTests(unittest.TestCase):
with self.assertRaises(AssertionError) as cm:
scenic_quality._determine_valid_viewpoints(
dem_path, viewpoints_path)
self.assertIn('Feature 1 is not a Point geometry', str(cm.exception))
self.assertIn('Feature 1 must be a POINT geometry, not LINESTRING',
str(cm.exception))
def test_exception_when_no_structures_aoi_overlap(self):
"""SQ: model raises exception when AOI does not overlap structures."""

View File

@ -141,11 +141,11 @@ class SDRTests(unittest.TestCase):
sdr.execute(args)
expected_results = {
'usle_tot': 13.90210914612,
'sed_export': 0.55185163021,
'sed_dep': 8.80130577087,
'avoid_exp': 57971.87890625,
'avoid_eros': 1458232.5,
'usle_tot': 2.62457418442,
'sed_export': 0.09748090804,
'sed_dep': 1.71672844887,
'avoid_exp': 10199.7490234375,
'avoid_eros': 274510.75,
}
vector_path = os.path.join(
@ -213,10 +213,10 @@ class SDRTests(unittest.TestCase):
sdr.execute(args)
expected_results = {
'sed_export': 0.55185163021,
'usle_tot': 13.90210914612,
'avoid_exp': 57971.87890625,
'avoid_eros': 1458232.5,
'sed_export': 0.09748090804,
'usle_tot': 2.62457418442,
'avoid_exp': 10199.7490234375,
'avoid_eros': 274510.75,
}
vector_path = os.path.join(
@ -238,10 +238,10 @@ class SDRTests(unittest.TestCase):
sdr.execute(args)
expected_results = {
'sed_export': 0.67064666748,
'usle_tot': 12.6965303421,
'avoid_exp': 69130.8203125,
'avoid_eros': 1317588.375,
'sed_export': 0.08896198869,
'usle_tot': 1.86480903625,
'avoid_exp': 9204.283203125,
'avoid_eros': 194613.28125,
}
vector_path = os.path.join(
@ -264,10 +264,10 @@ class SDRTests(unittest.TestCase):
sdr.execute(args)
expected_results = {
'sed_export': 0.97192692757,
'usle_tot': 12.68887424469,
'avoid_exp': 100960.9609375,
'avoid_eros': 1329122.0,
'sed_export': 0.17336219549,
'usle_tot': 2.56186032295,
'avoid_exp': 17980.52734375,
'avoid_eros': 267931.71875,
}
vector_path = os.path.join(
@ -303,8 +303,7 @@ class SDRTests(unittest.TestCase):
with self.assertRaises(ValueError) as context:
sdr.execute(args)
self.assertIn(
f'A value in the biophysical table is not a number '
f'within range 0..1.', str(context.exception))
f'could not be interpreted as ratios', str(context.exception))
def test_lucode_not_a_number(self):
"""SDR test expected exception for invalid data in lucode column."""
@ -325,8 +324,7 @@ class SDRTests(unittest.TestCase):
with self.assertRaises(ValueError) as context:
sdr.execute(args)
self.assertIn(
f'Value "{invalid_value}" from the "lucode" column of the '
f'biophysical table is not a number.', str(context.exception))
'could not be interpreted as integers', str(context.exception))
def test_missing_lulc_value(self):
"""SDR test for ValueError when LULC value not found in table."""
@ -393,3 +391,43 @@ class SDRTests(unittest.TestCase):
what_drains = pygeoprocessing.raster_to_numpy_array(
target_what_drains_path)
numpy.testing.assert_allclose(what_drains, expected_drainage)
def test_ls_factor(self):
"""SDR test for our LS Factor function."""
from natcap.invest.sdr import sdr
nodata = -1
# These varying percent slope values should cover all of the slope
# factor and slope table cases.
pct_slope_array = numpy.array(
[[1.5, 4, 8, 10, 15, nodata]], dtype=numpy.float32)
flow_accum_array = numpy.array(
[[100, 100, 100, 100, 10000000, nodata]], dtype=numpy.float32)
l_max = 25 # affects the last item in the array only
srs = osr.SpatialReference()
srs.ImportFromEPSG(26910) # NAD83 / UTM zone 11N
srs_wkt = srs.ExportToWkt()
origin = (463250, 4929700)
pixel_size = (30, -30)
pct_slope_path = os.path.join(self.workspace_dir, 'pct_slope.tif')
pygeoprocessing.numpy_array_to_raster(
pct_slope_array, nodata, pixel_size, origin, srs_wkt,
pct_slope_path)
flow_accum_path = os.path.join(self.workspace_dir, 'flow_accum.tif')
pygeoprocessing.numpy_array_to_raster(
flow_accum_array, nodata, pixel_size, origin, srs_wkt,
flow_accum_path)
target_ls_factor_path = os.path.join(self.workspace_dir, 'ls.tif')
sdr._calculate_ls_factor(flow_accum_path, pct_slope_path, l_max,
target_ls_factor_path)
ls = pygeoprocessing.raster_to_numpy_array(target_ls_factor_path)
expected_ls = numpy.array(
[[0.253996, 0.657229, 1.345856, 1.776729, 49.802994, -1]],
dtype=numpy.float32)
numpy.testing.assert_allclose(ls, expected_ls, rtol=1e-6)

View File

@ -737,8 +737,8 @@ class SeasonalWaterYieldRegressionTests(unittest.TestCase):
with self.assertRaises(ValueError) as context:
seasonal_water_yield.execute(args)
self.assertTrue(
'expecting all floating point numbers' in str(context.exception))
self.assertIn(
'could not be interpreted as numbers', str(context.exception))
def test_monthly_alpha_regression(self):
"""SWY monthly alpha values regression test on sample data.
@ -974,12 +974,6 @@ class SeasonalWaterYieldRegressionTests(unittest.TestCase):
precip_array = numpy.array([
[10, 10],
[10, 10]], dtype=numpy.float32)
lulc_array = numpy.array([
[1, 1],
[2, 2]], dtype=numpy.float32)
cn_array = numpy.array([
[40, 40],
[80, 80]], dtype=numpy.float32)
si_array = numpy.array([
[15, 15],
[2.5, 2.5]], dtype=numpy.float32)
@ -990,13 +984,12 @@ class SeasonalWaterYieldRegressionTests(unittest.TestCase):
[0, 0],
[0, 0]], dtype=numpy.float32)
# results calculated by wolfram alpha
expected_quickflow_array = numpy.array([
[-4.82284552e-36, -4.82284552e-36],
[ 6.19275831e-01, 6.19275831e-01]])
[0, 0],
[0.61928378, 0.61928378]])
precip_path = os.path.join(self.workspace_dir, 'precip.tif')
lulc_path = os.path.join(self.workspace_dir, 'lulc.tif')
cn_path = os.path.join(self.workspace_dir, 'cn.tif')
si_path = os.path.join(self.workspace_dir, 'si.tif')
n_events_path = os.path.join(self.workspace_dir, 'n_events.tif')
stream_path = os.path.join(self.workspace_dir, 'stream.tif')
@ -1008,13 +1001,11 @@ class SeasonalWaterYieldRegressionTests(unittest.TestCase):
# write all the test arrays to raster files
for array, path in [(precip_array, precip_path),
(lulc_array, lulc_path),
(n_events_array, n_events_path)]:
# make the nodata value undefined for user inputs
pygeoprocessing.numpy_array_to_raster(
array, None, (1, -1), (1180000, 690000), project_wkt, path)
for array, path in [(cn_array, cn_path),
(si_array, si_path),
for array, path in [(si_array, si_path),
(stream_mask, stream_path)]:
# define a nodata value for intermediate outputs
pygeoprocessing.numpy_array_to_raster(
@ -1022,13 +1013,119 @@ class SeasonalWaterYieldRegressionTests(unittest.TestCase):
# save the quickflow results raster to quickflow.tif
seasonal_water_yield._calculate_monthly_quick_flow(
precip_path, lulc_path, cn_path, n_events_path, stream_path,
si_path, output_path)
precip_path, n_events_path, stream_path, si_path, output_path)
# read the raster output back in to a numpy array
quickflow_array = pygeoprocessing.raster_to_numpy_array(output_path)
# assert each element is close to the expected value
self.assertTrue(numpy.isclose(
quickflow_array, expected_quickflow_array).all())
numpy.testing.assert_allclose(
quickflow_array, expected_quickflow_array, atol=1e-5)
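For orientation (restating the quick flow equation from the SWY user's guide,
not something introduced by this diff): on non-stream pixels,
QF_im = n_m * ((a_im - s_i) * exp(-0.2 * s_i / a_im)
               + (s_i**2 / a_im) * exp(0.8 * s_i / a_im) * E1(s_i / a_im)) * 25.4,
where a_im = P_im / (n_m * 25.4) and E1 is the exponential integral. Setting
s_i = 0 collapses this to n_m * a_im * 25.4 = P_im, which is what the next
test asserts.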
def test_monthly_quickflow_si_zero(self):
"""Test `_calculate_monthly_quick_flow` when s_i is zero"""
from natcap.invest.seasonal_water_yield import seasonal_water_yield
# QF should be equal to P when s_i is 0
precip_array = numpy.array([[10.5]], dtype=numpy.float32)
si_array = numpy.array([[0]], dtype=numpy.float32)
n_events_array = numpy.array([[10]], dtype=numpy.float32)
stream_mask = numpy.array([[0]], dtype=numpy.float32)
expected_quickflow_array = numpy.array([[10.5]])
precip_path = os.path.join(self.workspace_dir, 'precip.tif')
si_path = os.path.join(self.workspace_dir, 'si.tif')
n_events_path = os.path.join(self.workspace_dir, 'n_events.tif')
stream_path = os.path.join(self.workspace_dir, 'stream.tif')
srs = osr.SpatialReference()
srs.ImportFromEPSG(26910) # UTM Zone 10N
project_wkt = srs.ExportToWkt()
output_path = os.path.join(self.workspace_dir, 'quickflow.tif')
# write all the test arrays to raster files
for array, path in [(precip_array, precip_path),
(n_events_array, n_events_path),
(si_array, si_path),
(stream_mask, stream_path)]:
# define a nodata value for intermediate outputs
pygeoprocessing.numpy_array_to_raster(
array, -1, (1, -1), (1180000, 690000), project_wkt, path)
seasonal_water_yield._calculate_monthly_quick_flow(
precip_path, n_events_path, stream_path, si_path, output_path)
numpy.testing.assert_allclose(
pygeoprocessing.raster_to_numpy_array(output_path),
expected_quickflow_array, atol=1e-5)
def test_monthly_quickflow_large_si_aim_ratio(self):
"""Test `_calculate_monthly_quick_flow` with large s_i/a_im ratio"""
from natcap.invest.seasonal_water_yield import seasonal_water_yield
# with these values, the QF equation would overflow float32 if
# we didn't catch it early
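# back-of-envelope with these (hypothetical) numbers, using the user's guide
# relation a_im = P_im / (n_m * 25.4): a_im = 6 / (10 * 25.4) ~ 0.024, so
# s_i / a_im ~ 988 and exp(0.8 * s_i / a_im) ~ exp(790), far beyond float32's
# maximum of about 3.4e38; hence the expected result of 0.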
precip_array = numpy.array([[6]], dtype=numpy.float32)
si_array = numpy.array([[23.33]], dtype=numpy.float32)
n_events_array = numpy.array([[10]], dtype=numpy.float32)
stream_mask = numpy.array([[0]], dtype=numpy.float32)
expected_quickflow_array = numpy.array([[0]])
precip_path = os.path.join(self.workspace_dir, 'precip.tif')
si_path = os.path.join(self.workspace_dir, 'si.tif')
n_events_path = os.path.join(self.workspace_dir, 'n_events.tif')
stream_path = os.path.join(self.workspace_dir, 'stream.tif')
srs = osr.SpatialReference()
srs.ImportFromEPSG(26910) # UTM Zone 10N
project_wkt = srs.ExportToWkt()
output_path = os.path.join(self.workspace_dir, 'quickflow.tif')
# write all the test arrays to raster files
for array, path in [(precip_array, precip_path),
(n_events_array, n_events_path),
(si_array, si_path),
(stream_mask, stream_path)]:
# define a nodata value for intermediate outputs
pygeoprocessing.numpy_array_to_raster(
array, -1, (1, -1), (1180000, 690000), project_wkt, path)
seasonal_water_yield._calculate_monthly_quick_flow(
precip_path, n_events_path, stream_path, si_path, output_path)
numpy.testing.assert_allclose(
pygeoprocessing.raster_to_numpy_array(output_path),
expected_quickflow_array, atol=1e-5)
def test_monthly_quickflow_negative_values_set_to_zero(self):
"""Test `_calculate_monthly_quick_flow` with negative QF result"""
from natcap.invest.seasonal_water_yield import seasonal_water_yield
# with these values, the QF equation evaluates to a small negative
# number. assert that it is set to zero
precip_array = numpy.array([[30]], dtype=numpy.float32)
si_array = numpy.array([[10]], dtype=numpy.float32)
n_events_array = numpy.array([[10]], dtype=numpy.float32)
stream_mask = numpy.array([[0]], dtype=numpy.float32)
expected_quickflow_array = numpy.array([[0]])
precip_path = os.path.join(self.workspace_dir, 'precip.tif')
si_path = os.path.join(self.workspace_dir, 'si.tif')
n_events_path = os.path.join(self.workspace_dir, 'n_events.tif')
stream_path = os.path.join(self.workspace_dir, 'stream.tif')
srs = osr.SpatialReference()
srs.ImportFromEPSG(26910) # UTM Zone 10N
project_wkt = srs.ExportToWkt()
output_path = os.path.join(self.workspace_dir, 'quickflow.tif')
# write all the test arrays to raster files
for array, path in [(precip_array, precip_path),
(n_events_array, n_events_path),
(si_array, si_path),
(stream_mask, stream_path)]:
# define a nodata value for intermediate outputs
pygeoprocessing.numpy_array_to_raster(
array, -1, (1, -1), (1180000, 690000), project_wkt, path)
seasonal_water_yield._calculate_monthly_quick_flow(
precip_path, n_events_path, stream_path, si_path, output_path)
numpy.testing.assert_allclose(
pygeoprocessing.raster_to_numpy_array(output_path),
expected_quickflow_array, atol=1e-5)
def test_calculate_annual_qfi_different_nodata_areas(self):
"""Test with qf rasters with different areas of nodata."""
@ -1079,8 +1176,8 @@ class SeasonalWaterYieldRegressionTests(unittest.TestCase):
[100, 100],
[200, 200]], dtype=numpy.float32)
quickflow_array = numpy.array([
[-4.8e-36, -4.822e-36],
[ 6.1e-01, 6.1e-01]], dtype=numpy.float32)
[0, 0],
[0.61, 0.61]], dtype=numpy.float32)
flow_dir_array = numpy.array([
[15, 25],
[50, 50]], dtype=numpy.float32)

View File

@ -360,3 +360,17 @@ class UFRMTests(unittest.TestCase):
[(['curve_number_table_path'],
validation.MESSAGES['MATCHED_NO_HEADERS'].format(
header='column', header_name='cn_a'))])
# test missing CN_X values raise warnings
args = self._make_args()
cn_table = pandas.read_csv(args['curve_number_table_path'])
cn_table.at[0, 'CN_A'] = numpy.nan
new_cn_path = os.path.join(
self.workspace_dir, 'cn_missing_value_table.csv')
cn_table.to_csv(new_cn_path, index=False)
args['curve_number_table_path'] = new_cn_path
result = urban_flood_risk_mitigation.validate(args)
self.assertEqual(
result,
[(['curve_number_table_path'],
'Missing curve numbers for lucode(s) [0]')])

View File

@ -85,7 +85,8 @@ def _build_model_args(workspace):
6,0,100
7,1,100
8,0,100
9,1,100"""))
9,1,100
"""))
admin_geom = [
shapely.geometry.box(
@ -342,7 +343,7 @@ class UNATests(unittest.TestCase):
from natcap.invest import urban_nature_access
nodata = urban_nature_access.FLOAT32_NODATA
urban_nature_supply = numpy.array([
urban_nature_supply_percapita = numpy.array([
[nodata, 100.5],
[75, 100]], dtype=numpy.float32)
urban_nature_demand = 50
@ -353,7 +354,7 @@ class UNATests(unittest.TestCase):
urban_nature_budget = (
urban_nature_access._urban_nature_balance_percapita_op(
urban_nature_supply, urban_nature_demand))
urban_nature_supply_percapita, urban_nature_demand))
expected_urban_nature_budget = numpy.array([
[nodata, 50.5],
[25, 50]], dtype=numpy.float32)
@ -480,6 +481,16 @@ class UNATests(unittest.TestCase):
admin_vector = None
admin_layer = None
accessible_urban_nature_array = pygeoprocessing.raster_to_numpy_array(
os.path.join(args['workspace_dir'], 'output',
'accessible_urban_nature_suffix.tif'))
valid_mask = ~utils.array_equals_nodata(
accessible_urban_nature_array, urban_nature_access.FLOAT32_NODATA)
valid_pixels = accessible_urban_nature_array[valid_mask]
self.assertAlmostEqual(numpy.sum(valid_pixels), 6221004.41259766)
self.assertAlmostEqual(numpy.min(valid_pixels), 1171.7352294921875)
self.assertAlmostEqual(numpy.max(valid_pixels), 11898.0712890625)
def test_split_urban_nature(self):
from natcap.invest import urban_nature_access
@ -532,6 +543,23 @@ class UNATests(unittest.TestCase):
admin_vector = None
admin_layer = None
output_dir = os.path.join(args['workspace_dir'], 'output')
self._assert_urban_nature(os.path.join(
output_dir, 'accessible_urban_nature_lucode_1_suffix.tif'),
72000.0, 0.0, 900.0)
self._assert_urban_nature(os.path.join(
output_dir, 'accessible_urban_nature_lucode_3_suffix.tif'),
1034934.9864730835, 0.0, 4431.1650390625)
self._assert_urban_nature(os.path.join(
output_dir, 'accessible_urban_nature_lucode_5_suffix.tif'),
2837622.9519348145, 0.0, 8136.6884765625)
self._assert_urban_nature(os.path.join(
output_dir, 'accessible_urban_nature_lucode_7_suffix.tif'),
8112734.805541992, 2019.2935791015625, 17729.431640625)
self._assert_urban_nature(os.path.join(
output_dir, 'accessible_urban_nature_lucode_9_suffix.tif'),
7744116.974121094, 1567.57958984375, 12863.4619140625)
def test_split_population(self):
"""UNA: test split population optional module.
@ -602,6 +630,36 @@ class UNATests(unittest.TestCase):
rtol=1e-6
)
def _assert_urban_nature(self, path, sum_value, min_value, max_value):
"""Compare a raster's sum, min and max to given values.
The raster is assumed to be an accessible urban nature raster.
Args:
path (str): The path to an urban nature raster.
sum_value (float): The expected sum of the raster.
min_value (float): The expected min of the raster.
max_value (float): The expected max of the raster.
Returns:
``None``
Raises:
AssertionError: When the raster's sum, min or max values are not
numerically close to the expected values.
"""
from natcap.invest import urban_nature_access
accessible_urban_nature_array = (
pygeoprocessing.raster_to_numpy_array(path))
valid_mask = ~utils.array_equals_nodata(
accessible_urban_nature_array,
urban_nature_access.FLOAT32_NODATA)
valid_pixels = accessible_urban_nature_array[valid_mask]
self.assertAlmostEqual(numpy.sum(valid_pixels), sum_value)
self.assertAlmostEqual(numpy.min(valid_pixels), min_value)
self.assertAlmostEqual(numpy.max(valid_pixels), max_value)
def test_radii_by_pop_group(self):
"""UNA: Test defining radii by population group."""
from natcap.invest import urban_nature_access
@ -666,11 +724,19 @@ class UNATests(unittest.TestCase):
self.assertAlmostEqual(
expected_value, summary_feature.GetField(fieldname))
output_dir = os.path.join(args['workspace_dir'], 'output')
self._assert_urban_nature(os.path.join(
output_dir, 'accessible_urban_nature_to_pop_male.tif'),
6221004.412597656, 1171.7352294921875, 11898.0712890625)
self._assert_urban_nature(os.path.join(
output_dir, 'accessible_urban_nature_to_pop_female.tif'),
6221004.412597656, 1171.7352294921875, 11898.0712890625)
def test_modes_same_radii_same_results(self):
"""UNA: all modes have same results when consistent radii.
Although the different modes have different ways of defining their
search radii, the urban_nature_supply raster should be numerically
search radii, the urban_nature_supply_percapita raster should be numerically
equivalent if they all use the same search radii.
This is a good gut-check of basic model behavior across modes.
@ -772,16 +838,19 @@ class UNATests(unittest.TestCase):
uniform_radius_supply = pygeoprocessing.raster_to_numpy_array(
os.path.join(uniform_args['workspace_dir'], 'output',
'urban_nature_supply_uniform.tif'))
split_urban_nature_supply = pygeoprocessing.raster_to_numpy_array(
os.path.join(split_urban_nature_args['workspace_dir'], 'output',
'urban_nature_supply_urban_nature.tif'))
'urban_nature_supply_percapita_uniform.tif'))
split_urban_nature_supply_percapita = (
pygeoprocessing.raster_to_numpy_array(
os.path.join(
split_urban_nature_args['workspace_dir'], 'output',
'urban_nature_supply_percapita_urban_nature.tif')))
split_pop_groups_supply = pygeoprocessing.raster_to_numpy_array(
os.path.join(pop_group_args['workspace_dir'], 'output',
'urban_nature_supply_popgroup.tif'))
'urban_nature_supply_percapita_popgroup.tif'))
numpy.testing.assert_allclose(
uniform_radius_supply, split_urban_nature_supply, rtol=1e-6)
uniform_radius_supply, split_urban_nature_supply_percapita,
rtol=1e-6)
numpy.testing.assert_allclose(
uniform_radius_supply, split_pop_groups_supply, rtol=1e-6)
@ -893,9 +962,76 @@ class UNATests(unittest.TestCase):
# TODO
pass
def test_urban_nature_proportion(self):
"""UNA: Run the model with urban nature proportion."""
from natcap.invest import urban_nature_access
args = _build_model_args(self.workspace_dir)
args['search_radius_mode'] = urban_nature_access.RADIUS_OPT_UNIFORM
args['search_radius'] = 1000
with open(args['lulc_attribute_table'], 'a') as attr_table:
attr_table.write("10,0.5,100\n")
# make sure our inputs validate
validation_results = urban_nature_access.validate(args)
self.assertEqual(validation_results, [])
urban_nature_access.execute(args)
def test_reclassify_urban_nature(self):
"""UNA: Test for urban nature area reclassification."""
from natcap.invest import urban_nature_access
args = _build_model_args(self.workspace_dir)
# Rewrite the lulc attribute table to use proportions of urban nature.
with open(args['lulc_attribute_table'], 'w') as attr_table:
attr_table.write(textwrap.dedent(
"""\
lucode,urban_nature,search_radius_m
0,0,100
1,0.1,100
2,0,100
3,0.3,100
4,0,100
5,0.5,100
6,0,100
7,0.7,100
8,0,100
9,0.9,100
"""))
urban_nature_area_path = os.path.join(
self.workspace_dir, 'urban_nature_area.tif')
for limit_to_lucodes in (None, set([1, 3])):
urban_nature_access._reclassify_urban_nature_area(
args['lulc_raster_path'], args['lulc_attribute_table'],
urban_nature_area_path,
only_these_urban_nature_codes=limit_to_lucodes)
# The source lulc is randomized, so need to programmatically build
# up the expected array.
source_lulc_array = pygeoprocessing.raster_to_numpy_array(
args['lulc_raster_path'])
pixel_area = abs(_DEFAULT_PIXEL_SIZE[0] * _DEFAULT_PIXEL_SIZE[1])
expected_array = numpy.zeros(source_lulc_array.shape,
dtype=numpy.float32)
for i in range(1, 10, 2):
if limit_to_lucodes is not None:
if i not in limit_to_lucodes:
continue
factor = float(f"0.{i}")
expected_array[source_lulc_array == i] = factor * pixel_area
reclassified_array = pygeoprocessing.raster_to_numpy_array(
urban_nature_area_path)
numpy.testing.assert_array_almost_equal(
reclassified_array, expected_array)
def test_validate(self):
"""UNA: Basic test for validation."""
from natcap.invest import urban_nature_access
args = _build_model_args(self.workspace_dir)
args['search_radius_mode'] = urban_nature_access.RADIUS_OPT_URBAN_NATURE
args['search_radius_mode'] = (
urban_nature_access.RADIUS_OPT_URBAN_NATURE)
self.assertEqual(urban_nature_access.validate(args), [])

View File

@ -15,6 +15,7 @@ import warnings
import numpy
import numpy.testing
import pandas as pd
import pygeoprocessing
from osgeo import gdal
from osgeo import ogr
@ -254,7 +255,7 @@ class GaussianDecayUtilsTests(unittest.TestCase):
# The sigma*3 is the maximum radius from the center
# Anything greater than that distance should be set to 0 by the
# gaussian kernel creation function.
kernel[dist_from_center > (sigma * 3)] = 0.0
kernel[dist_from_center > (sigma * 3)] = 0
return kernel / numpy.sum(kernel)
expected_matrix = gkern()
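# --- Illustrative sketch (not part of the diff) ---
# A self-contained version of the truncated, normalized Gaussian kernel that
# the comment above describes. The function name and the sigma / kernel-size
# values below are assumptions chosen for demonstration only.
import numpy

def example_truncated_gaussian_kernel(sigma=5, n_pixels=31):
    center = n_pixels // 2
    rows, cols = numpy.mgrid[0:n_pixels, 0:n_pixels]
    dist_from_center = numpy.hypot(rows - center, cols - center)
    kernel = numpy.exp(-(dist_from_center ** 2) / (2 * sigma ** 2))
    # anything farther than sigma * 3 from the center is truncated to 0
    kernel[dist_from_center > (sigma * 3)] = 0
    # normalize so the kernel sums to 1
    return kernel / numpy.sum(kernel)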
@ -619,12 +620,14 @@ class ReadCSVToDataframeTests(unittest.TestCase):
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
"""\
HEADER,
A,
header,
a,
b
"""
))
df = utils.read_csv_to_dataframe(csv_file)
df = utils.read_csv_to_dataframe(
csv_file,
{'columns': {'header': {'type': 'freestyle_string'}}})
# header and table values should be lowercased
self.assertEqual(df.columns[0], 'header')
self.assertEqual(df['header'][0], 'a')
@ -642,15 +645,19 @@ class ReadCSVToDataframeTests(unittest.TestCase):
with open(table_path, 'w') as table_file:
table_file.write(csv_text)
result = utils.read_csv_to_dataframe(
table_path, 'lucode').to_dict(orient='index')
expected_result = {
1: {'desc': 'corn', 'val1': 0.5, 'val2': 2, 'lucode': 1},
2: {'desc': 'bread', 'val1': 1, 'val2': 4, 'lucode': 2},
3: {'desc': 'beans', 'val1': 0.5, 'val2': 4, 'lucode': 3},
4: {'desc': 'butter', 'val1': 9, 'val2': 1, 'lucode': 4}}
self.assertDictEqual(result, expected_result)
df = utils.read_csv_to_dataframe(
table_path,
{
'index_col': 'lucode',
'columns': {
'desc': {'type': 'freestyle_string'},
'lucode': {'type': 'integer'},
'val1': {'type': 'number'},
'val2': {'type': 'number'}
}})
self.assertEqual(df.index.name, 'lucode')
self.assertEqual(list(df.index.values), [1, 2, 3, 4])
self.assertEqual(df['desc'][2], 'bread')
def test_non_unique_keys(self):
"""utils: test error is raised if keys are not unique."""
@ -665,7 +672,16 @@ class ReadCSVToDataframeTests(unittest.TestCase):
table_file.write(csv_text)
with self.assertRaises(ValueError):
utils.read_csv_to_dataframe(table_path, 'lucode')
utils.read_csv_to_dataframe(
table_path,
{
'index_col': 'lucode',
'columns': {
'desc': {'type': 'freestyle_string'},
'lucode': {'type': 'integer'},
'val1': {'type': 'number'},
'val2': {'type': 'number'}
}})
def test_missing_key_field(self):
"""utils: test error is raised when missing key field."""
@ -680,29 +696,16 @@ class ReadCSVToDataframeTests(unittest.TestCase):
table_file.write(csv_text)
with self.assertRaises(KeyError):
utils.read_csv_to_dataframe(table_path, 'lucode')
def test_nan_holes(self):
"""utils: test empty strings returned when missing data is present."""
from natcap.invest import utils
csv_text = ("lucode,desc,val1,val2\n"
"1,corn,0.5,2\n"
"2,,1,4\n"
"3,beans,0.5,4\n"
"4,butter,,1")
table_path = os.path.join(self.workspace_dir, 'table.csv')
with open(table_path, 'w') as table_file:
table_file.write(csv_text)
result = utils.read_csv_to_dataframe(
table_path, 'lucode').to_dict(orient='index')
expected_result = {
1: {'desc': 'corn', 'val1': 0.5, 'val2': 2, 'lucode': 1},
2: {'desc': '', 'val1': 1, 'val2': 4, 'lucode': 2},
3: {'desc': 'beans', 'val1': 0.5, 'val2': 4, 'lucode': 3},
4: {'desc': 'butter', 'val1': '', 'val2': 1, 'lucode': 4}}
self.assertDictEqual(result, expected_result)
utils.read_csv_to_dataframe(
table_path,
{
'index_col': 'lucode',
'columns': {
'desc': {'type': 'freestyle_string'},
'lucode': {'type': 'integer'},
'val1': {'type': 'number'},
'val2': {'type': 'number'}
}})
def test_nan_row(self):
"""utils: test NaN row is dropped."""
@ -717,60 +720,88 @@ class ReadCSVToDataframeTests(unittest.TestCase):
table_file.write(csv_text)
result = utils.read_csv_to_dataframe(
table_path, 'lucode').to_dict(orient='index')
table_path,
{
'index_col': 'lucode',
'columns': {
'desc': {'type': 'freestyle_string'},
'lucode': {'type': 'integer'},
'val1': {'type': 'number'},
'val2': {'type': 'number'}
}}).to_dict(orient='index')
expected_result = {
1.0: {'desc': 'corn', 'val1': 0.5, 'val2': 2, 'lucode': 1.0},
3.0: {'desc': 'beans', 'val1': 0.5, 'val2': 4, 'lucode': 3.0},
4.0: {'desc': 'butter', 'val1': 9, 'val2': 1, 'lucode': 4.0}}
1: {'desc': 'corn', 'val1': 0.5, 'val2': 2},
3: {'desc': 'beans', 'val1': 0.5, 'val2': 4},
4: {'desc': 'butter', 'val1': 9, 'val2': 1}}
self.assertDictEqual(result, expected_result)
def test_column_subset(self):
"""utils: test column subset is properly returned."""
from natcap.invest import utils
csv_text = ("lucode,desc,val1,val2\n"
"1,corn,0.5,2\n"
"2,bread,1,4\n"
"3,beans,0.5,4\n"
"4,butter,9,1")
table_path = os.path.join(self.workspace_dir, 'table.csv')
with open(table_path, 'w') as table_file:
table_file.write(csv_text)
table_file.write(
"lucode,desc,val1,val2\n"
"1,corn,0.5,2\n"
"2,bread,1,4\n"
"3,beans,0.5,4\n"
"4,butter,9,1")
df = utils.read_csv_to_dataframe(
table_path,
{
'columns': {
'lucode': {'type': 'integer'},
'val1': {'type': 'number'},
'val2': {'type': 'number'}
}
})
self.assertEqual(list(df.columns), ['lucode', 'val1', 'val2'])
result = utils.read_csv_to_dataframe(
table_path, 'lucode',
usecols=['lucode', 'val1', 'val2']).to_dict(orient='index')
expected_result = {
1: {'val1': 0.5, 'val2': 2, 'lucode': 1},
2: {'val1': 1, 'val2': 4, 'lucode': 2},
3: {'val1': 0.5, 'val2': 4, 'lucode': 3},
4: {'val1': 9, 'val2': 1, 'lucode': 4}}
self.assertDictEqual(result, expected_result)
def test_column_pattern_matching(self):
"""utils: test column subset is properly returned."""
from natcap.invest import utils
table_path = os.path.join(self.workspace_dir, 'table.csv')
with open(table_path, 'w') as table_file:
table_file.write(
"lucode,grassland_value,forest_value,wetland_valueee\n"
"1,0.5,2\n"
"2,1,4\n"
"3,0.5,4\n"
"4,9,1")
df = utils.read_csv_to_dataframe(
table_path, {
'columns': {
'lucode': {'type': 'integer'},
'[HABITAT]_value': {'type': 'number'}
}
})
self.assertEqual(
list(df.columns), ['lucode', 'grassland_value', 'forest_value'])
def test_trailing_comma(self):
"""utils: test a trailing comma on first line is handled properly."""
from natcap.invest import utils
csv_text = ("lucode,desc,val1,val2\n"
"1,corn,0.5,2,\n"
"2,bread,1,4\n"
"3,beans,0.5,4\n"
"4,butter,9,1")
table_path = os.path.join(self.workspace_dir, 'table.csv')
with open(table_path, 'w') as table_file:
table_file.write(csv_text)
table_file.write(
"lucode,desc,val1,val2\n"
"1,corn,0.5,2,\n"
"2,bread,1,4\n"
"3,beans,0.5,4\n"
"4,butter,9,1")
result = utils.read_csv_to_dataframe(
table_path, 'lucode').to_dict(orient='index')
table_path,
{
'columns': {
'desc': {'type': 'freestyle_string'},
'lucode': {'type': 'integer'},
'val1': {'type': 'number'},
'val2': {'type': 'number'}
}})
self.assertEqual(result['val2'][0], 2)
self.assertEqual(result['lucode'][1], 2)
expected_result = {
1: {'desc': 'corn', 'val1': 0.5, 'val2': 2, 'lucode': 1},
2: {'desc': 'bread', 'val1': 1, 'val2': 4, 'lucode': 2},
3: {'desc': 'beans', 'val1': 0.5, 'val2': 4, 'lucode': 3},
4: {'desc': 'butter', 'val1': 9, 'val2': 1, 'lucode': 4}}
self.assertDictEqual(result, expected_result)
def test_trailing_comma_second_line(self):
"""utils: test a trailing comma on second line is handled properly."""
@ -785,58 +816,24 @@ class ReadCSVToDataframeTests(unittest.TestCase):
table_file.write(csv_text)
result = utils.read_csv_to_dataframe(
table_path, 'lucode').to_dict(orient='index')
table_path,
{
'index_col': 'lucode',
'columns': {
'desc': {'type': 'freestyle_string'},
'lucode': {'type': 'integer'},
'val1': {'type': 'number'},
'val2': {'type': 'number'}
}}).to_dict(orient='index')
expected_result = {
1: {'desc': 'corn', 'val1': 0.5, 'val2': 2, 'lucode': 1},
2: {'desc': 'bread', 'val1': 1, 'val2': 4, 'lucode': 2},
3: {'desc': 'beans', 'val1': 0.5, 'val2': 4, 'lucode': 3},
4: {'desc': 'butter', 'val1': 9, 'val2': 1, 'lucode': 4}}
1: {'desc': 'corn', 'val1': 0.5, 'val2': 2},
2: {'desc': 'bread', 'val1': 1, 'val2': 4},
3: {'desc': 'beans', 'val1': 0.5, 'val2': 4},
4: {'desc': 'butter', 'val1': 9, 'val2': 1}}
self.assertDictEqual(result, expected_result)
def test_results_lowercase_non_numeric(self):
"""utils: text handling of converting to lowercase."""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
"""\
header1,HEADER2,header3
1,2,bar
4,5,FOO
"""
))
lookup_dict = utils.read_csv_to_dataframe(
csv_file, 'header1').to_dict(orient='index')
self.assertEqual(lookup_dict[4]['header3'], 'foo')
self.assertEqual(lookup_dict[1]['header2'], 2)
def test_results_uppercase_numeric_cast(self):
"""utils: test handling of uppercase, num. casting, blank values."""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
"""\
header1,HEADER2,header3,missing_column,
1,2,3,
4,FOO,bar,
"""
))
lookup_dict = utils.read_csv_to_dataframe(
csv_file, 'header1',
convert_cols_to_lower=False, convert_vals_to_lower=False).to_dict(orient='index')
self.assertEqual(lookup_dict[4]['HEADER2'], 'FOO')
self.assertEqual(lookup_dict[4]['header3'], 'bar')
self.assertEqual(lookup_dict[1]['header1'], 1)
def test_csv_dialect_detection_semicolon_delimited(self):
"""utils: test that we can parse semicolon-delimited CSVs."""
from natcap.invest import utils
@ -851,13 +848,183 @@ class ReadCSVToDataframeTests(unittest.TestCase):
"""
))
lookup_dict = utils.read_csv_to_dataframe(
csv_file, 'header1',
convert_cols_to_lower=False, convert_vals_to_lower=False).to_dict(orient='index')
df = utils.read_csv_to_dataframe(
csv_file,
{'columns': {
'header1': {'type': 'integer'},
'header2': {'type': 'freestyle_string'},
'header3': {'type': 'freestyle_string'}
}
})
self.assertEqual(df['header2'][1], 'foo')
self.assertEqual(df['header3'][1], 'bar')
self.assertEqual(df['header1'][0], 1)
self.assertEqual(lookup_dict[4]['HEADER2'], 'FOO')
self.assertEqual(lookup_dict[4]['header3'], 'bar')
self.assertEqual(lookup_dict[1]['header1'], 1)
def test_convert_cols_to_lower(self):
"""utils: test that column names are converted to lowercase"""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
"""\
header,
A,
b
"""
))
df = utils.read_csv_to_dataframe(
csv_file, {'columns': {
'header': {'type': 'freestyle_string'}
}})
self.assertEqual(df['header'][0], 'a')
def test_convert_vals_to_lower(self):
"""utils: test that values are converted to lowercase"""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
"""\
HEADER,
a,
b
"""
))
df = utils.read_csv_to_dataframe(
csv_file, {'columns': {
'header': {'type': 'freestyle_string'}
}})
self.assertEqual(df.columns[0], 'header')
def test_integer_type_columns(self):
"""utils: integer column values are returned as integers."""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
"""\
id,header,
1,5.0,
2,-1,
3,
"""
))
df = utils.read_csv_to_dataframe(
csv_file, {'columns': {
'id': {'type': 'integer'},
'header': {'type': 'integer', 'na_allowed': True}}})
self.assertIsInstance(df['header'][0], numpy.int64)
self.assertIsInstance(df['header'][1], numpy.int64)
# empty values are returned as pandas.NA
self.assertTrue(pd.isna(df['header'][2]))
def test_float_type_columns(self):
"""utils: float column values are returned as floats."""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
"""\
h1,h2,h3
5,0.5,.4
-1,-.3,
"""
))
df = utils.read_csv_to_dataframe(
csv_file, {'columns': {
'h1': {'type': 'number'},
'h2': {'type': 'ratio'},
'h3': {'type': 'percent', 'na_allowed': True},
}})
self.assertEqual(df['h1'].dtype, float)
self.assertEqual(df['h2'].dtype, float)
self.assertEqual(df['h3'].dtype, float)
# empty values are returned as numpy.nan
self.assertTrue(numpy.isnan(df['h3'][1]))
def test_string_type_columns(self):
"""utils: string column values are returned as strings."""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
"""\
h1,h2,h3
1,a,foo
2,b,
"""
))
df = utils.read_csv_to_dataframe(
csv_file, {'columns': {
'h1': {'type': 'freestyle_string'},
'h2': {'type': 'option_string'},
'h3': {'type': 'freestyle_string'},
}})
self.assertEqual(df['h1'][0], '1')
self.assertEqual(df['h2'][1], 'b')
# empty values are returned as NA
self.assertTrue(pd.isna(df['h3'][1]))
def test_boolean_type_columns(self):
"""utils: boolean column values are returned as booleans."""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
"""\
index,h1
a,1
b,0
c,
"""
))
df = utils.read_csv_to_dataframe(
csv_file, {'columns': {
'index': {'type': 'freestyle_string'},
'h1': {'type': 'bool', 'na_allowed': True}}})
self.assertEqual(df['h1'][0], True)
self.assertEqual(df['h1'][1], False)
# empty values are returned as pandas.NA
self.assertTrue(pd.isna(df['h1'][2]))
def test_expand_path_columns(self):
"""utils: test values in path columns are expanded."""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
f"""\
bar,path
1,foo.txt
2,foo/bar.txt
3,foo\\bar.txt
4,{self.workspace_dir}/foo.txt
5,
"""
))
df = utils.read_csv_to_dataframe(
csv_file, {'columns': {
'bar': {'type': 'integer'},
'path': {'type': 'file'}
}})
self.assertEqual(
f'{self.workspace_dir}{os.sep}foo.txt',
df['path'][0])
self.assertEqual(
f'{self.workspace_dir}{os.sep}foo{os.sep}bar.txt',
df['path'][1])
self.assertEqual(
f'{self.workspace_dir}{os.sep}foo\\bar.txt',
df['path'][2])
self.assertEqual(
f'{self.workspace_dir}{os.sep}foo.txt',
df['path'][3])
# empty values are returned as NA
self.assertTrue(pd.isna(df['path'][4]))
def test_csv_utf8_encoding(self):
"""utils: test that CSV read correctly with UTF-8 encoding."""
@ -873,21 +1040,26 @@ class ReadCSVToDataframeTests(unittest.TestCase):
"""
))
lookup_dict = utils.read_csv_to_dataframe(
csv_file, 'header1').to_dict(orient='index')
csv_file,
{
'index_col': 'header1',
'columns': {
'header1': {'type': 'integer'},
'header2': {'type': 'integer'},
'header3': {'type': 'freestyle_string'}
}}).to_dict(orient='index')
self.assertEqual(lookup_dict[4]['header2'], 5)
self.assertEqual(lookup_dict[4]['header3'], 'foo')
self.assertEqual(lookup_dict[1]['header1'], 1)
def test_csv_utf8_bom_encoding(self):
def test_utf8_bom_encoding(self):
"""utils: test that CSV read correctly with UTF-8 BOM encoding."""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
# writing with utf-8-sig will prepend the BOM
with open(csv_file, 'w', encoding='utf-8-sig') as file_obj:
file_obj.write(textwrap.dedent(
"""\
header1,HEADER2,header3
header1,header2,header3
1,2,bar
4,5,FOO
"""
@ -895,18 +1067,20 @@ class ReadCSVToDataframeTests(unittest.TestCase):
# confirm that the file has the BOM prefix
with open(csv_file, 'rb') as file_obj:
self.assertTrue(file_obj.read().startswith(codecs.BOM_UTF8))
lookup_dict = utils.read_csv_to_dataframe(
csv_file, 'header1').to_dict(orient='index')
df = utils.read_csv_to_dataframe(csv_file,
{
'columns': {
'header1': {'type': 'integer'},
'header2': {'type': 'integer'},
'header3': {'type': 'freestyle_string'}
}})
# assert the BOM prefix was correctly parsed and skipped
self.assertEqual(lookup_dict[4]['header2'], 5)
self.assertEqual(lookup_dict[4]['header3'], 'foo')
self.assertEqual(lookup_dict[1]['header1'], 1)
self.assertEqual(df.columns[0], 'header1')
self.assertEqual(df['header2'][1], 5)
def test_csv_latin_1_encoding(self):
"""utils: test that CSV read correctly with Latin-1 encoding."""
"""utils: can read Latin-1 encoded CSV if it uses only ASCII chars."""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with codecs.open(csv_file, 'w', encoding='iso-8859-1') as file_obj:
file_obj.write(textwrap.dedent(
@ -916,13 +1090,16 @@ class ReadCSVToDataframeTests(unittest.TestCase):
4,5,FOO
"""
))
lookup_dict = utils.read_csv_to_dataframe(
csv_file, 'header 1').to_dict(orient='index')
self.assertEqual(lookup_dict[4]['header 2'], 5)
self.assertEqual(lookup_dict[4]['header 3'], 'foo')
self.assertEqual(lookup_dict[1]['header 1'], 1)
df = utils.read_csv_to_dataframe(
csv_file,
{'columns': {
'header 1': {'type': 'integer'},
'header 2': {'type': 'integer'},
'header 3': {'type': 'freestyle_string'}
}})
self.assertEqual(df['header 2'][1], 5)
self.assertEqual(df['header 3'][1], 'foo')
self.assertEqual(df['header 1'][0], 1)
def test_csv_error_non_utf8_character(self):
"""utils: test that error is raised on non-UTF8 character."""
@ -938,91 +1115,15 @@ class ReadCSVToDataframeTests(unittest.TestCase):
"""
))
with self.assertRaises(UnicodeDecodeError):
utils.read_csv_to_dataframe(csv_file, 'header 1')
def test_expand_path(self):
"""utils: test path expansion function."""
from natcap.invest import utils
base_path = os.path.join(self.workspace_dir, 'csv.csv')
self.assertEqual(
f'{self.workspace_dir}{os.sep}foo.txt',
utils.expand_path('foo.txt', base_path))
self.assertEqual(
f'{self.workspace_dir}{os.sep}foo{os.sep}bar.txt',
utils.expand_path('foo/bar.txt', base_path))
self.assertEqual(
f'{self.workspace_dir}{os.sep}foo\\bar.txt',
utils.expand_path('foo\\bar.txt', base_path))
self.assertEqual(
f'{self.workspace_dir}{os.sep}foo.txt',
utils.expand_path(f'{self.workspace_dir}{os.sep}foo.txt', base_path))
def test_convert_cols_to_lower(self):
"""utils: test that to_lower=True makes headers lowercase"""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
"""\
HEADER,
A,
b
"""
))
df = utils.read_csv_to_dataframe(
csv_file, convert_cols_to_lower=True, convert_vals_to_lower=False)
# header should be lowercase
self.assertEqual(df.columns[0], 'header')
# case of table values shouldn't change
self.assertEqual(df['header'][0], 'A')
self.assertEqual(df['header'][1], 'b')
def test_convert_vals_to_lower(self):
"""utils: test that to_lower=True makes headers lowercase"""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
"""\
HEADER,
A,
b
"""
))
df = utils.read_csv_to_dataframe(
csv_file, convert_cols_to_lower=False, convert_vals_to_lower=True)
# header should still be uppercase
self.assertEqual(df.columns[0], 'HEADER')
# case of table values should change
self.assertEqual(df['HEADER'][0], 'a')
self.assertEqual(df['HEADER'][1], 'b')
def test_utf8_bom_encoding(self):
"""utils: test that CSV read correctly with UTF-8 BOM encoding."""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
# writing with utf-8-sig will prepend the BOM
with open(csv_file, 'w', encoding='utf-8-sig') as file_obj:
file_obj.write(textwrap.dedent(
"""\
header1,header2,header3
1,2,bar
4,5,FOO
"""
))
# confirm that the file has the BOM prefix
with open(csv_file, 'rb') as file_obj:
self.assertTrue(file_obj.read().startswith(codecs.BOM_UTF8))
df = utils.read_csv_to_dataframe(csv_file)
# assert the BOM prefix was correctly parsed and skipped
self.assertEqual(df.columns[0], 'header1')
self.assertEqual(df['header2'][1], 5)
utils.read_csv_to_dataframe(
csv_file,
{
'index_col': 'header1',
'columns': {
'header1': {'type': 'integer'},
'header2': {'type': 'integer'},
'header3': {'type': 'freestyle_string'}
}})
def test_override_default_encoding(self):
"""utils: test that you can override the default encoding kwarg"""
@ -1039,7 +1140,10 @@ class ReadCSVToDataframeTests(unittest.TestCase):
bar
"""
))
df = utils.read_csv_to_dataframe(csv_file, encoding='iso8859_5')
df = utils.read_csv_to_dataframe(
csv_file, {
'columns': {'header': {'type': 'freestyle_string'}
}}, encoding='iso8859_5')
# with the encoding specified, special characters should work
# and be lowercased
self.assertEqual(df['header'][0], 'fюю')
@ -1061,10 +1165,16 @@ class ReadCSVToDataframeTests(unittest.TestCase):
))
# using sep=None with the default engine='python',
# it should infer what the separator is
df = utils.read_csv_to_dataframe(csv_file, sep=None)
df = utils.read_csv_to_dataframe(
csv_file, {
'columns': {
'h1': {'type': 'freestyle_string'},
'h2': {'type': 'freestyle_string'},
'h3': {'type': 'freestyle_string'}
}}, converters={'h2': lambda val: f'foo_{val}'})
self.assertEqual(df.columns[0], 'h1')
self.assertEqual(df['h2'][1], 'e')
self.assertEqual(df['h2'][1], 'foo_e')
def test_csv_with_integer_headers(self):
"""
@ -1085,7 +1195,13 @@ class ReadCSVToDataframeTests(unittest.TestCase):
d,e,f
"""
))
df = utils.read_csv_to_dataframe(csv_file)
df = utils.read_csv_to_dataframe(
csv_file,
{'columns': {
'1': {'type': 'freestyle_string'},
'2': {'type': 'freestyle_string'},
'3': {'type': 'freestyle_string'}
}})
# expect headers to be strings
self.assertEqual(df.columns[0], '1')
self.assertEqual(df['1'][0], 'a')
@ -1100,48 +1216,23 @@ class ReadCSVToDataframeTests(unittest.TestCase):
file_obj.write(" Col1, Col2 ,Col3 \n")
file_obj.write(" val1, val2 ,val3 \n")
file_obj.write(" , 2 1 , ")
df = utils.read_csv_to_dataframe(csv_file, convert_cols_to_lower=False)
# header should have no leading / trailing whitespace
self.assertEqual(df.columns[0], 'Col1')
self.assertEqual(df.columns[1], 'Col2')
self.assertEqual(df.columns[2], 'Col3')
# values should have no leading / trailing whitespace
self.assertEqual(df['Col1'][0], 'val1')
self.assertEqual(df['Col2'][0], 'val2')
self.assertEqual(df['Col3'][0], 'val3')
self.assertEqual(df['Col1'][1], '')
self.assertEqual(df['Col2'][1], '2 1')
self.assertEqual(df['Col3'][1], '')
def test_expand_path_columns(self):
"""utils: test path expansion feature of read_csv_to_dataframe."""
from natcap.invest import utils
csv_file = os.path.join(self.workspace_dir, 'csv.csv')
with open(csv_file, 'w') as file_obj:
file_obj.write(textwrap.dedent(
f"""\
bar,path
1,foo.txt
2,foo/bar.txt
3,foo\\bar.txt
4,{self.workspace_dir}/foo.txt
"""
))
df = utils.read_csv_to_dataframe(
csv_file, expand_path_cols=['path'], convert_vals_to_lower=False)
self.assertEqual(
f'{self.workspace_dir}{os.sep}foo.txt',
df['path'][0])
self.assertEqual(
f'{self.workspace_dir}{os.sep}foo{os.sep}bar.txt',
df['path'][1])
self.assertEqual(
f'{self.workspace_dir}{os.sep}foo\\bar.txt',
df['path'][2])
self.assertEqual(
f'{self.workspace_dir}{os.sep}foo.txt',
df['path'][3])
csv_file, {
'columns': {
'col1': {'type': 'freestyle_string'},
'col2': {'type': 'freestyle_string'},
'col3': {'type': 'freestyle_string'}
}})
# header should have no leading / trailing whitespace
self.assertEqual(list(df.columns), ['col1', 'col2', 'col3'])
# values should have no leading / trailing whitespace
self.assertEqual(df['col1'][0], 'val1')
self.assertEqual(df['col2'][0], 'val2')
self.assertEqual(df['col3'][0], 'val3')
self.assertEqual(df['col1'][1], '')
self.assertEqual(df['col2'][1], '2 1')
self.assertEqual(df['col3'][1], '')
class CreateCoordinateTransformationTests(unittest.TestCase):
@ -1471,7 +1562,7 @@ class AssertVectorsEqualTests(unittest.TestCase):
attrs = [{'id': 1, 'foo': 2.3456}, {'id': 2, 'foo': 5.6789}]
attrs_copy = [
{'id': 1, 'foo': 2.3456}, {'id': 2, 'foo': 5.6789},
{'id': 3, 'foo': 5.0}]
{'id': 3, 'foo': 5}]
srs = osr.SpatialReference()
srs.ImportFromEPSG(3157)

View File

@ -846,7 +846,7 @@ class WindEnergyRegressionTests(unittest.TestCase):
wind_energy.execute(args)
self.assertTrue(
"returned 0 features. If an AOI was" in str(cm.exception))
"returned 0 features. This means the AOI " in str(cm.exception))
class WindEnergyValidationTests(unittest.TestCase):

View File

@ -0,0 +1,22 @@
export default class Store {
constructor(options) {
this.defaults = options.defaults || {};
// copy so that set() and delete() do not mutate the defaults object
this.store = { ...this.defaults };
}
get(key) {
return this.store[key];
}
set(key, val) {
this.store[key] = val;
}
delete(key) {
delete this.store[key];
}
reset() {
this.store = { ...this.defaults };
}
}

View File

@ -43,11 +43,11 @@ const config = {
from: 'resources/storage_token.txt',
to: 'storage_token.txt',
},
{
from: '../LICENSE.txt',
to: 'LICENSE.InVEST.txt',
},
],
extraFiles: [{
from: '../LICENSE.txt',
to: 'LICENSE.InVEST.txt',
}],
appId: APP_ID,
productName: PRODUCT_NAME,
artifactName: ARTIFACT_NAME,

View File

@ -53,6 +53,7 @@
"dependencies": {
"@babel/runtime": "^7.13.10",
"electron-log": "^4.3.5",
"electron-store": "^8.1.0",
"i18next": "^22.4.9",
"localforage": "^1.9.0",
"node-fetch": "^2.6.7",
@ -71,6 +72,7 @@
"@testing-library/react": "^14.0.0",
"@testing-library/user-event": "^14.4.3",
"@vitejs/plugin-react": "^4.0.0",
"ajv": "^8.12.0",
"babel-eslint": "^10.1.0",
"bootstrap": "4.3.1",
"concurrently": "^8.2.0",

View File

@ -1,4 +1,4 @@
import { spawn, exec } from 'child_process';
import { spawn, execSync } from 'child_process';
import fetch from 'node-fetch';
@ -88,26 +88,12 @@ export async function shutdownPythonProcess(subprocess) {
subprocess.kill();
} else {
const { pid } = subprocess;
exec(`taskkill /pid ${pid} /t /f`);
execSync(`taskkill /pid ${pid} /t /f`);
}
} catch (error) {
// if the process was already killed by some other means
logger.debug(error);
} finally {
Promise.resolve();
}
// If we return too quickly, it seems the electron app is allowed
// to quit before the subprocess is killed, and the subprocess remains
// open. Here we poll a flask endpoint and resolve only when it
// gives ECONNREFUSED.
return fetch(`${HOSTNAME}:${process.env.PORT}/ready`, {
method: 'get',
})
.then(async () => {
await new Promise((resolve) => setTimeout(resolve, 300));
return shutdownPythonProcess(subprocess);
})
.catch(() => {
logger.debug('flask server is closed');
return Promise.resolve();
});
}

View File

@ -1,9 +1,12 @@
export const ipcMainChannels = {
CHANGE_LANGUAGE: 'change-language',
CHECK_FILE_PERMISSIONS: 'check-file-permissions',
CHECK_STORAGE_TOKEN: 'check-storage-token',
DOWNLOAD_URL: 'download-url',
GET_N_CPUS: 'get-n-cpus',
GET_ELECTRON_PATHS: 'get-electron-paths',
GET_N_CPUS: 'get-n-cpus',
GET_SETTING: 'get-setting',
GET_LANGUAGE: 'get-language',
INVEST_KILL: 'invest-kill',
INVEST_READ_LOG: 'invest-read-log',
INVEST_RUN: 'invest-run',
@ -12,8 +15,8 @@ export const ipcMainChannels = {
LOGGER: 'logger',
OPEN_EXTERNAL_URL: 'open-external-url',
OPEN_LOCAL_HTML: 'open-local-html',
SET_SETTING: 'set-setting',
SHOW_ITEM_IN_FOLDER: 'show-item-in-folder',
SHOW_OPEN_DIALOG: 'show-open-dialog',
SHOW_SAVE_DIALOG: 'show-save-dialog',
CHANGE_LANGUAGE: 'change-language',
};

View File

@ -4,7 +4,6 @@ import path from 'path';
import {
app,
BrowserWindow,
screen,
nativeTheme,
Menu,
ipcMain
@ -29,7 +28,7 @@ import {
import setupGetNCPUs from './setupGetNCPUs';
import setupOpenExternalUrl from './setupOpenExternalUrl';
import setupOpenLocalHtml from './setupOpenLocalHtml';
import setupChangeLanguage from './setupChangeLanguage';
import { settingsStore, setupSettingsHandlers } from './settingsStore';
import setupGetElectronPaths from './setupGetElectronPaths';
import setupRendererLogger from './setupRendererLogger';
import { ipcMainChannels } from './ipcMainChannels';
@ -37,8 +36,8 @@ import menuTemplate from './menubar';
import ELECTRON_DEV_MODE from './isDevMode';
import BASE_URL from './baseUrl';
import { getLogger } from './logger';
import pkg from '../../package.json';
import i18n from './i18n/i18n';
import pkg from '../../package.json';
const logger = getLogger(__filename.split('/').slice(-1)[0]);
@ -61,6 +60,7 @@ if (!process.env.PORT) {
let mainWindow;
let splashScreen;
let flaskSubprocess;
let forceQuit = false;
export function destroyWindow() {
mainWindow = null;
@ -71,6 +71,8 @@ export const createWindow = async () => {
logger.info(`Running invest-workbench version ${pkg.version}`);
nativeTheme.themeSource = 'light'; // override OS/browser setting
i18n.changeLanguage(settingsStore.get('language'));
splashScreen = new BrowserWindow({
width: 574, // dims set to match the image in splash.html
height: 500,
@ -86,7 +88,7 @@ export const createWindow = async () => {
setupCheckFilePermissions();
setupCheckFirstRun();
setupCheckStorageToken();
setupChangeLanguage();
setupSettingsHandlers();
setupGetElectronPaths();
setupGetNCPUs();
setupInvestLogReaderHandler();
@ -110,14 +112,6 @@ export const createWindow = async () => {
menuTemplate(mainWindow, ELECTRON_DEV_MODE, i18n)
)
);
// when language changes, rebuild the menu bar in new language
i18n.on('languageChanged', (lng) => {
Menu.setApplicationMenu(
Menu.buildFromTemplate(
menuTemplate(mainWindow, ELECTRON_DEV_MODE, i18n)
)
);
});
mainWindow.loadURL(path.join(BASE_URL, 'index.html'));
mainWindow.once('ready-to-show', () => {
@ -135,6 +129,16 @@ export const createWindow = async () => {
logger.error(details);
});
mainWindow.on('close', (event) => {
// 'close' is triggered by the red traffic light button on mac
// override this behavior and just minimize,
// unless we're actually quitting the app
if (process.platform === 'darwin' && !forceQuit) {
event.preventDefault();
mainWindow.minimize();
}
});
mainWindow.on('closed', () => {
mainWindow = null;
});
@ -186,17 +190,12 @@ export function main() {
createWindow();
}
});
app.on('window-all-closed', async () => {
// On OS X it is common for applications and their menu bar
// to stay active until the user quits explicitly with Cmd + Q
if (process.platform !== 'darwin') {
app.quit();
}
});
let shuttingDown = false;
app.on('before-quit', async (event) => {
// prevent quitting until after we're done with cleanup,
// then programmatically quit
forceQuit = true;
if (shuttingDown) { return; }
event.preventDefault();
shuttingDown = true;

View File

@ -0,0 +1,101 @@
import { app, ipcMain } from 'electron';
import Store from 'electron-store';
import Ajv from 'ajv';
import { ipcMainChannels } from './ipcMainChannels';
import { getLogger } from './logger';
const logger = getLogger(__filename.split('/').slice(-1)[0]);
export const defaults = {
nWorkers: -1,
taskgraphLoggingLevel: 'INFO',
loggingLevel: 'INFO',
language: 'en',
};
export const schema = {
type: 'object',
properties: {
nWorkers: {
type: 'number',
},
taskgraphLoggingLevel: {
enum: ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'],
},
loggingLevel: {
enum: ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'],
},
language: {
enum: ['en', 'es', 'zh'],
},
},
required: ['nWorkers', 'taskgraphLoggingLevel', 'loggingLevel', 'language']
};
/**
* Open a store and validate against a schema.
*
* Required properties missing from the store are initialized with defaults.
* Invalid properties are reset to defaults.
*
* @param {object} data - key-values with which to initialize a store.
* @returns {Store} an instance of an electron-store Store
*/
export function initStore(data = defaults) {
const ajv = new Ajv({ allErrors: true });
const validate = ajv.compile(schema);
const store = new Store({ defaults: data });
const valid = validate(store.store);
if (!valid) {
validate.errors.forEach((e) => {
logger.debug(e);
let property;
if (e.instancePath) {
property = e.instancePath.split('/').pop();
} else if (e.keyword === 'required') {
property = e.params.missingProperty;
} else {
// something is invalid that we're not prepared to fix
// so just reset the whole store to defaults.
logger.debug(e);
store.reset();
}
logger.debug(`resetting value for setting ${property}`);
store.set(property, defaults[property]);
});
}
return store;
}
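// --- Illustrative sketch (not part of the diff) ---
// Demonstrates the reset-on-invalid behavior documented above. The literal
// 'VERBOSE' is a made-up invalid value; on a fresh profile the validator
// should replace it with the schema default ('INFO').
// const exampleStore = initStore({ ...defaults, loggingLevel: 'VERBOSE' });
// exampleStore.get('loggingLevel'); // expected: 'INFO'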
export const settingsStore = initStore();
export function setupSettingsHandlers() {
ipcMain.handle(
ipcMainChannels.GET_SETTING,
(event, key) => settingsStore.get(key)
);
ipcMain.on(
ipcMainChannels.SET_SETTING,
(event, key, value) => settingsStore.set(key, value)
);
// language is stored in the same store, but has special
// needs for getting & setting because we need to get
// the value synchronously during preload, and trigger
// an app restart on change.
ipcMain.on(ipcMainChannels.GET_LANGUAGE, (event) => {
event.returnValue = settingsStore.get('language');
});
ipcMain.handle(
ipcMainChannels.CHANGE_LANGUAGE,
(e, languageCode) => {
logger.debug('changing language to', languageCode);
settingsStore.set('language', languageCode);
app.relaunch();
app.quit();
}
);
}
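// --- Illustrative renderer-side sketch (not part of the diff) ---
// How a renderer component (e.g. SettingsModal, later in this commit) can talk
// to the handlers above; `ipcRenderer` here is the preload-exposed wrapper from
// window.Workbench.electron.
// const level = await ipcRenderer.invoke(ipcMainChannels.GET_SETTING, 'loggingLevel');
// ipcRenderer.send(ipcMainChannels.SET_SETTING, 'nWorkers', 4);
// const lang = ipcRenderer.sendSync(ipcMainChannels.GET_LANGUAGE); // synchronous, usable in preload
// await ipcRenderer.invoke(ipcMainChannels.CHANGE_LANGUAGE, 'es'); // persists, then relaunches the app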

View File

@ -1,16 +1,25 @@
import i18n from 'i18next';
import { ipcMain } from 'electron';
import Store from 'electron-store';
import { app, ipcMain } from 'electron';
import { getLogger } from './logger';
import { ipcMainChannels } from './ipcMainChannels';
const logger = getLogger(__filename.split('/').slice(-1)[0]);
const store = new Store();
export default function setupChangeLanguage() {
ipcMain.on(ipcMainChannels.GET_LANGUAGE, (event) => {
// default to en if no language setting exists
event.returnValue = store.get('language', 'en');
});
ipcMain.handle(
ipcMainChannels.CHANGE_LANGUAGE,
(e, languageCode) => {
logger.debug('changing language to', languageCode);
i18n.changeLanguage(languageCode);
store.set('language', languageCode);
app.relaunch();
app.quit();
}
);
}

View File

@ -11,6 +11,7 @@ import ELECTRON_DEV_MODE from './isDevMode';
import investUsageLogger from './investUsageLogger';
import markupMessage from './investLogMarkup';
import writeInvestParameters from './writeInvestParameters';
import { settingsStore } from './settingsStore';
const logger = getLogger(__filename.split('/').slice(-1)[0]);
@ -45,12 +46,16 @@ export function setupInvestRunHandlers(investExe) {
});
ipcMain.on(ipcMainChannels.INVEST_RUN, async (
event, modelRunName, pyModuleName, args, loggingLevel, taskgraphLoggingLevel, language, tabID
event, modelRunName, pyModuleName, args, tabID
) => {
let investRun;
let investStarted = false;
let investStdErr = '';
const usageLogger = investUsageLogger();
const loggingLevel = settingsStore.get('loggingLevel');
const taskgraphLoggingLevel = settingsStore.get('taskgraphLoggingLevel');
const language = settingsStore.get('language');
const nWorkers = settingsStore.get('nWorkers');
// Write a temporary datastack json for passing to invest CLI
try {
@ -64,7 +69,10 @@ export function setupInvestRunHandlers(investExe) {
filepath: datastackPath,
moduleName: pyModuleName,
relativePaths: false,
args: JSON.stringify(args),
args: JSON.stringify({
...args,
n_workers: nWorkers,
}),
};
await writeInvestParameters(payload);

View File

@ -11,6 +11,7 @@ export default function setupOpenLocalHtml(parentWindow, isDevMode) {
ipcMainChannels.OPEN_LOCAL_HTML, (event, url) => {
const [width, height] = parentWindow.getSize();
const child = new BrowserWindow({
parent: parentWindow,
width: width > 1000 ? 1000 : width, // UG content is never wider
height: height,
frame: true,

View File

@ -35,6 +35,7 @@ export default {
PORT: PORT, // where the flask app is running
ELECTRON_LOG_PATH: electronLogPath,
USERGUIDE_PATH: userguidePath,
LANGUAGE: ipcRenderer.sendSync(ipcMainChannels.GET_LANGUAGE),
logger: {
debug: (message) => ipcRenderer.send(ipcMainChannels.LOGGER, 'debug', message),
info: (message) => ipcRenderer.send(ipcMainChannels.LOGGER, 'info', message),

View File

@ -58,8 +58,8 @@ export default class InvestJob {
const lastKey = sortedJobHashes.pop();
investJobStore.removeItem(lastKey);
}
await investJobStore.setItem(HASH_ARRAY_KEY, sortedJobHashes);
await investJobStore.setItem(job.hash, job);
await investJobStore.setItem(HASH_ARRAY_KEY, sortedJobHashes);
return InvestJob.getJobStore();
}

View File

@ -1,5 +1,6 @@
import React from 'react';
import PropTypes from 'prop-types';
import i18n from 'i18next';
import TabPane from 'react-bootstrap/TabPane';
import TabContent from 'react-bootstrap/TabContent';
@ -19,14 +20,9 @@ import InvestTab from './components/InvestTab';
import SettingsModal from './components/SettingsModal';
import DataDownloadModal from './components/DataDownloadModal';
import DownloadProgressBar from './components/DownloadProgressBar';
import {
saveSettingsStore, getAllSettings,
} from './components/SettingsModal/SettingsStorage';
import { getInvestModelNames } from './server_requests';
import InvestJob from './InvestJob';
import { dragOverHandlerNone } from './utils';
import { ipcMainChannels } from '../main/ipcMainChannels';
import i18n from 'i18next';
const { ipcRenderer } = window.Workbench.electron;
@ -43,18 +39,15 @@ export default class App extends React.Component {
openJobs: {},
investList: null,
recentJobs: [],
investSettings: null,
showDownloadModal: false,
downloadedNofN: null,
};
this.saveSettings = this.saveSettings.bind(this);
this.switchTabs = this.switchTabs.bind(this);
this.openInvestModel = this.openInvestModel.bind(this);
this.closeInvestModel = this.closeInvestModel.bind(this);
this.updateJobProperties = this.updateJobProperties.bind(this);
this.saveJob = this.saveJob.bind(this);
this.clearRecentJobs = this.clearRecentJobs.bind(this);
this.storeDownloadDir = this.storeDownloadDir.bind(this);
this.showDownloadModal = this.showDownloadModal.bind(this);
}
@ -62,17 +55,17 @@ export default class App extends React.Component {
async componentDidMount() {
const investList = await getInvestModelNames();
const recentJobs = await InvestJob.getJobStore();
const investSettings = await getAllSettings();
this.setState({
investList: investList,
recentJobs: recentJobs,
investSettings: investSettings,
// filter out models that do not exist in current version of invest
recentJobs: recentJobs.filter((job) => (
Object.values(investList)
.map((m) => m.model_name)
.includes(job.modelRunName)
)),
showDownloadModal: this.props.isFirstRun,
});
await i18n.changeLanguage(investSettings.language);
await ipcRenderer.invoke(
ipcMainChannels.CHANGE_LANGUAGE, investSettings.language
);
await i18n.changeLanguage(window.Workbench.LANGUAGE);
ipcRenderer.on('download-status', (downloadedNofN) => {
this.setState({
downloadedNofN: downloadedNofN,
@ -94,33 +87,6 @@ export default class App extends React.Component {
);
}
async saveSettings(settings) {
const { investSettings } = this.state;
await saveSettingsStore(settings);
this.setState({ investSettings: settings });
// if language has changed, refresh the app
if (settings.language !== investSettings.language) {
// change language in the renderer process
await i18n.changeLanguage(settings.language);
// change language in the main process
await ipcRenderer.invoke(
ipcMainChannels.CHANGE_LANGUAGE, settings.language
);
// rerender for changes to take effect
window.location.reload();
}
}
/** Store a sampledata filepath in localforage.
*
* @param {string} dir - the path to the user-selected dir
*/
storeDownloadDir(dir) {
const { investSettings } = this.state;
investSettings.sampleDataDir = dir;
this.saveSettings(investSettings);
}
showDownloadModal(shouldShow) {
this.setState({
showDownloadModal: shouldShow,
@ -211,7 +177,6 @@ export default class App extends React.Component {
render() {
const {
investList,
investSettings,
recentJobs,
openJobs,
openTabIDs,
@ -260,7 +225,17 @@ export default class App extends React.Component {
key={id}
className={id === activeTab ? 'active' : ''}
>
<Nav.Link eventKey={id}>
<Nav.Link
eventKey={id}
onAuxClick={(event) => {
event.stopPropagation();
event.preventDefault();
if (event.button === 1) {
// middle mouse button clicked, close tab
this.closeInvestModel(id);
}
}}
>
{statusSymbol}
{` ${job.modelHumanName}`}
</Nav.Link>
@ -288,7 +263,6 @@ export default class App extends React.Component {
<InvestTab
job={job}
tabID={id}
investSettings={investSettings}
saveJob={this.saveJob}
updateJobProperties={this.updateJobProperties}
/>
@ -301,7 +275,6 @@ export default class App extends React.Component {
<DataDownloadModal
show={showDownloadModal}
closeModal={() => this.showDownloadModal(false)}
storeDownloadDir={this.storeDownloadDir}
/>
<TabContainer activeKey={activeTab}>
<Navbar
@ -343,21 +316,12 @@ export default class App extends React.Component {
)
: <div />
}
{
// don't render until after we fetched the data
(investSettings)
? (
<SettingsModal
className="mx-3"
saveSettings={this.saveSettings}
investSettings={investSettings}
clearJobsStorage={this.clearRecentJobs}
showDownloadModal={() => this.showDownloadModal(true)}
nCPU={this.props.nCPU}
/>
)
: <div />
}
<SettingsModal
className="mx-3"
clearJobsStorage={this.clearRecentJobs}
showDownloadModal={() => this.showDownloadModal(true)}
nCPU={this.props.nCPU}
/>
</Col>
</Row>
</Navbar>

View File

@ -109,7 +109,6 @@ class DataDownloadModal extends React.Component {
this.state.selectedLinksArray,
data.filePaths[0]
);
this.props.storeDownloadDir(data.filePaths[0]);
this.closeDialog();
}
}
@ -283,7 +282,6 @@ class DataDownloadModal extends React.Component {
DataDownloadModal.propTypes = {
show: PropTypes.bool.isRequired,
closeModal: PropTypes.func.isRequired,
storeDownloadDir: PropTypes.func.isRequired,
};
export default withTranslation()(DataDownloadModal)
export default withTranslation()(DataDownloadModal);

View File

@ -11,6 +11,8 @@ import { useTranslation } from 'react-i18next';
import OpenButton from '../OpenButton';
import InvestJob from '../../InvestJob';
const { logger } = window.Workbench;
/**
* Renders a table of buttons for each invest model and
* a list of cards for each cached invest job.
@ -110,46 +112,51 @@ HomeTab.propTypes = {
*/
function RecentInvestJobs(props) {
const { recentJobs, openInvestModel } = props;
const handleClick = (jobMetadata) => {
openInvestModel(new InvestJob(jobMetadata));
}
const { t, i18n } = useTranslation();
// Buttons to load each recently saved state
const handleClick = (jobMetadata) => {
try {
openInvestModel(new InvestJob(jobMetadata));
} catch (error) {
logger.debug(error);
}
};
const recentButtons = [];
recentJobs.forEach((job) => {
if (!job.argsValues) { return; }
recentButtons.push(
<Card
className="text-left recent-job-card"
as="button"
key={job.hash}
onClick={() => handleClick(job)}
>
<Card.Body>
<Card.Header>
<span className="header-title">{job.modelHumanName}</span>
</Card.Header>
<Card.Title>
<span className="text-heading">{'Workspace: '}</span>
<span className="text-mono">{job.argsValues.workspace_dir}</span>
</Card.Title>
<Card.Title>
<span className="text-heading">{'Suffix: '}</span>
<span className="text-mono">{job.argsValues.results_suffix}</span>
</Card.Title>
<Card.Footer className="text-muted">
<span className="timestamp">{job.humanTime}</span>
<span className="status">
{(job.status === 'success'
? <span className="status-success">{t('Model Complete')}</span>
: <span className="status-error">{job.status}</span>
)}
</span>
</Card.Footer>
</Card.Body>
</Card>
);
if (job && job.argsValues && job.modelHumanName) {
recentButtons.push(
<Card
className="text-left recent-job-card"
as="button"
key={job.hash}
onClick={() => handleClick(job)}
>
<Card.Body>
<Card.Header>
<span className="header-title">{job.modelHumanName}</span>
</Card.Header>
<Card.Title>
<span className="text-heading">{'Workspace: '}</span>
<span className="text-mono">{job.argsValues.workspace_dir}</span>
</Card.Title>
<Card.Title>
<span className="text-heading">{'Suffix: '}</span>
<span className="text-mono">{job.argsValues.results_suffix}</span>
</Card.Title>
<Card.Footer className="text-muted">
<span className="timestamp">{job.humanTime}</span>
<span className="status">
{(job.status === 'success'
? <span className="status-success">{t('Model Complete')}</span>
: <span className="status-error">{job.status}</span>
)}
</span>
</Card.Footer>
</Card.Body>
</Card>
);
}
});
return (

View File

@ -147,7 +147,6 @@ class InvestTab extends React.Component {
const {
job,
tabID,
investSettings,
updateJobProperties,
} = this.props;
const args = { ...argsValues };
@ -162,9 +161,6 @@ class InvestTab extends React.Component {
job.modelRunName,
this.state.modelSpec.pyname,
args,
investSettings.loggingLevel,
investSettings.taskgraphLoggingLevel,
investSettings.language,
tabID
);
this.switchTabs('log');
@ -205,7 +201,7 @@ class InvestTab extends React.Component {
logfile,
} = this.props.job;
const { tabID, investSettings, t } = this.props;
const { tabID, t } = this.props;
// Don't render the model setup & log until data has been fetched.
if (!modelSpec) {
@ -279,7 +275,6 @@ class InvestTab extends React.Component {
uiSpec={uiSpec}
argsInitValues={argsValues}
investExecute={this.investExecute}
nWorkers={investSettings.nWorkers}
sidebarSetupElementId={sidebarSetupElementId}
sidebarFooterElementId={sidebarFooterElementId}
executeClicked={executeClicked}
@ -313,12 +308,6 @@ InvestTab.propTypes = {
status: PropTypes.string,
}).isRequired,
tabID: PropTypes.string.isRequired,
investSettings: PropTypes.shape({
nWorkers: PropTypes.string,
taskgraphLoggingLevel: PropTypes.string,
loggingLevel: PropTypes.string,
language: PropTypes.string,
}).isRequired,
saveJob: PropTypes.func.isRequired,
updateJobProperties: PropTypes.func.isRequired,
};

View File

@ -11,6 +11,7 @@ import { fetchDatastackFromFile } from '../../server_requests';
import { ipcMainChannels } from '../../../main/ipcMainChannels';
const { ipcRenderer } = window.Workbench.electron;
const { logger } = window.Workbench;
/**
* Render a button that loads args from a datastack, parameterset, or logfile.
@ -23,9 +24,22 @@ class OpenButton extends React.Component {
}
async browseFile() {
const { t } = this.props;
const data = await ipcRenderer.invoke(ipcMainChannels.SHOW_OPEN_DIALOG);
if (!data.canceled) {
const datastack = await fetchDatastackFromFile(data.filePaths[0]);
let datastack;
try {
datastack = await fetchDatastackFromFile(data.filePaths[0]);
} catch (error) {
logger.error(error);
alert(
t(
'No InVEST model data can be parsed from the file:\n {{filepath}}',
{ filepath: data.filePaths[0] }
)
);
return;
}
const job = new InvestJob({
modelRunName: datastack.model_run_name,
modelHumanName: datastack.model_human_name,

View File

@ -75,7 +75,7 @@ export default function ResourcesTab(props) {
}
const { t, i18n } = useTranslation();
const userGuideURL = `${window.Workbench.USERGUIDE_PATH}/${i18n.language}/${docs}`;
const userGuideURL = `${window.Workbench.USERGUIDE_PATH}/${window.Workbench.LANGUAGE}/${docs}`;
return (
<React.Fragment>

View File

@ -1,83 +0,0 @@
import localforage from 'localforage';
const { logger } = window.Workbench;
const investSettingsStore = localforage.createInstance({
name: 'InvestSettings',
});
/** Getter function for global default settings.
*
* @returns {object} to destructure into:
* {String} nWorkers - TaskGraph number of workers
* {String} taskgraphLoggingLevel - InVEST taskgraph logging level
* {String} loggingLevel - InVEST model logging level
* {String} sampleDataDir - default location for sample datastack downloads
*/
export function getDefaultSettings() {
const defaultSettings = {
nWorkers: '-1',
taskgraphLoggingLevel: 'INFO',
loggingLevel: 'INFO',
sampleDataDir: '',
language: 'en'
};
return defaultSettings;
}
/** Getter function for settings store value.
*
* @param {object} obj.argsValues - an invest "args dict" with initial values
* @param {string} key - setting key to get value
*
* @returns {string} - value of the setting key.
*/
export async function getSettingsValue(key) {
const value = await investSettingsStore.getItem(key);
if (!value) {
return getDefaultSettings()[key];
}
return value;
}
/** Getter function for entire contents of store.
*
* @returns {Object} - key: value pairs of settings
*/
export async function getAllSettings() {
try {
const promises = [];
const keys = Object.keys(getDefaultSettings());
keys.forEach((key) => {
promises.push(getSettingsValue(key));
});
const values = await Promise.all(promises);
const settings = Object.fromEntries(keys.map(
(_, i) => [keys[i], values[i]]
));
return settings;
} catch (err) {
logger.error(err.message);
return getDefaultSettings();
}
}
/** Clear the settings store. */
export async function clearSettingsStore() {
await investSettingsStore.clear();
}
/** Setter function for saving store values.
*
* @param {object} settingsObj - object with one or more key:value pairs
*
*/
export async function saveSettingsStore(settingsObj) {
try {
for (const [setting, value] of Object.entries(settingsObj)) {
await investSettingsStore.setItem(setting, value);
}
} catch (err) {
logger.error(`Error saving settings: ${err}`);
}
}

View File

@ -16,7 +16,6 @@ import {
import { BsChevronExpand } from 'react-icons/bs';
import { withTranslation } from 'react-i18next';
import { getDefaultSettings } from './SettingsStorage';
import { ipcMainChannels } from '../../../main/ipcMainChannels';
import { getSupportedLanguages } from '../../server_requests';
@ -29,11 +28,18 @@ class SettingsModal extends React.Component {
this.state = {
show: false,
languageOptions: null,
loggingLevel: null,
taskgraphLoggingLevel: null,
nWorkers: null,
language: window.Workbench.LANGUAGE,
showConfirmLanguageChange: false,
};
this.handleShow = this.handleShow.bind(this);
this.handleClose = this.handleClose.bind(this);
this.handleChange = this.handleChange.bind(this);
this.handleReset = this.handleReset.bind(this);
this.handleChangeNumber = this.handleChangeNumber.bind(this);
this.loadSettings = this.loadSettings.bind(this);
this.handleChangeLanguage = this.handleChangeLanguage.bind(this);
this.switchToDownloadModal = this.switchToDownloadModal.bind(this);
}
@ -42,6 +48,7 @@ class SettingsModal extends React.Component {
this.setState({
languageOptions: languageOptions,
});
this.loadSettings();
}
handleClose() {
@ -54,17 +61,40 @@ class SettingsModal extends React.Component {
this.setState({ show: true });
}
handleReset(event) {
event.preventDefault();
const resetSettings = getDefaultSettings();
this.props.saveSettings(resetSettings);
handleChange(event) {
const { name, value } = event.currentTarget;
this.setState({ [name]: value });
ipcRenderer.send(ipcMainChannels.SET_SETTING, name, value);
}
handleChange(event) {
const newSettings = { ...this.props.investSettings };
handleChangeNumber(event) {
const { name, value } = event.currentTarget;
newSettings[name] = value;
this.props.saveSettings(newSettings);
const numeral = Number(value);
this.setState({ [name]: numeral });
ipcRenderer.send(ipcMainChannels.SET_SETTING, name, numeral);
}
async loadSettings() {
const loggingLevel = await ipcRenderer
.invoke(ipcMainChannels.GET_SETTING, 'loggingLevel');
const taskgraphLoggingLevel = await ipcRenderer
.invoke(ipcMainChannels.GET_SETTING, 'taskgraphLoggingLevel');
const nWorkers = await ipcRenderer
.invoke(ipcMainChannels.GET_SETTING, 'nWorkers');
this.setState({
loggingLevel: loggingLevel,
taskgraphLoggingLevel: taskgraphLoggingLevel,
nWorkers: nWorkers
});
}
handleChangeLanguage() {
// if language has changed, refresh the app
if (this.state.language !== window.Workbench.LANGUAGE) {
// tell the main process to update the language setting in storage
// and then relaunch the app
ipcRenderer.invoke(ipcMainChannels.CHANGE_LANGUAGE, this.state.language);
}
}
switchToDownloadModal() {
@ -73,21 +103,29 @@ class SettingsModal extends React.Component {
}
render() {
const { show, languageOptions } = this.state;
const { investSettings, clearJobsStorage, nCPU, t } = this.props;
const {
show,
languageOptions,
language,
loggingLevel,
taskgraphLoggingLevel,
nWorkers,
showConfirmLanguageChange,
} = this.state;
const { clearJobsStorage, nCPU, t } = this.props;
const nWorkersOptions = [
[-1, `${t('Synchronous')} (-1)`],
[0, `${t('Threaded task management')} (0)`]
[0, `${t('Threaded task management')} (0)`],
];
for (let i = 1; i <= nCPU; i += 1) {
nWorkersOptions.push([i, `${i} ${t('CPUs')}`]);
}
const logLevelOptions = { // map value to display name
'DEBUG': t('DEBUG'),
'INFO': t('INFO'),
'WARNING': t('WARNING'),
'ERROR': t('ERROR')
const logLevelOptions = { // map value to display name
DEBUG: t('DEBUG'),
INFO: t('INFO'),
WARNING: t('WARNING'),
ERROR: t('ERROR'),
};
return (
<React.Fragment>
@ -124,18 +162,18 @@ class SettingsModal extends React.Component {
<Form.Label column sm="8" htmlFor="language-select">
<MdTranslate className="language-icon" />
{t('Language')}
<Form.Text className="text-nowrap" muted>
<MdWarningAmber className="align-text-bottom ml-3" />
{t('Changing this setting will refresh the app and close all tabs')}
</Form.Text>
</Form.Label>
<Col sm="4">
<Form.Control
id="language-select"
as="select"
name="language"
value={investSettings.language}
onChange={this.handleChange}
value={window.Workbench.LANGUAGE}
onChange={
(event) => this.setState({
showConfirmLanguageChange: true,
language: event.target.value
})}
>
{Object.entries(languageOptions).map((entry) => {
const [value, displayName] = entry;
@ -155,7 +193,7 @@ class SettingsModal extends React.Component {
id="logging-select"
as="select"
name="loggingLevel"
value={investSettings.loggingLevel}
value={loggingLevel}
onChange={this.handleChange}
>
{Object.entries(logLevelOptions).map(
@ -173,7 +211,7 @@ class SettingsModal extends React.Component {
id="taskgraph-logging-select"
as="select"
name="taskgraphLoggingLevel"
value={investSettings.taskgraphLoggingLevel}
value={taskgraphLoggingLevel}
onChange={this.handleChange}
>
{Object.entries(logLevelOptions).map(
@ -197,8 +235,8 @@ class SettingsModal extends React.Component {
as="select"
name="nWorkers"
type="text"
value={investSettings.nWorkers}
onChange={this.handleChange}
value={nWorkers}
onChange={this.handleChangeNumber}
>
{nWorkersOptions.map(
(opt) => <option value={opt[0]} key={opt[0]}>{opt[1]}</option>
@ -233,18 +271,6 @@ class SettingsModal extends React.Component {
)
: <div />
}
<Row className="justify-content-end">
<Col sm="5">
<Button
variant="secondary"
onClick={this.handleReset}
type="button"
className="w-100"
>
{t('Reset to Defaults')}
</Button>
</Col>
</Row>
<hr />
<Button
variant="primary"
@ -264,21 +290,37 @@ class SettingsModal extends React.Component {
<span>{t('no invest workspaces will be deleted')}</span>
</Modal.Body>
</Modal>
{
(languageOptions) ? (
<Modal show={showConfirmLanguageChange} className="confirm-modal">
<Modal.Header>
<Modal.Title as="h5">{t('Warning')}</Modal.Title>
</Modal.Header>
<Modal.Body>
<p>
{t('Changing this setting will close your tabs and relaunch the app.')}
</p>
</Modal.Body>
<Modal.Footer>
<Button
variant="secondary"
onClick={() => this.setState({ showConfirmLanguageChange: false })}
>{t('Cancel')}</Button>
<Button
variant="primary"
onClick={this.handleChangeLanguage}
>{t('Change to ') + languageOptions[language]}</Button>
</Modal.Footer>
</Modal>
) : <React.Fragment />
}
</React.Fragment>
);
}
}
SettingsModal.propTypes = {
saveSettings: PropTypes.func.isRequired,
clearJobsStorage: PropTypes.func.isRequired,
investSettings: PropTypes.shape({
nWorkers: PropTypes.string,
taskgraphLoggingLevel: PropTypes.string,
loggingLevel: PropTypes.string,
sampleDataDir: PropTypes.string,
language: PropTypes.string,
}).isRequired,
showDownloadModal: PropTypes.func.isRequired,
nCPU: PropTypes.number.isRequired,
};
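The confirm-language-change modal above hands off to handleChangeLanguage, whose body is not shown in this hunk. Based on the test later in this diff that spies on ipcRenderer.invoke with the CHANGE_LANGUAGE channel, a plausible sketch looks like this (assuming ipcRenderer and ipcMainChannels are imported as in the other components in this diff; the setState call is an assumption):

handleChangeLanguage() {
  const { language } = this.state;
  // hand the new language code to the main process, which relaunches the app
  ipcRenderer.invoke(ipcMainChannels.CHANGE_LANGUAGE, language);
  // closing the confirm dialog here is an assumption; the real method may simply
  // rely on the relaunch to tear the component down
  this.setState({ showConfirmLanguageChange: false });
}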

View File

@ -30,7 +30,8 @@ const { ipcRenderer } = window.Workbench.electron;
* @returns {string} - the filtered and formatted part of the message
*/
function filterSpatialOverlapFeedback(message, filepath) {
const newPrefix = i18n.t('Bounding box does not intersect at least one other:');
const newPrefix = i18n.t(
'Not all of the spatial layers overlap each other. Bounding box:');
const bbox = message.split(`${filepath}:`).pop().split('|')[0];
const bboxFormatted = bbox.split(' ').map(
(str) => str.padEnd(22, ' ')
@ -167,7 +168,7 @@ export default function ArgInput(props) {
// Messages with this pattern include validation feedback about
// multiple inputs, but the whole message is repeated for each input.
// It's more readable if filtered on the individual input.
const pattern = 'Bounding boxes do not intersect';
const pattern = 'Not all of the spatial layers overlap each other';
if (validationMessage.startsWith(pattern)) {
validationMessage = filterSpatialOverlapFeedback(
validationMessage, value
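With hypothetical file paths and bounding boxes, the filtering above turns the repeated multi-input message into a single-input one (values are for illustration only; the exact padding of the output is not shown in this hunk):

// Hypothetical validation message, shaped like the one exercised in the tests below:
const message = 'Not all of the spatial layers overlap each other. '
  + 'All bounding boxes must intersect: ./vector.shp: [0, 0, 1, 1] | ./raster.tif: [5, 5, 6, 6]';
// For the input whose value is './vector.shp',
// filterSpatialOverlapFeedback(message, './vector.shp') keeps only that file's
// bounding box, yielding roughly:
// "Not all of the spatial layers overlap each other. Bounding box: [0, 0, 1, 1]"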
@ -362,7 +363,7 @@ function AboutModal(props) {
// create link to users guide entry for this arg
// anchor name is the arg name, with underscores replaced with hyphens
const userguideURL = `
${window.Workbench.USERGUIDE_PATH}/${i18n.language}/${userguide}#${argkey.replace(/_/g, '-')}`;
${window.Workbench.USERGUIDE_PATH}/${window.Workbench.LANGUAGE}/${userguide}#${argkey.replace(/_/g, '-')}`;
return (
<React.Fragment>
<Button

View File

@ -1,5 +1,6 @@
import React from 'react';
import PropTypes from 'prop-types';
import { withTranslation } from 'react-i18next';
import Alert from 'react-bootstrap/Alert';
import Container from 'react-bootstrap/Container';
@ -23,9 +24,9 @@ import {
} from '../../server_requests';
import { argsDictFromObject } from '../../utils';
import { ipcMainChannels } from '../../../main/ipcMainChannels';
import { withTranslation } from 'react-i18next';
const { ipcRenderer } = window.Workbench.electron;
const { logger } = window.Workbench;
/** Initialize values of InVEST args based on the model's UI Spec.
*
@ -54,7 +55,7 @@ function initializeArgValues(argsSpec, uiSpec, argsDict) {
if (argsSpec[argkey].type === 'boolean') {
value = argsDict[argkey] || false;
} else if (argsSpec[argkey].type === 'option_string') {
if (argsDict[argkey]) {
if (argsDict[argkey]) {
value = argsDict[argkey];
} else { // default to first
if (Array.isArray(argsSpec[argkey].options)) {
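The defaulting rules in initializeArgValues can be summarized in a standalone sketch (simplified; the object-keyed options branch and the empty-string fallback for other types are assumptions inferred from the Array.isArray check above):

function defaultArgValue(argSpec, savedValue) {
  if (argSpec.type === 'boolean') {
    return savedValue || false; // booleans default to false
  }
  if (argSpec.type === 'option_string') {
    if (savedValue) { return savedValue; }
    // otherwise fall back to the first available option
    return Array.isArray(argSpec.options)
      ? argSpec.options[0]
      : Object.keys(argSpec.options)[0];
  }
  return savedValue || ''; // assumption: other types default to an empty string
}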
@ -105,7 +106,6 @@ class SetupTab extends React.Component {
this.updateArgTouched = this.updateArgTouched.bind(this);
this.updateArgValues = this.updateArgValues.bind(this);
this.batchUpdateArgs = this.batchUpdateArgs.bind(this);
this.insertNWorkers = this.insertNWorkers.bind(this);
this.callUISpecFunctions = this.callUISpecFunctions.bind(this);
this.browseForDatastack = this.browseForDatastack.bind(this);
this.loadParametersFromFile = this.loadParametersFromFile.bind(this);
@ -204,19 +204,6 @@ class SetupTab extends React.Component {
}
}
/**
* n_workers is a special invest arg stored in global settings
*
* @param {object} argsValues - of the shape returned by `initializeArgValues`.
* @returns {object} copy of original argsValues with an n_workers property.
*/
insertNWorkers(argsValues) {
return {
...argsValues,
n_workers: { value: this.props.nWorkers },
};
}
/** Save the current invest arguments to a python script via datastack.py API.
*
* @param {string} filepath - desired path to the python script
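With insertNWorkers removed, the renderer no longer injects n_workers into the args dictionary; presumably the value now travels through the main-process settings store when a model is launched. A hedged sketch of that wiring (the helper name, file path, and call site are assumptions, not shown in this diff):

import { settingsStore } from '../main/settingsStore';

// hypothetical main-process helper that merges the stored n_workers value
// into the args handed to the invest CLI
function withNWorkers(args) {
  return { ...args, n_workers: settingsStore.get('nWorkers') };
}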
@ -226,9 +213,7 @@ class SetupTab extends React.Component {
const {
modelName,
} = this.props;
const args = argsDictFromObject(
this.insertNWorkers(this.state.argsValues)
);
const args = argsDictFromObject(this.state.argsValues);
const payload = {
filepath: filepath,
modelname: modelName,
@ -242,9 +227,7 @@ class SetupTab extends React.Component {
const {
pyModuleName,
} = this.props;
const args = argsDictFromObject(
this.insertNWorkers(this.state.argsValues)
);
const args = argsDictFromObject(this.state.argsValues);
const payload = {
filepath: datastackPath,
moduleName: pyModuleName,
@ -292,8 +275,20 @@ class SetupTab extends React.Component {
}
async loadParametersFromFile(filepath) {
const datastack = await fetchDatastackFromFile(filepath);
const { pyModuleName, switchTabs, t } = this.props;
let datastack;
try {
datastack = await fetchDatastackFromFile(filepath);
} catch (error) {
logger.error(error);
alert( // eslint-disable-line no-alert
t(
'No InVEST model data can be parsed from the file:\n {{filepath}}',
{ filepath: filepath }
)
);
return;
}
if (datastack.module_name === pyModuleName) {
this.batchUpdateArgs(datastack.args);
switchTabs('setup');
@ -317,7 +312,7 @@ class SetupTab extends React.Component {
wrapInvestExecute() {
this.props.investExecute(
argsDictFromObject(this.insertNWorkers(this.state.argsValues))
argsDictFromObject(this.state.argsValues)
);
}
@ -598,7 +593,6 @@ SetupTab.propTypes = {
}).isRequired,
argsInitValues: PropTypes.objectOf(PropTypes.oneOfType([PropTypes.string, PropTypes.bool])),
investExecute: PropTypes.func.isRequired,
nWorkers: PropTypes.string.isRequired,
sidebarSetupElementId: PropTypes.string.isRequired,
sidebarFooterElementId: PropTypes.string.isRequired,
executeClicked: PropTypes.bool.isRequired,

View File

@ -4,7 +4,6 @@ import { Translation } from 'react-i18next';
import i18n from '../i18n/i18n';
import { handleClickExternalURL } from './handlers';
import { getSettingsValue } from '../components/SettingsModal/SettingsStorage';
import { ipcMainChannels } from '../../main/ipcMainChannels';
import investLogo from '../static/invest-logo.png';
@ -15,8 +14,7 @@ async function getInvestVersion() {
return investVersion;
}
const language = await getSettingsValue('language');
await i18n.changeLanguage(language);
await i18n.changeLanguage(window.Workbench.LANGUAGE);
const investVersion = await getInvestVersion();
ReactDom.render(
<Translation>

View File

@ -7,12 +7,10 @@ import {
handleClickExternalURL,
handleClickFindLogfiles
} from './handlers';
import { getSettingsValue } from '../components/SettingsModal/SettingsStorage';
import investLogo from '../static/invest-logo.png';
import natcapLogo from '../static/NatCapLogo.jpg';
const language = await getSettingsValue('language');
await i18n.changeLanguage(language);
await i18n.changeLanguage(window.Workbench.LANGUAGE);
ReactDom.render(
<Translation>
{(t, { i18n }) => (

View File

@ -1,7 +1,5 @@
import { getSettingsValue } from './components/SettingsModal/SettingsStorage';
const HOSTNAME = 'http://127.0.0.1';
const { logger, PORT } = window.Workbench;
const { logger, PORT, LANGUAGE } = window.Workbench;
const PREFIX = 'api';
// The Flask server sends UTF-8 encoded responses by default
@ -16,13 +14,12 @@ const PREFIX = 'api';
* @returns {Promise} resolves object
*/
export async function getInvestModelNames() {
const language = await getSettingsValue('language');
return (
window.fetch(`${HOSTNAME}:${PORT}/${PREFIX}/models?language=${language}`, {
window.fetch(`${HOSTNAME}:${PORT}/${PREFIX}/models?language=${LANGUAGE}`, {
method: 'get',
})
.then((response) => response.json())
.catch((error) => { logger.error(`${error.stack}`) })
.catch((error) => { logger.error(`${error.stack}`); })
);
}
@ -33,9 +30,8 @@ export async function getInvestModelNames() {
* @returns {Promise} resolves object
*/
export async function getSpec(payload) {
const language = await getSettingsValue('language');
return (
window.fetch(`${HOSTNAME}:${PORT}/${PREFIX}/getspec?language=${language}`, {
window.fetch(`${HOSTNAME}:${PORT}/${PREFIX}/getspec?language=${LANGUAGE}`, {
method: 'post',
body: JSON.stringify(payload),
headers: { 'Content-Type': 'application/json' },
@ -55,9 +51,8 @@ export async function getSpec(payload) {
* @returns {Promise} resolves array
*/
export async function fetchValidation(payload) {
const language = await getSettingsValue('language');
return (
window.fetch(`${HOSTNAME}:${PORT}/${PREFIX}/validate?language=${language}`, {
window.fetch(`${HOSTNAME}:${PORT}/${PREFIX}/validate?language=${LANGUAGE}`, {
method: 'post',
body: JSON.stringify(payload),
headers: { 'Content-Type': 'application/json' },
@ -87,7 +82,6 @@ export function fetchDatastackFromFile(payload) {
headers: { 'Content-Type': 'application/json' },
})
.then((response) => response.json())
.catch((error) => logger.error(error.stack))
);
}
@ -101,7 +95,7 @@ export function getVectorColumnNames(payload) {
return (
window.fetch(`${HOSTNAME}:${PORT}/${PREFIX}/colnames`, {
method: 'post',
body: JSON.stringify({vector_path: payload}),
body: JSON.stringify({ vector_path: payload }),
headers: { 'Content-Type': 'application/json' },
})
.then((response) => response.json())
@ -188,7 +182,6 @@ export function writeParametersToFile(payload) {
);
}
/**
* Get the mapping of supported language codes to display names.
*
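The truncated doc comment above belongs to the helper that fetches the language options; a minimal sketch following the same fetch pattern as the other requests in this file (the endpoint name 'languages' is an assumption):

export function getSupportedLanguages() {
  return (
    window.fetch(`${HOSTNAME}:${PORT}/${PREFIX}/languages`, {
      method: 'get',
    })
      .then((response) => response.json())
      .catch((error) => { logger.error(`${error.stack}`); })
  );
}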

View File

@ -572,6 +572,11 @@ input[type=text]::placeholder {
margin-bottom: 0.2rem;
}
.confirm-modal .modal-content {
background-color: papayawhip;
margin-top: 100px;
}
.error-boundary {
max-width:600px;
margin: 0 auto;

View File

@ -16,6 +16,7 @@ import { APP_HAS_RUN_TOKEN } from '../../src/main/setupCheckFirstRun';
jest.setTimeout(240000);
const PORT = 9009;
const WAIT_TO_CLICK = 300; // ms
let ELECTRON_PROCESS;
let BROWSER;
@ -133,13 +134,15 @@ beforeEach(() => {
afterEach(async () => {
try {
const pages = await BROWSER.pages();
await Promise.all(pages.map(page => page.close()));
await BROWSER.close();
} catch (error) {
console.log(BINARY_PATH);
console.error(error);
// Normally BROWSER.close() will kill this process
ELECTRON_PROCESS.kill();
}
ELECTRON_PROCESS.removeAllListeners();
ELECTRON_PROCESS.kill();
});
test('Run a real invest model', async () => {
@ -164,6 +167,7 @@ test('Run a real invest model', async () => {
const downloadModal = await page.waitForSelector('.modal-dialog');
const downloadModalCancel = await downloadModal.waitForSelector(
'aria/[name="Cancel"][role="button"]');
await page.waitForTimeout(WAIT_TO_CLICK); // waiting for click handler to be ready
await downloadModalCancel.click();
// We need to get the modelButton from w/in this list-group because there
// are buttons with the same name in the Recent Jobs container.
@ -232,15 +236,17 @@ test('Check local userguide links', async () => {
const downloadModal = await page.waitForSelector('.modal-dialog');
const downloadModalCancel = await downloadModal.waitForSelector(
'aria/[name="Cancel"][role="button"]');
await page.waitForTimeout(WAIT_TO_CLICK); // waiting for click handler to be ready
await downloadModalCancel.click();
const investList = await page.waitForSelector('.invest-list-group');
const modelButtons = await investList.$$('aria/[role="button"]');
await page.waitForTimeout(WAIT_TO_CLICK); // first btn click does not register w/o this pause
for (const btn of modelButtons) {
await btn.click();
const link = await page.waitForSelector('text/User\'s Guide');
await page.waitForTimeout(300); // link.click() not working w/o this pause
await page.waitForTimeout(WAIT_TO_CLICK); // link.click() not working w/o this pause
const hrefHandle = await link.getProperty('href');
const hrefValue = await hrefHandle.jsonValue();
await link.click();
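The repeated waitForTimeout-then-click pairs above could be captured by one small helper; a sketch, not part of the diff:

async function clickWhenReady(page, elementHandle, pause = WAIT_TO_CLICK) {
  // give the element's click handler time to attach before clicking
  await page.waitForTimeout(pause);
  await elementHandle.click();
}

// usage, replacing the two-line pattern above:
// await clickWhenReady(page, downloadModalCancel);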

View File

@ -12,6 +12,7 @@ if (!process.env.ELECTRON_LOG_LEVEL) {
if (global.window) {
// mock the work of preload.js here:
const api = require('../src/preload/api').default;
api.LANGUAGE = 'en';
global.window.Workbench = api;
// normally electron main passes port to preload.

View File

@ -183,6 +183,7 @@ describe('createWindow', () => {
ipcMainChannels.CHANGE_LANGUAGE,
ipcMainChannels.CHECK_STORAGE_TOKEN,
ipcMainChannels.CHECK_FILE_PERMISSIONS,
ipcMainChannels.GET_SETTING,
ipcMainChannels.GET_N_CPUS,
ipcMainChannels.INVEST_VERSION,
ipcMainChannels.IS_FIRST_RUN,
@ -192,6 +193,7 @@ describe('createWindow', () => {
const expectedOnChannels = [
ipcMainChannels.DOWNLOAD_URL,
ipcMainChannels.GET_ELECTRON_PATHS,
ipcMainChannels.GET_LANGUAGE,
ipcMainChannels.INVEST_RUN,
ipcMainChannels.INVEST_KILL,
ipcMainChannels.INVEST_READ_LOG,
@ -199,6 +201,7 @@ describe('createWindow', () => {
ipcMainChannels.SHOW_ITEM_IN_FOLDER,
ipcMainChannels.OPEN_EXTERNAL_URL,
ipcMainChannels.OPEN_LOCAL_HTML,
ipcMainChannels.SET_SETTING,
];
// Even with mocking, the 'on' method is a real event handler,
// so we can get its registered events from the EventEmitter.
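Reading the registered channels back out of the EventEmitter could look like this (a sketch; ipcMain here is the mocked electron ipcMain these tests already use):

const registeredOnChannels = ipcMain.eventNames();
expectedOnChannels.forEach((channel) => {
  expect(registeredOnChannels).toContain(channel);
});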

View File

@ -0,0 +1,52 @@
import {
defaults,
settingsStore,
initStore,
} from '../../src/main/settingsStore';
afterEach(() => {
settingsStore.reset();
});
test('an empty store initializes to defaults', () => {
const store = initStore();
expect(store.store).toEqual(defaults);
});
test('invalid items are reset, valid items are unchanged', () => {
const data = { ...defaults };
data.nWorkers = 5; // valid, but not default
data.taskgraphLoggingLevel = 'ERROR'; // valid, but not default
data.loggingLevel = 'FOO'; // wrong value
data.language = 1; // wrong type
const store = initStore(data);
// invalid: should be reset to defaults
expect(store.get('loggingLevel')).toBe(defaults.loggingLevel);
expect(store.get('language')).toBe(defaults.language);
// valid: should not be reset to defaults
expect(store.get('taskgraphLoggingLevel')).toBe(data.taskgraphLoggingLevel);
expect(store.get('nWorkers')).toBe(data.nWorkers);
});
test('properties not present in schema are untouched during validation', () => {
const data = { ...defaults };
data.foo = 'bar';
const store = initStore(data);
expect(store.get('foo')).toEqual(data.foo);
});
test('missing properties are added with default value', () => {
const data = { ...defaults };
delete data.loggingLevel;
delete data.language;
const store = initStore(data);
expect(store.get('loggingLevel')).toEqual(defaults.loggingLevel);
expect(store.get('language')).toEqual(defaults.language);
});
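Taken together, these tests describe the validation performed by initStore: known keys are checked against the defaults, invalid or missing values fall back to the default, and unknown keys pass through untouched. A hedged sketch of that rule (not the actual implementation, which may use a JSON schema):

function validateSettings(data, defaults, allowedValues = {}) {
  const result = { ...data };
  Object.keys(defaults).forEach((key) => {
    const value = result[key];
    const wrongType = typeof value !== typeof defaults[key];
    const wrongValue = allowedValues[key] && !allowedValues[key].includes(value);
    if (value === undefined || wrongType || wrongValue) {
      result[key] = defaults[key]; // reset invalid or missing entries
    }
  });
  return result; // keys not present in defaults are left untouched
}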

View File

@ -3,7 +3,6 @@ import { ipcRenderer } from 'electron';
import {
render, waitFor, within
} from '@testing-library/react';
import { act } from 'react-dom/test-utils';
import userEvent from '@testing-library/user-event';
import '@testing-library/jest-dom';
@ -17,13 +16,12 @@ import {
} from '../../src/renderer/server_requests';
import InvestJob from '../../src/renderer/InvestJob';
import {
getSettingsValue,
saveSettingsStore,
clearSettingsStore,
} from '../../src/renderer/components/SettingsModal/SettingsStorage';
settingsStore,
setupSettingsHandlers
} from '../../src/main/settingsStore';
import { ipcMainChannels } from '../../src/main/ipcMainChannels';
import { removeIpcMainListeners } from '../../src/main/main';
import { mockUISpec } from './utils';
// It's quite a pain to dynamically mock a const from a module;
// here we do it by importing it as another object, so that
// we can overwrite the object we want to mock later.
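The comment above refers to a pattern like the following (module path and constant name are hypothetical; the in-place patch works because jest/babel transpiles these imports to mutable CommonJS objects):

import * as uiConfig from '../../src/renderer/ui_config';

// later, inside a test, the namespace object imported above is patched in place:
// uiConfig.UI_SPEC = mockUISpec(spec);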
@ -270,7 +268,7 @@ describe('Various ways to open and close InVEST models', () => {
describe('Display recently executed InVEST jobs on Home tab', () => {
beforeEach(() => {
getInvestModelNames.mockResolvedValue({});
getInvestModelNames.mockResolvedValue(MOCK_INVEST_LIST);
});
afterEach(async () => {
@ -279,7 +277,7 @@ describe('Display recently executed InVEST jobs on Home tab', () => {
test('Recent Jobs: each has a button', async () => {
const job1 = new InvestJob({
modelRunName: 'carbon',
modelRunName: MOCK_MODEL_RUN_NAME,
modelHumanName: 'Carbon Sequestration',
argsValues: {
workspace_dir: 'work1',
@ -288,7 +286,7 @@ describe('Display recently executed InVEST jobs on Home tab', () => {
});
await InvestJob.saveJob(job1);
const job2 = new InvestJob({
modelRunName: 'sdr',
modelRunName: MOCK_MODEL_RUN_NAME,
modelHumanName: 'Sediment Ratio Delivery',
argsValues: {
workspace_dir: 'work2',
@ -330,7 +328,7 @@ describe('Display recently executed InVEST jobs on Home tab', () => {
test('Recent Jobs: a job with incomplete data is skipped', async () => {
const job1 = new InvestJob({
modelRunName: 'carbon',
modelRunName: MOCK_MODEL_RUN_NAME,
modelHumanName: 'invest A',
argsValues: {
workspace_dir: 'dir',
@ -339,7 +337,7 @@ describe('Display recently executed InVEST jobs on Home tab', () => {
});
const job2 = new InvestJob({
// argsValues is missing
modelRunName: 'sdr',
modelRunName: MOCK_MODEL_RUN_NAME,
modelHumanName: 'invest B',
status: 'success',
});
@ -352,6 +350,23 @@ describe('Display recently executed InVEST jobs on Home tab', () => {
expect(queryByText(job2.modelHumanName)).toBeNull();
});
test('Recent Jobs: a job from a deprecated model is not displayed', async () => {
const job1 = new InvestJob({
modelRunName: 'does not exist',
modelHumanName: 'invest A',
argsValues: {
workspace_dir: 'dir',
},
status: 'success',
});
await InvestJob.saveJob(job1);
const { findByText, queryByText } = render(<App />);
expect(queryByText(job1.modelHumanName)).toBeNull();
expect(await findByText(/Set up a model from a sample datastack file/))
.toBeInTheDocument();
});
test('Recent Jobs: placeholder if there are no recent jobs', async () => {
const { findByText } = render(
<App />
@ -363,7 +378,7 @@ describe('Display recently executed InVEST jobs on Home tab', () => {
test('Recent Jobs: cleared by button', async () => {
const job1 = new InvestJob({
modelRunName: 'carbon',
modelRunName: MOCK_MODEL_RUN_NAME,
modelHumanName: 'Carbon Sequestration',
argsValues: {
workspace_dir: 'work1',
@ -393,42 +408,29 @@ describe('InVEST global settings: dialog interactions', () => {
const tgLoggingLabelText = 'Taskgraph logging threshold';
const languageLabelText = 'Language';
const { location } = global.window;
beforeAll(() => {
// window.location.reload is not implemented in jsdom
delete global.window.location;
Object.defineProperty(global.window, 'location', {
configurable: true,
value: { reload: jest.fn() },
});
setupSettingsHandlers();
});
afterAll(() => {
Object.defineProperty(global.window, 'location', {
configurable: true,
value: location,
});
removeIpcMainListeners();
});
beforeEach(async () => {
getInvestModelNames.mockResolvedValue({});
getSupportedLanguages.mockResolvedValue({ en: 'english', es: 'spanish' });
ipcRenderer.invoke.mockImplementation(() => Promise.resolve());
});
afterEach(async () => {
await clearSettingsStore();
});
test('Invest settings save on change', async () => {
const nWorkersLabel = 'Threaded task management (0)';
const nWorkersValue = '0';
const nWorkersValue = 0;
const loggingLevel = 'DEBUG';
const tgLoggingLevel = 'DEBUG';
const languageValue = 'es';
const spyInvoke = jest.spyOn(ipcRenderer, 'invoke');
const {
getByText, getByRole, getByLabelText, findByRole,
getByText, getByLabelText, findByRole, findByText,
} = render(
<App />
);
@ -437,74 +439,25 @@ describe('InVEST global settings: dialog interactions', () => {
const nWorkersInput = getByLabelText(nWorkersLabelText, { exact: false });
const loggingInput = getByLabelText(loggingLabelText);
const tgLoggingInput = getByLabelText(tgLoggingLabelText);
const languageInput = getByLabelText(languageLabelText, { exact: false });
await userEvent.selectOptions(nWorkersInput, [getByText(nWorkersLabel)]);
await waitFor(() => { expect(nWorkersInput).toHaveValue(nWorkersValue); });
await waitFor(() => { expect(nWorkersInput).toHaveValue(nWorkersValue.toString()); });
await userEvent.selectOptions(loggingInput, [loggingLevel]);
await waitFor(() => { expect(loggingInput).toHaveValue(loggingLevel); });
await userEvent.selectOptions(tgLoggingInput, [tgLoggingLevel]);
await waitFor(() => { expect(tgLoggingInput).toHaveValue(tgLoggingLevel); });
await userEvent.selectOptions(languageInput, [languageValue]);
await waitFor(() => { expect(languageInput).toHaveValue(languageValue); });
await userEvent.click(getByRole('button', { name: 'close settings' }));
// Check values were saved in app and in store
await userEvent.click(await findByRole('button', { name: 'settings' }));
await waitFor(() => {
expect(nWorkersInput).toHaveValue(nWorkersValue);
expect(loggingInput).toHaveValue(loggingLevel);
expect(tgLoggingInput).toHaveValue(tgLoggingLevel);
expect(languageInput).toHaveValue(languageValue);
});
expect(await getSettingsValue('nWorkers')).toBe(nWorkersValue);
expect(await getSettingsValue('loggingLevel')).toBe(loggingLevel);
expect(await getSettingsValue('taskgraphLoggingLevel')).toBe(tgLoggingLevel);
expect(await getSettingsValue('language')).toBe(languageValue);
});
// Check values were saved
expect(settingsStore.get('nWorkers')).toBe(nWorkersValue);
expect(settingsStore.get('loggingLevel')).toBe(loggingLevel);
expect(settingsStore.get('taskgraphLoggingLevel')).toBe(tgLoggingLevel);
test('Load invest settings from storage and test Reset', async () => {
const defaultSettings = {
nWorkers: '-1',
loggingLevel: 'INFO',
taskgraphLoggingLevel: 'ERROR',
language: 'en',
};
const expectedSettings = {
nWorkers: '0',
loggingLevel: 'ERROR',
taskgraphLoggingLevel: 'INFO',
language: 'en',
};
await saveSettingsStore(expectedSettings);
const {
getByText, getByLabelText, findByRole,
} = render(<App />);
await userEvent.click(await findByRole('button', { name: 'settings' }));
const nWorkersInput = getByLabelText(nWorkersLabelText, { exact: false });
const loggingInput = getByLabelText(loggingLabelText);
const tgLoggingInput = getByLabelText(tgLoggingLabelText);
// language is handled differently; changing it triggers electron to restart
const languageInput = getByLabelText(languageLabelText, { exact: false });
// Test that the invest settings were loaded in from store.
await waitFor(() => {
expect(nWorkersInput).toHaveValue(expectedSettings.nWorkers);
expect(loggingInput).toHaveValue(expectedSettings.loggingLevel);
expect(tgLoggingInput).toHaveValue(expectedSettings.tgLoggingLevel);
expect(languageInput).toHaveValue(expectedSettings.language);
});
// Test Reset sets values to default
await userEvent.click(getByText('Reset to Defaults'));
await waitFor(() => {
expect(nWorkersInput).toHaveValue(defaultSettings.nWorkers);
expect(loggingInput).toHaveValue(defaultSettings.loggingLevel);
expect(tgLoggingInput).toHaveValue(defaultSettings.tgLoggingLevel);
expect(languageInput).toHaveValue(defaultSettings.language);
});
await userEvent.selectOptions(languageInput, [languageValue]);
await userEvent.click(await findByText('Change to spanish'));
expect(spyInvoke)
.toHaveBeenCalledWith(ipcMainChannels.CHANGE_LANGUAGE, languageValue);
});
test('Access sampledata download Modal from settings', async () => {
@ -525,40 +478,3 @@ describe('InVEST global settings: dialog interactions', () => {
expect(queryByText('Settings')).toBeNull();
});
});
describe('Translation', () => {
const { location } = global.window;
beforeAll(async () => {
getInvestModelNames.mockResolvedValue({});
getSupportedLanguages.mockResolvedValue({ en: 'english', ll: 'foo' });
delete global.window.location;
Object.defineProperty(global.window, 'location', {
configurable: true,
value: { reload: jest.fn() },
});
});
afterAll(() => {
Object.defineProperty(global.window, 'location', {
configurable: true,
value: location,
});
});
test('Text rerenders in new language when language setting changes', async () => {
const { findByLabelText } = render(<App />);
await userEvent.click(await findByLabelText('settings'));
const languageInput = await findByLabelText('Language', { exact: false });
expect(languageInput).toHaveValue('en');
await userEvent.selectOptions(languageInput, 'll');
await waitFor(() => {
expect(global.window.location.reload).toHaveBeenCalled();
});
// because we can't reload the window in the test environment,
// components won't actually rerender in the new language
expect(languageInput).toHaveValue('ll');
});
});

View File

@ -12,10 +12,6 @@ import DownloadProgressBar from '../../src/renderer/components/DownloadProgressB
import sampledata_registry from '../../src/renderer/components/DataDownloadModal/sampledata_registry.json';
import { getInvestModelNames } from '../../src/renderer/server_requests';
import App from '../../src/renderer/app';
import {
clearSettingsStore,
getSettingsValue,
} from '../../src/renderer/components/SettingsModal/SettingsStorage';
import setupDownloadHandlers from '../../src/main/setupDownloadHandlers';
import { removeIpcMainListeners } from '../../src/main/main';
import { ipcMainChannels } from '../../src/main/ipcMainChannels';
@ -173,7 +169,6 @@ describe('Integration tests with main process', () => {
afterEach(async () => {
removeIpcMainListeners();
await clearSettingsStore();
});
test('Download: starts, updates progress, & stores location', async () => {
@ -200,29 +195,12 @@ describe('Integration tests with main process', () => {
const downloadButton = await findByRole('button', { name: 'Download' });
await userEvent.click(downloadButton);
const nURLs = allCheckBoxes.length - 1; // all except Select All
await waitFor(async () => {
expect(await getSettingsValue('sampleDataDir'))
.toBe(dialogData.filePaths[0]);
});
const progressBar = await findByRole('progressbar');
expect(progressBar).toHaveTextContent(`Downloading 1 of ${nURLs}`);
// The electron window's downloadURL function is mocked, so we don't
// expect the progress bar to update further in this test.
});
test('Cancel: does not store a sampleDataDir value', async () => {
const { findByRole } = render(<App isFirstRun />);
const existingValue = await getSettingsValue('sampleDataDir');
const cancelButton = await findByRole('button', { name: 'Cancel' });
await userEvent.click(cancelButton);
await waitFor(async () => {
const value = await getSettingsValue('sampleDataDir');
expect(value).toBe(existingValue);
});
});
test('Alert when download location is not writeable', async () => {
const dialogData = {
filePaths: ['foo/directory'],

View File

@ -179,7 +179,7 @@ describe('Sidebar Buttons', () => {
const args = JSON.parse(payload.args);
const argKeys = Object.keys(args);
expect(argKeys).toEqual(
expect.arrayContaining(Object.keys(spec.args).concat('n_workers'))
expect.arrayContaining(Object.keys(spec.args))
);
argKeys.forEach((key) => {
expect(typeof args[key]).toBe('string');
@ -191,7 +191,7 @@ describe('Sidebar Buttons', () => {
const response = 'saved';
saveToPython.mockResolvedValue(response);
const mockDialogData = { canceled: false, filePath: 'foo.py' };
ipcRenderer.invoke.mockResolvedValue(mockDialogData);
ipcRenderer.invoke.mockResolvedValueOnce(mockDialogData);
const { findByText, findByLabelText, findByRole } = renderInvestTab();
const saveAsButton = await findByText('Save as...');
@ -215,7 +215,7 @@ describe('Sidebar Buttons', () => {
const args = JSON.parse(payload.args);
const argKeys = Object.keys(args);
expect(argKeys).toEqual(
expect.arrayContaining(Object.keys(spec.args).concat('n_workers'))
expect.arrayContaining(Object.keys(spec.args))
);
argKeys.forEach((key) => {
expect(typeof args[key]).toBe('string');

View File

@ -163,6 +163,21 @@ describe('Arguments form input types', () => {
expect(input).toHaveValue('a');
expect(input).not.toHaveValue('b');
});
test('initial arg values can contain extra args', async () => {
const spec = baseArgsSpec('number');
const displayedValue = '1';
const missingValue = '0';
const initArgs = {
[Object.keys(spec.args)[0]]: displayedValue,
paramZ: missingValue, // paramZ is not in the ARGS_SPEC or UI_SPEC
};
const { findByLabelText, queryByText } = renderSetupFromSpec(spec, UI_SPEC, initArgs);
const input = await findByLabelText(`${spec.args.arg.name} (${spec.args.arg.units})`);
await waitFor(() => expect(input).toHaveValue(displayedValue));
expect(queryByText(missingValue)).toBeNull();
});
});
describe('Arguments form interactions', () => {
@ -577,8 +592,8 @@ describe('Misc form validation stuff', () => {
const rasterValue = './raster.tif';
const expectedVal2 = '-79.0198012081401';
const rasterBox = `[${expectedVal2}, 26.481559513537064, -78.37173806200593, 27.268061760228512]`;
const message = `Bounding boxes do not intersect: ${vectorValue}: ${vectorBox} | ${rasterValue}: ${rasterBox}`;
const newPrefix = 'Bounding box does not intersect at least one other:';
const message = `Not all of the spatial layers overlap each other. All bounding boxes must intersect: ${vectorValue}: ${vectorBox} | ${rasterValue}: ${rasterBox}`;
const newPrefix = 'Not all of the spatial layers overlap each other. Bounding box:';
const vectorMessage = new RegExp(`${newPrefix}\\s*\\[${expectedVal1}`);
const rasterMessage = new RegExp(`${newPrefix}\\s*\\[${expectedVal2}`);

File diff suppressed because it is too large