update all models and tests to use new version of read_csv_to_dataframe

Emily Soth 2023-07-06 12:44:15 -07:00
parent e85728153d
commit 73fbcadf43
32 changed files with 255 additions and 174 deletions
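
For reference, a minimal before/after sketch of the call-site migration applied throughout this commit, pieced together from the hunks that follow. It assumes nothing beyond what the diffs show; the actual signature of utils.read_csv_to_dataframe is defined in natcap.invest.utils and may accept further keyword arguments.

    # Old style: positional index column plus ad-hoc keyword flags
    bio_dict = utils.read_csv_to_dataframe(
        args['biophysical_table_path'], 'lucode',
        convert_cols_to_lower=True, convert_vals_to_lower=True,
        expand_path_cols=['path']).to_dict(orient='index')

    # New style: pass the table's spec from MODEL_SPEC; the spec's
    # 'index_col', 'columns', 'type', and 'na_allowed' entries appear to
    # drive index selection, dtype handling, and NA checking (see the new
    # validation messages asserted in the test diffs below)
    bio_dict = utils.read_csv_to_dataframe(
        args['biophysical_table_path'],
        MODEL_SPEC['args']['biophysical_table_path']).to_dict(orient='index')

    # Pass set_index=False to keep the default RangeIndex for tables that
    # should not be indexed on their index_col
    habitat_df = utils.read_csv_to_dataframe(
        habitat_table_path,
        MODEL_SPEC['args']['habitat_table_path'], set_index=False)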

View File

@ -532,7 +532,8 @@ def execute(args):
'valuation table.')
# Open/read in valuation parameters from CSV file
valuation_params = utils.read_csv_to_dataframe(
args['valuation_table_path'], 'ws_id').to_dict(orient='index')
args['valuation_table_path'], MODEL_SPEC['args']['valuation_table_path']
).to_dict(orient='index')
watershed_vector = gdal.OpenEx(
args['watersheds_path'], gdal.OF_VECTOR)
watershed_layer = watershed_vector.GetLayer()
@ -651,14 +652,16 @@ def execute(args):
# Open/read in the csv file into a dictionary and add to arguments
bio_dict = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'lucode').to_dict(orient='index')
args['biophysical_table_path'], MODEL_SPEC['args']['biophysical_table_path']
).to_dict(orient='index')
bio_lucodes = set(bio_dict.keys())
bio_lucodes.add(nodata_dict['lulc'])
LOGGER.debug(f'bio_lucodes: {bio_lucodes}')
if 'demand_table_path' in args and args['demand_table_path'] != '':
demand_dict = utils.read_csv_to_dataframe(
args['demand_table_path'], 'lucode').to_dict(orient='index')
args['demand_table_path'], MODEL_SPEC['args']['demand_table_path']
).to_dict(orient='index')
demand_reclassify_dict = dict(
[(lucode, demand_dict[lucode]['demand'])
for lucode in demand_dict])

View File

@ -368,7 +368,8 @@ def execute(args):
(_TMP_BASE_FILES, output_dir)], file_suffix)
carbon_pool_table = utils.read_csv_to_dataframe(
args['carbon_pools_path'], 'lucode').to_dict(orient='index')
args['carbon_pools_path'], MODEL_SPEC['args']['carbon_pools_path']
).to_dict(orient='index')
work_token_dir = os.path.join(
intermediate_output_dir, '_taskgraph_working_dir')

View File

@ -305,12 +305,14 @@ MODEL_SPEC = {
"index_col": "lulc-class",
"columns": {
"lulc-class": {
"type": "integer",
"type": "freestyle_string",
"na_allowed": True,
"about": gettext(
"LULC codes matching the codes in the biophysical "
"table.")},
"[LULC CODE]": {
"type": "option_string",
"na_allowed": True,
"options": {
"accum": {
"description": gettext("a state of carbon accumulation")
@ -589,7 +591,8 @@ def execute(args):
# We're assuming that the LULC initial variables and the carbon pool
# transient table are combined into a single lookup table.
biophysical_parameters = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'code').to_dict(orient='index')
args['biophysical_table_path'], MODEL_SPEC['args']['biophysical_table_path']
).to_dict(orient='index')
# LULC Classnames are critical to the transition mapping, so they must be
# unique. This check is here in ``execute`` because it's possible that
@ -969,7 +972,8 @@ def execute(args):
prices = {
year: values['price'] for (year, values) in
utils.read_csv_to_dataframe(
args['price_table_path'], 'year'
args['price_table_path'],
MODEL_SPEC['args']['price_table_path']
).to_dict(orient='index').items()}
else:
inflation_rate = float(args['inflation_rate']) * 0.01
@ -1991,7 +1995,7 @@ def _read_transition_matrix(transition_csv_path, biophysical_dict):
the pool for the landcover transition.
"""
table = utils.read_csv_to_dataframe(
transition_csv_path, convert_cols_to_lower=False, convert_vals_to_lower=False)
transition_csv_path, MODEL_SPEC['args']['landcover_transitions_table'], set_index=False)
lulc_class_to_lucode = {}
max_lucode = 0
@ -2249,7 +2253,7 @@ def _extract_snapshots_from_table(csv_path):
"""
table = utils.read_csv_to_dataframe(
csv_path, convert_vals_to_lower=False, expand_path_cols=['raster_path'])
csv_path, MODEL_SPEC['args']['landcover_snapshot_csv'], set_index=False)
output_dict = {}
table.set_index("snapshot_year", drop=False, inplace=True)

View File

@ -214,7 +214,9 @@ def execute(args):
task_name='Align input landcover rasters')
landcover_table = utils.read_csv_to_dataframe(
args['lulc_lookup_table_path'], 'code').to_dict(orient='index')
args['lulc_lookup_table_path'],
MODEL_SPEC['args']['lulc_lookup_table_path'], set_index=False
).set_index('code', drop=False).to_dict(orient='index')
target_transition_table = os.path.join(
output_dir, TRANSITION_TABLE.format(suffix=suffix))
@ -382,12 +384,15 @@ def _create_biophysical_table(landcover_table, target_biophysical_table_path):
Returns:
``None``
"""
print(landcover_table)
target_column_names = [
colname.lower() for colname in coastal_blue_carbon.MODEL_SPEC['args'][
'biophysical_table_path']['columns']]
print(target_column_names)
with open(target_biophysical_table_path, 'w') as bio_table:
bio_table.write(f"{','.join(target_column_names)}\n")
print(f"{','.join(target_column_names)}\n")
for lulc_code in sorted(landcover_table.keys()):
# 2 columns are defined below, and we need 1 less comma to only
# have commas between fields.
@ -398,6 +403,7 @@ def _create_biophysical_table(landcover_table, target_biophysical_table_path):
row.append(str(landcover_table[lulc_code][colname]))
except KeyError:
row.append('')
print(f"{','.join(row)}\n")
bio_table.write(f"{','.join(row)}\n")

View File

@ -247,17 +247,11 @@ MODEL_SPEC = {
"represented by any value and absence of the habitat "
"can be represented by 0 and nodata values.")},
"rank": {
"type": "option_string",
"options": {
"1": {"description": gettext("very high protection")},
"2": {"description": gettext("high protection")},
"3": {"description": gettext("moderate protection")},
"4": {"description": gettext("low protection")},
"5": {"description": gettext("very low protection")}
},
"type": "integer",
"about": gettext(
"Relative amount of coastline protection this habitat "
"provides.")
"provides, from 1 (very high protection) to 5 "
"(very low protection.")
},
"protection distance (m)": {
"type": "number",
@ -2318,7 +2312,7 @@ def _schedule_habitat_tasks(
"""
habitat_dataframe = utils.read_csv_to_dataframe(
habitat_table_path, convert_vals_to_lower=False, expand_path_cols=['path'])
habitat_table_path, MODEL_SPEC['args']['habitat_table_path'], set_index=False)
habitat_dataframe = habitat_dataframe.rename(
columns={'protection distance (m)': 'distance'})
@ -2838,7 +2832,9 @@ def assemble_results_and_calculate_exposure(
final_values_dict[var_name] = pickle.load(file)
habitat_df = utils.read_csv_to_dataframe(
habitat_protection_path, convert_cols_to_lower=False, convert_vals_to_lower=False)
habitat_protection_path, MODEL_SPEC['outputs']['intermediate'][
'contents']['habitats']['contents']['habitat_protection.csv'], set_index=False
).rename(columns={'r_hab': 'R_hab'})
output_layer.StartTransaction()
for feature in output_layer:
shore_id = feature.GetField(SHORE_ID_FIELD)
@ -3238,7 +3234,6 @@ def _aggregate_raster_values_in_radius(
kernel_mask &= ~utils.array_equals_nodata(array, nodata)
result[shore_id] = aggregation_op(array, kernel_mask)
with open(target_pickle_path, 'wb') as pickle_file:
pickle.dump(result, pickle_file)
@ -3468,8 +3463,7 @@ def _validate_habitat_table_paths(habitat_table_path):
ValueError if any vector in the ``path`` column cannot be opened.
"""
habitat_dataframe = utils.read_csv_to_dataframe(
habitat_table_path, convert_cols_to_lower=False, convert_vals_to_lower=False,
expand_path_cols=['path'])
habitat_table_path, MODEL_SPEC['args']['habitat_table_path'])
bad_paths = []
for habitat_row in habitat_dataframe.itertuples():
try:

View File

@ -89,7 +89,7 @@ MODEL_SPEC = {
},
"landcover_to_crop_table_path": {
"type": "csv",
"index_col": "lucode",
"index_col": "crop_name",
"columns": {
"lucode": {"type": "integer"},
"crop_name": {
@ -173,6 +173,9 @@ MODEL_SPEC = {
"type": "option_string",
"options": CROP_OPTIONS
},
"percentrefuse": {
"type": "percent"
},
**{nutrient: {
"type": "number",
"units": units
@ -417,11 +420,11 @@ _AGGREGATE_TABLE_FILE_PATTERN = os.path.join(
'.', 'aggregate_results%s.csv')
_EXPECTED_NUTRIENT_TABLE_HEADERS = [
'Protein', 'Lipid', 'Energy', 'Ca', 'Fe', 'Mg', 'Ph', 'K', 'Na', 'Zn',
'Cu', 'Fl', 'Mn', 'Se', 'VitA', 'betaC', 'alphaC', 'VitE', 'Crypto',
'Lycopene', 'Lutein', 'betaT', 'gammaT', 'deltaT', 'VitC', 'Thiamin',
'Riboflavin', 'Niacin', 'Pantothenic', 'VitB6', 'Folate', 'VitB12',
'VitK']
'protein', 'lipid', 'energy', 'ca', 'fe', 'mg', 'ph', 'k', 'na', 'zn',
'cu', 'fl', 'mn', 'se', 'vita', 'betac', 'alphac', 'vite', 'crypto',
'lycopene', 'lutein', 'betat', 'gammat', 'deltat', 'vitc', 'thiamin',
'riboflavin', 'niacin', 'pantothenic', 'vitb6', 'folate', 'vitb12',
'vitk']
_EXPECTED_LUCODE_TABLE_HEADER = 'lucode'
_NODATA_YIELD = -1
@ -470,7 +473,8 @@ def execute(args):
"""
crop_to_landcover_table = utils.read_csv_to_dataframe(
args['landcover_to_crop_table_path'], 'crop_name').to_dict(orient='index')
args['landcover_to_crop_table_path'],
MODEL_SPEC['args']['landcover_to_crop_table_path']).to_dict(orient='index')
bad_crop_name_list = []
for crop_name in crop_to_landcover_table:
crop_climate_bin_raster_path = os.path.join(
@ -552,7 +556,11 @@ def execute(args):
args['model_data_path'],
_CLIMATE_PERCENTILE_TABLE_PATTERN % crop_name)
crop_climate_percentile_table = utils.read_csv_to_dataframe(
climate_percentile_yield_table_path, 'climate_bin').to_dict(orient='index')
climate_percentile_yield_table_path,
MODEL_SPEC['args']['model_data_path']['contents'][
'climate_percentile_yield_tables']['contents'][
'[CROP]_percentile_yield_table.csv']
).to_dict(orient='index')
yield_percentile_headers = [
x for x in list(crop_climate_percentile_table.values())[0]
if x != 'climate_bin']
@ -711,8 +719,8 @@ def execute(args):
# this model data.
nutrient_table = utils.read_csv_to_dataframe(
os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
'crop', convert_cols_to_lower=False, convert_vals_to_lower=False
).to_dict(orient='index')
MODEL_SPEC['args']['model_data_path']['contents']['crop_nutrient.csv']
).to_dict(orient='index')
result_table_path = os.path.join(
output_dir, 'result_table%s.csv' % file_suffix)
@ -953,7 +961,7 @@ def tabulate_results(
# convert 100g to Mg and fraction left over from refuse
nutrient_factor = 1e4 * (
1 - nutrient_table[crop_name]['Percentrefuse'] / 100)
1 - nutrient_table[crop_name]['percentrefuse'] / 100)
for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
for yield_percentile_id in sorted(yield_percentile_headers):
total_nutrient = (
@ -1026,7 +1034,7 @@ def aggregate_to_polygons(
for crop_name in crop_to_landcover_table:
# convert 100g to Mg and fraction left over from refuse
nutrient_factor = 1e4 * (
1 - nutrient_table[crop_name]['Percentrefuse'] / 100)
1 - nutrient_table[crop_name]['percentrefuse'] / 100)
# loop over percentiles
for yield_percentile_id in yield_percentile_headers:
percentile_crop_production_raster_path = os.path.join(

View File

@ -86,7 +86,7 @@ MODEL_SPEC = {
},
"landcover_to_crop_table_path": {
"type": "csv",
"index_col": "lucode",
"index_col": "crop_name",
"columns": {
"lucode": {"type": "integer"},
"crop_name": {
@ -138,11 +138,11 @@ MODEL_SPEC = {
"type": "number",
"units": u.metric_ton/u.hectare
},
"b_nut": {"type": "number", "units": u.none},
"b_k2o": {"type": "number", "units": u.none},
"c_n": {"type": "number", "units": u.none},
"c_p2o5": {"type": "number", "units": u.none},
"c_k2o": {"type": "number", "units": u.none}
"b_nut": {"type": "number", "units": u.none, "na_allowed": True},
"b_k2o": {"type": "number", "units": u.none, "na_allowed": True},
"c_n": {"type": "number", "units": u.none, "na_allowed": True},
"c_p2o5": {"type": "number", "units": u.none, "na_allowed": True},
"c_k2o": {"type": "number", "units": u.none, "na_allowed": True}
}
}
}
@ -155,6 +155,9 @@ MODEL_SPEC = {
"type": "option_string",
"options": CROPS
},
"percentrefuse": {
"type": "percent"
},
**{nutrient: {
"about": about,
"type": "number",
@ -329,7 +332,7 @@ _REGRESSION_TABLE_PATTERN = os.path.join(
'climate_regression_yield_tables', '%s_regression_yield_table.csv')
_EXPECTED_REGRESSION_TABLE_HEADERS = [
'climate_bin', 'yield_ceiling', 'b_nut', 'b_k2o', 'c_n', 'c_p2o5', 'c_k2o']
'yield_ceiling', 'b_nut', 'b_k2o', 'c_n', 'c_p2o5', 'c_k2o']
# crop_name, yield_regression_id, file_suffix
_COARSE_YIELD_REGRESSION_PARAMETER_FILE_PATTERN = os.path.join(
@ -419,11 +422,11 @@ _AGGREGATE_TABLE_FILE_PATTERN = os.path.join(
'.', 'aggregate_results%s.csv')
_EXPECTED_NUTRIENT_TABLE_HEADERS = [
'Protein', 'Lipid', 'Energy', 'Ca', 'Fe', 'Mg', 'Ph', 'K', 'Na', 'Zn',
'Cu', 'Fl', 'Mn', 'Se', 'VitA', 'betaC', 'alphaC', 'VitE', 'Crypto',
'Lycopene', 'Lutein', 'betaT', 'gammaT', 'deltaT', 'VitC', 'Thiamin',
'Riboflavin', 'Niacin', 'Pantothenic', 'VitB6', 'Folate', 'VitB12',
'VitK']
'protein', 'lipid', 'energy', 'ca', 'fe', 'mg', 'ph', 'k', 'na', 'zn',
'cu', 'fl', 'mn', 'se', 'vita', 'betac', 'alphac', 'vite', 'crypto',
'lycopene', 'lutein', 'betat', 'gammat', 'deltat', 'vitc', 'thiamin',
'riboflavin', 'niacin', 'pantothenic', 'vitb6', 'folate', 'vitb12',
'vitk']
_EXPECTED_LUCODE_TABLE_HEADER = 'lucode'
_NODATA_YIELD = -1
@ -495,10 +498,15 @@ def execute(args):
LOGGER.info(
"Checking if the landcover raster is missing lucodes")
crop_to_landcover_table = utils.read_csv_to_dataframe(
args['landcover_to_crop_table_path'], 'crop_name').to_dict(orient='index')
args['landcover_to_crop_table_path'],
MODEL_SPEC['args']['landcover_to_crop_table_path']).to_dict(orient='index')
print(crop_to_landcover_table)
crop_to_fertlization_rate_table = utils.read_csv_to_dataframe(
args['fertilization_rate_table_path'], 'crop_name').to_dict(orient='index')
args['fertilization_rate_table_path'],
MODEL_SPEC['args']['fertilization_rate_table_path']
).to_dict(orient='index')
crop_lucodes = [
x[_EXPECTED_LUCODE_TABLE_HEADER]
@ -582,11 +590,14 @@ def execute(args):
args['model_data_path'], _REGRESSION_TABLE_PATTERN % crop_name)
crop_regression_table = utils.read_csv_to_dataframe(
crop_regression_table_path, 'climate_bin').to_dict(orient='index')
crop_regression_table_path,
MODEL_SPEC['args']['model_data_path']['contents'][
'climate_regression_yield_tables']['contents'][
'[CROP]_regression_yield_table.csv']).to_dict(orient='index')
for bin_id in crop_regression_table:
for header in _EXPECTED_REGRESSION_TABLE_HEADERS:
if crop_regression_table[bin_id][header.lower()] == '':
crop_regression_table[bin_id][header.lower()] = 0
if numpy.isnan(crop_regression_table[bin_id][header]):
crop_regression_table[bin_id][header] = 0
yield_regression_headers = [
x for x in list(crop_regression_table.values())[0]
@ -808,8 +819,8 @@ def execute(args):
# this model data.
nutrient_table = utils.read_csv_to_dataframe(
os.path.join(args['model_data_path'], 'crop_nutrient.csv'),
'crop', convert_cols_to_lower=False, convert_vals_to_lower=False
).to_dict(orient='index')
MODEL_SPEC['args']['model_data_path']['contents']['crop_nutrient.csv']
).to_dict(orient='index')
LOGGER.info("Generating report table")
result_table_path = os.path.join(
@ -1016,7 +1027,7 @@ def tabulate_regression_results(
# convert 100g to Mg and fraction left over from refuse
nutrient_factor = 1e4 * (
1 - nutrient_table[crop_name]['Percentrefuse'] / 100)
1 - nutrient_table[crop_name]['percentrefuse'] / 100)
for nutrient_id in _EXPECTED_NUTRIENT_TABLE_HEADERS:
total_nutrient = (
nutrient_factor *
@ -1085,7 +1096,7 @@ def aggregate_regression_results_to_polygons(
for crop_name in crop_to_landcover_table:
# convert 100g to Mg and fraction left over from refuse
nutrient_factor = 1e4 * (
1 - nutrient_table[crop_name]['Percentrefuse'] / 100)
1 - nutrient_table[crop_name]['percentrefuse'] / 100)
LOGGER.info(
"Calculating zonal stats for %s", crop_name)
crop_production_raster_path = os.path.join(

View File

@ -336,7 +336,7 @@ def build_datastack_archive(args, model_name, datastack_path):
data_dir, f'{key}_csv_data')
dataframe = utils.read_csv_to_dataframe(
source_path, convert_vals_to_lower=False)
source_path, args_spec[key], set_index=False)
csv_source_dir = os.path.abspath(os.path.dirname(source_path))
for spatial_column_name in spatial_columns:
# Iterate through the spatial columns, identify the set of

View File

@ -75,6 +75,7 @@ MODEL_SPEC = {
"c_above": {
"type": "number",
"units": u.metric_ton/u.hectare,
"na_allowed": True,
"about": gettext(
"Carbon density value for the aboveground carbon "
"pool.")
@ -420,7 +421,8 @@ def execute(args):
LOGGER.info('Calculating direct mapped carbon stocks')
carbon_maps = []
biophysical_table = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'lucode').to_dict(orient='index')
args['biophysical_table_path'],
MODEL_SPEC['args']['biophysical_table_path']).to_dict(orient='index')
biophysical_keys = [
x.lower() for x in list(biophysical_table.values())[0].keys()]
pool_list = [('c_above', True)]
@ -632,7 +634,8 @@ def _calculate_lulc_carbon_map(
"""
# classify forest pixels from lulc
biophysical_table = utils.read_csv_to_dataframe(
biophysical_table_path, 'lucode').to_dict(orient='index')
biophysical_table_path, MODEL_SPEC['args']['biophysical_table_path']
).to_dict(orient='index')
lucode_to_per_cell_carbon = {}
cell_size = pygeoprocessing.get_raster_info(
@ -698,7 +701,8 @@ def _map_distance_from_tropical_forest_edge(
"""
# Build a list of forest lucodes
biophysical_table = utils.read_csv_to_dataframe(
biophysical_table_path, 'lucode').to_dict(orient='index')
biophysical_table_path, MODEL_SPEC['args']['biophysical_table_path']
).to_dict(orient='index')
forest_codes = [
lucode for (lucode, ludata) in biophysical_table.items()
if int(ludata['is_tropical_forest']) == 1]

View File

@ -124,6 +124,7 @@ MODEL_SPEC = {
},
"fut_path": {
"required": "lulc_fut_path",
"na_allowed": True,
"type": "raster",
"bands": {1: {"type": "ratio"}},
"about": gettext(
@ -134,6 +135,7 @@ MODEL_SPEC = {
},
"base_path": {
"required": "lulc_bas_path",
"na_allowed": True,
"type": "raster",
"bands": {1: {"type": "ratio"}},
"about": gettext(
@ -174,6 +176,10 @@ MODEL_SPEC = {
"index_col": "lulc",
"columns": {
"lulc": spec_utils.LULC_TABLE_COLUMN,
"name": {
"type": "freestyle_string",
"required": False
},
"habitat": {
"type": "ratio",
"about": gettext(
@ -383,11 +389,12 @@ def execute(args):
# Get CSVs as dictionaries and ensure the key is a string for threats.
threat_dict = {
str(key): value for key, value in utils.read_csv_to_dataframe(
args['threats_table_path'], 'THREAT',
expand_path_cols=['cur_path', 'fut_path', 'base_path']
).to_dict(orient='index').items()}
args['threats_table_path'],
MODEL_SPEC['args']['threats_table_path']
).to_dict(orient='index').items()}
sensitivity_dict = utils.read_csv_to_dataframe(
args['sensitivity_table_path'], 'LULC').to_dict(orient='index')
args['sensitivity_table_path'],
MODEL_SPEC['args']['sensitivity_table_path']).to_dict(orient='index')
half_saturation_constant = float(args['half_saturation_constant'])
@ -1156,15 +1163,16 @@ def validate(args, limit_to=None):
if ("threats_table_path" not in invalid_keys and
"sensitivity_table_path" not in invalid_keys and
"threat_raster_folder" not in invalid_keys):
# Get CSVs as dictionaries and ensure the key is a string for threats.
threat_dict = {
str(key): value for key, value in utils.read_csv_to_dataframe(
args['threats_table_path'], 'THREAT',
expand_path_cols=['cur_path', 'fut_path', 'base_path']
).to_dict(orient='index').items()}
args['threats_table_path'],
MODEL_SPEC['args']['threats_table_path']
).to_dict(orient='index').items()}
sensitivity_dict = utils.read_csv_to_dataframe(
args['sensitivity_table_path'], 'LULC').to_dict(orient='index')
args['sensitivity_table_path'],
MODEL_SPEC['args']['sensitivity_table_path']
).to_dict(orient='index')
# check that the threat names in the threats table match with the
# threats columns in the sensitivity table.

View File

@ -106,6 +106,7 @@ MODEL_SPEC = {
"stressor buffer (meters)": {
"type": "number",
"units": u.meter,
"na_allowed": True,
"about": gettext(
"The desired buffer distance used to expand a given "
"stressors influence or footprint. This should be "
@ -1843,11 +1844,15 @@ def _open_table_as_dataframe(table_path, **kwargs):
excel_df.columns = excel_df.columns.str.lower()
excel_df['path'] = excel_df['path'].apply(
lambda p: utils.expand_path(p, table_path))
excel_df['name'] = excel_df['name'].astype('string')
excel_df['type'] = excel_df['type'].astype('string')
excel_df['stressor buffer (meters)'] = excel_df['stressor buffer (meters)'].astype(float)
excel_df = excel_df.set_index('name')
return excel_df
else:
return utils.read_csv_to_dataframe(
table_path, convert_vals_to_lower=False,
expand_path_cols=['path'], **kwargs)
df = utils.read_csv_to_dataframe(
table_path, MODEL_SPEC['args']['info_table_path'], **kwargs)
return df
def _parse_info_table(info_table_path):
@ -1872,8 +1877,12 @@ def _parse_info_table(info_table_path):
"""
info_table_path = os.path.abspath(info_table_path)
table = _open_table_as_dataframe(info_table_path)
table = table.set_index('name')
try:
table = _open_table_as_dataframe(info_table_path)
except ValueError as err:
if 'Index has duplicate keys' in str(err):
raise ValueError("Habitat and stressor names may not overlap.")
table = table.rename(columns={'stressor buffer (meters)': 'buffer'})
# Drop the buffer column from the habitats list; we don't need it.
@ -1884,15 +1893,6 @@ def _parse_info_table(info_table_path):
stressors = table.loc[table['type'] == 'stressor'].drop(
columns=['type']).to_dict(orient='index')
# habitats and stressors must be nonoverlapping sets.
repeated_habitats_stressors = set(
habitats.keys()).intersection(stressors.keys())
if repeated_habitats_stressors:
raise ValueError(
"Habitat and stressor names may not overlap. These names are "
"both habitats and stressors: "
f"{', '.join(repeated_habitats_stressors)}")
return (habitats, stressors)

View File

@ -621,7 +621,9 @@ def execute(args):
nutrients_to_process.append(nutrient_id)
lucode_to_parameters = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'lucode').to_dict(orient='index')
args['biophysical_table_path'],
MODEL_SPEC['args']['biophysical_table_path']
).to_dict(orient='index')
_validate_inputs(nutrients_to_process, lucode_to_parameters)

View File

@ -324,7 +324,7 @@ _INDEX_NODATA = -1
_NESTING_SUBSTRATE_PATTERN = 'nesting_([^_]+)_availability_index'
_FLORAL_RESOURCES_AVAILABLE_PATTERN = 'floral_resources_([^_]+)_index'
_EXPECTED_BIOPHYSICAL_HEADERS = [
'lucode', _NESTING_SUBSTRATE_PATTERN, _FLORAL_RESOURCES_AVAILABLE_PATTERN]
_NESTING_SUBSTRATE_PATTERN, _FLORAL_RESOURCES_AVAILABLE_PATTERN]
# These are patterns expected in the guilds table
_NESTING_SUITABILITY_PATTERN = 'nesting_suitability_([^_]+)_index'
@ -334,7 +334,7 @@ _FORAGING_ACTIVITY_RE_PATTERN = _FORAGING_ACTIVITY_PATTERN % '([^_]+)'
_RELATIVE_SPECIES_ABUNDANCE_FIELD = 'relative_abundance'
_ALPHA_HEADER = 'alpha'
_EXPECTED_GUILD_HEADERS = [
'species', _NESTING_SUITABILITY_PATTERN, _FORAGING_ACTIVITY_RE_PATTERN,
_NESTING_SUITABILITY_PATTERN, _FORAGING_ACTIVITY_RE_PATTERN,
_ALPHA_HEADER, _RELATIVE_SPECIES_ABUNDANCE_FIELD]
_NESTING_SUBSTRATE_INDEX_FILEPATTERN = 'nesting_substrate_index_%s%s.tif'
@ -1182,7 +1182,8 @@ def _parse_scenario_variables(args):
farm_vector_path = None
guild_table = utils.read_csv_to_dataframe(
guild_table_path, 'species').to_dict(orient='index')
guild_table_path, MODEL_SPEC['args']['guild_table_path']
).to_dict(orient='index')
LOGGER.info('Checking to make sure guild table has all expected headers')
guild_headers = list(guild_table.values())[0].keys()
@ -1195,7 +1196,9 @@ def _parse_scenario_variables(args):
f"headers from {guild_table_path}: {', '.join(guild_headers)}")
landcover_biophysical_table = utils.read_csv_to_dataframe(
landcover_biophysical_table_path, 'lucode').to_dict(orient='index')
landcover_biophysical_table_path,
MODEL_SPEC['args']['landcover_biophysical_table_path']
).to_dict(orient='index')
biophysical_table_headers = (
list(landcover_biophysical_table.values())[0].keys())
for header in _EXPECTED_BIOPHYSICAL_HEADERS:

View File

@ -861,8 +861,8 @@ def _schedule_predictor_data_processing(
}
predictor_table = utils.read_csv_to_dataframe(
predictor_table_path, 'id', expand_path_cols=['path']
).to_dict(orient='index')
predictor_table_path, MODEL_SPEC['args']['predictor_table_path']
).to_dict(orient='index')
predictor_task_list = []
predictor_json_list = [] # tracks predictor files to add to shp
@ -1555,7 +1555,8 @@ def _validate_same_id_lengths(table_path):
"""
predictor_table = utils.read_csv_to_dataframe(
table_path, 'id').to_dict(orient='index')
table_path, MODEL_SPEC['args']['predictor_table_path']
).to_dict(orient='index')
too_long = set()
for p_id in predictor_table:
if len(p_id) > 10:
@ -1589,10 +1590,13 @@ def _validate_same_ids_and_types(
"""
predictor_table = utils.read_csv_to_dataframe(
predictor_table_path, 'id').to_dict(orient='index')
predictor_table_path, MODEL_SPEC['args']['predictor_table_path']
).to_dict(orient='index')
scenario_predictor_table = utils.read_csv_to_dataframe(
scenario_predictor_table_path, 'id').to_dict(orient='index')
scenario_predictor_table_path,
MODEL_SPEC['args']['scenario_predictor_table_path']
).to_dict(orient='index')
predictor_table_pairs = set([
(p_id, predictor_table[p_id]['type'].strip()) for p_id in predictor_table])
@ -1625,7 +1629,7 @@ def _validate_same_projection(base_vector_path, table_path):
# This will load the table as a list of paths which we can iterate through
# without bothering the rest of the table structure
data_paths = utils.read_csv_to_dataframe(
table_path, convert_vals_to_lower=False, expand_path_cols=['path']
table_path, MODEL_SPEC['args']['predictor_table_path']
).squeeze('columns')['path'].tolist()
base_vector = gdal.OpenEx(base_vector_path, gdal.OF_VECTOR)
@ -1682,7 +1686,8 @@ def _validate_predictor_types(table_path):
ValueError if any value in the ``type`` column does not match a valid
type, ignoring leading/trailing whitespace.
"""
df = utils.read_csv_to_dataframe(table_path, convert_vals_to_lower=False)
df = utils.read_csv_to_dataframe(
table_path, MODEL_SPEC['args']['predictor_table_path'])
# ignore leading/trailing whitespace because it will be removed
# when the type values are used
type_list = set([type.strip() for type in df['type']])

View File

@ -500,7 +500,8 @@ def execute(args):
"""
file_suffix = utils.make_suffix_string(args, 'results_suffix')
biophysical_table = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'lucode').to_dict(orient='index')
args['biophysical_table_path'], MODEL_SPEC['args']['biophysical_table_path']
).to_dict(orient='index')
# Test to see if c or p values are outside of 0..1
for table_key in ['usle_c', 'usle_p']:

View File

@ -567,11 +567,13 @@ def _execute(args):
not args['user_defined_climate_zones']):
rain_events_lookup = (
utils.read_csv_to_dataframe(
args['rain_events_table_path'], 'month'
).to_dict(orient='index'))
args['rain_events_table_path'],
MODEL_SPEC['args']['rain_events_table_path']
).to_dict(orient='index'))
biophysical_table = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'lucode').to_dict(orient='index')
args['biophysical_table_path'],
MODEL_SPEC['args']['biophysical_table_path']).to_dict(orient='index')
bad_value_list = []
for lucode, value in biophysical_table.items():
@ -598,7 +600,8 @@ def _execute(args):
alpha_month_map = dict(
(key, val['alpha']) for key, val in
utils.read_csv_to_dataframe(
args['monthly_alpha_path'], 'month'
args['monthly_alpha_path'],
MODEL_SPEC['args']['monthly_alpha_path']
).to_dict(orient='index').items())
else:
# make all 12 entries equal to args['alpha_m']
@ -768,7 +771,8 @@ def _execute(args):
if args['user_defined_climate_zones']:
cz_rain_events_lookup = (
utils.read_csv_to_dataframe(
args['climate_zone_table_path'], 'cz_id'
args['climate_zone_table_path'],
MODEL_SPEC['args']['climate_zone_table_path']
).to_dict(orient='index'))
month_label = MONTH_ID_TO_LABEL[month_id]
climate_zone_rain_events_month = dict([

View File

@ -484,7 +484,8 @@ def execute(args):
# Build a lookup dictionary mapping each LULC code to its row
biophysical_dict = utils.read_csv_to_dataframe(
args['biophysical_table'], 'lucode').to_dict(orient='index')
args['biophysical_table'], MODEL_SPEC['args']['biophysical_table']
).to_dict(orient='index')
# sort the LULC codes upfront because we use the sorted list in multiple
# places. it's more efficient to do this once.
sorted_lucodes = sorted(biophysical_dict)

View File

@ -413,7 +413,8 @@ def execute(args):
args['workspace_dir'], 'intermediate')
utils.make_directories([args['workspace_dir'], intermediate_dir])
biophysical_lucode_map = utils.read_csv_to_dataframe(
args['biophysical_table_path'], 'lucode').to_dict(orient='index')
args['biophysical_table_path'], MODEL_SPEC['args']['biophysical_table_path']
).to_dict(orient='index')
# cast to float and calculate relative weights
# Use default weights for shade, albedo, eti if the user didn't provide
@ -1082,7 +1083,9 @@ def calculate_energy_savings(
type_field_index = fieldnames.index('type')
energy_consumption_table = utils.read_csv_to_dataframe(
energy_consumption_table_path, 'type').to_dict(orient='index')
energy_consumption_table_path,
MODEL_SPEC['args']['energy_consumption_table_path']
).to_dict(orient='index')
target_building_layer.StartTransaction()
last_time = time.time()

View File

@ -309,7 +309,9 @@ def execute(args):
# Load CN table
cn_table = utils.read_csv_to_dataframe(
args['curve_number_table_path'], 'lucode').to_dict(orient='index')
args['curve_number_table_path'],
MODEL_SPEC['args']['curve_number_table_path']
).to_dict(orient='index')
# make cn_table into a 2d array where first dim is lucode, second is
# 0..3 to correspond to CN_A..CN_D
@ -651,7 +653,9 @@ def _calculate_damage_to_infrastructure_in_aoi(
infrastructure_layer = infrastructure_vector.GetLayer()
damage_type_map = utils.read_csv_to_dataframe(
structures_damage_table, 'type').to_dict(orient='index')
structures_damage_table,
MODEL_SPEC['args']['infrastructure_damage_loss_table_path']
).to_dict(orient='index')
infrastructure_layer_defn = infrastructure_layer.GetLayerDefn()
type_index = -1

View File

@ -92,6 +92,7 @@ MODEL_SPEC = {
'units': u.meter,
'required':
f'search_radius_mode == "{RADIUS_OPT_URBAN_NATURE}"',
'na_allowed': True,
'expression': 'value >= 0',
'about': (
'The distance within which a LULC type is relevant '
@ -260,7 +261,7 @@ MODEL_SPEC = {
'index_col': 'pop_group',
'columns': {
"pop_group": {
"type": "ratio",
"type": "freestyle_string",
"required": False,
"about": gettext(
"The name of the population group. Names must match "
@ -904,7 +905,8 @@ def execute(args):
aoi_reprojection_task, lulc_mask_task]
)
attr_table = utils.read_csv_to_dataframe(args['lulc_attribute_table'])
attr_table = utils.read_csv_to_dataframe(
args['lulc_attribute_table'], MODEL_SPEC['args']['lulc_attribute_table'], set_index=False)
kernel_paths = {} # search_radius, kernel path
kernel_tasks = {} # search_radius, kernel task
@ -924,7 +926,8 @@ def execute(args):
index=False, name=None))
elif args['search_radius_mode'] == RADIUS_OPT_POP_GROUP:
pop_group_table = utils.read_csv_to_dataframe(
args['population_group_radii_table'])
args['population_group_radii_table'],
MODEL_SPEC['args']['population_group_radii_table'], set_index=False)
search_radii = set(pop_group_table['search_radius_m'].unique())
# Build a dict of {pop_group: search_radius_m}
search_radii_by_pop_group = dict(
@ -1189,7 +1192,8 @@ def execute(args):
# Create a dict of {pop_group: search_radius_m}
group_radii_table = utils.read_csv_to_dataframe(
args['population_group_radii_table'])
args['population_group_radii_table'],
MODEL_SPEC['args']['population_group_radii_table'], set_index=False)
search_radii = dict(
group_radii_table[['pop_group', 'search_radius_m']].itertuples(
index=False, name=None))
@ -1751,7 +1755,8 @@ def _reclassify_urban_nature_area(
``None``
"""
attribute_table_dict = utils.read_csv_to_dataframe(
lulc_attribute_table, 'lucode').to_dict(orient='index')
lulc_attribute_table, MODEL_SPEC['args']['lulc_attribute_table'], set_index=False
).to_dict(orient='index')
squared_pixel_area = abs(
numpy.multiply(*_square_off_pixels(lulc_raster_path)))

View File

@ -747,7 +747,7 @@ def execute(args):
# arrays. Also store the amount of energy the machine produces
# in a certain wave period/height state as a 2D array
machine_perf_dict = {}
machine_perf_data = utils.read_csv_to_dataframe(args['machine_perf_path'])
machine_perf_data = pandas.read_csv(args['machine_perf_path'])
# Get the wave period fields, starting from the second column of the table
machine_perf_dict['periods'] = machine_perf_data.columns.values[1:]
# Build up the height field by taking the first column of the table
@ -781,7 +781,7 @@ def execute(args):
if 'land_gridPts_path' in args:
# Create a grid_land_data dataframe for later use in valuation
grid_land_data = utils.read_csv_to_dataframe(
args['land_gridPts_path'], convert_vals_to_lower=False)
args['land_gridPts_path'], MODEL_SPEC['args']['land_gridPts_path'], set_index=False)
required_col_names = ['id', 'type', 'lat', 'long', 'location']
grid_land_data, missing_grid_land_fields = _get_validated_dataframe(
args['land_gridPts_path'], required_col_names)
@ -1084,10 +1084,8 @@ def execute(args):
grid_vector_path = os.path.join(
output_dir, 'GridPts_prj%s.shp' % file_suffix)
grid_data = grid_land_data.loc[
grid_land_data['type'].str.upper() == 'GRID']
land_data = grid_land_data.loc[
grid_land_data['type'].str.upper() == 'LAND']
grid_data = grid_land_data.loc[grid_land_data['type'] == 'grid']
land_data = grid_land_data.loc[grid_land_data['type'] == 'land']
grid_dict = grid_data.to_dict('index')
land_dict = land_data.to_dict('index')
@ -1429,7 +1427,8 @@ def _get_validated_dataframe(csv_path, field_list):
missing_fields (list): missing fields as string format in dataframe.
"""
dataframe = utils.read_csv_to_dataframe(csv_path, convert_vals_to_lower=False)
dataframe = utils.read_csv_to_dataframe(csv_path,
MODEL_SPEC['args']['land_gridPts_path'], set_index=False)
missing_fields = []
for field in field_list:
if field not in dataframe.columns:
@ -1497,6 +1496,7 @@ def _dict_to_point_vector(base_dict_data, target_vector_path, layer_name,
for point_dict in base_dict_data.values():
latitude = float(point_dict['lat'])
longitude = float(point_dict['long'])
point_dict['id'] = int(point_dict['id'])
# When projecting to WGS84, extents -180 to 180 are used for longitude.
# In case input longitude is from -360 to 0 convert
if longitude < -180:
@ -1509,7 +1509,7 @@ def _dict_to_point_vector(base_dict_data, target_vector_path, layer_name,
target_layer.CreateFeature(output_feature)
for field_name in point_dict:
output_feature.SetField(field_name, point_dict[field_name])
output_feature.SetField(field_name.upper(), point_dict[field_name])
output_feature.SetGeometryDirectly(geom)
target_layer.SetFeature(output_feature)
output_feature = None
@ -1674,7 +1674,13 @@ def _machine_csv_to_dict(machine_csv_path):
machine_dict = {}
# make columns and indexes lowercased and strip whitespace
machine_data = utils.read_csv_to_dataframe(
machine_csv_path, 'name', convert_vals_to_lower=False)
machine_csv_path,
{
'index_col': 'name',
'columns': {
'name': {'type': 'freestyle_string'},
'value': {'type': 'number'}
}})
machine_data.index = machine_data.index.str.strip()
machine_data.index = machine_data.index.str.lower()
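
As the machine-parameter hunk above suggests, a table with no corresponding entry in MODEL_SPEC can apparently be read by passing an inline spec dict with the same 'index_col'/'columns' structure. A hedged usage sketch, reusing the column names from the hunk above (machine_csv_path stands in for the caller's table path):

    # Inline spec for a two-column table indexed on 'name'
    machine_data = utils.read_csv_to_dataframe(
        machine_csv_path,
        {
            'index_col': 'name',
            'columns': {
                'name': {'type': 'freestyle_string'},
                'value': {'type': 'number'},
            },
        }).to_dict(orient='index')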

View File

@ -756,7 +756,9 @@ def execute(args):
# If Price Table provided use that for price of energy, validate inputs
time = int(val_parameters_dict['time_period'])
if args['price_table']:
wind_price_df = utils.read_csv_to_dataframe(args['wind_schedule'])
wind_price_df = utils.read_csv_to_dataframe(
args['wind_schedule'], MODEL_SPEC['args']['wind_schedule'],
set_index=False)
year_count = len(wind_price_df['year'])
if year_count != time + 1:
@ -1136,13 +1138,11 @@ def execute(args):
# Read the grid points csv, and convert it to land and grid dictionary
grid_land_df = utils.read_csv_to_dataframe(
args['grid_points_path'], convert_vals_to_lower=False)
args['grid_points_path'], MODEL_SPEC['args']['grid_points_path'], set_index=False)
# Make separate dataframes based on 'TYPE'
grid_df = grid_land_df.loc[(
grid_land_df['type'].str.upper() == 'GRID')]
land_df = grid_land_df.loc[(
grid_land_df['type'].str.upper() == 'LAND')]
grid_df = grid_land_df.loc[(grid_land_df['type'] == 'grid')]
land_df = grid_land_df.loc[(grid_land_df['type'] == 'land')]
# Convert the dataframes to dictionaries, using 'ID' (the index) as key
grid_df.set_index('id', inplace=True)
@ -1976,7 +1976,8 @@ def _read_csv_wind_data(wind_data_path, hub_height):
"""
wind_point_df = utils.read_csv_to_dataframe(
wind_data_path, convert_cols_to_lower=False, convert_vals_to_lower=False)
wind_data_path, MODEL_SPEC['args']['wind_data_path'], set_index=False)
wind_point_df.columns = wind_point_df.columns.str.upper()
# Calculate scale value at new hub height given reference values.
# See equation 3 in users guide

View File

@ -63,7 +63,7 @@ class AnnualWaterYieldTests(unittest.TestCase):
with self.assertRaises(ValueError) as cm:
annual_water_yield.execute(args)
self.assertTrue('veg value must be either 1 or 0' in str(cm.exception))
self.assertIn('Empty or NA values are not allowed', str(cm.exception))
table_df = pandas.read_csv(args['biophysical_table_path'])
table_df['LULC_veg'] = ['-1']*len(table_df.index)
@ -72,7 +72,7 @@ class AnnualWaterYieldTests(unittest.TestCase):
with self.assertRaises(ValueError) as cm:
annual_water_yield.execute(args)
self.assertTrue('veg value must be either 1 or 0' in str(cm.exception))
self.assertIn('value must be either 1 or 0, not -1', str(cm.exception))
def test_missing_lulc_value(self):
"""Hydro: catching missing LULC value in Biophysical table."""

View File

@ -10,6 +10,7 @@ import textwrap
import unittest
import numpy
import pandas
import pygeoprocessing
from natcap.invest import utils
from osgeo import gdal
@ -151,10 +152,12 @@ class TestPreprocessor(unittest.TestCase):
pprint.pformat(non_suffixed_files)))
expected_landcover_codes = set(range(0, 24))
found_landcover_codes = set(utils.read_csv_to_dataframe(
os.path.join(outputs_dir,
'carbon_biophysical_table_template_150225.csv'),
'code').to_dict(orient='index').keys())
print('\n\n')
print(pandas.read_csv(
os.path.join(outputs_dir, 'carbon_biophysical_table_template_150225.csv')))
found_landcover_codes = set(pandas.read_csv(
os.path.join(outputs_dir, 'carbon_biophysical_table_template_150225.csv')
)['code'].values)
self.assertEqual(expected_landcover_codes, found_landcover_codes)
def test_transition_table(self):
@ -189,7 +192,9 @@ class TestPreprocessor(unittest.TestCase):
lulc_csv.write('1,parking lot,False\n')
landcover_table = utils.read_csv_to_dataframe(
landcover_table_path, 'code').to_dict(orient='index')
landcover_table_path,
preprocessor.MODEL_SPEC['args']['lulc_lookup_table_path']
).to_dict(orient='index')
target_table_path = os.path.join(self.workspace_dir,
'transition_table.csv')
@ -204,7 +209,9 @@ class TestPreprocessor(unittest.TestCase):
# Re-load the landcover table
landcover_table = utils.read_csv_to_dataframe(
landcover_table_path, 'code').to_dict(orient='index')
landcover_table_path,
preprocessor.MODEL_SPEC['args']['lulc_lookup_table_path']
).to_dict(orient='index')
preprocessor._create_transition_table(
landcover_table, [filename_a, filename_b], target_table_path)

View File

@ -57,20 +57,17 @@ class CropProductionTests(unittest.TestCase):
expected_agg_result_table_path = os.path.join(
TEST_DATA_PATH, 'expected_aggregate_results.csv')
expected_agg_result_table = pandas.read_csv(
expected_agg_result_table_path)
expected_agg_result_table_path).rename(str.lower, axis='columns')
agg_result_table = pandas.read_csv(
agg_result_table_path)
pandas.testing.assert_frame_equal(
expected_agg_result_table, agg_result_table, check_dtype=False)
result_table_path = os.path.join(
args['workspace_dir'], 'result_table.csv')
expected_result_table_path = os.path.join(
TEST_DATA_PATH, 'expected_result_table.csv')
expected_result_table = pandas.read_csv(
expected_result_table_path)
os.path.join(TEST_DATA_PATH, 'expected_result_table.csv')
).rename(str.lower, axis='columns')
result_table = pandas.read_csv(
result_table_path)
os.path.join(args['workspace_dir'], 'result_table.csv'))
pandas.testing.assert_frame_equal(
expected_result_table, result_table, check_dtype=False)
@ -127,6 +124,7 @@ class CropProductionTests(unittest.TestCase):
TEST_DATA_PATH, 'expected_result_table_no_nodata.csv')
expected_result_table = pandas.read_csv(
expected_result_table_path)
expected_result_table.columns = expected_result_table.columns.str.lower()
result_table = pandas.read_csv(
result_table_path)
pandas.testing.assert_frame_equal(
@ -314,14 +312,11 @@ class CropProductionTests(unittest.TestCase):
crop_production_regression.execute(args)
agg_result_table_path = os.path.join(
args['workspace_dir'], 'aggregate_results.csv')
expected_agg_result_table_path = os.path.join(
TEST_DATA_PATH, 'expected_regression_aggregate_results.csv')
expected_agg_result_table = pandas.read_csv(
expected_agg_result_table_path)
os.path.join(TEST_DATA_PATH, 'expected_regression_aggregate_results.csv')
).rename(str.lower, axis='columns')
agg_result_table = pandas.read_csv(
agg_result_table_path)
os.path.join(args['workspace_dir'], 'aggregate_results.csv'))
pandas.testing.assert_frame_equal(
expected_agg_result_table, agg_result_table, check_dtype=False)
@ -387,14 +382,11 @@ class CropProductionTests(unittest.TestCase):
crop_production_regression.execute(args)
result_table_path = os.path.join(
args['workspace_dir'], 'result_table.csv')
expected_result_table_path = os.path.join(
TEST_DATA_PATH, 'expected_regression_result_table_no_nodata.csv')
expected_result_table = pandas.read_csv(
expected_result_table_path)
expected_result_table = pandas.read_csv(os.path.join(
TEST_DATA_PATH, 'expected_regression_result_table_no_nodata.csv'
)).rename(str.lower, axis='columns')
result_table = pandas.read_csv(
result_table_path)
os.path.join(args['workspace_dir'], 'result_table.csv'))
pandas.testing.assert_frame_equal(
expected_result_table, result_table, check_dtype=False)

View File

@ -378,8 +378,14 @@ class DatastackArchiveTests(unittest.TestCase):
filecmp.cmp(archive_params[key], params[key], shallow=False))
spatial_csv_dict = utils.read_csv_to_dataframe(
archive_params['spatial_table'], 'ID',
convert_cols_to_lower=True, convert_vals_to_lower=True).to_dict(orient='index')
archive_params['spatial_table'],
{
'index_col': 'id',
'columns': {
'id': {'type': 'integer'},
'path': {'type': 'file'}
}
}).to_dict(orient='index')
spatial_csv_dir = os.path.dirname(archive_params['spatial_table'])
numpy.testing.assert_allclose(
pygeoprocessing.raster_to_numpy_array(

View File

@ -159,9 +159,9 @@ class ForestCarbonEdgeTests(unittest.TestCase):
with self.assertRaises(ValueError) as cm:
forest_carbon_edge_effect.execute(args)
expected_message = 'Could not interpret carbon pool value'
actual_message = str(cm.exception)
self.assertTrue(expected_message in actual_message, actual_message)
self.assertTrue(
'Empty or NA values are not allowed' in actual_message, actual_message)
def test_missing_lulc_value(self):
"""Forest Carbon Edge: test with missing LULC value."""

View File

@ -749,12 +749,14 @@ class HRAUnitTests(unittest.TestCase):
# No matter the supported file format, make sure we have consistent
# table headings.
source_df = pandas.read_csv(io.StringIO(textwrap.dedent("""\
FOO,bar,BaZ,path
1, 2, 3,foo.tif""")))
source_df = pandas.DataFrame({
'name': pandas.Series(['1'], dtype='string'),
'type': pandas.Series(['2'], dtype='string'),
'stressor buffer (meters)': pandas.Series([3], dtype=float),
'path': pandas.Series(['foo.tif'], dtype='string')
})
expected_df = source_df.copy() # defaults to a deepcopy.
expected_df.columns = expected_df.columns.str.lower()
expected_df = source_df.copy().set_index('name') # defaults to a deepcopy.
expected_df['path'] = [os.path.join(self.workspace_dir, 'foo.tif')]
for filename, func in [('target.csv', source_df.to_csv),
@ -763,7 +765,7 @@ class HRAUnitTests(unittest.TestCase):
func(full_filepath, index=False)
opened_df = hra._open_table_as_dataframe(full_filepath)
pandas.testing.assert_frame_equal(expected_df, opened_df)
pandas.testing.assert_frame_equal(expected_df, opened_df, check_index_type=False)
def test_pairwise_risk(self):
"""HRA: check pairwise risk calculations."""

View File

@ -972,8 +972,9 @@ class RecreationRegressionTests(unittest.TestCase):
# make outputs to be overwritten
predictor_dict = utils.read_csv_to_dataframe(
predictor_table_path, 'id',
convert_cols_to_lower=True, convert_vals_to_lower=True).to_dict(orient='index')
predictor_table_path,
recmodel_client.MODEL_SPEC['args']['predictor_table_path']
).to_dict(orient='index')
predictor_list = predictor_dict.keys()
tmp_working_dir = tempfile.mkdtemp(dir=self.workspace_dir)
empty_json_list = [

View File

@ -303,8 +303,7 @@ class SDRTests(unittest.TestCase):
with self.assertRaises(ValueError) as context:
sdr.execute(args)
self.assertIn(
f'A value in the biophysical table is not a number '
f'within range 0..1.', str(context.exception))
f'could not be interpreted as ratios', str(context.exception))
def test_lucode_not_a_number(self):
"""SDR test expected exception for invalid data in lucode column."""
@ -325,8 +324,7 @@ class SDRTests(unittest.TestCase):
with self.assertRaises(ValueError) as context:
sdr.execute(args)
self.assertIn(
f'Value "{invalid_value}" from the "lucode" column of the '
f'biophysical table is not a number.', str(context.exception))
'could not be interpreted as integers', str(context.exception))
def test_missing_lulc_value(self):
"""SDR test for ValueError when LULC value not found in table."""

View File

@ -737,8 +737,8 @@ class SeasonalWaterYieldRegressionTests(unittest.TestCase):
with self.assertRaises(ValueError) as context:
seasonal_water_yield.execute(args)
self.assertTrue(
'expecting all floating point numbers' in str(context.exception))
self.assertIn(
'could not be interpreted as numbers', str(context.exception))
def test_monthly_alpha_regression(self):
"""SWY monthly alpha values regression test on sample data.

View File

@ -494,6 +494,7 @@ class UNATests(unittest.TestCase):
new_search_radius_values[7] = 30 * 9 # make one a duplicate distance.
attribute_table['search_radius_m'] = attribute_table['lucode'].map(
new_search_radius_values)
print(attribute_table)
attribute_table.to_csv(args['lulc_attribute_table'], index=False)
urban_nature_access.execute(args)