Re #BITBUCKET-3626: starting regression model for crop production

This commit is contained in:
Rich Sharp 2017-05-05 08:56:25 -07:00
parent aeb844f776
commit 4d21d7db0d
3 changed files with 35 additions and 40 deletions

View File

@ -15,15 +15,26 @@ from . import utils
logging.basicConfig(format='%(asctime)s %(name)-20s %(levelname)-8s \
%(message)s', level=logging.DEBUG, datefmt='%m/%d/%Y %H:%M:%S ')
LOGGER = logging.getLogger('natcap.invest.crop_production_percentile')
LOGGER = logging.getLogger('natcap.invest.crop_production_regression')
_INTERMEDIATE_OUTPUT_DIR = 'intermediate_output'
_REGRESSION_TABLE_PATTERN = os.path.join(
'climate_regression_yield_tables', '%s_regression_yield_table.csv')
_EXPECTED_REGRESSION_TABLE_HEADERS = [
'climate_bin', 'yield_ceiling', 'b_nut', 'b_K2O', 'c_N', 'c_P2O5',
'c_K2O']
###old constants below
_YIELD_PERCENTILE_FIELD_PATTERN = 'yield_([^_]+)'
_GLOBAL_OBSERVED_YIELD_FILE_PATTERN = os.path.join(
'observed_yield', '%s_yield_map.tif') # crop_name
_EXTENDED_CLIMATE_BIN_FILE_PATTERN = os.path.join(
'extended_climate_bin_maps', 'extendedclimatebins%s.tif') # crop_name
_CLIMATE_PERCENTILE_TABLE_PATTERN = os.path.join(
'climate_percentile_yield_tables',
'%s_percentile_yield_table.csv') # crop_name
@ -80,7 +91,7 @@ _NODATA_YIELD = -1.0
def execute(args):
"""Crop Production Percentile Model.
"""Crop Production Regression Model.
This model will take a landcover (crop cover?), N, P, and K map and
produce modeled yields, and a nutrient table.
@ -102,7 +113,6 @@ def execute(args):
args['n_raster_path'] (string): path to nitrogen fertilization rates.
args['p_raster_path'] (string): path to phosphorous fertilization
rates.
args['aggregate_polygon_path'] (string): path to polygon shapefile
that will be used to aggregate crop yields and total nutrient
value. (optional, if value is None, then skipped)
@ -203,12 +213,16 @@ def execute(args):
clipped_climate_bin_raster_path, 'nearest',
target_bb=landcover_wgs84_bounding_box)
climate_percentile_yield_table_path = os.path.join(
args['model_data_path'],
_CLIMATE_PERCENTILE_TABLE_PATTERN % crop_name)
crop_climate_percentile_table = utils.build_lookup_from_csv(
climate_percentile_yield_table_path, 'climate_bin',
to_lower=True, numerical_cast=True)
crop_regression_table_path = os.path.join(
args['model_data_path'], _REGRESSION_TABLE_PATTERN % crop_name)
crop_regression_table = utils.build_lookup_from_csv(
crop_regression_table_path, 'climate_bin',
to_lower=True, numerical_cast=True, warn_if_missing=False)
print crop_regression_table
os.exit()
yield_percentile_headers = [
x for x in crop_climate_percentile_table.itervalues().next()
if x != 'climate_bin']

View File

@ -205,7 +205,8 @@ def _attempt_float(value):
def build_lookup_from_csv(
table_path, key_field, to_lower=True, numerical_cast=True):
table_path, key_field, to_lower=True, numerical_cast=True,
warn_if_missing=True):
"""Read a CSV table into a dictionary indexed by `key_field`.
Creates a dictionary from a CSV whose keys are unique entries in the CSV
@ -227,6 +228,8 @@ def build_lookup_from_csv(
attempt to be cast to a floating point type; if it fails will be
left as unicode. If false, all values will be considered raw
unicode.
warn_if_missing (bool): If True, warnings are logged if there are
empty headers or value rows.
Returns:
lookup_dict (dict): a dictionary of the form {
@ -250,7 +253,7 @@ def build_lookup_from_csv(
raise ValueError(
'%s expected in %s for the CSV file at %s' % (
key_field, header_row, table_path))
if '' in header_row:
if warn_if_missing and '' in header_row:
LOGGER.warn(
"There are empty strings in the header row at %s", table_path)
key_index = header_row.index(key_field)
@ -260,7 +263,7 @@ def build_lookup_from_csv(
row = [x.lower() for x in row]
if numerical_cast:
row = [_attempt_float(x) for x in row]
if '' in row:
if warn_if_missing and '' in row:
LOGGER.warn(
"There are empty strings in row %s in %s", row,
table_path)

View File

@ -118,6 +118,12 @@ class CropProductionTests(unittest.TestCase):
'results_suffix': '',
'landcover_raster_path': os.path.join(
SAMPLE_DATA_PATH, 'landcover.tif'),
'k_raster_path': os.path.join(
SAMPLE_DATA_PATH, 'fake_fert_map.tif'),
'p_raster_path': os.path.join(
SAMPLE_DATA_PATH, 'fake_fert_map.tif'),
'n_raster_path': os.path.join(
SAMPLE_DATA_PATH, 'fake_fert_map.tif'),
'landcover_to_crop_table_path': os.path.join(
SAMPLE_DATA_PATH, 'landcover_to_crop_table.csv'),
'aggregate_polygon_path': os.path.join(
@ -126,31 +132,3 @@ class CropProductionTests(unittest.TestCase):
'model_data_path': MODEL_DATA_PATH
}
crop_production_regression.execute(args)
result_table_path = os.path.join(
args['workspace_dir'], 'aggregate_results.csv')
from natcap.invest import utils
result_table = utils.build_lookup_from_csv(
result_table_path, 'id', to_lower=True, numerical_cast=True)
expected_result_table_path = os.path.join(
TEST_DATA_PATH, 'expected_aggregate_results.csv')
expected_result_table = utils.build_lookup_from_csv(
expected_result_table_path, 'id', to_lower=True,
numerical_cast=True)
for id_key in expected_result_table:
if id_key not in result_table:
self.fail("Expected ID %s in result table" % id_key)
for column_key in expected_result_table[id_key]:
if column_key not in result_table[id_key]:
self.fail(
"Expected column %s in result table" % column_key)
# The tolerance of 3 digits after the decimal was determined by
# experimentation
tolerance_places = 3
numpy.testing.assert_almost_equal(
expected_result_table[id_key][column_key],
result_table[id_key][column_key],
decimal=tolerance_places)