Move datastack metadata generation to occur within build_datastack_archive; don't copy invalid metadata to datastack; don't overwrite units (#1774)
parent 36b0c7e52d, commit d21e418bb0
@@ -98,7 +98,7 @@ def _tarfile_safe_extract(archive_path, dest_dir_path):
 
 
 def _copy_spatial_files(spatial_filepath, target_dir):
-    """Copy spatial files and their geometamaker metadata to a new directory.
+    """Copy spatial files to a new directory.
 
     Args:
         spatial_filepath (str): The filepath to a GDAL-supported file.
@@ -134,15 +134,6 @@ def _copy_spatial_files(spatial_filepath, target_dir):
                 shutil.copyfile(member_file, target_filepath)
     spatial_file = None
 
-    # Copy any existing geometamaker metadata
-    spatial_metadata = spatial_filepath + ".yml"
-    if os.path.exists(spatial_metadata):
-        LOGGER.info(f"Metadata detected for {spatial_filepath}. "
-                    f"Copying to {target_dir}")
-        metadata_target = os.path.join(target_dir,
-                                       os.path.basename(spatial_metadata))
-        shutil.copyfile(spatial_metadata, metadata_target)
-
     # I can't conceive of a case where the basename of the source file does not
     # match any of the member file basenames, but just in case there's a
     # weird GDAL driver that does this, it seems reasonable to fall back to
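The block removed above copied any pre-existing `.yml` sidecar into the datastack verbatim, which is how stale or invalid metadata could travel with an archive. geometamaker's sidecar convention, as a minimal sketch (the filename is hypothetical; `describe` and `write` are the library calls this codebase relies on):

```python
import geometamaker

# geometamaker keeps metadata in a ".yml" sidecar next to the data file,
# so "dem.tif" is described by "dem.tif.yml" (hypothetical filename).
resource = geometamaker.describe('dem.tif')
resource.write()  # creates or refreshes "dem.tif.yml" beside the data
```

After this commit, the sidecar is regenerated inside `build_datastack_archive` rather than copied, so the archive always carries freshly generated metadata.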
@@ -153,28 +144,6 @@ def _copy_spatial_files(spatial_filepath, target_dir):
     return return_filepath
 
 
-def _copy_flat_file(flat_filepath, target_dir):
-    """Copy flat file and its geometamaker metadata to a new directory.
-
-    Args:
-        flat_filepath (str): Filepath to a single-component file (e.g., .csv).
-        target_dir (str): The directory where file and .yml should be copied.
-            If this directory does not exist, it will be created.
-
-    Returns:
-        None
-
-    """
-    LOGGER.info(f'Copying {flat_filepath} --> {target_dir}')
-
-    shutil.copyfile(flat_filepath, target_dir)
-    try:
-        shutil.copyfile(flat_filepath + ".yml", target_dir)
-    except FileNotFoundError:
-        # no metadata for file found
-        pass
-
-
 def format_args_dict(args_dict, model_name):
     """Nicely format an arguments dictionary for writing to a stream.
 
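With `_copy_flat_file` gone, the call sites below switch to `shutil.copyfile` directly. One stdlib subtlety worth noting: despite the removed docstring calling `target_dir` a directory, `shutil.copyfile` requires a complete destination file path and fails when handed a directory, so the call sites all pass full file paths. Illustrative paths:

```python
import shutil

# copyfile needs the complete destination *file* path:
shutil.copyfile('inputs/table.csv', 'data_dir/table.csv')

# copy, by contrast, accepts a directory and keeps the source basename:
shutil.copy('inputs/table.csv', 'data_dir')
```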
@@ -362,7 +331,7 @@ def build_datastack_archive(args, model_name, datastack_path):
             if not spatial_columns:
                 LOGGER.debug(
                     f'No spatial columns, copying to {target_csv_path}')
-                _copy_flat_file(source_path, target_csv_path)
+                shutil.copyfile(source_path, target_csv_path)
             else:
                 contained_files_dir = os.path.join(
                     data_dir, f'{key}_csv_data')
@@ -431,7 +400,7 @@ def build_datastack_archive(args, model_name, datastack_path):
         elif input_type == 'file':
             target_filepath = os.path.join(
                 data_dir, f'{key}_file')
-            _copy_flat_file(source_path, target_filepath)
+            shutil.copyfile(source_path, target_filepath)
             target_arg_value = target_filepath
             files_found[source_path] = target_arg_value
 
@@ -448,7 +417,7 @@ def build_datastack_archive(args, model_name, datastack_path):
                 if os.path.isdir(src_path):
                     shutil.copytree(src_path, dest_path)
                 else:
-                    _copy_flat_file(src_path, dest_path)
+                    shutil.copyfile(src_path, dest_path)
 
             LOGGER.debug(
                 f'Directory copied from {source_path} --> {target_directory}')
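The same one-line swap appears at all three call sites above. The surrounding branch distills to this standalone helper (illustrative only, not part of invest):

```python
import os
import shutil

def copy_entry(src_path, dest_path):
    # mirrors the branch above: whole trees for directories,
    # a single-file copy for everything else
    if os.path.isdir(src_path):
        shutil.copytree(src_path, dest_path)
    else:
        shutil.copyfile(src_path, dest_path)
```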
@@ -487,8 +456,16 @@ def build_datastack_archive(args, model_name, datastack_path):
     parameter_set = build_parameter_set(
         rewritten_args, model_name, param_file_uri, relative=True)
 
-    spec_utils.generate_metadata_for_datastack(
-        module, args, parameter_set, temp_workspace)
+    # write metadata for all files in args
+    keywords = [module.MODEL_SPEC['model_id'], 'InVEST']
+    for k, v in args.items():
+        if isinstance(v, str) and os.path.isfile(v):
+            this_arg_spec = module.MODEL_SPEC['args'][k]
+            # write metadata file to target location (in temp dir)
+            subdir = os.path.dirname(parameter_set['args'][k])
+            target_location = os.path.join(temp_workspace, subdir)
+            spec_utils.write_metadata_file(v, this_arg_spec, keywords,
+                                           out_workspace=target_location)
 
     # Remove the handler before archiving the working dir (and the logfile)
     archive_filehandler.close()
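The inlined loop writes each input's metadata next to that file's copy inside the temporary workspace, reusing the archive-relative paths that `build_parameter_set` recorded. A self-contained sketch of the path arithmetic (all names illustrative):

```python
import os

args = {'dem_path': '/home/user/data/dem.tif'}
parameter_set = {'args': {'dem_path': 'data/dem_path_raster/dem.tif'}}
temp_workspace = '/tmp/datastack'

for key in args:
    # the real loop also guards with isinstance(v, str) and os.path.isfile(v)
    subdir = os.path.dirname(parameter_set['args'][key])
    target_location = os.path.join(temp_workspace, subdir)
    print(target_location)  # /tmp/datastack/data/dem_path_raster
```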
@@ -638,7 +638,7 @@ def write_metadata_file(datasource_path, spec, keywords_list,
         out_workspace (str, optional) - where to write metadata if different
             from data location
 
     Returns:
-        None
+        None: if metadata could not be created due to validation or file errors.
 
     """
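The updated Returns section documents the "don't copy invalid metadata" half of this commit: when a file cannot be described, the function bails out instead of emitting a bad sidecar. A hedged sketch of that guard shape, assuming `geometamaker.describe` raises `ValueError` on inputs it cannot handle (the helper name is hypothetical):

```python
import logging

import geometamaker

LOGGER = logging.getLogger(__name__)

def _describe_or_skip(datasource_path):
    """Hypothetical helper showing the early-return shape."""
    try:
        return geometamaker.describe(datasource_path)
    except ValueError as error:
        LOGGER.debug(
            f'Could not create metadata for {datasource_path}: {error}')
        return None
```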
@@ -674,16 +674,20 @@ def write_metadata_file(datasource_path, spec, keywords_list,
         attr_spec = spec['fields']
     if attr_spec:
         for key, value in attr_spec.items():
-            about = value['about'] if 'about' in value else ''
-            units = format_unit(value['units']) if 'units' in value else ''
             try:
                 # field names in attr_spec are always lowercase, but the
                 # actual fieldname in the data could be any case because
                 # invest does not require case-sensitive fieldnames
                 yaml_key = _get_key(key, resource)
-                resource.set_field_description(
-                    yaml_key, description=about, units=units)
+                # Field description only gets set if it's empty, i.e. ''
+                if len(resource.get_field_description(yaml_key)
+                       .description.strip()) < 1:
+                    about = value['about'] if 'about' in value else ''
+                    resource.set_field_description(yaml_key, description=about)
+                # units only get set if empty
+                if len(resource.get_field_description(yaml_key)
+                       .units.strip()) < 1:
+                    units = format_unit(value['units']) if 'units' in value else ''
+                    resource.set_field_description(yaml_key, units=units)
             except KeyError as error:
                 # fields that are in the spec but missing
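The merge rule introduced here is: user-authored metadata wins, and spec-derived text only fills blanks. Reduced to its essence as a standalone illustration:

```python
def fill_if_empty(current, candidate):
    # replace a value only when its stripped length is < 1,
    # matching the guards in the hunk above
    return candidate if len(current.strip()) < 1 else current

assert fill_if_empty('', 'meters') == 'meters'  # blank gets filled
assert fill_if_empty('hand-edited text', 'meters') == 'hand-edited text'
```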
@@ -691,7 +695,7 @@ def write_metadata_file(datasource_path, spec, keywords_list,
                 LOGGER.debug(error)
     if 'bands' in spec:
         for idx, value in spec['bands'].items():
-            if len(resource.get_band_description(idx).description) < 1:
+            if len(resource.get_band_description(idx).units) < 1:
                 try:
                     units = format_unit(spec['bands'][idx]['units'])
                 except KeyError:
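The band-level fix is the same idea at one remove: the old guard consulted the description when deciding whether to set units, so a band with user-set units but an empty description had its units clobbered by the spec. A toy illustration (the class is a stand-in, not geometamaker's actual band record):

```python
class Band:  # stand-in for a band metadata record
    def __init__(self, description, units):
        self.description = description
        self.units = units

band = Band(description='', units='mm/year')  # user set units only
assert len(band.description) < 1   # old guard passes -> units overwritten
assert not (len(band.units) < 1)   # new guard fails  -> units preserved
```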
@@ -741,31 +745,3 @@ def generate_metadata_for_outputs(model_module, args_dict):
             LOGGER.debug(error)
 
     _walk_spec(model_module.MODEL_SPEC['outputs'], args_dict['workspace_dir'])
-
-
-def generate_metadata_for_datastack(model_module, args_dict, param_set,
-                                    temp_dir):
-    """Create metadata for all items in invest model args.
-
-    Args:
-        model_module (object) - the natcap.invest module containing
-            the MODEL_SPEC attribute
-        args_dict (dict) - the arguments dictionary passed to the
-            model's ``execute`` function.
-        param_set (dict) - parameter set which contains relative filepaths
-        temp_dir (str) - directory where datastack is temporarily stored
-            before compression
-
-    Returns:
-        None
-
-    """
-    keywords = [model_module.MODEL_SPEC['model_id'], 'InVEST']
-    for k, v in args_dict.items():
-        if isinstance(v, str) and os.path.isfile(v):
-            this_arg_spec = model_module.MODEL_SPEC['args'][k]
-            # write metadata file to target location (in temp dir)
-            subdir = os.path.dirname(param_set['args'][k])
-            target_location = os.path.join(temp_dir, subdir)
-            write_metadata_file(v, this_arg_spec, keywords,
-                                out_workspace=target_location)
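With `generate_metadata_for_datastack` deleted here, its loop now lives inline in `build_datastack_archive` (the @@ -487,8 hunk above), so a single call builds the archive and its metadata together. A hedged usage sketch; the args and model name are illustrative, not taken from this diff:

```python
from natcap.invest import datastack

args = {
    'workspace_dir': 'workspace',
    'dem_path': 'dem.tif',  # hypothetical model input
}
datastack.build_datastack_archive(
    args, 'carbon', 'my_datastack.invest.tar.gz')
```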