Move datastack metadata generation to occur within build_datastack_archive; don't copy invalid metadata to datastack; don't overwrite units (#1774)

This commit is contained in:
Claire Simpson 2025-05-06 11:29:21 -06:00
parent 36b0c7e52d
commit d21e418bb0
2 changed files with 22 additions and 69 deletions

View File

@@ -98,7 +98,7 @@ def _tarfile_safe_extract(archive_path, dest_dir_path):
def _copy_spatial_files(spatial_filepath, target_dir):
"""Copy spatial files and their geometamaker metadata to a new directory.
"""Copy spatial files to a new directory.
Args:
spatial_filepath (str): The filepath to a GDAL-supported file.
@@ -134,15 +134,6 @@ def _copy_spatial_files(spatial_filepath, target_dir):
shutil.copyfile(member_file, target_filepath)
spatial_file = None
# Copy any existing geometamaker metadata
spatial_metadata = spatial_filepath + ".yml"
if os.path.exists(spatial_metadata):
LOGGER.info(f"Metadata detected for {spatial_filepath}. "
f"Copying to {target_dir}")
metadata_target = os.path.join(target_dir,
os.path.basename(spatial_metadata))
shutil.copyfile(spatial_metadata, metadata_target)
# I can't conceive of a case where the basename of the source file does not
# match any of the member file basenames, but just in case there's a
# weird GDAL driver that does this, it seems reasonable to fall back to
@@ -153,28 +144,6 @@ def _copy_spatial_files(spatial_filepath, target_dir):
return return_filepath
def _copy_flat_file(flat_filepath, target_dir):
    """Copy a flat file and its geometamaker metadata to a new location.

    Args:
        flat_filepath (str): Filepath to a single-component file (e.g., .csv).
        target_dir (str): The destination filepath to copy the file to.
            NOTE(review): despite the name, callers pass a full target
            *filepath* here (``shutil.copyfile`` requires a file destination,
            not a directory, and does not create missing directories).

    Returns:
        None
    """
    LOGGER.info(f'Copying {flat_filepath} --> {target_dir}')
    shutil.copyfile(flat_filepath, target_dir)
    try:
        # Bug fix: the sidecar must be copied to its own ".yml" destination.
        # Copying it to ``target_dir`` itself (as before) overwrote the data
        # file that was just copied on the line above.
        shutil.copyfile(flat_filepath + ".yml", target_dir + ".yml")
    except FileNotFoundError:
        # no metadata for file found
        pass
def format_args_dict(args_dict, model_name):
"""Nicely format an arguments dictionary for writing to a stream.
@@ -362,7 +331,7 @@ def build_datastack_archive(args, model_name, datastack_path):
if not spatial_columns:
LOGGER.debug(
f'No spatial columns, copying to {target_csv_path}')
_copy_flat_file(source_path, target_csv_path)
shutil.copyfile(source_path, target_csv_path)
else:
contained_files_dir = os.path.join(
data_dir, f'{key}_csv_data')
@@ -431,7 +400,7 @@ def build_datastack_archive(args, model_name, datastack_path):
elif input_type == 'file':
target_filepath = os.path.join(
data_dir, f'{key}_file')
_copy_flat_file(source_path, target_filepath)
shutil.copyfile(source_path, target_filepath)
target_arg_value = target_filepath
files_found[source_path] = target_arg_value
@@ -448,7 +417,7 @@ def build_datastack_archive(args, model_name, datastack_path):
if os.path.isdir(src_path):
shutil.copytree(src_path, dest_path)
else:
_copy_flat_file(src_path, dest_path)
shutil.copyfile(src_path, dest_path)
LOGGER.debug(
f'Directory copied from {source_path} --> {target_directory}')
@@ -487,8 +456,16 @@ def build_datastack_archive(args, model_name, datastack_path):
parameter_set = build_parameter_set(
rewritten_args, model_name, param_file_uri, relative=True)
spec_utils.generate_metadata_for_datastack(
module, args, parameter_set, temp_workspace)
# write metadata for all files in args
keywords = [module.MODEL_SPEC['model_id'], 'InVEST']
for k, v in args.items():
if isinstance(v, str) and os.path.isfile(v):
this_arg_spec = module.MODEL_SPEC['args'][k]
# write metadata file to target location (in temp dir)
subdir = os.path.dirname(parameter_set['args'][k])
target_location = os.path.join(temp_workspace, subdir)
spec_utils.write_metadata_file(v, this_arg_spec, keywords,
out_workspace=target_location)
# Remove the handler before archiving the working dir (and the logfile)
archive_filehandler.close()

View File

@@ -638,7 +638,7 @@ def write_metadata_file(datasource_path, spec, keywords_list,
out_workspace (str, optional) - where to write metadata if different
from data location
Returns:
None
None: if metadata could not be created due to validation or file errors.
"""
@@ -674,16 +674,20 @@ def write_metadata_file(datasource_path, spec, keywords_list,
attr_spec = spec['fields']
if attr_spec:
for key, value in attr_spec.items():
about = value['about'] if 'about' in value else ''
units = format_unit(value['units']) if 'units' in value else ''
try:
# field names in attr_spec are always lowercase, but the
# actual fieldname in the data could be any case because
# invest does not require case-sensitive fieldnames
yaml_key = _get_key(key, resource)
# Field description only gets set if its empty, i.e. ''
if len(resource.get_field_description(yaml_key)
.description.strip()) < 1:
about = value['about'] if 'about' in value else ''
resource.set_field_description(yaml_key, description=about)
# units only get set if empty
if len(resource.get_field_description(yaml_key)
.units.strip()) < 1:
units = format_unit(value['units']) if 'units' in value else ''
resource.set_field_description(yaml_key, units=units)
except KeyError as error:
# fields that are in the spec but missing
@@ -691,7 +695,7 @@ def write_metadata_file(datasource_path, spec, keywords_list,
LOGGER.debug(error)
if 'bands' in spec:
for idx, value in spec['bands'].items():
if len(resource.get_band_description(idx).description) < 1:
if len(resource.get_band_description(idx).units) < 1:
try:
units = format_unit(spec['bands'][idx]['units'])
except KeyError:
@@ -741,31 +745,3 @@ def generate_metadata_for_outputs(model_module, args_dict):
LOGGER.debug(error)
_walk_spec(model_module.MODEL_SPEC['outputs'], args_dict['workspace_dir'])
def generate_metadata_for_datastack(model_module, args_dict, param_set,
                                    temp_dir):
    """Create metadata for all items in invest model args.

    Args:
        model_module (object) - the natcap.invest module containing
            the MODEL_SPEC attribute
        args_dict (dict) - the arguments dictionary passed to the
            model's ``execute`` function.
        param_set (dict) - parameter set which contains relative filepaths
        temp_dir (str) - directory where datastack is temporarily stored
            before compression

    Returns:
        None
    """
    keyword_list = [model_module.MODEL_SPEC['model_id'], 'InVEST']
    for arg_key, arg_value in args_dict.items():
        # Only string args that point at an existing file get metadata.
        if not (isinstance(arg_value, str) and os.path.isfile(arg_value)):
            continue
        arg_spec = model_module.MODEL_SPEC['args'][arg_key]
        # Mirror the file's relative location within the temp datastack dir
        # so the metadata lands next to the copied data file.
        relative_subdir = os.path.dirname(param_set['args'][arg_key])
        out_dir = os.path.join(temp_dir, relative_subdir)
        write_metadata_file(arg_value, arg_spec, keyword_list,
                            out_workspace=out_dir)