Improvement: Support ZST in mamba and enable ZST by default (#2404)

This commit is contained in:
John H. Ayad 2023-06-27 15:14:29 +01:00 committed by GitHub
parent 2209a90cc7
commit 60cd358c39
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 78 additions and 117 deletions

View File

@ -279,7 +279,6 @@ namespace mamba
bool use_only_tar_bz2 = false;
bool repodata_use_zst = false;
std::vector<std::string> repodata_has_zst = { "https://conda.anaconda.org/conda-forge" };
// usernames on anaconda.org can have an underscore, which influences the

View File

@ -1232,11 +1232,6 @@ namespace mamba
.set_env_var_names()
.description("Permit use of the --overide-channels command-line flag"));
insert(Configurable("repodata_use_zst", &ctx.repodata_use_zst)
.group("Repodata")
.set_rc_configurable()
.description("Use zstd encoded repodata when fetching"));
insert(Configurable("repodata_has_zst", &ctx.repodata_has_zst)
.group("Repodata")
.set_rc_configurable()

View File

@ -598,33 +598,30 @@ namespace mamba
auto& ctx = Context::instance();
if (!ctx.offline || forbid_cache())
{
if (ctx.repodata_use_zst)
bool has_value = m_metadata.has_zst.has_value();
bool is_expired = m_metadata.has_zst.has_value()
&& m_metadata.has_zst.value().has_expired();
bool has_zst = m_metadata.check_zst(p_channel);
if (!has_zst && (is_expired || !has_value))
{
bool has_value = m_metadata.has_zst.has_value();
bool is_expired = m_metadata.has_zst.has_value()
&& m_metadata.has_zst.value().has_expired();
bool has_zst = m_metadata.check_zst(p_channel);
if (!has_zst && (is_expired || !has_value))
m_check_targets.push_back(std::make_unique<DownloadTarget>(
m_name + " (check zst)",
m_repodata_url + ".zst",
""
));
m_check_targets.back()->set_head_only(true);
m_check_targets.back()->set_finalize_callback(&MSubdirData::finalize_check, this);
m_check_targets.back()->set_ignore_failure(true);
if (!(ctx.graphics_params.no_progress_bars || ctx.output_params.quiet
|| ctx.output_params.json))
{
m_check_targets.push_back(std::make_unique<DownloadTarget>(
m_name + " (check zst)",
m_repodata_url + ".zst",
""
));
m_check_targets.back()->set_head_only(true);
m_check_targets.back()->set_finalize_callback(&MSubdirData::finalize_check, this);
m_check_targets.back()->set_ignore_failure(true);
if (!(ctx.graphics_params.no_progress_bars || ctx.output_params.quiet
|| ctx.output_params.json))
{
m_progress_bar_check = Console::instance().add_progress_bar(
m_name + " (check zst)"
);
m_check_targets.back()->set_progress_bar(m_progress_bar_check);
m_progress_bar_check.repr().postfix.set_value("Checking");
}
return true;
m_progress_bar_check = Console::instance().add_progress_bar(
m_name + " (check zst)"
);
m_check_targets.back()->set_progress_bar(m_progress_bar_check);
m_progress_bar_check.repr().postfix.set_value("Checking");
}
return true;
}
create_target();
}
@ -676,15 +673,12 @@ namespace mamba
m_solv_cache_valid = true;
}
if (Context::instance().repodata_use_zst)
{
auto state_file = json_file;
state_file.replace_extension(".state.json");
auto lock = LockFile(state_file);
m_metadata.store_file_metadata(json_file);
auto outf = open_ofstream(state_file);
m_metadata.serialize_to_stream(outf);
}
auto state_file = json_file;
state_file.replace_extension(".state.json");
auto lock = LockFile(state_file);
m_metadata.store_file_metadata(json_file);
auto outf = open_ofstream(state_file);
m_metadata.serialize_to_stream(outf);
}
bool MSubdirData::finalize_transfer(const DownloadTarget&)
@ -817,79 +811,28 @@ namespace mamba
m_metadata.cache_control = m_target->get_cache_control();
m_metadata.stored_file_size = file_size;
if (!Context::instance().repodata_use_zst)
fs::u8path state_file = json_file;
state_file.replace_extension(".state.json");
std::error_code ec;
mamba_fs::rename_or_move(m_temp_file->path(), json_file, ec);
if (ec)
{
LOG_DEBUG << "Opening '" << json_file.string() << "'";
path::touch(json_file, true);
std::ofstream final_file = open_ofstream(json_file);
if (!final_file.is_open())
{
throw mamba_error(
fmt::format("Could not open file '{}'", json_file.string()),
mamba_error_code::subdirdata_not_loaded
);
}
if (m_progress_bar)
{
m_progress_bar.set_postfix("Finalizing");
}
std::ifstream temp_file = open_ifstream(m_temp_file->path());
std::stringstream temp_json;
m_metadata.serialize_to_stream_tiny(temp_json);
// replace `}` with `,`
temp_json.seekp(-1, temp_json.cur);
temp_json << ',';
final_file << temp_json.str();
temp_file.seekg(1);
std::copy(
std::istreambuf_iterator<char>(temp_file),
std::istreambuf_iterator<char>(),
std::ostreambuf_iterator<char>(final_file)
throw mamba_error(
fmt::format(
"Could not move repodata file from {} to {}: {}",
m_temp_file->path(),
json_file,
strerror(errno)
),
mamba_error_code::subdirdata_not_loaded
);
if (!temp_file)
{
std::error_code ec;
fs::remove(json_file, ec);
if (ec)
{
LOG_ERROR << "Could not remove file " << json_file << ": " << ec.message();
}
throw mamba_error(
fmt::format("Could not write out repodata file {}: {}", json_file, strerror(errno)),
mamba_error_code::subdirdata_not_loaded
);
}
fs::last_write_time(json_file, fs::now());
}
else
{
fs::u8path state_file = json_file;
state_file.replace_extension(".state.json");
std::error_code ec;
mamba_fs::rename_or_move(m_temp_file->path(), json_file, ec);
if (ec)
{
throw mamba_error(
fmt::format(
"Could not move repodata file from {} to {}: {}",
m_temp_file->path(),
json_file,
strerror(errno)
),
mamba_error_code::subdirdata_not_loaded
);
}
fs::last_write_time(json_file, fs::now());
fs::last_write_time(json_file, fs::now());
m_metadata.store_file_metadata(json_file);
std::ofstream state_file_stream = open_ofstream(state_file);
m_metadata.serialize_to_stream(state_file_stream);
m_metadata.store_file_metadata(json_file);
std::ofstream state_file_stream = open_ofstream(state_file);
m_metadata.serialize_to_stream(state_file_stream);
}
if (m_progress_bar)
{
m_progress_bar.repr().postfix.set_value("Downloaded").deactivate();

View File

@ -587,8 +587,6 @@ namespace mamba
{
TEST_CASE("parse_mod_etag")
{
bool old_value = Context::instance().repodata_use_zst;
Context::instance().repodata_use_zst = true;
fs::u8path cache_folder = fs::u8path{ test_data_dir / "repodata_json_cache" };
auto mq = detail::read_metadata(cache_folder / "test_1.json");
CHECK(mq.has_value());
@ -665,8 +663,6 @@ namespace mamba
CHECK_EQ(j.url, "https://conda.anaconda.org/conda-forge/noarch/repodata.json.zst");
CHECK_EQ(j.has_zst.value().value, true);
CHECK_EQ(j.has_zst.value().last_checked, parse_utc_timestamp("2023-01-06T16:33:06Z"));
Context::instance().repodata_use_zst = old_value;
}
}
} // namespace mamba

View File

@ -1533,6 +1533,8 @@ class SubdirData:
) -> None: ...
def cache_path(self) -> str: ...
def create_repo(self, arg0: Pool) -> Repo: ...
def download_and_check_targets(self, arg0: DownloadTargetList) -> bool: ...
def finalize_checks(self) -> None: ...
def loaded(self) -> bool: ...
pass

View File

@ -136,6 +136,7 @@ PYBIND11_MODULE(bindings, m)
auto pyPackageInfo = py::class_<PackageInfo>(m, "PackageInfo");
auto pyPrefixData = py::class_<PrefixData>(m, "PrefixData");
auto pySolver = py::class_<MSolver>(m, "Solver");
auto pyMultiDownloadTarget = py::class_<MultiDownloadTarget>(m, "DownloadTargetList");
// only used in a return type; does it belong in the module?
auto pyRootRole = py::class_<validation::RootRole>(m, "RootRole");
@ -473,13 +474,25 @@ PYBIND11_MODULE(bindings, m)
.def(
"cache_path",
[](const MSubdirData& self) -> std::string { return extract(self.cache_path()); }
);
)
.def(
"download_and_check_targets",
[](MSubdirData& self, MultiDownloadTarget& multi_download) -> bool
{
for (auto& check_target : self.check_targets())
{
multi_download.add(check_target.get());
}
multi_download.download(MAMBA_NO_CLEAR_PROGRESS_BARS);
return self.check_targets().size();
}
)
.def("finalize_checks", &MSubdirData::finalize_checks);
m.def("cache_fn_url", &cache_fn_url);
m.def("create_cache_dir", &create_cache_dir);
py::class_<MultiDownloadTarget>(m, "DownloadTargetList")
.def(py::init<>())
pyMultiDownloadTarget.def(py::init<>())
.def(
"add",
[](MultiDownloadTarget& self, MSubdirData& sub) -> void { self.add(sub.target()); }

View File

@ -102,10 +102,23 @@ def get_index(
channel, channel_platform, full_url, pkgs_dirs, repodata_fn
)
needs_finalising = sd.download_and_check_targets(dlist)
index.append(
(sd, {"platform": channel_platform, "url": url, "channel": channel})
(
sd,
{
"platform": channel_platform,
"url": url,
"channel": channel,
"needs_finalising": needs_finalising,
},
)
)
dlist.add(sd)
for (sd, info) in index:
if info["needs_finalising"]:
sd.finalize_checks()
dlist.add(sd)
is_downloaded = dlist.download(api.MAMBA_DOWNLOAD_FAILFAST)