mamba/libmamba/src/download/mirror_impl.cpp

400 lines
14 KiB
C++

// Copyright (c) 2023, QuantStack and Mamba Contributors
//
// Distributed under the terms of the BSD 3-Clause License.
//
// The full license is in the file LICENSE, distributed with this software.
#include "mamba/core/logging.hpp"
#include "mamba/core/output.hpp"
#include "mamba/util/string.hpp"
#include "mamba/util/url.hpp"
#include "mamba/util/url_manip.hpp"
#include "nlohmann/json.hpp"
#include "mirror_impl.hpp"
namespace nl = nlohmann;
namespace mamba::download
{
/************************************
* PassThroughMirror implementation *
************************************/
namespace
{
const auto PASSTHROUGH_MIRROR_ID = MirrorID("");
}
// Default constructor: the Mirror base is initialised with the id returned by make_id().
PassThroughMirror::PassThroughMirror()
    : Mirror(make_id())
{
}
// Returns a copy of the PASSTHROUGH_MIRROR_ID constant.
MirrorID PassThroughMirror::make_id()
{
    MirrorID id = PASSTHROUGH_MIRROR_ID;
    return id;
}
// Returns a single generator that forwards the request's own `url_path` unchanged.
// Both string arguments are ignored.
auto PassThroughMirror::get_request_generators_impl(const std::string&, const std::string&) const
    -> request_generator_list
{
    request_generator_list generators;
    generators.push_back([](const Request& dl_request, const Content*)
                         { return MirrorRequest(dl_request, dl_request.url_path); });
    return generators;
}
/*****************************
* HTTPMirror implementation *
*****************************/
// Stores `url` as the mirror base URL. The id is computed from a copy of `url`
// before the string is moved into `m_url`.
HTTPMirror::HTTPMirror(std::string url)
    : Mirror(make_id(url))
    , m_url(std::move(url))
{
}
// Builds the mirror id directly from the mirror URL.
MirrorID HTTPMirror::make_id(std::string url)
{
    MirrorID id(std::move(url));
    return id;
}
// Returns a single generator that joins the mirror URL with the request's `url_path`.
// The mirror URL is copied into the closure; both string arguments are ignored.
auto HTTPMirror::get_request_generators_impl(const std::string&, const std::string&) const
    -> request_generator_list
{
    request_generator_list generators;
    generators.push_back(
        [base_url = m_url](const Request& dl_request, const Content*)
        { return MirrorRequest(dl_request, util::url_concat(base_url, dl_request.url_path)); }
    );
    return generators;
}
/****************************
* OCIMirror implementation *
****************************/
namespace utils
{
// Derives the OCI (name, tag) pair from an artifact path.
//
// Paths ending in ".json" or ".json.zst" (repodata) are returned unchanged with the
// tag "latest". Any other path is split from the right on "-" (at most two splits),
// e.g. "xtensor-0.23.10-h2acdbc0_0.tar.bz2" gives name "xtensor" and tag
// "0.23.10-h2acdbc0-0". Throws std::runtime_error when fewer than two parts result.
std::pair<std::string, std::string> split_path_tag(const std::string& path)
{
    const bool is_repodata = util::ends_with(path, ".json") || util::ends_with(path, ".json.zst");
    if (is_repodata)
    {
        return { path, "latest" };
    }

    const auto pieces = util::rsplit(path, "-", 2);
    if (pieces.size() < 2)
    {
        LOG_ERROR << "Could not split " << path << " into enough parts";
        throw std::runtime_error("Could not split filename into enough parts");
    }

    std::string name = pieces[0];
    std::string tag = pieces[1];
    if (pieces.size() > 2)
    {
        // Keep only what precedes the first '.' of the last piece
        // ("h2acdbc0_0" in the example above).
        const std::string& trailing = pieces[2];
        tag = fmt::format("{}-{}", pieces[1], trailing.substr(0, trailing.find('.')));
    }

    // The tag is emitted with '-' in place of every '_'.
    util::replace_all(tag, "_", "-");

    LOG_INFO << "Splitting " << path << " to name: " << name << " tag: " << tag;
    return { std::move(name), std::move(tag) };
}
// Parses `value` as JSON. On a parse error the input and the error message are
// logged and an empty JSON object is returned instead of propagating the exception.
nl::json parse_json_nothrow(const std::string& value)
{
    try
    {
        return nl::json::parse(value);
    }
    // Catch through the public `basic_json::parse_error` alias rather than the
    // library's internal `detail` namespace.
    catch (const nl::json::parse_error& e)
    {
        LOG_ERROR << fmt::format("Could not parse JSON\n{}", value);
        LOG_ERROR << fmt::format("Error message: {}", e.what());
        return nl::json::object();
    }
}
}
// Stores the registry URL, repository prefix, scope and credentials, and starts with
// an empty artifact map. The mirror id is computed from a copy of `url` before the
// string is moved into `m_url`.
OCIMirror::OCIMirror(
    std::string url,
    std::string repo_prefix,
    std::string scope,
    std::string username,
    std::string password
)
    : Mirror(make_id(url))
    , m_url(std::move(url))
    , m_repo_prefix(std::move(repo_prefix))
    , m_scope(std::move(scope))
    , m_username(std::move(username))
    , m_password(std::move(password))
    , m_path_map()
{
}
// Builds the mirror id directly from the registry URL.
MirrorID OCIMirror::make_id(std::string url)
{
    MirrorID id(std::move(url));
    return id;
}
// Builds the ordered list of request generators needed to fetch `url_path`:
//   1. an authentication request, when no token is stored for the artifact yet;
//   2. a manifest request, when the sha256 is neither stored nor given in `spec_sha256`;
//   3. the blob request for the artifact itself (always).
// When `spec_sha256` is non-empty and no sha256 is stored yet, it is recorded in the
// artifact data and the manifest request is skipped.
auto
OCIMirror::get_request_generators_impl(const std::string& url_path, const std::string& spec_sha256) const
    -> request_generator_list
{
    // NB: This method can be executed by many threads in parallel. Therefore,
    // data should not be captured in lambda used for building the request, as
    // inserting a new ArtifactData object may relocate preexisting ones.
    // NOTE(review): no locking around m_path_map is visible in this function —
    // confirm that callers serialize access.
    //
    // Plain named references are used instead of a structured binding so that the
    // lambdas below can capture them under C++17 rules.
    const auto name_and_tag = utils::split_path_tag(url_path);
    const std::string& split_path = name_and_tag.first;
    const std::string& split_tag = name_and_tag.second;

    // TODO we are getting here a new token for every artifact/path
    // => we should handle this differently to use the same token
    // => we could assume all requests are necessarily finished in < ~30 min? (max of token
    // validity) and store artifact data by subdir instead?
    // but data also contains sha256 which is specific to the artifact
    // (however, the token that we get seems to be the same even if asked multiple times...
    // so maybe that's okay)
    auto* data = get_artifact_data(split_path);
    if (data == nullptr)
    {
        // Single map lookup; ownership is created without a naked `new`.
        auto& slot = m_path_map[split_path];
        slot = std::make_unique<ArtifactData>();
        data = slot.get();
    }

    request_generator_list req_gen;

    if (data->token.empty())
    {
        req_gen.push_back([this, path = split_path](const Request& dl_request, const Content*)
                          { return build_authentication_request(dl_request, path); });
    }

    if (data->sha256sum.empty())
    {
        // If we know the spec sha256 (retrieved from repodata.json), we don't ask for the
        // manifest to get the spec
        if (!spec_sha256.empty())
        {
            // Update data with the corresponding spec sha256
            data->sha256sum = spec_sha256;
        }
        else
        {
            // This is the case of requesting repodata.json, we need to get the manifest first
            req_gen.push_back(
                [this, path = split_path, tag = split_tag](const Request& dl_request, const Content*)
                { return build_manifest_request(dl_request, path, tag); }
            );
        }
    }

    // Request to get the actual artifact
    req_gen.push_back([this, path = split_path](const Request& dl_request, const Content*)
                      { return build_blob_request(dl_request, path); });

    return req_gen;
}
// Builds the request that fetches an authentication token for `split_path`.
//
// The request targets get_authentication_url(split_path) and carries the mirror's
// username and password. On success the response buffer is parsed as JSON and its
// "token" string is stored in the artifact data for `split_path`; if there is no
// string-valued "token" member the callback returns a download_content error.
//
// The artifact data pointer is looked up here and captured by the callback.
// NOTE(review): it is dereferenced without a null check, so the entry for
// `split_path` is assumed to exist already — confirm against callers.
MirrorRequest OCIMirror::build_authentication_request(
    const Request& initial_request,
    const std::string& split_path
) const
{
    ArtifactData* data = get_artifact_data(split_path);
    std::string auth_url = get_authentication_url(split_path);

    MirrorRequest req(initial_request.name, auth_url);
    req.username = m_username;
    req.password = m_password;

    req.on_success = [data](const Success& success) -> expected_t<void>
    {
        const Buffer& buf = std::get<Buffer>(success.content);
        auto j = utils::parse_json_nothrow(buf.value);

        // `find` yields end() for non-object documents; the type check keeps a
        // non-string token from throwing out of this callback.
        const auto token = j.find("token");
        if (token == j.end() || !token->is_string())
        {
            return make_unexpected(
                "Could not retrieve authentication token",
                mamba_error_code::download_content
            );
        }

        data->token = token->get<std::string>();
        return expected_t<void>();
    };
    return req;
}
// Builds the request that fetches the OCI manifest for `split_path` at `split_tag`.
//
// The request carries the bearer token stored in the artifact data (building the
// header throws if that token is empty) and asks for an OCI image manifest. On
// success the manifest's "layers" are scanned for a repodata layer: the zstd media
// type is preferred and stops the scan, otherwise the plain JSON layer is used. The
// digest, minus its "sha256:" prefix, is stored in the artifact data.
//
// The callback returns a download_content error when "layers" is missing, when no
// repodata layer is found, or when the digest does not start with "sha256:".
//
// NOTE(review): the artifact data pointer is dereferenced without a null check, so
// the entry for `split_path` is assumed to exist already — confirm against callers.
MirrorRequest OCIMirror::build_manifest_request(
    const Request& initial_request,
    const std::string& split_path,
    const std::string& split_tag
) const
{
    ArtifactData* data = get_artifact_data(split_path);
    std::string manifest_url = get_manifest_url(split_path, split_tag);
    std::vector<std::string> headers = { get_authentication_header(data->token),
                                         "Accept: application/vnd.oci.image.manifest.v1+json" };

    MirrorRequest req(initial_request.name, manifest_url, std::move(headers));

    req.on_success = [data](const Success& success) -> expected_t<void>
    {
        const Buffer& buf = std::get<Buffer>(success.content);
        auto j = utils::parse_json_nothrow(buf.value);
        if (!j.contains("layers"))
        {
            return make_unexpected("Could not retrieve sha256", mamba_error_code::download_content);
        }

        std::string digest;
        bool found_zst = false;
        for (const auto& layer : j["layers"])
        {
            // Skip entries that lack a media type or a string digest; `find`
            // yields end() when the entry is not a JSON object.
            const auto media_type = layer.find("mediaType");
            const auto layer_digest = layer.find("digest");
            if (media_type == layer.end() || layer_digest == layer.end()
                || !layer_digest->is_string())
            {
                continue;
            }

            // Getting repodata.json.zst, if present, is preferable
            // Otherwise, we stick with the non compressed repodata.json
            if (*media_type == "application/vnd.conda.repodata.v1+json+zst")
            {
                digest = layer_digest->get<std::string>();
                found_zst = true;
                break;
            }
            else if (*media_type == "application/vnd.conda.repodata.v1+json")
            {
                digest = layer_digest->get<std::string>();
            }
        }

        // Runtime check instead of an assert: with assertions compiled out, an
        // empty digest would make substr() throw std::out_of_range and a digest
        // with another prefix would be truncated into a bogus sha256.
        const std::string digest_prefix = "sha256:";
        if (!util::starts_with(digest, digest_prefix))
        {
            return make_unexpected("Could not retrieve sha256", mamba_error_code::download_content);
        }

        if (found_zst)
        {
            data->is_repodata_zst = true;
        }
        data->sha256sum = digest.substr(digest_prefix.size());
        return expected_t<void>();
    };
    return req;
}
// Builds the request that downloads the artifact blob for `split_path`, using the
// sha256, token and zst flag stored in its artifact data.
// NOTE(review): the artifact data pointer is dereferenced without a null check —
// the entry for `split_path` is assumed to exist already.
MirrorRequest
OCIMirror::build_blob_request(const Request& initial_request, const std::string& split_path) const
{
    const ArtifactData* artifact = get_artifact_data(split_path);

    std::string blob_url = get_blob_url(split_path, artifact->sha256sum);

    std::vector<std::string> auth_headers;
    auth_headers.push_back(get_authentication_header(artifact->token));

    return MirrorRequest(
        initial_request,
        blob_url,
        std::move(auth_headers),
        artifact->is_repodata_zst
    );
}
// This is not used but could be if we use creds
// cf. OCIMirror constructor comment in header
//
// True only when both a username and a password are set.
bool OCIMirror::need_authentication() const
{
    const bool missing_credentials = m_username.empty() || m_password.empty();
    return !missing_credentials;
}
// Maps `repo` to the repository name used on the registry.
//
// OCI image names cannot start with `_`, so when the last path component (the text
// after the final '/', or the whole string when there is no '/') starts with `_`,
// it is prefixed with `zzz` to map to the `channel_mirrors` implementation.
// The result is prefixed with "<m_repo_prefix>/" when a repository prefix is set.
std::string OCIMirror::get_repo(const std::string& repo) const
{
    // Locate the package name without assuming a '/' is present: the previous
    // assert-only precondition produced "<repo>/zzz<repo>" for slash-less names
    // once assertions were compiled out.
    const std::size_t last_slash = repo.rfind('/');
    const std::size_t name_start = (last_slash == std::string::npos) ? 0 : last_slash + 1;

    std::string mapped_repo = repo;
    if (name_start < repo.size() && repo[name_start] == '_')
    {
        mapped_repo.insert(name_start, "zzz");
    }

    if (m_repo_prefix.empty())
    {
        return mapped_repo;
    }
    return fmt::format("{}/{}", m_repo_prefix, mapped_repo);
}
// Formats the token endpoint URL for `repo`:
// "<m_url>/token?scope=repository:<mapped repo>:<m_scope>".
std::string OCIMirror::get_authentication_url(const std::string& repo) const
{
    const std::string mapped_repo = get_repo(repo);
    return fmt::format("{}/token?scope=repository:{}:{}", m_url, mapped_repo, m_scope);
}
// Formats the bearer authorization header for `token`.
// Logs an error and throws std::invalid_argument when `token` is empty.
std::string OCIMirror::get_authentication_header(const std::string& token) const
{
    if (!token.empty())
    {
        return fmt::format("Authorization: Bearer {}", token);
    }

    LOG_ERROR << "Trying to pull artifacts with an empty token";
    throw std::invalid_argument("Trying to pull artifacts with an empty token");
}
// Formats the manifest URL for `repo` at `reference`:
// "<m_url>/v2/<mapped repo>/manifests/<reference>".
std::string OCIMirror::get_manifest_url(const std::string& repo, const std::string& reference) const
{
    const std::string mapped_repo = get_repo(repo);
    return fmt::format("{}/v2/{}/manifests/{}", m_url, mapped_repo, reference);
}
// Formats the blob URL for `repo` and `sha256sum`:
// "<m_url>/v2/<mapped repo>/blobs/sha256:<sha256sum>", for instance
// https://ghcr.io/v2/wolfv/artifact/blobs/sha256:c5be3ea75353851e1fcf3a298af3b6cfd2af3d7ff018ce52657b6dbd8f986aa4
std::string OCIMirror::get_blob_url(const std::string& repo, const std::string& sha256sum) const
{
    const std::string mapped_repo = get_repo(repo);
    return fmt::format("{}/v2/{}/blobs/sha256:{}", m_url, mapped_repo, sha256sum);
}
// Looks up the artifact data stored for `split_path`.
// Returns a non-owning pointer to it, or nullptr when no entry exists.
auto OCIMirror::get_artifact_data(const std::string& split_path) const -> ArtifactData*
{
    const auto entry = m_path_map.find(split_path);
    return (entry == m_path_map.end()) ? nullptr : entry->second.get();
}
/******************************
* make_mirror implementation *
******************************/
std::unique_ptr<Mirror> make_mirror(std::string url)
{
if (url.empty())
{
return std::make_unique<PassThroughMirror>();
}
else if (util::starts_with(url, "https://") || util::starts_with(url, "http://")
|| util::starts_with(url, "file://"))
{
return std::make_unique<HTTPMirror>(std::move(url));
}
else if (util::starts_with(url, "oci://"))
{
const auto parsed_url = util::URL::parse(url).value();
return std::make_unique<OCIMirror>(
util::concat("https://", parsed_url.host()), // we use "https" as scheme instead
// of "oci"
std::string(util::lstrip(parsed_url.path(), "/")),
"pull"
);
}
return nullptr;
}
}