[OCI - Mirrors] Add tests and doc (#3307)

* Add doc and micromamba python test

* Add some tests for mirrors

* Use another channel name to avoid interfering with conda-forge

* Add test parameter to use libsolv parser

* Remove dry-run from tests
This commit is contained in:
Hind-M 2024-06-12 16:36:31 +02:00 committed by GitHub
parent 4604d390e9
commit 7f8d5649c0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 287 additions and 62 deletions

View File

@ -136,5 +136,42 @@ The main changes are:
- Improved downloaders.
.. TODO OCI registry
.. TODO Mirrors
Mirrors and OCI registries
--------------------------
To ensure continuous and faster access when downloading packages, we now support mirrored channels.
Furthermore, we support fetching packages from `OCI registries <https://github.com/opencontainers/distribution-spec/blob/v1.0/spec.md#definitions>`_
in order to provide an alternative to hosting on https://conda.anaconda.org/conda-forge/.
Specifying a mirror can be done in the rc file as follows:
.. code::
$ cat ~/.mambarc
# Specify a mirror (can be a list of mirrors) for conda-forge channel
mirrored_channels:
conda-forge: ["oci://ghcr.io/channel-mirrors/conda-forge"]
# ``repodata_use_zst`` isn't considered when fetching from oci registries
# since compressed repodata is handled internally
# (if present, compressed repodata is necessarily fetched)
# Setting ``repodata_use_zst`` to ``false`` avoids useless requests with
# zst extension in repodata filename
repodata_use_zst: false
Then, you can for instance create a new environment ``pandoc_from_oci`` where ``pandoc`` can be fetched from the specified mirror and installed:
.. code::
$ micromamba create -n pandoc_from_oci pandoc -c conda-forge
Listing packages in the created ``pandoc_from_oci`` environment:
.. code::
$ micromamba list -n pandoc_from_oci
Name Version Build Channel
───────────────────────────────────────────────────────────────────────────────────────────────────────────────────
pandoc 3.2 ha770c72_0 https://pkg-containers.githubusercontent.com/ghcr1/blobs/pandoc-3.2-ha770c72_0.conda

View File

@ -123,7 +123,7 @@ Running commands manually
The CI files in ``.github/workflow`` provide an alternative way of developing Mamba.
Install development dependencies
*******************************
********************************
.. code:: bash

View File

@ -119,7 +119,7 @@ namespace mamba::download
// This class is used to create strong alias on
// string_view. This helps to avoid error-prone
// calls to functionsthat accept many arguments
// calls to functions that accept many arguments
// of the same type
template <int I>
class string_view_alias

View File

@ -71,7 +71,7 @@ namespace mamba::download
* OCIMirror implementation *
****************************/
namespace
namespace utils
{
std::pair<std::string, std::string> split_path_tag(const std::string& path)
{
@ -161,7 +161,7 @@ namespace mamba::download
// NB: This method can be executed by many threads in parallel. Therefore,
// data should not be captured in lambda used for building the request, as
// inserting a new ArtifactData object may relocate preexisting ones.
auto [split_path, split_tag] = split_path_tag(url_path);
auto [split_path, split_tag] = utils::split_path_tag(url_path);
// TODO we are getting here a new token for every artifact/path
// => we should handle this differently to use the same token
@ -226,7 +226,7 @@ namespace mamba::download
req.on_success = [data](const Success& success) -> expected_t<void>
{
const Buffer& buf = std::get<Buffer>(success.content);
auto j = parse_json_nothrow(buf.value);
auto j = utils::parse_json_nothrow(buf.value);
if (j.contains("token"))
{
data->token = j["token"].get<std::string>();
@ -259,7 +259,7 @@ namespace mamba::download
req.on_success = [data](const Success& success) -> expected_t<void>
{
const Buffer& buf = std::get<Buffer>(success.content);
auto j = parse_json_nothrow(buf.value);
auto j = utils::parse_json_nothrow(buf.value);
if (j.contains("layers"))
{
std::string digest;

View File

@ -65,6 +65,7 @@ set(
src/validation/test_update_framework_v1.cpp
# Implementation of downloaders and mirrors
src/download/test_downloader.cpp
src/download/test_mirror.cpp
# Core tests
../longpath.manifest
src/core/test_activation.cpp
@ -75,7 +76,6 @@ set(
src/core/test_environments_manager.cpp
src/core/test_history.cpp
src/core/test_lockfile.cpp
src/core/test_mirror.cpp
src/core/test_pinning.cpp
src/core/test_output.cpp
src/core/test_progress_bar.cpp

View File

@ -1,53 +0,0 @@
// Copyright (c) 2023, QuantStack and Mamba Contributors
//
// Distributed under the terms of the BSD 3-Clause License.
//
// The full license is in the file LICENSE, distributed with this software.
#include <doctest/doctest.h>
#include "mamba/core/mirror.hpp"
// TODO to be removed along with the previous header
// NOTE(review): this entire suite is disabled (block-commented below). It
// exercised the deprecated `mamba/core/mirror.hpp` `split_path_tag` helper and
// is superseded by the test suite in `src/download/test_mirror.cpp`; this file
// is slated for removal together with the legacy header (see TODO above).
namespace mamba
{
/*TEST_SUITE("mirror")
{
TEST_CASE("split_path_tag")
{
SUBCASE("tar_bz2_extension")
{
auto [split_path, split_tag] = split_path_tag("xtensor-0.23.10-h2acdbc0_0.tar.bz2");
CHECK_EQ(split_path, "xtensor");
CHECK_EQ(split_tag, "0.23.10-h2acdbc0-0");
}
SUBCASE("multiple_parts")
{
auto [split_path, split_tag] =
split_path_tag("x-tensor-10.23.10-h2acdbc0_0.tar.bz2"); CHECK_EQ(split_path, "x-tensor");
CHECK_EQ(split_tag, "10.23.10-h2acdbc0-0");
}
SUBCASE("more_multiple_parts")
{
auto [split_path, split_tag] =
split_path_tag("x-tens-or-10.23.10-h2acdbc0_0.tar.bz2"); CHECK_EQ(split_path, "x-tens-or");
CHECK_EQ(split_tag, "10.23.10-h2acdbc0-0");
}
SUBCASE("json_extension")
{
auto [split_path, split_tag] = split_path_tag("xtensor-0.23.10-h2acdbc0_0.json");
CHECK_EQ(split_path, "xtensor-0.23.10-h2acdbc0_0.json");
CHECK_EQ(split_tag, "latest");
}
SUBCASE("not_enough_parts")
{
CHECK_THROWS_AS(split_path_tag("xtensor.tar.bz2"), std::runtime_error);
}
}
}*/
}

View File

@ -0,0 +1,199 @@
// Copyright (c) 2023, QuantStack and Mamba Contributors
//
// Distributed under the terms of the BSD 3-Clause License.
//
// The full license is in the file LICENSE, distributed with this software.
#include <typeinfo>
#include <doctest/doctest.h>
#include "../src/download/mirror_impl.hpp"
namespace mamba::download
{
    namespace utils
    {
        // Re-declared here so the tests can call the internal helper whose
        // definition lives in src/download/mirror_impl.cpp (same namespace).
        std::pair<std::string, std::string> split_path_tag(const std::string& path);

        TEST_SUITE("split_path_tag")
        {
            TEST_CASE("tar_bz2_extension")
            {
                // Package archives split into <name> and <version-build> (with
                // '_' turned into '-') for use as OCI path/tag.
                auto [pkg_path, pkg_tag] = split_path_tag("xtensor-0.23.10-h2acdbc0_0.tar.bz2");
                CHECK_EQ(pkg_path, "xtensor");
                CHECK_EQ(pkg_tag, "0.23.10-h2acdbc0-0");
            }

            TEST_CASE("multiple_parts")
            {
                // Hyphenated package names keep all leading parts in the path.
                auto [pkg_path, pkg_tag] = split_path_tag("x-tensor-10.23.10-h2acdbc0_0.tar.bz2");
                CHECK_EQ(pkg_path, "x-tensor");
                CHECK_EQ(pkg_tag, "10.23.10-h2acdbc0-0");
            }

            TEST_CASE("more_multiple_parts")
            {
                auto [pkg_path, pkg_tag] = split_path_tag("x-tens-or-10.23.10-h2acdbc0_0.tar.bz2");
                CHECK_EQ(pkg_path, "x-tens-or");
                CHECK_EQ(pkg_tag, "10.23.10-h2acdbc0-0");
            }

            TEST_CASE("json_extension")
            {
                // Non-archive files keep their full name and get the "latest" tag.
                auto [pkg_path, pkg_tag] = split_path_tag("xtensor-0.23.10-h2acdbc0_0.json");
                CHECK_EQ(pkg_path, "xtensor-0.23.10-h2acdbc0_0.json");
                CHECK_EQ(pkg_tag, "latest");
            }

            TEST_CASE("not_enough_parts")
            {
                // A name with too few '-'-separated segments cannot be split.
                CHECK_THROWS_AS(split_path_tag("xtensor.tar.bz2"), std::runtime_error);
            }
        }
    }

    TEST_SUITE("mirrors")
    {
        TEST_CASE("PassThroughMirror")
        {
            // An empty URL yields the pass-through mirror, whose single
            // generator forwards the request path unchanged.
            std::unique_ptr<Mirror> mirror = make_mirror("");
            CHECK_EQ(typeid(*mirror), typeid(PassThroughMirror));

            Mirror::request_generator_list generators = mirror->get_request_generators("", "");
            CHECK_EQ(generators.size(), 1);

            Request repodata_request("some_request_name", MirrorName("mirror_name"), "linux-64/repodata.json");
            MirrorRequest generated = generators[0](repodata_request, nullptr);
            CHECK_EQ(generated.name, "some_request_name");
            CHECK_EQ(generated.url, "linux-64/repodata.json");
        }

        TEST_CASE("HTTPMirror")
        {
            // http(s) and file URLs all map to HTTPMirror; the generated URL is
            // simply <mirror-url>/<request-path>.
            SUBCASE("https")
            {
                std::unique_ptr<Mirror> mirror = make_mirror("https://conda.anaconda.org/conda-forge");
                CHECK_EQ(typeid(*mirror), typeid(HTTPMirror));

                Mirror::request_generator_list generators = mirror->get_request_generators("", "");
                CHECK_EQ(generators.size(), 1);

                Request repodata_request(
                    "repodata_request",
                    MirrorName("mirror_name"),
                    "linux-64/repodata.json"
                );
                MirrorRequest generated = generators[0](repodata_request, nullptr);
                CHECK_EQ(generated.name, "repodata_request");
                CHECK_EQ(generated.url, "https://conda.anaconda.org/conda-forge/linux-64/repodata.json");
            }

            SUBCASE("http")
            {
                std::unique_ptr<Mirror> mirror = make_mirror("http://conda.anaconda.org/conda-forge");
                CHECK_EQ(typeid(*mirror), typeid(HTTPMirror));

                Mirror::request_generator_list generators = mirror->get_request_generators("", "");
                CHECK_EQ(generators.size(), 1);

                Request repodata_request(
                    "repodata_request",
                    MirrorName("mirror_name"),
                    "linux-64/repodata.json"
                );
                MirrorRequest generated = generators[0](repodata_request, nullptr);
                CHECK_EQ(generated.name, "repodata_request");
                CHECK_EQ(generated.url, "http://conda.anaconda.org/conda-forge/linux-64/repodata.json");
            }

            SUBCASE("file")
            {
                std::unique_ptr<Mirror> mirror = make_mirror("file://channel_path");
                CHECK_EQ(typeid(*mirror), typeid(HTTPMirror));

                Mirror::request_generator_list generators = mirror->get_request_generators("", "");
                CHECK_EQ(generators.size(), 1);

                Request repodata_request(
                    "repodata_request",
                    MirrorName("mirror_name"),
                    "linux-64/repodata.json"
                );
                MirrorRequest generated = generators[0](repodata_request, nullptr);
                CHECK_EQ(generated.name, "repodata_request");
                CHECK_EQ(generated.url, "file://channel_path/linux-64/repodata.json");
            }
        }

        TEST_CASE("OCIMirror")
        {
            SUBCASE("Request repodata.json")
            {
                std::unique_ptr<Mirror> mirror = make_mirror("oci://ghcr.io/channel-mirrors/conda-forge");
                CHECK_EQ(typeid(*mirror), typeid(OCIMirror));

                // Without a sha, three requests are generated: token, manifest
                // and blob. Only the first (token) can be built up front.
                Mirror::request_generator_list generators = mirror->get_request_generators(
                    "linux-64/repodata.json",
                    ""
                );
                CHECK_EQ(generators.size(), 3);

                Request repodata_request(
                    "repodata_request",
                    MirrorName("mirror_name"),
                    "linux-64/repodata.json"
                );
                MirrorRequest generated = generators[0](repodata_request, nullptr);
                CHECK_EQ(generated.name, "repodata_request");
                CHECK_EQ(
                    generated.url,
                    "https://ghcr.io/token?scope=repository:channel-mirrors/conda-forge/linux-64/repodata.json:pull"
                );
                // Empty token leads to throwing an exception
                CHECK_THROWS_AS(generators[1](repodata_request, nullptr), std::invalid_argument);
                CHECK_THROWS_AS(generators[2](repodata_request, nullptr), std::invalid_argument);
            }

            SUBCASE("Request spec with sha")
            {
                std::unique_ptr<Mirror> mirror = make_mirror("oci://ghcr.io/channel-mirrors/conda-forge");
                CHECK_EQ(typeid(*mirror), typeid(OCIMirror));

                // With a known sha the manifest request is skipped: only the
                // token and blob requests remain.
                Mirror::request_generator_list generators = mirror->get_request_generators(
                    "linux-64/pandoc-3.2-ha770c72_0.conda",
                    "418348076c1a39170efb0bdc8a584ddd11e9ed0ff58ccd905488d3f165ca98ba"
                );
                CHECK_EQ(generators.size(), 2);

                Request spec_request(
                    "pandoc_request",
                    MirrorName("mirror_name"),
                    "linux-64/pandoc-3.2-ha770c72_0.conda"
                );
                MirrorRequest generated = generators[0](spec_request, nullptr);
                CHECK_EQ(generated.name, "pandoc_request");
                CHECK_EQ(
                    generated.url,
                    "https://ghcr.io/token?scope=repository:channel-mirrors/conda-forge/linux-64/pandoc:pull"
                );
                // Empty token leads to throwing an exception
                CHECK_THROWS_AS(generators[1](spec_request, nullptr), std::invalid_argument);
            }
        }

        TEST_CASE("nullptr")
        {
            // URLs with no recognized scheme produce no mirror at all.
            std::unique_ptr<Mirror> mirror = make_mirror("ghcr.io/channel-mirrors/conda-forge");
            CHECK_EQ(mirror, nullptr);
        }
    }
}

View File

@ -1172,6 +1172,48 @@ def test_create_with_multi_channels_and_non_existing_subdir(tmp_home, tmp_root_p
)
# Config mapping the custom ``oci_channel`` name onto the conda-forge OCI mirror.
# (A dedicated channel name is used to avoid interfering with conda-forge itself.)
oci_registry_config = {
    "mirrored_channels": {"oci_channel": ["oci://ghcr.io/channel-mirrors/conda-forge"]},
    # `repodata_use_zst` isn't considered when fetching from oci registries
    # since compressed repodata is handled internally
    # (if present, compressed repodata is necessarily fetched)
    # Setting `repodata_use_zst` to `false` avoids useless requests with
    # zst extension in repodata filename
    "repodata_use_zst": "false",
}
@pytest.mark.parametrize("shared_pkgs_dirs", [True], indirect=True)
@pytest.mark.parametrize("spec", ["pandoc", "pandoc=3.1.13"])
@pytest.mark.parametrize("parser", ["mamba", "libsolv"])
def test_create_with_oci_mirrored_channels(tmp_home, tmp_root_prefix, tmp_path, spec, parser):
    """Create an env from an OCI-mirrored channel and verify linked packages.

    Checks that every linked ``pandoc`` package is fetched from the GitHub
    container registry blobs rather than from anaconda.org, for both the
    mamba and libsolv repodata parsers.
    """
    env_name = "myenv"
    env_prefix = tmp_root_prefix / "envs" / env_name

    # Point micromamba at the mirror config through a dedicated rc file.
    rc_file = tmp_path / "config.yaml"
    rc_file.write_text(yaml.dump(oci_registry_config))

    cmd = ["-n", env_name, spec, "--json", "-c", "oci_channel"]
    if parser == "libsolv":
        # Disable the experimental parser to exercise the libsolv code path.
        cmd += ["--no-exp-repodata-parsing"]

    res = helpers.create(
        *cmd,
        f"--rc-file={rc_file}",
        default_channel=False,
        no_rc=False,
    )

    assert res["actions"]["PREFIX"] == str(env_prefix)
    for pkg in res["actions"]["LINK"]:
        # Packages must come from the OCI registry blobs, not anaconda.org.
        assert pkg["url"].startswith(
            "https://pkg-containers.githubusercontent.com/ghcr1/blobs/pandoc"
        )
        assert pkg["name"] == "pandoc"
        if spec == "pandoc=3.1.13":
            assert pkg["version"] == "3.1.13"
@pytest.mark.parametrize("shared_pkgs_dirs", [True], indirect=True)
def test_create_with_unicode(tmp_home, tmp_root_prefix):
env_name = "320 áγђß家固êôōçñ한"