diff --git a/libmamba/include/mamba/core/mamba_fs.hpp b/libmamba/include/mamba/core/mamba_fs.hpp index 876e7ae69..69d783098 100644 --- a/libmamba/include/mamba/core/mamba_fs.hpp +++ b/libmamba/include/mamba/core/mamba_fs.hpp @@ -408,6 +408,11 @@ namespace fs return m_path.extension(); } + u8path lexically_relative(const u8path& base) const + { + return m_path.lexically_relative(base); + } + //---- Modifiers ---- void clear() noexcept diff --git a/libmamba/include/mamba/core/package_handling.hpp b/libmamba/include/mamba/core/package_handling.hpp index a6631a7f9..f7c987ac0 100644 --- a/libmamba/include/mamba/core/package_handling.hpp +++ b/libmamba/include/mamba/core/package_handling.hpp @@ -25,9 +25,12 @@ namespace mamba void create_archive(const fs::u8path& directory, const fs::u8path& destination, compression_algorithm, - int compression_level); + int compression_level, + int compression_threads, + bool (*filter)(const fs::u8path&)); void create_package(const fs::u8path& directory, const fs::u8path& out_file, - int compression_level); + int compression_level, + int compression_threads); void extract_archive(const fs::u8path& file, const fs::u8path& destination); @@ -37,7 +40,10 @@ namespace mamba void extract(const fs::u8path& file, const fs::u8path& destination); fs::u8path extract(const fs::u8path& file); void extract_subproc(const fs::u8path& file, const fs::u8path& dest); - bool transmute(const fs::u8path& pkg_file, const fs::u8path& target, int compression_level); + bool transmute(const fs::u8path& pkg_file, + const fs::u8path& target, + int compression_level, + int compression_threads); bool validate(const fs::u8path& pkg_folder); } // namespace mamba diff --git a/libmamba/src/core/package_handling.cpp b/libmamba/src/core/package_handling.cpp index 1810efeb5..342777358 100644 --- a/libmamba/src/core/package_handling.cpp +++ b/libmamba/src/core/package_handling.cpp @@ -84,12 +84,38 @@ namespace mamba return r; } + bool path_has_prefix(const fs::u8path& path, const fs::u8path& prefix) + 
{ + auto pair = std::mismatch(path.std_path().begin(), + path.std_path().end(), + prefix.std_path().begin(), + prefix.std_path().end()); + return pair.second == prefix.std_path().end(); + } + + int order(const fs::u8path& path) + { + int is_info = path_has_prefix(path, "info"); + return !is_info; + } + + int zip_order(const fs::u8path& path) + { + // sort info-...tar.zst file last in zip folder + int init_order = starts_with(path.filename().string(), "info-"); + // sort metadata.json first in zip folder + if (path.filename().string() == "metadata.json") + init_order = -1; + return init_order; + } + // Bundle up all files in directory and create destination archive void create_archive(const fs::u8path& directory, const fs::u8path& destination, compression_algorithm ca, int compression_level, - bool (*filter)(const std::string&)) + int compression_threads, + bool (*filter)(const fs::u8path&)) { int r; struct archive* a; @@ -127,10 +153,27 @@ namespace mamba archive_write_add_filter_zstd(a); if (compression_level < 1 || compression_level > 22) - throw std::runtime_error("zip compression level should be between 1 and 22"); + throw std::runtime_error("zstd compression level should be between 1 and 22"); + std::string comp_level = std::string("zstd:compression-level=") + std::to_string(compression_level); - archive_write_set_options(a, comp_level.c_str()); + + int res = archive_write_set_options(a, comp_level.c_str()); + if (res != 0) + { + LOG_ERROR << "libarchive error (" << res << ") " << archive_error_string(a); + } + + if (compression_threads > 2) + { + std::string comp_threads_level + = std::string("zstd:threads=") + std::to_string(compression_threads); + res = archive_write_set_options(a, comp_threads_level.c_str()); + if (res != 0) + { + LOG_ERROR << "libarchive error (" << res << ") " << archive_error_string(a); + } + } } archive_write_open_filename(a, abs_out_path.string().c_str()); @@ -142,24 +185,42 @@ namespace mamba } fs::current_path(directory); - for 
(auto& dir_entry : fs::recursive_directory_iterator(".")) + std::vector<std::pair<int, fs::u8path>> files; + if (ca != compression_algorithm::zip) { - // we only add empty directories to the archive - if (dir_entry.is_directory() && !fs::is_empty(dir_entry.path())) + for (auto& dir_entry : fs::recursive_directory_iterator(".")) { + auto clean_path = dir_entry.path().lexically_relative("./"); + files.push_back({ order(clean_path), clean_path }); + } + } + else + { + // for zip files, sort `info` last + for (auto& dir_entry : fs::directory_iterator(".")) + { + auto clean_path = dir_entry.path().lexically_relative("./"); + files.push_back({ zip_order(clean_path), clean_path }); + } + } + + std::sort(files.begin(), files.end()); + + for (auto& order_pair : files) + { + const fs::u8path& path = order_pair.second; + + // skip _non-empty_ directories (they are implicitly added by the files therein) + auto status = fs::symlink_status(path); + if (fs::is_directory(status) && !fs::is_empty(path) && !fs::is_symlink(status)) + { + LOG_INFO << "Skipping " << path << " as it is a non-empty directory."; continue; } - std::string p = dir_entry.path().string(); - // do this in a better way? - if (p[0] == '.') - { - p = p.substr(1); - } - if (p[0] == '/') - { - p = p.substr(1); - } + LOG_INFO << "Adding " << path << " to archive"; + + std::string p = path.string(); if (filter && filter(p)) { continue; @@ -187,6 +248,11 @@ namespace mamba { throw std::runtime_error(concat("libarchive error: ", archive_error_string(disk))); } + + // clean out UID and GID + archive_entry_set_uid(entry, 0); + archive_entry_set_gid(entry, 0); + if (archive_read_disk_descend(disk) < ARCHIVE_OK) { throw std::runtime_error(concat("libarchive error: ", archive_error_string(disk))); @@ -232,7 +298,8 @@ namespace mamba // note the info folder must have already been created! 
void create_package(const fs::u8path& directory, const fs::u8path& out_file, - int compression_level) + int compression_level, + int compression_threads) { fs::u8path out_file_abs = fs::absolute(out_file); if (ends_with(out_file.string(), ".tar.bz2")) @@ -241,7 +308,8 @@ namespace mamba out_file_abs, bzip2, compression_level, - [](const std::string&) { return false; }); + compression_threads, + [](const fs::u8path&) { return false; }); } else if (ends_with(out_file.string(), ".conda")) { @@ -250,12 +318,20 @@ namespace mamba tdir.path() / concat("info-", out_file.stem().string(), ".tar.zst"), zstd, compression_level, - [](const std::string& p) -> bool { return !starts_with(p, "info/"); }); + compression_threads, + [](const fs::u8path& p) -> bool { + return p.std_path().begin() != p.std_path().end() + && *p.std_path().begin() != "info"; + }); create_archive(directory, tdir.path() / concat("pkg-", out_file.stem().string(), ".tar.zst"), zstd, compression_level, - [](const std::string& p) -> bool { return starts_with(p, "info/"); }); + compression_threads, + [](const fs::u8path& p) -> bool { + return p.std_path().begin() != p.std_path().end() + && *p.std_path().begin() == "info"; + }); nlohmann::json pkg_metadata; pkg_metadata["conda_pkg_format_version"] = 2; @@ -264,8 +340,12 @@ namespace mamba metadata_file << pkg_metadata; metadata_file.close(); - create_archive( - tdir.path(), out_file_abs, zip, 0, [](const std::string&) { return false; }); + create_archive(tdir.path(), + out_file_abs, + zip, + 0, + compression_threads, + [](const fs::u8path&) { return false; }); } } @@ -470,7 +550,10 @@ namespace mamba } } - bool transmute(const fs::u8path& pkg_file, const fs::u8path& target, int compression_level) + bool transmute(const fs::u8path& pkg_file, + const fs::u8path& target, + int compression_level, + int compression_threads) { TemporaryDirectory extract_dir; @@ -487,7 +570,7 @@ namespace mamba throw std::runtime_error("Unknown package format (" + pkg_file.string() + 
")"); } - create_package(extract_dir, target, compression_level); + create_package(extract_dir, target, compression_level, compression_threads); return true; } diff --git a/libmambapy/libmambapy/__init__.pyi b/libmambapy/libmambapy/__init__.pyi index 6273da3dd..7c2fb8db0 100644 --- a/libmambapy/libmambapy/__init__.pyi +++ b/libmambapy/libmambapy/__init__.pyi @@ -1463,7 +1463,12 @@ def get_virtual_packages() -> typing.List[PackageInfo]: def sign(data: str, secret_key: str) -> str: pass -def transmute(arg0: Path, arg1: Path, arg2: int) -> bool: +def transmute( + source_package: Path, + destination_package: Path, + compression_level: int, + compression_threads: int = 1, +) -> bool: pass MAMBA_CLEAN_ALL = 1 diff --git a/libmambapy/src/main.cpp b/libmambapy/src/main.cpp index 57b4caa06..1f13c9c7b 100644 --- a/libmambapy/src/main.cpp +++ b/libmambapy/src/main.cpp @@ -669,7 +669,19 @@ PYBIND11_MODULE(bindings, m) m.def("get_channels", &get_channels); - m.def("transmute", &transmute); + m.def("transmute", + &transmute, + py::arg("source_package"), + py::arg("destination_package"), + py::arg("compression_level"), + py::arg("compression_threads") = 1); + + // fix extract from error_handling first + // auto package_handling_sm = m.def_submodule("package_handling"); + // package_handling_sm.def("extract", &extract); + // package_handling_sm.def("create", &create_package, py::arg("directory"), + // py::arg("out_package"), py::arg("compression_level"), py::arg("compression_threads") = 1); + m.def("get_virtual_packages", &get_virtual_packages); diff --git a/mamba_package/src/package.cpp b/mamba_package/src/package.cpp index 2cc7505ed..5109f9623 100644 --- a/mamba_package/src/package.cpp +++ b/mamba_package/src/package.cpp @@ -16,6 +16,7 @@ set_package_command(CLI::App* com) { static std::string infile, dest; static int compression_level = -1; + static int compression_threads = 1; auto extract_subcom = com->add_subcommand("extract"); extract_subcom->add_option("archive", infile, 
"Archive to extract"); @@ -35,6 +36,10 @@ set_package_command(CLI::App* com) "-c,--compression-level", compression_level, "Compression level from 0-9 (tar.bz2, default is 9), and 1-22 (conda, default is 15)"); + compress_subcom->add_option( + "--compression-threads", + compression_threads, + "Compression threads (only relevant for .conda packages, default is 1)"); compress_subcom->callback( [&]() { @@ -45,7 +50,8 @@ set_package_command(CLI::App* com) if (ends_with(dest, ".conda") && compression_level == -1) compression_level = 15; - create_package(fs::absolute(infile), fs::absolute(dest), compression_level); + create_package( + fs::absolute(infile), fs::absolute(dest), compression_level, compression_threads); }); auto transmute_subcom = com->add_subcommand("transmute"); @@ -54,6 +60,10 @@ set_package_command(CLI::App* com) "-c,--compression-level", compression_level, "Compression level from 0-9 (tar.bz2, default is 9), and 1-22 (conda, default is 15)"); + transmute_subcom->add_option( + "--compression-threads", + compression_threads, + "Compression threads (only relevant for .conda packages, default is 1)"); transmute_subcom->callback( [&]() { @@ -70,6 +80,7 @@ set_package_command(CLI::App* com) dest = infile.substr(0, infile.size() - 8) + ".tar.bz2"; } std::cout << "Transmuting " << fs::absolute(infile) << " to " << dest << std::endl; - transmute(fs::absolute(infile), fs::absolute(dest), compression_level); + transmute( + fs::absolute(infile), fs::absolute(dest), compression_level, compression_threads); }); } diff --git a/micromamba/environment-dev.yml b/micromamba/environment-dev.yml index c16e25490..297823160 100644 --- a/micromamba/environment-dev.yml +++ b/micromamba/environment-dev.yml @@ -23,6 +23,7 @@ dependencies: - pytest-asyncio - pytest-lazy-fixture - pytest-xprocess + - conda-package-handling - pyyaml - spdlog - fmt diff --git a/micromamba/src/package.cpp b/micromamba/src/package.cpp index 932b3b13a..453257529 100644 --- a/micromamba/src/package.cpp 
+++ b/micromamba/src/package.cpp @@ -7,6 +7,7 @@ #include "common_options.hpp" #include "mamba/core/util.hpp" +#include "mamba/api/configuration.hpp" #include "mamba/core/package_handling.hpp" using namespace mamba; // NOLINT(build/namespaces) @@ -16,47 +17,72 @@ set_package_command(CLI::App* subcom) { static std::string infile, dest; static int compression_level = -1; + static int compression_threads = 1; + + init_general_options(subcom); auto extract_subcom = subcom->add_subcommand("extract"); + init_general_options(extract_subcom); extract_subcom->add_option("archive", infile, "Archive to extract"); extract_subcom->add_option("dest", dest, "Destination folder"); extract_subcom->callback( [&]() { - std::cout << "Extracting " << fs::absolute(infile) << " to " << fs::absolute(dest) - << std::endl; + // load verbose and other options to context + Configuration::instance().load(); + + Console::stream() << "Extracting " << fs::absolute(infile) << " to " + << fs::absolute(dest) << std::endl; extract(fs::absolute(infile), fs::absolute(dest)); }); auto compress_subcom = subcom->add_subcommand("compress"); + init_general_options(compress_subcom); compress_subcom->add_option("folder", infile, "Folder to compress"); compress_subcom->add_option("dest", dest, "Destination (e.g. 
myfile-3.1-0.tar.bz2 or .conda)"); compress_subcom->add_option( "-c,--compression-level", compression_level, "Compression level from 0-9 (tar.bz2, default is 9), and 1-22 (conda, default is 15)"); + compress_subcom->add_option( + "--compression-threads", + compression_threads, + "Compression threads (only relevant for .conda packages, default is 1)"); compress_subcom->callback( [&]() { - std::cout << "Compressing " << fs::absolute(infile) << " to " << dest << std::endl; + // load verbose and other options to context + Configuration::instance().load(); + + Console::stream() << "Compressing " << fs::absolute(infile) << " to " << dest + << std::endl; if (ends_with(dest, ".tar.bz2") && compression_level == -1) compression_level = 9; if (ends_with(dest, ".conda") && compression_level == -1) compression_level = 15; - create_package(fs::absolute(infile), fs::absolute(dest), compression_level); + create_package( + fs::absolute(infile), fs::absolute(dest), compression_level, compression_threads); }); auto transmute_subcom = subcom->add_subcommand("transmute"); + init_general_options(transmute_subcom); transmute_subcom->add_option("infile", infile, "Folder to compress"); transmute_subcom->add_option( "-c,--compression-level", compression_level, "Compression level from 0-9 (tar.bz2, default is 9), and 1-22 (conda, default is 15)"); + transmute_subcom->add_option( + "--compression-threads", + compression_threads, + "Compression threads (only relevant for .conda packages, default is 1)"); transmute_subcom->callback( [&]() { + // load verbose and other options to context + Configuration::instance().load(); + if (ends_with(infile, ".tar.bz2")) { if (compression_level == -1) @@ -69,7 +95,9 @@ set_package_command(CLI::App* subcom) compression_level = 9; dest = infile.substr(0, infile.size() - 8) + ".tar.bz2"; } - std::cout << "Transmuting " << fs::absolute(infile) << " to " << dest << std::endl; - transmute(fs::absolute(infile), fs::absolute(dest), compression_level); + 
Console::stream() << "Transmuting " << fs::absolute(infile) << " to " << dest + << std::endl; + transmute( + fs::absolute(infile), fs::absolute(dest), compression_level, compression_threads); }); } diff --git a/micromamba/tests/data/cph_test_data-0.0.1-0.tar.bz2 b/micromamba/tests/data/cph_test_data-0.0.1-0.tar.bz2 new file mode 100644 index 000000000..9e41d5416 Binary files /dev/null and b/micromamba/tests/data/cph_test_data-0.0.1-0.tar.bz2 differ diff --git a/micromamba/tests/test_package.py b/micromamba/tests/test_package.py new file mode 100644 index 000000000..ff816398f --- /dev/null +++ b/micromamba/tests/test_package.py @@ -0,0 +1,209 @@ +import filecmp +import platform +import shutil +import subprocess +import tarfile +import zipfile +from pathlib import Path + +import pytest +import zstandard +from conda_package_handling import api as cph + +from .helpers import * + + +@pytest.fixture +def cph_test_file(): + return Path(__file__).parent / "data" / "cph_test_data-0.0.1-0.tar.bz2" + + +def print_diff_files(dcmp): + for name in dcmp.diff_files: + print(f"diff_file {name} found in {dcmp.left} and {dcmp.right}") + for name in dcmp.left_only: + print(f"file only found in LHS {dcmp.left} / {name}") + for name in dcmp.right_only: + print(f"file only found in RHS {dcmp.right} / {name}") + + for sub_dcmp in dcmp.subdirs.values(): + print_diff_files(sub_dcmp) + + +def test_extract(cph_test_file: Path, tmp_path: Path): + (tmp_path / "cph").mkdir(parents=True) + (tmp_path / "mm").mkdir(parents=True) + + shutil.copy(cph_test_file, tmp_path / "mm") + shutil.copy(cph_test_file, tmp_path / "cph") + + mamba_exe = get_umamba() + subprocess.call( + [ + mamba_exe, + "package", + "extract", + str(tmp_path / "mm" / cph_test_file.name), + str(tmp_path / "mm" / "cph_test_data-0.0.1-0"), + ] + ) + cph.extract( + str(tmp_path / "cph" / cph_test_file.name), + dest_dir=str(tmp_path / "cph" / "cph_test_data-0.0.1-0"), + ) + + conda = set( + (p.relative_to(tmp_path / "cph") for p in 
(tmp_path / "cph").rglob("**/*")) + ) + mamba = set( + (p.relative_to(tmp_path / "mm") for p in (tmp_path / "mm").rglob("**/*")) + ) + assert conda == mamba + + extracted = cph_test_file.name.removesuffix(".tar.bz2") + fcmp = filecmp.dircmp(tmp_path / "cph" / extracted, tmp_path / "mm" / extracted) + assert ( + len(fcmp.left_only) == 0 + and len(fcmp.right_only) == 0 + and len(fcmp.diff_files) == 0 + ) + # fcmp.report_full_closure() + + +def compare_two_tarfiles(tar1, tar2): + tar1_files = set(tar1.getnames()) + tar2_files = set(tar2.getnames()) + assert tar1_files == tar2_files + + for f in tar1_files: + m1: tarfile.TarInfo = tar1.getmember(f) + m2 = tar2.getmember(f) + if platform.system() != "Windows": + if not m1.issym(): + assert m1.mode == m2.mode + else: + if platform.system() == "Linux": + assert m2.mode == 0o777 + else: + assert m1.mode == m2.mode + assert m1.mtime == m2.mtime + + assert m2.uid == 0 + assert m2.gid == 0 + assert m1.size == m2.size + + if m1.isfile(): + assert tar1.extractfile(f).read() == tar2.extractfile(f).read() + + if m1.islnk() or m1.issym(): + assert m1.linkname == m2.linkname + + +def assert_sorted(l): + assert l == sorted(l) + + +def test_extract_compress(cph_test_file: Path, tmp_path: Path): + (tmp_path / "mm").mkdir(parents=True) + + shutil.copy(cph_test_file, tmp_path / "mm") + + mamba_exe = get_umamba() + out = tmp_path / "mm" / "out" + subprocess.call( + [ + mamba_exe, + "package", + "extract", + str(tmp_path / "mm" / cph_test_file.name), + str(out), + ] + ) + subprocess.call( + [ + mamba_exe, + "package", + "compress", + str(out), + str(tmp_path / "mm" / "out.tar.bz2"), + ] + ) + + compare_two_tarfiles( + tarfile.open(cph_test_file), tarfile.open(tmp_path / "mm" / "out.tar.bz2") + ) + + fout = tarfile.open(tmp_path / "mm" / "out.tar.bz2") + names = fout.getnames() + assert "info/paths.json" in names + + info_files = [f for f in names if f.startswith("info/")] + # check that info files are at the beginning + assert names[: 
len(info_files)] == info_files + + # check that the rest is sorted + assert_sorted(names[len(info_files) :]) + assert_sorted(names[: len(info_files)]) + + +def test_transmute(cph_test_file: Path, tmp_path: Path): + (tmp_path / "cph").mkdir(parents=True) + (tmp_path / "mm").mkdir(parents=True) + + shutil.copy(cph_test_file, tmp_path) + shutil.copy(tmp_path / cph_test_file.name, tmp_path / "mm") + + mamba_exe = get_umamba() + subprocess.call( + [mamba_exe, "package", "transmute", str(tmp_path / "mm" / cph_test_file.name)] + ) + failed_files = cph.transmute( + str(tmp_path / cph_test_file.name), ".conda", out_folder=str(tmp_path / "cph") + ) + assert len(failed_files) == 0 + + as_conda = cph_test_file.name.removesuffix(".tar.bz2") + ".conda" + + cph.extract(str(tmp_path / "cph" / as_conda)) + cph.extract(str(tmp_path / "mm" / as_conda)) + + conda = list((tmp_path / "cph").rglob("**/*")) + mamba = list((tmp_path / "mm").rglob("**/*")) + + fcmp = filecmp.dircmp( + tmp_path / "cph" / "cph_test_data-0.0.1-0", + tmp_path / "mm" / "cph_test_data-0.0.1-0", + ) + assert ( + len(fcmp.left_only) == 0 + and len(fcmp.right_only) == 0 + and len(fcmp.diff_files) == 0 + ) + # fcmp.report_full_closure() + + # extract zipfile + with zipfile.ZipFile(tmp_path / "mm" / as_conda, "r") as zip_ref: + l = zip_ref.namelist() + + assert l[2].startswith("info-") + assert l[0] == "metadata.json" + assert l[1].startswith("pkg-") + + zip_ref.extractall(tmp_path / "mm" / "zipcontents") + + files = list((tmp_path / "mm" / "zipcontents").glob("**/*")) + for f in files: + if f.suffix == ".zst": + with open(f, mode="rb") as fi: + dcf = zstandard.ZstdDecompressor().stream_reader(fi) + + with tarfile.open(fileobj=dcf, mode="r|") as z: + assert_sorted(z.getnames()) + members = z.getmembers() + for m in members: + if f.name.startswith("info-"): + assert m.name.startswith("info/") + if not f.name.startswith("info-"): + assert not m.name.startswith("info/") + assert m.uid == 0 + assert m.gid == 0