add validation and unpacking for packages

This commit is contained in:
Wolf Vollprecht 2020-04-17 13:12:13 +02:00
parent 290cef0d20
commit c16056aa16
11 changed files with 472 additions and 62 deletions

View File

@ -3,6 +3,7 @@
#include <vector>
#include <string>
#include <memory>
#include <csignal>
// Context singleton class
class Context
@ -26,6 +27,8 @@ public:
bool dry_run = false;
bool always_yes = false;
bool sig_interrupt = false;
void set_verbosity(int lvl)
{
if (lvl == 0)
@ -66,5 +69,9 @@ private:
{
no_progress_bars = true;
}
std::signal(SIGINT, [](int signum) {
instance().sig_interrupt = true;
});
}
};

View File

@ -4,6 +4,7 @@
#include "thirdparty/indicators/progress_bar.hpp"
#include "output.hpp"
#include "validate.hpp"
extern "C" {
#include <stdio.h>
@ -84,7 +85,7 @@ namespace mamba
}
DownloadTarget(const std::string& name, const std::string& url, const std::string& filename)
: m_name(name)
: m_name(name), m_filename(filename)
{
m_file = std::ofstream(filename, std::ios::binary);
@ -221,8 +222,9 @@ namespace mamba
char* effective_url = nullptr;
curl_easy_getinfo(m_target, CURLINFO_RESPONSE_CODE, &http_status);
curl_easy_getinfo(m_target, CURLINFO_EFFECTIVE_URL, &effective_url);
curl_easy_getinfo(m_target, CURLINFO_SIZE_DOWNLOAD_T, &downloaded_size);
LOG(INFO) << "Transfer finalized, status: " << http_status << " @ " << effective_url;
LOG(INFO) << "Transfer finalized, status: " << http_status << " @ " << effective_url << " " << downloaded_size << " bytes";
final_url = effective_url;
if (m_finalize_callback)
@ -231,6 +233,7 @@ namespace mamba
}
else
{
validate();
if (m_has_progress_bar)
{
m_progress_bar.mark_as_completed("Downloaded " + m_name);
@ -239,6 +242,25 @@ namespace mamba
return true;
}
void validate()
{
if (m_expected_size)
{
curl_off_t dl_size;
curl_easy_getinfo(m_target, CURLINFO_SIZE_DOWNLOAD_T, &dl_size);
if (dl_size != m_expected_size)
{
throw std::runtime_error("Download of " + m_name + " does not have expected size!");
}
validate::sha256(m_filename, dl_size, m_sha256);
}
}
void set_sha256(const std::string& sha256)
{
m_sha256 = sha256;
}
~DownloadTarget()
{
curl_easy_cleanup(m_target);
@ -247,13 +269,19 @@ namespace mamba
int http_status;
std::string final_url;
curl_off_t downloaded_size;
std::string etag, mod, cache_control;
private:
std::function<int()> m_finalize_callback;
std::string m_name;
std::string m_name, m_filename;
// validation
std::size_t m_expected_size = 0;
std::string m_sha256;
std::chrono::steady_clock::time_point m_progress_throttle_time;
CURL* m_target;
@ -380,8 +408,13 @@ namespace mamba
{
repeats = 0;
}
} while (still_running);
} while (still_running && !Context::instance().sig_interrupt);
if (Context::instance().sig_interrupt)
{
std::cout << "Download interrupted" << std::endl;
curl_multi_cleanup(m_handle);
}
return true;
}

View File

@ -103,37 +103,26 @@ namespace cursor
namespace mamba
{
class NullBuffer : public std::streambuf
{
public:
int overflow(int c) { return c; }
};
class NullStream : public std::ostream
{
public:
NullStream()
: std::ostream(&m_sb)
{
}
private:
NullBuffer m_sb;
};
class Output
{
public:
static std::ostream& print()
class SpecialStream : public std::stringstream
{
if (Context::instance().quiet || Context::instance().json)
public:
SpecialStream()
: std::stringstream()
{
return Output::instance().null_stream;
}
else
~SpecialStream()
{
return std::cout;
Output::instance().print(str());
}
};
static SpecialStream print()
{
return SpecialStream();
}
static void print(const std::string_view& str)
@ -159,6 +148,41 @@ namespace mamba
}
}
static bool prompt(const std::string_view& message, char fallback='_')
{
if (Context::instance().always_yes) {
return true;
}
char in;
while (!Context::instance().sig_interrupt) {
std::cout << message << ": ";
if (fallback == 'n') {
std::cout << "[y/N] ";
}
else if (fallback == 'y') {
std::cout << "[Y/n] ";
}
else {
std::cout << "[y/n] ";
}
in = std::cin.get();
if (in == '\n')
{
// enter pressed
in = fallback;
}
if (in == 'y' || in == 'Y')
{
return true && !Context::instance().sig_interrupt;
}
if (in == 'n' || in == 'N')
{
return false;
}
}
return false;
}
struct ProgressProxy
{
indicators::ProgressBar* p_bar;
@ -310,8 +334,6 @@ namespace mamba
return out;
}
NullStream null_stream;
private:
Output() {
#ifdef _WIN32

View File

@ -0,0 +1,135 @@
#include <system_error>
#include "thirdparty/filesystem.hpp"
#include <archive.h>
#include <archive_entry.h>
#include "util.hpp"
namespace fs = ghc::filesystem;
namespace mamba
{
void extract_conda()
{
throw std::runtime_error("Not implemented yet!");
}
static int copy_data(archive *ar, archive *aw)
{
int r;
const void *buff;
size_t size;
la_int64_t offset;
while (true)
{
r = archive_read_data_block(ar, &buff, &size, &offset);
if (r == ARCHIVE_EOF)
{
return (ARCHIVE_OK);
}
if (r < ARCHIVE_OK)
{
throw std::runtime_error(archive_error_string(aw));
}
r = archive_write_data_block(aw, buff, size, offset);
if (r < ARCHIVE_OK)
{
throw std::runtime_error(archive_error_string(aw));
}
}
}
void extract_tarball(const fs::path& file, const fs::path& destination)
{
struct archive *a;
struct archive *ext;
struct archive_entry *entry;
int flags;
int r;
/* Select which attributes we want to restore. */
flags = ARCHIVE_EXTRACT_TIME;
flags |= ARCHIVE_EXTRACT_PERM;
flags |= ARCHIVE_EXTRACT_SECURE_NODOTDOT;
flags |= ARCHIVE_EXTRACT_SECURE_SYMLINKS;
// flags |= ARCHIVE_EXTRACT_SECURE_NOABSOLUTEPATHS;
flags |= ARCHIVE_EXTRACT_SPARSE;
flags |= ARCHIVE_EXTRACT_UNLINK;
a = archive_read_new();
archive_read_support_format_tar(a);
archive_read_support_filter_all(a);
ext = archive_write_disk_new();
archive_write_disk_set_options(ext, flags);
archive_write_disk_set_standard_lookup(ext);
if ((r = archive_read_open_filename(a, file.c_str(), 10240)))
{
throw std::runtime_error("Could not open archive for reading.");
}
for (;;)
{
r = archive_read_next_header(a, &entry);
if (r == ARCHIVE_EOF)
{
break;
}
if (r < ARCHIVE_OK)
{
throw std::runtime_error(archive_error_string(a));
}
// modify path to extract to directory
// Either we do this here or we temporarily have to `chdir` to the dest dir
const char* relative_file_path = archive_entry_pathname(entry);
fs::path full_output_path = destination / relative_file_path;
archive_entry_set_pathname(entry, full_output_path.c_str());
r = archive_write_header(ext, entry);
if (r < ARCHIVE_OK)
{
throw std::runtime_error(archive_error_string(a));
}
else
{
r = copy_data(a, ext);
if (r < ARCHIVE_OK)
{
throw std::runtime_error(archive_error_string(a));
}
}
r = archive_write_finish_entry(ext);
if (r < ARCHIVE_OK)
{
throw std::runtime_error(archive_error_string(a));
}
}
archive_read_close(a);
archive_read_free(a);
archive_write_close(ext);
archive_write_free(ext);
}
void extract(const fs::path& file)
{
std::string dest_dir = file;
if (ends_with(dest_dir, ".tar.bz2"))
{
dest_dir = dest_dir.substr(0, dest_dir.size() - 8);
extract_tarball(file, dest_dir);
}
else if (ends_with(dest_dir, ".conda"))
{
dest_dir = dest_dir.substr(0, dest_dir.size() - 6);
extract_tarball(file, dest_dir);
}
else
{
throw std::runtime_error("Unknown file format.");
}
}
}

View File

@ -37,6 +37,7 @@ PYBIND11_MODULE(mamba_api, m) {
.def("to_conda", &MTransaction::to_conda)
.def("print", &MTransaction::print)
.def("fetch_extract_packages", &MTransaction::fetch_extract_packages)
.def("prompt", &MTransaction::prompt)
;
py::class_<MSolver>(m, "Solver")

View File

@ -12,6 +12,8 @@ extern "C"
#include "solv/repo_conda.h"
}
#include "pool.hpp"
namespace mamba
{
class MRepo

View File

@ -129,7 +129,7 @@ namespace mamba
LOG(INFO) << "Using cache " << m_url << " age in seconds: " << cache_age << " / " << max_age;
std::string prefix = m_name;
prefix.resize(PREFIX_LENGTH - 1, ' ');
Output::print() << prefix << " Using cache" << std::endl;
Output::print() << prefix << " Using cache";
m_loaded = true;
m_json_cache_valid = true;

View File

@ -8,11 +8,104 @@
#include "repo.hpp"
#include "fetch.hpp"
#include "package_handling.hpp"
extern "C"
{
#include "solv/transaction.h"
}
#include "solver.hpp"
namespace fs = ghc::filesystem;
namespace mamba
{
class PackageDownloadExtractTarget
{
public:
PackageDownloadExtractTarget(MRepo* repo, Solvable* solvable)
: m_repo(repo), m_solv(solvable)
{
}
int finalize_callback()
{
Id __unused__;
m_progress_proxy.set_option(indicators::option::PostfixText{"Validating..."});
// Validation
auto expected_size = solvable_lookup_num(m_solv, SOLVABLE_DOWNLOADSIZE, 0);
std::string sha256_check = solvable_lookup_checksum(m_solv, SOLVABLE_CHECKSUM, &__unused__);
if (m_target->downloaded_size != expected_size)
{
throw std::runtime_error("File not valid: file size doesn't match expectation (" + std::string(m_tarball_path) + ")");
}
if (!validate::sha256(m_tarball_path, expected_size, sha256_check))
{
throw std::runtime_error("File not valid: SHA256 sum doesn't match expectation (" + std::string(m_tarball_path) + ")");
}
m_progress_proxy.set_option(indicators::option::PostfixText{"Decompressing..."});
extract(m_tarball_path);
m_progress_proxy.set_option(indicators::option::PostfixText{"Done"});
m_progress_proxy.mark_as_completed("Downloaded & extracted " + m_name);
return 0;
}
std::unique_ptr<DownloadTarget>& target(const fs::path& cache_path)
{
std::string filename = solvable_lookup_str(m_solv, SOLVABLE_MEDIAFILE);
m_tarball_path = cache_path / filename;
bool tarball_exists = fs::exists(m_tarball_path);
bool valid = false;
Id __unused__;
if (tarball_exists)
{
Output::print() << "Found tarball at " << m_tarball_path;
// validate that this tarball has the right size and MD5 sum
std::uintmax_t file_size = solvable_lookup_num(m_solv, SOLVABLE_DOWNLOADSIZE, 0);
valid = validate::file_size(m_tarball_path, file_size);
valid = valid && validate::md5(m_tarball_path, file_size, solvable_lookup_checksum(m_solv, SOLVABLE_PKGID, &__unused__));
LOG(INFO) << m_tarball_path << " is " << valid;
}
if (!tarball_exists || !valid)
{
Output::print("Adding DL target");
// need to download this file
std::string m_url = m_repo->url();
m_url += "/" + filename;
m_name = pool_id2str(m_solv->repo->pool, m_solv->name);
LOG(INFO) << "Adding " << m_name << " with " << m_url;
Output::print() << "Target: " << m_url;
m_progress_proxy = Output::instance().add_progress_bar(m_name);
m_target = std::make_unique<DownloadTarget>(m_name, m_url, cache_path / filename);
m_target->set_finalize_callback(&PackageDownloadExtractTarget::finalize_callback, this);
m_target->set_progress_bar(m_progress_proxy);
}
return m_target;
}
MRepo* m_repo;
Solvable* m_solv;
Output::ProgressProxy m_progress_proxy;
std::unique_ptr<DownloadTarget> m_target;
std::string m_url, m_name;
fs::path m_tarball_path;
};
inline void try_add(nlohmann::json& j, const char* key, const char* val)
{
if (!val)
@ -169,47 +262,68 @@ namespace mamba
auto fetch_extract_packages(const std::string& cache_dir, std::vector<MRepo*>& repos)
{
fs::path cache_path(cache_dir);
std::vector<std::unique_ptr<DownloadTarget>> targets;
std::vector<std::unique_ptr<PackageDownloadExtractTarget>> targets;
MultiDownloadTarget multi_dl;
Output::instance().init_multi_progress();
for (auto& s : m_to_install)
{
std::string filename = solvable_lookup_str(s, SOLVABLE_MEDIAFILE);
if (!fs::exists(cache_path / filename))
std::string url;
MRepo* mamba_repo = nullptr;
for (auto& r : repos)
{
// need to download this file
auto* pool = s->repo->pool;
std::string url;
for (auto& r : repos)
if (r->repo() == s->repo)
{
if (r->repo() == s->repo)
{
url = r->url();
}
mamba_repo = r;
break;
}
if (!url.size())
{
throw std::runtime_error("Repo not associated.");
}
url += "/" + filename;
std::string name = pool_id2str(pool, s->name);
LOG(INFO) << "Adding " << name << " with " << url;
auto progress_proxy = Output::instance().add_progress_bar(name);
auto target = std::make_unique<DownloadTarget>(name, url, cache_path / filename);
target->set_expected_size(solvable_lookup_num(s, SOLVABLE_DOWNLOADSIZE, 0));
target->set_progress_bar(progress_proxy);
multi_dl.add(target);
targets.push_back(std::move(target));
}
if (mamba_repo == nullptr)
{
throw std::runtime_error("Repo not associated.");
}
auto dl_target = std::make_unique<PackageDownloadExtractTarget>(mamba_repo, s);
multi_dl.add(dl_target->target(cache_path));
targets.push_back(std::move(dl_target));
}
multi_dl.download(true);
}
bool empty()
{
return m_to_install.size() == 0 && m_to_remove.size() == 0;
}
bool prompt(const std::string& cache_dir, std::vector<MRepo*>& repos)
{
if (Context::instance().quiet && Context::instance().always_yes)
{
return true;
}
// check size of transaction
Output::print("\n");
if (empty())
{
Output::print("# All requested packages already installed\n");
return true;
}
// we print, even if quiet
print();
if (Context::instance().dry_run)
{
return true;
}
bool res = Output::prompt("Confirm changes", 'y');
if (res)
{
fetch_extract_packages(cache_dir, repos);
}
return true;
}
void print()
{
transaction_print(m_transaction);

97
include/validate.hpp Normal file
View File

@ -0,0 +1,97 @@
#pragma once
#include <iostream>
#include "thirdparty/filesystem.hpp"
#include "openssl/sha.h"
#include "openssl/md5.h"
#include "output.hpp"
namespace fs = ghc::filesystem;
namespace validate
{
std::string sha256sum(const std::string& path, std::size_t filesize)
{
unsigned char hash[SHA256_DIGEST_LENGTH];
SHA256_CTX sha256;
SHA256_Init(&sha256);
std::ifstream infile(path, std::ios::binary);
// 1 MB buffer size
constexpr std::size_t BUFSIZE = 1024 * 1024;
char buffer[BUFSIZE];
while (infile)
{
infile.read(buffer, BUFSIZE);
size_t count = infile.gcount();
if (!count)
break;
SHA256_Update(&sha256, buffer, count);
}
SHA256_Final(hash, &sha256);
std::stringstream out;
out.fill('0');
out << std::hex;
for(int i = 0; i < SHA256_DIGEST_LENGTH; i++) {
out << std::setw(2) << (int) hash[i];
}
return out.str();
}
std::string md5sum(const std::string& path, std::size_t filesize)
{
unsigned char hash[MD5_DIGEST_LENGTH];
MD5_CTX md5;
MD5_Init(&md5);
std::ifstream infile(path, std::ios::binary);
// 1 MB buffer size
constexpr std::size_t BUFSIZE = 1024 * 1024;
char buffer[BUFSIZE];
while (infile)
{
infile.read(buffer, BUFSIZE);
size_t count = infile.gcount();
if (!count)
break;
MD5_Update(&md5, buffer, count);
}
MD5_Final(hash, &md5);
std::stringstream out;
out.fill('0');
out << std::hex;
for(int i = 0; i < MD5_DIGEST_LENGTH; i++) {
out << std::setw(2) << (int) hash[i];
}
return out.str();
}
bool sha256(const std::string& path, std::size_t filesize, const std::string& validation) {
return sha256sum(path, filesize) == validation;
}
bool md5(const std::string& path, std::size_t filesize, const std::string& validation) {
return md5sum(path, filesize) == validation;
}
bool file_size(const fs::path& path, std::uintmax_t validation)
{
return fs::file_size(path) == validation;
}
}

View File

@ -328,8 +328,6 @@ def install(args, parser, command='install'):
else:
raise EnvironmentLocationNotFound(prefix)
# context.__init__(argparse_args=args)
prepend = not args.override_channels
prefix = context.target_prefix
@ -481,8 +479,9 @@ def install(args, parser, command='install'):
transaction = api.Transaction(solver)
to_link, to_unlink = transaction.to_conda()
if not context.dry_run:
transaction.fetch_extract_packages(PackageCacheData.first_writable().pkgs_dir, repos)
mamba_cache_path = os.path.join(PackageCacheData.first_writable().pkgs_dir, "mamba")
mkdir_p(mamba_cache_path)
transaction.prompt(mamba_cache_path, repos)
conda_transaction = to_txn(specs, (), prefix, to_link, to_unlink, index)
handle_txn(conda_transaction, prefix, args, newenv)

View File

@ -66,7 +66,7 @@ ext_modules = [
"include/thirdparty/"
],
library_dirs=library_dir,
libraries=['archive', 'solv', 'solvext', CURL_LIB],
libraries=['archive', 'solv', 'solvext', CURL_LIB, 'crypto'],
language='c++'
),
]