initial commit

This commit is contained in:
Wolf Vollprecht 2019-03-11 20:46:42 +01:00
commit 2589052338
14 changed files with 42067 additions and 0 deletions

7
.gitignore vendored Normal file
View File

@ -0,0 +1,7 @@
__cache__/
.ipynb_checkpoints/
*.cppimporthash
.rendered*
installed.json
*.so

65
cpp_test.py Normal file
View File

@ -0,0 +1,65 @@
import cppimport
from multiprocessing.pool import Pool as MPool
import bz2
from urllib import request
import sys
import os
import time
from datetime import datetime, timedelta
channels = ['conda-forge', 'r', 'anaconda']
what_to_get = "opencv ==3.4.2"
url_template = 'https://conda.anaconda.org/{}/linux-64/repodata.json.bz2'
def ensure_dir(file_path):
    """Create the parent directory of ``file_path`` if it is missing.

    Args:
        file_path: Path of a file whose containing directory must exist.

    A path without a directory component (for example ``"data.json"``) refers
    to the current working directory, so nothing is created in that case.
    """
    directory = os.path.dirname(file_path)
    # os.makedirs('') raises FileNotFoundError, so only act on a real directory.
    if directory:
        # exist_ok avoids a race between checking for and creating the directory.
        os.makedirs(directory, exist_ok=True)
def download(args):
    """Make sure a fresh repodata cache file exists for one channel.

    Args:
        args: Tuple ``(channel, url, idx)``. ``channel`` names the cache file
            ``./__cache__/<channel>.json``, ``url`` points at the bz2-compressed
            repodata, and ``idx`` is accepted but not used.

    Returns:
        True once the cache file is present and at most 24 hours old.

    Raises:
        Whatever ``urllib``, ``bz2`` or the file system raise when the download,
        decompression or write fails.
    """
    channel, url, _idx = args
    print("Checking: ", args)
    cache_file = os.path.join('./__cache__/', channel + '.json')
    # The cache directory is not part of the checkout, so create it on demand.
    os.makedirs(os.path.dirname(cache_file), exist_ok=True)
    has_cache = os.path.isfile(cache_file)
    if has_cache:
        modified = datetime.fromtimestamp(os.path.getmtime(cache_file))
        print(modified)
        if datetime.now() - modified > timedelta(hours=24):
            print("Cache invalidated...")
            has_cache = False
    if not has_cache:
        print("Downloading ", cache_file)
        # Fetch and decompress before touching the cache file: a failed download
        # must not leave an empty file behind that would count as a valid cache.
        with request.urlopen(url) as req:
            payload = bz2.decompress(req.read()).decode('utf-8')
        with open(cache_file, 'w', encoding='utf-8') as ftmp:
            ftmp.write(payload)
    return True
def download_all(channels):
    """Refresh the repodata cache of every channel using a process pool.

    Args:
        channels: Iterable of channel names; each one is substituted into
            ``url_template`` to build its download URL.

    Returns:
        True when every ``download`` call reported success.

    Side effects:
        Rebinds the module-level ``repos`` to a dict with one empty entry per
        channel.
    """
    global repos
    repos = {channel: {} for channel in channels}
    channel_args = [(channel, url_template.format(channel), i) for i, channel in enumerate(channels)]
    # The context manager shuts the worker processes down; previously the pool
    # was never closed.
    with MPool(8) as mpool:
        results = mpool.map(download, channel_args)
    return all(results)
# Script entry point: refresh the channel caches, build/import the C++ extension
# and ask it to solve for `what_to_get`.
# The guard is required because download_all() starts a multiprocessing pool:
# with the "spawn" start method every worker re-imports this module, and
# without the guard each worker would try to start a pool of its own.
if __name__ == '__main__':
    repos = download_all(channels)
    api = cppimport.imp('interface')
    if os.path.isfile('./installed.json'):
        print("Using installed.json from this folder")
        installed_json = './installed.json'
    else:
        print("Couldn't find installed.json. Generate with `conda list --json > installed.json` from the command line.")
        # An empty path tells the solver that there is no installed-package list.
        installed_json = ""
    channel_json = ['./__cache__/{}.json'.format(c) for c in channels]
    result = api.solve(channel_json, installed_json, what_to_get)
    print(result)

311
include/api.cpp Normal file
View File

@ -0,0 +1,311 @@
/*
<%
cfg['compiler_args'] = ['-std=c++17', '-march=native']
cfg['libraries'] = ['solv']
setup_pybind11(cfg)
%>
*/
#include "thirdparty/simdjson/simdjson.h"
#include "thirdparty/simdjson/simdjson.cpp"
extern "C"
{
#include "solv/pool.h"
#include "solv/repo.h"
#include "solv/queue.h"
#include "solv/solver.h"
#include "solv/solverdebug.h"
}
static Pool* global_pool;
#include <iostream>
#include "parsing.hpp"
// Name, version and build information of a single package record.
struct package {
    std::string name;
    std::string version;
    std::string build_string;
    // Defaulted so that a record without a "build_number" entry never exposes
    // an indeterminate value.
    int build_number = 0;
};

// Writes a package as "<name> -> <version>, <build_string>".
// The package is taken by const reference so that const objects and
// temporaries can be streamed as well.
std::ostream& operator<<(std::ostream& os, const package& pkg)
{
    return os << pkg.name << " -> " << pkg.version << ", " << pkg.build_string;
}
// Package record that additionally carries a list of dependency strings.
struct repo_package {
    std::string name;
    std::string version;
    std::string build_string;
    // Defaulted so that the member is never read while indeterminate.
    int build_number = 0;
    std::vector<std::string> dependencies;
};

// Writes a repo_package as "<name> -> <version>, <build_string>".
// The package is taken by const reference so that const objects and
// temporaries can be streamed as well.
std::ostream& operator<<(std::ostream& os, const repo_package& pkg)
{
    return os << pkg.name << " -> " << pkg.version << ", " << pkg.build_string;
}
// Adds one solvable to `repo` for every entry below the "packages" key of a
// parsed repodata document.
//
// For each package the name, the normalized version (see normalize_version) and
// the "depends" list are registered with `global_pool`. Besides the plain
// "<name> == <evr>" provides entry, "<name>[<features>]" and "<name>[<build>]"
// entries are added when those fields are present.
//
// @param i    iterator over the parsed document; it is advanced while parsing.
// @param repo repository that receives the solvables.
// @throws std::runtime_error when the document has no "packages" key.
void parse_repo(ParsedJson::iterator &i, Repo* repo) {
    if (!i.move_to_key("packages"))
    {
        throw std::runtime_error("Could not find packages key!");
    }
    if (!i.down())
    {
        return;  // "packages" holds no entries, nothing to add
    }
    do {
        i.next();  // step from the entry's key to the package record
        if (!i.down())
        {
            continue;  // record without any fields
        }

        // Per-package state. It is declared inside the loop on purpose: when it
        // lived outside, a package lacking e.g. "features" silently inherited
        // the value of the package parsed before it.
        std::string_view version, build_string, features, name;
        int build_number = 0;

        Id s_id = repo_add_solvable(repo);
        auto& s = global_pool->solvables[s_id];
        do {
            const std::string_view key = i.get_string();
            i.next();  // step from the field's key to its value
            if (key == "name")
            {
                name = i.get_string();
                s.name = pool_str2id(global_pool, i.get_string(), 1);
            }
            else if (key == "build_number")
            {
                build_number = static_cast<int>(i.get_integer());
            }
            else if (key == "build")
            {
                build_string = i.get_string();
            }
            else if (key == "features")
            {
                features = i.get_string();
            }
            else if (key == "version")
            {
                version = i.get_string();
            }
            else if (key == "depends")
            {
                if (i.down())
                {
                    do {
                        Id rel = parse_to_relation(i.get_string(), global_pool);
                        solvable_add_deparray(&s, SOLVABLE_REQUIRES, rel, -1);
                    } while (i.next());
                    i.up();
                }
            }
            // Values of all other keys are skipped by the i.next() below.
        } while (i.next());

        s.evr = pool_str2id(global_pool, normalize_version(version, build_number, build_string).c_str(), 1);
        solvable_add_deparray(&s, SOLVABLE_PROVIDES,
                              pool_rel2id(global_pool, s.name, s.evr, REL_EQ, 1), -1);

        // Registers "<name>[<qualifier>] == <evr>" as an additional provides entry.
        const auto add_qualified_provides = [&](std::string_view qualifier) {
            std::string qualified(name);
            qualified += '[';
            qualified += qualifier;
            qualified += ']';
            auto qualified_id = pool_strn2id(global_pool, qualified.c_str(), qualified.size(), 1);
            solvable_add_deparray(&s, SOLVABLE_PROVIDES,
                                  pool_rel2id(global_pool, qualified_id, s.evr, REL_EQ, 1), -1);
        };
        if (!features.empty())
        {
            add_qualified_provides(features);
        }
        if (!build_string.empty())
        {
            add_qualified_provides(build_string);
        }
        i.up();
    } while (i.next());
}
// Walks a parsed "installed packages" document and adds one solvable to `repo`
// per JSON object found.
//
// Arrays are traversed recursively; every object is read as a package record
// with the keys "name", "version", "build_number" and "build_string". Scalars
// are ignored.
//
// @param repo repository that receives the solvables.
// @param i    iterator positioned on the value to inspect; it is advanced
//             while parsing.
void installed_packages(Repo* repo, ParsedJson::iterator &i) {
    switch (i.get_type()) {
    case '{':
    {
        if (!i.down())
        {
            break;  // object without fields: no package to register
        }
        // Value-initialized so that build_number is 0 when the key is absent.
        package pkg{};
        do {
            const std::string_view key = i.get_string();
            i.next();  // step from the field's key to its value
            if (key == "name")
            {
                pkg.name = i.get_string();
            }
            else if (key == "build_number")
            {
                pkg.build_number = static_cast<int>(i.get_integer());
            }
            else if (key == "build_string")
            {
                pkg.build_string = i.get_string();
            }
            else if (key == "version")
            {
                pkg.version = i.get_string();
            }
            // Values of all other keys are skipped by the i.next() below.
        } while (i.next());
        i.up();

        Id s_id = repo_add_solvable(repo);
        auto& s = global_pool->solvables[s_id];
        s.name = pool_str2id(global_pool, pkg.name.c_str(), 1);
        s.evr = pool_str2id(global_pool, normalize_version(pkg.version, pkg.build_number, pkg.build_string).c_str(), 1);
        solvable_add_deparray(&s, SOLVABLE_PROVIDES,
                              pool_rel2id(global_pool, s.name, s.evr, REL_EQ, 1), -1);
        break;
    }
    case '[':
        if (i.down()) {
            do {
                if (i.is_object_or_array()) {
                    installed_packages(repo, i);
                }
            } while (i.next());
            i.up();
        }
        break;
    default:
        // Scalars ('l', 'd', 'n', 't', 'f', ...) carry no package information.
        break;
    }
}
std::string solve(std::vector<std::string> repos,
std::string installed,
std::string look_for)
{
Pool* pool = pool_create();
global_pool = pool;
if (installed.size())
{
Repo* repo = repo_create(pool, "installed");
pool_set_installed(pool, repo);
std::string_view p = get_corpus(installed);
ParsedJson pj = build_parsed_json(p);
ParsedJson::iterator pjh(pj);
installed_packages(repo, pjh);
}
for (auto& fn : repos)
{
std::string_view p = get_corpus(fn);
Repo* repo = repo_create(pool, fn.c_str());
ParsedJson pj = build_parsed_json(p);
if (!pj.isValid())
{
std::cout << "Found invalid json!";
}
else
{
std::cout << "Parsing " << fn << std::endl;
}
ParsedJson::iterator pjh(pj);
parse_repo(pjh, repo);
std::cout << "Packages in " << fn << ": " << repo->nsolvables << std::endl;
repo_internalize(repo);
}
pool_createwhatprovides(global_pool);
Solver* solvy = solver_create(global_pool);
solver_set_flag(solvy, SOLVER_FLAG_ALLOW_DOWNGRADE, 1);
std::cout << "ALLOW DOWNGRADE? : " << solver_get_flag(solvy, SOLVER_FLAG_ALLOW_DOWNGRADE);
std::cout << "\nCreating the solver." << std::endl;
Queue q;
queue_init(&q);
int rel = parse_to_relation(look_for, pool);
std::cout << "Dep 2 str: " << pool_dep2str(pool, rel);
queue_push2(&q, SOLVER_INSTALL | SOLVER_SOLVABLE_NAME, rel);
solver_solve(solvy, &q);
Transaction* transy = solver_create_transaction(solvy);
int cnt = solver_problem_count(solvy);
Queue problem_queue;
queue_init(&problem_queue);
std::cout << "Encountered " << cnt << " problems.\n\n";
for (int i = 1; i <= cnt; i++)
{
queue_push(&problem_queue, i);
std::cout << "PROBLEM: " << solver_problem2str(solvy, i);
}
transaction_print(transy);
Queue q2;
int cut;
queue_init(&q2);
cut = transaction_installedresult(transy, &q2);
queue_truncate(&q2, cut);
std::cout << "Solution: " << std::endl;
std::vector<std::string> to_install;
for (int i = 0; i < q2.count; ++i)
{
to_install.emplace_back(pool_id2str(global_pool, global_pool->solvables[q2.elements[i]].name));
to_install.back() += " ==";
std::string version = pool_id2str(global_pool, global_pool->solvables[q2.elements[i]].evr);
auto vsplit = pystring::split(version, ".");
vsplit.pop_back();
version = pystring::join(".", vsplit);
to_install.back() += version;
}
std::sort(to_install.begin(), to_install.end());
std::string result;
for (auto& line : to_install)
{
result += "- " + line + "\n";
}
return result;
}

32
include/parsing.cpp Normal file
View File

@ -0,0 +1,32 @@
/*
<%
cfg['compiler_args'] = ['-O0', '-std=c++17', '-march=native']
cfg['libraries'] = ['solv']
setup_pybind11(cfg)
%>
*/
#include "parsing.hpp"
static Pool* gb_pool;
PYBIND11_MODULE(parsing, m) {
m.def("init", []() {
Pool* pool = pool_create();
gb_pool = pool;
});
// m.def("normalize_version", (std::string (std::string_view, int)*) &normalize_version);
// m.def("normalize_version", (std::string (std::string_view, int, std::string_view)*) &normalize_version);
m.def("parse_to_relation", [](const std::string_view& dep) { parse_to_relation(dep, gb_pool); });
m.def("test_parse", [](const std::string t) {
auto id = pool_str2id(gb_pool, "python", 1);
auto rel_id = parse_version_relation(id, t, gb_pool);
std::cout << pool_dep2str(gb_pool, rel_id) << std::endl;
return parse_version_relation(id, t, gb_pool);
});
m.def("test_fparse", [](const std::string t) {
auto rel_id = parse_to_relation(t, gb_pool);
std::cout << pool_dep2str(gb_pool, rel_id) << std::endl;
});
}

294
include/parsing.hpp Normal file
View File

@ -0,0 +1,294 @@
#ifndef MAMBA_PARSING
#define MAMBA_PARSING
#include <algorithm>
#include <array>
#include <cassert>
#include <cstddef>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>
extern "C"
{
#include "solv/pool.h"
#include "solv/repo.h"
#include "solv/queue.h"
#include "solv/solver.h"
}
#include "thirdparty/pystring/pystring.hpp"
#include <iostream>
// Parts of a single version constraint as split up by get_relation().
struct parsed_relation {
// Combination of libsolv REL_* flags; -1 until a relation has been determined.
int relation = -1;
// True when the constraint ended in '*'.
bool fuzzy = false;
// Version text without operator and trailing '*'. This is a view into the
// string that was parsed and does not own its characters.
std::string_view version;
};
// Pads `version` with ".0" components until it has at least three of them and
// appends ".<build_number>".
//
// Example: ("1.2", 3) -> "1.2.0.3".
//
// Declared inline because this header defines the function: without it every
// additional translation unit including the header adds a duplicate definition.
inline std::string normalize_version(std::string_view version, int build_number)
{
    std::string out(version);
    // N dots separate N + 1 components (an empty version counts as one).
    auto components = std::count(version.begin(), version.end(), '.') + 1;
    for (; components < 3; ++components)
    {
        out += ".0";
    }
    out += '.';
    out += std::to_string(build_number);
    return out;
}
// Pads `version` with ".0" components until it has at least three of them and
// appends ".<build_number>-<build>".
//
// Example: ("3.4.2", 0, "py37_0") -> "3.4.2.0-py37_0".
//
// Declared inline because this header defines the function: without it every
// additional translation unit including the header adds a duplicate definition.
inline std::string normalize_version(std::string_view version, int build_number, std::string_view build)
{
    std::string out(version);
    // N dots separate N + 1 components (an empty version counts as one).
    auto components = std::count(version.begin(), version.end(), '.') + 1;
    for (; components < 3; ++components)
    {
        out += ".0";
    }
    out += '.';
    out += std::to_string(build_number);
    out += '-';
    out += build;
    return out;
}
// Splits one version constraint into operator, version text and fuzzy flag.
//
// Recognized operators are "<", "<=", ">", ">=" and "=="; without an operator
// the relation is REL_EQ. A trailing '*' marks the constraint as fuzzy and is
// removed from the version text. Fuzzy constraints are then adjusted:
//   ">=x*" is treated as a fuzzy "==x*",
//   "<x*" and "<=x*" only lose the '*',
//   ">x*" is rejected.
//
// @param vs constraint text; the returned version is a view into it.
// @throws std::runtime_error for an empty constraint or for ">x*".
inline parsed_relation get_relation(const std::string_view& vs)
{
    if (vs.empty())
    {
        // Every index access below would be out of bounds.
        throw std::runtime_error("Cannot parse an empty version constraint");
    }
    parsed_relation res;
    std::size_t start = 0;
    // Guarded so that a one-character constraint never reads vs[1].
    const bool followed_by_eq = vs.size() > 1 && vs[1] == '=';
    if (vs[0] == '<' || vs[0] == '>')
    {
        res.relation = (vs[0] == '<') ? REL_LT : REL_GT;
        if (followed_by_eq)
        {
            res.relation |= REL_EQ;
            start = 2;
        }
        else
        {
            start = 1;
        }
    }
    else if (vs[0] == '=' && followed_by_eq)
    {
        res.relation = REL_EQ;
        start = 2;
    }

    std::size_t end = vs.size();
    if (vs.back() == '*')
    {
        res.fuzzy = true;
        --end;
    }

    if (res.relation == -1)
    {
        res.relation = REL_EQ;
    }
    if (res.fuzzy && res.relation != REL_EQ)
    {
        if (res.relation == (REL_GT | REL_EQ))
        {
            res.relation = REL_EQ;
        }
        else if (res.relation & REL_LT)
        {
            // just remove * from end, do nothing
            res.fuzzy = false;
        }
        else
        {
            std::cout << vs << std::endl;
            throw std::runtime_error("Cannot match fuzzy version with other than `==`");
        }
        // TODO fix this intelligently with build string comparison ... ?
    }
    // substr() stays valid when nothing is left after the operator.
    res.version = vs.substr(start, end - start);
    return res;
}
// Writes a parsed_relation as "[fzy ]<operator> <version>" for debugging.
// Relations other than the five listed below print no operator.
//
// Declared inline because this header defines the function: without it every
// additional translation unit including the header adds a duplicate definition.
inline std::ostream& operator<<(std::ostream& os, const parsed_relation& rel)
{
    if (rel.fuzzy)
    {
        os << "fzy ";
    }
    switch (rel.relation)
    {
        case REL_GT:          os << ">";  break;
        case REL_GT | REL_EQ: os << ">="; break;
        case REL_LT:          os << "<";  break;
        case REL_LT | REL_EQ: os << "<="; break;
        case REL_EQ:          os << "=="; break;
        default:              break;
    }
    return os << " " << rel.version;
}
// Builds the relation "name >= v_lo WITH name < v_hi".
//
// @param name_id id of the package name.
// @param v_lo    inclusive lower bound.
// @param v_hi    exclusive upper bound.
// @param pool    pool in which the strings and relations are interned.
// @return id of the combined relation.
//
// Declared inline because this header defines the function: without it every
// additional translation unit including the header adds a duplicate definition.
inline Id get_between(Id name_id, const std::string_view& v_lo, const std::string_view& v_hi, Pool* pool)
{
    // data() instead of &v[0]: indexing an empty view is undefined behaviour.
    auto evr_lo = pool_strn2id(pool, v_lo.data(), v_lo.size(), 1);
    auto evr_hi = pool_strn2id(pool, v_hi.data(), v_hi.size(), 1);
    auto at_least_lo = pool_rel2id(pool, name_id, evr_lo, REL_GT | REL_EQ, 1);
    auto below_hi = pool_rel2id(pool, name_id, evr_hi, REL_LT, 1);
    return pool_rel2id(pool, at_least_lo, below_hi, REL_WITH, 1);
}
// Turns a fuzzy version such as "1.2" (from "1.2*" or "1.2.*") into the range
// ">=1.2 WITH <1.3": the upper bound is the version with its last numeric
// component increased by one.
//
// @param name_id id of the package name.
// @param vnumber version text without the trailing '*'; one trailing '.' is
//                ignored.
// @param pool    pool in which the strings and relations are interned.
// @return id of the range relation, or `name_id` itself when no version text
//         is left (any version matches).
//
// Declared inline because this header defines the function: without it every
// additional translation unit including the header adds a duplicate definition.
inline Id get_fuzzy_relation(Id name_id, const std::string_view& vnumber, Pool* pool)
{
    std::string_view lower = vnumber;
    if (!lower.empty() && lower.back() == '.')
    {
        // remove last dot!
        lower.remove_suffix(1);
    }
    if (lower.empty())
    {
        return name_id;
    }

    // The last component starts after the final dot, or at the very beginning
    // when there is none. The latter case used to produce the upper bound "1"
    // regardless of the version.
    const std::size_t last_dot = lower.rfind('.');
    const std::size_t component_start = (last_dot == std::string_view::npos) ? 0 : last_dot + 1;

    // Parsed by hand: the view is not guaranteed to be null-terminated, so
    // strtoul() could read past its end.
    unsigned long last_component = 0;
    for (std::size_t k = component_start;
         k < lower.size() && lower[k] >= '0' && lower[k] <= '9';
         ++k)
    {
        last_component = last_component * 10 + static_cast<unsigned long>(lower[k] - '0');
    }

    std::string higher_version(lower.substr(0, component_start));
    higher_version += std::to_string(last_component + 1);
    return get_between(name_id, lower, higher_version, pool);
}
// Converts one version constraint (no ',' or '|') for `name_id` into a
// libsolv dependency id.
//
//   ""  or "*"       -> `name_id` itself, i.e. any version
//   fuzzy ("1.2*")   -> range built by get_fuzzy_relation()
//   "==x" or "x"     -> range over all build numbers of x (0 up to 9999),
//                       unless x contains exactly four dots, in which case it
//                       is matched literally
//   "<", "<=", ...   -> plain relation against the given version
//
// @throws std::runtime_error for constraints rejected by get_relation().
//
// Declared inline because this header defines the function: without it every
// additional translation unit including the header adds a duplicate definition.
inline Id parse_version_relation(Id name_id,
                                 const std::string_view& version_rel,
                                 Pool* pool)
{
    // An empty constraint restricts nothing; it is handled here because
    // get_relation() cannot parse it.
    if (version_rel.empty() || version_rel == "*")
    {
        return name_id;
    }
    if (version_rel.size() == 1)
    {
        // TODO make sure that char is alpha num
        auto v_lo = normalize_version(version_rel, 0);
        auto v_hi = normalize_version(version_rel, 9999);
        return get_between(name_id, v_lo, v_hi, pool);
    }

    // now we know that the size of `version_rel` == 2 or larger!
    auto rel = get_relation(version_rel);
    if (rel.fuzzy)
    {
        return get_fuzzy_relation(name_id, rel.version, pool);
    }

    const bool literal_match =
        4 == std::count(rel.version.begin(), rel.version.end(), '.');
    if (rel.relation == REL_EQ && !literal_match)
    {
        auto v_lo = normalize_version(rel.version, 0);
        auto v_hi = normalize_version(rel.version, 9999);
        return get_between(name_id, v_lo, v_hi, pool);
    }

    assert(rel.relation == REL_EQ || (rel.relation & REL_GT) || (rel.relation & REL_LT));
    // data() instead of &version[0]: the version may be empty (e.g. ">=").
    Id evr_id = pool_strn2id(pool, rel.version.data(), rel.version.size(), 1);
    return pool_rel2id(pool, name_id, evr_id, rel.relation, 1);
}
// Parses a version expression that may combine constraints with ',' (both must
// hold, REL_WITH) and '|' (either may hold, REL_OR).
//
// ',' binds tighter than '|': "a,b|c" means "(a and b) or c". The expression is
// therefore split at '|' first and only '|'-free parts are split at ','.
// Expressions using a single kind of operator nest to the right, exactly as
// before.
//
// @param name_id id of the package name the constraints apply to.
// @param str     expression text.
// @param pool    pool in which the relations are interned.
// @return id of the combined relation.
//
// Declared inline because this header defines the function: without it every
// additional translation unit including the header adds a duplicate definition.
inline Id parse_rel_string(Id name_id, const std::string_view& str, Pool* pool)
{
    const std::size_t or_pos = str.find('|');
    if (or_pos != std::string_view::npos)
    {
        Id lhs = parse_rel_string(name_id, str.substr(0, or_pos), pool);
        Id rhs = parse_rel_string(name_id, str.substr(or_pos + 1), pool);
        return pool_rel2id(pool, lhs, rhs, REL_OR, 1);
    }

    const std::size_t and_pos = str.find(',');
    if (and_pos != std::string_view::npos)
    {
        Id lhs = parse_version_relation(name_id, str.substr(0, and_pos), pool);
        Id rhs = parse_rel_string(name_id, str.substr(and_pos + 1), pool);
        return pool_rel2id(pool, lhs, rhs, REL_WITH, 1);
    }

    return parse_version_relation(name_id, str, pool);
}
// Converts a dependency string of the form "name", "name version" or
// "name version build" into a libsolv dependency id.
//
// With a build part the constraint is placed on the name "name[build]"; the
// version part may combine several constraints, see parse_rel_string().
//
// @param dep  dependency text; parts are separated by spaces.
// @param pool pool in which names and relations are interned.
// @return id of the name (no version given) or of the version relation.
//
// Declared inline because this header defines the function: without it every
// additional translation unit including the header adds a duplicate definition.
inline Id parse_to_relation(const std::string_view& dep, Pool* pool) {
    auto pkg = pystring::split(dep, " ");
    // Leading, trailing or repeated spaces yield empty parts; dropping them
    // keeps e.g. "python  >=3" from being read as name, empty version, build.
    pkg.erase(std::remove_if(pkg.begin(), pkg.end(),
                             [](const std::string_view& part) { return part.empty(); }),
              pkg.end());

    if (pkg.empty())
    {
        // Nothing but spaces (or nothing at all): intern the empty string.
        return pool_strn2id(pool, "", 0, 1);
    }

    const auto& name = pkg[0];
    if (pkg.size() == 1)
    {
        return pool_strn2id(pool, name.data(), name.size(), 1);
    }

    Id name_id;
    if (pkg.size() == 2)
    {
        name_id = pool_strn2id(pool, name.data(), name.size(), 1);
    }
    else
    {
        assert(pkg.size() == 3);
        // add feature to dependency
        std::string qualified(name);
        qualified += '[';
        qualified += pkg[2];
        qualified += ']';
        name_id = pool_strn2id(pool, qualified.c_str(), qualified.size(), 1);
    }
    return parse_rel_string(name_id, pkg[1], pool);
}
#endif

1630
include/thirdparty/pystring/pystring.hpp vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,400 @@
namespace pystring {
// @mainpage pystring
//
// This is a set of functions matching the interface and behaviors of python string methods
// (as of python 2.3) using std::string.
//
// Overlapping functionality ( such as index and slice/substr ) of std::string is included
// to match python interfaces.
//
/*
* @defgroup functions pystring
* @{
*/
// @brief Return a copy of the string with only its first character capitalized.
//
template <class S>
S capitalize(const S& str);
///
// @brief Return str centered in a string of length width. Padding is done using spaces.
//
string center(const string_view& str, std::ptrdiff_t width);
//
// @brief Return the number of occurrences of substring sub in string S[start:end]. Optional
// arguments start and end are interpreted as in slice notation.
//
template <class S>
std::ptrdiff_t count(const S& str, const S& substr, std::ptrdiff_t start = 0, std::ptrdiff_t end = std::numeric_limits<std::ptrdiff_t>::max());
// @brief Return True if the string ends with the specified suffix, otherwise return False. With
// optional start, test beginning at that position. With optional end, stop comparing at that position.
//
template <class S, class C>
bool endswith(const S& str, const C& suffix, std::ptrdiff_t start = 0, std::ptrdiff_t end = std::numeric_limits<std::ptrdiff_t>::max());
///
// @brief Return a copy of the string where all tab characters are expanded using spaces. If tabsize
// is not given, a tab size of 8 characters is assumed.
//
string expandtabs(const string_view& str, std::ptrdiff_t tabsize = 8);
///
// @brief Return the lowest index in the string where substring sub is found, such that sub is
// contained in the range [start, end). Optional arguments start and end are interpreted as
// in slice notation. Return -1 if sub is not found.
//
template <class S>
std::ptrdiff_t find(const string_view& str, const S& sub, std::ptrdiff_t start = 0, std::ptrdiff_t end = std::numeric_limits<std::ptrdiff_t>::max());
///
// @brief Synonym of find right now. Python version throws exceptions. This one currently doesn't
//
template <class S>
std::ptrdiff_t index(const string_view& str, const S& sub, std::ptrdiff_t start = 0, std::ptrdiff_t end = std::numeric_limits<std::ptrdiff_t>::max());
///
// @brief Return true if all characters in the string are alphanumeric and there is at least one
// character, false otherwise.
//
template <class S>
bool isalnum(const S& str);
///
// @brief Return true if all characters in the string are alphabetic and there is at least one
// character, false otherwise
//
template <class S>
bool isalpha(const S& str);
///
// @brief Return true if all characters in the string are digits and there is at least one
// character, false otherwise.
//
template <class S>
bool isdigit(const S& str);
///
// @brief Return true if all cased characters in the string are lowercase and there is at least one
// cased character, false otherwise.
//
template <class S>
bool islower(const S& str);
///
// @brief Return true if there are only whitespace characters in the string and there is at least
// one character, false otherwise.
//
template <class S>
bool isspace(const S& str);
///
// @brief Return true if the string is a titlecased string and there is at least one character,
// i.e. uppercase characters may only follow uncased characters and lowercase characters only
// cased ones. Return false otherwise.
//
template <class S>
bool istitle(const S& str);
///
// @brief Return true if all cased characters in the string are uppercase and there is at least one
// cased character, false otherwise.
//
template <class S>
bool isupper(const S& str);
///
// @brief Return a string which is the concatenation of the strings in the sequence seq.
// The separator between elements is the str argument
//
template <class S, class C>
string join(const S& str, const C& seq);
///
// @brief Return the string left justified in a string of length width. Padding is done using
// spaces. The original string is returned if width is less than str.size().
//
string ljust(const string_view& str, std::ptrdiff_t width);
///
// @brief Return a copy of the string converted to lowercase.
//
template <class S>
S lower(const S& str);
///
// @brief Return a copy of the string with leading characters removed. If chars is omitted or None,
// whitespace characters are removed. If given and not "", chars must be a string; the
// characters in the string will be stripped from the beginning of the string this method
// is called on (argument "str" ).
//
string_view lstrip(const string_view& str, const string_view& chars = "");
///
// @brief Return a copy of the string, concatenated N times, together.
// Corresponds to the __mul__ operator.
//
template <class S>
S mul(const S& str, std::ptrdiff_t n);
///
// @brief Split the string around the first occurrence of sep.
// Three strings will always be placed into the result. If sep is found, the strings will
// be the text before sep, sep itself, and the remaining text. If sep is
// not found, the original string will be returned with two empty strings.
//
template <class S>
std::vector<S> partition(const S& str, const S& sep);
///
// @brief Return a copy of the string with all occurrences of substring old replaced by new. If
// the optional argument count is given, only the first count occurrences are replaced.
//
template <class S, class T, class U>
string replace(const S& str, const T& oldstr, const U& newstr, std::ptrdiff_t count = -1);
///
// @brief Return the highest index in the string where substring sub is found, such that sub is
// contained within s[start,end]. Optional arguments start and end are interpreted as in
// slice notation. Return -1 on failure.
//
template <class S>
std::ptrdiff_t rfind(const string_view& str, const S& sub, std::ptrdiff_t start = 0, std::ptrdiff_t end = std::numeric_limits<int>::max());
///
// @brief Currently a synonym of rfind. The python version raises exceptions. This one currently
// does not
//
template <class S>
std::ptrdiff_t rindex(const string_view& str, const S& sub, std::ptrdiff_t start = 0, std::ptrdiff_t end = std::numeric_limits<int>::max());
///
// @brief Return the string right justified in a string of length width. Padding is done using
// spaces. The original string is returned if width is less than str.size().
//
string rjust(const string_view& str, std::ptrdiff_t width);
///
// @brief Split the string around the last occurrence of sep.
// Three strings will always be placed into the result. If sep is found, the strings will
// be the text before sep, sep itself, and the remaining text. If sep is
// not found, the original string will be returned with two empty strings.
//
std::array<string_view, 3> rpartition(const string_view& str, const string_view& sep);
///
// @brief Return a copy of the string with trailing characters removed. If chars is "", whitespace
// characters are removed. If not "", the characters in the string will be stripped from the
// end of the string this method is called on.
//
string_view rstrip(const string_view& str, const string_view& chars = "");
///
// @brief Return a list of the words in the string, using sep as the delimiter string.
// If maxsplit is > -1, at most maxsplit splits are done. If sep is "",
// any whitespace string is a separator.
//
std::vector<string_view> split(const string_view& str, const string_view& sep = "", std::ptrdiff_t maxsplit = -1);
///
// @brief Return a list of the words in the string, using sep as the delimiter string.
// Does a number of splits starting at the end of the string, the result still has the
// split strings in their original order.
// If maxsplit is > -1, at most maxsplit splits are done. If sep is "",
// any whitespace string is a separator.
//
std::vector<string_view> rsplit(const string_view& str, const string_view& sep = "", std::ptrdiff_t maxsplit = -1);
///
// @brief Return a list of the lines in the string, breaking at line boundaries. Line breaks
// are not included in the resulting list unless keepends is given and true.
//
std::vector<string_view> splitlines(const string_view& str, bool keepends = false);
///
// @brief Return True if string starts with the prefix, otherwise return False. With optional start,
// test string beginning at that position. With optional end, stop comparing string at that
// position
//
template <class S, class C>
bool startswith(const S& str, const C& prefix, std::ptrdiff_t start = 0, std::ptrdiff_t end = std::numeric_limits<int>::max());
///
// @brief Return a copy of the string with leading and trailing characters removed. If chars is "",
// whitespace characters are removed. If given and not "", the characters in the string will be
// stripped from the both ends of the string this method is called on.
//
string_view strip(const string_view& str, const string_view& chars = "");
///
// @brief Return a copy of the string with uppercase characters converted to lowercase and vice versa.
//
template <class S>
S swapcase(const S& str);
///
// @brief Return a titlecased version of the string: words start with uppercase characters,
// all remaining cased characters are lowercase.
//
template <class S>
S title(const S& str);
///
// @brief Return a copy of the string where all characters occurring in the optional argument
// deletechars are removed, and the remaining characters have been mapped through the given
// translation table, which must be a string of length 256.
//
template <class S>
string translate(const S& str, const string_view& table, const string_view& deletechars = "");
///
// @brief Return a copy of the string converted to uppercase.
//
template <class S>
S upper(const S& str);
///
// @brief Return the numeric string left filled with zeros in a string of length width. The original
// string is returned if width is less than str.size().
//
template <class S>
S zfill(const S& str, std::ptrdiff_t width);
///
// @brief function matching python's slice functionality.
//
string_view slice(const string_view& str, std::ptrdiff_t start = 0, std::ptrdiff_t end = std::numeric_limits<std::ptrdiff_t>::max());
//
// @ }
//
namespace os {
namespace path {
// All of the functions below have three versions.
// Example:
// join(...)
// join_nt(...)
// join_posix(...)
//
// The regular function dispatches to the other versions - based on the OS
// at compile time - to match the result you'd get from the python
// interpreter on the same operating system
//
// Should you want to 'lock off' to a particular version of the string
// manipulation across *all* operating systems, use the version with the
// _OS you are interested in. I.e., you can use posix style path joining,
// even on Windows, with join_posix.
//
// The naming, (nt, posix) matches the cpython source implementation.
///
// @defgroup functions pystring::os::path
// @{
///
// @brief Return the base name of pathname path. This is the second half of the pair returned
// by split(path). Note that the result of this function is different from the Unix basename
// program; where basename for '/foo/bar/' returns 'bar', the basename() function returns an
// empty string ('').
std::string basename(const std::string& path);
std::string basename_nt(const std::string& path);
std::string basename_posix(const std::string& path);
///
// @brief Return the directory name of pathname path. This is the first half of the pair
// returned by split(path).
std::string dirname(const std::string& path);
std::string dirname_nt(const std::string& path);
std::string dirname_posix(const std::string& path);
///
// @brief Return True if path is an absolute pathname. On Unix, that means it begins with a
// slash, on Windows that it begins with a (back)slash after chopping off a potential drive
// letter.
bool isabs(const std::string& path);
bool isabs_nt(const std::string& path);
bool isabs_posix(const std::string& s);
///
// @brief Return a normalized absolutized version of the pathname path.
//
// NOTE: This differs from the interface of the python equivalent in that it requires you
// to pass in the current working directory as an argument.
std::string abspath(const std::string& path, const std::string& cwd);
std::string abspath_nt(const std::string& path, const std::string& cwd);
std::string abspath_posix(const std::string& path, const std::string& cwd);
///
// @brief Join one or more path components intelligently. If any component is an absolute
// path, all previous components (on Windows, including the previous drive letter, if there
// was one) are thrown away, and joining continues. The return value is the concatenation of
// path1, and optionally path2, etc., with exactly one directory separator (os.sep) inserted
// between components, unless path2 is empty. Note that on Windows, since there is a current
// directory for each drive, os.path.join("c:", "foo") represents a path relative to the
// current directory on drive C: (c:foo), not c:\foo.
// This dispatches based on the compilation OS
std::string join(const std::string& path1, const std::string& path2);
std::string join_nt(const std::string& path1, const std::string& path2);
std::string join_posix(const std::string& path1, const std::string& path2);
std::string join(const std::vector<std::string>& paths);
std::string join_nt(const std::vector<std::string>& paths);
std::string join_posix(const std::vector<std::string>& paths);
///
// @brief Normalize a pathname. This collapses redundant separators and up-level references
// so that A//B, A/B/, A/./B and A/foo/../B all become A/B. It does not normalize the case
// (use normcase() for that). On Windows, it converts forward slashes to backward slashes.
// It should be understood that this may change the meaning of the path if it contains
// symbolic links!
std::string normpath(const std::string& path);
std::string normpath_nt(const std::string& path);
std::string normpath_posix(const std::string& path);
///
// @brief Split the pathname path into a pair, (head, tail) where tail is the last pathname
// component and head is everything leading up to that. The tail part will never contain a
// slash; if path ends in a slash, tail will be empty. If there is no slash in path, head
// will be empty. If path is empty, both head and tail are empty. Trailing slashes are
// stripped from head unless it is the root (one or more slashes only). In all cases,
// join(head, tail) returns a path to the same location as path (but the strings may
// differ).
void split(std::string& head, std::string& tail, const std::string& path);
void split_nt(std::string& head, std::string& tail, const std::string& path);
void split_posix(std::string& head, std::string& tail, const std::string& path);
///
// @brief Split the pathname path into a pair (drive, tail) where drive is either a drive
// specification or the empty string. On systems which do not use drive specifications,
// drive will always be the empty string. In all cases, drive + tail will be the same as
// path.
void splitdrive(std::string& drivespec, std::string& pathspec, const std::string& path);
void splitdrive_nt(std::string& drivespec, std::string& pathspec, const std::string& p);
void splitdrive_posix(std::string& drivespec, std::string& pathspec, const std::string& path);
///
// @brief Split the pathname path into a pair (root, ext) such that root + ext == path, and
// ext is empty or begins with a period and contains at most one period. Leading periods on
// the basename are ignored; splitext('.cshrc') returns ('.cshrc', '').
void splitext(std::string& root, std::string& ext, const std::string& path);
void splitext_nt(std::string& root, std::string& ext, const std::string& path);
void splitext_posix(std::string& root, std::string& ext, const std::string& path);
}
} // namespace path
} // namespace os

View File

@ -0,0 +1,148 @@
///////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2008-2010, Sony Pictures Imageworks Inc
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the organization Sony Pictures Imageworks nor the
// names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER
// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
///////////////////////////////////////////////////////////////////////////////
#ifndef PYSTRING_UTILS
#define PYSTRING_UTILS
// Standard headers required by the declarations in this file. They are
// included here so the header compiles on its own, independent of what the
// including translation unit happened to include beforehand.
#include <algorithm>
#include <cstddef>
#include <cstring>
#include <iterator>
#include <string>
#include <string_view>
#ifndef PYSTRING_DEFAULT_STRING_VIEW
// #include "string_view_lite.hpp"
namespace pystring
{
    // String types used by pystring when PYSTRING_DEFAULT_STRING_VIEW is not
    // defined: plain aliases of the standard library types.
    using string_view = std::string_view;
    using string = std::string;
}
#endif
namespace pystring
{
namespace detail
{
template <class S>
auto size(const S& s)
{
return s.size();
}
template <class T, std::size_t N>
auto size(const T (&str)[N])
{
return std::strlen(str);
}
struct char_substr
{
const char* pos;
const char* end;
operator std::string()
{
return {pos, end};
}
};
auto size(const char_substr& str)
{
return std::distance(str.pos, str.end);
}
template <class T>
bool operator==(const char_substr& lhs, const T& rhs)
{
return std::equal(lhs.pos, lhs.end, std::begin(rhs)) && size(lhs) == size(rhs);
}
template <class S>
auto substr(S& str, std::size_t pos, std::size_t len)
{
return str.substr(pos, len);
}
template <class T, std::size_t N>
auto substr(T (&str)[N], std::size_t pos, std::size_t len)
{
return char_substr{str + pos, str + pos + len};
}
using std::end;
using std::begin;
template <class C, std::size_t N>
auto end(const C (&c)[N])
{
return &c[0] + std::strlen(c);
}
struct fast_end_t {};
template <class C>
auto fast_end(C& c)
{
return c.end();
}
template <class T, std::size_t N>
auto fast_end(T (&)[N])
{
return fast_end_t();
}
template <class It>
bool operator==(It it, fast_end_t)
{
return (*it == '\0');
}
template <class It>
bool operator!=(It it, fast_end_t)
{
return !(it == fast_end_t());
}
const char* c_str(const std::string& str)
{
return str.c_str();
}
template <std::size_t N>
const char* c_str(const char(&str)[N])
{
return str;
}
} // ns detail
}
#endif

1
include/thirdparty/simdjson/README.md vendored Normal file
View File

@ -0,0 +1 @@
c++ -march=native -O3 -std=c++17 -o amalgamation_demo amalgamation_demo.cpp && ./amalgamation_demo ../jsonexamples/twitter.json

View File

@ -0,0 +1,17 @@
/* auto-generated on Tue 26 Feb 2019 10:14:31 EST. Do not edit! */
#include <iostream>
#include "simdjson.h"
#include "simdjson.cpp"
// Demo entry point: loads the JSON file named by the first command-line
// argument, parses it and prints "valid" or "not valid".
// Returns EXIT_FAILURE when no file name was supplied, EXIT_SUCCESS otherwise
// (including when the document is reported as not valid).
int main(int argc, char *argv[]) {
  // argv[1] only exists when a path was passed on the command line.
  if (argc < 2) {
    std::cerr << "Usage: " << (argc > 0 ? argv[0] : "amalgamation_demo")
              << " <jsonfile>" << std::endl;
    return EXIT_FAILURE;
  }
  const char * filename = argv[1];
  std::string_view p = get_corpus(filename);
  ParsedJson pj = build_parsed_json(p); // do the parsing
  if( ! pj.isValid() ) {
    std::cout << "not valid" << std::endl;
  } else {
    std::cout << "valid" << std::endl;
  }
  return EXIT_SUCCESS;
}

1936
include/thirdparty/simdjson/simdjson.cpp vendored Normal file

File diff suppressed because it is too large Load Diff

36843
include/thirdparty/simdjson/simdjson.h vendored Normal file

File diff suppressed because it is too large Load Diff

18
interface.cpp Normal file
View File

@ -0,0 +1,18 @@
/*
<%
cfg['compiler_args'] = ['-std=c++17', '-march=native']
cfg['libraries'] = ['solv']
setup_pybind11(cfg)
%>
*/
#include "include/api.cpp"
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
namespace py = pybind11;
// Defines the Python extension module named `interface` and registers the C++
// function `solve` under the Python name "solve".
// NOTE(review): `solve` is not defined in this file; presumably it comes from
// the "include/api.cpp" include — confirm.
PYBIND11_MODULE(interface, m) {
m.def("solve", &solve);
}

365
test.py Normal file
View File

@ -0,0 +1,365 @@
import sys
CONDA_PATH = '/home/wolfv/Programs/conda/'
sys.path.insert(0, CONDA_PATH)
# orig, sys.path = sys.path, [CONDA_PATH]
# import conda
# from tests import helpers
# sys.path = orig + [CONDA_PATH]
# i, r = helpers.get_index_r_1()
# print(i, r)
from conda.models import match_spec
from conda.models import version as cndversion
M = match_spec.MatchSpec('test >=2.5*, <3.5')
from multiprocessing.pool import Pool as MPool
import bz2
import sys
import solv
import json
from packaging import version
from urllib import request
import pandas as pd
import numpy as np
import re
# from clint.textui import progress
pool = solv.Pool()
# what is installed in the system
installed = pool.add_repo('installed')
pool.installed = installed
def normalize_version(version, build_number, build):
    """Return the version string "<version>.<build_number>-<build>".

    `version` is split on '.' and padded with '0' components until it has at
    least three of them; `build_number` is appended as one more component and
    `build` follows after a '-'.
    """
    components = version.split('.')
    shortfall = 3 - len(components)
    if shortfall > 0:
        components.extend(['0'] * shortfall)
    components.append(str(build_number))
    return '.'.join(components) + '-' + build
def add_installed(f, installed_pool):
    """Register every package listed in the JSON file `f` as a solvable.

    `f` is the path of a JSON file containing a list of package records with
    the keys 'name', 'version', 'build_number' and 'build_string'.
    `installed_pool` is the object whose `add_solvable()` creates the entries
    (the script passes the repo returned by `pool.add_repo('installed')`).
    """
    # The context manager closes the file as soon as it has been parsed; the
    # previous bare open() left the handle to the garbage collector.
    with open(f, 'r') as fh:
        parsed = json.load(fh)
    for pkg in parsed:
        v = normalize_version(pkg['version'], pkg['build_number'], pkg['build_string'])
        # print(pkg['name'], v)
        s = installed_pool.add_solvable()
        s.name = pkg['name']
        s.evr = v
        # s.arch = sel_pkg.get('target-triplet')
add_installed('installed.json', installed)
# Translate one dependency spec string (e.g. 'hdf5 >=1.10') into a list of
# dependency ids created through the module-level `pool`.
#
# The spec is split on single spaces: the first token is the package name, the
# second the version constraint, and an optional third token is folded into the
# name as "name[third]". A spec without any space is returned as the id of the
# whole string. The constraint itself is split on ',' and each part is handled
# according to its leading character ('<', '>', or anything else).
def parse_to_rel(d):
pkg = d.split(' ')
if len(pkg) > 1:
# print(d)
name = pkg[0]
constraint = pkg[1]
if len(pkg) > 2:
feature = pkg[2]
name = "{}[{}]".format(name, feature)
# print(constraint)
# Implement OR operator :)
constraints = constraint.split(',')
pcs = []
for c in constraints:
# '<' and '<=': strip the operator and remember the relation flags.
# NOTE(review): c[1] raises IndexError when the part is just '<' (or '>' below).
if len(c) and c[0] == '<':
rel = solv.REL_LT
if c[1] == '=':
rel |= solv.REL_EQ
target_version = c[2:]
else:
target_version = c[1:]
# '>' and '>=': same handling with REL_GT.
elif len(c) and c[0] == '>':
rel = solv.REL_GT
if c[1] == '=':
target_version = c[2:]
rel |= solv.REL_EQ
else:
target_version = c[1:]
else:
# Wildcard version: build an inclusive lower bound (v1) and an exclusive
# upper bound (v2) and return them combined with REL_WITH.
if '*' in c:
rel1 = solv.REL_GT | solv.REL_EQ
rel2 = solv.REL_LT
# NOTE(review): fill_value is only referenced by the commented-out code below.
fill_value = 0
v1, v2 = [], []
for el in c.split('.'):
# Split each component into its leading digits and the non-digit remainder.
matches = re.match(r'(\d+)?(\D+)?', el).groups()
# print(el, v1, v2)
if matches[0] is None:
if matches[1] == '*' and len(v2) > 0:
# increment previous
v2[-1] += 1
# if len(v2) < 3:
# v1.append(fill_value)
# v2.append(fill_value)
else:
# only * as version number
return [pool.str2id(name)]
elif matches[0] is not None and matches[1] == '*':
v1.append(int(matches[0]))
v2.append(int(matches[0]) + 1)
elif matches[1] is None:
v1.append(int(matches[0]))
v2.append(int(matches[0]))
# print(c, '.'.join([str(x) for x in v1]), '.'.join([str(x) for x in v2]))
r1 = pool.rel2id(pool.str2id(name), pool.str2id('.'.join([str(x) for x in v1])), rel1)
r2 = pool.rel2id(pool.str2id(name), pool.str2id('.'.join([str(x) for x in v2])), rel2)
return [pool.rel2id(r1, r2, solv.REL_WITH)]
else:
# Plain version: exactly four components are matched with REL_EQ.
vsplit = c.split('.')
if len(vsplit) == 4:
rel = solv.REL_EQ
return [pool.rel2id(pool.str2id(name), pool.str2id(c), rel)]
else:
# Pad with '0' up to four components, then match the range from the padded
# version up to (excluding) the same version with last component '9999'.
while len(vsplit) < 4:
vsplit.append('0')
if len(vsplit) > 4:
print("WHAAAAAAAAAAAT ", vsplit)
rel1 = solv.REL_GT | solv.REL_EQ
rel2 = solv.REL_LT
r1 = pool.rel2id(pool.str2id(name), pool.str2id('.'.join([x for x in vsplit])), rel1)
vsplit[-1] = '9999'
r2 = pool.rel2id(pool.str2id(name), pool.str2id('.'.join([x for x in vsplit])), rel2)
return [pool.rel2id(r1, r2, solv.REL_WITH)]
# Strips a leading '=' or '==' from the part.
# NOTE(review): every sub-branch of the preceding else appears to return, so
# this may be unreachable (or may overwrite target_version, depending on its
# nesting) — confirm against the original indentation.
start = 0
if len(c) and c[0] == '=':
start = 1
if c[1] == '=':
start = 2
target_version = c[start:]
pcs.append(pool.rel2id(pool.str2id(name), pool.str2id(target_version), rel))
# Exactly two parts are combined into a single REL_WITH relation; any other
# count is returned as collected.
if len(pcs) == 2:
pcs = [pool.rel2id(pcs[0], pcs[1], solv.REL_WITH)]
return pcs
else:
return [pool.str2id(d)]
# Parse repodata JSON text and add its packages as solvables to repos created
# on the module-level `pool`.
#
# `json_str` is the JSON document; its 'packages' entry is used when present,
# otherwise the whole document is treated as the package table.
# `channel` is a (label, dict) pair: the dict is filled in place with a 'main'
# repo plus one repo per distinct value of the 'features' column.
def parse_json(json_str, channel):
repo_json = json.loads(json_str)
label, channel = channel
print("Parsing JSON ", label)
packages = repo_json.get('packages')
if packages is None:
packages = repo_json
# Transposed so that each package record becomes one row.
df = pd.DataFrame(packages).T
# NOTE(review): the bare except hides every error, not only a missing
# 'features' column — consider narrowing it.
try:
all_features = df['features'].dropna().unique()
except:
all_features = []
print("all features: ", all_features)
print(df)
channel['main'] = pool.add_repo(label)
for f in all_features:
channel[f] = pool.add_repo(label + '[' + f + ']')
for name, package_df in df.groupby(['name']):
for vers, package_v in package_df.groupby(['version', 'build']):
# Within one (version, build) group keep the row with the highest build_number.
sel_pkg = package_v.loc[package_v['build_number'].astype(int).idxmax()]
additional_name = None
# Packages with a string 'features' value go into that feature's repo and
# additionally provide "name[features]"; all others go into 'main'.
if len(all_features) and type(sel_pkg['features']) is str:
s = channel[sel_pkg['features']].add_solvable()
additional_name = "{}[{}]".format(name, sel_pkg['features'])
else:
s = channel['main'].add_solvable()
if type(sel_pkg['build']) is str:
additional_name = "{}[{}]".format(name, sel_pkg['build'])
# print("Adding ", sel_pkg['name'], sel_pkg['version'], sel_pkg['build_number'])
# print(sel_pkg['name'])
s.name = sel_pkg['name']
s.evr = normalize_version(sel_pkg['version'], sel_pkg['build_number'], sel_pkg['build'])
s.arch = sel_pkg.get('target-triplet')
# Debug output for one specific package.
if s.name == 'unixodbc':
print(s.name, s.evr)
# Every solvable provides itself at exactly its own version.
s.add_deparray(solv.SOLVABLE_PROVIDES, pool.rel2id(pool.str2id(s.name), pool.str2id(s.evr), solv.REL_EQ))
# Each entry of 'depends' is converted by parse_to_rel and added as a requirement.
for d in sel_pkg['depends']:
pcs = parse_to_rel(d)
for p in pcs:
s.add_deparray(solv.SOLVABLE_REQUIRES, p)
if additional_name:
# print("additional name: ", additional_name)
s.add_deparray(solv.SOLVABLE_PROVIDES, pool.rel2id(pool.str2id(additional_name), pool.str2id(s.evr), solv.REL_EQ))
print("Done")
# for key, package in repo_json['packages'].items():
# s = channel.add_solvable()
# s.name = package['name']
# s.evr = package['version']
# s.arch = package.get('target-triplet')
# s.add_deparray(solv.SOLVABLE_PROVIDES, pool.rel2id(pool.str2id(s.name), pool.str2id(s.evr), solv.REL_EQ))
# for d in package['depends']:
# parse_to_rel(d)
# s.add_deparray(solv.SOLVABLE_REQUIRES, pool.str2id(d.split(' ')[0]))
progress = []
progress_bars = []
url_template = 'https://conda.anaconda.org/{}/linux-64/repodata.json.bz2'
def ensure_dir(file_path):
    """Create the parent directory of `file_path` if it does not exist yet.

    Nothing happens when `file_path` has no directory component (a bare file
    name) or when the parent path already exists.
    """
    directory = os.path.dirname(file_path)
    # os.path.dirname() returns '' for a bare file name and os.makedirs('')
    # raises, so an empty directory component is skipped.
    if directory and not os.path.exists(directory):
        # exist_ok covers the case where another process creates the
        # directory between the check above and this call.
        os.makedirs(directory, exist_ok=True)
import os
import time
from datetime import datetime, timedelta
repos = {}
def download(args):
    """Make sure './__cache__/<channel>.json' exists and is at most 24 hours old.

    `args` is a `(channel, url, idx)` tuple (a single argument so the function
    can be used with `Pool.map`); `idx` is unpacked but not used. When the
    cache file is missing or older than 24 hours, `url` is fetched, the
    bz2-compressed body is decompressed, decoded as UTF-8 and written to the
    cache file. Always returns True; download and decoding errors propagate.
    """
    channel, url, idx = args
    print("Downloading: ", args)
    cache_dir = './__cache__/'
    cache_file = os.path.join(cache_dir, channel + '.json')
    has_cache = os.path.isfile(cache_file)
    if has_cache:
        modified = datetime.fromtimestamp(os.path.getmtime(cache_file))
        print(modified)
        if datetime.now() - modified > timedelta(hours=24):
            print("Cache invalidated...")
            has_cache = False
    if not has_cache:
        # The cache directory may not exist on a first run.
        os.makedirs(cache_dir, exist_ok=True)
        # Fetch and decode completely before touching the cache file, so a
        # failed download cannot leave an empty file that looks fresh.
        with request.urlopen(url) as req:
            print("Downloading ", cache_file)
            payload = bz2.decompress(req.read()).decode('utf-8')
        with open(cache_file, 'w') as ftmp:
            ftmp.write(payload)
    return True
def download_all(channels):
    """Refresh the cached repodata of all `channels` and load it into the pool.

    Rebinds the module-level `repos` to a dict with one (initially empty) dict
    per channel, lets `parse_json` fill each of them, and returns it.
    When the first channel is 'xtest', only the local file 'xtest.json' is
    parsed and nothing is downloaded.
    """
    global repos
    repos = {channel: {} for channel in channels}
    # Nothing to download or parse for an empty channel list.
    if not channels:
        return repos

    if (channels[0] == 'xtest'):
        c = channels[0]
        with open(c + '.json', 'r') as f:
            parse_json(f.read(), (c, repos[c]))
        return repos

    channel_args = [(channel, url_template.format(channel), i) for i, channel in enumerate(channels)]
    # map() blocks until every download has finished, so the worker processes
    # can be released as soon as the with-block exits.
    with MPool(8) as mpool:
        mpool.map(download, channel_args)

    for c in repos:
        # if os.path.isfile('__cache__/' + c + '.solv'):
        #     repos[c].add_solv('__cache__/' + c + '.solv')
        #     with open('__cache__/' + c + '.json', 'r') as f:
        #         repos[c].read(f)
        # else:
        #     # with open(c + '.json', 'r') as f:
        #     #     parse_json(f.read(), (c, repos[c]))
        with open('__cache__/' + c + '.json', 'r') as f:
            parse_json(f.read(), (c, repos[c]))
    return repos
channels = ['conda-forge', 'r', 'anaconda']
# channels = ['r']
# channels = ['xtest']
# channels = ['xtest']
repos = download_all(channels)
def internalize_repos(repos):
    """Call `internalize()` on every sub-repo of every channel.

    `repos` maps a channel key to a dict of sub-repos; only the sub-repo
    objects themselves are used.
    """
    for channel_repos in repos.values():
        for subrepo in channel_repos.values():
            subrepo.internalize()
        # with open('__cache__/' + key + '.solv', 'wb') as f:
        #     xf = solv.xfopen_fd(None, f.fileno())
        #     repo.write(xf)
def get_solver(specs, jobs, features):
    """Prepare the global pool and return a solver for it.

    Prints the priority of every repo in the module-level `repos`, raises the
    priority of the sub-repos whose key is listed in `features` to 100,
    internalizes all repos, builds the pool's provides index and creates the
    solver. One install job per entry of `specs` is appended to `jobs`, which
    is modified in place.
    """
    for channel_repos in repos.values():
        for repo in channel_repos.values():
            print("PRIO: {}, SUB: {}".format(repo.priority, repo.subpriority))

    for channel_repos in repos.values():
        for key, repo in channel_repos.items():
            if key not in features:
                # if key != 'main':
                #     repo.free()
                continue
            print("Setting priority to 100", repo)
            repo.priority = 100

    internalize_repos(repos)
    pool.createwhatprovides()
    solver = pool.Solver()
    for spec in specs:
        # Only the first id returned for a spec is turned into a job.
        target = parse_to_rel(spec)[0]
        jobs.append(pool.Job(solv.Job.SOLVER_INSTALL | solv.Job.SOLVER_SOLVABLE_NAME, target))
    return solver
# instl = ['iopro 1.4*', 'python 2.7*', 'numpy 1.7*']
# instl = ['xtensor']
instl = ['hdf5 >=1.10']
jobs = []
# jobs.append(pool.Job(solv.Job.SOLVER_INSTALL | solv.Job.SOLVER_SOLVABLE_NAME, pool.str2id("r-rcpp")))
# jobs.append(pool.Job(solv.Job.SOLVER_INSTALL | solv.Job.SOLVER_SOLVABLE_NAME, pool.rel2id(pool.str2id("r-rcpp"), pool.str2id('1.0.0'), solv.REL_EQ)))
# jobs.append(pool.Job(solv.Job.SOLVER_INSTALL | solv.Job.SOLVER_SOLVABLE_NAME, pool.str2id("jupyter")))
# jobs.append(pool.Job(solv.Job.SOLVER_INSTALL | solv.Job.SOLVER_SOLVABLE_NAME, parse_to_rel("iopro 1.4*")[0]))
# jobs.append(pool.Job(solv.Job.SOLVER_INSTALL | solv.Job.SOLVER_SOLVABLE_NAME, parse_to_rel("python 2.7*")[0]))
# jobs.append(pool.Job(solv.Job.SOLVER_INSTALL | solv.Job.SOLVER_SOLVABLE_NAME, parse_to_rel("numpy 1.7*")[0]))
# instl = ['iopro', 'python 2.7*', 'numpy 1.5*']
# solver = get_solver(instl, jobs, features=['mkl'])
# Build the solver for the requested specs and run it; a non-empty result from
# solve() is reported and the script exits with status 1.
solver = get_solver(instl, jobs, features=[])
problems = solver.solve(jobs)
if problems:
for p in problems:
print("problem:", p)
sys.exit(1)
transaction = solver.transaction()
# xls = [p for p in transaction.newsolvables()]
# print("LEN: ", len(xls))
# Template of the environment description printed at the end of the script.
yml_export = """
name: {env_name}
channels:
{channels}
dependencies:
{dependencies}
"""
# One "- <channel>" line per configured channel.
yml_channels = ''
for c in channels:
yml_channels += '- {}\n'.format(c)
# One "- <repo>::<name>=<evr>" line per solvable the transaction would newly install.
yml_deps = ''
for p in transaction.newsolvables():
yml_deps += '- {}::{}={}\n'.format(p.repo.name, p.name, p.evr)
# The dependency lines are sorted before printing; splitting on '\n' also
# yields an empty string, which sorts first.
print(yml_export.format(env_name='test', channels=yml_channels, dependencies='\n'.join(sorted(yml_deps.split('\n')))))