commit 65a09d121fb0ee41480443551816f7ffb503b5d6 Author: yoshoku Date: Sat Sep 30 23:17:24 2017 +0900 :tada: first commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8eb3b06 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +/.bundle/ +/.yardoc +/Gemfile.lock +/_yardoc/ +/coverage/ +/doc/ +/pkg/ +/spec/reports/ +/tmp/ + +# rspec failure tracking +.rspec_status diff --git a/.rspec b/.rspec new file mode 100644 index 0000000..8c18f1a --- /dev/null +++ b/.rspec @@ -0,0 +1,2 @@ +--format documentation +--color diff --git a/.rubocop.yml b/.rubocop.yml new file mode 100644 index 0000000..06cb7de --- /dev/null +++ b/.rubocop.yml @@ -0,0 +1,17 @@ +#AllCops: +# TargetRubyVersion: 2.3 + +Documentation: + Enabled: false + +Metrics/LineLength: + Max: 120 + +Metrics/ModuleLength: + Max: 200 + +Metrics/ClassLength: + Max: 200 + +Security/MarshalLoad: + Enabled: false diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..aa549b1 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,5 @@ +sudo: false +language: ruby +rvm: + - 2.4.2 +before_install: gem install bundler -v 1.15.4 diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..2b784c3 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,74 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, gender identity and expression, level of experience, +nationality, personal appearance, race, religion, or sexual identity and +orientation. 
+ +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or +advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at yoshoku@outlook.com. All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at [http://contributor-covenant.org/version/1/4][version] + +[homepage]: http://contributor-covenant.org +[version]: http://contributor-covenant.org/version/1/4/ diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..ddaf689 --- /dev/null +++ b/Gemfile @@ -0,0 +1,6 @@ +source "https://rubygems.org" + +git_source(:github) {|repo_name| "https://github.com/#{repo_name}" } + +# Specify your gem's dependencies in svmkit.gemspec +gemspec diff --git a/HISTORY.md b/HISTORY.md new file mode 100644 index 0000000..b4599fb --- /dev/null +++ b/HISTORY.md @@ -0,0 +1,8 @@ +# 0.1.0 +- Added basic classes. +- Added a utility module. +- Added class for RBF kernel approximation. +- Added class for Support Vector Machine with Pegasos algorithm. +- Added class that performs multiclass classification with one-vs.-rest strategy. +- Added classes for preprocessing such as min-max scaling, standardization, and L2 normalization. + diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..8b27839 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,23 @@ +Copyright (c) 2017 yoshoku +All rights reserved.
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..19c26b3 --- /dev/null +++ b/README.md @@ -0,0 +1,84 @@ +# SVMKit + +SVMKit is a library for machine learning in Ruby. +SVMKit implements machine learning algorithms with an interface similar to Scikit-Learn in Python. +However, since SVMKit is an experimental library, there are few machine learning algorithms implemented.
+ +## Installation + +Add this line to your application's Gemfile: + +```ruby +gem 'svmkit' +``` + +And then execute: + + $ bundle + +Or install it yourself as: + + $ gem install svmkit + +## Usage + +Training phase: +```ruby +require 'svmkit' +require 'libsvmloader' + +samples, labels = LibSVMLoader.load_libsvm_file('pendigits', stype: :dense) + +normalizer = SVMKit::Preprocessing::MinMaxScaler.new +normalized = normalizer.fit_transform(samples) + +transformer = SVMKit::KernelApproximation::RBF.new(gamma: 2.0, n_components: 1024, random_seed: 1) +transformed = transformer.fit_transform(normalized) + +base_classifier = + SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, max_iter: 50, batch_size: 20, random_seed: 1) +classifier = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_classifier) +classifier.fit(transformed, labels) + +File.open('trained_normalizer.dat', 'wb') { |f| f.write(Marshal.dump(normalizer)) } +File.open('trained_transformer.dat', 'wb') { |f| f.write(Marshal.dump(transformer)) } +File.open('trained_classifier.dat', 'wb') { |f| f.write(Marshal.dump(classifier)) } +``` + +Testing phase: +```ruby +require 'svmkit' +require 'libsvmloader' + +samples, labels = LibSVMLoader.load_libsvm_file('pendigits.t', stype: :dense) + +normalizer = Marshal.load(File.binread('trained_normalizer.dat')) +transformer = Marshal.load(File.binread('trained_transformer.dat')) +classifier = Marshal.load(File.binread('trained_classifier.dat')) + +normalized = normalizer.transform(samples) +transformed = transformer.transform(normalized) + +puts(sprintf("Accuracy: %.1f%%", 100.0 * classifier.score(transformed, labels))) +``` + +## Development + +After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment. + +To install this gem onto your local machine, run `bundle exec rake install`.
To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org). + +## Contributing + +Bug reports and pull requests are welcome on GitHub at https://github.com/yoshoku/svmkit. +This project is intended to be a safe, welcoming space for collaboration, +and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct. + +## License + +The gem is available as open source under the terms of the [BSD 2-clause License](https://opensource.org/licenses/BSD-2-Clause). + +## Code of Conduct + +Everyone interacting in the SVMKit project’s codebases, issue trackers, +chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/yoshoku/svmkit/blob/master/CODE_OF_CONDUCT.md). diff --git a/Rakefile b/Rakefile new file mode 100644 index 0000000..b7e9ed5 --- /dev/null +++ b/Rakefile @@ -0,0 +1,6 @@ +require "bundler/gem_tasks" +require "rspec/core/rake_task" + +RSpec::Core::RakeTask.new(:spec) + +task :default => :spec diff --git a/bin/console b/bin/console new file mode 100755 index 0000000..df255d5 --- /dev/null +++ b/bin/console @@ -0,0 +1,14 @@ +#!/usr/bin/env ruby + +require "bundler/setup" +require "svmkit" + +# You can add fixtures and/or initialization code here to make experimenting +# with your gem easier. You can also use a different console, if you like. + +# (If you use this, don't forget to add pry to your Gemfile!) 
+# require "pry" +# Pry.start + +require "irb" +IRB.start(__FILE__) diff --git a/bin/setup b/bin/setup new file mode 100755 index 0000000..dce67d8 --- /dev/null +++ b/bin/setup @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -euo pipefail +IFS=$'\n\t' +set -vx + +bundle install + +# Do any other automated setup that you need to do here diff --git a/lib/svmkit.rb b/lib/svmkit.rb new file mode 100644 index 0000000..cfb308b --- /dev/null +++ b/lib/svmkit.rb @@ -0,0 +1,16 @@ +begin + require 'nmatrix/nmatrix' +rescue LoadError +end + +require 'svmkit/version' +require 'svmkit/utils' +require 'svmkit/base/base_estimator' +require 'svmkit/base/classifier' +require 'svmkit/base/transformer' +require 'svmkit/kernel_approximation/rbf' +require 'svmkit/linear_model/pegasos_svc' +require 'svmkit/multiclass/one_vs_rest_classifier' +require 'svmkit/preprocessing/l2_normalizer' +require 'svmkit/preprocessing/min_max_scaler' +require 'svmkit/preprocessing/standard_scaler' diff --git a/lib/svmkit/base/base_estimator.rb b/lib/svmkit/base/base_estimator.rb new file mode 100644 index 0000000..6a42640 --- /dev/null +++ b/lib/svmkit/base/base_estimator.rb @@ -0,0 +1,11 @@ + +module SVMKit + # This module consists of basic mix-in classes. + module Base + # Base module for all estimators in SVMKit. + module BaseEstimator + # Parameters for this estimator. + attr_accessor :params + end + end +end diff --git a/lib/svmkit/base/classifier.rb b/lib/svmkit/base/classifier.rb new file mode 100644 index 0000000..1638599 --- /dev/null +++ b/lib/svmkit/base/classifier.rb @@ -0,0 +1,22 @@ + +module SVMKit + module Base + # Module for all classifiers in SVMKit. + module Classifier + # An abstract method for fitting a model. + def fit + raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}." + end + + # An abstract method for predicting labels. + def predict + raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}." 
+ end + + # An abstract method for calculating classification accuracy. + def score + raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}." + end + end + end +end diff --git a/lib/svmkit/base/transformer.rb b/lib/svmkit/base/transformer.rb new file mode 100644 index 0000000..d874318 --- /dev/null +++ b/lib/svmkit/base/transformer.rb @@ -0,0 +1,17 @@ + +module SVMKit + module Base + # Module for all transformers in SVMKit. + module Transformer + # An abstract method for fitting a model. + def fit + raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}." + end + + # An abstract method for fitting a model and transforming given data. + def fit_transform + raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}." + end + end + end +end diff --git a/lib/svmkit/kernel_approximation/rbf.rb b/lib/svmkit/kernel_approximation/rbf.rb new file mode 100644 index 0000000..c4bbc07 --- /dev/null +++ b/lib/svmkit/kernel_approximation/rbf.rb @@ -0,0 +1,133 @@ +require 'svmkit/base/base_estimator' +require 'svmkit/base/transformer' + +module SVMKit + # Module for kernel approximation algorithms. + module KernelApproximation + # Class for RBF kernel feature mapping. + # + # transformer = SVMKit::KernelApproximation::RBF.new(gamma: 1.0, n_components: 128, random_seed: 1) + # new_training_samples = transformer.fit_transform(training_samples) + # new_testing_samples = transformer.transform(testing_samples) + # + # * *Reference*: + # - A. Rahimi and B. Recht, "Random Features for Large-Scale Kernel Machines," Proc. NIPS'07, pp.1177--1184, 2007. + class RBF + include Base::BaseEstimator + include Base::Transformer + + DEFAULT_PARAMS = { # :nodoc: + gamma: 1.0, + n_components: 128, + random_seed: nil + }.freeze + + # The random matrix for transformation. + attr_reader :random_mat # :nodoc: + + # The random vector for transformation. + attr_reader :random_vec # :nodoc: + + # The random generator for transformation.
+ attr_reader :rng # :nodoc: + + # Creates a new transformer for mapping to RBF kernel feature space. + # + # call-seq: + # new(gamma: 1.0, n_components: 128, random_seed: 1) -> RBF + # + # * *Arguments* : + # - +:gamma+ (Float) (defaults to: 1.0) -- The parameter of RBF kernel: exp(-gamma * x^2) + # - +:n_components+ (Integer) (defaults to: 128) -- The number of dimensions of the RBF kernel feature space. + # - +:random_seed+ (Integer) (defaults to: nil) -- The seed value using to initialize the random generator. + def initialize(params = {}) + self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }]) + self.params[:random_seed] ||= srand + @rng = Random.new(self.params[:random_seed]) + @random_mat = nil + @random_vec = nil + end + + # Fit the model with given training data. + # + # call-seq: + # fit(x) -> RBF + # + # * *Arguments* : + # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model. This method uses only the number of features of the data. + # * *Returns* : + # - The learned transformer itself. + def fit(x, _y = nil) + n_features = x.shape[1] + params[:n_components] = 2 * n_features if params[:n_components] <= 0 + @random_mat = rand_normal([n_features, params[:n_components]]) * (2.0 * params[:gamma])**0.5 + n_half_components = params[:n_components] / 2 + @random_vec = NMatrix.zeros([1, params[:n_components] - n_half_components]).hconcat( + NMatrix.ones([1, n_half_components]) * (0.5 * Math::PI) + ) + #@random_vec = rand_uniform([1, self.params[:n_components]]) * (2.0 * Math::PI) + self + end + + # Fit the model with training data, and then transform them with the learned model. + # + # call-seq: + # fit_transform(x) -> NMatrix + # + # * *Arguments* : + # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model. + # * *Returns* : + # - The transformed data (NMatrix, shape: [n_samples, n_components]). 
+ def fit_transform(x, _y = nil) + fit(x).transform(x) + end + + # Transform the given data with the learned model. + # + # call-seq: + # transform(x) -> NMatrix + # + # * *Arguments* : + # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The data to be transformed with the learned model. + # * *Returns* : + # - The transformed data (NMatrix, shape: [n_samples, n_components]). + def transform(x) + n_samples, = x.shape + projection = x.dot(@random_mat) + @random_vec.repeat(n_samples, 0) + projection.sin * ((2.0 / params[:n_components])**0.5) + end + + # Serializes object through Marshal#dump. + def marshal_dump # :nodoc: + { params: params, + random_mat: Utils.dump_nmatrix(@random_mat), + random_vec: Utils.dump_nmatrix(@random_vec), + rng: @rng } + end + + # Deserialize object through Marshal#load. + def marshal_load(obj) # :nodoc: + self.params = obj[:params] + @random_mat = Utils.restore_nmatrix(obj[:random_mat]) + @random_vec = Utils.restore_nmatrix(obj[:random_vec]) + @rng = obj[:rng] + nil + end + + protected + + # Generate the uniform random matrix with the given shape. + def rand_uniform(shape) # :nodoc: + rnd_vals = Array.new(NMatrix.size(shape)) { @rng.rand } + NMatrix.new(shape, rnd_vals, dtype: :float64, stype: :dense) + end + + # Generate the normal random matrix with the given shape, mean, and standard deviation. + def rand_normal(shape, mu = 0.0, sigma = 1.0) # :nodoc: + a = rand_uniform(shape) + b = rand_uniform(shape) + ((a.log * -2.0).sqrt * (b * 2.0 * Math::PI).sin) * sigma + mu + end + end + end +end diff --git a/lib/svmkit/linear_model/pegasos_svc.rb b/lib/svmkit/linear_model/pegasos_svc.rb new file mode 100644 index 0000000..0af42ec --- /dev/null +++ b/lib/svmkit/linear_model/pegasos_svc.rb @@ -0,0 +1,148 @@ +require 'svmkit/base/base_estimator' +require 'svmkit/base/classifier' + +module SVMKit + # This module consists of the classes that implement generalized linear models. 
+ module LinearModel + # PegasosSVC is a class that implements Support Vector Classifier with the Pegasos algorithm. + # + # estimator = + # SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, max_iter: 100, batch_size: 20, random_seed: 1) + # estimator.fit(training_samples, traininig_labels) + # results = estimator.predict(testing_samples) + # + # * *Reference*: + # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007. + # + class PegasosSVC + include Base::BaseEstimator + include Base::Classifier + + DEFAULT_PARAMS = { # :nodoc: + reg_param: 1.0, + max_iter: 100, + batch_size: 50, + random_seed: nil + }.freeze + + # The weight vector for SVC. + attr_reader :weight_vec + + # The random generator for performing random sampling in the Pegasos algorithm. + attr_reader :rng + + # Create a new classifier with Support Vector Machine by the Pegasos algorithm. + # + # :call-seq: + # new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> PegasosSVC + # + # * *Arguments* : + # - +:reg_param+ (Float) (defaults to: 1.0) -- The regularization parameter. + # - +:max_iter+ (Integer) (defaults to: 100) -- The maximum number of iterations. + # - +:batch_size+ (Integer) (defaults to: 50) -- The size of the mini batches. + # - +:random_seed+ (Integer) (defaults to: nil) -- The seed value using to initialize the random generator. + def initialize(params = {}) + self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }]) + self.params[:random_seed] ||= srand + @weight_vec = nil + @rng = Random.new(self.params[:random_seed]) + end + + # Fit the model with given training data. + # + # :call-seq: + # fit(x, y) -> PegasosSVC + # + # * *Arguments* : + # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model. + # - +y+ (NMatrix, shape: [1, n_samples]) -- The labels to be used for fitting the model. 
+ # * *Returns* : + # - The learned classifier itself. + def fit(x, y) + # Generate binary labels + negative_label = y.uniq.sort.shift + bin_y = y.to_flat_a.map { |l| l != negative_label ? 1 : -1 } + # Initialize some variables. + n_samples, n_features = x.shape + rand_ids = [*0..n_samples - 1].shuffle(random: @rng) + @weight_vec = NMatrix.zeros([1, n_features]) + # Start optimization. + params[:max_iter].times do |t| + # random sampling + subset_ids = rand_ids.shift(params[:batch_size]) + rand_ids.concat(subset_ids) + target_ids = subset_ids.map do |n| + n if @weight_vec.dot(x.row(n).transpose) * bin_y[n] < 1 + end + n_subsamples = target_ids.size + next if n_subsamples.zero? + # update the weight vector. + eta = 1.0 / (params[:reg_param] * (t + 1)) + mean_vec = NMatrix.zeros([1, n_features]) + target_ids.each { |n| mean_vec += x.row(n) * bin_y[n] } + mean_vec *= eta / n_subsamples + @weight_vec = @weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec + # scale the weight vector. + scaler = (1.0 / params[:reg_param]**0.5) / @weight_vec.norm2 + @weight_vec *= [1.0, scaler].min + end + self + end + + # Calculate confidence scores for samples. + # + # :call-seq: + # decision_function(x) -> NMatrix, shape: [1, n_samples] + # + # * *Arguments* : + # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to compute the scores. + # * *Returns* : + # - Confidence score per sample. + def decision_function(x) + @weight_vec.dot(x.transpose) + end + + # Predict class labels for samples. + # + # :call-seq: + # predict(x) -> NMatrix, shape: [1, n_samples] + # + # * *Arguments* : + # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the labels. + # * *Returns* : + # - Predicted class label per sample. + def predict(x) + decision_function(x).map { |v| v >= 0 ? 1 : -1 } + end + + # Claculate the mean accuracy of the given testing data. 
+ # + # :call-seq: + # score(x, y) -> Float + # + # * *Arguments* : + # - +x+ (NMatrix, shape: [n_samples, n_features]) -- Testing data. + # - +y+ (NMatrix, shape: [1, n_samples]) -- True labels for testing data. + # * *Returns* : + # - Mean accuracy + def score(x, y) + p = predict(x) + n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+) + n_hits / y.size.to_f + end + + # Serializes object through Marshal#dump. + def marshal_dump # :nodoc: + { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), rng: @rng } + end + + # Deserialize object through Marshal#load. + def marshal_load(obj) # :nodoc: + self.params = obj[:params] + @weight_vec = Utils.restore_nmatrix(obj[:weight_vec]) + @rng = obj[:rng] + nil + end + end + end +end diff --git a/lib/svmkit/multiclass/one_vs_rest_classifier.rb b/lib/svmkit/multiclass/one_vs_rest_classifier.rb new file mode 100644 index 0000000..6e29e6f --- /dev/null +++ b/lib/svmkit/multiclass/one_vs_rest_classifier.rb @@ -0,0 +1,127 @@ +require 'svmkit/base/base_estimator.rb' +require 'svmkit/base/classifier.rb' + +module SVMKit + # This module consists of the classes that implement multi-label classification strategy. + module Multiclass + # OneVsRestClassifier is a class that implements One-vs-Rest (OvR) strategy for multi-label classification. + # + # base_estimator = + # SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, max_iter: 100, batch_size: 20, random_seed: 1) + # estimator = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_estimator) + # estimator.fit(training_samples, training_labels) + # results = estimator.predict(testing_samples) + # + class OneVsRestClassifier + include Base::BaseEstimator + include Base::Classifier + + DEFAULT_PARAMS = { # :nodoc: + estimator: nil + }.freeze + + # The set of estimators. + attr_reader :estimators + + # The class labels. + attr_reader :classes + + # Create a new multi-label classifier with the one-vs-rest strategy.
+ # + # :call-seq: + # new(estimator: base_estimator) -> OneVsRestClassifier + # + # * *Arguments* : + # - +:estimator+ (Classifier) (defaults to: nil) -- The (binary) classifier for construction a multi-label classifier. + def initialize(params = {}) + self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }]) + @estimators = nil + @classes = nil + end + + # Fit the model with given training data. + # + # :call-seq: + # fit(x, y) -> OneVsRestClassifier + # + # * *Arguments* : + # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model. + # - +y+ (NMatrix, shape: [1, n_samples]) -- The labels to be used for fitting the model. + # * *Returns* : + # - The learned classifier itself. + def fit(x, y) + @classes = y.uniq.sort + @estimators = @classes.map do |label| + bin_y = y.map { |l| l == label ? 1 : -1 } + params[:estimator].dup.fit(x, bin_y) + end + self + end + + # Calculate confidence scores for samples. + # + # :call-seq: + # decision_function(x) -> NMatrix, shape: [n_samples, n_classes] + # + # * *Arguments* : + # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to compute the scores. + # * *Returns* : + # - Confidence scores per sample for each class. + def decision_function(x) + n_samples, = x.shape + n_classes = @classes.size + NMatrix.new( + [n_classes, n_samples], + Array.new(n_classes) { |m| @estimators[m].decision_function(x).to_a }.flatten + ).transpose + end + + # Predict class labels for samples. + # + # :call-seq: + # predict(x) -> NMatrix, shape: [1, n_samples] + # + # * *Arguments* : + # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the labels. + # * *Returns* : + # - Predicted class label per sample. 
+ def predict(x) + n_samples, = x.shape + decision_values = decision_function(x) + NMatrix.new([1, n_samples], + decision_values.each_row.map { |vals| @classes[vals.to_a.index(vals.to_a.max)] }) + end + + # Calculate the mean accuracy of the given testing data. + # + # :call-seq: + # score(x, y) -> Float + # + # * *Arguments* : + # - +x+ (NMatrix, shape: [n_samples, n_features]) -- Testing data. + # - +y+ (NMatrix, shape: [1, n_samples]) -- True labels for testing data. + # * *Returns* : + # - Mean accuracy + def score(x, y) + p = predict(x) + n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+) + n_hits / y.size.to_f + end + + # Serializes object through Marshal#dump. + def marshal_dump # :nodoc: + { params: params, + classes: @classes, + estimators: @estimators.map { |e| Marshal.dump(e) } } + end + + # Deserialize object through Marshal#load. + def marshal_load(obj) # :nodoc: + self.params = obj[:params] + @classes = obj[:classes] + @estimators = obj[:estimators].map { |e| Marshal.load(e) } + nil + end + end + end +end diff --git a/lib/svmkit/preprocessing/l2_normalizer.rb b/lib/svmkit/preprocessing/l2_normalizer.rb new file mode 100644 index 0000000..10049eb --- /dev/null +++ b/lib/svmkit/preprocessing/l2_normalizer.rb @@ -0,0 +1,57 @@ +require 'svmkit/base/base_estimator' +require 'svmkit/base/transformer' + +module SVMKit + # This module consists of the classes that perform preprocessings. + module Preprocessing + # Normalize samples to unit L2-norm. + # + # normalizer = SVMKit::Preprocessing::L2Normalizer.new + # new_samples = normalizer.fit_transform(samples) + class L2Normalizer + include Base::BaseEstimator + include Base::Transformer + + # The vector consists of norms of each sample. + attr_reader :norm_vec # :nodoc: + + # Create a new normalizer for normalizing to unit L2-norm. + # + # :call-seq: + # new() -> L2Normalizer + def initialize(_params = {}) + @norm_vec = nil + end + + # Calculate L2 norms of each sample.
+ # + # :call-seq: + # fit(x) -> L2Normalizer + # + # * *Arguments* : + # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to calculate L2-norms. + # * *Returns* : + # - L2Normalizer + def fit(x, _y = nil) + n_samples, = x.shape + @norm_vec = NMatrix.new([1, n_samples], + Array.new(n_samples) { |n| x.row(n).norm2 }) + self + end + + # Calculate L2 norms of each sample, and then normalize samples to unit L2-norm. + # + # :call-seq: + # fit_transform(x) -> NMatrix + # + # * *Arguments* : + # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to calculate L2-norms. + # * *Returns* : + # - The normalized samples (NMatrix) + def fit_transform(x, _y = nil) + fit(x) + x / @norm_vec.transpose.repeat(x.shape[1], 1) + end + end + end +end diff --git a/lib/svmkit/preprocessing/min_max_scaler.rb b/lib/svmkit/preprocessing/min_max_scaler.rb new file mode 100644 index 0000000..ff81444 --- /dev/null +++ b/lib/svmkit/preprocessing/min_max_scaler.rb @@ -0,0 +1,99 @@ +require 'svmkit/base/base_estimator' +require 'svmkit/base/transformer' + +module SVMKit + # This module consists of the classes that perform preprocessings. + module Preprocessing + # Normalize samples by scaling each feature to a given range. + # + # normalizer = SVMKit::Preprocessing::MinMaxScaler.new(feature_range: [0.0, 1.0]) + # new_training_samples = normalizer.fit_transform(training_samples) + # new_testing_samples = normalizer.transform(testing_samples) + class MinMaxScaler + include Base::BaseEstimator + include Base::Transformer + + DEFAULT_PARAMS = { # :nodoc: + feature_range: [0.0, 1.0] + }.freeze + + # The vector consists of the minimum value for each feature. + attr_reader :min_vec # :nodoc: + + # The vector consists of the maximum value for each feature. + attr_reader :max_vec # :nodoc: + + # Creates a new normalizer for scaling each feature to a given range. 
+ # + # call-seq: + # new(feature_range: [0.0, 1.0]) -> MinMaxScaler + # + # * *Arguments* : + # - +:feature_range+ (Array) (defaults to: [0.0, 1.0]) -- The desired range of samples. + def initialize(params = {}) + @params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }]) + @min_vec = nil + @max_vec = nil + end + + # Calculate the minimum and maximum value of each feature for scaling. + # + # :call-seq: + # fit(x) -> MinMaxScaler + # + # * *Arguments* : + # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to calculate the minimum and maximum values. + # * *Returns* : + # - MinMaxScaler + def fit(x, _y = nil) + @min_vec = x.min(0) + @max_vec = x.max(0) + self + end + + # Calculate the minimum and maximum values, and then normalize samples to feature_range. + # + # :call-seq: + # fit_transform(x) -> NMatrix + # + # * *Arguments* : + # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to calculate the minimum and maximum values. + # * *Returns* : + # - The scaled samples (NMatrix) + def fit_transform(x, _y = nil) + fit(x).transform(x) + end + + # Perform scaling the given samples according to feature_range. + # + # call-seq: + # transform(x) -> NMatrix + # + # * *Arguments* : + # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to be scaled. + # * *Returns* : + # - The scaled samples (NMatrix) + def transform(x) + n_samples, = x.shape + dif_vec = @max_vec - @min_vec + nx = (x - @min_vec.repeat(n_samples, 0)) / dif_vec.repeat(n_samples, 0) + nx * (@params[:feature_range][1] - @params[:feature_range][0]) + @params[:feature_range][0] + end + + # Serializes object through Marshal#dump. + def marshal_dump # :nodoc: + { params: @params, + min_vec: Utils.dump_nmatrix(@min_vec), + max_vec: Utils.dump_nmatrix(@max_vec) } + end + + # Deserialize object through Marshal#load. 
+ def marshal_load(obj) # :nodoc: + @params = obj[:params] + @min_vec = Utils.restore_nmatrix(obj[:min_vec]) + @max_vec = Utils.restore_nmatrix(obj[:max_vec]) + nil + end + end + end +end diff --git a/lib/svmkit/preprocessing/standard_scaler.rb b/lib/svmkit/preprocessing/standard_scaler.rb new file mode 100644 index 0000000..5192614 --- /dev/null +++ b/lib/svmkit/preprocessing/standard_scaler.rb @@ -0,0 +1,87 @@ +require 'svmkit/base/base_estimator' +require 'svmkit/base/transformer' + +module SVMKit + # This module consists of the classes that perform preprocessings. + module Preprocessing + # Normalize samples by centering and scaling to unit variance. + # + # normalizer = SVMKit::Preprocessing::StandardScaler.new + # new_training_samples = normalizer.fit_transform(training_samples) + # new_testing_samples = normalizer.transform(testing_samples) + class StandardScaler + include Base::BaseEstimator + include Base::Transformer + + # The vector consists of the mean value for each feature. + attr_reader :mean_vec # :nodoc: + + # The vector consists of the standard deviation for each feature. + attr_reader :std_vec # :nodoc: + + # Create a new normalizer for centering and scaling to unit variance. + # + # :call-seq: + # new() -> StandardScaler + def initialize(_params = {}) + @mean_vec = nil + @std_vec = nil + end + + # Calculate the mean value and standard deviation of each feature for scaling. + # + # :call-seq: + # fit(x) -> StandardScaler + # + # * *Arguments* : + # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to calculate the mean values and standard deviations. + # * *Returns* : + # - StandardScaler + def fit(x, _y = nil) + @mean_vec = x.mean(0) + @std_vec = x.std(0) + self + end + + # Calculate the mean values and standard deviations, and then normalize samples using them. 
+ # + # :call-seq: + # fit_transform(x) -> NMatrix + # + # * *Arguments* : + # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to calculate the mean values and standard deviations. + # * *Returns* : + # - The scaled samples (NMatrix) + def fit_transform(x, _y = nil) + fit(x).transform(x) + end + + # Perform standardization the given samples. + # + # call-seq: + # transform(x) -> NMatrix + # + # * *Arguments* : + # - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to be scaled. + # * *Returns* : + # - The scaled samples (NMatrix) + def transform(x) + n_samples, = x.shape + (x - @mean_vec.repeat(n_samples, 0)) / @std_vec.repeat(n_samples, 0) + end + + # Serializes object through Marshal#dump. + def marshal_dump # :nodoc: + { mean_vec: Utils.dump_nmatrix(@mean_vec), + std_vec: Utils.dump_nmatrix(@std_vec) } + end + + # Deserialize object through Marshal#load. + def marshal_load(obj) # :nodoc: + @mean_vec = Utils.restore_nmatrix(obj[:mean_vec]) + @std_vec = Utils.restore_nmatrix(obj[:std_vec]) + nil + end + end + end +end diff --git a/lib/svmkit/utils.rb b/lib/svmkit/utils.rb new file mode 100644 index 0000000..b27dbf8 --- /dev/null +++ b/lib/svmkit/utils.rb @@ -0,0 +1,33 @@ +module SVMKit + # Module for utility methods. + module Utils + class << self + # Dump an NMatrix object converted to a Ruby Hash. + # # call-seq: + # dump_nmatrix(mat) -> Hash + # + # * *Arguments* : + # - +mat+ -- An NMatrix object converted to a Ruby Hash. + # * *Returns* : + # - A Ruby Hash containing matrix information. + def dump_nmatrix(mat) + return nil if mat.class != NMatrix + { shape: mat.shape, array: mat.to_flat_a, dtype: mat.dtype, stype: mat.stype } + end + + # Return the results of converting the dumped data into an NMatrix object. + # + # call-seq: + # restore_nmatrix(dumped_mat) -> NMatrix + # + # * *Arguments* : + # - +dumpted_mat+ -- A Ruby Hash about NMatrix object created with SVMKit::Utils.dump_nmatrix method. 
+ # * *Returns* : + # - An NMatrix object restored from the given Hash. + def restore_nmatrix(dmp = {}) + return nil unless dmp.class == Hash && %i[shape array dtype stype].all?(&dmp.method(:has_key?)) + NMatrix.new(dmp[:shape], dmp[:array], dtype: dmp[:dtype], stype: dmp[:stype]) + end + end + end +end diff --git a/lib/svmkit/version.rb b/lib/svmkit/version.rb new file mode 100644 index 0000000..3dbcca4 --- /dev/null +++ b/lib/svmkit/version.rb @@ -0,0 +1,3 @@ +module SVMKit + VERSION = '0.1.0'.freeze +end diff --git a/spec/kernel_approximation/rbf_spec.rb b/spec/kernel_approximation/rbf_spec.rb new file mode 100644 index 0000000..a25e23a --- /dev/null +++ b/spec/kernel_approximation/rbf_spec.rb @@ -0,0 +1,48 @@ +require 'spec_helper' + +RSpec.describe SVMKit::KernelApproximation::RBF do + let(:n_samples) { 10 } + let(:n_features) { 4 } + let(:samples) do + rng = Random.new(1) + rnd_vals = Array.new(n_samples * n_features) { rng.rand } + NMatrix.new([n_samples, n_features], rnd_vals, dtype: :float64, stype: :dense) + end + + it 'has a small approximation error for the RBF kernel function.' do + # calculate RBF kernel matrix. + kernel_matrix = NMatrix.zeros([n_samples, n_samples]) + n_samples.times do |m| + n_samples.times do |n| + distance = (samples.row(m) - samples.row(n)).norm2 + kernel_matrix[m, n] = Math.exp(-distance**2) + end + end + # calculate approximate RBF kernel matrix. + transformer = described_class.new(gamma: 1.0, n_components: 4096, random_seed: 1) + new_samples = transformer.fit_transform(samples) + inner_matrix = new_samples.dot(new_samples.transpose) + # evalute mean error. + mean_error = 0.0 + n_samples.times do |m| + n_samples.times do |n| + mean_error += ((kernel_matrix[m, n] - inner_matrix[m, n])**2)**0.5 + end + end + mean_error /= n_samples * n_samples + expect(mean_error).to be < 0.01 + end + + it 'dumps and restores itself using Marshal module.' 
do + transformer = described_class.new(gamma: 1.0, n_components: 128, random_seed: 1) + transformer.fit(samples) + copied = Marshal.load(Marshal.dump(transformer)) + expect(transformer.class).to eq(copied.class) + expect(transformer.params[:gamma]).to eq(copied.params[:gamma]) + expect(transformer.params[:n_components]).to eq(copied.params[:n_components]) + expect(transformer.params[:random_seed]).to eq(copied.params[:random_seed]) + expect(transformer.random_mat).to eq(copied.random_mat) + expect(transformer.random_vec).to eq(copied.random_vec) + expect(transformer.rng).to eq(copied.rng) + end +end diff --git a/spec/linear_model/pegasos_spec.rb b/spec/linear_model/pegasos_spec.rb new file mode 100644 index 0000000..29a0e4a --- /dev/null +++ b/spec/linear_model/pegasos_spec.rb @@ -0,0 +1,25 @@ +require 'spec_helper' + +RSpec.describe SVMKit::LinearModel::PegasosSVC do + let(:samples) { SVMKit::Utils.restore_nmatrix(Marshal.load(File.read(__dir__ + '/test_samples.dat'))) } + let(:labels) { SVMKit::Utils.restore_nmatrix(Marshal.load(File.read(__dir__ + '/test_labels.dat'))) } + let(:estimator) { described_class.new(penalty: 1.0, max_iter: 100, batch_size: 20, random_seed: 1) } + + it 'classifies two clusters.' do + estimator.fit(samples, labels) + score = estimator.score(samples, labels) + expect(score).to eq(1.0) + end + + it 'dumps and restores itself using Marshal module.' 
do + estimator.fit(samples, labels) + copied = Marshal.load(Marshal.dump(estimator)) + expect(estimator.class).to eq(copied.class) + expect(estimator.params[:reg_param]).to eq(copied.params[:reg_param]) + expect(estimator.params[:max_iter]).to eq(copied.params[:max_iter]) + expect(estimator.params[:batch_size]).to eq(copied.params[:batch_size]) + expect(estimator.params[:random_seed]).to eq(copied.params[:random_seed]) + expect(estimator.weight_vec).to eq(copied.weight_vec) + expect(estimator.rng).to eq(copied.rng) + end +end diff --git a/spec/linear_model/test_labels.dat b/spec/linear_model/test_labels.dat new file mode 100644 index 0000000..75908f4 --- /dev/null +++ b/spec/linear_model/test_labels.dat @@ -0,0 +1,7 @@ +{ : +shape[iiÈ: +array[Èiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii: +dtype: +int32: +stype: +dense \ No newline at end of file diff --git a/spec/linear_model/test_samples.dat b/spec/linear_model/test_samples.dat new file mode 100644 index 0000000..ddde299 --- /dev/null +++ b/spec/linear_model/test_samples.dat @@ -0,0 +1,6 @@ +{ : +shape[iÈi: 
+array[f-2.144418611498281f-0.9327669444444282f-1.987981667557299f0.1291056270764213f-1.958700025903064f-0.6617679290110838f-1.98241392053168f0.8285531478791959f-0.8054892090705319f-0.1014904962885784f-1.615347001248649f-0.5822346610078045f-2.174203018184929f-0.2559465634119994f-1.793973693397624f0.3032281071962253f-1.811455769451304f0.4876973043118375f-1.904403883864021f-0.4958223474675058f-1.722949584406036f0.5509999150422911f-2.633444098614287f0.4856077300372701f-3.01258490263161f-0.6884220279541803f-2.78404306431203f0.5845588796056214f-1.450704452444316f-0.3236623388736068f-2.757341233500102f-0.3753025197144783f-2.717767399178752f-0.02491428892715813f-2.277189347846594f-0.0216584880616503f-1.770517396845783f0.3819547268238803f-2.00634251444034f0.04077201718117095f-2.538982489242754f0.7875354114764942f-1.350449714038846f-0.3224322078979789f-1.569593199955494f-0.8087344983018875f-2.118305717610033f0.8011259210590225f-2.86761735537266f0.4303106651783281f-1.951303148418281f0.03268964923487832f-2.88174845390202f-1.017414017392763f-1.222048164964224f-0.1522912748394634f-2.216909822747713f-0.1628263585489833f-1.606767426204021f0.9921566563529672f-2.584378077754018f-0.2761252754983766f-1.985434736302638f0.2282003340826754f-1.324616103688071f0.2601632044280106f-2.477458425274101f0.1689552661311049f-1.481168814141963f-0.6162242914837572f-1.413676814724229f-0.470159764569057f-2.029206810801547f0.0637407483015763f-1.366115035083427f0.2934628621510791f-2.282155484620107f0.3767428389299402f-1.133023780455289f0.1762998107439541f-2.246166367677849f0.1156669511668242f-2.312963365209115f-0.02774220701215796f-2.483510462099165f-0.2836705461187612f-1.537311075933073f-0.08014804936628275f-2.512499162040691f0.8687213757509379f-1.36488761713356f0.2909830225382575f-2.484747686310655f0.09644687092815092f-2.772991757891856f-0.251654059188626f-1.848773937840356f-0.2215288134463396f-2.136063589337238f0.5037015896707331f-2.843860659819726f-0.7701127758226097f-1.69667340203668f-0.33704255962
19744f-2.0979120978712f0.1272037509043362f-1.690384167440792f0.4254184936647388f-2.384317181670939f-0.09155003692984716f-2.490283938316319f-0.218435163380498f-2.08882556640961f-0.5547084285054296f-2.681161790637632f0.09071526568542962f-1.787758798377515f0.2643833249168886f-2.00657479077827f-0.3362219186340693f-2.891955060330703f0.9147909764424907f-2.894158390556191f-0.5868400897667424f-1.430456899826965f0.3408248735828514f-1.528035050765353f-0.332179175349486f-2.207197987354195f0.4221525195197722f-2.362019378821083f0.3482574560940418f-2.029501073763865f-0.6588359548871827f-1.375629354603051f0.3659785169741512f-1.372788798145291f0.2534808288573508f-2.123892735514128f-0.03832085061135759f-2.332179643417549f0.064207253250704f-2.233923310026033f0.1703428887842252f-2.078097073574361f-0.3608810741079449f-2.395264689989341f0.624166186498133f-2.371525596770561f0.6020398271964085f-1.832836730629334f0.3258602079057504f-1.676994298803789f-0.02466622269525715f-2.309935502350914f0.1524602731015562f-2.529376980330392f-0.002651821303954707f-2.508365429038055f-0.4129056049363931f-2.383001573071263f-0.3051316366543474f-2.629510927130841f0.4864771869760756f-1.609643735784425f-0.8532560921534257f-1.916199127981252f-0.05627435054286239f-1.93944964216729f0.3802702903894403f-0.8079519781001878f-0.566265035688453f-1.415399059954582f0.4927970765666553f-2.024529603541811f-0.5368779502923896f-1.668289179785906f0.8559990464734932f-1.791787176830914f0.7447138910830096f-2.219267641682253f-0.0862581063877526f-2.497471019445765f0.5214576738266784f-2.485562348887162f-0.122149050789293f-1.647754800235478f0.3042495158314897f-1.728932899209959f-0.09116001476341214f-2.753418357487797f0.1740085780710963f-2.815686192243535f0.3570766738564072f-2.592815839628448f-0.5558987954394115f-1.516195531361048f0.7038551554001882f-1.768238833579353f0.4045428077092253f1.786652595470106f0.4802778751505351f2.000001233107681f0.04741161230560079f2.791040767263443f-0.8533446997757812f1.401450079852445f0.5952399878565107f1
.033026492570266f-0.7234758061769727f1.58671809009995f0.1244616124024674f1.108762546809611f0.3735041625165819f2.440317081813784f0.2756032105365317f1.684729602394831f-0.7644887599258542f1.384472264846828f0.01803368231258959f2.922482885064848f-0.5585496273009759f2.611557846509449f-0.08139810383340067f1.530466869092838f0.05746204565669318f2.338918517847868f-0.2844248040995528f1.492490965542568f-0.2697611662592474f2.032583099563107f0.1733883920157632f2.299746446903676f-0.05596469466164723f1.769072308428699f0.02761734332593646f1.885413513389653f-0.3895895588572228f2.09409955264836f-0.7307100539086313f0.912767840347684f0.60936204691694f2.737325100676316f0.3807005844084384f1.921575699919799f-0.4068202567906838f1.851415715802569f0.03581992716704224f2.379717144087826f-1.030657766527259f1.89909696253338f-0.4133371168884555f2.308083494128881f0.1353427946563696f1.975937399159875f-0.4332514072655191f1.941159247244808f0.9511374335766813f1.448773863414043f-0.4249413624724646f1.291076843938298f-0.3486195387813348f1.746779592108437f-0.9063285002228127f2.232938688982304f-0.1051195141328337f1.587607815156598f0.3087604528370538f1.43214259064081f-0.3062856597089955f1.850119976429064f-0.2758060550670787f2.593817794169744f-0.3338008150293877f2.446642403565416f0.6061799725048089f1.780071414342694f0.02235280252155292f2.374543908602996f0.02641038148058739f2.133475240617765f0.5919170238236336f2.158025107804142f-0.51961845534472f1.404425676744015f0.2141516697458002f2.712952875339095f-0.0707750342777061f2.335814148332393f0.03752997919160039f2.587699233052629f0.3431920873337489f2.500170954592771f0.3264793117213138f2.032178493893528f-0.3832867688050214f1.767166451641884f-0.04568417387630694f1.951007159417427f0.2702398443835328f2.229846843686711f0.1703895990391992f1.558581069427728f-1.112902619419618f1.760262413147915f0.04108481089439742f0.9854226134150039f0.3256249222729714f2.169711398852724f-0.3428768516833324f2.432232148110246f0.3022970262134582f2.318368156306509f0.3630776530211939f1.2100583140
63567f-0.1035689983444037f1.607821714364248f-0.3336423086245562f2.117330471317896f0.1897433592271335f0.8251171227396745f0.3937079612641797f1.576561618910286f0.5702075043985405f2.036714768834819f0.05297307267984703f2.236498718031335f0.09022238093420423f2.83750413777004f-0.6174881286919769f2.881311634304046f-0.2513112997976279f3.02175658312238f0.491633716531026f2.369117564089166f-0.4730671748477339f1.130937012579302f-0.1853119264627286f1.955780727376449f0.638229655860915f1.91280709431694f-1.282107376725084f2.08753107079925f-1.678970139538193f2.794845884935492f-0.2482537856362289f1.703020811936693f-0.9203763511599554f1.758574733496744f-0.8666095155727298f1.664122108735067f-0.01588632322054494f3.057316947424249f-0.008656025026176514f2.830173507013125f-0.3137123757322497f1.67551179319461f0.5258274519136896f2.555696499112048f0.2111254365148558f2.40072228704558f-0.1665487037443429f2.451508914204974f0.8631005539536191f1.150634441339154f0.2005011312706865f1.742988841402903f-0.2658741204196208f1.837379294596942f-0.02899002485110245f2.05846106680976f-0.151499256017015f1.682169163340406f0.322962261822139f2.277800772560548f-0.599326957489632f3.226378588500612f0.1852187380108347f1.07754808695362f-0.4404048425372957f2.026171308963926f0.03880484320230392f1.720395797332036f-0.508270592331515f2.397364370678307f-0.2602174858459087f2.044971334421354f-1.020126950802313f2.413858514497774f0.06428720772239548f1.16338668291382f-0.5164272397827921f2.903164732240253f0.2230782008143861f3.091049135183457f0.2035885155335113f1.992382059485539f0.3481507956790337f2.13425545456899f-0.455088676696471: +dtype: float64: +stype: +dense \ No newline at end of file diff --git a/spec/multiclass/one_vs_rest_classifier_spec.rb b/spec/multiclass/one_vs_rest_classifier_spec.rb new file mode 100644 index 0000000..127c409 --- /dev/null +++ b/spec/multiclass/one_vs_rest_classifier_spec.rb @@ -0,0 +1,35 @@ +require 'spec_helper' + +RSpec.describe SVMKit::Multiclass::OneVsRestClassifier do + let(:samples) do + 
SVMKit::Utils.restore_nmatrix(Marshal.load(File.read(__dir__ + '/test_samples_three_clusters.dat'))) + end + let(:labels) do + SVMKit::Utils.restore_nmatrix(Marshal.load(File.read(__dir__ + '/test_labels_three_clusters.dat'))) + end + let(:base_estimator) do + SVMKit::LinearModel::PegasosSVC.new(penalty: 1.0, max_iter: 100, batch_size: 20, random_seed: 1) + end + let(:estimator) { described_class.new(estimator: base_estimator) } + + it 'classifies three clusters.' do + estimator.fit(samples, labels) + score = estimator.score(samples, labels) + expect(score).to eq(1.0) + end + + it 'dumps and restores itself using Marshal module.' do + estimator.fit(samples, labels) + copied = Marshal.load(Marshal.dump(estimator)) + expect(estimator.class).to eq(copied.class) + expect(estimator.estimators.size).to eq(copied.estimators.size) + expect(estimator.estimators[0].class).to eq(copied.estimators[0].class) + expect(estimator.estimators[1].class).to eq(copied.estimators[1].class) + expect(estimator.estimators[2].class).to eq(copied.estimators[2].class) + expect(estimator.estimators[0].weight_vec).to eq(copied.estimators[0].weight_vec) + expect(estimator.estimators[1].weight_vec).to eq(copied.estimators[1].weight_vec) + expect(estimator.estimators[2].weight_vec).to eq(copied.estimators[2].weight_vec) + expect(estimator.classes).to eq(copied.classes) + expect(estimator.params[:estimator].class).to eq(copied.params[:estimator].class) + end +end diff --git a/spec/multiclass/test_labels_three_clusters.dat b/spec/multiclass/test_labels_three_clusters.dat new file mode 100644 index 0000000..2fd6f84 --- /dev/null +++ b/spec/multiclass/test_labels_three_clusters.dat @@ -0,0 +1,7 @@ +{ : +shape[ii,: 
+array[,iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii: +dtype: +int32: +stype: +dense \ No newline at end of file diff --git a/spec/multiclass/test_samples_three_clusters.dat b/spec/multiclass/test_samples_three_clusters.dat new file mode 100644 index 0000000..a9a503e --- /dev/null +++ b/spec/multiclass/test_samples_three_clusters.dat @@ -0,0 +1,6 @@ +{ : +shape[i,i: +array[Xf-2.864926957826744f0.6954301950442197f-3.342291869300222f0.1825248956149158f-2.730410562665777f0.2560228613351597f-2.384548333479682f0.8993476840363449f-3.165996894916268f-0.2578079177235912f-2.876040191212324f0.502841808400966f-2.908615717239453f0.2001369573044791f-2.588717959148688f-0.07679641556073646f-2.511057086160661f-0.01628785975014454f-2.979831731552236f-0.484719746563008f-1.71201123017527f0.02043173289666946f-3.110330591687509f0.3871471428876735f-2.344554525815706f-0.1419050553417981f-2.653920861714317f0.2299702610782776f-3.257159817720557f0.04381888595743626f-2.938622871674538f-1.101321331663896f-2.114381697572645f-0.1579234714332601f-3.588630839836584f-0.367655163671006f-2.819902498290388f-0.1759045880802314f-3.216488602823151f0.1122135483016665f-2.973921009041256f0.3475397362787372f-2.339573716409668f0.3881144926800663f-2.829345832925309f0.7231148395468485f-3.314272177448808f0.4892141742552156f-3.44363110641696f0.5501128762105186f-3.155204334988403f0.4035481749562736f-2.893544944730463f-0.2699590773322158f-2.9695210521196f0.01701144196143791f-3.393977578461233f0.0982201354483149f-2.993305735527149f-0.3105933809428509f-3.643402814235349f-0.2305958400743432f-3.946169455074859f0.001576124631484603f-3.030243371683884f-0.4315994401580532f-2.901997039787287f0.5505693603451498f-4.443491139237103f-0.2301356504974988f-3.181024794553174f-0.9
713981456279264f-2.829775773175177f-0.250570178610908f-3.133178194683929f-0.6442440199987485f-3.299047875228684f-0.4865147018539352f-3.62655750802723f-0.3679091210853068f-2.734161299225383f0.500859491504226f-2.916238756450126f-0.3429023324842929f-3.213505398798536f-0.5334089651044525f-3.021845102475778f-1.115155722656057f-4.088361166569395f0.3249748433108763f-2.535440147514703f-0.4266114914663433f-3.719689318600173f0.1449727816749393f-3.592155670948962f0.05484202159146043f-3.497434445016881f-0.2884150918156207f-3.043210932766437f0.8546570389507837f-3.000052465283137f0.07087067763694715f-3.429333293562384f0.1034324430490368f-2.987221801271855f-0.1643701054371707f-3.718006308456499f-0.205283002926301f-3.098645702232148f0.6532071500009405f-3.121519628507635f-0.1939016490672552f-2.273302947615168f0.3028828119763442f-2.142587108570224f-0.9217806943271787f-2.755461831286552f-0.1390948597897425f-2.839391094814967f0.01090042311084081f-2.544040649325363f-0.1888555953828687f-2.982085791603612f0.229585465400998f-3.531686692070751f0.2687595690967803f-2.245544690132979f-0.02964828949612934f-2.341461311836825f-0.1319860078232858f-2.458067569322465f-0.2642182028636242f-2.806156489704046f0.03759350967281532f-2.331238069483236f0.4300551173748915f-3.277075455119881f-0.2872392362658807f-3.586279798141184f-0.6399701024763482f-2.404926784801689f-0.4056269336970135f-3.954247015072673f-0.6355006619340425f-3.284481658941822f0.6912377605374127f-2.875697060681047f0.0534108757873796f-3.588714943202904f0.3335869522883834f-2.816987051457512f-0.2369552638993847f-3.129711038040938f-0.4444059897628158f-3.111000121283437f-1.202835753168885f-3.168486736896314f0.4191598497169323f-3.67979465621031f-0.06490029062744615f-4.186468642046977f-0.2316504274551406f-3.967419144934487f-0.660577429330165f-2.231049397059085f0.3496395981447329f-3.464157951378561f0.06918860932291826f-2.995057730250192f0.02239154692604439f-3.600495355049953f0.6135433538339137f-3.474457950318172f0.03062007409394906f-2.158379659133128
f0.2574285036918448f-3.070549757315769f-0.7784545041154846f-3.550839481825777f0.2914533874465858f-2.522349320214706f-0.3304422595455005f-3.123021917910282f-0.1805736514795712f-2.853362785194576f0.3804479501855786f-2.433182642325028f0.4165403745116477f-3.226497609248361f-0.3330892707264287f-2.690127784679863f0.8256825505837083f-2.774068895548034f-0.08690282880089f-3.236758623878851f-0.5614830746766037f-2.455083977740889f0.9046226446806539f-2.972538148342056f-0.7122682210407418f3.162573435253726f0.6541656191703881f3.612783699313244f-0.09944105348076264f3.64866478460592f0.1575570796092829f2.883625400144483f0.1252602509372138f4.087664119663192f0.4062050495280819f2.305840211566472f0.5455962268774671f3.967270831094522f0.8208231845510564f2.862427254131573f0.8884499838956882f2.84842580606408f-0.5299304819502011f2.625827427769544f1.018858781890121f3.097283517837063f-0.1547636628143553f2.991409336232674f0.3770973206167993f2.921351440049667f-0.4915920050436803f2.105925327892852f-0.0431142901319192f2.87125224841011f0.7683186080885566f3.053999724682082f-0.524233121357933f2.641461240267172f-0.4927318118794846f2.101645350363655f0.3055141734238599f2.837900536802166f0.0736440464790751f3.14110153491376f0.03259204821224689f4.058139684213232f0.4662728230362461f3.005211346052571f0.8806940585745302f3.257873931396998f0.2837904399204798f2.785473522504401f0.7325647671666551f3.130266744995835f0.5894736892944379f3.148937476594178f0.1800856666515676f3.339832269493137f0.2824688814192333f2.882233375019263f-0.5151331763153203f3.320206410756716f-0.1828439282039963f3.197235144896031f-0.5596727070752755f2.470596344382956f-0.3746461705750473f3.748843298914863f-0.3401529961260191f2.144070054388509f0.2089650748701939f3.681425087983483f-0.5472544085941917f2.201132878931056f-0.9491438656820126f2.321828316211553f-0.1703652106333916f2.419698442556346f0.3442898846157949f3.172469643001777f-0.2037827043462672f3.207913848125124f-0.7730328538479855f2.562837819225241f0.6580861008385255f2.234538353067745f0.454929
516366161f4.130030748696302f0.1222347809619853f2.756343136201003f0.07386023925010927f3.385921306358392f0.2025979678574783f4.404739475553557f-0.2529611982800938f3.225093794249524f0.01751823632688861f3.403030135798902f0.7161159030179767f3.258461425988628f-0.4736549303056492f2.717228646392627f0.4095499821085453f3.390112703364814f0.5418751042212077f3.27627919202819f0.06966749476083665f3.153461889160456f-0.3359896794025861f3.354881475290008f0.4953527698070173f2.742934566939468f0.3921700661088223f3.82760647200793f-0.8545316351955861f2.150321608627741f-0.3192094073573149f4.176862218793826f-0.1907320120590878f3.622547562847013f-0.5386753523661071f3.196566157321042f0.737897851012969f3.108993747944772f0.4220202324417466f2.527667387959191f-0.3855258385089532f3.238188742724533f0.08791420283952042f3.05078924736547f0.2558940898783019f3.174791395080971f0.05921531866206036f3.775444813407687f0.7751458955956176f2.075158097917451f-0.9179627763764989f3.724338673461359f-0.1919815473601002f3.618030606097409f0.3015936158577712f3.110310327126526f0.3608600962465986f3.109944467486755f-0.5236451854436887f3.082696662989683f-0.02264721286808232f2.77963203939399f0.1692333372584749f4.360323056606081f0.638786383293531f2.847347681491777f-0.1015898557745509f3.052435963745f-0.2187381932459229f2.287382260293541f0.3110104523195648f2.265646660913047f-0.5940943151245319f3.265663765948106f0.357049716328023f2.303117796220131f0.009152428829753987f2.503900799691162f0.02152544281469448f3.425684030055136f-0.1126215885016455f2.25681043212231f0.2428259786025041f2.91807803861431f0.3874807140385573f3.21818584595544f-0.1818163624080727f2.730869968057936f0.4012731362978825f3.787281837494129f-0.07266407593825143f2.867766658562771f-0.3505653062933383f3.320903962763166f0.8534478031504243f3.219082189861771f0.2664565816705156f2.164286848977618f0.180021626777026f2.921367607271338f-0.2000526159639434f2.749501881985954f-0.7525710586742854f3.255454799526417f-0.1267893251566977f2.267240694951579f0.2266328087694916f3.472587706
808704f0.03201641026859129f3.009349474531297f0.06804063039202225f3.80769866500894f-0.2198260024727259f1.633493962606519f-0.06967061639443396f2.592842948890696f-0.060189678653097f3.218782194787646f0.5953256484053868f-0.2454150966164322f2.18037788746902f-0.04734951365761784f3.249408863840114f0.1747112541435465f2.535770746808701f0.1399915765432865f3.346990313523073f-0.04757500884879596f3.142532476267319f0.2998714125862969f3.188394625836068f0.2330665672182731f2.360876842428763f-1.09329888646693f3.626712108184402f0.5693450455038411f3.142163414652138f0.5380955358527759f3.620431970165427f-0.1496085910055676f2.81679088254116f-0.3133547299558979f3.38352272005671f0.0357544747884636f2.807854997603628f0.8206795322643261f2.298979483144073f0.2934953963371909f1.553299429696241f0.4421740292916265f3.273747501552816f0.04861048009932695f3.185050931222239f0.06058865146340832f3.255686014486799f0.4962537841391542f3.292315054508718f-0.5784761761993844f3.286289075927897f-0.4509911804175684f2.396991045560466f0.07253880402834502f3.651024083718528f-0.3927186768250254f2.840641530770401f-0.01675002427166761f3.627327514853023f-0.2479484301384337f2.643438374731023f-0.5004819494644428f2.375483482439904f-0.4193808400743811f3.207150810865402f0.3123027119716293f2.270270614020128f-0.3371875316171687f3.03457993447785f0.3484031708193356f1.987927233071689f0.1344736501963441f3.034244984069742f-0.452376933285112f1.845888603157017f-0.1357435344849925f2.845708752017119f0.5095572464288437f2.749146820094271f-0.4724334589718194f3.344016172506719f-0.7880647642321891f3.412291816123239f0.2570658484089068f2.576837169793795f0.3824166456498074f3.544907971211512f-0.0422005421423497f1.647109551045666f0.212875679224751f4.075848852860227f-0.3893922248035998f2.737670930682837f-0.07556992800074791f3.027971857696107f0.5331924942461347f2.558481436744451f-0.4712709717622769f3.991643487094501f-0.1935191588376492f3.185338319331159f-0.2469041130840618f2.878356516078556f-0.6304629349800622f2.358783821640429f-0.4867234258114697f3.
703665201290695f0.5064855919614937f3.235214295726241f0.5628478841415315f3.0666692446934f0.748914739471896f3.004597870596964f0.4951328782960779f2.70361844647529f1.038339723932192f2.490374375704398f0.2777711039521499f2.990340729147412f-0.5114469395149008f3.755400960414096f0.8978224054152578f3.294918035325786f-0.1823507805454374f3.54197021128541f0.6015309201427427f3.094076489728934f-0.3935284839772989f2.28840488759681f0.6079025728583197f2.533136394191352f0.6211283536471968f3.085791374828122f0.410960137975919f3.483248870601497f0.0823008492861624f2.508685877625428f-0.4291974695987429f2.982409726484952f0.25642082948781f3.012387074024002f0.1867324979647327f2.552696676369606f-0.256071891571461f2.967056700373727f0.6003418565906098f3.352891365476113f-0.3921420154310601f3.253296803131405f-1.588203539537881f3.28975339835085f1.073748034184279f3.248680469074076f0.3381330614443318f2.974197223711482f0.301421590441954f3.049632079268552f-0.5506812884002805f2.554244564582064f-0.3626593706661156f2.712162248576321f0.09158867152364154f2.799872099439553f0.2098253993921188f3.182936770639627f-0.317953875872567f3.747192169043496f0.2527946201553096f2.654470989989709f-0.5553472718114334f3.616019106540915f-0.5456090418897219f2.547623218491004f0.03842021078256194f3.961059671335401f-0.2141326252818632f3.617556869042553f-0.475137122535459f3.096453864735282f-0.001383273666138943f2.862163349556265f-0.3789951515039162f2.269970958659582f0.1421291333546617f3.236834809955888f0.4613656950183357f2.636563683676703f-0.2064297066638142f2.246050178765078f-0.04979836735953044f3.131933716335978f-0.7659821030940857f2.414564019929216f-0.3284648749072014f2.732669810858166f-0.07474528969471336f3.13489803611463f1.134105742743526f3.838200847469428f0.1067228258206946f2.846981768510943f0.3220884846667058f3.056031879156058f0.2570070292308804f3.399355178150544f0.02827976294111253f2.376471285384257f-0.004715329404584871f2.875011487696633f-0.8912227128197908f3.091994021943346: +dtype: float64: +stype: +dense \ No newline 
at end of file diff --git a/spec/preprocessing/l2_normalizer_spec.rb b/spec/preprocessing/l2_normalizer_spec.rb new file mode 100644 index 0000000..57c5e37 --- /dev/null +++ b/spec/preprocessing/l2_normalizer_spec.rb @@ -0,0 +1,21 @@ +require 'spec_helper' + +RSpec.describe SVMKit::Preprocessing::L2Normalizer do + let(:n_samples) { 10 } + let(:n_features) { 4 } + let(:samples) do + rng = Random.new(1) + rnd_vals = Array.new(n_samples * n_features) { rng.rand } + NMatrix.new([n_samples, n_features], rnd_vals, dtype: :float64, stype: :dense) + end + + it 'normalizes each sample to unit length.' do + normalizer = described_class.new + normalized = normalizer.fit_transform(samples) + sum_norm = 0.0 + n_samples.times do |n| + sum_norm += normalized.row(n).norm2 + end + expect((sum_norm - n_samples).abs).to be < 1.0e-6 + end +end diff --git a/spec/preprocessing/min_max_scaler_spec.rb b/spec/preprocessing/min_max_scaler_spec.rb new file mode 100644 index 0000000..81200ad --- /dev/null +++ b/spec/preprocessing/min_max_scaler_spec.rb @@ -0,0 +1,35 @@ +require 'spec_helper' + +RSpec.describe SVMKit::Preprocessing::MinMaxScaler do + let(:n_samples) { 10 } + let(:n_features) { 4 } + let(:samples) do + rng = Random.new(1) + rnd_vals = Array.new(n_samples * n_features) { rng.rand } + NMatrix.new([n_samples, n_features], rnd_vals, dtype: :float64, stype: :dense) + end + + it 'normalizes range of features to [0,1].' do + normalizer = described_class.new + normalized = normalizer.fit_transform(samples) + expect(normalized.min.to_a.min).to eq(0) + expect(normalized.max.to_a.max).to eq(1) + end + + it 'normalizes range of features to a given range.' do + normalizer = described_class.new(feature_range: [-3, 2]) + normalized = normalizer.fit_transform(samples) + expect(normalized.min.to_a.min).to eq(-3) + expect(normalized.max.to_a.max).to eq(2) + end + + it 'dumps and restores itself using Marshal module.' 
do + transformer = described_class.new + transformer.fit(samples) + copied = Marshal.load(Marshal.dump(transformer)) + expect(transformer.min_vec).to eq(copied.min_vec) + expect(transformer.max_vec).to eq(copied.max_vec) + expect(transformer.params[:feature_range][0]).to eq(copied.params[:feature_range][0]) + expect(transformer.params[:feature_range][1]).to eq(copied.params[:feature_range][1]) + end +end diff --git a/spec/preprocessing/standard_scaler_spec.rb b/spec/preprocessing/standard_scaler_spec.rb new file mode 100644 index 0000000..b15050b --- /dev/null +++ b/spec/preprocessing/standard_scaler_spec.rb @@ -0,0 +1,28 @@ +require 'spec_helper' + +RSpec.describe SVMKit::Preprocessing::StandardScaler do + let(:n_samples) { 10 } + let(:n_features) { 4 } + let(:samples) do + rng = Random.new(1) + rnd_vals = Array.new(n_samples * n_features) { rng.rand } + NMatrix.new([n_samples, n_features], rnd_vals, dtype: :float64, stype: :dense) + end + + it 'performs standardization of samples.' do + normalizer = described_class.new + normalized = normalizer.fit_transform(samples) + mean_err = (normalized.mean(0) - NMatrix.zeros([1, n_features])).abs.sum(1)[0] + std_err = (normalized.std(0) - NMatrix.ones([1, n_features])).abs.sum(1)[0] + expect(mean_err).to be < 1.0e-8 + expect(std_err).to be < 1.0e-8 + end + + it 'dumps and restores itself using Marshal module.' 
do + transformer = described_class.new + transformer.fit(samples) + copied = Marshal.load(Marshal.dump(transformer)) + expect(transformer.mean_vec).to eq(copied.mean_vec) + expect(transformer.std_vec).to eq(copied.std_vec) + end +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb new file mode 100644 index 0000000..101db2a --- /dev/null +++ b/spec/spec_helper.rb @@ -0,0 +1,14 @@ +require 'bundler/setup' +require 'svmkit' + +RSpec.configure do |config| + # Enable flags like --only-failures and --next-failure + config.example_status_persistence_file_path = '.rspec_status' + + # Disable RSpec exposing methods globally on `Module` and `main` + config.disable_monkey_patching! + + config.expect_with :rspec do |c| + c.syntax = :expect + end +end diff --git a/spec/svmkit_spec.rb b/spec/svmkit_spec.rb new file mode 100644 index 0000000..87ca54c --- /dev/null +++ b/spec/svmkit_spec.rb @@ -0,0 +1,23 @@ +require 'spec_helper' + +RSpec.describe SVMKit do + let(:samples) do + SVMKit::Utils.restore_nmatrix(Marshal.load(File.read(__dir__ + '/test_samples_xor.dat'))) + end + let(:labels) do + SVMKit::Utils.restore_nmatrix(Marshal.load(File.read(__dir__ + '/test_labels_xor.dat'))) + end + let(:estimator) do + SVMKit::LinearModel::PegasosSVC.new(penalty: 1.0, max_iter: 100, batch_size: 20, random_seed: 1) + end + let(:transformer) do + SVMKit::KernelApproximation::RBF.new(gamma: 1.0, n_components: 1024, random_seed: 1) + end + + it 'classifies xor data.' 
do + new_samples = transformer.fit_transform(samples) + estimator.fit(new_samples, labels) + score = estimator.score(new_samples, labels) + expect(score).to eq(1.0) + end +end diff --git a/spec/test_labels_xor.dat b/spec/test_labels_xor.dat new file mode 100644 index 0000000..6c5b157 --- /dev/null +++ b/spec/test_labels_xor.dat @@ -0,0 +1,7 @@ +{ : +shape[ii: +array[iúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiúiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii: +dtype: +int32: +stype: +dense \ No newline at end of file diff --git a/spec/test_samples_xor.dat b/spec/test_samples_xor.dat new file mode 100644 index 0000000..1a2be31 --- /dev/null +++ b/spec/test_samples_xor.dat @@ -0,0 +1,6 @@ +{ : +shape[ii: +array[ 
f-2.209725954409888f-2.046313035896967f-2.637770223820096f-2.77253116452164f-2.174567171236117f-2.900888372067449f-1.94621469707999f-1.568474656201251f-2.199168014839876f-1.782062348233342f-2.469304445441453f-2.822148974543326f-2.58735869351872f-2.583405667532922f-2.137282132828025f-1.453577102092203f-1.01339182131164f-2.821693093595107f-2.40310949872572f-1.716301122501215f-1.949628614104317f-2.567118004531144f-1.626693882803714f-2.324394795852354f-2.226689863491249f-1.947239888500226f-2.076821284379275f-2.231315776728075f-1.192859024194754f-1.323493520389126f-2.717791235463814f-1.839091055686027f-1.65835898540922f-1.602393147032149f-2.663323964414585f-1.824781830023273f-1.666810614633443f-1.498866703934859f-1.850842491026537f-1.524752934033315f-2.372524864553175f-1.491555132186878f-2.639664351415937f-2.134824874839605f-1.483242400864838f-1.883043866500453f-1.728538562907511f-1.903456366485284f-0.6722715641339669f-2.472566621619595f-2.221910415051053f-1.697647549897007f-2.116533204058161f-2.644377895383122f-2.042476389170389f-2.327503449477611f-1.538638975997265f-2.748789745911708f-1.982338642459158f-1.403501550456731f-2.126427217086736f-2.117044353584952f-2.343135394625151f-1.458097478547663f-1.523061845050248f-1.832331351779237f-2.166171787525207f-3.027545844482093f-2.194039744766403f-2.124012930859231f-2.132070202381267f-2.687059805670672f-1.578327000910754f-1.658387653073326f-1.972941778421861f-2.524026964511265f-3.824728704470591f-2.302010321285104f-2.347354822520256f-1.781248221271001f-2.264955389799419f-2.702027884890521f-2.220336797182864f-2.867606154217513f-2.321565650859263f-2.049563335085766f-1.208399406222631f-1.381271898148126f-1.245639470624885f-2.203769166225745f-2.253765083712156f-1.471147536630121f-1.802757438670127f-2.208029171787186f-2.976090686627943f-2.361949138957919f-2.061682116095333f-2.169797813850233f-2.403454485058448f-0.9891101964719065f-2.797379622666086f-1.442771484321178f-2.788006176950626f-2.014410575871367f-1.726942066810199f-1.42734
1446697172f-2.188320331470908f-1.455527819306362f-2.461268400860392f-2.31956821791897f-1.71428451930494f-2.082462509828003f-3.128741123628615f-1.432971249528042f-1.118643976827802f-1.347513709688967f-2.020909487010804f-1.57618422479318f-2.378001482605982f-1.893624994672987f-3.218878705644841f-2.254262060733514f-1.022492302670813f-2.249906649676866f-1.669198311617506f-2.074231055798419f-1.998538881353466f-1.086051516373492f-2.260207622592844f-2.127512472451488f-2.417906961127781f-1.648171149077212f-1.971913655377649f-2.038645486110266f-1.489874395981855f-2.445394163925653f-1.648475442383122f-1.39274601727298f-1.031313695412423f-2.722433990433452f-2.579052088433433f-2.204721211084542f-2.442423777762835f-2.809108824637275f-1.741385139765774f-2.711220171347892f-1.813638097468312f-1.935461788856698f-2.180584760421064f-2.530623800990041f-2.646680320337103f-2.513564767133806f-1.416873292230156f-1.892604744410577f-2.813861570632201f-1.57899787714748f-1.273853301375704f-2.229183247264093f-1.702266044825045f-1.47713478104176f-1.908493410825587f-1.703768297102723f-2.253005529751057f-3.35154675760006f-1.699503972828146f-2.433128878006165f-2.534114435538557f-1.405303339347807f-2.326342722306181f-1.999574052069466f-2.220764667678397f-2.07038397727807f-2.461061382589664f-2.618100956638223f-1.311290091508382f-1.987837733503464f-2.076995340548481f-2.164939382332757f-2.256806694784583f-1.858004114268627f-2.097567140259148f-3.295773310056171f-2.031161046759982f-1.422959414570591f-1.380012358861051f-2.495287257043362f-1.728159029752932f-1.798334528390218f-1.353475943383152f-2.323995109299227f-1.391481891929724f-1.498073139481946f-1.832117936121303f-1.734002167058252f-1.903903310650451f-2.346959055081965f-2.568456723865634f-1.604121048692232f-0.8509505963630755f-2.068515553313639f1.980473992080633f1.532832991015352f2.587144437443715f2.114241373099218f1.210967248318928f2.754281706560419f2.111215991958421f2.336692661326453f2.466617853802961f1.199947191787433f2.662007509803368f1.6021378383
16173f2.565684204060896f2.918527849861725f2.789562722979118f1.941324633285471f1.975465302837642f1.411579686766859f2.051734307086241f1.940081002267145f1.784013408578093f2.141005389237369f1.455329429086531f2.353477473878551f1.447095126188684f1.4335537628719f3.228526624554629f2.679567365653317f1.999897452159218f1.97303262809148f1.911383784601283f1.47720161212007f2.932019788121688f2.242874889537497f2.464225926333839f2.175258355743896f1.812059791833663f2.416530043262444f3.068627060101059f3.219794634145466f1.825233154588107f2.563997664657816f2.283724479274045f1.375948262825396f2.215915877964495f2.008803412367371f2.291185464885974f2.803850393513192f1.656276945293227f2.087156505871117f2.186190528011369f1.825285195634283f2.809415822069282f2.306809094530114f2.939909095836456f2.237377935615228f1.472755801254971f2.622912513884917f1.546945500842862f1.863565299163715f2.725917025814847f2.935598422576422f1.830738619375957f1.765837553329953f1.282452239125041f1.892805794910983f1.762033607909639f2.10846598062564f2.292891358106605f1.934882645718453f1.506302664539913f1.60653597539404f2.345414733703461f2.584713547682172f2.5386256747034f1.169471181554776f2.458000739689926f1.286883466751164f1.891630499878473f2.303237354547932f1.878492467119985f3.10257777443919f2.532901775800108f1.658013158488252f2.032663971672531f2.045838021692607f3.136115925441152f2.649022694981511f2.055775315703674f2.776833690537455f2.503960582898507f2.777995490730129f2.453198028335088f2.49207865412879f1.365330460414654f1.545765814828608f1.615847953017516f2.333551366361f1.473456596877224f2.371085510706323f2.278788501263503f1.448406170174003f1.550172769956415f2.750187785260392f2.216633414906932f1.648334021695811f1.122238446901616f1.503146810613169f2.871026890710231f1.661835853391649f2.534682343350969f1.506021304544449f2.531276923425429f1.817990407771864f2.993919163603474f1.87775541445127f1.403249489289478f1.655860079098122f2.170010092041962f2.167533334991454f2.122490918676056f2.042259458441158f1.776256985723675f2.25962557
5373134f3.116530837713469f1.932265595523527f2.036608697594672f2.313878262144167f2.664430322581723f2.105764176928631f1.251071891723583f1.619359161067621f1.957927169960837f1.518001505408447f1.852551754143429f2.301835981173735f2.699175073694026f2.53859529170548f0.6119010092794652f1.888003367094461f1.744009995361529f1.833764832796767f2.964942143918562f1.458606980885016f1.537416938770287f2.424510646088073f1.925483903375419f1.626675940954076f2.252567778909587f2.15078501746231f2.657229130076675f1.967406044424868f2.515585972968146f3.299881339335508f1.726121776264119f1.221803944333348f1.004547883669043f2.152833142494912f1.622599456402826f2.353351799981525f1.993874205123106f1.51058227762611f2.14490839982864f1.344098268579246f2.450611379132911f2.722847983011017f1.822859664942706f2.042256604844145f0.8906163482235294f1.619211653849986f1.456642344672556f2.027969226509879f2.165801068513638f1.527808609156171f2.342170803822177f1.661650552546892f1.945106749407758f2.649634924948666f1.640082490690888f1.122826780273257f1.990647106136943f1.739912689208994f2.412546111176681f3.128817346763112f3.037169317812931f1.164503192039504f2.135918272019516f1.832510431547257f2.339635098567305f2.413250809145037f2.418786752702883f2.375258095266898f1.928738864919939f1.632992601717846f2.072068368774942f1.971726090449873f1.683580289460739f1.496836368034774f0.9655405485788626f2.171714422270539f-2.482094828604898f2.549973835393851f-2.310497475410877f1.664062969615073f-1.959595059914134f1.852697018581236f-2.193950008909822f1.266471131658795f-1.929407611537223f1.464784955557245f-2.46827358071579f1.375553448202816f-1.694197247431212f1.401971855329453f-2.34438726924387f1.384591453829698f-1.550148953853674f2.206773127775875f-1.575019194410191f1.383876576511997f-2.928410295089662f2.713850267170903f-2.067477102452532f0.7466797006284869f-2.278506454852654f2.211082641880136f-1.756115475415268f2.205764179780152f-2.135439193892279f2.125032533060102f-2.377364611254006f2.69257192874602f-1.723227724908253f2.44125266275376
6f-2.475219426153313f2.147986919186942f-1.562450587986673f1.922955798195345f-2.00574238535309f2.286527800104448f-2.500765577818613f2.208034405999677f-1.347623577326291f2.113688116000044f-1.724114801969897f2.367656015898492f-2.481039055513452f2.288527145299003f-1.794888078136404f1.506711121750958f-2.423794628847879f2.312015005115742f-1.597367179141099f2.82268075312409f-1.417390994481662f2.625617706850293f-1.977304027187669f1.467116204274645f-1.965949604253773f2.422310353222138f-2.048146648751208f2.171318665386751f-2.644230090583928f1.891454974384743f-1.340664484855875f1.140119385945802f-1.508960840342626f1.549013249168635f-2.758605458084379f1.53367973811828f-2.655232702439731f3.211197512398395f-1.526422624545104f2.658663668835037f-2.144440097414108f2.24373096284533f-2.136079345418375f2.819669733958251f-2.216352959308659f1.566924127468159f-2.640697785982987f2.163748605880126f-2.102494759329026f2.306268894850002f-2.577930969705215f1.126631880609389f-1.435095497198797f1.698519980293583f-1.982342345494017f1.630608260347064f-1.501682148964537f2.058662183540394f-2.394883665117311f1.813838182727914f-2.746565880703212f1.95079741671718f-1.717824458850458f1.487835491056227f-2.575367643804582f1.394924677532927f-1.536867845954217f1.739642248790288f-2.163546394584791f1.355222435844242f-1.688265494147111f2.241597963421972f-2.021511018075139f2.022224882696027f-2.664792577719672f2.354683425751427f-2.180475834233427f2.487961370849882f-1.957443768760679f1.846054664556167f-0.6866298506602064f2.459277145756481f-2.007209951331602f1.573228951838999f-2.588454024943389f2.671521892960974f-1.687012805208645f1.532810372583369f-2.883211951824308f2.196512114777364f-2.045455725255278f2.87450052312505f-1.78504790322273f1.925334996698011f-2.097155078736181f2.272026309764271f-2.659645309031776f2.575507754373086f-1.903632515645225f2.56831187774634f-1.315548684981704f3.416875949693514f-1.312088013906121f2.307048472845393f-1.80253299465686f2.238974097018204f-2.224664892698707f1.445018478509778f-1.61390
322585992f1.694012754075513f-1.447144317802501f1.74019897851634f-1.80243579480106f1.570052484780357f-2.430825564204613f2.951907329152281f-1.211189152759307f2.262451366301683f-1.689184649234654f1.513937415155302f-2.182601408038798f2.001002479742152f-1.581772181042111f2.141980120814609f-1.566664887824089f2.430865068317644f-1.718429566427794f2.334299858292801f-1.909541426521935f2.367948638282382f-2.707425533134735f2.12495733243525f-2.238154527089475f2.520105731348521f-2.172540364172282f1.265421153464106f-1.652758744363861f2.354553691688614f-1.654885490584756f1.815794927232912f-2.616437579497469f2.054851867492393f-1.539323957559517f1.936672216989802f-1.535313352049761f2.007487519748884f-2.367509098805971f1.153797082452055f-1.792769130527663f1.943470655179703f-2.514428743491716f2.049814126969735f-2.615206058556327f1.942830759511296f-1.834460512998114f1.979179003081472f-2.137611025401005f1.80235257306542f-2.667698931013502f1.732445274307245f-2.300677668774565f1.892031295492054f-2.373368788044031f2.387586704469228f-1.211953815441953f1.371851039178807f1.159854255272004f-1.548363558469732f2.075085288132565f-2.818666029481107f1.791688574956597f-1.684811383714165f1.809879792357196f-1.326496892453376f1.854731040409917f-1.711759062925384f2.177664716781316f-1.139345889340636f1.976509842796099f-1.852786354086618f2.380708574641018f-1.297068947600675f2.61556960543111f-1.029337098421754f1.523226062577061f-2.135362400245316f2.33969351609646f-2.504103762172859f1.648833107898686f-2.035500933889401f2.399369242989704f-2.4449354793577f1.933767817832783f-2.477906433677814f2.281355943552299f-1.567578605342105f2.363496221467895f-1.299121739276416f2.741810706560631f-1.727537575718966f2.133233953144226f-1.539662586239023f2.058121757191747f-1.317772609673298f1.692331109824532f-0.690385405135796f2.554600213378431f-1.509087209135954f1.790210337594331f-2.105553031494399f2.220415170372322f-2.209475270675419f2.240451471640379f-2.755295180066509f2.197352981240929f-2.477011644380455f1.633490081482901f-
2.716788193305165f2.610279154580495f-2.186929938114539f2.271564826509143f-2.870525138056368f2.654220648968818f-2.101061518361847f2.545109111799274f-2.919760569307566f1.669298106606028f-0.9498597158375748f1.738717343372471f-1.980273078732414f1.185773509810703f-2.20222478928069f2.486784949517612f-1.566947578753116f2.608950805746377f-1.747775698661431f1.787384880236222f-2.113886866535128f1.82815270136305f-1.969262274927078f2.914811967559487f-2.01326203051023f1.609379438111771f-1.229063476828979f2.319902245447732f-0.9603700068836054f2.689224941290097f-3.572228033302092f2.466442029918173f-0.9223917649683044f1.216024011064964f-2.059167249991372f2.123544434307775f-1.968112362599086f1.358772456425681f-2.145833804468147f1.861959078298967f-2.373003933719316f2.77131853030656f-2.415114213541396f2.557276599949378f-1.853464811092968f1.547919042481614f-2.584970008620448f2.030313461653049f-3.000149373898966f1.890976562932404f-1.341847566083953f1.679514995855418f-2.062609239026094f1.587432582209969f-1.664854184390521f2.188843643630582f-0.5550735920706238f1.635219827502458f-1.44575207338103f1.631422923659952f-1.909986069606494f2.533967643252745f-2.551184237810888f2.028453578777348f-1.681972685850571f1.920114001062815f-1.409068455176874f2.408595338805091f-1.773842488124551f1.194802473668968f-1.807993262388843f1.909797828221801f-1.681645226454575f1.188416021468466f-1.528945274665556f2.292483157531493f-1.683754697093865f1.24389673611675f-2.107567489916735f1.862531256673869f-1.697882174139049f2.424423794709005f-2.452012847945099f1.256743377031605f-1.299828407236071f1.954568183320297f-2.428865723102679f2.654279991287781f-2.205792944148397f1.791619104934413f-2.224652903065965f2.208197280151114f-2.123466433117427f1.975757550665343f-1.516835412428488f1.502355553415784f-2.027222725271532f3.195342651576359f-2.347392588992685f1.700637900358486f-2.342421793557332f2.607320491384632f-1.064683652754847f1.769589878660264f-1.822954612692685f2.17362327295358f-1.500010645795693f2.01653751861008f-2.5311
95971978271f2.056141149662497f-2.333741270706003f2.223439298488656f-1.753998123264148f0.781750023723226f-2.305486419356215f1.592884137790688f-2.166870952062416f1.658666879899342f-2.23336570211898f2.429875439819464f-2.331636448235576f2.36768207735888f-1.851463455122584f2.131050991903962f-2.268914494176214f1.617808726348332f-2.069783841166707f1.630021247356442f-1.464023535552554f2.425415750706705f-2.431797054107974f2.901707542909176f-1.511824321667946f2.362637538832634f-2.575321399816765f1.733038987899551f-1.615297820146135f1.535409224037707f-1.500371259206561f2.082577498151028f-1.078661587590334f2.373867942009679f-1.511946394800697f1.423710745215506f-2.458031267181647f1.13995207960811f-1.806335725794262f1.652528388019872f-2.196122664474368: +dtype: float64: +stype: +dense \ No newline at end of file diff --git a/spec/utils_spec.rb b/spec/utils_spec.rb new file mode 100644 index 0000000..93b232d --- /dev/null +++ b/spec/utils_spec.rb @@ -0,0 +1,10 @@ +require 'spec_helper' + +RSpec.describe SVMKit::Utils do + it 'dumps and restores NMatrix object.' do + mat = NMatrix.rand([3, 3]) + dumped = described_class.dump_nmatrix(mat) + restored = described_class.restore_nmatrix(dumped) + expect(mat).to eq(restored) + end +end diff --git a/svmkit.gemspec b/svmkit.gemspec new file mode 100644 index 0000000..a9480b1 --- /dev/null +++ b/svmkit.gemspec @@ -0,0 +1,37 @@ +# coding: utf-8 +lib = File.expand_path('../lib', __FILE__) +$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) + +require 'svmkit/version' + +SVMKit::DESCRIPTION = < 0.2.3' + + spec.add_development_dependency 'bundler', '~> 1.15' + spec.add_development_dependency 'rake', '~> 10.0' + spec.add_development_dependency 'rspec', '~> 3.0' + spec.add_development_dependency 'nmatrix', '~> 0.2.3' +end