This commit is contained in:
xuhongzuo 2023-11-07 20:47:50 +08:00
parent 5942ef5d6a
commit 59d2d752f1
3 changed files with 674 additions and 35 deletions

View File

@ -1,35 +0,0 @@
# Read the Docs configuration file for Sphinx projects
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required: version of the configuration file schema
version: 2

# Set the OS, Python version and other tools you might need
build:
  os: ubuntu-22.04
  tools:
    python: "3.12"
    # You can also specify other tool versions:
    # nodejs: "20"
    # rust: "1.70"
    # golang: "1.20"

# Build documentation in the "docs/" directory with Sphinx
sphinx:
  configuration: docs/conf.py
  # You can configure Sphinx to use a different builder, for instance use
  # the dirhtml builder for simpler URLs
  # builder: "dirhtml"
  # Fail on all warnings to avoid broken references
  # fail_on_warning: true

# Optionally build your docs in additional formats such as PDF and ePub
# formats:
#   - pdf
#   - epub

# Optional but recommended, declare the Python requirements required
# to build your documentation
# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
# python:
#   install:
#     - requirements: docs/requirements.txt

171
docs/conf.py Normal file
View File

@ -0,0 +1,171 @@
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
from os.path import dirname, abspath
# Put the parent of the current working directory at the front of sys.path so
# that modules documented with autodoc can be imported.
sys.path.insert(0, abspath('..'))
# Parent of the directory that contains this file.
deepod_dir = dirname(dirname(abspath(__file__)))
version_path = os.path.join(deepod_dir, 'deepod', 'version.py')
# Run version.py inside this module's namespace so that whatever it defines is
# available here without importing the package. The file is a trusted part of
# the repository; never point this at external input.
# The context manager closes the handle deterministically, and the explicit
# encoding keeps the read independent of the build machine's locale.
with open(version_path, encoding='utf-8') as version_file:
    exec(version_file.read())
# -- Project information -----------------------------------------------------
# The author is declared once and reused in the copyright notice.
author = 'Hongzuo Xu'
project = 'deepod'
copyright = '2023, ' + author
# -- General configuration ---------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
# Extensions shipped with Sphinx, listed without their 'sphinx.ext.' prefix.
_builtin_extensions = (
    'autodoc',
    'doctest',
    'intersphinx',
    'coverage',
    'imgmath',
    'viewcode',
)
# Extensions installed separately from Sphinx.
# ('sphinx.ext.napoleon' and 'sphinx_rtd_theme' are currently not enabled.)
_third_party_extensions = (
    'sphinxcontrib.bibtex',
)
extensions = ['sphinx.ext.' + name for name in _builtin_extensions]
extensions.extend(_third_party_extensions)
# BibTeX databases made available to sphinxcontrib.bibtex.
bibtex_bibfiles = ['zreferences.bib']
# Add any paths that contain templates here, relative to this directory.
# Directories that contain custom templates, relative to this directory.
templates_path = ['_templates']
# Filename suffixes that are treated as documentation sources.
# NOTE(review): no Markdown parser extension is enabled in this file, so
# confirm how '.md' sources are meant to be handled.
source_suffix = ['.rst', '.md']
# The master toctree document.
master_doc = 'index'
# Language of the content Sphinx generates automatically; also used when
# translating content through gettext catalogs (usually overridden from the
# command line in that case).
# Sphinx 5.0 deprecated ``None`` here: it warns and falls back to English, so
# the language is now named explicitly.
language = 'en'
# Patterns, relative to the source directory, matching files and directories
# to ignore when looking for sources. They also affect html_static_path and
# html_extra_path.
# NOTE(review): 'pyod.test.rst' looks like a leftover from the PyOD
# configuration; confirm and drop it if this project has no such file.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'pyod.test.rst']
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
# html_theme = 'default'
html_theme = 'furo'
# Theme-specific look-and-feel settings would go in ``html_theme_options``;
# none are set here.
#
# Directories with custom static files (such as style sheets), relative to
# this directory. They are copied after the builtin static files, so a file
# named "default.css" here overwrites the builtin "default.css".
html_static_path = ['_static']
# ``html_sidebars`` (a dictionary mapping document names to sidebar template
# names) is left unset, so the sidebars defined by the theme are used.
# -- Options for HTMLHelp output ---------------------------------------------
# Output file base name for HTML help builder.
htmlhelp_basename = 'deepoddoc'
# -- Options for LaTeX output ------------------------------------------------
# No LaTeX overrides are set. Keys that can be added here include:
#   'papersize'    - paper size ('letterpaper' or 'a4paper')
#   'pointsize'    - font size ('10pt', '11pt' or '12pt')
#   'preamble'     - additional content for the LaTeX preamble
#   'figure_align' - LaTeX figure (float) alignment, e.g. 'htbp'
latex_elements = {}
# Grouping the document tree_ into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
    (
        master_doc,              # source start file
        'deepod.tex',            # target name
        'deepod Documentation',  # title
        'Hongzuo Xu',            # author
        'manual',                # documentclass
    ),
]
# -- Options for manual page output ------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
# The page name and description previously read 'pyod', a leftover from the
# configuration this file was adapted from; they now name this project,
# matching the 'deepod Documentation' title used for the LaTeX output.
man_pages = [
    (
        master_doc,              # source start file
        'deepod',                # name
        'deepod Documentation',  # description
        [author],                # authors
        1,                       # manual section
    ),
]
# -- Options for Texinfo output ----------------------------------------------
# Grouping the document tree_ into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
    (
        master_doc,                          # source start file
        'DeepOD',                            # target name
        'DeepOD Documentation',              # title
        author,                              # author
        'DeepOD',                            # dir menu entry
        # TODO(review): template placeholder; replace with a real summary.
        'One line description of project.',  # description
        'Miscellaneous',                     # category
    ),
]
# -- Extension configuration -------------------------------------------------
# -- Options for intersphinx extension ---------------------------------------
# Example configuration for intersphinx: refer to the Python standard library.
# Maps a project name to (base URL of its documentation, inventory location).
# ``None`` as the inventory location makes intersphinx fetch ``objects.inv``
# from the base URL. The unnamed ``{url: None}`` form used previously is
# deprecated and rejected by recent Sphinx releases.
intersphinx_mapping = {
    'python': ('https://docs.python.org/3', None),
}

503
docs/zreferences.bib Normal file
View File

@ -0,0 +1,503 @@
@inproceedings{liu2008isolation,
title={Isolation forest},
author={Liu, Fei Tony and Ting, Kai Ming and Zhou, Zhi-Hua},
booktitle={Data Mining, 2008. ICDM'08. Eighth IEEE International Conference on},
pages={413--422},
year={2008},
organization={IEEE}
}
@article{liu2012isolation,
title={Isolation-based anomaly detection},
author={Liu, Fei Tony and Ting, Kai Ming and Zhou, Zhi-Hua},
journal={ACM Transactions on Knowledge Discovery from Data (TKDD)},
volume={6},
number={1},
pages={3},
year={2012},
publisher={ACM}
}
@inproceedings{kriegel2011interpreting,
title={Interpreting and unifying outlier scores},
author={Kriegel, Hans-Peter and Kr{\"o}ger, Peer and Schubert, Erich and Zimek, Arthur},
booktitle={Proceedings of the 2011 SIAM International Conference on Data Mining},
pages={13--24},
year={2011},
organization={SIAM}
}
@article{aggarwal2015theoretical,
title={Theoretical foundations and algorithms for outlier ensembles},
author={Aggarwal, Charu C and Sathe, Saket},
journal={ACM SIGKDD Explorations Newsletter},
volume={17},
number={1},
pages={24--47},
year={2015},
publisher={ACM}
}
@inproceedings{ramaswamy2000efficient,
title={Efficient algorithms for mining outliers from large data sets},
author={Ramaswamy, Sridhar and Rastogi, Rajeev and Shim, Kyuseok},
booktitle={ACM Sigmod Record},
volume={29},
number={2},
pages={427--438},
year={2000},
organization={ACM}
}
@inproceedings{angiulli2002fast,
title={Fast outlier detection in high dimensional spaces},
author={Angiulli, Fabrizio and Pizzuti, Clara},
booktitle={European Conference on Principles of Data Mining and Knowledge Discovery},
pages={15--27},
year={2002},
organization={Springer}
}
@inproceedings{kriegel2008angle,
title={Angle-based outlier detection in high-dimensional data},
author={Kriegel, Hans-Peter and Zimek, Arthur and others},
booktitle={Proceedings of the 14th ACM SIGKDD international conference on Knowledge discovery and data mining},
pages={444--452},
year={2008},
organization={ACM}
}
@inproceedings{lazarevic2005feature,
title={Feature bagging for outlier detection},
author={Lazarevic, Aleksandar and Kumar, Vipin},
booktitle={Proceedings of the eleventh ACM SIGKDD international conference on Knowledge discovery in data mining},
pages={157--166},
year={2005},
organization={ACM}
}
@article{goldstein2012histogram,
title={Histogram-based outlier score (hbos): A fast unsupervised anomaly detection algorithm},
author={Goldstein, Markus and Dengel, Andreas},
journal={KI-2012: Poster and Demo Track},
pages={59--63},
year={2012},
publisher={Citeseer}
}
@techreport{shyu2003novel,
title={A novel anomaly detection scheme based on principal component classifier},
author={Shyu, Mei-Ling and Chen, Shu-Ching and Sarinnapakorn, Kanoksri and Chang, LiWu},
year={2003},
institution={MIAMI UNIV CORAL GABLES FL DEPT OF ELECTRICAL AND COMPUTER ENGINEERING}
}
@inproceedings{aggarwal2015outlier,
title={Outlier analysis},
author={Aggarwal, Charu C},
booktitle={Data mining},
pages={75--79},
year={2015},
organization={Springer}
}
@inproceedings{breunig2000lof,
title={LOF: identifying density-based local outliers},
author={Breunig, Markus M and Kriegel, Hans-Peter and Ng, Raymond T and Sander, J{\"o}rg},
booktitle={ACM sigmod record},
volume={29},
number={2},
pages={93--104},
year={2000},
organization={ACM}
}
@inproceedings{zhao2018xgbod,
title={XGBOD: Improving Supervised Outlier Detection with Unsupervised Representation Learning},
author={Zhao, Yue and Hryniewicki, Maciej K},
booktitle={International Joint Conference on Neural Networks (IJCNN)},
year={2018},
organization={IEEE}
}
@article{rousseeuw1999fast,
title={A fast algorithm for the minimum covariance determinant estimator},
author={Rousseeuw, Peter J and Driessen, Katrien Van},
journal={Technometrics},
volume={41},
number={3},
pages={212--223},
year={1999},
publisher={Taylor \& Francis Group}
}
@article{hardin2004outlier,
title={Outlier detection in the multiple cluster setting using the minimum covariance determinant estimator},
author={Hardin, Johanna and Rocke, David M},
journal={Computational Statistics \& Data Analysis},
volume={44},
number={4},
pages={625--638},
year={2004},
publisher={Elsevier}
}
@article{he2003discovering,
title={Discovering cluster-based local outliers},
author={He, Zengyou and Xu, Xiaofei and Deng, Shengchun},
journal={Pattern Recognition Letters},
volume={24},
number={9-10},
pages={1641--1650},
year={2003},
publisher={Elsevier}
}
@inproceedings{zhao2018dcso,
title={DCSO: Dynamic Combination of Detector Scores for Outlier Ensembles},
author={Yue Zhao and Maciej K. Hryniewicki},
booktitle={ACM SIGKDD Workshop on Outlier Detection De-constructed (ODD v5.0)},
year={2018},
organization={ACM}
}
@techreport{janssens2012stochastic,
title={Stochastic outlier selection},
author={Janssens, JHM and Husz{\'a}r, Ferenc and Postma, EO and van den Herik, HJ},
year={2012},
institution={Technical report TiCC TR 2012-001, Tilburg University, Tilburg Center for Cognition and Communication, Tilburg, The Netherlands}
}
@inproceedings{papadimitriou2003loci,
title={LOCI: Fast outlier detection using the local correlation integral},
author={Papadimitriou, Spiros and Kitagawa, Hiroyuki and Gibbons, Phillip B and Faloutsos, Christos},
booktitle={Data Engineering, 2003. Proceedings. 19th International Conference on},
pages={315--326},
year={2003},
organization={IEEE}
}
@inproceedings{zhao2019lscp,
title={{LSCP:} Locally Selective Combination in Parallel Outlier Ensembles},
author={Zhao, Yue and Nasrullah, Zain and Hryniewicki, Maciej K and Li, Zheng},
booktitle={Proceedings of the 2019 {SIAM} International Conference on Data Mining, {SDM} 2019},
pages={585--593},
month = {May},
year={2019},
address = {Calgary, Canada},
organization={SIAM},
url={https://doi.org/10.1137/1.9781611975673.66},
doi={10.1137/1.9781611975673.66}
}
@article{liu2019generative,
title={Generative adversarial active learning for unsupervised outlier detection},
author={Liu, Yezheng and Li, Zhe and Zhou, Chong and Jiang, Yuanchun and Sun, Jianshan and Wang, Meng and He, Xiangnan},
journal={IEEE Transactions on Knowledge and Data Engineering},
year={2019},
publisher={IEEE}
}
@article{zhao2019pyod,
title={{PyOD}: A python toolbox for scalable outlier detection},
author={Zhao, Yue and Nasrullah, Zain and Li, Zheng},
journal={Journal of Machine Learning Research},
volume={20},
number={96},
pages={1--7},
year={2019}
}
@article{ramakrishnan2019anomaly,
title={Anomaly Detection for an E-commerce Pricing System},
author={Ramakrishnan, Jagdish and Shaabani, Elham and Li, Chao and Sustik, M{\'a}ty{\'a}s A},
journal={arXiv preprint arXiv:1902.09566},
year={2019}
}
@inproceedings{kalayci2018anomaly,
title={Anomaly Detection in Wireless Sensor Networks Data by Using Histogram Based Outlier Score Method},
author={Kalayc{\i}, {\.I}lker and Ercan, Tuncay},
booktitle={2018 2nd International Symposium on Multidisciplinary Studies and Innovative Technologies (ISMSIT)},
pages={1--6},
year={2018},
organization={IEEE}
}
@article{scholkopf2001estimating,
title={Estimating the support of a high-dimensional distribution},
author={Sch{\"o}lkopf, Bernhard and Platt, John C and Shawe-Taylor, John and Smola, Alex J and Williamson, Robert C},
journal={Neural computation},
volume={13},
number={7},
pages={1443--1471},
year={2001},
publisher={MIT Press}
}
@inproceedings{tang2002enhancing,
title={Enhancing effectiveness of outlier detections for low density patterns},
author={Tang, Jian and Chen, Zhixiang and Fu, Ada Wai-Chee and Cheung, David W},
booktitle={Pacific-Asia Conference on Knowledge Discovery and Data Mining},
pages={535--548},
year={2002},
organization={Springer}
}
@article{krishnan2019alphaclean,
title={AlphaClean: Automatic Generation of Data Cleaning Pipelines},
author={Krishnan, Sanjay and Wu, Eugene},
journal={arXiv preprint arXiv:1904.11827},
year={2019}
}
@inproceedings{kriegel2009outlier,
title={Outlier detection in axis-parallel subspaces of high dimensional data},
author={Kriegel, Hans-Peter and Kr{\"o}ger, Peer and Schubert, Erich and Zimek, Arthur},
booktitle={Pacific-Asia Conference on Knowledge Discovery and Data Mining},
pages={831--838},
year={2009},
organization={Springer}
}
@inproceedings{li2019mad,
title={MAD-GAN: Multivariate anomaly detection for time series data with generative adversarial networks},
author={Li, Dan and Chen, Dacheng and Jin, Baihong and Shi, Lei and Goh, Jonathan and Ng, See-Kiong},
booktitle={International Conference on Artificial Neural Networks},
pages={703--716},
year={2019},
organization={Springer}
}
@article{wang2019advae,
title={adVAE: A self-adversarial variational autoencoder with Gaussian anomaly prior knowledge for anomaly detection},
author={Wang, Xuhong and Du, Ying and Lin, Shijie and Cui, Ping and Shen, Yuntian and Yang, Yupu},
journal={Knowledge-Based Systems},
year={2019},
publisher={Elsevier}
}
@inproceedings{gopalan2019pidforest,
title={PIDForest: Anomaly Detection via Partial Identification},
author={Gopalan, Parikshit and Sharan, Vatsal and Wieder, Udi},
booktitle={Advances in Neural Information Processing Systems},
pages={15783--15793},
year={2019}
}
@inproceedings{arning1996linear,
title={A Linear Method for Deviation Detection in Large Databases.},
author={Arning, Andreas and Agrawal, Rakesh and Raghavan, Prabhakar},
booktitle={KDD},
volume={1141},
number={50},
pages={972--981},
year={1996}
}
@article{kingma2013auto,
title={Auto-encoding variational bayes},
author={Kingma, Diederik P and Welling, Max},
journal={arXiv preprint arXiv:1312.6114},
year={2013}
}
@article{pevny2016loda,
title={Loda: Lightweight on-line detector of anomalies},
author={Pevn{\'y}, Tom{\'a}{\v{s}}},
journal={Machine Learning},
volume={102},
number={2},
pages={275--304},
year={2016},
publisher={Springer}
}
@article{burgess2018understanding,
title={Understanding disentangling in $\beta$-VAE},
author={Burgess, Christopher P and Higgins, Irina and Pal, Arka and Matthey, Loic and Watters, Nick and Desjardins, Guillaume and Lerchner, Alexander},
journal={arXiv preprint arXiv:1804.03599},
year={2018}
}
@book{iglewicz1993detect,
title={How to detect and handle outliers},
author={Iglewicz, Boris and Hoaglin, David Caster},
volume={16},
year={1993},
publisher={Asq Press}
}
@inproceedings{li2020copod,
title={{COPOD:} Copula-Based Outlier Detection},
author={Li, Zheng and Zhao, Yue and Botta, Nicola and Ionescu, Cezar and Hu, Xiyang},
booktitle={IEEE International Conference on Data Mining (ICDM)},
year={2020},
organization={IEEE},
}
@article{almardeny2020novel,
title={A Novel Outlier Detection Method for Multivariate Data},
author={Almardeny, Yahya and Boujnah, Noureddine and Cleary, Frances},
journal={IEEE Transactions on Knowledge and Data Engineering},
year={2020},
publisher={IEEE}
}
@article{zhao2021suod,
title={SUOD: Accelerating Large-scale Unsupervised Heterogeneous Outlier Detection},
author={Zhao, Yue and Hu, Xiyang and Cheng, Cheng and Wang, Cong and Wan, Changlin and Wang, Wen and Yang, Jianing and Bai, Haoping and Li, Zheng and Xiao, Cao and Wang, Yunlong and Qiao, Zhi and Sun, Jimeng and Akoglu, Leman},
journal={Proceedings of Machine Learning and Systems},
year={2021}
}
@article{ruff2018deepsvdd,
title={Deep One-Class Classification},
author={Ruff, Lukas and Vandermeulen, Robert and Görnitz, Nico and Deecke, Lucas and Siddiqui, Shoaib and Binder, Alexander and Müller, Emmanuel and Kloft, Marius},
journal={International conference on machine learning},
year={2018}
}
@article{birge2006many,
title={How many bins should be put in a regular histogram},
author={Birg{\'e}, Lucien and Rozenholc, Yves},
journal={ESAIM: Probability and Statistics},
volume={10},
pages={24--45},
year={2006},
publisher={EDP Sciences}
}
@inproceedings{perini2020quantifying,
title={Quantifying the confidence of anomaly detectors in their example-wise predictions},
author={Perini, Lorenzo and Vercruyssen, Vincent and Davis, Jesse},
booktitle={Joint European Conference on Machine Learning and Knowledge Discovery in Databases},
pages={227--243},
year={2020},
publisher={Springer}
}
@article{li2021ecod,
title={ECOD: Unsupervised Outlier Detection Using Empirical Cumulative Distribution Functions},
author={Li, Zheng and Zhao, Yue and Hu, Xiyang and Botta, Nicola and Ionescu, Cezar and Chen, H. George},
journal={IEEE Transactions on Knowledge and Data Engineering},
year={2022},
publisher={IEEE}
}
@article{cook1977detection,
title={Detection of influential observation in linear regression},
author={Cook, R Dennis},
journal={Technometrics},
volume={19},
number={1},
pages={15--18},
year={1977},
publisher={Taylor \& Francis}
}
@inproceedings{latecki2007outlier,
title={Outlier detection with kernel density functions},
author={Latecki, Longin Jan and Lazarevic, Aleksandar and Pokrajac, Dragoljub},
booktitle={International Workshop on Machine Learning and Data Mining in Pattern Recognition},
pages={61--75},
year={2007},
organization={Springer}
}
@article{sugiyama2013rapid,
title={Rapid distance-based outlier detection via sampling},
author={Sugiyama, Mahito and Borgwardt, Karsten},
journal={Advances in neural information processing systems},
volume={26},
year={2013}
}
@article{bandaragoda2018isolation,
title={Isolation-based anomaly detection using nearest-neighbor ensembles},
author={Bandaragoda, Tharindu R and Ting, Kai Ming and Albrecht, David and Liu, Fei Tony and Zhu, Ye and Wells, Jonathan R},
journal={Computational Intelligence},
volume={34},
number={4},
pages={968--998},
year={2018},
publisher={Wiley Online Library}
}
@inproceedings{schlegl2017unsupervised,
title={Unsupervised anomaly detection with generative adversarial networks to guide marker discovery},
author={Schlegl, Thomas and Seeb{\"o}ck, Philipp and Waldstein, Sebastian M and Schmidt-Erfurth, Ursula and Langs, Georg},
booktitle={International conference on information processing in medical imaging},
pages={146--157},
year={2017},
organization={Springer}
}
@inproceedings{goodge2022lunar,
title={Lunar: Unifying local outlier detection methods via graph neural networks},
author={Goodge, Adam and Hooi, Bryan and Ng, See-Kiong and Ng, Wee Siong},
booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
volume={36},
number={6},
pages={6737--6745},
year={2022}
}
@article{han2022adbench,
title={ADBench: Anomaly Detection Benchmark},
author={Han, Songqiao and Hu, Xiyang and Huang, Hailiang and Jiang, Mingqi and Zhao, Yue},
journal={arXiv preprint arXiv:2206.09426},
year={2022}
}
@inproceedings{you2017provable,
title={Provable self-representation based outlier detection in a union of subspaces},
author={You, Chong and Robinson, Daniel P and Vidal, Ren{\'e}},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={3395--3404},
year={2017}
}
@inproceedings{zenati2018adversarially,
title={Adversarially learned anomaly detection},
author={Zenati, Houssam and Romain, Manon and Foo, Chuan-Sheng and Lecouat, Bruno and Chandrasekhar, Vijay},
booktitle={2018 IEEE International conference on data mining (ICDM)},
pages={727--736},
year={2018},
organization={IEEE}
}
@article{hoffmann2007kernel,
title={Kernel PCA for novelty detection},
author={Hoffmann, Heiko},
journal={Pattern recognition},
volume={40},
number={3},
pages={863--874},
year={2007},
publisher={Elsevier}
}
@article{fang2001wrap,
title={Wrap-around L2-discrepancy of random sampling, Latin hypercube and uniform designs},
author={Fang, Kai-Tai and Ma, Chang-Xing},
journal={Journal of complexity},
volume={17},
number={4},
pages={608--624},
year={2001},
publisher={Elsevier}
}
@article{xu2023dif,
author={Xu, Hongzuo and Pang, Guansong and Wang, Yijie and Wang, Yongjun},
journal={IEEE Transactions on Knowledge and Data Engineering},
title={Deep Isolation Forest for Anomaly Detection},
year={2023},
volume={},
number={},
pages={1--14},
doi={10.1109/TKDE.2023.3270293}
}