Add code comments for documentation

2023-11-19 01:25:33 +00:00 · 2023-11-19 01:25:33 +00:00 · e50de0729c
parent 312d7adf18
commit e50de0729c
2 changed files with 164 additions and 43 deletions
--- a/deepod/metrics/_anomaly_detection.py
+++ b/deepod/metrics/_anomaly_detection.py
@ -6,15 +6,71 @@ from deepod.metrics.affiliation.metrics import pr_from_events


 def auc_roc(y_true, y_score):
+    """
+    Calculates the area under the Receiver Operating Characteristic (ROC) curve.
+
+    Args:
+    
+        y_true (np.array, required): 
+            True binary labels. 0 indicates a normal timestamp, and 1 indicates an anomaly.
+            
+        y_score (np.array, required): 
+            Predicted anomaly scores. A higher score indicates a higher likelihood of being an anomaly.
+
+    Returns:
+    
+        float: 
+            The score of the area under the ROC curve.
+    """
+    
    return metrics.roc_auc_score(y_true, y_score)


 def auc_pr(y_true, y_score):
+    """
+    Calculates the area under the Precision-Recall (PR) curve.
+
+    Args:
+    
+        y_true (np.array, required): 
+            True binary labels. 0 indicates a normal timestamp, and 1 indicates an anomaly.
+            
+        y_score (np.array, required): 
+            Predicted anomaly scores. A higher score indicates a higher likelihood of being an anomaly.
+
+    Returns:
+    
+        float: 
+            The score of the area under the PR curve.
+    """
+    
    return metrics.average_precision_score(y_true, y_score)


 def tabular_metrics(y_true, y_score):
-    """calculate evaluation metrics"""
+    """
+    Calculates evaluation metrics for tabular anomaly detection.
+
+    Args:
+    
+        y_true (np.array, required): 
+            Data labels; 0 indicates a normal sample and 1 indicates an anomaly.
+            
+        y_score (np.array, required): 
+            Predicted anomaly scores; a higher score indicates a higher likelihood of being an anomaly.
+
+    Returns:
+        tuple: A tuple containing:
+        
+        - auc_roc (float):
+            The score of area under the ROC curve.
+            
+        - auc_pr (float):
+            The score of area under the precision-recall curve.
+            
+        - f1 (float): 
+            The F1-score.
+    """

    # F1@k, using real percentage to calculate F1-score
    ratio = 100.0 * len(np.where(y_true == 0)[0]) / len(y_true)
@ -27,12 +83,62 @@ def tabular_metrics(y_true, y_score):


 def ts_metrics(y_true, y_score):
-    """calculate evaluation metrics for time series anomaly detection"""
+    """
+    Calculates evaluation metrics for time series anomaly detection.
+
+    Args:
+    
+        y_true (np.array, required): 
+            Data labels; 0 indicates a normal timestamp and 1 indicates an anomaly.
+            
+        y_score (np.array, required): 
+            Predicted anomaly scores; a higher score indicates a higher likelihood of being an anomaly.
+
+    Returns:
+        tuple: A tuple containing:
+        
+        - roc_auc_score (float):
+            The score of area under the ROC curve.
+            
+        - average_precision_score (float):
+            The score of area under the precision-recall curve.
+            
+        - best_f1 (float): 
+            The best F1-score.
+            
+        - best_p (float): 
+            The best score of precision.
+            
+        - best_r (float): 
+            The best score of recall.
+    """
+    
    best_f1, best_p, best_r = get_best_f1(y_true, y_score)
    return auc_roc(y_true, y_score), auc_pr(y_true, y_score), best_f1, best_p, best_r


 def get_best_f1(label, score):
+    """
+    Returns the best F1-score, precision, and recall.
+
+    Args:
+        label (np.array, required): 
+            Data labels; 0 indicates a normal timestamp and 1 indicates an anomaly.
+            
+        score (np.array, required): 
+            Predicted anomaly scores; a higher score indicates a higher likelihood of being an anomaly.
+
+    Returns:
+        tuple: A tuple containing:
+        
+        - best_f1 (float):
+            The best F1-score.
+        - best_p (float):
+            The best score of precision.
+        - best_r (float):
+            The best score of recall.
+    """
+    
    precision, recall, _ = metrics.precision_recall_curve(y_true=label, probas_pred=score)
    f1 = 2 * precision * recall / (precision + recall + 1e-5)
    best_f1 = f1[np.argmax(f1)]
@ -43,38 +149,53 @@ def get_best_f1(label, score):

 def ts_metrics_enhanced(y_true, y_score, y_test):
    """
-    Compared with ts_metrics, this function can return more metrics
-    with one more input y_test (predictions of events)
-    revised by @Yiyuan Yang 2023/11/08
-
+    This function calculates additional evaluation metrics for time series anomaly detection. It returns a variety of metrics, including those sourced from the code in [A Huet et al. KDD22] and [J Paparrizos et al. VLDB22]. The function requires three inputs: y_true (data label), y_score (predicted anomaly scores), and y_test (predictions of events). 
+    
    Args:
-        y_true:
-        y_score:
-        y_test
+        y_true (np.array): 
+            Data label, where 0 indicates a normal timestamp and 1 indicates an anomaly.
+            
+        y_score (np.array): 
+            Predicted anomaly scores, where a higher score indicates a higher likelihood of being an anomaly.
+        
+        y_test (np.array): 
+            Predictions of events, where 0 indicates a normal timestamp and 1 indicates an anomaly.

    Returns:
-        auroc:
-        aupr:
-        best_f1:
-        best_p:
-        best_r:
-        affiliation_precision:
-        affiliation_recall:
-        vus_r_auroc:
-        vus_r_aupr:
-        vus_roc:
-        vus_pr:
-
-    Example:
-        from deepod.models.time_series import DCdetector
-        clf = DCdetector()
-        clf.fit(X_train)
-        pred, scores = clf.decision_function(X_test)
-
-        from deepod.metrics import point_adjustment
-        from deepod.metrics import ts_metrics_enhanced
-        adj_eval_metrics = ts_metrics_enhanced(labels, point_adjustment(labels, scores), pred)
-        print('adj_eval_metrics',adj_eval_metrics)
+        tuple: A tuple containing:
+        
+        - auroc (float):
+            The score of the area under the ROC curve.
+            
+        - aupr (float):
+            The score of the area under the precision-recall curve.
+            
+        - best_f1 (float): 
+            The best F1-score.
+            
+        - best_p (float): 
+            The best score of precision.
+            
+        - best_r (float): 
+            The best score of recall.
+            
+        - affiliation_precision (float):
+            The score of affiliation precision.
+            
+        - affiliation_recall (float):
+            The score of affiliation recall.
+            
+        - vus_r_auroc (float):
+            The score of range VUS ROC.
+            
+        - vus_r_aupr (float):
+            The score of range VUS PR.
+            
+        - vus_roc (float):
+            The score of VUS ROC.
+            
+        - vus_pr (float):
+            The score of VUS PR.
    """

    best_f1, best_p, best_r = get_best_f1(y_true, y_score)
--- a/deepod/metrics/_tsad_adjustment.py
+++ b/deepod/metrics/_tsad_adjustment.py
@ -5,20 +5,20 @@ def point_adjustment(y_true, y_score):
    """
    adjust the score for segment detection. i.e., for each ground-truth anomaly segment,
    use the maximum score as the score of all points in that segment. This corresponds to point-adjust f1-score.
-    ** This function is copied/modified from the source code in [Zhihan Li et al. KDD21]
+    *This function is copied/modified from the source code in [Zhihan Li et al. KDD21]* 

-    Parameters
-    ----------
-    y_true: np.array, required
-        data label, 0 indicates normal timestamp, and 1 is anomaly
+    Args:
+    
+        y_true (np.array, required): 
+            Data labels; 0 indicates a normal timestamp and 1 indicates an anomaly.
+            
+        y_score (np.array, required): 
+            Predicted anomaly scores; a higher score indicates a higher likelihood of being an anomaly.

-    y_score: np.array, required
-        predicted anomaly scores, higher score indicates higher likelihoods to be anomaly
-
-    Returns
-    -------
-    score: np.array
-        adjusted anomaly scores
+    Returns:
+    
+        np.array: 
+            Adjusted anomaly scores.

    """
    score = y_score.copy()