From d18871dd06149b6033b31b03c19a3e25e8f10e4f Mon Sep 17 00:00:00 2001 From: Nicolas Barthes <nicolas.barthes@cnrs.fr> Date: Fri, 19 Apr 2024 13:56:34 +0200 Subject: [PATCH] LWPLSR as a documented class --- docs/index.md | 5 ++- docs/model_creation.md | 7 +++ mkdocs.yml | 1 + src/Class_Mod/HDBSCAN_Clustering.py | 3 +- src/Class_Mod/LWPLSR_.py | 69 ++++++++++++++++++++--------- 5 files changed, 61 insertions(+), 24 deletions(-) create mode 100644 docs/model_creation.md diff --git a/docs/index.md b/docs/index.md index 17a491a..10504b2 100644 --- a/docs/index.md +++ b/docs/index.md @@ -8,4 +8,7 @@ This workflow aims at ... ## Clustering [K-Means](Clustering.md#k-means-clustering) -[HDBSCAN](Clustering.md#hdbscan-clustering) \ No newline at end of file +[HDBSCAN](Clustering.md#hdbscan-clustering) + +## Models Creation +[lwPlsR from Jchemo (Julia)](model_creation.md) \ No newline at end of file diff --git a/docs/model_creation.md b/docs/model_creation.md new file mode 100644 index 0000000..09695a6 --- /dev/null +++ b/docs/model_creation.md @@ -0,0 +1,7 @@ +# Models creation + +## PLSR from Pinard (scikit learn) +::: src.Class_Mod.KMEANS_.Sk_Kmeans + +## lwPlsR from Jchemo (Julia) +::: src.Class_Mod.LWPLSR_.LWPLSR \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index fd8d5d9..445e28b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -4,6 +4,7 @@ nav: - Home: 'index.md' - Dimensionality Reduction: 'Dimensionality_Reduction.md' - Clustering Methods: 'Clustering.md' + - Models Creation: 'model_creation.md' theme: diff --git a/src/Class_Mod/HDBSCAN_Clustering.py b/src/Class_Mod/HDBSCAN_Clustering.py index 9f64bce..ccecf5f 100644 --- a/src/Class_Mod/HDBSCAN_Clustering.py +++ b/src/Class_Mod/HDBSCAN_Clustering.py @@ -132,7 +132,8 @@ class Hdbscan: dist_function (func): function to determine distance between objects func args must be [np.array, np.array] where each array is a point - Returns: mutual_reachability (float) + Returns: + mutual_reachability (float) mutual reachability between points i and j """ diff --git a/src/Class_Mod/LWPLSR_.py b/src/Class_Mod/LWPLSR_.py index 18c6755..b5c7e49 100644 --- a/src/Class_Mod/LWPLSR_.py +++ b/src/Class_Mod/LWPLSR_.py @@ -1,25 +1,50 @@ from Packages import * from Class_Mod.Miscellaneous import * -def LWPLSR(x_train, x_test, y_train, y_test): - # prepare to send dataframes to julia and Jchemo - Main.x_train,Main.y_train,Main.x_test,Main.y_test = x_train, y_train, x_test, y_test - Main.eval(""" - #convert python pd.dataframes to julia dataframes - x_train_j = Main.x_train |> Pandas.DataFrame|> DataFrames.DataFrame; - y_train_j = Main.y_train |> Pandas.DataFrame|> DataFrames.DataFrame; - x_test_j = Main.x_test |> Pandas.DataFrame|> DataFrames.DataFrame; - y_test_j = Main.y_test |> Pandas.DataFrame|> DataFrames.DataFrame; - # Compute model - nlvdis = 5 ; metric = :mah - h = 1 ; k = 200 ; nlv = 15 #; scal = true - mod = Main.Jchemo.model(Main.Jchemo.lwplsr; nlvdis, metric, h, k, nlv) - Main.Jchemo.fit!(mod, X_train_j, y_train_j) - # predictions on test data calculation - res = Main.Jchemo.predict(mod, X_test_j) ; - score = Main.Jchemo.rmsep(res.pred, y_test_j) - resjp = Pandas.DataFrame(res.pred); - """) - score = Main.score - predicted_results_on_test = pd.DataFrame(Main.resjp) - return score, predicted_results_on_test \ No newline at end of file +class LWPLSR: + """ + The UMAP dimension reduction algorithm from scikit learn + """ + def __init__(self, x_train, x_test, y_train, y_test): + """Initiate the LWPLSR and prepare data for Julia computing.""" + self.x_train, self.y_train, self.x_test, self.y_test = x_train, x_test, y_train, y_test + # prepare to send dataframes to julia and Jchemo + self.Main.x_train,self.Main.y_train,self.Main.x_test,self.Main.y_test = self.x_train, self.y_train, self.x_test, self.y_test + + def Jchemo_lwplsr(self): + """Send data to Julia to compute lwplsr. + + Args: + self.Main.x_train (DataFrame): + self.Main.y_train (DataFrame): + self.Main.x_test (DataFrame): + self.Main.y_test (DataFrame): + + Returns: + self.scores (DataFrame): various metrics and scores + self.predicted_results_on_test (DataFrame): + """ + # launch Julia Jchemo lwplsr + Main.eval(""" + #convert python pd.dataframes to julia dataframes + x_train_j = self.Main.x_train |> Pandas.DataFrame|> DataFrames.DataFrame; + y_train_j = self.Main.y_train |> Pandas.DataFrame|> DataFrames.DataFrame; + x_test_j = self.Main.x_test |> Pandas.DataFrame|> DataFrames.DataFrame; + y_test_j = self.Main.y_test |> Pandas.DataFrame|> DataFrames.DataFrame; + # Compute model + nlvdis = 5 ; metric = :mah + h = 1 ; k = 200 ; nlv = 15 #; scal = true + mod = Main.Jchemo.model(Main.Jchemo.lwplsr; nlvdis, metric, h, k, nlv) + Main.Jchemo.fit!(mod, X_train_j, y_train_j) + # predictions on test data calculation + res = Main.Jchemo.predict(mod, X_test_j) ; + scores = Main.Jchemo.mse(res.pred, y_test_j) + scoresjp = Pandas.DataFrame(scores); + resjp = Pandas.DataFrame(res.pred); + """) + self.scores = self.Main.scoresjp + self.predicted_results_on_test = pd.DataFrame(self.Main.resjp) + + @property + def scores_(self): + return self.scores, self.predicted_results_on_test \ No newline at end of file -- GitLab