update

5748a0e9 · DIANE · 0565c2d0 · 5748a0e9 · 5748a0e9 · 5748a0e9
Commit 5748a0e9 authored 5 months ago by DIANE
--- a/src/form_data.json
+++ b/src/form_data.json
-{"meta_project": "ds", "meta_sample_species": "ds", "meta_sample_category": "Other", "meta_sample_pretreatment": "Pastile", "meta_machine_ID": "ds", "meta_sample_sub_category": "Animal part", "meta_sample_humidity": "Wet", "meta_scan_place": "Pace"}
\ No newline at end of file
+{"meta_project": "abderrahim", "meta_sample_species": "diane", "meta_sample_category": "Soil", "meta_sample_pretreatment": "Powder", "meta_machine_ID": "diane", "meta_sample_sub_category": "Leaf litter", "meta_sample_humidity": "Dry", "meta_scan_place": "Pace"}
\ No newline at end of file
--- a/src/star.pkl
+++ b/src/star.pkl
--- a/src/utils/Untitled-1.ipynb
+++ b/src/utils/Untitled-1.ipynb
@@ -275,23 +275,29 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 355,
+   "execution_count": 55,
   "metadata": {},
   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "4\n",
+      "1\n"
+     ]
+    },
    {
     "data": {
      "text/plain": [
-       "2"
+       "{}"
      ]
     },
-     "execution_count": 355,
+     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
-   "source": [
-    "min(15,2)"
-   ]
+   "source": []
  },
  {
   "cell_type": "code",

 %% Cell type:code id: tags:

 ``` python
 import pandas as pd
 import matplotlib.pyplot as plt
 from pathlib import Path
 import numpy as np
 ```

 %% Cell type:code id: tags:

 ``` python
 # Load the table from a semicolon-separated CSV that uses '.' as the decimal
 # mark; the first column becomes the row index.
 path = 'C:/Users/diane/Desktop/xcalxcal.csv'
 df = pd.read_csv(
     path,
     sep=";",
     decimal='.',
     index_col=0,
 )
 ```

 %% Cell type:code id: tags:

 ``` python
    def kmeans(x, max_k):
        """Cluster the rows of ``x`` with k-means and pick one medoid per cluster.

        The number of clusters comes from ``Cluster.find_optimal_k``; the model
        is then fitted with a fixed ``random_state`` of 42.

        Args:
            x: Table of samples; it must expose ``.index`` and ``.loc``
                (presumably a pandas DataFrame with one row per sample --
                TODO confirm against the caller).
            max_k: Currently unused.
                NOTE(review): the signature of ``Cluster.find_optimal_k`` is
                not visible here, so ``max_k`` is not forwarded to it.

        Returns:
            A ``(res, medoids)`` pair where ``res`` is a tuple of
            ``(cluster_label, row_label)`` pairs in row order, and ``medoids``
            maps each ``'cluster#<n>'`` label (numbered from 1) to the index
            label of the member row closest to that cluster's centre.
        """
        k = Cluster.find_optimal_k(X=x)
        model = KMeans(n_clusters=k, init='k-means++', max_iter=300, n_init=10, random_state=42)
        model.fit(x)
        labels = model.predict(x)
        # Cluster labels are shown 1-based: cluster#1, cluster#2, ...
        clu = [f'cluster#{i}' for i in labels + 1]
        res = tuple(zip(clu, x.index))
        centers = model.cluster_centers_

        medoids = {}
        for pos, center in enumerate(centers):
            members = x.loc[labels == pos]
            if members.empty:
                # No row was assigned to this centre, so there is nothing to pick.
                continue
            # Position (within `members`) of the row nearest to the centre.
            nearest, _ = pairwise_distances_argmin_min(
                center.reshape(1, -1), members.to_numpy()
            )
            medoids[f'cluster#{pos + 1}'] = members.index[nearest[0]]

        return res, medoids
 ```

 %% Output

               name
    cluster#2  s126
    cluster#2  s256
    cluster#1   s27
    cluster#2  s166
    cluster#2   s27
    ...         ...
    cluster#2  s356
    cluster#2  s357
    cluster#1  s358
    cluster#2  s359
    cluster#1  s360
    
    [361 rows x 1 columns]

    ---------------------------------------------------------------------------
    ValueError                                Traceback (most recent call last)
 Cell     In[32], line 87
         84         return result
         86 m= SkKmeans()
    ---> 87 a,b = m.kmeans1layer(x = df, ratio = 0.2, max_k=10)
    ValueError: not enough values to unpack (expected 2, got 1)

 %% Cell type:code id: tags:

 ``` python



 # Build the clustering helper and run k-means on the loaded table.
 # NOTE(review): SkKmeans is not defined in the visible cells -- confirm where
 # it is declared before relying on this cell.
 M = SkKmeans()
 res, medoids = M.kmeans(df, max_k=40)
 # Bare expression so the notebook displays the medoids as the cell result.
 medoids
 ```

 %% Output

    {'cluster#1': 's315', 'cluster#2': 's303'}

 %% Cell type:code id: tags:

 ``` python
 ```

 %% Output

              names
    cluster#2  s126
    cluster#2  s256
    cluster#1   s27
    cluster#2  s166
    cluster#2   s27
    ...         ...
    cluster#2  s356
    cluster#2  s357
    cluster#1  s358
    cluster#2  s359
    cluster#1  s360
    
    [361 rows x 1 columns]

 %% Cell type:code id: tags:

 ``` python
 # Second name for the loaded DataFrame (no copy is made).
 X = df
 ```

 %% Cell type:code id: tags:

 ``` python
 def selection_method(X, method, **kwargs):
    """Select representative samples from ``X`` with the requested strategy.

    Supported ``method`` values:

    * ``'random'`` -- split with scikit-learn's ``train_test_split``.
    * ``'kennard-stone'`` -- split with ``kennard_stone.train_test_split``.
    * ``'meta-medoids'`` -- cluster with k-means, then keep the medoid (the
      member with the smallest summed Euclidean distance to the other
      members) of every cluster.
    * ``'meta-ks'`` -- cluster with k-means, then run a Kennard-Stone split
      inside every cluster.

    For the two ``meta-*`` methods the number of clusters is the ``k`` in
    ``range(2, min(10, n_rows))`` with the highest silhouette score (2 when
    that range is empty).

    Args:
        X: pandas DataFrame with one row per sample; row labels are the
            sample names that get returned.
        method: One of the strategy names listed above.
        **kwargs: ``rset`` is the ``train_size`` used by ``'random'`` and
            ``'kennard-stone'``; ``rset_meta`` is the per-cluster
            ``train_size`` used by ``'meta-ks'``.

    Returns:
        ``'random'`` / ``'kennard-stone'``: the index of the selected rows.
        ``'meta-medoids'``: a list with one row label per cluster.
        ``'meta-ks'``: a list with one index object per cluster.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
        KeyError: If the keyword argument required by ``method`` is missing.
    """
    split_methods = ('random', 'kennard-stone')
    cluster_methods = ('meta-ks', 'meta-medoids')
    if method not in split_methods + cluster_methods:
        raise ValueError(
            f"Unknown selection method {method!r}; expected one of "
            f"{list(split_methods + cluster_methods)}"
        )

    if method in split_methods:
        if method == 'random':
            from sklearn.model_selection import train_test_split
        else:
            from kennard_stone import train_test_split
        selected, _ = train_test_split(X, train_size=kwargs['rset'])
        return selected.index

    from sklearn.cluster import KMeans
    from sklearn.metrics import silhouette_score

    # Same estimator settings for the search and for the final fit.
    km_params = dict(random_state=42, init='random', n_init=1, max_iter=100)

    best_k = 2
    best_score = -1
    for k in range(2, min(10, X.shape[0])):
        # Score the candidate k itself (not the best k found so far).
        labels = KMeans(n_clusters=k, **km_params).fit_predict(X)
        score = silhouette_score(X, labels)
        if score > best_score:
            best_score = score
            best_k = k

    model = KMeans(n_clusters=best_k, **km_params)
    model.fit(X)
    yp = model.predict(X)

    if method == 'meta-medoids':
        from scipy.spatial.distance import cdist
    else:
        from kennard_stone import train_test_split

    sname = []
    for i in range(best_k):
        t = X.loc[yp == i]
        if t.empty:
            # Nothing was assigned to this cluster; there is no sample to pick.
            continue
        if method == 'meta-medoids':
            distances = cdist(t.values, t.values, metric='euclidean')
            medoid_pos = np.argmin(np.sum(distances, axis=1))
            # The position is relative to the cluster subset, so look the
            # label up in `t`, not in the full table.
            sname.append(t.index[medoid_pos])
        else:
            selected, _ = train_test_split(t, train_size=kwargs['rset_meta'])
            # NOTE(review): this appends one index object per cluster (a
            # nested result), unlike the flat list of labels produced by
            # 'meta-medoids' -- confirm whether callers expect a flat list.
            sname.append(selected.index)
    return sname
 # Strategy names to try; l[2] below selects 'meta-medoids'.
 l = ['random', 'kennard-stone', 'meta-medoids', 'meta-ks']
 # rset is only read by 'random'/'kennard-stone' and rset_meta only by 'meta-ks'.
 selection_method(X=df, method= l[2], rset = 0.01, rset_meta = 0.2)
 ```

 %% Output

    ['s116', 's212']

 %% Cell type:code id: tags:

 ``` python
-min(15,2)
 ```

 %% Output

-    2
+    4
+    1
+
+    {}

 %% Cell type:code id: tags:

 ``` python
 # Make the index labels of df unique by numbering repeated labels.
 mask = df.index.duplicated(keep=False)  # True for every label that occurs more than once

 # Labels that occur more than once get a '#<n>' suffix ('#1', '#2', ...),
 # numbered in order of appearance within each label; unique labels are kept as-is.
 df.index = df.index.where(~mask,
                           df.groupby(df.index).cumcount().add(1).astype(str).radd(df.index + '#'))
 # Number of distinct index labels after renaming.
 len(set(df.index))
 ```

 %% Cell type:code id: tags:

 ``` python
 ```

 %% Cell type:code id: tags:

 ``` python
 # Line plot of the transposed table, one line per entry of x.index, no legend.
 # NOTE(review): `x` is not assigned in the visible cells -- presumably a
 # DataFrame; confirm before running this cell.
 x.T.plot(y=x.index, kind='line', legend=False, )
 ```

 %% Cell type:code id: tags:

 ``` python
 ```

 %% Cell type:code id: tags:

 ``` python
 267
 ```