Commit 4b765411 authored by Franka Ludig
Browse files

Merge branch 'animation' into 'addnotebook'

animation hinzugefügt

See merge request !2
parents 56bd9dfa af33db77
%% Cell type:code id:chicken-minneapolis tags:
``` python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.cluster import Birch
from ipywidgets import interactive
```
%% Cell type:markdown id:modified-bookmark tags:
## My Dataset
%% Cell type:code id:chicken-marshall tags:
``` python
# Synthetic 2-D data set: uniformly filled unit-square blobs of 20 points each.
rng = np.random.default_rng(seed=1)
points_per_blob = 20
# Lower-left corner (x, y) of every blob. x and y are both derived from this
# one list so they always have the same length, which plt.plot and np.stack
# require.
blob_corners = [(7, 5), (5, 4), (7, 3)]
# All x coordinates are drawn before any y coordinate, blob by blob.
x = np.concatenate([cx + rng.random(points_per_blob) for cx, _ in blob_corners])
y = np.concatenate([cy + rng.random(points_per_blob) for _, cy in blob_corners])
plt.plot(x, y, '.')
# One row per point: column 0 is x, column 1 is y.
data = np.stack((x, y), axis=1)
# All x values followed by all y values in a single flat array.
data_1D = np.append(x, y)
```
%% Output
%% Cell type:markdown id:prepared-republic tags:
# Manual K-Means
%% Cell type:code id:generic-dating tags:
``` python
# K-means written out by hand.
# cog = center of gravity (centroid)
# cs  = the current centroids
# k   = number of clusters

# Numbers of clusters to run the algorithm for.
ks = [2]
# Stop once the summed absolute centroid movement drops below this value.
tolerance = 0.3
# Colors of the first clusters; any further cluster uses matplotlib's default.
cluster_colors = ['green', 'pink']

for k in ks:
    # Use the first k data points as the initial centroids.
    cs = data[:k].copy()
    # The "previous" centroids start at the origin.
    last_cog = np.zeros_like(cs)
    terminate = False
    while not terminate:
        # Distance of every data point (rows) to every centroid (columns).
        dist = np.linalg.norm(data[:, np.newaxis, :] - cs[np.newaxis, :, :], axis=2)
        # Index of the closest centroid for each data point.
        z = np.argmin(dist, axis=1)
        # df[i] holds the data points currently assigned to cluster i.
        df = [data[z == i] for i in range(k)]
        # New centroids. A cluster without members keeps its previous centroid
        # instead of turning into NaN through a division by zero.
        current_cog = np.array([
            members.sum(axis=0) / len(members) if len(members) else previous
            for members, previous in zip(df, cs)
        ])
        # Total movement of the centroids since the previous iteration.
        diff_cog = np.sum(np.abs(last_cog - current_cog))
        last_cog = current_cog
        # The next iteration assigns points relative to the updated centroids.
        cs = current_cog
        terminate = diff_cog < tolerance
    # Draw every cluster, however many k asks for.
    for i, members in enumerate(df):
        color = cluster_colors[i] if i < len(cluster_colors) else None
        plt.scatter(members[:, 0], members[:, 1], color=color)
```
%% Output
25.16143318862738
2.0092105896679273
0.0
<matplotlib.collections.PathCollection at 0x12b0973d0>
<matplotlib.collections.PathCollection at 0x131775280>
%% Cell type:markdown id:printable-designer tags:
# Animated K-Means
%% Cell type:code id:upper-moment tags:
``` python
%matplotlib inline
import random
def kmeans(step=1):
    """Run ``step`` k-means iterations on the global ``data`` and plot the state.

    Every cluster is drawn with its own ``plt.scatter`` call (so each one gets
    its own color) and the centroids are drawn on top as blue dots.

    Args:
        step: Number of assign/update iterations to perform. For values
            below 1 no iteration runs; all points are then drawn as a single
            group together with the initial centroids.
    """
    k = 3
    # Use the first k data points as the initial centroids.
    cs = data[:k].copy()
    # Shown as-is when no iteration runs, so plotting never hits an
    # undefined cluster list.
    df = [data]
    cur_steps = 0
    while cur_steps < step:
        # Distance of every data point (rows) to every centroid (columns).
        dist = np.linalg.norm(data[:, np.newaxis, :] - cs[np.newaxis, :, :], axis=2)
        # Index of the closest centroid for each data point.
        z = np.argmin(dist, axis=1)
        # df[i] holds the data points currently assigned to cluster i.
        df = [data[z == i] for i in range(k)]
        # Updated centroids. A cluster without members keeps its previous
        # centroid instead of becoming NaN through a division by zero.
        cs = np.array([
            members.sum(axis=0) / len(members) if len(members) else previous
            for members, previous in zip(df, cs)
        ])
        cur_steps += 1
    for members in df:
        plt.scatter(members[:, 0], members[:, 1])
    plt.plot(cs[:, 0], cs[:, 1], 'bo')


# Slider from 1 to 10 that re-runs kmeans with the selected number of steps.
interactive(kmeans, step=(1, 10))
```
%% Output
%% Cell type:markdown id:czech-romance tags:
# K-Means with Scikit-Learn
%% Cell type:code id:saved-scope tags:
``` python
# Reference clustering with scikit-learn's KMeans (two clusters).
# random_state pins the centroid initialisation, so the labels -- and with them
# the colors below -- are the same on every run. n_init is passed explicitly
# because its default differs between scikit-learn releases.
km = KMeans(n_clusters=2, n_init=10, random_state=1)
data_predicted = km.fit_predict(data)
# Split the points by predicted label.
df1 = data[data_predicted == 0]
df2 = data[data_predicted == 1]
plt.scatter(df1[:, 0], df1[:, 1], color='green')
plt.scatter(df2[:, 0], df2[:, 1], color='pink')
```
%% Output
<matplotlib.collections.PathCollection at 0x12b049bb0>
%% Cell type:markdown id:broke-shark tags:
# BIRCH with Scikit-Learn
%% Cell type:code id:included-county tags:
``` python
# BIRCH without the final global clustering step (n_clusters=None): predict
# returns the index of the subcluster each point falls into, so the number of
# distinct labels is not known in advance.
brc = Birch(n_clusters=None)
brc.fit(data)
data_predicted = brc.predict(data)
# Points of the first two subclusters, kept under their original names.
df1 = data[data_predicted == 0]
df2 = data[data_predicted == 1]
# Draw every subcluster that occurs, not just labels 0 and 1, so that no point
# is silently left out of the plot. The first two keep their fixed colors; any
# further subcluster uses matplotlib's default color.
base_colors = ['green', 'pink']
for label in np.unique(data_predicted):
    members = data[data_predicted == label]
    color = base_colors[label] if label < len(base_colors) else None
    plt.scatter(members[:, 0], members[:, 1], color=color)
```
%% Output
<matplotlib.collections.PathCollection at 0x12ac68a30>
%% Cell type:code id:third-burden tags:
``` python
```
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment