Skizze Wollknäuel
Contents
Skizze Wollknäuel#
Mathilda Musterfrau
s-mmuster@haw…
MatNr: 12 34 567
Problemstellungen#
Beschrieben unter http://jbusse.de/dsci-ml_ws2022/Studienarbeit-SS-2023.html:
Körpergröße abschätzen
Geschlecht abschätzen
# Relative path to the CSV data set that is loaded with pd.read_csv below.
meine_Datei = "../data/MaennerFrauenKnaeuel.csv"
EDA Explorative Datenanalyse#
import pandas as pd
# The file uses ";" as the field separator.
# NOTE(review): the loaded frame contains a column "Unnamed: 0" that simply
# counts 0..1259 -- presumably a row index written out when the CSV was saved;
# confirm, and consider index_col=0 so it is not treated as a feature.
df = pd.read_csv(meine_Datei, sep=";")
# Show the first five rows as a quick sanity check.
df.head()
| | Unnamed: 0 | age | height | spezies |
---|---|---|---|---|
0 | 0 | 0.0 | 60 | b |
1 | 1 | 5.5 | 88 | b |
2 | 2 | 13.8 | 0 | b |
3 | 3 | 4.1 | 91 | b |
4 | 4 | 13.8 | 165 | b |
# Summary statistics of the numeric columns.
# NOTE(review): the output shows a minimum height of -6 and the first rows
# contain height 0 -- these look like invalid/placeholder measurements and
# probably need cleaning before modelling; confirm against the task description.
df.describe()
| | Unnamed: 0 | age | height |
---|---|---|---|
count | 1260.000000 | 1260.000000 | 1260.000000 |
mean | 629.500000 | 25.519444 | 105.180952 |
std | 363.874979 | 23.836270 | 70.534624 |
min | 0.000000 | 0.000000 | -6.000000 |
25% | 314.750000 | 2.900000 | 30.000000 |
50% | 629.500000 | 17.800000 | 127.000000 |
75% | 944.250000 | 44.600000 | 169.000000 |
max | 1259.000000 | 80.000000 | 208.000000 |
# (number of rows, number of columns) of the loaded frame.
df.shape
(1260, 4)
# Column labels of the loaded frame.
df.columns
Index(['Unnamed: 0', 'age', 'height', 'spezies'], dtype='object')
# Distinct labels in the categorical column "spezies".
# NOTE(review): seven different codes appear, some differing only in case
# ('m' vs 'M'); presumably several of them denote the same group -- verify
# the meaning of each code before using the column as a feature or target.
df.spezies.unique()
array(['b', 'g', 'm', 'M', 'w', 'F', 'K'], dtype=object)
Problem 1: Abschätzen Körpergröße#
# Regression target: pop() removes the "height" column from df in place and
# returns it as a Series, so df no longer contains the target afterwards.
y = df.pop("height")
# Display the target values.
y
0 60
1 88
2 0
3 91
4 165
...
1255 104
1256 208
1257 112
1258 0
1259 140
Name: height, Length: 1260, dtype: int64
# Feature matrix: everything that is left in df after "height" was popped.
# Note that X is just another name for the same DataFrame object (no copy),
# so later changes to df are visible through X as well.
X = df
# Display the features.
X
| | Unnamed: 0 | age | spezies |
---|---|---|---|
0 | 0 | 0.0 | b |
1 | 1 | 5.5 | b |
2 | 2 | 13.8 | b |
3 | 3 | 4.1 | b |
4 | 4 | 13.8 | b |
... | ... | ... | ... |
1255 | 1255 | 28.1 | K |
1256 | 1256 | 3.2 | K |
1257 | 1257 | 0.0 | K |
1258 | 1258 | 29.9 | K |
1259 | 1259 | 12.9 | K |
1260 rows × 3 columns
from sklearn.tree import DecisionTreeRegressor

# scikit-learn's decision trees accept numeric features only. X still holds
# the string column "spezies", so calling fit(X, y) directly fails with
# "ValueError: could not convert string to float: 'b'".
# One-hot encode that column (one indicator column per label); the numeric
# columns are passed through unchanged. Any data used for prediction later
# must be encoded the same way so that the columns match.
# NOTE(review): X also contains "Unnamed: 0", which looks like a plain row
# counter -- consider dropping it so the tree cannot learn from row order.
X_encoded = pd.get_dummies(X, columns=["spezies"])

# Define model. Specify a number for random_state to ensure same results each run
melbourne_model = DecisionTreeRegressor(random_state=1)
# Fit model on the numeric (encoded) feature matrix
melbourne_model.fit(X_encoded, y)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In [11], line 7
4 melbourne_model = DecisionTreeRegressor(random_state=1)
6 # Fit model
----> 7 melbourne_model.fit(X, y)
File ~/miniconda3/lib/python3.9/site-packages/sklearn/tree/_classes.py:1342, in DecisionTreeRegressor.fit(self, X, y, sample_weight, check_input)
1313 def fit(self, X, y, sample_weight=None, check_input=True):
1314 """Build a decision tree regressor from the training set (X, y).
1315
1316 Parameters
(...)
1339 Fitted estimator.
1340 """
-> 1342 super().fit(
1343 X,
1344 y,
1345 sample_weight=sample_weight,
1346 check_input=check_input,
1347 )
1348 return self
File ~/miniconda3/lib/python3.9/site-packages/sklearn/tree/_classes.py:172, in BaseDecisionTree.fit(self, X, y, sample_weight, check_input)
170 check_X_params = dict(dtype=DTYPE, accept_sparse="csc")
171 check_y_params = dict(ensure_2d=False, dtype=None)
--> 172 X, y = self._validate_data(
173 X, y, validate_separately=(check_X_params, check_y_params)
174 )
175 if issparse(X):
176 X.sort_indices()
File ~/miniconda3/lib/python3.9/site-packages/sklearn/base.py:591, in BaseEstimator._validate_data(self, X, y, reset, validate_separately, **check_params)
589 if "estimator" not in check_X_params:
590 check_X_params = {**default_check_params, **check_X_params}
--> 591 X = check_array(X, input_name="X", **check_X_params)
592 if "estimator" not in check_y_params:
593 check_y_params = {**default_check_params, **check_y_params}
File ~/miniconda3/lib/python3.9/site-packages/sklearn/utils/validation.py:856, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)
854 array = array.astype(dtype, casting="unsafe", copy=False)
855 else:
--> 856 array = np.asarray(array, order=order, dtype=dtype)
857 except ComplexWarning as complex_warning:
858 raise ValueError(
859 "Complex data not supported\n{}\n".format(array)
860 ) from complex_warning
File ~/miniconda3/lib/python3.9/site-packages/pandas/core/generic.py:2069, in NDFrame.__array__(self, dtype)
2068 def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
-> 2069 return np.asarray(self._values, dtype=dtype)
ValueError: could not convert string to float: 'b'