{ "cells": [ { "cell_type": "markdown", "id": "a1309400", "metadata": {}, "source": [ "# Skizze Wollknäuel\n", "\n", "* Mathilda Musterfrau\n", "* s-mmuster@haw...\n", "* MatNr: 12 34 567" ] }, { "cell_type": "markdown", "id": "788dfa11", "metadata": {}, "source": [ "## Problemstellungen\n", "\n", "Beschrieben unter :\n", "\n", "* abschätzen Körpergröße\n", "* abschätzen Geschlecht" ] }, { "cell_type": "code", "execution_count": 1, "id": "1eb4b46e", "metadata": {}, "outputs": [], "source": [ "meine_Datei = \"../data/MaennerFrauenKnaeuel.csv\"" ] }, { "cell_type": "markdown", "id": "d3775b25", "metadata": {}, "source": [ "## EDA Explorative Datenanalyse" ] }, { "cell_type": "code", "execution_count": 3, "id": "b22ed2d5", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n" ] }, { "cell_type": "code", "execution_count": 6, "id": "1f6f4926", "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(meine_Datei, sep=\";\")" ] }, { "cell_type": "code", "execution_count": 10, "id": "916d42df", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0ageheightspezies
000.060b
115.588b
2213.80b
334.191b
4413.8165b
\n", "
" ], "text/plain": [ " Unnamed: 0 age height spezies\n", "0 0 0.0 60 b\n", "1 1 5.5 88 b\n", "2 2 13.8 0 b\n", "3 3 4.1 91 b\n", "4 4 13.8 165 b" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 8, "id": "7ab28279", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0ageheight
count1260.0000001260.0000001260.000000
mean629.50000025.519444105.180952
std363.87497923.83627070.534624
min0.0000000.000000-6.000000
25%314.7500002.90000030.000000
50%629.50000017.800000127.000000
75%944.25000044.600000169.000000
max1259.00000080.000000208.000000
\n", "
" ], "text/plain": [ " Unnamed: 0 age height\n", "count 1260.000000 1260.000000 1260.000000\n", "mean 629.500000 25.519444 105.180952\n", "std 363.874979 23.836270 70.534624\n", "min 0.000000 0.000000 -6.000000\n", "25% 314.750000 2.900000 30.000000\n", "50% 629.500000 17.800000 127.000000\n", "75% 944.250000 44.600000 169.000000\n", "max 1259.000000 80.000000 208.000000" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.describe()" ] }, { "cell_type": "code", "execution_count": 11, "id": "e1b7c8e6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1260, 4)" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": 12, "id": "27b2e92e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['Unnamed: 0', 'age', 'height', 'spezies'], dtype='object')" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns" ] }, { "cell_type": "code", "execution_count": 26, "id": "cc91754c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['b', 'g', 'm', 'M', 'w', 'F', 'K'], dtype=object)" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.spezies.unique()" ] }, { "cell_type": "markdown", "id": "aab6f955", "metadata": {}, "source": [ "## Problem 1: Abschätzen Körpergröße" ] }, { "cell_type": "code", "execution_count": 19, "id": "9e88184c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 60\n", "1 88\n", "2 0\n", "3 91\n", "4 165\n", " ... \n", "1255 104\n", "1256 208\n", "1257 112\n", "1258 0\n", "1259 140\n", "Name: height, Length: 1260, dtype: int64" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y = df.pop(\"height\")\n", "y" ] }, { "cell_type": "code", "execution_count": 23, "id": "00133909", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0agespezies
000.0b
115.5b
2213.8b
334.1b
4413.8b
............
1255125528.1K
125612563.2K
125712570.0K
1258125829.9K
1259125912.9K
\n", "

1260 rows × 3 columns

\n", "
" ], "text/plain": [ " Unnamed: 0 age spezies\n", "0 0 0.0 b\n", "1 1 5.5 b\n", "2 2 13.8 b\n", "3 3 4.1 b\n", "4 4 13.8 b\n", "... ... ... ...\n", "1255 1255 28.1 K\n", "1256 1256 3.2 K\n", "1257 1257 0.0 K\n", "1258 1258 29.9 K\n", "1259 1259 12.9 K\n", "\n", "[1260 rows x 3 columns]" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X = df\n", "X" ] }, { "cell_type": "code", "execution_count": 24, "id": "622c1f5b", "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "could not convert string to float: 'b'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn [24], line 7\u001b[0m\n\u001b[1;32m 4\u001b[0m melbourne_model \u001b[38;5;241m=\u001b[39m DecisionTreeRegressor(random_state\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 6\u001b[0m \u001b[38;5;66;03m# Fit model\u001b[39;00m\n\u001b[0;32m----> 7\u001b[0m melbourne_model\u001b[38;5;241m.\u001b[39mfit(X, y)\n", "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/sklearn/tree/_classes.py:1342\u001b[0m, in \u001b[0;36mDecisionTreeRegressor.fit\u001b[0;34m(self, X, y, sample_weight, check_input)\u001b[0m\n\u001b[1;32m 1313\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfit\u001b[39m(\u001b[38;5;28mself\u001b[39m, X, y, sample_weight\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, check_input\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m):\n\u001b[1;32m 1314\u001b[0m \u001b[38;5;124;03m\"\"\"Build a decision tree regressor from the training set (X, y).\u001b[39;00m\n\u001b[1;32m 1315\u001b[0m \n\u001b[1;32m 1316\u001b[0m \u001b[38;5;124;03m Parameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1339\u001b[0m \u001b[38;5;124;03m Fitted estimator.\u001b[39;00m\n\u001b[1;32m 1340\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1342\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1343\u001b[0m \u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1344\u001b[0m \u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1345\u001b[0m \u001b[43m \u001b[49m\u001b[43msample_weight\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample_weight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1346\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_input\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcheck_input\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1347\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1348\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n", "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/sklearn/tree/_classes.py:172\u001b[0m, in \u001b[0;36mBaseDecisionTree.fit\u001b[0;34m(self, X, y, sample_weight, check_input)\u001b[0m\n\u001b[1;32m 170\u001b[0m check_X_params \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(dtype\u001b[38;5;241m=\u001b[39mDTYPE, accept_sparse\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcsc\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 171\u001b[0m check_y_params \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(ensure_2d\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, dtype\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[0;32m--> 172\u001b[0m X, y \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate_data\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 173\u001b[0m \u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalidate_separately\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mcheck_X_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcheck_y_params\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 174\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 175\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m issparse(X):\n\u001b[1;32m 176\u001b[0m X\u001b[38;5;241m.\u001b[39msort_indices()\n", "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/sklearn/base.py:591\u001b[0m, in \u001b[0;36mBaseEstimator._validate_data\u001b[0;34m(self, X, y, reset, validate_separately, **check_params)\u001b[0m\n\u001b[1;32m 589\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mestimator\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m check_X_params:\n\u001b[1;32m 590\u001b[0m check_X_params \u001b[38;5;241m=\u001b[39m {\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mdefault_check_params, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mcheck_X_params}\n\u001b[0;32m--> 591\u001b[0m X \u001b[38;5;241m=\u001b[39m \u001b[43mcheck_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minput_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mX\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mcheck_X_params\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 592\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mestimator\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m check_y_params:\n\u001b[1;32m 593\u001b[0m check_y_params \u001b[38;5;241m=\u001b[39m {\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mdefault_check_params, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mcheck_y_params}\n", "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/sklearn/utils/validation.py:856\u001b[0m, in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)\u001b[0m\n\u001b[1;32m 854\u001b[0m array \u001b[38;5;241m=\u001b[39m array\u001b[38;5;241m.\u001b[39mastype(dtype, casting\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124munsafe\u001b[39m\u001b[38;5;124m\"\u001b[39m, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 855\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 856\u001b[0m array \u001b[38;5;241m=\u001b[39m \u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43masarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43marray\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43morder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43morder\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 857\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ComplexWarning \u001b[38;5;28;01mas\u001b[39;00m complex_warning:\n\u001b[1;32m 858\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 859\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mComplex data not supported\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(array)\n\u001b[1;32m 860\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcomplex_warning\u001b[39;00m\n", "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/generic.py:2069\u001b[0m, in \u001b[0;36mNDFrame.__array__\u001b[0;34m(self, dtype)\u001b[0m\n\u001b[1;32m 2068\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__array__\u001b[39m(\u001b[38;5;28mself\u001b[39m, dtype: npt\u001b[38;5;241m.\u001b[39mDTypeLike \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m np\u001b[38;5;241m.\u001b[39mndarray:\n\u001b[0;32m-> 2069\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43masarray\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_values\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[0;31mValueError\u001b[0m: could not convert string to float: 'b'" ] } ], "source": [ "from sklearn.tree import DecisionTreeRegressor\n", "\n", "# Define model. Specify a number for random_state to ensure same results each run\n", "melbourne_model = DecisionTreeRegressor(random_state=1)\n", "\n", "# Fit model\n", "melbourne_model.fit(X, y)" ] }, { "cell_type": "markdown", "id": "d17efd56", "metadata": {}, "source": [ "## Aufgabe 2: Geschlecht abschätzen" ] }, { "cell_type": "code", "execution_count": null, "id": "8bee0501", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" } }, "nbformat": 4, "nbformat_minor": 5 }