{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Linear Regression" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv('../data/weight-height.csv')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GenderHeightWeight
0Male73.847017241.893563
1Male68.781904162.310473
2Male74.110105212.740856
3Male71.730978220.042470
4Male69.881796206.349801
\n", "
" ], "text/plain": [ " Gender Height Weight\n", "0 Male 73.847017 241.893563\n", "1 Male 68.781904 162.310473\n", "2 Male 74.110105 212.740856\n", "3 Male 71.730978 220.042470\n", "4 Male 69.881796 206.349801" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAA7QklEQVR4nO3deZwU9Z34/9e7qo8Zh0Mc8IBhREV0GSIkkqAhIR7ZRI1idjXGYDS7Odzdn6wxnomuIsuaTTyyq6tJFqPfXJgEMYmKJsZEjMJGDJqBABolHjBATJwgAmLPdPf790dVD31U9/TM9DXd7+fjwYOZ6qrqT3VDvetzvT+iqhhjjDHZnGoXwBhjTG2yAGGMMSaQBQhjjDGBLEAYY4wJZAHCGGNMIAsQxhhjAlmAMGUhIueJyC+K3PcfRGRluctULBF5XEQ+W6Zzt4vIbhFxi9h3koioiISG8H4bROSEwR4/gPc5QUS6Sr2vqS4LEKaPiHxJRB7O2vZinm3nFjqXqi5R1Q+VqFxlu2EPlIhcLyLfD9iuIjK5v+NVdbOqjlDVRLnKkvV+Har6+FDfq5xE5BUR+WC1y2FyWYAw6Z4AZqeebkXkYCAMvCtr22R/X2NMHbMAYdL9Fi8gzPB/nwOsAP6Qte2PqrpNREaLyF0isl1EtorIf6QFkoxmIxH5kIj8QUR2isjXReTX2bUCEblZRHaIyMsicqq/7Qbg/cDtftPM7UEFF5F7ReRP/vmfEJGOtNe+LSJ3iMhDIrJLRFaLyBFpr/+tiDzvH3s7IEP4DBERR0S+KCJ/FJFuEVkqIgf4r2U0G4nIYX55d4nIL/1yZtcKzhORzSLyuohc4x93CnA18HH/c1mbpyx9T+d+jWOpiHzXf78NIjKzwHXcKiJbRORNEXlGRN6f9lqz/7nuEJGNwLuzjs2oUfn7/kfAe3wPaAce9K/jShFpEpHv+5/dGyLyWxE5qNBnbsrDAoTpo6o9wGq8IID/95PAyqxtqdrDd4A4Xo3incCHgJymIBEZCywDvgS04gWc92btNsvfPha4EbhLRERVr/HLMN9vmpmfp/g/A44EDgSeBZZkvf4JYCEwBtgE3JBWtvuAf/Pf+4/A7DzvUayLgY8CHwDGAzuAO/Lsew/wNN7ncj1wfsA+7wOOAk4GrhORv1HVnwNfBn7kfy7TiyzbXOCHwP7AA0BgwPX9Fu/B4AC/nPeKSJP/2gLgCP/Ph4FPFfn+GVT1fGAzcIZ/HTf65xoNTMT7XP4Z2DuY85uhsQBhsv2afcHg/Xg35yeztv3af6I7FbhEVfeo6p+B/wKC+iZOAzao6o9VNQ7cBvwpa59XVfVOv23+O8AhQNFPjap6t6ruUtUY3o12uoiMTtvlx6r6tP/+S9hXIzoN2Kiqy1S1F/jvgLJlO8d/su37k/X6PwHXqGpXWnnOlqzOZhFpx3vyvk5Ve1R1Jd5NO9tCVd2rqmuBtUCx
wSDISlV92P+cv1foXKr6fVXtVtW4qt4CRPECFcA5wA2q+ldV3YL3nZZKL15gmKyqCVV9RlXfLOH5TZEsQJhsTwDvE5ExwDhVfRH4P+C9/rZp/j6H4jVHbU+7Sf4v3hN8tvHAltQv6mWIzB7F8qe019/yfxxRTIFFxBWRr/hNOm8Cr/gvjQ06P/BW2rmDyraFwpaq6v7pf7JePxT4Sdrn8hyQIDfgjQf+mna95HnvfGUfjOxzNWUHrhQRuUxEnvOb3t7Ae6pPfaYZnxvw6hDKlO17wCPAD0Vkm4jcKCLhEp7fFMkChMn2G7wbwYXAKgD/6W2bv22bqr6Md3OIAWPTbpSjVLUj4JzbgbbULyIi6b8Xob+Uw/OAM4EP+mWflHqrIs69Ha8pI71sE/PvXpQtwKlZQaRJVbcGvPcBIrJf2raBvHfZUjH7/Q1X4dUUxvhBcCf7PtOMzw2vHyHdW0D6dR1c4O0yrkNVe1V1oapOxWuKPB24YKDXYIbOAoTJoKp7gTXApXhNSykr/W1P+PttB34B3CIio/yO2SNE5AMBp30IeIeIfNR/Wr2IwjeMbK8Bhxd4fSResOrGuyl9eQDnfgjoEJG/98t28QDLFuSbwA0iciiAiIwTkTOzd1LVV/E+6+tFJCIixwNnDOB9XgMmiUg5/h+PxOtf+gsQEpHrgFFpry8FviQiY0SkDfjXrOM7gXl+7e4UvP6YfDK+XxE5UUTeId6AhzfxmpyGPCzYDJwFCBPk13hNRemT1570t6UPb70AiAAb8Tpil+H1HWRQ1deBj+F1PncDU/FujLEiy3MrXhv+DhEJauv+Ll4Tx1a/LE8Ved70sn3FL9uR+DWnIbgVry/hFyKyyy/PrDz7ngcc77/3fwA/ovjP5V7/724ReXbwxQ30CF7H/wt4n+3bZDYpLfS3v4z3oPC9rOM/jxfs3sC7xp8WeK//BP7Nb5K7HC9AL8MLDs/h/XssON/DlIfYgkGm0vwn3i7gPFVdUe3y1BIR+RHwvKouqHZZjLEahKkIEfmwiOwvIlG88fvCAJ7065WIvNtvmnP8ppgzKfy0bUzFDDrHizEDdDzeWPpUk9RH/f6ORncw8GO8YZ1dwL+o6u+qWyRjPNbEZIwxJpA1MRljjAk0rJuYxo4dq5MmTap2MYwxZlh55plnXlfVcf3tN6wDxKRJk1izZk21i2GMMcOKiBQ1892amIwxxgSyAGGMMSaQBQhjjDGBLEAYY4wJZAHCGGNMIAsQxhgzzHTvjrF2yxt07y42r+PgDOthrsYY02ju79zKVfetI+w49CaT3HjWMcydMaEs72U1CGOMGSa6d8e46r51vN2bZFcsztu9Sa68b13ZahIWIIwxZpjo2rGXsJN52w47Dl07ypP30gKEMcYME21jmulNJjO29SaTtI1pLsv7WYAwxphhonVElBvPOoamsMPIaIimsMONZx1D64hoWd7POqmNMWYYmTtjArMnj6Vrx17axjSXLTiABQhjjBl2WkdEyxoYUqyJyRhjTCALEMYYYwJZgDDGGBPIAoQxxphAFiCMMcYEsgBhjDEmkAUIY4wZglJlVq1UhtaBsHkQxhgzSOmZVXsSCeafeCTzZrUPeI5CJTO0DoTVIIwxZhCyM6vG4sotj77Ae7/yGA90bh30edIztFa7VmE1CGOMGYRUZtW3yUyeF4t7N/jZk8dm1CS6d8cC02MEnSfsOCxZvZmvP76pqrUKCxDGGDMIQZlVUxwRNmzbyZwpBwKFm5CCztOTSHDHik3E4sm+wBEUdMqtbE1MIjJRRFaIyHMiskFEPu9vv15EtopIp//ntLRjviQim0TkDyLy4XKVzRhjhtp8k8qsGg1Jzmtv9ST43HfX8EDn1n4X+QnK0Dr/xCOJuJVb9yGfctYg4sBlqvqsiIwEnhGRR/3X/ktVb07fWUSmAucCHcB44JciMkVVE2UsozGmjuVr1in0RJ/vmCCpzKr3rN7M7f4Tf0osrlyxbC13XvDu
wCakrh17+86fnaEV4I7HN2W8VznXfcinbAFCVbcD2/2fd4nIc0ChBrQzgR+qagx4WUQ2Ae8BflOuMhpj6ldQEJg9eSwbtu3kymVricU1p/lm5abXBzyaqHVElH89+UimT9yff/7eM7zVu++ZNhZXfvPH14ta5Cc7Q+uNZx3DlVllqWTzElSoD0JEJgHvBFYDs4H5InIBsAavlrEDL3g8lXZYFwEBRUQuBC4EaG9vL2/BjTHDUnqzTioIXLq0E9dxcB0hFteM/cOOw4ZtO3OOyW73L1S76Bg/ioTm9kncveoVrjt9Kose2jigm30l133Ip+wBQkRGAPcBl6jqmyLyDWARoP7ftwCfBnIb8rx9MjeoLgYWA8ycOTPndWOMCRoZFE9CPE+nsveELwWbgrJrJNeePpVp40f33bxbR0SZf+KR3PLoCxnnjrgO0yaMZtVVJw34Zl+pdR/yKWuAEJEwXnBYoqo/BlDV19JevxNY7v/aBUxMO7wN2FbO8hlj6lOhEUbp9gu7JFFuPOsYOsaPytsUFFQjueYn62mJuCRU+5qi5s1qz+mLSJ1jsDf7gfSJlFo5RzEJcBfwnKp+LW37IWm7/R2w3v/5AeBcEYmKyGHAkcDT5SqfMaZ+ZY8MioaEsJvZSBENOXzz/GNZddVJzJ0xoeB6z6kaSbY9PYmMUUmtI6LcdHbp1oy+v3Mrs7/6GJ/81mpmf3VgE/BKoZw1iNnA+cDvRaTT33Y18AkRmYHXfPQK8E8AqrpBRJYCG/FGQF1kI5iMMYOV3Ya/atPrOZ2+c6aMK3hM6sbeX43EFelriipV30FQraXScyHKOYppJcH9Cg8XOOYG4IZylckYUz+KaXpJb9Yp9sYd1BSUql1ced86XEfYE8t8dt3Tk+DnG7ZnNCUFnX8gzUX5ZlinD48tN5tJbYwZdnI6jD8ylWkTRvcNHc13E+6vH6DQDTw9wKx+qZsv/+z5jNe/8fhL3L3yFW46O3ho7EAT8gXVWio9F8IChDFmWAnsMP7pekZEXWLxJKpKczhU9DyGVFBYv3VnzlDUoMlz0yfuD8CIqMvurJpELJ7k8mXrmHrIKMa0RDImvg20uSi91lKtuRAWIIwxw0q+JHnpN+tdsTiw7yacOi7fjOqQI33HFzN5bvbkseztCe4i7Ykn+fB/P4HjCE0hl95kkotOmDyo5qJqz4WwAGGMGVbaxjTTkyhu/Ep2VtT0NRtg31N9EE16s6CvuHctPYnMWdeXfnAKiQKzsBIKiYTSm/AC1e0rXiS7S7bY5qJqzoWw9SCMMcNG9+4YS1ZvJpEsbo5sTyLJHSteDFyz4Z7VmwOHrqbEEsoXfuQFh3SuI9z4yPN5jgoWcV3mnzi5ZMNfK8VqEMaYYeH+zq1cuWxdxiQ0AEcgKF5EQg7zT5zM4ideIhaPZ7wWiycDn+qz9QacuDehRFwn76zs4PMkmTernXmz2quaOmOgrAZhjKl5qY7p7OAA0Bx2cybBRVzhns+8h+kTR+dtjkp/qm+JukWVw3VgwRlTCQoNqZtpU9gh5EDYlZzaQuuIKNMn7j8sggNYDcIYMwzk65gGSKhy/dwOFi3fNwLpnGPb+OTdTxN2nMDaBXjNT+lP9eu37uT6BzfQW6BzQZMwa9IBXHTCZG5fsQlX4C2/DyNVst5Ekkc+PydjFNNwCQjZLEAYY2pO9nyEfDOZoyHpG456SsfBdO3YS0vE5fTbV2YMKQ3y6dmT+m7cqff49+UbCpYrCZx220qiIQdQTn3HIdz3bGbKuEQStu3cy+SDRha15GgtswBhjKkp6RPKehJJ5p84mXmz2rnxrGO4dGknqVYmB7jsb4/KmeewbWf+2ka6u1e9zN8cMqrv+K4de4m4bk5/RbaeRJKehHfuB9Zuz7NXZpPXQCfJ1QoLEMaYqgh6ou7eHevriE7d4G959AVuX/Ei153ekdFclAS+/LPnaYmGGNEU4splXhqMeCKZu05AgFhcM+ZJ7Nzbw97ewsEh
m5Onj3v86KaM66x2TqXBsgBhjBmSfE0nhbYvWb2ZO1a8SMR1Myaf3fKLFwI7omNx5br71wf2J1x3/3ocRzL6DgQvW6sjwt7e/HMmXJG+eRIhRwh464KyFx4Cr5N6T9okulrIqTRYFiCMMYOy70a/iYib2XSSr0nl/s6tfRPPgL7mnEuXduKI5Mw5SJfvpdSktHQKzDmylWjIZfnv/5T3nHt6Etz6yxcKTnobjNQaEqk+kWrnVBosCxDGmAHz5iSs7XuCTj31X3mfl4coqEll/OgmLvlhZ2Dzj3d4ae/Sjz73l6L2K2VwiLjCtadPzUnPcc7MNpau6RpQTqVa6NS2AGGMGZB9cxJy76xhx6Fzyxs5TSqaVM69c3WJQ0DtcR3h3x/cSCKZJJ7cl9dp6Zouls9/H3t6EkXd8GulU9smyhljMnTvjrF2yxt0744Fvp5vdTXwmk5mTNw/p0klllDiRabHGM729iaJxZM5fRlhx+uXKGaSXHqn9q5YPGPFukqzAGFMA8sOBvmWuEzfL/+cBG/G8OSDRmYs3RlxhabwwG41+UYHDVcD6XMICsCpTu1KsyYmYxpU0KI7ix7amNN3sOvteM46Cecc28Z3n9rcd67T3nEQi858BwBrt7zB7MljWXXVSRkT1waiVisb0ZBw4pQD+fnG1wruF3YFR8gYpVVsP0ItLBSUIqo1+k0UYebMmbpmzZpqF8OYYad7d4zZX30sI9V1xBUiISdjXYWWqEtvPJkxuigaEkAyhqM2hZ2+ABPUbr7413/MWYFtuFr2T8dx3l1PZ1x/yAHXcTJGcw1lHYcHOrfmLBRUyj4IEXlGVWf2t5/VIIxpQBu27cSRzHacsOvkDDPtTai/fV/QcMXJSYIqwIIH1md0zF6xbG3fZLADWiJluY5KCzsQDrncdHbuSm9BAWGwo4+qvVBQigUIYxpM9hDVlIQqC86YmpH07trTp3LdT9dn7BeLJwi5mW3kewMW3YnFlf/51YtMmzCaMfuFS38hVeA40rfsaCkDQpBqLhSUYgHCmDrVvTvGhm07AaFj/ChaR0QLDlGdO/0Qzpt1aF/Su7Yxzfx8/Z9y5wmI0FvklONv/+bVoV/IEIUEAi534Odx4Kazp2cEg2rfwMvNAoQxdWjJU69y3QPr8XPKEXLga+fM4NDWlrzHLF2zlcnjRjLr8FbaxjSzY08PCx5Yn7Nfsau51QoVgUH0tYZd4fq5HUwc00x6kE2phYls5WYBwpg6s+SpV7kmq1konvT6BJZ8ZlbeNZjBT34XcYnFEyST9JMPdXgYbEBzBE7pODjw5l8rE9nKzeZBGFNHunfHWLh8Y+BrgnDH43/s9xx7ehLE6yQ4DIWIBM49qKWJbOVmAcKYOuKtaRA8y+zteJIVfyguP5GBt3uTtERylyKtpYls5WYBwpg60jamuSFSWlRC1JWMtN0ptTSRrdwsQBhTB1KpMIC+NBepmkS9pa2oFPGHtGZrHRHlnGPbMradM7OtLjuqLUAYM8xl508CWD7/fX2ZU61C0b+QA3979DhCDrREXJrCTt70GN27Yyx9pitj29I1XWx6bVfBJIfDkY1iMmYYC1qi84pl6/j07EkZK6yZwlwRfr2pm6awS0/CmzCYb1RS0ApxAKfd9iTRkFtXo5qsBmHMMLZk9eacJTpj8SSLn3y5SiUanmIJpSeeZHcsQU88yaLlG/PWBIL6IN7u9fJV1duoJgsQxgxT3btj3P7YC4GvDbfJbNXgQN505IVGJbWOiGamMw85RLNGjtXLqCZrYjKmxEoxwzb7HEFpM7715EsEDLIxRXJduHDO4Zw67eCcdOT9jUpKT6bXl848rUmvXkY1WYAwpoRKMcM2+xznzGzjB09v6etTcAXOetcElj6ztRyX0DB6E3DH45uYN6udaz8ylYUPbiDsOiRUi1q/IT0X041n5WZ3rYdRTbYehDElErTGQlPYYdVVJxV9swg6hymfkdEQn5tzOF9/fBMhR/o6qM+b
deiAzzWccjMVux6E9UEYUyKlmGFbaL1nU3o9iQR3rNjE273FdVAX0joiWtSa08OJ/Us0pkT6m2GbmsxWaLx825jmjMV5TOm54tUcmsIO8088kojbGGkzBsP6IIwpkdTolqC26FS/AnhDIlOznBec0cF5x+1rzli56fWMEUiuwAeOHMdjL1gOpVL50YXHEQ65fYH7jsc3ZbxeLx3MpWABwpghyG53DloqMj37Z0pqac9rfroeBM6bdSjdu2NctrST9GkNCcWCQwldcHw7Mw9rzdhWrx3MpVC2ACEiE4HvAgfjZQ5erKq3isgBwI+AScArwDmqusM/5kvAZ4AEcLGqPlKu8hkzVPlGLGWvNJZv5m3KgvvX0xtP0JtQilyozQxAxBXOPraNT88+jMkHjcx5vVbWf65FZRvFJCKHAIeo6rMiMhJ4Bvgo8A/AX1X1KyLyRWCMql4lIlOBHwDvAcYDvwSmqGreBlkbxWSqZSAjlrp3x3jvV34VuMynqYyBjiard1UfxaSq21X1Wf/nXcBzwATgTOA7/m7fwQsa+Nt/qKoxVX0Z2IQXLIypOQMZsbRy0+uWMK9CrvjQFG7/xDvZL5y5joN1PA9ORUYxicgk4J3AauAgVd0OXhABDvR3mwBsSTusy99mTM3JN2KpJeJmjFBK9T+kJ86z9NvlpBx98EiSZEZk63genLJ3UovICOA+4BJVfVMk7/+OoBdynrtE5ELgQoD29vZSFdOYAQkasXTOzDY+8j8rcURIJJMsmNvBtPGjc4612kT53L7ij/zPij9yzsw2lq7pso7nISprgBCRMF5wWKKqP/Y3vyYih6jqdr+f4s/+9i5gYtrhbcC27HOq6mJgMXh9EGUrvDH9yM7Hc+ptT2bUFK75yXo+f9JkmxVdQXv9z3rpmi6Wz38fe3oS1vE8BGVrYhKvqnAX8Jyqfi3tpQeAT/k/fwq4P237uSISFZHDgCOBp8tVPmNKITV7dtvOvYHrL9z22KaAo0wpBd3Ewo7Dnp5E3c1srrRy1iBmA+cDvxeRTn/b1cBXgKUi8hlgM/AxAFXdICJLgY1AHLio0AgmY6ohf76d4KZTq+IOnVD4cwyqn1mfQ2mULUCo6kry/a+Bk/MccwNwQ7nKZMxQZM97uPb0qUwbP5q2Mc1s+etb1S5e3RpokI2G8i8XagbGZlIbU4T02dCpCW/X/GQ9LRGXnnjCJrhVSMih4Ge9X9jlm+cfy5wp4ypXqDpmyfqMySM9ud6K5/9M0JzSPT0JepPWlFQpIsLVpx5NU9ihJeLmvJ5E6Rg/qgolq09WgzAmwJKnXmXhgxtAlZ6kl66hJ6AT2lSWALMOb2XVVSfRtWMv67ftZNHyjTactUwsQBiTZclTr3pJ9NJYcKg8Fy8pW7qehLL65W6mTzyibwTZKR0HWx6lMrEmJmPSdO+OsXD5xmoXwwDieB3O2W5+5A8Za2nU40I9tcIChDFpunbs7VurwVTXlR8+OrBvJ+xaXqVKsSYmMyyVYv3f1DlaIm7fjNveeIKYDUmquqtPPZoLP3AELU0hrvlJZnNfQtXmOFSIBQgz7ORbh2EgUp3QIhCLK1G/Ezr9idV1IGGxouL+5QOHc+EHjgC8hZRQWPjgBsKuQ0LVOqIryAKEGVaC5iNced86Zk8eW/RNI6gTOhbQCW3BofIirvDZ9x+ese284w7llGnWEV0NFiDMsBK0Olsq138xNw7rhK5tC+Z2BH6P2av0mcqwTmozrORbh6HYNmkvwFgndDUFjQEIOcINH53mNSmZmlFUgBCRrxazzZhyS63D0BR2GBkN0RQeWN4dL8DYnIZa4wiMbLIGjVpT1JrUIvKsqr4ra9s6VT2mbCUrgq1J3biGMoppyepXc0bGmMoJu4Kg9GTNgrN1oyunJGtSi8i/iMjvgaNEZF3an5eBdaUqrDEDNZDJUamcSqnJVefNOpSrTzs6sKnDlF/EdfjUeyfRHC5uTW9TPf3V6e4Bfgb8J/DFtO27
VPWvZSuVMSWSk6L7I1Pp3tPDHStexCE3lYMpvz09Ce5ZvaVv9bcUW8Oh9hQMEKq6E9gJfEJEXOAg/5gRIjJCVTdXoIzGDEpgiu6fWtNSJTgCjgjxPP09e9Lal1oirs1vqFFF9QqJyHzgeuA19i3gpEBV+yDM8FWKmdD9nbdrx16SNpmhOhSaIg67Y/sCQTTkIELGGt37RVwWzu3gxKMPtOBQg4odNnAJcJSqdpexLKZBlGImdDHn/XDHQfRYfKiKprCTkwE3kUzmLPbzVo+X2sSCQ20qdh7EFrymJmOGJL3ZZ1csztu9Sa68b11Gds5Snff+zu0lKrUZqCSw4IypfcORg7Kypix6aOOQv39THgVrECJyqf/jS8DjIvIQ0PdNqurXylg2U4dKMRM6qGkq6LymOkIOfbXC1FoNO/f2ctGSZ9kVi+fsP5Dv31RWf01MI/2/N/t/Iv4fYwZlKDOhCzVNtY1ppsf6G2rCDz93HDMPawX2pcjo3h3L+d5TbPRS7epvFNPCShXENIbUTOgrs270/T09FpOk7/R3HMx9v9uWc6wrYAvClYbrgCi4jkNSk3SMH01n177W5wuOb+8LDunSv3dNKrGE0uTPg7DRS7Wr2FFMD5K7LvtOYA3wv6r6dqkLZurX3BkTmD15bE5TUaGRTYWapu57pouv/vz5vEHAgkNxHIFCWUiiIeGms6fnfHebXttF55Y3mDFxfyYfNDLv8enfe/oaHBYcalexo5heAsYBP/B//zjekNcpwJ3A+aUvmqln2dk5+xvZlK9p6u6VL3H/WuuMHqr//LtpfKjjYP7nVy/ynd+8mvM0uF/E5ZuffBdzphwIkPHdTT5oZMHAkM6ysg4vxQaId6rqnLTfHxSRJ1R1johsKEfBTOMIaj66/N61TD1kVN+NJ6hp6tIPTuHLP3u+mkWvG//20/U8t/1Nlj7TxX7+0326pCod40dXqXSmWooNEONEpD01c1pE2oGx/ms9ZSmZaRhBzUc9CeW0/1nJzWcf09csMXvyWFZddVJfE8W3/++V6hW6ziQUvvtUbmIEm+Xc2IoNEJcBK0Xkj4AAhwH/n4i0AN8pV+FMYwhqPgLoiSe57N61OAIR1+2rNbzSvYela7rypnEwpdESdVl4hs1ybmRFpfsGEJEocDRegHi+FjqmLd13/Xigcytf+FGndSjXkLArPPWlky041KFSpfs+yf/774GPAEcAhwOn+duMKYmph4zCFnqrLb0J5VtPvpR3lnN2GnVTf/prYvoA8BhwRsBrCvy45CUydWEgyfju79zKFfeupdfmudWcb/z6Je5a+QoL5k7NWA60XPm0TG3pb6LcAv/vf6xMcUw9GMjNIzWCKTuxm6kdPYmktwKfwnnHHVrUpEVTH4pdk/ogEblLRH7m/z5VRD5T3qKZ4WigyfhSI5hMdTWHnX5X2Fv44Ia+mmH2d2arwdWnYv9nfht4BBjv//4CXgpwYzIM9ObRNqaZWNzWdaumiAs3nX0M/+8f383Vpx5N2A3eL+w6fc2Gg82nZYaXYgPEWFVdir9YkKrGsdUaTYCB3jxaR0T52LETK1E0E8AVOPfd7Vy+bB0XLfkdX/vlC1zx4aNxA0YMxP3vMTVpMZXKuyns2DyJOlXsPIg9ItKKn49JRI7D1odoSP11PgfNeL72I1P7ahBBx/zj7EksedpWr620ppDDzR+bzuXL1mb0J9z8yB9oDmeuBgcw/8Qj+76/fPm0TH3pbz2IS4BVwJXA/cDhIrIKLy/Tx8peOlNTiu18Tr95rN+6k0UPbSx4zOSDRnLB8e189zcWJCpJgVHN4dwkiG7uanDRkMO8We0Z2yyvUv3rr4mpDbgV+Lm/76PAPcB7VXVtmctmqix9nHsxnc/p+7eOiNI2pplFD23MOObye9ey6bVdOcd8/uQp/PILc7johMNxrc+6Is561wQ6xo/KaRJMqGasBtcUdrjpbGtCakT9DXO9HEBEIsBM4L3AScA1IvKGqk4t
fxFNNWTXFi46YXLBleCWPPUqC5dvJOIK8aSXu+fQ1paCOZZ2vR1n4YMbCLsO8WSS900ex+N/+LPNpq6QT88+LO/6HOmrwVkTUuMqtg+iGRgFjPb/bAN+X65CmeoKGud++4oX8bKs7NObTNIScfnqz57jG79+CYAef0XJK+9bx/L57yuYY6nXjwQ9Ca+t+1fP/7lMV2SyXXB8e1+m3Hz9CdaEZPrrg1gMdAC7gNXA/wFfU9UdFSibqZKg7KoR1+XCOYdzx+Ob+p40z5nZxmm3PRk4yc0RYU9PghvPOobL712bs0+vVRMqKhoSlnxmFq90v5WzsM9AZr2bxtJfDaIdiAIvAluBLuCNYk4sIncDpwN/VtVp/rbrgc8Bf/F3u1pVH/Zf+xLwGbzhsxer6iMDuRBTOvmGqs6b1c68We196bZPufUJ4nnSY7zVk2D91p2cd9yhjB/dxNn/+1QFSm7y+fi7JzLzsNac5UAtZYYppGB3oKqeArwbuNnfdBnwWxH5hYj0t171t4FTArb/l6rO8P+kgsNU4Fy82sopwNdFJM90HVNuhca5t46IMn3i/mzb+Xbe4JCy4IH1bHptFz8OWCfaVNbSNV05s9kHOuvdNJ5++yDUywe+XkTewJv7sBOvZvAeYEGB454QkUlFluNM4IeqGgNeFpFN/vl/U+TxpsT6H+fefxNRPAmn3PqkrdtQYa7jDVV9uzd4QEFKoXW+ranJQP99EBfjjVyaDfTizYn4DXA3g++kni8iFwBrgMv8/owJQHobRJe/LahMFwIXArS3twftYiqgY/xowq7025dgwaEyrj7taI4+eBSg7Bd2mXfX0xmvB81mt5QZpj/9jTifBCwD3qOqh6vq+ar6dVVdq6qDSc78Dbw1JWYA24Fb/O1BacIC7yyqulhVZ6rqzHHjxg2iCKYY93duZfZXH+OT31rN7K8+xgOdWzNebx0R5ZaPTSfSX4Y3U3b/csLhXDjnCOZMGceOt3r55N1PI/5CYE1hJ28qDEuZYfrT3zyIS0v5Zqr6WupnEbkTWO7/2gWkJ+RpwxtKa6qg2HTOc2dMYP/9wvzz95/lrR5LzVUN0ZDDZ993OJD5vaUkk8rDF78/Y9RSOkuZYQqp6JxVETkk7de/A9b7Pz8AnCsiURE5DDgSeDr7eFMZA8nI2jF+NMkil601pbNfxM2Z4Rz0vUVDLnv6Cd6pgQcWHEy2YifKDZiI/AA4ARgrIl14HdoniMgMvOajV4B/AlDVDSKyFNgIxIGLVNUeSatkIG3T6TNxHSAWT6KAdT2UT8iBb37yXXSMH51xU7c+BVNqZatBqOonVPUQVQ2rapuq3uX3YbxDVY9R1bmquj1t/xtU9QhVPUpVf1aucpn8UnmRgAG1Tc+ePJbZR7TyVm+ShFpwKLczZ0xgzpQDrU/BlF3ZahBmeAmaMLXqqpMKtk13747xrSdf4s4nXiJuQaFi/uUDR+R9zfoUTClZgGgg+VIqdO+OceWytcTi2tcpffm9a3n44vczfeL+gcev3PQ6X/hhJ4MZymYG75yZE/J2OKdYDiVTKhYgGkShlApLVm8mllUFSM+6OnfGhIzjexJJYv1NozYl1xx2OW/WpGoXwzQQCxANoNCwVYA7VmwKPK4n7qVemHrIqJzjTWk4kPOJukJgyvO4dTibCrMA0QCCUio4ImzYtpPRzREirpO3RhB2HDq3vIErNiGuHII+9XyT09WGE5sKs7W7GkDQ8Me3ehJ87rtrWL91Z+CaDSmxeIItf32r37H0pvyaw6HAuSjGlIsFiAaQGv4YDWV+3bG4suihjVx7+r7lJcOuEHIgmpZC49bHgpugTGXZnAZTadbE1CC8tBgR/vl7z/BW777agIMwuinM4vNnAsr40c08/6c3uXTpOkADFwMypdcScelJJFFVmsOhvgWZlq7pyhhYYKOTTCVZgGggHeNHkczKgfhWb4L5P/gdTWGHeCKJiPSNVDLlFw05XHfGVKaNH91XO0gf
ivz5k6fYnAZTNRYg6kSxy0ZedMJkbvvVC/Rm3f/3JXhTehPW31AJIQfuvGAmc6ZkZiVO//5sToOpJgsQdaC/ZSO7d8dYsnozd6zYhAg5wcFURzwJ40c3VbsYxuRlAWKY6y819/2dW7ni3rXWl1CDoiHHRoeZmmYBYpjLN+wxtf3ye9f2u+qbqQ4RbFSSqWkWIIa5loibsUAMeP0JLRGXDdt2WnCoIWF/6HAk5JBIqo1KMjXPAkSN694dY8O2NwHNyf8PsKcnQdQVYmmBIOqK33Rhs59riesIi88/ltHNERuVZIYFCxA17P7OrVy2tJNUFoywK9zysekZHdBtY5oRJzN5jzhC25hm2sY0E3IgPYtGUO4fU3qO5K6L8XZvkvGjm/vNxmpMrbCZ1DUqlYI7/ebem1CuWLaO7t2xvm2FFolpHRFl3nvaM85rwaEygtImRfpqdsYMD1aDqFFdO/biigNk3lBcR+jasTejeSLfIjHdu2Pc8/TmShbb+AJ7fkSsU9oMK1aDqFFtY5pJaO7zfiKpOTeZfJPkNmx7E1u2oXYsOGOq9TuYYcVqEDWqdUSUm86ezqVZfRA3nZ058iV7kty1H5nKtAmptA02gqlWhBw4pePgahfDmAGxAFHDUk1H+UYxBU2Su+an62kOOyQVPj17EmFXbKhrCYUc+OIpRxNyHaaNH8V5dz1d1Op6qVTdVoMww4k1MdW41hFR5kwZx5wpBwKwdssbfZ3UqYWAsu3t9ZYE/cavXyKZVMKu0BS2r7oUkkm4+dEXOKAlwszDWrnpbG+AQOrzDTtCxN035yHFUnWb4chqEMNEUL6lqYeMIhYvPComoRASOP+4Q7nzyZcrVNr6lcQbrppKZ5I+QKAl4rKnJ0HbmGZWbXqdK7O+L6s9mOHGAsQwENSU9IUfdRJyHUQEUEIC8TwtSb1x5Xev/rVyBW4AYcfpazIKyriab2SZMcOJBYhhIGhN6YRCIq3tO19wAO+pd83mnWUs4fDnCjiO0BTyFu7pTSRzJrqlK6bJyFJ1m+HOAkSNCRqyGrSmtCkd14FHPj+HMS2Rvs9+1abXuWLZOlS9VfVcf7J61BXEEWsyMg3BAkQNybeuQ2q29GVLO20thxJwAHG8ZiIFrjt9Knt6EoxpgekT9wcI7FtI72Ow4GAagQWIGlFoXYcde3roiSc5+W8O4ucbXqtySYe/RX83jVM6DqZrx17Wb93Jooc2Bi62ZE1EptFZgKgRQf0MYcfh2p/+nofXW1AolZaIy7S0+SQfX/ybvIstGdPobHB8jQjqZ+hJJC04lFg8rXM5aB5JanRSPt27YxlzUYypZxYgakRQVtaz3jWh/wMNgjcKqRjzTzyyYOd/odFJ93duZfZXH+OT31rN7K8+xgOdW4dSbGNqngWIISrlE+XcGRNYddVJfP+zs1g+/32894jWEpSw/kVCwo8uPI6IW/ifczTkMG/WvvTnhVKlZ0vvI9oVi/dNlrOahKln1gcxBPlGHRUraEhr64goKze93ndebxpcrnzbG1HEdQmHXP71pMnc8ugLOa/vF3FJavASn8VOaMvXR2T5lUw9swAxSIVGHRVzw0gPLj2JBPNPPJJ5s9rZsaeHK5atoyeezLgZpUs9JzdSgDi8tZnXdvUELriTahaaN6ud21e8SCxt1mA0JHzzk+8KXK41pZjRSgNtjjKmHlgT0yDl6+DcsO3NfpucspsrYnHllkdf4Lj//BUf/u8n6OknO6jSeCvDnXDUgSQClmmLhjJX0Lvp7OkZTUY3nT2dOVMOHPJT/kCao4ypF1aDGKSgJ8q3euJ87rtriLiFm5yCmiuAotNyN1LNIaU54nLjWcf0JcBLr3WBl+W2bUxzWXMgWX4l02gsQAxS3+zme9f23dhT+ZFS6wMENTl1746xc28PPQlbm3ggFj/5MtfP7WDVVSdl3KALzT4vB5s8ZxqJBYghmD15LE6B4ZXZnZipm5krQjypODReU9Fg9SaUa36yHhTO
O+5QYOj9QMaYwqwPYgi6duwl4rp5X0/vxEy/me3pSZBIesHhnRNHV6i09WHhgxsKLpjU30S3YthkOGM8ZQsQInK3iPxZRNanbTtARB4VkRf9v8ekvfYlEdkkIn8QkQ+Xq1yllC/LakvUzenE7NqxF1dyqxu/22JpuFNcyV2JLVvY3RcAyjGyyCbDGbNPOWsQ3wZOydr2ReBXqnok8Cv/d0RkKnAu0OEf83URyf9oXiOCRrbc8NFp3PPZ41h11UkZHdRtY5rpTRTfoBR1xcs6WoZy16rF5x/b7z4J1b4AUOqRRTYZzphMZeuDUNUnRGRS1uYzgRP8n78DPA5c5W//oarGgJdFZBPwHuA35SpfqRQzsiU1Ie7yDx3Fl3/2fFHnjRU5oqleNIUdwiEHDRjKmpI+pDWllCOLbDKcMZkq3Ul9kKpuB1DV7SJyoL99AvBU2n5d/rYcInIhcCFAe3t70C4VV2hky5KnXmXh8o1EXKE3oRzeuh8vdb9V4RJW3zkz27jvmS4Kxz2hORxiVyyesTXswMUnT2HerPbAz7lUI4tsMpwxmWplFFNQS0rgrURVFwOLAWbOnFnTj9lLnnqVa37qdcH0+Pe8RgwOAA+s3cYjl8xh2863AWXLjr0sWp65DkPH+FE5N+hIyOHhf30fkw8aWfYyppqsrswaNmu1B9OoKh0gXhORQ/zawyHAn/3tXcDEtP3agG0VLltJpJqTWiIuCx/cUO3i1AxXhD09CeZMGde3LbVoT3rTUNANuhLBIcUmwxmzT6UDxAPAp4Cv+H/fn7b9HhH5GjAeOBJ4usJlG7L0SVuxRBIKtKdXgyvw7kljeOrlHRV/795EblNNUNNQvht0UGLDcrHJcMZ4yhYgROQHeB3SY0WkC1iAFxiWishngM3AxwBUdYOILAU2AnHgIlUdVlONgyZt1Zr/+Og0Fi7fWJX3XnBGR9E33ewb9FCz5hpjBqeco5g+keelk/PsfwNwQ7nKU25BI2Cawg7JpBJ2Hd7u9eJdNQcnxeJJQoWmfvcj5AiqSlPEJZ5IktTM/FFhV7yZ5eqNwoqGHBRYcMZUzpt16KDe02ZLG1M9tdJJPewFjYBRhYcvfj97ehK0RFzue7aLxU+8VJUg4Qjs2NPD7lhmxcx1hESy/wI5wMK5HZwy7WA2bHuTfB3NqeahlojLnp6EDT01ZhizAFEiqREwly7tJJWtO5FMsnH7myhw5bJ1fUn8qiGpcOtjm3K2//uZHcyadAB3r3qZe9dsoTdPEZPAooe85qlFD+0LCtd+ZCrTJozOWfSoVGzoqTHVY7mY+pGel6e/HD273o6THgPiSbhi2dpBBYdoqPxzqFsiLtPGj2byQSP58t8fw1NXf5DL/nYK0ZBDczj3n4YrwsLlGzNmGi96aGNZO45tHQZjqsdqEAWkd46+HU+gqjSHQ4Edpd27Y4EdwIIgATmY+pO+Klq5pKetAO9mPG9WO9MnjubNvb1cdu/ajHL0JpJEQk7fnA6oTHOPDT01pjosQOSRb1RSapZvdkdp1469SMCw1kQyWXSAcB1wRIpeOGiw9os4JJWcJ/Hs0UIff/dEfvTbLbjikNAk153e0dfMlFKp5h4bempM5VkTUx5BqaTTZS8v2hJxA/MnXfHho1kwt6Pf93MFFn/y2AEHh5DjHbtf2CEaEk7+m3GkJ0QVvNFFTX6TUcQV4gnl2o9MzakBZSeq+8HTW7ypHOKdaWRTyJp7jGkgVoPII18q75S9vZnLi5501IE5+0RDDrMOb+WV7j15FweKuN640Js/NoOxI5uIulIwUZ/gpZ+IuGnNXhGXnoSy4IwOzpt1KN27Y2zYthMQOsaPYseeHk677UkAevxzL3poI6dMOzijBpQ9WigVrFKr31153zpWXXVSzqpuxpj6ZDWIPLI7R8OuEHJgZDRENOT1K8Ti+562H17/p4CzKC0Rl6vuW5d36lxSQcT7GtrGNCP9zFMYEQ1x5wUzueO8d+KI1xG+O5ag
J55k0fKNdO+O0ToiypwpBzJnyjhaR0TZ05MgGsrMnp69sE5/ATH9mNYRUaZP3N+CgzF1zmoQBWR3joL3pL1zbw8XLfkdvYl4wePnn3gke3oSOU/m6eJJJZ7Uvqfz9FxEPYkEvQklfZrC3t44HeNH9a1mF4vvK0O+DuNihopmJ6rrSSRJJJMZo7JseKkxjcUCRD+yO0dbR0Tp3h3r92k7GnKYN8tLR97fvrDv5p4elFoiLqfe9iTJtCanVIf3QOYHFJulNDsg3vrLF/juU5v7Xj9nZpvVGoxpINbENAhBY/MvOL6dprBDS9QlEnK47oypfcElte9+AXMLUtJv7qkmnD09CZqymoaaQm5fLWEgHcZzZ0xg1VUn8f3PzspZ7S772qZP3B+Apc90Zby2dE2Xra5mTAOxGsQgBY3NP+qgUSx8cANh12HR8o2MjIaYO2NCxr53r3qJ+zu3950n7AquI4E39/5qCQOdHzCQoaKW4sIYYwEiQLGppdNvuN27Yyx6aCM9Ce0b9XPFsrV9cyVaR0RZuel1HtnwGi0Rh1hCueiEIzjp6IPyvk8xTUPlmh9gKS6MMRYgsgw2tXTQE3csrtyzejP/evKRGfMMUv73iZe44PhJBW/w1VofwVZXM8ZYgEgzlNTSbWOa+2oO6W5fsYl5s9qH1GRTrfURLMWFMY3NOqnTBM2ezp4vkE/riCjzTzwyZ3vEdfpusKVosgma8XzlfevK1nlscx6MaVwWINIM9SY+b1Y70VDmR5o6vlRZSYcSxIwxZiCsiSnNUNvdW0dEuens/McX02TTX9+CdR4bYypFNCAD6XAxc+ZMXbNmTUnOlX5jBobU7j7YDuRi+xYe6NyaE4RsjWZjTLFE5BlVndnfflaDoPSdvoMZejqQDnLrPDbGVELD90FUutM3n4H2LVjnsTGm3Bo+QAyl07e/JUgHwvoWjDG1puGbmAZ7Yy5Hs5RNTDPG1JKGDxCDuTEPZUJdIda3YIypJQ0fIGDgN+ZyJrKztZeNMbWiYQNE9lDUgdyYrb/AGNMIGjJADLX/wPoLjDGNoOECRKn6D6y/wBhT7xouQJSy/8D6C4wx9azh5kFY/4ExxhSn4QJEqbKqGmNMvWu4Jiaw/gNjjClGQwYIsP4DY4zpT8M1MRljjCmOBQhjjDGBLEAYY4wJZAHCGGNMIAsQxhhjAg3rNalF5C/Aq2V+m7HA62V+j1rTiNcMjXndjXjN0JjXnX7Nh6rquP4OGNYBohJEZE0xi3vXk0a8ZmjM627Ea4bGvO7BXLM1MRljjAlkAcIYY0wgCxD9W1ztAlRBI14zNOZ1N+I1Q2Ne94Cv2fogjDHGBLIahDHGmEAWIIwxxgSyAJFGRF4Rkd+LSKeIrPG3XS8iW/1tnSJyWrXLWWoisr+ILBOR50XkORE5XkQOEJFHReRF/+8x1S5nKeW55rr+rkXkqLRr6xSRN0Xkknr+rgtcc71/118QkQ0isl5EfiAiTYP5nq0PIo2IvALMVNXX07ZdD+xW1ZurVa5yE5HvAE+q6rdEJALsB1wN/FVVvyIiXwTGqOpVVS1oCeW55kuo8+86RURcYCswC7iIOv6uU7Ku+R+p0+9aRCYAK4GpqrpXRJYCDwNTGeD3bDWIBicio4A5wF0Aqtqjqm8AZwLf8Xf7DvDRapSvHApccyM5Gfijqr5KHX/XWdKvud6FgGYRCeE9/GxjEN+zBYhMCvxCRJ4RkQvTts8XkXUicnc9Vb99hwN/Af6fiPxORL4lIi3AQaq6HcD/+8BqFrLE8l0z1Pd3ne5c4Af+z/X8XadLv2ao0+9aVbcCNwObge3ATlX9BYP4ni1AZJqtqu8CTgUuEpE5wDeAI4AZeB/2LdUrXlmEgHcB31DVdwJ7gC9Wt0hll++a6/27BsBvUpsL3FvtslRKwDXX7XftB7szgcOA8UCLiHxyMOeyAJFGVbf5f/8Z+AnwHlV9TVUTqpoE7gTeU80ylkEX0KWqq/3fl+HdPF8T
kUMA/L//XKXylUPgNTfAd51yKvCsqr7m/17P33VKxjXX+Xf9QeBlVf2LqvYCPwbeyyC+ZwsQPhFpEZGRqZ+BDwHrUx+o7++A9dUoX7mo6p+ALSJylL/pZGAj8ADwKX/bp4D7q1C8ssh3zfX+Xaf5BJlNLXX7XafJuOY6/643A8eJyH4iInj/vp9jEN+zjWLyicjheLUG8Jog7lHVG0Tke3jVUAVeAf4p1Y5XL0RkBvAtIAK8hDfCwwGWAu14/+A+pqp/rVYZSy3PNd9G/X/X+wFbgMNVdae/rZX6/q6Drrmu/1+LyELg40Ac+B3wWWAEA/yeLUAYY4wJZE1MxhhjAlmAMMYYE8gChDHGmEAWIIwxxgSyAGGMMSaQBQhj8hCR3Vm//4OI3N7PMXP9RGiF9jlBRJbnee0Sf1imMVVnAcKYElLVB1T1K0M4xSV4ydWMqToLEMYMgoiME5H7ROS3/p/Z/va+WoaIHCEiT/mv/3tWjWRE2noUS8RzMV7unBUisqIKl2VMhlC1C2BMDWsWkc603w/AS1cAcCvwX6q6UkTagUeAv8k6/lbgVlX9gYj8c9Zr7wQ68NIwr8JLFHmbiFwKnJi+Jokx1WIBwpj89qrqjNQvIvIPwEz/1w8CU71UNwCMSuXySnM8+3Lu34OXgjnlaVXt8s/bCUzCW+TFmJphAcKYwXGA41V1b/rGtIDRn1jazwns/6KpQdYHYczg/AKYn/rFT/6X7SngLP/nc4s87y4guyZiTFVYgDBmcC4GZvorkm0EsvsYwBuRdKmIPA0cAuws4ryLgZ9ZJ7WpBZbN1Zgy8ecz7FVVFZFzgU+o6pnVLpcxxbJ2T2PK51jgdn/RljeAT1e3OMYMjNUgjDHGBLI+CGOMMYEsQBhjjAlkAcIYY0wgCxDGGGMCWYAwxhgT6P8HMWHwwspWcm4AAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "df.plot(kind='scatter',\n", " x='Height',\n", " y='Weight',\n", " title='Weight and Height in adults')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df.plot(kind='scatter',\n", " x='Height',\n", " y='Weight',\n", " title='Weight and Height in adults')\n", "\n", "# Here we're plotting the red line 'by hand' with fixed values\n", "# We'll try to learn this line with an algorithm below\n", "plt.plot([55, 78], [75, 250], color='red', linewidth=3)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def line(x, w=0, b=0):\n", " return x * w + b" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "x = np.linspace(55, 80, 100)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "x" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "yhat = line(x, w=0, b=0)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "yhat" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df.plot(kind='scatter',\n", " x='Height',\n", " y='Weight',\n", " title='Weight and Height in adults')\n", "plt.plot(x, yhat, color='red', linewidth=3)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Cost Function" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def mean_squared_error(y_true, y_pred):\n", " s = (y_true - y_pred)**2\n", " return s.mean()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "X = df[['Height']].values\n", "y_true = df['Weight'].values" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "y_true" ] }, { 
"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "y_pred = line(X)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "y_pred" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "mean_squared_error(y_true, y_pred.ravel())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### you do it!\n", "\n", "Try changing the values of the parameters b and w in the line above and plot it again to see how the plot and the cost change." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "plt.figure(figsize=(10, 5))\n", "\n", "# we are going to draw 2 plots in the same figure\n", "# first plot, data and a few lines\n", "ax1 = plt.subplot(121)\n", "df.plot(kind='scatter',\n", " x='Height',\n", " y='Weight',\n", " title='Weight and Height in adults', ax=ax1)\n", "\n", "# let's explore the cost function for a few values of b between -100 and +150\n", "bbs = np.array([-100, -50, 0, 50, 100, 150])\n", "mses = [] # we will append the values of the cost here, for each line\n", "for b in bbs:\n", " y_pred = line(X, w=2, b=b)\n", " mse = mean_squared_error(y_true, y_pred)\n", " mses.append(mse)\n", " plt.plot(X, y_pred)\n", "\n", "# second plot: Cost function\n", "ax2 = plt.subplot(122)\n", "plt.plot(bbs, mses, 'o-')\n", "plt.title('Cost as a function of b')\n", "plt.xlabel('b');" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Linear Regression with Keras" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from tensorflow.keras.models import Sequential\n", "from tensorflow.keras.layers import Dense\n", "from tensorflow.keras.optimizers import Adam, SGD" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ 
"model = Sequential()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model.add(Dense(1, input_shape=(1,)))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model.summary()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model.compile(Adam(learning_rate=0.8), 'mean_squared_error')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model.fit(X, y_true, epochs=40)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "y_pred = model.predict(X)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df.plot(kind='scatter',\n", " x='Height',\n", " y='Weight',\n", " title='Weight and Height in adults')\n", "plt.plot(X, y_pred, color='red')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "W, B = model.get_weights()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "W" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "B" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Evaluating Model Performance" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import r2_score" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(\"The R2 score is {:0.3f}\".format(r2_score(y_true, y_pred)))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Train Test Split" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "X_train, X_test, 
y_train, y_test = train_test_split(X, y_true,\n", " test_size=0.2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "len(X_train)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "len(X_test)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "W[0, 0] = 0.0\n", "B[0] = 0.0\n", "model.set_weights((W, B))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model.fit(X_train, y_train, epochs=50, verbose=0)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "y_train_pred = model.predict(X_train).ravel()\n", "y_test_pred = model.predict(X_test).ravel()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import mean_squared_error as mse" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(\"The Mean Squared Error on the Train set is:\\t{:0.1f}\".format(mse(y_train, y_train_pred)))\n", "print(\"The Mean Squared Error on the Test set is:\\t{:0.1f}\".format(mse(y_test, y_test_pred)))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(\"The R2 score on the Train set is:\\t{:0.3f}\".format(r2_score(y_train, y_train_pred)))\n", "print(\"The R2 score on the Test set is:\\t{:0.3f}\".format(r2_score(y_test, y_test_pred)))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Classification" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv('../data/user_visit_duration.csv')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df.plot(kind='scatter', x='Time 
(min)', y='Buy');" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model = Sequential()\n", "model.add(Dense(1, input_shape=(1,), activation='sigmoid'))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model.compile(SGD(learning_rate=0.5), 'binary_crossentropy', metrics=['accuracy'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model.summary()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "X = df[['Time (min)']].values\n", "y = df['Buy'].values\n", "\n", "model.fit(X, y, epochs=25)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ax = df.plot(kind='scatter', x='Time (min)', y='Buy',\n", " title='Purchase behavior VS time spent on site')\n", "\n", "temp = np.linspace(0, 4)\n", "ax.plot(temp, model.predict(temp), color='orange')\n", "plt.legend(['model', 'data'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "temp_class = model.predict(temp) > 0.5" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ax = df.plot(kind='scatter', x='Time (min)', y='Buy',\n", " title='Purchase behavior VS time spent on site')\n", "\n", "temp = np.linspace(0, 4)\n", "ax.plot(temp, temp_class, color='orange')\n", "plt.legend(['model', 'data'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "y_pred = model.predict(X)\n", "y_class_pred = y_pred > 0.5" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import accuracy_score" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(\"The accuracy score is {:0.3f}\".format(accuracy_score(y, y_class_pred)))" ] }, { "cell_type": "markdown", 
"metadata": {}, "source": [ "### Train/Test split\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "params = model.get_weights()\n", "params = [np.zeros(w.shape) for w in params]\n", "model.set_weights(params)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(\"The accuracy score is {:0.3f}\".format(accuracy_score(y, model.predict(X) > 0.5)))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model.fit(X_train, y_train, epochs=25, verbose=0)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(\"The train accuracy score is {:0.3f}\".format(accuracy_score(y_train, model.predict(X_train) > 0.5)))\n", "print(\"The test accuracy score is {:0.3f}\".format(accuracy_score(y_test, model.predict(X_test) > 0.5)))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Cross Validation" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from tensorflow.keras.wrappers.scikit_learn import KerasClassifier" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def build_logistic_regression_model():\n", " model = Sequential()\n", " model.add(Dense(1, input_shape=(1,), activation='sigmoid'))\n", " model.compile(SGD(learning_rate=0.5),\n", " 'binary_crossentropy',\n", " metrics=['accuracy'])\n", " return model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model = KerasClassifier(build_fn=build_logistic_regression_model,\n", " epochs=25,\n", " verbose=0)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from 
sklearn.model_selection import cross_val_score, KFold" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "cv = KFold(3, shuffle=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "scores = cross_val_score(model, X, y, cv=cv)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "scores" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(\"The cross validation accuracy is {:0.4f} ± {:0.4f}\".format(scores.mean(), scores.std()))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Confusion Matrix" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import confusion_matrix" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "confusion_matrix(y, y_class_pred)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def pretty_confusion_matrix(y_true, y_pred, labels=[\"False\", \"True\"]):\n", " cm = confusion_matrix(y_true, y_pred)\n", " pred_labels = ['Predicted '+ l for l in labels]\n", " df = pd.DataFrame(cm, index=labels, columns=pred_labels)\n", " return df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pretty_confusion_matrix(y, y_class_pred, ['Not Buy', 'Buy'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import precision_score, recall_score, f1_score" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(\"Precision:\\t{:0.3f}\".format(precision_score(y, y_class_pred)))\n", "print(\"Recall: \\t{:0.3f}\".format(recall_score(y, y_class_pred)))\n", "print(\"F1 Score:\\t{:0.3f}\".format(f1_score(y, y_class_pred)))" ] }, { 
"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import classification_report" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(classification_report(y, y_class_pred))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Feature Preprocessing" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Categorical Features" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv('../data/weight-height.csv')\n", "df.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df['Gender'].unique()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pd.get_dummies(df['Gender'], prefix='Gender').head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Feature Transformations" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 1) Rescale with fixed factor" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df['Height (feet)'] = df['Height']/12.0\n", "df['Weight (100 lbs)'] = df['Weight']/100.0" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df.describe().round(2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 2) MinMax normalization" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.preprocessing import MinMaxScaler\n", "\n", "mms = MinMaxScaler()\n", "df['Weight_mms'] = mms.fit_transform(df[['Weight']])\n", "df['Height_mms'] = mms.fit_transform(df[['Height']])\n", "df.describe().round(2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 3) Standard normalization" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from 
sklearn.preprocessing import StandardScaler\n", "\n", "ss = StandardScaler()\n", "df['Weight_ss'] = ss.fit_transform(df[['Weight']])\n", "df['Height_ss'] = ss.fit_transform(df[['Height']])\n", "df.describe().round(2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "plt.figure(figsize=(15, 5))\n", "\n", "for i, feature in enumerate(['Height', 'Height (feet)', 'Height_mms', 'Height_ss']):\n", " plt.subplot(1, 4, i+1)\n", " df[feature].plot(kind='hist', title=feature)\n", " plt.xlabel(feature);" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Machine Learning Exercises" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Exercise 1\n", "\n", "You've just been hired at a real estate investment firm and they would like you to build a model for pricing houses. You are given a dataset that contains data for house prices and a few features like number of bedrooms, size in square feet and age of the house. Let's see if you can build a model that is able to predict the price. In this exercise we extend what we have learned about linear regression to a dataset with more than one feature. Here are the steps to complete it:\n", "\n", "1. Load the dataset ../data/housing-data.csv\n", "- plot the histograms for each feature\n", "- create 2 variables called X and y: X shall be a matrix with 3 columns (sqft,bdrms,age) and y shall be a vector with 1 column (price)\n", "- create a linear regression model in Keras with the appropriate number of inputs and output\n", "- split the data into train and test with a 20% test size\n", "- train the model on the training set and check its accuracy on training and test set\n", "- how's your model doing? 
Is the loss growing smaller?\n", "- try to improve your model with these experiments:\n", " - normalize the input features with one of the rescaling techniques mentioned above\n", " - use a different value for the learning rate of your model\n", " - use a different optimizer\n", "- once you're satisfied with training, check the R2 score on the test set" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Exercise 2\n", "\n", "Your boss was extremely happy with your work on the housing price prediction model and decided to entrust you with a more challenging task. They've seen a lot of people leave the company recently and they would like to understand why that's happening. They have collected historical data on employees and they would like you to build a model that is able to predict which employee will leave next. They would like a model that is better than random guessing. They also prefer false negatives to false positives, in this first phase. Fields in the dataset include:\n", "\n", "- Employee satisfaction level\n", "- Last evaluation\n", "- Number of projects\n", "- Average monthly hours\n", "- Time spent at the company\n", "- Whether they have had a work accident\n", "- Whether they have had a promotion in the last 5 years\n", "- Department\n", "- Salary\n", "- Whether the employee has left\n", "\n", "Your goal is to predict the binary outcome variable `left` using the rest of the data. Since the outcome is binary, this is a classification problem. Here are some things you may want to try out:\n", "\n", "1. load the dataset at ../data/HR_comma_sep.csv, inspect it with `.head()`, `.info()` and `.describe()`.\n", "- Establish a benchmark: what would be your accuracy score if you predicted that everyone stays?\n", "- Check if any feature needs rescaling. 
You may plot a histogram of the feature to decide which rescaling method is more appropriate.\n", "- convert the categorical features into binary dummy columns. You will then have to combine them with the numerical features using `pd.concat`.\n", "- do the usual train/test split with a 20% test size\n", "- play around with learning rate and optimizer\n", "- check the confusion matrix, precision and recall\n", "- check if you still get the same results if you use a 5-Fold cross validation on all the data\n", "- Is the model good enough for your boss?\n", "\n", "As you will see in this exercise, a logistic regression model is not good enough to help your boss. In the next chapter we will learn how to go beyond linear models.\n", "\n", "This dataset comes from https://www.kaggle.com/ludobenistant/hr-analytics/ and is released under [CC BY-SA 4.0 License](https://creativecommons.org/licenses/by-sa/4.0/)." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.10" } }, "nbformat": 4, "nbformat_minor": 2 }