add all files

This commit is contained in:
Sem van der Hoeven
2021-05-26 15:12:05 +02:00
parent 5c3be10247
commit d979ca38f5
44 changed files with 43574 additions and 0 deletions

View File

@@ -0,0 +1,404 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Exercise 1\n",
"- load the dataset: `../data/international-airline-passengers.csv`\n",
"- inspect it using the `.info()` and `.head()` commands\n",
"- use the function `pd.to_datetime()` to change the column type of 'Month' to a datatime type\n",
"- set the index of df to be a datetime index using the column 'Month' and the `df.set_index()` method\n",
"- choose the appropriate plot and display the data\n",
"- choose appropriate scale\n",
"- label the axes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# - load the dataset: ../data/international-airline-passengers.csv\n",
"df = pd.read_csv('../data/international-airline-passengers.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# - inspect it using the .info() and .head() commands\n",
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# - use the function to_datetime() to change the column type of 'Month' to a datatime type\n",
"# - set the index of df to be a datetime index using the column 'Month' and tthe set_index() method\n",
"\n",
"df['Month'] = pd.to_datetime(df['Month'])\n",
"df = df.set_index('Month')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# - choose the appropriate plot and display the data\n",
"# - choose appropriate scale\n",
"# - label the axes\n",
"\n",
"df.plot();"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Exercise 2\n",
"- load the dataset: `../data/weight-height.csv`\n",
"- inspect it\n",
"- plot it using a scatter plot with Weight as a function of Height\n",
"- plot the male and female populations with 2 different colors on a new scatter plot\n",
"- remember to label the axes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# - load the dataset: ../data/weight-height.csv\n",
"# - inspect it\n",
"df = pd.read_csv('../data/weight-height.csv')\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"df['Gender'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# - plot it using a scatter plot with Weight as a function of Height\n",
"_ = df.plot(kind='scatter', x='Height', y='Weight');"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# - plot the male and female populations with 2 different colors on a new scatter plot\n",
"# - remember to label the axes\n",
"\n",
"# this can be done in several ways, showing 2 here:\n",
"males = df[df['Gender'] == 'Male']\n",
"females = df.query('Gender == \"Female\"')\n",
"fig, ax = plt.subplots()\n",
"\n",
"males.plot(kind='scatter', x='Height', y='Weight',\n",
" ax=ax, color='blue', alpha=0.3,\n",
" title='Male & Female Populations')\n",
"\n",
"females.plot(kind='scatter', x='Height', y='Weight',\n",
" ax=ax, color='red', alpha=0.3);"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df['Gendercolor'] = df['Gender'].map({'Male': 'blue', 'Female': 'red'})\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.plot(kind='scatter', \n",
" x='Height',\n",
" y='Weight',\n",
" c=df['Gendercolor'],\n",
" alpha=0.3,\n",
" title='Male & Female Populations');"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig, ax = plt.subplots()\n",
"ax.plot(males['Height'], males['Weight'], 'ob', \n",
" females['Height'], females['Weight'], 'or', alpha=0.3)\n",
"plt.xlabel('Height')\n",
"plt.ylabel('Weight')\n",
"plt.title('Male & Female Populations');"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"## Exercise 3\n",
"- plot the histogram of the heights for males and for females on the same plot\n",
"- use alpha to control transparency in the plot comand\n",
"- plot a vertical line at the mean of each population using `plt.axvline()`"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"males['Height'].plot(kind='hist',\n",
" bins=50,\n",
" range=(50, 80),\n",
" alpha=0.3,\n",
" color='blue')\n",
"\n",
"females['Height'].plot(kind='hist',\n",
" bins=50,\n",
" range=(50, 80),\n",
" alpha=0.3,\n",
" color='red')\n",
"\n",
"plt.title('Height distribution')\n",
"plt.legend([\"Males\", \"Females\"])\n",
"plt.xlabel(\"Heigth (in)\")\n",
"\n",
"\n",
"plt.axvline(males['Height'].mean(), color='blue', linewidth=2)\n",
"plt.axvline(females['Height'].mean(), color='red', linewidth=2);"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"males['Height'].plot(kind='hist',\n",
" bins=200,\n",
" range=(50, 80),\n",
" alpha=0.3,\n",
" color='blue',\n",
" cumulative=True,\n",
" density=True)\n",
"\n",
"females['Height'].plot(kind='hist',\n",
" bins=200,\n",
" range=(50, 80),\n",
" alpha=0.3,\n",
" color='red',\n",
" cumulative=True,\n",
" density=True)\n",
"\n",
"plt.title('Height distribution')\n",
"plt.legend([\"Males\", \"Females\"])\n",
"plt.xlabel(\"Heigth (in)\")\n",
"\n",
"plt.axhline(0.8)\n",
"plt.axhline(0.5)\n",
"plt.axhline(0.2);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Exercise 4\n",
"- plot the weights of the males and females using a box plot\n",
"- which one is easier to read?\n",
"- (remember to put in titles, axes and legends)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dfpvt = df.pivot(columns = 'Gender', values = 'Weight')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dfpvt.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dfpvt.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dfpvt.plot(kind='box')\n",
"plt.title('Weight Box Plot')\n",
"plt.ylabel(\"Weight (lbs)\");"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Exercise 5\n",
"- load the dataset: `../data/titanic-train.csv`\n",
"- learn about scattermatrix here: http://pandas.pydata.org/pandas-docs/stable/visualization.html\n",
"- display the data using a scattermatrix"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('../data/titanic-train.csv')\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pandas.plotting import scatter_matrix"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"_ = scatter_matrix(df.drop('PassengerId', axis=1), figsize=(10, 10))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 1
}