{ "cells": [ { "cell_type": "markdown", "id": "ff15316b-91c4-410d-9a74-a4b4dfaac561", "metadata": {}, "source": [ "# Case study: heat map" ] }, { "cell_type": "code", "execution_count": 1, "id": "e6310c06-115d-469d-9ea4-806097195884", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 2, "id": "53b89977-6ad6-446a-a0ff-402bae2e7011", "metadata": {}, "outputs": [], "source": [ "DATA_URL = 'https://raw.githubusercontent.com/health-data-science-OR/' \\\n", " + 'hpdm139-datasets/main/wisconsin.zip'" ] }, { "cell_type": "code", "execution_count": 3, "id": "6f616dbd-5f35-4574-b9aa-d8d7d033ace6", "metadata": {}, "outputs": [], "source": [ "def load_wisconsin(path):\n", "    '''\n", "    Load and clean the Wisconsin breast cancer dataset.\n", "\n", "    Drops the 'Unnamed: 0' and 'id' columns and recodes the\n", "    'diagnosis' label (M -> 1, B -> 0) as np.byte.\n", "\n", "    Params:\n", "    ------\n", "    path: str or file-like\n", "        Passed straight to pd.read_csv.\n", "\n", "    Returns:\n", "    -------\n", "    (pd.DataFrame, pd.Series)\n", "        Every remaining column except 'diagnosis', and the\n", "        recoded 'diagnosis' column.\n", "    '''\n", "    label = 'diagnosis'\n", "    # recode to digit strings; .astype below parses them to np.byte\n", "    recoded_label = {label: {'M':'1',\n", "                             'B':'0'}}\n", "\n", "    to_drop = ['Unnamed: 0', 'id']\n", "    df = (pd.read_csv(path)\n", "            .drop(to_drop, axis=1)\n", "            .replace(recoded_label)\n", "            .astype({label: np.byte})\n", "         )\n", "    # split on the label's name, not its position, so a reordered\n", "    # file cannot leak the label into the feature matrix\n", "    return df.drop(columns=[label]), df[label]" ] }, { "cell_type": "code", "execution_count": 4, "id": "84951e8c-5dde-46f9-937d-424ff39c95e6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | radius_mean | \n", "texture_mean | \n", "perimeter_mean | \n", "area_mean | \n", "smoothness_mean | \n", "compactness_mean | \n", "concavity_mean | \n", "concave points_mean | \n", "symmetry_mean | \n", "fractal_dimension_mean | \n", "... | \n", "radius_worst | \n", "texture_worst | \n", "perimeter_worst | \n", "area_worst | \n", "smoothness_worst | \n", "compactness_worst | \n", "concavity_worst | \n", "concave points_worst | \n", "symmetry_worst | \n", "fractal_dimension_worst | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "17.99 | \n", "10.38 | \n", "122.8 | \n", "1001.0 | \n", "0.11840 | \n", "0.27760 | \n", "0.3001 | \n", "0.14710 | \n", "0.2419 | \n", "0.07871 | \n", "... | \n", "25.38 | \n", "17.33 | \n", "184.6 | \n", "2019.0 | \n", "0.1622 | \n", "0.6656 | \n", "0.7119 | \n", "0.2654 | \n", "0.4601 | \n", "0.11890 | \n", "
1 | \n", "20.57 | \n", "17.77 | \n", "132.9 | \n", "1326.0 | \n", "0.08474 | \n", "0.07864 | \n", "0.0869 | \n", "0.07017 | \n", "0.1812 | \n", "0.05667 | \n", "... | \n", "24.99 | \n", "23.41 | \n", "158.8 | \n", "1956.0 | \n", "0.1238 | \n", "0.1866 | \n", "0.2416 | \n", "0.1860 | \n", "0.2750 | \n", "0.08902 | \n", "
2 rows × 30 columns
\n", "