{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyMHXlkgClcqQMNXPWE7LeNG",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"
"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "q_2WoqLTuaO5",
"outputId": "df2a1adf-c3eb-4db4-a224-cfffe52d91b4"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: sqlalchemy in /usr/local/lib/python3.10/dist-packages (2.0.20)\n",
"Requirement already satisfied: typing-extensions>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from sqlalchemy) (4.5.0)\n",
"Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from sqlalchemy) (2.0.2)\n",
"Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (1.5.3)\n",
"Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2023.3.post1)\n",
"Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from pandas) (1.23.5)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n"
]
}
],
"source": [
"!pip install sqlalchemy\n",
"\n",
"import sqlalchemy\n",
"\n",
"from sqlalchemy import text\n",
"\n",
"!pip install pandas\n",
"import pandas as pd\n"
]
},
{
"cell_type": "code",
"source": [
"#load csv file\n",
"df = pd.read_csv('Hospital_patients_datasets.csv')\n",
"df.head(5)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 330
},
"id": "yfGloQ_ButAW",
"outputId": "c6c456e4-fb65-409e-f8f0-3079d6f75347"
},
"execution_count": 41,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" PatientId AppointmentID Gender ScheduledDay \\\n",
"0 2.987250e+13 5642903 F 2016-04-29T18:38:08Z \n",
"1 5.589980e+14 5642503 M 2016-04-29T16:08:27Z \n",
"2 4.262960e+12 5642549 F 2016-04-29T16:19:04Z \n",
"3 8.679510e+11 5642828 F 2016-04-29T17:29:31Z \n",
"4 8.841190e+12 5642494 F 2016-04-29T16:07:23Z \n",
"\n",
" AppointmentDay Age Neighbourhood Scholarship Hipertension \\\n",
"0 2016-04-29T00:00:00Z 62 JARDIM DA PENHA 0 1 \n",
"1 2016-04-29T00:00:00Z 56 JARDIM DA PENHA 0 0 \n",
"2 2016-04-29T00:00:00Z 62 MATA DA PRAIA 0 0 \n",
"3 2016-04-29T00:00:00Z 8 PONTAL DE CAMBURI 0 0 \n",
"4 2016-04-29T00:00:00Z 56 JARDIM DA PENHA 0 1 \n",
"\n",
" Diabetes Alcoholism Handcap SMS_received No-show \n",
"0 0 0 0 0 No \n",
"1 0 0 0 0 No \n",
"2 0 0 0 0 No \n",
"3 0 0 0 0 No \n",
"4 1 0 0 0 No "
],
"text/html": [
"\n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" PatientId | \n",
" AppointmentID | \n",
" Gender | \n",
" ScheduledDay | \n",
" AppointmentDay | \n",
" Age | \n",
" Neighbourhood | \n",
" Scholarship | \n",
" Hipertension | \n",
" Diabetes | \n",
" Alcoholism | \n",
" Handcap | \n",
" SMS_received | \n",
" No-show | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2.987250e+13 | \n",
" 5642903 | \n",
" F | \n",
" 2016-04-29T18:38:08Z | \n",
" 2016-04-29T00:00:00Z | \n",
" 62 | \n",
" JARDIM DA PENHA | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
" 1 | \n",
" 5.589980e+14 | \n",
" 5642503 | \n",
" M | \n",
" 2016-04-29T16:08:27Z | \n",
" 2016-04-29T00:00:00Z | \n",
" 56 | \n",
" JARDIM DA PENHA | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
" 2 | \n",
" 4.262960e+12 | \n",
" 5642549 | \n",
" F | \n",
" 2016-04-29T16:19:04Z | \n",
" 2016-04-29T00:00:00Z | \n",
" 62 | \n",
" MATA DA PRAIA | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
" 3 | \n",
" 8.679510e+11 | \n",
" 5642828 | \n",
" F | \n",
" 2016-04-29T17:29:31Z | \n",
" 2016-04-29T00:00:00Z | \n",
" 8 | \n",
" PONTAL DE CAMBURI | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
" 4 | \n",
" 8.841190e+12 | \n",
" 5642494 | \n",
" F | \n",
" 2016-04-29T16:07:23Z | \n",
" 2016-04-29T00:00:00Z | \n",
" 56 | \n",
" JARDIM DA PENHA | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n"
]
},
"metadata": {},
"execution_count": 41
}
]
},
{
"cell_type": "code",
"source": [
"\n",
"# Remove duplicate rows\n",
"df=pd.read_csv('Hospital_patients_datasets.csv')\n",
"df.drop_duplicates(keep=False, inplace=True)\n",
"df"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 634
},
"id": "16d_vWJmv2VP",
"outputId": "ef6806b2-4109-465e-f0c4-d114c28e6329"
},
"execution_count": 42,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" PatientId AppointmentID Gender ScheduledDay \\\n",
"0 2.987250e+13 5642903 F 2016-04-29T18:38:08Z \n",
"1 5.589980e+14 5642503 M 2016-04-29T16:08:27Z \n",
"2 4.262960e+12 5642549 F 2016-04-29T16:19:04Z \n",
"3 8.679510e+11 5642828 F 2016-04-29T17:29:31Z \n",
"4 8.841190e+12 5642494 F 2016-04-29T16:07:23Z \n",
"... ... ... ... ... \n",
"110522 2.572130e+12 5651768 F 2016-05-03T09:15:35Z \n",
"110523 3.596270e+12 5650093 F 2016-05-03T07:27:33Z \n",
"110524 1.557660e+13 5630692 F 2016-04-27T16:03:52Z \n",
"110525 9.213490e+13 5630323 F 2016-04-27T15:09:23Z \n",
"110526 3.775120e+14 5629448 F 2016-04-27T13:30:56Z \n",
"\n",
" AppointmentDay Age Neighbourhood Scholarship \\\n",
"0 2016-04-29T00:00:00Z 62 JARDIM DA PENHA 0 \n",
"1 2016-04-29T00:00:00Z 56 JARDIM DA PENHA 0 \n",
"2 2016-04-29T00:00:00Z 62 MATA DA PRAIA 0 \n",
"3 2016-04-29T00:00:00Z 8 PONTAL DE CAMBURI 0 \n",
"4 2016-04-29T00:00:00Z 56 JARDIM DA PENHA 0 \n",
"... ... ... ... ... \n",
"110522 2016-06-07T00:00:00Z 56 MARIA ORTIZ 0 \n",
"110523 2016-06-07T00:00:00Z 51 MARIA ORTIZ 0 \n",
"110524 2016-06-07T00:00:00Z 21 MARIA ORTIZ 0 \n",
"110525 2016-06-07T00:00:00Z 38 MARIA ORTIZ 0 \n",
"110526 2016-06-07T00:00:00Z 54 MARIA ORTIZ 0 \n",
"\n",
" Hipertension Diabetes Alcoholism Handcap SMS_received No-show \n",
"0 1 0 0 0 0 No \n",
"1 0 0 0 0 0 No \n",
"2 0 0 0 0 0 No \n",
"3 0 0 0 0 0 No \n",
"4 1 1 0 0 0 No \n",
"... ... ... ... ... ... ... \n",
"110522 0 0 0 0 1 No \n",
"110523 0 0 0 0 1 No \n",
"110524 0 0 0 0 1 No \n",
"110525 0 0 0 0 1 No \n",
"110526 0 0 0 0 1 No \n",
"\n",
"[110527 rows x 14 columns]"
],
"text/html": [
"\n",
" \n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" PatientId | \n",
" AppointmentID | \n",
" Gender | \n",
" ScheduledDay | \n",
" AppointmentDay | \n",
" Age | \n",
" Neighbourhood | \n",
" Scholarship | \n",
" Hipertension | \n",
" Diabetes | \n",
" Alcoholism | \n",
" Handcap | \n",
" SMS_received | \n",
" No-show | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2.987250e+13 | \n",
" 5642903 | \n",
" F | \n",
" 2016-04-29T18:38:08Z | \n",
" 2016-04-29T00:00:00Z | \n",
" 62 | \n",
" JARDIM DA PENHA | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
" 1 | \n",
" 5.589980e+14 | \n",
" 5642503 | \n",
" M | \n",
" 2016-04-29T16:08:27Z | \n",
" 2016-04-29T00:00:00Z | \n",
" 56 | \n",
" JARDIM DA PENHA | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
" 2 | \n",
" 4.262960e+12 | \n",
" 5642549 | \n",
" F | \n",
" 2016-04-29T16:19:04Z | \n",
" 2016-04-29T00:00:00Z | \n",
" 62 | \n",
" MATA DA PRAIA | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
" 3 | \n",
" 8.679510e+11 | \n",
" 5642828 | \n",
" F | \n",
" 2016-04-29T17:29:31Z | \n",
" 2016-04-29T00:00:00Z | \n",
" 8 | \n",
" PONTAL DE CAMBURI | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
" 4 | \n",
" 8.841190e+12 | \n",
" 5642494 | \n",
" F | \n",
" 2016-04-29T16:07:23Z | \n",
" 2016-04-29T00:00:00Z | \n",
" 56 | \n",
" JARDIM DA PENHA | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 110522 | \n",
" 2.572130e+12 | \n",
" 5651768 | \n",
" F | \n",
" 2016-05-03T09:15:35Z | \n",
" 2016-06-07T00:00:00Z | \n",
" 56 | \n",
" MARIA ORTIZ | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" No | \n",
"
\n",
" \n",
" 110523 | \n",
" 3.596270e+12 | \n",
" 5650093 | \n",
" F | \n",
" 2016-05-03T07:27:33Z | \n",
" 2016-06-07T00:00:00Z | \n",
" 51 | \n",
" MARIA ORTIZ | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" No | \n",
"
\n",
" \n",
" 110524 | \n",
" 1.557660e+13 | \n",
" 5630692 | \n",
" F | \n",
" 2016-04-27T16:03:52Z | \n",
" 2016-06-07T00:00:00Z | \n",
" 21 | \n",
" MARIA ORTIZ | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" No | \n",
"
\n",
" \n",
" 110525 | \n",
" 9.213490e+13 | \n",
" 5630323 | \n",
" F | \n",
" 2016-04-27T15:09:23Z | \n",
" 2016-06-07T00:00:00Z | \n",
" 38 | \n",
" MARIA ORTIZ | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" No | \n",
"
\n",
" \n",
" 110526 | \n",
" 3.775120e+14 | \n",
" 5629448 | \n",
" F | \n",
" 2016-04-27T13:30:56Z | \n",
" 2016-06-07T00:00:00Z | \n",
" 54 | \n",
" MARIA ORTIZ | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" No | \n",
"
\n",
" \n",
"
\n",
"
110527 rows × 14 columns
\n",
"
\n",
"
\n",
"
\n"
]
},
"metadata": {},
"execution_count": 42
}
]
},
{
"cell_type": "code",
"source": [
"#Coverted Datatime to pandas dateonly\n",
"df=pd.read_csv('Hospital_patients_datasets.csv')\n",
"df[\"ScheduledDay\"] = pd.to_datetime(df[\"ScheduledDay\"]).dt.date\n",
"df[\"AppointmentDay\"] = pd.to_datetime(df[\"AppointmentDay\"]).dt.date\n",
"\n",
"df.head(5)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 330
},
"id": "TVe4wxuXDNcB",
"outputId": "cb8401be-c188-45ea-bba4-d770017959f2"
},
"execution_count": 43,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" PatientId AppointmentID Gender ScheduledDay AppointmentDay Age \\\n",
"0 2.987250e+13 5642903 F 2016-04-29 2016-04-29 62 \n",
"1 5.589980e+14 5642503 M 2016-04-29 2016-04-29 56 \n",
"2 4.262960e+12 5642549 F 2016-04-29 2016-04-29 62 \n",
"3 8.679510e+11 5642828 F 2016-04-29 2016-04-29 8 \n",
"4 8.841190e+12 5642494 F 2016-04-29 2016-04-29 56 \n",
"\n",
" Neighbourhood Scholarship Hipertension Diabetes Alcoholism \\\n",
"0 JARDIM DA PENHA 0 1 0 0 \n",
"1 JARDIM DA PENHA 0 0 0 0 \n",
"2 MATA DA PRAIA 0 0 0 0 \n",
"3 PONTAL DE CAMBURI 0 0 0 0 \n",
"4 JARDIM DA PENHA 0 1 1 0 \n",
"\n",
" Handcap SMS_received No-show \n",
"0 0 0 No \n",
"1 0 0 No \n",
"2 0 0 No \n",
"3 0 0 No \n",
"4 0 0 No "
],
"text/html": [
"\n",
" \n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" PatientId | \n",
" AppointmentID | \n",
" Gender | \n",
" ScheduledDay | \n",
" AppointmentDay | \n",
" Age | \n",
" Neighbourhood | \n",
" Scholarship | \n",
" Hipertension | \n",
" Diabetes | \n",
" Alcoholism | \n",
" Handcap | \n",
" SMS_received | \n",
" No-show | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2.987250e+13 | \n",
" 5642903 | \n",
" F | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 62 | \n",
" JARDIM DA PENHA | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
" 1 | \n",
" 5.589980e+14 | \n",
" 5642503 | \n",
" M | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 56 | \n",
" JARDIM DA PENHA | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
" 2 | \n",
" 4.262960e+12 | \n",
" 5642549 | \n",
" F | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 62 | \n",
" MATA DA PRAIA | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
" 3 | \n",
" 8.679510e+11 | \n",
" 5642828 | \n",
" F | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 8 | \n",
" PONTAL DE CAMBURI | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
" 4 | \n",
" 8.841190e+12 | \n",
" 5642494 | \n",
" F | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 56 | \n",
" JARDIM DA PENHA | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n"
]
},
"metadata": {},
"execution_count": 43
}
]
},
{
"cell_type": "code",
"source": [
"#Renamed specific columns\n",
"df.rename(columns={'Hipertension': 'Hypertension', 'Handcap': 'Handicap', 'SMS_received': 'SMSReceived', 'No-show': 'NoShow'}, inplace=True)\n",
"df.head(5)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 313
},
"id": "7s5FBIp1GJl6",
"outputId": "8ec46d91-3983-4365-ac04-b8456e8a8cb3"
},
"execution_count": 44,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" PatientId AppointmentID Gender ScheduledDay AppointmentDay Age \\\n",
"0 2.987250e+13 5642903 F 2016-04-29 2016-04-29 62 \n",
"1 5.589980e+14 5642503 M 2016-04-29 2016-04-29 56 \n",
"2 4.262960e+12 5642549 F 2016-04-29 2016-04-29 62 \n",
"3 8.679510e+11 5642828 F 2016-04-29 2016-04-29 8 \n",
"4 8.841190e+12 5642494 F 2016-04-29 2016-04-29 56 \n",
"\n",
" Neighbourhood Scholarship Hypertension Diabetes Alcoholism \\\n",
"0 JARDIM DA PENHA 0 1 0 0 \n",
"1 JARDIM DA PENHA 0 0 0 0 \n",
"2 MATA DA PRAIA 0 0 0 0 \n",
"3 PONTAL DE CAMBURI 0 0 0 0 \n",
"4 JARDIM DA PENHA 0 1 1 0 \n",
"\n",
" Handicap SMSReceived NoShow \n",
"0 0 0 No \n",
"1 0 0 No \n",
"2 0 0 No \n",
"3 0 0 No \n",
"4 0 0 No "
],
"text/html": [
"\n",
" \n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" PatientId | \n",
" AppointmentID | \n",
" Gender | \n",
" ScheduledDay | \n",
" AppointmentDay | \n",
" Age | \n",
" Neighbourhood | \n",
" Scholarship | \n",
" Hypertension | \n",
" Diabetes | \n",
" Alcoholism | \n",
" Handicap | \n",
" SMSReceived | \n",
" NoShow | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2.987250e+13 | \n",
" 5642903 | \n",
" F | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 62 | \n",
" JARDIM DA PENHA | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
" 1 | \n",
" 5.589980e+14 | \n",
" 5642503 | \n",
" M | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 56 | \n",
" JARDIM DA PENHA | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
" 2 | \n",
" 4.262960e+12 | \n",
" 5642549 | \n",
" F | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 62 | \n",
" MATA DA PRAIA | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
" 3 | \n",
" 8.679510e+11 | \n",
" 5642828 | \n",
" F | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 8 | \n",
" PONTAL DE CAMBURI | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
" 4 | \n",
" 8.841190e+12 | \n",
" 5642494 | \n",
" F | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 56 | \n",
" JARDIM DA PENHA | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n"
]
},
"metadata": {},
"execution_count": 44
}
]
},
{
"cell_type": "code",
"source": [
"#Dropped unwanted columns\n",
"df.drop(columns=['PatientId', 'AppointmentID', 'Neighbourhood'], inplace=True)\n",
"df.head(5)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "GARrUQpdHd3m",
"outputId": "5c22d435-7733-4c43-8ee8-4117b48a0f93"
},
"execution_count": 45,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Gender ScheduledDay AppointmentDay Age Scholarship Hypertension \\\n",
"0 F 2016-04-29 2016-04-29 62 0 1 \n",
"1 M 2016-04-29 2016-04-29 56 0 0 \n",
"2 F 2016-04-29 2016-04-29 62 0 0 \n",
"3 F 2016-04-29 2016-04-29 8 0 0 \n",
"4 F 2016-04-29 2016-04-29 56 0 1 \n",
"\n",
" Diabetes Alcoholism Handicap SMSReceived NoShow \n",
"0 0 0 0 0 No \n",
"1 0 0 0 0 No \n",
"2 0 0 0 0 No \n",
"3 0 0 0 0 No \n",
"4 1 0 0 0 No "
],
"text/html": [
"\n",
" \n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Gender | \n",
" ScheduledDay | \n",
" AppointmentDay | \n",
" Age | \n",
" Scholarship | \n",
" Hypertension | \n",
" Diabetes | \n",
" Alcoholism | \n",
" Handicap | \n",
" SMSReceived | \n",
" NoShow | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" F | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 62 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
" 1 | \n",
" M | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 56 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
" 2 | \n",
" F | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 62 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
" 3 | \n",
" F | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 8 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
" 4 | \n",
" F | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 56 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n"
]
},
"metadata": {},
"execution_count": 45
}
]
},
{
"cell_type": "code",
"source": [
"#Created new column called Age_group\n",
"bins = [0, 20, 40, 60, 80, 100]\n",
"labels = ['0-20', '20-40', '40-60', '60-80', '80+']\n",
"\n",
"df['Age_group'] = pd.cut(df['Age'], bins=bins, labels=labels)\n",
"df.head(5)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "dtMlYFOfJJth",
"outputId": "5b1e2cfb-2d54-4258-e975-aff9b27546a4"
},
"execution_count": 46,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Gender ScheduledDay AppointmentDay Age Scholarship Hypertension \\\n",
"0 F 2016-04-29 2016-04-29 62 0 1 \n",
"1 M 2016-04-29 2016-04-29 56 0 0 \n",
"2 F 2016-04-29 2016-04-29 62 0 0 \n",
"3 F 2016-04-29 2016-04-29 8 0 0 \n",
"4 F 2016-04-29 2016-04-29 56 0 1 \n",
"\n",
" Diabetes Alcoholism Handicap SMSReceived NoShow Age_group \n",
"0 0 0 0 0 No 60-80 \n",
"1 0 0 0 0 No 40-60 \n",
"2 0 0 0 0 No 60-80 \n",
"3 0 0 0 0 No 0-20 \n",
"4 1 0 0 0 No 40-60 "
],
"text/html": [
"\n",
" \n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Gender | \n",
" ScheduledDay | \n",
" AppointmentDay | \n",
" Age | \n",
" Scholarship | \n",
" Hypertension | \n",
" Diabetes | \n",
" Alcoholism | \n",
" Handicap | \n",
" SMSReceived | \n",
" NoShow | \n",
" Age_group | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" F | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 62 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
" 60-80 | \n",
"
\n",
" \n",
" 1 | \n",
" M | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 56 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
" 40-60 | \n",
"
\n",
" \n",
" 2 | \n",
" F | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 62 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
" 60-80 | \n",
"
\n",
" \n",
" 3 | \n",
" F | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 8 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
" 0-20 | \n",
"
\n",
" \n",
" 4 | \n",
" F | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 56 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
" 40-60 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n"
]
},
"metadata": {},
"execution_count": 46
}
]
},
{
"cell_type": "code",
"source": [
"#Drop the Age column\n",
"df.drop('Age', axis='columns', inplace=True)\n",
"df.head(5)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "mKg94eTBKwrW",
"outputId": "6912cffb-30b4-4a72-c63e-f001785b2e34"
},
"execution_count": 47,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Gender ScheduledDay AppointmentDay Scholarship Hypertension Diabetes \\\n",
"0 F 2016-04-29 2016-04-29 0 1 0 \n",
"1 M 2016-04-29 2016-04-29 0 0 0 \n",
"2 F 2016-04-29 2016-04-29 0 0 0 \n",
"3 F 2016-04-29 2016-04-29 0 0 0 \n",
"4 F 2016-04-29 2016-04-29 0 1 1 \n",
"\n",
" Alcoholism Handicap SMSReceived NoShow Age_group \n",
"0 0 0 0 No 60-80 \n",
"1 0 0 0 No 40-60 \n",
"2 0 0 0 No 60-80 \n",
"3 0 0 0 No 0-20 \n",
"4 0 0 0 No 40-60 "
],
"text/html": [
"\n",
" \n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Gender | \n",
" ScheduledDay | \n",
" AppointmentDay | \n",
" Scholarship | \n",
" Hypertension | \n",
" Diabetes | \n",
" Alcoholism | \n",
" Handicap | \n",
" SMSReceived | \n",
" NoShow | \n",
" Age_group | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" F | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
" 60-80 | \n",
"
\n",
" \n",
" 1 | \n",
" M | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
" 40-60 | \n",
"
\n",
" \n",
" 2 | \n",
" F | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
" 60-80 | \n",
"
\n",
" \n",
" 3 | \n",
" F | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
" 0-20 | \n",
"
\n",
" \n",
" 4 | \n",
" F | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" No | \n",
" 40-60 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n"
]
},
"metadata": {},
"execution_count": 47
}
]
},
{
"cell_type": "code",
"source": [
"#Convert NoShow Datatype to binary\n",
"mapping = {'Yes': 1, 'No': 0}\n",
"df['NoShow'] = df['NoShow'].replace(mapping)\n",
"df.head(5)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "4mxs-NLOXpJB",
"outputId": "cce97dd7-b72d-4c61-af91-aa06b6a89a59"
},
"execution_count": 48,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Gender ScheduledDay AppointmentDay Scholarship Hypertension Diabetes \\\n",
"0 F 2016-04-29 2016-04-29 0 1 0 \n",
"1 M 2016-04-29 2016-04-29 0 0 0 \n",
"2 F 2016-04-29 2016-04-29 0 0 0 \n",
"3 F 2016-04-29 2016-04-29 0 0 0 \n",
"4 F 2016-04-29 2016-04-29 0 1 1 \n",
"\n",
" Alcoholism Handicap SMSReceived NoShow Age_group \n",
"0 0 0 0 0 60-80 \n",
"1 0 0 0 0 40-60 \n",
"2 0 0 0 0 60-80 \n",
"3 0 0 0 0 0-20 \n",
"4 0 0 0 0 40-60 "
],
"text/html": [
"\n",
" \n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Gender | \n",
" ScheduledDay | \n",
" AppointmentDay | \n",
" Scholarship | \n",
" Hypertension | \n",
" Diabetes | \n",
" Alcoholism | \n",
" Handicap | \n",
" SMSReceived | \n",
" NoShow | \n",
" Age_group | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" F | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 60-80 | \n",
"
\n",
" \n",
" 1 | \n",
" M | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 40-60 | \n",
"
\n",
" \n",
" 2 | \n",
" F | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 60-80 | \n",
"
\n",
" \n",
" 3 | \n",
" F | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0-20 | \n",
"
\n",
" \n",
" 4 | \n",
" F | \n",
" 2016-04-29 | \n",
" 2016-04-29 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 40-60 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n"
]
},
"metadata": {},
"execution_count": 48
}
]
},
{
"cell_type": "code",
"source": [
"# Export the DataFrame to a CSV file\n",
"df.to_csv('patients.csv', index=False)"
],
"metadata": {
"id": "c5W5pMZpZFUj"
},
"execution_count": 49,
"outputs": []
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "ZdzhXUxEa71f"
},
"execution_count": null,
"outputs": []
}
]
}