{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "authorship_tag": "ABX9TyMHXlkgClcqQMNXPWE7LeNG", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "q_2WoqLTuaO5", "outputId": "df2a1adf-c3eb-4db4-a224-cfffe52d91b4" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: sqlalchemy in /usr/local/lib/python3.10/dist-packages (2.0.20)\n", "Requirement already satisfied: typing-extensions>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from sqlalchemy) (4.5.0)\n", "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from sqlalchemy) (2.0.2)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (1.5.3)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2023.3.post1)\n", "Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from pandas) (1.23.5)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n" ] } ], "source": [ "# Use %pip (not !pip) so packages are installed into the running kernel's environment\n", "%pip install sqlalchemy pandas\n", "\n", "import pandas as pd\n", "import sqlalchemy\n", "from sqlalchemy import text\n" ] }, { "cell_type": "code", "source": [ "# Load the raw CSV file and preview the first rows\n", "df = pd.read_csv('Hospital_patients_datasets.csv')\n", "df.head(5)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 330 }, "id": "yfGloQ_ButAW",
"outputId": "c6c456e4-fb65-409e-f8f0-3079d6f75347" }, "execution_count": 41, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " PatientId AppointmentID Gender ScheduledDay \\\n", "0 2.987250e+13 5642903 F 2016-04-29T18:38:08Z \n", "1 5.589980e+14 5642503 M 2016-04-29T16:08:27Z \n", "2 4.262960e+12 5642549 F 2016-04-29T16:19:04Z \n", "3 8.679510e+11 5642828 F 2016-04-29T17:29:31Z \n", "4 8.841190e+12 5642494 F 2016-04-29T16:07:23Z \n", "\n", " AppointmentDay Age Neighbourhood Scholarship Hipertension \\\n", "0 2016-04-29T00:00:00Z 62 JARDIM DA PENHA 0 1 \n", "1 2016-04-29T00:00:00Z 56 JARDIM DA PENHA 0 0 \n", "2 2016-04-29T00:00:00Z 62 MATA DA PRAIA 0 0 \n", "3 2016-04-29T00:00:00Z 8 PONTAL DE CAMBURI 0 0 \n", "4 2016-04-29T00:00:00Z 56 JARDIM DA PENHA 0 1 \n", "\n", " Diabetes Alcoholism Handcap SMS_received No-show \n", "0 0 0 0 0 No \n", "1 0 0 0 0 No \n", "2 0 0 0 0 No \n", "3 0 0 0 0 No \n", "4 1 0 0 0 No " ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PatientIdAppointmentIDGenderScheduledDayAppointmentDayAgeNeighbourhoodScholarshipHipertensionDiabetesAlcoholismHandcapSMS_receivedNo-show
02.987250e+135642903F2016-04-29T18:38:08Z2016-04-29T00:00:00Z62JARDIM DA PENHA010000No
15.589980e+145642503M2016-04-29T16:08:27Z2016-04-29T00:00:00Z56JARDIM DA PENHA000000No
24.262960e+125642549F2016-04-29T16:19:04Z2016-04-29T00:00:00Z62MATA DA PRAIA000000No
38.679510e+115642828F2016-04-29T17:29:31Z2016-04-29T00:00:00Z8PONTAL DE CAMBURI000000No
48.841190e+125642494F2016-04-29T16:07:23Z2016-04-29T00:00:00Z56JARDIM DA PENHA011000No
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 41 } ] }, { "cell_type": "code", "source": [ "\n", "# Remove duplicate rows, keeping the first occurrence of each.\n", "# (keep=False would drop every copy of a duplicated row, losing those records entirely.)\n", "df = pd.read_csv('Hospital_patients_datasets.csv').drop_duplicates()\n", "df" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 634 }, "id": "16d_vWJmv2VP", "outputId": "ef6806b2-4109-465e-f0c4-d114c28e6329" }, "execution_count": 42, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " PatientId AppointmentID Gender ScheduledDay \\\n", "0 2.987250e+13 5642903 F 2016-04-29T18:38:08Z \n", "1 5.589980e+14 5642503 M 2016-04-29T16:08:27Z \n", "2 4.262960e+12 5642549 F 2016-04-29T16:19:04Z \n", "3 8.679510e+11 5642828 F 2016-04-29T17:29:31Z \n", "4 8.841190e+12 5642494 F 2016-04-29T16:07:23Z \n", "... ... ... ... ... \n", "110522 2.572130e+12 5651768 F 2016-05-03T09:15:35Z \n", "110523 3.596270e+12 5650093 F 2016-05-03T07:27:33Z \n", "110524 1.557660e+13 5630692 F 2016-04-27T16:03:52Z \n", "110525 9.213490e+13 5630323 F 2016-04-27T15:09:23Z \n", "110526 3.775120e+14 5629448 F 2016-04-27T13:30:56Z \n", "\n", " AppointmentDay Age Neighbourhood Scholarship \\\n", "0 2016-04-29T00:00:00Z 62 JARDIM DA PENHA 0 \n", "1 2016-04-29T00:00:00Z 56 JARDIM DA PENHA 0 \n", "2 2016-04-29T00:00:00Z 62 MATA DA PRAIA 0 \n", "3 2016-04-29T00:00:00Z 8 PONTAL DE CAMBURI 0 \n", "4 2016-04-29T00:00:00Z 56 JARDIM DA PENHA 0 \n", "... ... ... ... ... \n", "110522 2016-06-07T00:00:00Z 56 MARIA ORTIZ 0 \n", "110523 2016-06-07T00:00:00Z 51 MARIA ORTIZ 0 \n", "110524 2016-06-07T00:00:00Z 21 MARIA ORTIZ 0 \n", "110525 2016-06-07T00:00:00Z 38 MARIA ORTIZ 0 \n", "110526 2016-06-07T00:00:00Z 54 MARIA ORTIZ 0 \n", "\n", " Hipertension Diabetes Alcoholism Handcap SMS_received No-show \n", "0 1 0 0 0 0 No \n", "1 0 0 0 0 0 No \n", "2 0 0 0 0 0 No \n", "3 0 0 0 0 0 No \n", "4 1 1 0 0 0 No \n", "... ... ... ... ... ... ...
\n", "110522 0 0 0 0 1 No \n", "110523 0 0 0 0 1 No \n", "110524 0 0 0 0 1 No \n", "110525 0 0 0 0 1 No \n", "110526 0 0 0 0 1 No \n", "\n", "[110527 rows x 14 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PatientIdAppointmentIDGenderScheduledDayAppointmentDayAgeNeighbourhoodScholarshipHipertensionDiabetesAlcoholismHandcapSMS_receivedNo-show
02.987250e+135642903F2016-04-29T18:38:08Z2016-04-29T00:00:00Z62JARDIM DA PENHA010000No
15.589980e+145642503M2016-04-29T16:08:27Z2016-04-29T00:00:00Z56JARDIM DA PENHA000000No
24.262960e+125642549F2016-04-29T16:19:04Z2016-04-29T00:00:00Z62MATA DA PRAIA000000No
38.679510e+115642828F2016-04-29T17:29:31Z2016-04-29T00:00:00Z8PONTAL DE CAMBURI000000No
48.841190e+125642494F2016-04-29T16:07:23Z2016-04-29T00:00:00Z56JARDIM DA PENHA011000No
.............................................
1105222.572130e+125651768F2016-05-03T09:15:35Z2016-06-07T00:00:00Z56MARIA ORTIZ000001No
1105233.596270e+125650093F2016-05-03T07:27:33Z2016-06-07T00:00:00Z51MARIA ORTIZ000001No
1105241.557660e+135630692F2016-04-27T16:03:52Z2016-06-07T00:00:00Z21MARIA ORTIZ000001No
1105259.213490e+135630323F2016-04-27T15:09:23Z2016-06-07T00:00:00Z38MARIA ORTIZ000001No
1105263.775120e+145629448F2016-04-27T13:30:56Z2016-06-07T00:00:00Z54MARIA ORTIZ000001No
\n", "

110527 rows × 14 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 42 } ] }, { "cell_type": "code", "source": [ "# Convert the timestamp columns to date-only values.\n", "# Work on the frame produced by the previous cell: re-reading the CSV here\n", "# would silently discard the duplicate removal done above.\n", "df[\"ScheduledDay\"] = pd.to_datetime(df[\"ScheduledDay\"]).dt.date\n", "df[\"AppointmentDay\"] = pd.to_datetime(df[\"AppointmentDay\"]).dt.date\n", "\n", "df.head(5)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 330 }, "id": "TVe4wxuXDNcB", "outputId": "cb8401be-c188-45ea-bba4-d770017959f2" }, "execution_count": 43, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " PatientId AppointmentID Gender ScheduledDay AppointmentDay Age \\\n", "0 2.987250e+13 5642903 F 2016-04-29 2016-04-29 62 \n", "1 5.589980e+14 5642503 M 2016-04-29 2016-04-29 56 \n", "2 4.262960e+12 5642549 F 2016-04-29 2016-04-29 62 \n", "3 8.679510e+11 5642828 F 2016-04-29 2016-04-29 8 \n", "4 8.841190e+12 5642494 F 2016-04-29 2016-04-29 56 \n", "\n", " Neighbourhood Scholarship Hipertension Diabetes Alcoholism \\\n", "0 JARDIM DA PENHA 0 1 0 0 \n", "1 JARDIM DA PENHA 0 0 0 0 \n", "2 MATA DA PRAIA 0 0 0 0 \n", "3 PONTAL DE CAMBURI 0 0 0 0 \n", "4 JARDIM DA PENHA 0 1 1 0 \n", "\n", " Handcap SMS_received No-show \n", "0 0 0 No \n", "1 0 0 No \n", "2 0 0 No \n", "3 0 0 No \n", "4 0 0 No " ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PatientIdAppointmentIDGenderScheduledDayAppointmentDayAgeNeighbourhoodScholarshipHipertensionDiabetesAlcoholismHandcapSMS_receivedNo-show
02.987250e+135642903F2016-04-292016-04-2962JARDIM DA PENHA010000No
15.589980e+145642503M2016-04-292016-04-2956JARDIM DA PENHA000000No
24.262960e+125642549F2016-04-292016-04-2962MATA DA PRAIA000000No
38.679510e+115642828F2016-04-292016-04-298PONTAL DE CAMBURI000000No
48.841190e+125642494F2016-04-292016-04-2956JARDIM DA PENHA011000No
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 43 } ] }, { "cell_type": "code", "source": [ "# Rename misspelled / inconsistently styled columns\n", "df = df.rename(columns={\n", "    'Hipertension': 'Hypertension',\n", "    'Handcap': 'Handicap',\n", "    'SMS_received': 'SMSReceived',\n", "    'No-show': 'NoShow',\n", "})\n", "df.head(5)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 313 }, "id": "7s5FBIp1GJl6", "outputId": "8ec46d91-3983-4365-ac04-b8456e8a8cb3" }, "execution_count": 44, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " PatientId AppointmentID Gender ScheduledDay AppointmentDay Age \\\n", "0 2.987250e+13 5642903 F 2016-04-29 2016-04-29 62 \n", "1 5.589980e+14 5642503 M 2016-04-29 2016-04-29 56 \n", "2 4.262960e+12 5642549 F 2016-04-29 2016-04-29 62 \n", "3 8.679510e+11 5642828 F 2016-04-29 2016-04-29 8 \n", "4 8.841190e+12 5642494 F 2016-04-29 2016-04-29 56 \n", "\n", " Neighbourhood Scholarship Hypertension Diabetes Alcoholism \\\n", "0 JARDIM DA PENHA 0 1 0 0 \n", "1 JARDIM DA PENHA 0 0 0 0 \n", "2 MATA DA PRAIA 0 0 0 0 \n", "3 PONTAL DE CAMBURI 0 0 0 0 \n", "4 JARDIM DA PENHA 0 1 1 0 \n", "\n", " Handicap SMSReceived NoShow \n", "0 0 0 No \n", "1 0 0 No \n", "2 0 0 No \n", "3 0 0 No \n", "4 0 0 No " ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PatientIdAppointmentIDGenderScheduledDayAppointmentDayAgeNeighbourhoodScholarshipHypertensionDiabetesAlcoholismHandicapSMSReceivedNoShow
02.987250e+135642903F2016-04-292016-04-2962JARDIM DA PENHA010000No
15.589980e+145642503M2016-04-292016-04-2956JARDIM DA PENHA000000No
24.262960e+125642549F2016-04-292016-04-2962MATA DA PRAIA000000No
38.679510e+115642828F2016-04-292016-04-298PONTAL DE CAMBURI000000No
48.841190e+125642494F2016-04-292016-04-2956JARDIM DA PENHA011000No
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 44 } ] }, { "cell_type": "code", "source": [ "# Drop the unwanted columns (the two ID columns and the neighbourhood)\n", "df = df.drop(['PatientId', 'AppointmentID', 'Neighbourhood'], axis='columns')\n", "df.head(5)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "id": "GARrUQpdHd3m", "outputId": "5c22d435-7733-4c43-8ee8-4117b48a0f93" }, "execution_count": 45, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Gender ScheduledDay AppointmentDay Age Scholarship Hypertension \\\n", "0 F 2016-04-29 2016-04-29 62 0 1 \n", "1 M 2016-04-29 2016-04-29 56 0 0 \n", "2 F 2016-04-29 2016-04-29 62 0 0 \n", "3 F 2016-04-29 2016-04-29 8 0 0 \n", "4 F 2016-04-29 2016-04-29 56 0 1 \n", "\n", " Diabetes Alcoholism Handicap SMSReceived NoShow \n", "0 0 0 0 0 No \n", "1 0 0 0 0 No \n", "2 0 0 0 0 No \n", "3 0 0 0 0 No \n", "4 1 0 0 0 No " ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GenderScheduledDayAppointmentDayAgeScholarshipHypertensionDiabetesAlcoholismHandicapSMSReceivedNoShow
0F2016-04-292016-04-2962010000No
1M2016-04-292016-04-2956000000No
2F2016-04-292016-04-2962000000No
3F2016-04-292016-04-298000000No
4F2016-04-292016-04-2956011000No
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 45 } ] }, { "cell_type": "code", "source": [ "# Create a new column called Age_group.\n", "# The top edge is open-ended so ages above 100 fall into '80+', and\n", "# include_lowest=True puts age 0 into '0-20' (pd.cut excludes the left edge by default).\n", "# Negative ages, if present, still fall outside the bins and become NaN.\n", "bins = [0, 20, 40, 60, 80, float('inf')]\n", "labels = ['0-20', '20-40', '40-60', '60-80', '80+']\n", "\n", "df['Age_group'] = pd.cut(df['Age'], bins=bins, labels=labels, include_lowest=True)\n", "df.head(5)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "id": "dtMlYFOfJJth", "outputId": "5b1e2cfb-2d54-4258-e975-aff9b27546a4" }, "execution_count": 46, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Gender ScheduledDay AppointmentDay Age Scholarship Hypertension \\\n", "0 F 2016-04-29 2016-04-29 62 0 1 \n", "1 M 2016-04-29 2016-04-29 56 0 0 \n", "2 F 2016-04-29 2016-04-29 62 0 0 \n", "3 F 2016-04-29 2016-04-29 8 0 0 \n", "4 F 2016-04-29 2016-04-29 56 0 1 \n", "\n", " Diabetes Alcoholism Handicap SMSReceived NoShow Age_group \n", "0 0 0 0 0 No 60-80 \n", "1 0 0 0 0 No 40-60 \n", "2 0 0 0 0 No 60-80 \n", "3 0 0 0 0 No 0-20 \n", "4 1 0 0 0 No 40-60 " ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GenderScheduledDayAppointmentDayAgeScholarshipHypertensionDiabetesAlcoholismHandicapSMSReceivedNoShowAge_group
0F2016-04-292016-04-2962010000No60-80
1M2016-04-292016-04-2956000000No40-60
2F2016-04-292016-04-2962000000No60-80
3F2016-04-292016-04-298000000No0-20
4F2016-04-292016-04-2956011000No40-60
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 46 } ] }, { "cell_type": "code", "source": [ "# Age_group was derived from Age in the previous cell, so drop the raw Age column\n", "df = df.drop(columns=['Age'])\n", "df.head(5)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "id": "mKg94eTBKwrW", "outputId": "6912cffb-30b4-4a72-c63e-f001785b2e34" }, "execution_count": 47, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Gender ScheduledDay AppointmentDay Scholarship Hypertension Diabetes \\\n", "0 F 2016-04-29 2016-04-29 0 1 0 \n", "1 M 2016-04-29 2016-04-29 0 0 0 \n", "2 F 2016-04-29 2016-04-29 0 0 0 \n", "3 F 2016-04-29 2016-04-29 0 0 0 \n", "4 F 2016-04-29 2016-04-29 0 1 1 \n", "\n", " Alcoholism Handicap SMSReceived NoShow Age_group \n", "0 0 0 0 No 60-80 \n", "1 0 0 0 No 40-60 \n", "2 0 0 0 No 60-80 \n", "3 0 0 0 No 0-20 \n", "4 0 0 0 No 40-60 " ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GenderScheduledDayAppointmentDayScholarshipHypertensionDiabetesAlcoholismHandicapSMSReceivedNoShowAge_group
0F2016-04-292016-04-29010000No60-80
1M2016-04-292016-04-29000000No40-60
2F2016-04-292016-04-29000000No60-80
3F2016-04-292016-04-29000000No0-20
4F2016-04-292016-04-29011000No40-60
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 47 } ] }, { "cell_type": "code", "source": [ "# Encode NoShow as a binary flag: 'Yes' -> 1, 'No' -> 0\n", "mapping = dict(Yes=1, No=0)\n", "df = df.assign(NoShow=df['NoShow'].replace(mapping))\n", "df.head(5)\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "id": "4mxs-NLOXpJB", "outputId": "cce97dd7-b72d-4c61-af91-aa06b6a89a59" }, "execution_count": 48, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Gender ScheduledDay AppointmentDay Scholarship Hypertension Diabetes \\\n", "0 F 2016-04-29 2016-04-29 0 1 0 \n", "1 M 2016-04-29 2016-04-29 0 0 0 \n", "2 F 2016-04-29 2016-04-29 0 0 0 \n", "3 F 2016-04-29 2016-04-29 0 0 0 \n", "4 F 2016-04-29 2016-04-29 0 1 1 \n", "\n", " Alcoholism Handicap SMSReceived NoShow Age_group \n", "0 0 0 0 0 60-80 \n", "1 0 0 0 0 40-60 \n", "2 0 0 0 0 60-80 \n", "3 0 0 0 0 0-20 \n", "4 0 0 0 0 40-60 " ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GenderScheduledDayAppointmentDayScholarshipHypertensionDiabetesAlcoholismHandicapSMSReceivedNoShowAge_group
0F2016-04-292016-04-29010000060-80
1M2016-04-292016-04-29000000040-60
2F2016-04-292016-04-29000000060-80
3F2016-04-292016-04-2900000000-20
4F2016-04-292016-04-29011000040-60
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "
\n", "
\n" ] }, "metadata": {}, "execution_count": 48 } ] }, { "cell_type": "code", "source": [ "# Write the cleaned DataFrame to patients.csv, leaving out the index column\n", "df.to_csv(path_or_buf='patients.csv', index=False)" ], "metadata": { "id": "c5W5pMZpZFUj" }, "execution_count": 49, "outputs": [] }, { "cell_type": "code", "source": [], "metadata": { "id": "ZdzhXUxEa71f" }, "execution_count": null, "outputs": [] } ] }