{ "cells": [ { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CountryCapitalAfricaAsiaC_AmericaEuropeN_AmericaOceaniaS_AmericaPopulationAreaDensityGDP_millionsGDP_per_capitaBirth_rateDeath_rate
0IndiaNew Delhi010000013732331701283232.01070.13632011370978.88280.44286218.77.3
1AndorraAndorra La Vella000100077204180.0428.9111113327.043093.6220927.37.4
2Sierra LeoneFreetown1000000789492127394.0288.19891211361.61439.10243136.010.2
3LuxembourgLuxembourg0001000620853993.0625.22960763428.4102163.31402111.67.3
4Bahamas, TheNassau00100003913693858.0101.44349411725.029958.93900615.17.3
\n", "
" ], "text/plain": [ " Country Capital Africa Asia C_America Europe N_America \\\n", "0 India New Delhi 0 1 0 0 0 \n", "1 Andorra Andorra La Vella 0 0 0 1 0 \n", "2 Sierra Leone Freetown 1 0 0 0 0 \n", "3 Luxembourg Luxembourg 0 0 0 1 0 \n", "4 Bahamas, The Nassau 0 0 1 0 0 \n", "\n", " Oceania S_America Population Area Density GDP_millions \\\n", "0 0 0 1373233170 1283232.0 1070.136320 11370978.8 \n", "1 0 0 77204 180.0 428.911111 3327.0 \n", "2 0 0 7894921 27394.0 288.198912 11361.6 \n", "3 0 0 620853 993.0 625.229607 63428.4 \n", "4 0 0 391369 3858.0 101.443494 11725.0 \n", "\n", " GDP_per_capita Birth_rate Death_rate \n", "0 8280.442862 18.7 7.3 \n", "1 43093.622092 7.3 7.4 \n", "2 1439.102431 36.0 10.2 \n", "3 102163.314021 11.6 7.3 \n", "4 29958.939006 15.1 7.3 " ] }, "execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "import pandas as pd\n",
 "import numpy as np\n",
 "import xlrd\n",
 "\n",
 "# Let pandas open the workbook directly. xlrd 2.0+ only reads legacy .xls files,\n",
 "# so the previous xlrd.open_workbook() call cannot load this .xlsx on current installs.\n",
 "# xlrd stays imported because older pandas releases may still use it as their Excel engine.\n",
 "df = pd.read_excel(\"CSC 357 Week 2 Lesson.xlsx\", sheet_name=\"info\")\n",
 "\n",
 "# Preview the first rows to confirm the sheet loaded with the expected columns.\n",
 "df.head(5)"
 ] }, { "cell_type": "code", "execution_count": 90, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AfricaAsiaC_AmericaEuropeN_AmericaOceaniaS_AmericaPopulationAreaDensityGDP_millionsGDP_per_capitaBirth_rate
0010000013732331701283232.01070.13632011370978.88280.44286218.7
1000100077204180.0428.9111113327.043093.6220927.3
21000000789492127394.0288.19891211361.61439.10243136.0
30001000620853993.0625.22960763428.4102163.31402111.6
400100003913693858.0101.44349411725.029958.93900615.1
\n", "
" ], "text/plain": [ " Africa Asia C_America Europe N_America Oceania S_America Population \\\n", "0 0 1 0 0 0 0 0 1373233170 \n", "1 0 0 0 1 0 0 0 77204 \n", "2 1 0 0 0 0 0 0 7894921 \n", "3 0 0 0 1 0 0 0 620853 \n", "4 0 0 1 0 0 0 0 391369 \n", "\n", " Area Density GDP_millions GDP_per_capita Birth_rate \n", "0 1283232.0 1070.136320 11370978.8 8280.442862 18.7 \n", "1 180.0 428.911111 3327.0 43093.622092 7.3 \n", "2 27394.0 288.198912 11361.6 1439.102431 36.0 \n", "3 993.0 625.229607 63428.4 102163.314021 11.6 \n", "4 3858.0 101.443494 11725.0 29958.939006 15.1 " ] }, "execution_count": 90, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Build one frame per target: drop the two text identifier columns plus the\n",
 "# other rate, so each frame holds the shared features and a single rate column.\n",
 "id_columns = [\"Country\", \"Capital\"]\n",
 "birth_data = df.drop(columns=[*id_columns, \"Death_rate\"])\n",
 "death_data = df.drop(columns=[*id_columns, \"Birth_rate\"])\n",
 "\n",
 "# head() shows five rows by default.\n",
 "birth_data.head()"
 ] }, { "cell_type": "code", "execution_count": 91, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AfricaAsiaC_AmericaEuropeN_AmericaOceaniaS_AmericaPopulationAreaDensityGDP_millionsGDP_per_capitaDeath_rate
0010000013732331701283232.01070.13632011370978.88280.4428627.3
1000100077204180.0428.9111113327.043093.6220927.4
21000000789492127394.0288.19891211361.61439.10243110.2
30001000620853993.0625.22960763428.4102163.3140217.3
400100003913693858.0101.44349411725.029958.9390067.3
\n", "
" ], "text/plain": [ " Africa Asia C_America Europe N_America Oceania S_America Population \\\n", "0 0 1 0 0 0 0 0 1373233170 \n", "1 0 0 0 1 0 0 0 77204 \n", "2 1 0 0 0 0 0 0 7894921 \n", "3 0 0 0 1 0 0 0 620853 \n", "4 0 0 1 0 0 0 0 391369 \n", "\n", " Area Density GDP_millions GDP_per_capita Death_rate \n", "0 1283232.0 1070.136320 11370978.8 8280.442862 7.3 \n", "1 180.0 428.911111 3327.0 43093.622092 7.4 \n", "2 27394.0 288.198912 11361.6 1439.102431 10.2 \n", "3 993.0 625.229607 63428.4 102163.314021 7.3 \n", "4 3858.0 101.443494 11725.0 29958.939006 7.3 " ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Same preview for the death-rate frame; head() shows five rows by default.\n",
 "death_data.head()"
 ] }, { "cell_type": "code", "execution_count": 92, "metadata": {}, "outputs": [], "source": [
 "from sklearn.model_selection import train_test_split\n",
 "\n",
 "# Hold out 20% of each frame, using the same test fraction and seed for both.\n",
 "split_options = {\"test_size\": 0.2, \"random_state\": 42}\n",
 "birth_train, birth_test = train_test_split(birth_data, **split_options)\n",
 "death_train, death_test = train_test_split(death_data, **split_options)"
 ] }, { "cell_type": "code", "execution_count": 93, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Birth_rate 1.000000\n", "Africa 0.742076\n", "Oceania 0.000850\n", "Population -0.030827\n", "C_America -0.068494\n", "Area -0.080206\n", "S_America -0.084688\n", "N_America -0.095789\n", "Asia -0.104140\n", "GDP_millions -0.139544\n", "Density -0.194040\n", "GDP_per_capita -0.534990\n", "Europe -0.542382\n", "Name: Birth_rate, dtype: float64" ] }, "execution_count": 93, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Correlation of every training column with the birth rate, largest value first.\n",
 "birth_corr_matrix = birth_train.corr()\n",
 "birth_corr_matrix.loc[:, \"Birth_rate\"].sort_values(ascending=False)"
 ] }, { "cell_type": "code", "execution_count": 94, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Death_rate 1.000000\n", "Europe 0.498187\n", "Africa 0.094868\n", "Area 0.089829\n", "N_America 0.030557\n", "GDP_millions 0.011293\n", "Population -0.000442\n", "GDP_per_capita -0.070903\n", "Density -0.076134\n", 
"C_America -0.115274\n", "S_America -0.125681\n", "Oceania -0.148835\n", "Asia -0.383382\n", "Name: Death_rate, dtype: float64" ] }, "execution_count": 94, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Correlation of every training column with the death rate, largest value first.\n",
 "death_corr_matrix = death_train.corr()\n",
 "death_corr_matrix.loc[:, \"Death_rate\"].sort_values(ascending=False)"
 ] }, { "cell_type": "code", "execution_count": 95, "metadata": {}, "outputs": [], "source": [
 "# Copy each target column out as the label series, then remove it from the\n",
 "# training features. Re-running this cell fails because the column is already gone.\n",
 "birth_train_labels = birth_train.loc[:, \"Birth_rate\"].copy()\n",
 "death_train_labels = death_train.loc[:, \"Death_rate\"].copy()\n",
 "birth_train = birth_train.drop(columns=[\"Birth_rate\"])\n",
 "death_train = death_train.drop(columns=[\"Death_rate\"])"
 ] }, { "cell_type": "code", "execution_count": 96, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AfricaAsiaC_AmericaEuropeN_AmericaOceaniaS_AmericaPopulationAreaDensityGDP_millionsGDP_per_capita
0-0.616872-0.547723-0.313993-0.587220-0.1139613.630677-0.261712-0.1012643.356525-0.2102590.1818150.935520
1-0.616872-0.547723-0.313993-0.587220-0.1139613.630677-0.261712-0.257790-0.3622000.016860-0.254971-0.243053
2-0.616872-0.547723-0.3139931.702939-0.113961-0.275431-0.261712-0.246636-0.356950-0.136961-0.247924-0.419685
3-0.6168721.825742-0.313993-0.587220-0.113961-0.275431-0.2617120.058688-0.3134640.0158970.4679780.617741
4-0.616872-0.547723-0.3139931.702939-0.113961-0.275431-0.261712-0.249666-0.341158-0.198096-0.2398940.343716
\n", "
" ], "text/plain": [ " Africa Asia C_America Europe N_America Oceania S_America \\\n", "0 -0.616872 -0.547723 -0.313993 -0.587220 -0.113961 3.630677 -0.261712 \n", "1 -0.616872 -0.547723 -0.313993 -0.587220 -0.113961 3.630677 -0.261712 \n", "2 -0.616872 -0.547723 -0.313993 1.702939 -0.113961 -0.275431 -0.261712 \n", "3 -0.616872 1.825742 -0.313993 -0.587220 -0.113961 -0.275431 -0.261712 \n", "4 -0.616872 -0.547723 -0.313993 1.702939 -0.113961 -0.275431 -0.261712 \n", "\n", " Population Area Density GDP_millions GDP_per_capita \n", "0 -0.101264 3.356525 -0.210259 0.181815 0.935520 \n", "1 -0.257790 -0.362200 0.016860 -0.254971 -0.243053 \n", "2 -0.246636 -0.356950 -0.136961 -0.247924 -0.419685 \n", "3 0.058688 -0.313464 0.015897 0.467978 0.617741 \n", "4 -0.249666 -0.341158 -0.198096 -0.239894 0.343716 " ] }, "execution_count": 96, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "from sklearn.preprocessing import StandardScaler\n",
 "\n",
 "# One scaler per frame. Refitting a single shared scaler replaces the statistics\n",
 "# learned from the first frame, so they could no longer be applied to its test split.\n",
 "birth_scaler = StandardScaler()\n",
 "death_scaler = StandardScaler()\n",
 "# Legacy name: the shared scaler used to end up fitted on the death-rate features.\n",
 "scaler = death_scaler\n",
 "\n",
 "# fit_transform returns a plain array, so re-wrap it in a DataFrame. Column labels come\n",
 "# from the frame itself instead of a hand-copied list; the row index is the default 0..n-1.\n",
 "birth_train = pd.DataFrame(birth_scaler.fit_transform(birth_train), columns=birth_train.columns)\n",
 "death_train = pd.DataFrame(death_scaler.fit_transform(death_train), columns=death_train.columns)\n",
 "birth_train.head(5)"
 ] }, { "cell_type": "code", "execution_count": 97, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AfricaAsiaC_AmericaEuropeN_AmericaOceaniaS_AmericaPopulationAreaDensityGDP_millionsGDP_per_capita
0-0.616872-0.547723-0.313993-0.587220-0.1139613.630677-0.261712-0.1012643.356525-0.2102590.1818150.935520
1-0.616872-0.547723-0.313993-0.587220-0.1139613.630677-0.261712-0.257790-0.3622000.016860-0.254971-0.243053
2-0.616872-0.547723-0.3139931.702939-0.113961-0.275431-0.261712-0.246636-0.356950-0.136961-0.247924-0.419685
3-0.6168721.825742-0.313993-0.587220-0.113961-0.275431-0.2617120.058688-0.3134640.0158970.4679780.617741
4-0.616872-0.547723-0.3139931.702939-0.113961-0.275431-0.261712-0.249666-0.341158-0.198096-0.2398940.343716
\n", "
" ], "text/plain": [ " Africa Asia C_America Europe N_America Oceania S_America \\\n", "0 -0.616872 -0.547723 -0.313993 -0.587220 -0.113961 3.630677 -0.261712 \n", "1 -0.616872 -0.547723 -0.313993 -0.587220 -0.113961 3.630677 -0.261712 \n", "2 -0.616872 -0.547723 -0.313993 1.702939 -0.113961 -0.275431 -0.261712 \n", "3 -0.616872 1.825742 -0.313993 -0.587220 -0.113961 -0.275431 -0.261712 \n", "4 -0.616872 -0.547723 -0.313993 1.702939 -0.113961 -0.275431 -0.261712 \n", "\n", " Population Area Density GDP_millions GDP_per_capita \n", "0 -0.101264 3.356525 -0.210259 0.181815 0.935520 \n", "1 -0.257790 -0.362200 0.016860 -0.254971 -0.243053 \n", "2 -0.246636 -0.356950 -0.136961 -0.247924 -0.419685 \n", "3 0.058688 -0.313464 0.015897 0.467978 0.617741 \n", "4 -0.249666 -0.341158 -0.198096 -0.239894 0.343716 " ] }, "execution_count": 97, "metadata": {}, "output_type": "execute_result" } ], "source": [ "death_train.head(5)" ] }, { "cell_type": "code", "execution_count": 98, "metadata": {}, "outputs": [], "source": [
 "from sklearn.linear_model import LinearRegression\n",
 "\n",
 "# fit() returns the estimator it was called on. Fitting one shared instance twice\n",
 "# therefore left birth_lr and death_lr as the same object, trained last on the\n",
 "# death-rate data. Each target now gets its own estimator.\n",
 "birth_lr = LinearRegression().fit(birth_train, birth_train_labels)\n",
 "death_lr = LinearRegression().fit(death_train, death_train_labels)\n",
 "# Legacy name: the shared instance used to end up as the death-rate model.\n",
 "lin_reg = death_lr"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "from sklearn.metrics import mean_squared_error\n",
 "\n",
 "# Both errors are measured on the same training rows the models were fitted on;\n",
 "# the held-out test splits are not evaluated in this cell.\n",
 "birth_pred = birth_lr.predict(birth_train)\n",
 "death_pred = death_lr.predict(death_train)\n",
 "birth_mse = mean_squared_error(birth_train_labels, birth_pred)\n",
 "death_mse = mean_squared_error(death_train_labels, death_pred)\n",
 "# RMSE is the square root of the mean squared error, in the units of the rate itself.\n",
 "birth_rmse = np.sqrt(birth_mse)\n",
 "death_rmse = np.sqrt(death_mse)\n",
 "print(\"RMSE for Birth Rate: \", birth_rmse, \"\\nRMSE for Death Rate: \", death_rmse)"
 ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": 
"python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 2 }