{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Ordinal Encoding"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Price | \n",
" Fruit | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 12 | \n",
" Apple | \n",
"
\n",
" \n",
" 1 | \n",
" 10 | \n",
" Orange | \n",
"
\n",
" \n",
" 2 | \n",
" 14 | \n",
" Banana | \n",
"
\n",
" \n",
" 3 | \n",
" 11 | \n",
" Apple | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Price Fruit\n",
"0 12 Apple\n",
"1 10 Orange\n",
"2 14 Banana\n",
"3 11 Apple"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# create a dataframe\n",
"\n",
"import pandas as pd\n",
"\n",
"dict = {\"Price\":[12, 10, 14, 11], \"Fruit\":[\"Apple\", \"Orange\", \"Banana\", \"Apple\"]}\n",
"df = pd.DataFrame(dict)\n",
"\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fruit after label encoding: \n",
" [[0.]\n",
" [2.]\n",
" [1.]\n",
" [0.]]\n",
"Shape of this array: (4, 1)\n"
]
}
],
"source": [
"from sklearn.preprocessing import OrdinalEncoder\n",
"\n",
"encoder = OrdinalEncoder()\n",
"\n",
"df_cat = df[[\"Fruit\"]] # Make sure getting a dataframe by using [[]], double brakcet \n",
"df_cat_encoded = encoder.fit_transform(df_cat)\n",
"\n",
"print(\"Fruit after label encoding: \\n\", df_cat_encoded)\n",
"print(\"Shape of this array: \", df_cat_encoded.shape)\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[1., 0., 0.],\n",
" [0., 0., 1.],\n",
" [0., 1., 0.],\n",
" [1., 0., 0.]])"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.preprocessing import OneHotEncoder\n",
"\n",
"encoder2 = OneHotEncoder(sparse = False) # return an array\n",
"df_cat_one_hot = encoder2.fit_transform(df_cat)\n",
"\n",
"df_cat_one_hot"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Apple | \n",
" Banana | \n",
" Orange | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Apple Banana Orange\n",
"0 1.0 0.0 0.0\n",
"1 0.0 0.0 1.0\n",
"2 0.0 1.0 0.0\n",
"3 1.0 0.0 0.0"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"df_cat_one_hot2 = pd.DataFrame(df_cat_one_hot, columns = [\"Apple\", \"Banana\", \"Orange\"])\n",
"\n",
"df_cat_one_hot2"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Fruit | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Apple | \n",
"
\n",
" \n",
" 1 | \n",
" Orange | \n",
"
\n",
" \n",
" 2 | \n",
" Banana | \n",
"
\n",
" \n",
" 3 | \n",
" Apple | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Fruit\n",
"0 Apple\n",
"1 Orange\n",
"2 Banana\n",
"3 Apple"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[[\"Fruit\"]]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}