{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Ordinal Encoding" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PriceFruit
012Apple
110Orange
214Banana
311Apple
\n", "
" ], "text/plain": [ " Price Fruit\n", "0 12 Apple\n", "1 10 Orange\n", "2 14 Banana\n", "3 11 Apple" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# create a dataframe\n", "\n", "import pandas as pd\n", "\n", "dict = {\"Price\":[12, 10, 14, 11], \"Fruit\":[\"Apple\", \"Orange\", \"Banana\", \"Apple\"]}\n", "df = pd.DataFrame(dict)\n", "\n", "df" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fruit after label encoding: \n", " [[0.]\n", " [2.]\n", " [1.]\n", " [0.]]\n", "Shape of this array: (4, 1)\n" ] } ], "source": [ "from sklearn.preprocessing import OrdinalEncoder\n", "\n", "encoder = OrdinalEncoder()\n", "\n", "df_cat = df[[\"Fruit\"]] # Make sure getting a dataframe by using [[]], double brakcet \n", "df_cat_encoded = encoder.fit_transform(df_cat)\n", "\n", "print(\"Fruit after label encoding: \\n\", df_cat_encoded)\n", "print(\"Shape of this array: \", df_cat_encoded.shape)\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1., 0., 0.],\n", " [0., 0., 1.],\n", " [0., 1., 0.],\n", " [1., 0., 0.]])" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.preprocessing import OneHotEncoder\n", "\n", "encoder2 = OneHotEncoder(sparse = False) # return an array\n", "df_cat_one_hot = encoder2.fit_transform(df_cat)\n", "\n", "df_cat_one_hot" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AppleBananaOrange
01.00.00.0
10.00.01.0
20.01.00.0
31.00.00.0
\n", "
" ], "text/plain": [ " Apple Banana Orange\n", "0 1.0 0.0 0.0\n", "1 0.0 0.0 1.0\n", "2 0.0 1.0 0.0\n", "3 1.0 0.0 0.0" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "df_cat_one_hot2 = pd.DataFrame(df_cat_one_hot, columns = [\"Apple\", \"Banana\", \"Orange\"])\n", "\n", "df_cat_one_hot2" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Fruit
0Apple
1Orange
2Banana
3Apple
\n", "
" ], "text/plain": [ " Fruit\n", "0 Apple\n", "1 Orange\n", "2 Banana\n", "3 Apple" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[[\"Fruit\"]]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 2 }