{
"cells": [
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[['MILK', 'BREAD', 'BISCUIT'],\n",
" ['BREAD', 'MILK', 'BISCUIT', 'CORNFLAKES'],\n",
" ['BREAD', 'TEA', 'BOURNVITA'],\n",
" ['JAM', 'MAGGI', 'BREAD', 'MILK'],\n",
" ['MAGGI', 'TEA', 'BISCUIT'],\n",
" ['BREAD', 'TEA', 'BOURNVITA'],\n",
" ['MAGGI', 'TEA', 'CORNFLAKES'],\n",
" ['MAGGI', 'BREAD', 'TEA', 'BISCUIT'],\n",
" ['JAM', 'MAGGI', 'BREAD', 'TEA'],\n",
" ['BREAD', 'MILK'],\n",
" ['COFFEE', 'COCK', 'BISCUIT', 'CORNFLAKES'],\n",
" ['COFFEE', 'COCK', 'BISCUIT', 'CORNFLAKES'],\n",
" ['COFFEE', 'SUGER', 'BOURNVITA'],\n",
" ['BREAD', 'COFFEE', 'COCK'],\n",
" ['BREAD', 'SUGER', 'BISCUIT'],\n",
" ['COFFEE', 'SUGER', 'CORNFLAKES'],\n",
" ['BREAD', 'SUGER', 'BOURNVITA'],\n",
" ['BREAD', 'COFFEE', 'SUGER'],\n",
" ['BREAD', 'COFFEE', 'SUGER'],\n",
" ['TEA', 'MILK', 'COFFEE', 'CORNFLAKES']]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"from mlxtend.preprocessing import TransactionEncoder\n",
"from mlxtend.frequent_patterns import apriori\n",
"\n",
"df = pd.read_csv(\"grocery_data.csv\")\n",
"data = list(df[\"products\"].apply(lambda x:x.split(',')))\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>BISCUIT</th>\n",
" <th>BOURNVITA</th>\n",
" <th>BREAD</th>\n",
" <th>COCK</th>\n",
" <th>COFFEE</th>\n",
" <th>CORNFLAKES</th>\n",
" <th>JAM</th>\n",
" <th>MAGGI</th>\n",
" <th>MILK</th>\n",
" <th>SUGER</th>\n",
" <th>TEA</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" BISCUIT BOURNVITA BREAD COCK COFFEE CORNFLAKES JAM MAGGI MILK \\\n",
"0 1 0 1 0 0 0 0 0 1 \n",
"1 1 0 1 0 0 1 0 0 1 \n",
"2 0 1 1 0 0 0 0 0 0 \n",
"3 0 0 1 0 0 0 1 1 1 \n",
"4 1 0 0 0 0 0 0 1 0 \n",
"5 0 1 1 0 0 0 0 0 0 \n",
"6 0 0 0 0 0 1 0 1 0 \n",
"7 1 0 1 0 0 0 0 1 0 \n",
"8 0 0 1 0 0 0 1 1 0 \n",
"9 0 0 1 0 0 0 0 0 1 \n",
"10 1 0 0 1 1 1 0 0 0 \n",
"11 1 0 0 1 1 1 0 0 0 \n",
"12 0 1 0 0 1 0 0 0 0 \n",
"13 0 0 1 1 1 0 0 0 0 \n",
"14 1 0 1 0 0 0 0 0 0 \n",
"15 0 0 0 0 1 1 0 0 0 \n",
"16 0 1 1 0 0 0 0 0 0 \n",
"17 0 0 1 0 1 0 0 0 0 \n",
"18 0 0 1 0 1 0 0 0 0 \n",
"19 0 0 0 0 1 1 0 0 1 \n",
"\n",
" SUGER TEA \n",
"0 0 0 \n",
"1 0 0 \n",
"2 0 1 \n",
"3 0 0 \n",
"4 0 1 \n",
"5 0 1 \n",
"6 0 1 \n",
"7 0 1 \n",
"8 0 1 \n",
"9 0 0 \n",
"10 0 0 \n",
"11 0 0 \n",
"12 1 0 \n",
"13 0 0 \n",
"14 1 0 \n",
"15 1 0 \n",
"16 1 0 \n",
"17 1 0 \n",
"18 1 0 \n",
"19 0 1 "
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"te = TransactionEncoder()\n",
"\n",
"te_data = te.fit(data).transform(data).astype(\"int\")\n",
"df = pd.DataFrame(te_data,columns=te.columns_)\n",
"df\n",
"# df.to_csv(\"transformed_data.csv\", encoding='utf-8', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>support</th>\n",
" <th>itemsets</th>\n",
" <th>length</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.35</td>\n",
" <td>(BISCUIT)</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.20</td>\n",
" <td>(BOURNVITA)</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.65</td>\n",
" <td>(BREAD)</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.15</td>\n",
" <td>(COCK)</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.40</td>\n",
" <td>(COFFEE)</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0.30</td>\n",
" <td>(CORNFLAKES)</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0.10</td>\n",
" <td>(JAM)</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>0.25</td>\n",
" <td>(MAGGI)</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0.25</td>\n",
" <td>(MILK)</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0.30</td>\n",
" <td>(SUGER)</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>0.35</td>\n",
" <td>(TEA)</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>0.20</td>\n",
" <td>(BISCUIT, BREAD)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>0.10</td>\n",
" <td>(BISCUIT, COCK)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>0.10</td>\n",
" <td>(BISCUIT, COFFEE)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>0.15</td>\n",
" <td>(BISCUIT, CORNFLAKES)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>0.10</td>\n",
" <td>(BISCUIT, MAGGI)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>0.10</td>\n",
" <td>(MILK, BISCUIT)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>0.05</td>\n",
" <td>(BISCUIT, SUGER)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>0.10</td>\n",
" <td>(BISCUIT, TEA)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>0.15</td>\n",
" <td>(BREAD, BOURNVITA)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>0.05</td>\n",
" <td>(COFFEE, BOURNVITA)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>0.10</td>\n",
" <td>(SUGER, BOURNVITA)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>0.10</td>\n",
" <td>(TEA, BOURNVITA)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>0.05</td>\n",
" <td>(COCK, BREAD)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>0.15</td>\n",
" <td>(BREAD, COFFEE)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>0.05</td>\n",
" <td>(CORNFLAKES, BREAD)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>0.10</td>\n",
" <td>(JAM, BREAD)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>0.15</td>\n",
" <td>(BREAD, MAGGI)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>0.20</td>\n",
" <td>(MILK, BREAD)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>0.20</td>\n",
" <td>(BREAD, SUGER)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53</th>\n",
" <td>0.10</td>\n",
" <td>(CORNFLAKES, BISCUIT, COCK)</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>54</th>\n",
" <td>0.10</td>\n",
" <td>(CORNFLAKES, BISCUIT, COFFEE)</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>55</th>\n",
" <td>0.05</td>\n",
" <td>(MILK, BISCUIT, CORNFLAKES)</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>56</th>\n",
" <td>0.10</td>\n",
" <td>(BISCUIT, TEA, MAGGI)</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57</th>\n",
" <td>0.05</td>\n",
" <td>(SUGER, BREAD, BOURNVITA)</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58</th>\n",
" <td>0.10</td>\n",
" <td>(TEA, BREAD, BOURNVITA)</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>59</th>\n",
" <td>0.05</td>\n",
" <td>(SUGER, COFFEE, BOURNVITA)</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60</th>\n",
" <td>0.05</td>\n",
" <td>(COCK, BREAD, COFFEE)</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>61</th>\n",
" <td>0.10</td>\n",
" <td>(BREAD, COFFEE, SUGER)</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>62</th>\n",
" <td>0.05</td>\n",
" <td>(CORNFLAKES, MILK, BREAD)</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>63</th>\n",
" <td>0.10</td>\n",
" <td>(JAM, BREAD, MAGGI)</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>64</th>\n",
" <td>0.05</td>\n",
" <td>(MILK, BREAD, JAM)</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65</th>\n",
" <td>0.05</td>\n",
" <td>(JAM, TEA, BREAD)</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>66</th>\n",
" <td>0.05</td>\n",
" <td>(MILK, BREAD, MAGGI)</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>67</th>\n",
" <td>0.10</td>\n",
" <td>(TEA, BREAD, MAGGI)</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>68</th>\n",
" <td>0.10</td>\n",
" <td>(CORNFLAKES, COCK, COFFEE)</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69</th>\n",
" <td>0.05</td>\n",
" <td>(CORNFLAKES, MILK, COFFEE)</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>70</th>\n",
" <td>0.05</td>\n",
" <td>(CORNFLAKES, COFFEE, SUGER)</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>71</th>\n",
" <td>0.05</td>\n",
" <td>(CORNFLAKES, TEA, COFFEE)</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>72</th>\n",
" <td>0.05</td>\n",
" <td>(MILK, TEA, COFFEE)</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>73</th>\n",
" <td>0.05</td>\n",
" <td>(TEA, CORNFLAKES, MAGGI)</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>74</th>\n",
" <td>0.05</td>\n",
" <td>(MILK, TEA, CORNFLAKES)</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75</th>\n",
" <td>0.05</td>\n",
" <td>(MAGGI, MILK, JAM)</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>76</th>\n",
" <td>0.05</td>\n",
" <td>(JAM, TEA, MAGGI)</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>77</th>\n",
" <td>0.05</td>\n",
" <td>(CORNFLAKES, MILK, BISCUIT, BREAD)</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>78</th>\n",
" <td>0.05</td>\n",
" <td>(TEA, BISCUIT, BREAD, MAGGI)</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>79</th>\n",
" <td>0.10</td>\n",
" <td>(CORNFLAKES, BISCUIT, COCK, COFFEE)</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>80</th>\n",
" <td>0.05</td>\n",
" <td>(MAGGI, MILK, BREAD, JAM)</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>81</th>\n",
" <td>0.05</td>\n",
" <td>(JAM, TEA, BREAD, MAGGI)</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>82</th>\n",
" <td>0.05</td>\n",
" <td>(CORNFLAKES, MILK, TEA, COFFEE)</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>83 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" support itemsets length\n",
"0 0.35 (BISCUIT) 1\n",
"1 0.20 (BOURNVITA) 1\n",
"2 0.65 (BREAD) 1\n",
"3 0.15 (COCK) 1\n",
"4 0.40 (COFFEE) 1\n",
"5 0.30 (CORNFLAKES) 1\n",
"6 0.10 (JAM) 1\n",
"7 0.25 (MAGGI) 1\n",
"8 0.25 (MILK) 1\n",
"9 0.30 (SUGER) 1\n",
"10 0.35 (TEA) 1\n",
"11 0.20 (BISCUIT, BREAD) 2\n",
"12 0.10 (BISCUIT, COCK) 2\n",
"13 0.10 (BISCUIT, COFFEE) 2\n",
"14 0.15 (BISCUIT, CORNFLAKES) 2\n",
"15 0.10 (BISCUIT, MAGGI) 2\n",
"16 0.10 (MILK, BISCUIT) 2\n",
"17 0.05 (BISCUIT, SUGER) 2\n",
"18 0.10 (BISCUIT, TEA) 2\n",
"19 0.15 (BREAD, BOURNVITA) 2\n",
"20 0.05 (COFFEE, BOURNVITA) 2\n",
"21 0.10 (SUGER, BOURNVITA) 2\n",
"22 0.10 (TEA, BOURNVITA) 2\n",
"23 0.05 (COCK, BREAD) 2\n",
"24 0.15 (BREAD, COFFEE) 2\n",
"25 0.05 (CORNFLAKES, BREAD) 2\n",
"26 0.10 (JAM, BREAD) 2\n",
"27 0.15 (BREAD, MAGGI) 2\n",
"28 0.20 (MILK, BREAD) 2\n",
"29 0.20 (BREAD, SUGER) 2\n",
".. ... ... ...\n",
"53 0.10 (CORNFLAKES, BISCUIT, COCK) 3\n",
"54 0.10 (CORNFLAKES, BISCUIT, COFFEE) 3\n",
"55 0.05 (MILK, BISCUIT, CORNFLAKES) 3\n",
"56 0.10 (BISCUIT, TEA, MAGGI) 3\n",
"57 0.05 (SUGER, BREAD, BOURNVITA) 3\n",
"58 0.10 (TEA, BREAD, BOURNVITA) 3\n",
"59 0.05 (SUGER, COFFEE, BOURNVITA) 3\n",
"60 0.05 (COCK, BREAD, COFFEE) 3\n",
"61 0.10 (BREAD, COFFEE, SUGER) 3\n",
"62 0.05 (CORNFLAKES, MILK, BREAD) 3\n",
"63 0.10 (JAM, BREAD, MAGGI) 3\n",
"64 0.05 (MILK, BREAD, JAM) 3\n",
"65 0.05 (JAM, TEA, BREAD) 3\n",
"66 0.05 (MILK, BREAD, MAGGI) 3\n",
"67 0.10 (TEA, BREAD, MAGGI) 3\n",
"68 0.10 (CORNFLAKES, COCK, COFFEE) 3\n",
"69 0.05 (CORNFLAKES, MILK, COFFEE) 3\n",
"70 0.05 (CORNFLAKES, COFFEE, SUGER) 3\n",
"71 0.05 (CORNFLAKES, TEA, COFFEE) 3\n",
"72 0.05 (MILK, TEA, COFFEE) 3\n",
"73 0.05 (TEA, CORNFLAKES, MAGGI) 3\n",
"74 0.05 (MILK, TEA, CORNFLAKES) 3\n",
"75 0.05 (MAGGI, MILK, JAM) 3\n",
"76 0.05 (JAM, TEA, MAGGI) 3\n",
"77 0.05 (CORNFLAKES, MILK, BISCUIT, BREAD) 4\n",
"78 0.05 (TEA, BISCUIT, BREAD, MAGGI) 4\n",
"79 0.10 (CORNFLAKES, BISCUIT, COCK, COFFEE) 4\n",
"80 0.05 (MAGGI, MILK, BREAD, JAM) 4\n",
"81 0.05 (JAM, TEA, BREAD, MAGGI) 4\n",
"82 0.05 (CORNFLAKES, MILK, TEA, COFFEE) 4\n",
"\n",
"[83 rows x 3 columns]"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"apriori_data = apriori(df,min_support=0.01,use_colnames=True)\n",
"apriori_data.sort_values(by=\"support\",ascending=False)\n",
"apriori_data['length'] = apriori_data['itemsets'].apply(lambda x:len(x))\n",
"\n",
"apriori_data"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>support</th>\n",
" <th>itemsets</th>\n",
" <th>length</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>0.20</td>\n",
" <td>(BISCUIT, BREAD)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>0.10</td>\n",
" <td>(BISCUIT, COCK)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>0.10</td>\n",
" <td>(BISCUIT, COFFEE)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>0.15</td>\n",
" <td>(BISCUIT, CORNFLAKES)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>0.10</td>\n",
" <td>(BISCUIT, MAGGI)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>0.10</td>\n",
" <td>(MILK, BISCUIT)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>0.05</td>\n",
" <td>(BISCUIT, SUGER)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>0.10</td>\n",
" <td>(BISCUIT, TEA)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>0.15</td>\n",
" <td>(BREAD, BOURNVITA)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>0.05</td>\n",
" <td>(COFFEE, BOURNVITA)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>0.10</td>\n",
" <td>(SUGER, BOURNVITA)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>0.10</td>\n",
" <td>(TEA, BOURNVITA)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>0.05</td>\n",
" <td>(COCK, BREAD)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>0.15</td>\n",
" <td>(BREAD, COFFEE)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>0.05</td>\n",
" <td>(CORNFLAKES, BREAD)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>0.10</td>\n",
" <td>(JAM, BREAD)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>0.15</td>\n",
" <td>(BREAD, MAGGI)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>0.20</td>\n",
" <td>(MILK, BREAD)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>0.20</td>\n",
" <td>(BREAD, SUGER)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>0.20</td>\n",
" <td>(TEA, BREAD)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>0.15</td>\n",
" <td>(COCK, COFFEE)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>0.10</td>\n",
" <td>(CORNFLAKES, COCK)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>0.20</td>\n",
" <td>(CORNFLAKES, COFFEE)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>0.05</td>\n",
" <td>(MILK, COFFEE)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35</th>\n",
" <td>0.20</td>\n",
" <td>(COFFEE, SUGER)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>0.05</td>\n",
" <td>(TEA, COFFEE)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>0.05</td>\n",
" <td>(CORNFLAKES, MAGGI)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>0.10</td>\n",
" <td>(MILK, CORNFLAKES)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>0.05</td>\n",
" <td>(CORNFLAKES, SUGER)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40</th>\n",
" <td>0.10</td>\n",
" <td>(TEA, CORNFLAKES)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>0.10</td>\n",
" <td>(JAM, MAGGI)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>0.05</td>\n",
" <td>(MILK, JAM)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43</th>\n",
" <td>0.05</td>\n",
" <td>(JAM, TEA)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44</th>\n",
" <td>0.05</td>\n",
" <td>(MILK, MAGGI)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45</th>\n",
" <td>0.20</td>\n",
" <td>(TEA, MAGGI)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46</th>\n",
" <td>0.05</td>\n",
" <td>(MILK, TEA)</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" support itemsets length\n",
"11 0.20 (BISCUIT, BREAD) 2\n",
"12 0.10 (BISCUIT, COCK) 2\n",
"13 0.10 (BISCUIT, COFFEE) 2\n",
"14 0.15 (BISCUIT, CORNFLAKES) 2\n",
"15 0.10 (BISCUIT, MAGGI) 2\n",
"16 0.10 (MILK, BISCUIT) 2\n",
"17 0.05 (BISCUIT, SUGER) 2\n",
"18 0.10 (BISCUIT, TEA) 2\n",
"19 0.15 (BREAD, BOURNVITA) 2\n",
"20 0.05 (COFFEE, BOURNVITA) 2\n",
"21 0.10 (SUGER, BOURNVITA) 2\n",
"22 0.10 (TEA, BOURNVITA) 2\n",
"23 0.05 (COCK, BREAD) 2\n",
"24 0.15 (BREAD, COFFEE) 2\n",
"25 0.05 (CORNFLAKES, BREAD) 2\n",
"26 0.10 (JAM, BREAD) 2\n",
"27 0.15 (BREAD, MAGGI) 2\n",
"28 0.20 (MILK, BREAD) 2\n",
"29 0.20 (BREAD, SUGER) 2\n",
"30 0.20 (TEA, BREAD) 2\n",
"31 0.15 (COCK, COFFEE) 2\n",
"32 0.10 (CORNFLAKES, COCK) 2\n",
"33 0.20 (CORNFLAKES, COFFEE) 2\n",
"34 0.05 (MILK, COFFEE) 2\n",
"35 0.20 (COFFEE, SUGER) 2\n",
"36 0.05 (TEA, COFFEE) 2\n",
"37 0.05 (CORNFLAKES, MAGGI) 2\n",
"38 0.10 (MILK, CORNFLAKES) 2\n",
"39 0.05 (CORNFLAKES, SUGER) 2\n",
"40 0.10 (TEA, CORNFLAKES) 2\n",
"41 0.10 (JAM, MAGGI) 2\n",
"42 0.05 (MILK, JAM) 2\n",
"43 0.05 (JAM, TEA) 2\n",
"44 0.05 (MILK, MAGGI) 2\n",
"45 0.20 (TEA, MAGGI) 2\n",
"46 0.05 (MILK, TEA) 2"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"apriori_data[(apriori_data['length']==2) & (apriori_data['support']>=0.05)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}