{"id":1041,"date":"2021-11-07T04:03:59","date_gmt":"2021-11-07T12:03:59","guid":{"rendered":"https:\/\/gantovnik.com\/bio-tips\/?p=1041"},"modified":"2021-11-07T04:03:59","modified_gmt":"2021-11-07T12:03:59","slug":"186-generate-the-feature-importance","status":"publish","type":"post","link":"https:\/\/gantovnik.com\/bio-tips\/2021\/11\/186-generate-the-feature-importance\/","title":{"rendered":"#186 Generate the feature importance"},"content":{"rendered":"<pre class=\"brush: python; title: ; notranslate\" title=\"\">\r\nimport pandas as pd\r\nimport seaborn as sns\r\nimport os\r\nimport matplotlib.pyplot as plt\r\nos.chdir(r'D:\\projects\\wordpress\\ex186') \r\nsns.set(style=&quot;ticks&quot;)\r\n# read the downloaded input data (marketing data)\r\ndf = pd.read_csv('https:\/\/raw.githubusercontent.com\/TrainingByPackt\/Big-Data-Analysis-with-Python\/master\/Lesson07\/Dataset\/bank.csv', sep=';')\r\ndf&#x5B;'y'].replace(&#x5B;'yes','no'],&#x5B;1,0],inplace=True)\r\ndf&#x5B;'default'].replace(&#x5B;'yes','no'],&#x5B;1,0],inplace=True)\r\ndf&#x5B;'housing'].replace(&#x5B;'yes','no'],&#x5B;1,0],inplace=True)\r\ndf&#x5B;'loan'].replace(&#x5B;'yes','no'],&#x5B;1,0],inplace=True)\r\ncorr_df = df.corr()\r\nsns.heatmap(corr_df, xticklabels=corr_df.columns.values, yticklabels=corr_df.columns.values, annot = True, annot_kws={'size':12})\r\nheat_map=plt.gcf(); heat_map.set_size_inches(10,5)\r\nplt.xticks(fontsize=10); plt.yticks(fontsize=10);\r\nplt.savefig('ex186a.jpg', dpi=300)\r\nplt.show()\r\n#pip install boruta --upgrade\r\n\r\n# import DecisionTreeClassifier from sklearn and BorutaPy from boruta\r\nfrom sklearn.ensemble import RandomForestClassifier\r\nfrom boruta import BorutaPy\r\n# transform all categorical data types to integers (hot-encoding)\r\nfor col_name in df.columns:\r\n    if(df&#x5B;col_name].dtype == 'object'):\r\n        df&#x5B;col_name]= df&#x5B;col_name].astype('category')\r\n        df&#x5B;col_name] = df&#x5B;col_name].cat.codes\r\n\r\n# generate separate dataframes for IVs and DV (target variable)\r\nX = df.drop(&#x5B;'y'], axis=1).values\r\nY = df&#x5B;'y'].values\r\n# build RandomForestClassifier, Boruta models and\r\n# related parameter\r\nrfc = RandomForestClassifier(n_estimators=200, n_jobs=4, class_weight='balanced', max_depth=6)\r\nboruta_selector = BorutaPy(rfc, n_estimators='auto', verbose=2)\r\nn_train = len(X)\r\n# fit Boruta algorithm\r\nboruta_selector.fit(X, Y)\r\n# check ranking of features\r\nfeature_df = pd.DataFrame(df.drop(&#x5B;'y'], axis=1).columns.tolist(), columns=&#x5B;'features'])\r\nfeature_df&#x5B;'rank']=boruta_selector.ranking_\r\nfeature_df = feature_df.sort_values('rank', ascending=True).reset_index(drop=True)\r\nsns.barplot(x='rank',y='features',data=feature_df)\r\nplt.savefig('ex186b.jpg', dpi=300)\r\n<\/pre>\n<p><img data-recalc-dims=\"1\" decoding=\"async\" src=\"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex186a-2.jpg?resize=1080%2C540&#038;ssl=1\" alt=\"\" width=\"1080\" height=\"540\" class=\"alignnone size-full wp-image-1044\" srcset=\"https:\/\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex186a-2.jpg 2000w, https:\/\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex186a-2-1280x640.jpg 1280w, https:\/\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex186a-2-980x490.jpg 980w, https:\/\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex186a-2-480x240.jpg 480w\" sizes=\"(min-width: 0px) and (max-width: 480px) 480px, (min-width: 481px) and (max-width: 980px) 980px, (min-width: 981px) and (max-width: 1280px) 1280px, (min-width: 1281px) 2000px, 100vw\" \/><\/p>\n<p><img data-recalc-dims=\"1\" decoding=\"async\" src=\"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex186b.jpg?resize=1080%2C720&#038;ssl=1\" alt=\"\" width=\"1080\" height=\"720\" class=\"alignnone size-full wp-image-1045\" srcset=\"https:\/\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex186b.jpg 1200w, https:\/\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex186b-980x653.jpg 980w, https:\/\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex186b-480x320.jpg 480w\" sizes=\"(min-width: 0px) and (max-width: 480px) 480px, (min-width: 481px) and (max-width: 980px) 980px, (min-width: 981px) 1200px, 100vw\" \/><\/p>\n","protected":false},"excerpt":{"rendered":"<p>import pandas as pd import seaborn as sns import os import matplotlib.pyplot as plt os.chdir(r&#8217;D:\\projects\\wordpress\\ex186&#8242;) sns.set(style=&quot;ticks&quot;) # read the downloaded input data (marketing data) df = pd.read_csv(&#8216;https:\/\/raw.githubusercontent.com\/TrainingByPackt\/Big-Data-Analysis-with-Python\/master\/Lesson07\/Dataset\/bank.csv&#8217;, sep=&#8217;;&#8217;) df&#x5B;&#8217;y&#8217;].replace(&#x5B;&#8217;yes&#8217;,&#8217;no&#8217;],&#x5B;1,0],inplace=True) df&#x5B;&#8217;default&#8217;].replace(&#x5B;&#8217;yes&#8217;,&#8217;no&#8217;],&#x5B;1,0],inplace=True) df&#x5B;&#8217;housing&#8217;].replace(&#x5B;&#8217;yes&#8217;,&#8217;no&#8217;],&#x5B;1,0],inplace=True) df&#x5B;&#8217;loan&#8217;].replace(&#x5B;&#8217;yes&#8217;,&#8217;no&#8217;],&#x5B;1,0],inplace=True) corr_df = df.corr() sns.heatmap(corr_df, xticklabels=corr_df.columns.values, yticklabels=corr_df.columns.values, annot = True, annot_kws={&#8216;size&#8217;:12}) heat_map=plt.gcf(); heat_map.set_size_inches(10,5) plt.xticks(fontsize=10); plt.yticks(fontsize=10); plt.savefig(&#8216;ex186a.jpg&#8217;, dpi=300) plt.show() #pip install boruta &#8211;upgrade # import [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"nf_dc_page":"","_et_pb_use_builder":"","_et_pb_old_content":"","_et_gb_content_width":"","_lmt_disableupdate":"yes","_lmt_disable":"","jetpack_post_was_ever_published":false,"_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_memberships_contains_paid_content":false,"footnotes":""},"categories":[2],"tags":[],"class_list":["post-1041","post","type-post","status-publish","format-standard","hentry","category-python"],"modified_by":"gantovnik","jetpack_featured_media_url":"","jetpack_sharing_enabled":true,"jetpack_shortlink":"https:\/\/wp.me\/p8bH0k-gN","jetpack_likes_enabled":true,"jetpack-related-posts":[{"id":654,"url":"https:\/\/gantovnik.com\/bio-tips\/2020\/09\/107-tsa-data-2020-vs-2019\/","url_meta":{"origin":1041,"position":0},"title":"#107: TSA Data 2020 vs 2019","author":"gantovnik","date":"2020-09-28","format":false,"excerpt":"#107: TSA Data 2020 vs 2019 The TSA has started to publish the daily volume of passengers going through checkpoints on its website. The data set also includes the numbers from 2019 in order to measure the impact of travel as a result of COVID-19. https:\/\/www.tsa.gov\/coronavirus\/passenger-throughput [code language=\"python\"] from bs4\u2026","rel":"","context":"In &quot;python&quot;","block_context":{"text":"python","link":"https:\/\/gantovnik.com\/bio-tips\/category\/python\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/09\/ex107.png?resize=350%2C200","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/09\/ex107.png?resize=350%2C200 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/09\/ex107.png?resize=525%2C300 1.5x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/09\/ex107.png?resize=700%2C400 2x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/09\/ex107.png?resize=1050%2C600 3x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/09\/ex107.png?resize=1400%2C800 4x"},"classes":[]},{"id":1028,"url":"https:\/\/gantovnik.com\/bio-tips\/2021\/11\/183-creating-a-histogram-using-seaborn\/","url_meta":{"origin":1041,"position":1},"title":"#184 Creating a histogram using seaborn","author":"gantovnik","date":"2021-11-07","format":false,"excerpt":"[code language=\"python\"] import numpy as np import pandas as pd import seaborn as sns import matplotlib.pyplot as plt import os os.chdir(r'D:\\projects\\wordpress\\ex183') os.getcwd() url = \"https:\/\/archive.ics.uci.edu\/ml\/machine-learning-databases\/auto-mpg\/auto-mpg.data\" df = pd.read_csv(url) column_names = ['mpg', 'cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'year', 'origin', 'name'] df = pd.read_csv(url, names= column_names, delim_whitespace=True) print(df.head()) #Convert the horsepower and\u2026","rel":"","context":"In &quot;python&quot;","block_context":{"text":"python","link":"https:\/\/gantovnik.com\/bio-tips\/category\/python\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex183.jpg?resize=350%2C200&ssl=1","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex183.jpg?resize=350%2C200&ssl=1 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex183.jpg?resize=525%2C300&ssl=1 1.5x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex183.jpg?resize=700%2C400&ssl=1 2x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex183.jpg?resize=1050%2C600&ssl=1 3x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex183.jpg?resize=1400%2C800&ssl=1 4x"},"classes":[]},{"id":1032,"url":"https:\/\/gantovnik.com\/bio-tips\/2021\/11\/184-creating-a-boxplot-using-seaborn\/","url_meta":{"origin":1041,"position":2},"title":"#183 Creating a boxplot using seaborn","author":"gantovnik","date":"2021-11-07","format":false,"excerpt":"[code language=\"python\"] import numpy as np import pandas as pd import seaborn as sns import matplotlib.pyplot as plt import os os.chdir(r'D:\\projects\\wordpress\\ex184') os.getcwd() url = \"https:\/\/archive.ics.uci.edu\/ml\/machine-learning-databases\/auto-mpg\/auto-mpg.data\" df = pd.read_csv(url) column_names = ['mpg', 'cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'year', 'origin', 'name'] df = pd.read_csv(url, names= column_names, delim_whitespace=True) print(df.head()) #Convert the horsepower and\u2026","rel":"","context":"In &quot;python&quot;","block_context":{"text":"python","link":"https:\/\/gantovnik.com\/bio-tips\/category\/python\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex184.jpg?resize=350%2C200&ssl=1","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex184.jpg?resize=350%2C200&ssl=1 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex184.jpg?resize=525%2C300&ssl=1 1.5x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex184.jpg?resize=700%2C400&ssl=1 2x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex184.jpg?resize=1050%2C600&ssl=1 3x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex184.jpg?resize=1400%2C800&ssl=1 4x"},"classes":[]},{"id":1037,"url":"https:\/\/gantovnik.com\/bio-tips\/2021\/11\/185-creating-a-scatter-plot-using-seaborn\/","url_meta":{"origin":1041,"position":3},"title":"#185 Creating a scatter plot using seaborn","author":"gantovnik","date":"2021-11-07","format":false,"excerpt":"[code language=\"python\"] import matplotlib.pyplot as plt import numpy as np import pandas as pd import os import seaborn as sns os.chdir(r'D:\\projects\\wordpress\\ex185') os.getcwd() url = \"https:\/\/archive.ics.uci.edu\/ml\/machine-learning-databases\/auto-mpg\/auto-mpg.data\" df = pd.read_csv(url) column_names = ['mpg', 'cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'year', 'origin', 'name'] df = pd.read_csv(url, names= column_names, delim_whitespace=True) df.loc[df.horsepower == '?', 'horsepower'] =\u2026","rel":"","context":"In &quot;python&quot;","block_context":{"text":"python","link":"https:\/\/gantovnik.com\/bio-tips\/category\/python\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex185b.jpg?resize=350%2C200&ssl=1","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex185b.jpg?resize=350%2C200&ssl=1 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex185b.jpg?resize=525%2C300&ssl=1 1.5x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex185b.jpg?resize=700%2C400&ssl=1 2x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex185b.jpg?resize=1050%2C600&ssl=1 3x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex185b.jpg?resize=1400%2C800&ssl=1 4x"},"classes":[]},{"id":85,"url":"https:\/\/gantovnik.com\/bio-tips\/2018\/12\/subplots-in-matplotlib\/","url_meta":{"origin":1041,"position":4},"title":"#13 Subplots in matplotlib","author":"gantovnik","date":"2018-12-29","format":false,"excerpt":"[code language=\"python\"] import os import matplotlib.pyplot as plt import numpy as np os.chdir('\/home\/vg\/Downloads\/projects\/ex13') os.getcwd() fig,axes = plt.subplots(2,2,figsize=(6,6),sharex=True,sharey=True,squeeze=False) x1=np.random.randn(100) x2=np.random.randn(100) axes[0,0].set_title(\"Uncorrelated\") axes[0,0].scatter(x1,x2) axes[0,1].set_title(\"Weakly positively correlated\") axes[0,1].scatter(x1,x1+x2) axes[1,0].set_title(\"Weakly negatively correlated\") axes[1,0].scatter(x1,-x1+x2) axes[1,1].set_title(\"Strongly correlated\") axes[1,1].scatter(x1,x1+0.15*x2) axes[1,1].set_xlabel(\"x\") axes[1,0].set_xlabel(\"x\") axes[0,0].set_ylabel(\"y\") axes[1,0].set_ylabel(\"y\") plt.subplots_adjust(left=0.1,right=0.95,bottom=0.1,top=0.95,wspace=0.1,hspace=0.2) plt.savefig(\"example13.png\", dpi=100) plt.show() plt.close() [\/code]","rel":"","context":"In &quot;python&quot;","block_context":{"text":"python","link":"https:\/\/gantovnik.com\/bio-tips\/category\/python\/"},"img":{"alt_text":"example13","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2018\/12\/example13.png?resize=350%2C200","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2018\/12\/example13.png?resize=350%2C200 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2018\/12\/example13.png?resize=525%2C300 1.5x"},"classes":[]},{"id":1977,"url":"https:\/\/gantovnik.com\/bio-tips\/2023\/11\/396-seaborn-pairplot-example-with-lowess-regression\/","url_meta":{"origin":1041,"position":5},"title":"#396 Seaborn pairplot example with lowess regression","author":"gantovnik","date":"2023-11-16","format":false,"excerpt":"[code language=\"python\"] import matplotlib.pyplot as plt import seaborn as sns def plot_extra(x, y, **kwargs): if kwargs['label'] == first_label: sns.regplot(data=kwargs['data'], x=x.name, y=y.name, lowess=True, scatter=False, color=kwargs['color']) df = sns.load_dataset('iris') first_label = df['species'][0] pg = sns.pairplot(df, hue='species', plot_kws={'alpha': 0.8}, palette='rainbow') pg.map_offdiag(plot_extra, color='crimson', data=df) legend_dict = {h.get_label(): h for h in pg.legend.legend_handles} # the\u2026","rel":"","context":"In &quot;python&quot;","block_context":{"text":"python","link":"https:\/\/gantovnik.com\/bio-tips\/category\/python\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2023\/11\/ex396.png?fit=1200%2C930&ssl=1&resize=350%2C200","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2023\/11\/ex396.png?fit=1200%2C930&ssl=1&resize=350%2C200 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2023\/11\/ex396.png?fit=1200%2C930&ssl=1&resize=525%2C300 1.5x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2023\/11\/ex396.png?fit=1200%2C930&ssl=1&resize=700%2C400 2x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2023\/11\/ex396.png?fit=1200%2C930&ssl=1&resize=1050%2C600 3x"},"classes":[]}],"_links":{"self":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts\/1041","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/comments?post=1041"}],"version-history":[{"count":0,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts\/1041\/revisions"}],"wp:attachment":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/media?parent=1041"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/categories?post=1041"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/tags?post=1041"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}