{"id":243,"date":"2019-01-13T20:33:56","date_gmt":"2019-01-14T04:33:56","guid":{"rendered":"http:\/\/gantovnik.com\/bio-tips\/?p=243"},"modified":"2024-07-21T05:26:30","modified_gmt":"2024-07-21T12:26:30","slug":"classification","status":"publish","type":"post","link":"https:\/\/gantovnik.com\/bio-tips\/2019\/01\/classification\/","title":{"rendered":"#48 Classification"},"content":{"rendered":"<pre class=\"brush: python; title: ; notranslate\" title=\"\">\nimport os\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn import datasets\nfrom sklearn.model_selection import train_test_split\nfrom sklearn import linear_model\nfrom sklearn import metrics\nfrom sklearn import tree\nfrom sklearn import neighbors\nfrom sklearn import svm\nfrom sklearn import ensemble\nfrom sklearn import cluster\nimport seaborn as sns\nos.chdir(r&amp;#039;D:\\projects\\wordpress\\ex48&amp;#039;)\nos.getcwd()\niris = datasets.load_iris()\nprint(type(iris))\nprint(iris.target_names)\nprint(iris.feature_names)\nprint(iris.data.shape)\nprint(iris.target.shape)\nX_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, train_size=0.7)\nclassifier = linear_model.LogisticRegression()\nprint(classifier.fit(X_train, y_train))\n\ny_test_pred = classifier.predict(X_test)\nprint(metrics.classification_report(y_test, y_test_pred))\n\nprint(np.bincount(y_test))\nprint(metrics.confusion_matrix(y_test, y_test_pred))\n\nclassifier = tree.DecisionTreeClassifier()\nclassifier.fit(X_train, y_train)\ny_test_pred = classifier.predict(X_test)\nprint(metrics.confusion_matrix(y_test, y_test_pred))\n\nclassifier = neighbors.KNeighborsClassifier()\nclassifier.fit(X_train, y_train)\ny_test_pred = classifier.predict(X_test)\nprint(metrics.confusion_matrix(y_test, y_test_pred))\n\nclassifier = svm.SVC()\nclassifier.fit(X_train, y_train)\ny_test_pred = classifier.predict(X_test)\nprint(metrics.confusion_matrix(y_test, y_test_pred))\n\nclassifier = 
ensemble.RandomForestClassifier()\nclassifier.fit(X_train, y_train)\ny_test_pred = classifier.predict(X_test)\nprint(metrics.confusion_matrix(y_test, y_test_pred))\n\ntrain_size_vec = np.linspace(0.1, 0.9, 30)\nclassifiers = &#x5B;tree.DecisionTreeClassifier,\n               neighbors.KNeighborsClassifier,\n               svm.SVC,\n               ensemble.RandomForestClassifier\n              ]\ncm_diags = np.zeros((3, len(train_size_vec), len(classifiers)), dtype=float)\nfor n, train_size in enumerate(train_size_vec):\n    X_train, X_test, y_train, y_test = \\\n        train_test_split(iris.data, iris.target, train_size=train_size)\n\n    for m, Classifier in enumerate(classifiers):\n        classifier = Classifier()\n        classifier.fit(X_train, y_train)\n        y_test_pred = classifier.predict(X_test)\n        cm_diags&#x5B;:, n, m] = metrics.confusion_matrix(y_test, y_test_pred).diagonal()\n        cm_diags&#x5B;:, n, m] \/= np.bincount(y_test)\n\nfig, axes = plt.subplots(4, 1, figsize=(6,18))\n\nfor m, Classifier in enumerate(classifiers):\n    axes&#x5B;m].plot(train_size_vec, cm_diags&#x5B;2, :, m], label=iris.target_names&#x5B;2])\n    axes&#x5B;m].plot(train_size_vec, cm_diags&#x5B;1, :, m], label=iris.target_names&#x5B;1])\n    axes&#x5B;m].plot(train_size_vec, cm_diags&#x5B;0, :, m], label=iris.target_names&#x5B;0])\n    axes&#x5B;m].set_title(type(Classifier()).__name__)\n    axes&#x5B;m].set_ylim(0, 1.1)\n    axes&#x5B;m].set_xlim(0.1, 0.9)\n    axes&#x5B;m].set_ylabel(&amp;quot;classification accuracy&amp;quot;)\n    axes&#x5B;m].set_xlabel(&amp;quot;training size ratio&amp;quot;)\n    axes&#x5B;m].legend(loc=4)\n\nfig.tight_layout()\nplt.savefig(&amp;quot;example48.png&amp;quot;, dpi=100)\nplt.show()\nplt.close()\n<\/pre>\n<p><img data-recalc-dims=\"1\" loading=\"lazy\" decoding=\"async\" class=\"  wp-image-246 aligncenter\" 
src=\"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/01\/example48.png?resize=579%2C1737\" alt=\"example48\" width=\"579\" height=\"1737\" srcset=\"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/01\/example48.png?w=600&amp;ssl=1 600w, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/01\/example48.png?resize=100%2C300&amp;ssl=1 100w, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/01\/example48.png?resize=341%2C1024&amp;ssl=1 341w\" sizes=\"auto, (max-width: 579px) 100vw, 579px\" \/><br \/>\n&nbsp;<\/p>\n","protected":false},"excerpt":{"rendered":"<p>import os import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_selection import train_test_split from sklearn import linear_model from sklearn import metrics from sklearn import tree from sklearn import neighbors from sklearn import svm from sklearn import ensemble from sklearn import cluster import seaborn as sns os.chdir(r&amp;#039;D:\\projects\\wordpress\\ex48&amp;#039;) os.getcwd() iris = datasets.load_iris() 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"nf_dc_page":"","_et_pb_use_builder":"","_et_pb_old_content":"","_et_gb_content_width":"","_lmt_disableupdate":"yes","_lmt_disable":"","_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_memberships_contains_paid_content":false,"footnotes":"","jetpack_post_was_ever_published":false},"categories":[2],"tags":[],"class_list":["post-243","post","type-post","status-publish","format-standard","hentry","category-python"],"modified_by":"gantovnik","jetpack_featured_media_url":"","jetpack_sharing_enabled":true,"jetpack_shortlink":"https:\/\/wp.me\/p8bH0k-3V","jetpack_likes_enabled":true,"jetpack-related-posts":[{"id":232,"url":"https:\/\/gantovnik.com\/bio-tips\/2019\/01\/regression\/","url_meta":{"origin":243,"position":0},"title":"#47 Regression","author":"gantovnik","date":"2019-01-13","format":false,"excerpt":"[code language=\"python\"] import os import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from sklearn.model_selection import train_test_split from sklearn import linear_model from sklearn import metrics from sklearn import tree from sklearn import neighbors from sklearn import svm from sklearn import ensemble from sklearn import cluster import seaborn\u2026","rel":"","context":"In &quot;python&quot;","block_context":{"text":"python","link":"https:\/\/gantovnik.com\/bio-tips\/category\/python\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/01\/example47_2.png?resize=350%2C200","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/01\/example47_2.png?resize=350%2C200 1x, 
https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/01\/example47_2.png?resize=525%2C300 1.5x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/01\/example47_2.png?resize=700%2C400 2x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/01\/example47_2.png?resize=1050%2C600 3x"},"classes":[]},{"id":304,"url":"https:\/\/gantovnik.com\/bio-tips\/2019\/01\/training-a-perceptron-via-scikit-learn\/","url_meta":{"origin":243,"position":1},"title":"#53 Training a perceptron via scikit-learn","author":"gantovnik","date":"2019-01-22","format":false,"excerpt":"[code language=\"python\"] import os import matplotlib.pyplot as plt import numpy as np from sklearn import datasets os.chdir(r'D:\\projects\\wordpress\\ex53') os.getcwd() iris = datasets.load_iris() X = iris.data[:, [2, 3]] y = iris.target from sklearn.model_selection import train_test_split X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0) from sklearn.preprocessing import StandardScaler sc = StandardScaler() sc.fit(X_train)\u2026","rel":"","context":"In &quot;python&quot;","block_context":{"text":"python","link":"https:\/\/gantovnik.com\/bio-tips\/category\/python\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/01\/example53.png?resize=350%2C200&ssl=1","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/01\/example53.png?resize=350%2C200&ssl=1 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/01\/example53.png?resize=525%2C300&ssl=1 1.5x"},"classes":[]},{"id":248,"url":"https:\/\/gantovnik.com\/bio-tips\/2019\/01\/clustering\/","url_meta":{"origin":243,"position":2},"title":"#49 Clustering","author":"gantovnik","date":"2019-01-13","format":false,"excerpt":"[code language=\"python\"] import os import numpy as np import matplotlib.pyplot as plt from sklearn import datasets from 
sklearn import metrics from sklearn import cluster os.chdir(r'D:\\projects\\wordpress\\ex49') os.getcwd() iris = datasets.load_iris() X, y = iris.data, iris.target np.random.seed(123) n_clusters = 3 c = cluster.KMeans(n_clusters=n_clusters) c.fit(X) y_pred = c.predict(X) print(y_pred[::8]) print(y[::8]) idx_0, idx_1, idx_2\u2026","rel":"","context":"In &quot;python&quot;","block_context":{"text":"python","link":"https:\/\/gantovnik.com\/bio-tips\/category\/python\/"},"img":{"alt_text":"example49","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/01\/example49.png?resize=350%2C200","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/01\/example49.png?resize=350%2C200 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/01\/example49.png?resize=525%2C300 1.5x"},"classes":[]},{"id":2104,"url":"https:\/\/gantovnik.com\/bio-tips\/2024\/01\/411-clustering-using-dbscan-algorithm-in-sklearn-cluster-in-python\/","url_meta":{"origin":243,"position":3},"title":"#411 Clustering using DBSCAN algorithm in sklearn.cluster in python","author":"gantovnik","date":"2024-01-18","format":false,"excerpt":"DBSCAN works by finding core points that have many data points within a given radius. Once the core is defined, the process is iteratively computed until there are no more core points definable within the maximum radius. 
This algorithm does exceptionally well compared to kmeans where there is noise present\u2026","rel":"","context":"In &quot;cluster&quot;","block_context":{"text":"cluster","link":"https:\/\/gantovnik.com\/bio-tips\/category\/cluster\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2024\/01\/ex411.png?resize=350%2C200&ssl=1","width":350,"height":200},"classes":[]},{"id":1041,"url":"https:\/\/gantovnik.com\/bio-tips\/2021\/11\/186-generate-the-feature-importance\/","url_meta":{"origin":243,"position":4},"title":"#186 Generate the feature importance","author":"gantovnik","date":"2021-11-07","format":false,"excerpt":"[code language=\"python\"] import pandas as pd import seaborn as sns import os import matplotlib.pyplot as plt os.chdir(r'D:\\projects\\wordpress\\ex186') sns.set(style=\"ticks\") # read the downloaded input data (marketing data) df = pd.read_csv('https:\/\/raw.githubusercontent.com\/TrainingByPackt\/Big-Data-Analysis-with-Python\/master\/Lesson07\/Dataset\/bank.csv', sep=';') df['y'].replace(['yes','no'],[1,0],inplace=True) df['default'].replace(['yes','no'],[1,0],inplace=True) df['housing'].replace(['yes','no'],[1,0],inplace=True) df['loan'].replace(['yes','no'],[1,0],inplace=True) corr_df = df.corr() sns.heatmap(corr_df, xticklabels=corr_df.columns.values, yticklabels=corr_df.columns.values, annot = True, annot_kws={'size':12}) heat_map=plt.gcf(); heat_map.set_size_inches(10,5) plt.xticks(fontsize=10); plt.yticks(fontsize=10); plt.savefig('ex186a.jpg', dpi=300)\u2026","rel":"","context":"In &quot;python&quot;","block_context":{"text":"python","link":"https:\/\/gantovnik.com\/bio-tips\/category\/python\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex186b.jpg?resize=350%2C200&ssl=1","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex186b.jpg?resize=350%2C200&ssl=1 1x, 
https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex186b.jpg?resize=525%2C300&ssl=1 1.5x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex186b.jpg?resize=700%2C400&ssl=1 2x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex186b.jpg?resize=1050%2C600&ssl=1 3x"},"classes":[]},{"id":401,"url":"https:\/\/gantovnik.com\/bio-tips\/2019\/02\/neural-network-prediction\/","url_meta":{"origin":243,"position":5},"title":"#60 Neural network prediction using python neurolab","author":"gantovnik","date":"2019-02-19","format":false,"excerpt":"[code language=\"python\"] import os import matplotlib.pyplot as plt import numpy as np import neurolab as nl os.chdir(r'D:\\projects\\wordpress\\ex60') os.getcwd() #create train sets x=np.linspace(-10,10,60) y=np.cos(x)*0.9 size=len(x) x_train=x.reshape(size,1) y_train=y.reshape(size,1) #create network with 4 layers and randomly initiate d=[[1,1],[45,1],[45,45,1],[45,45,45,1]] for i in range(4): net=nl.net.newff([[-10,10]],d[i]) train_net=nl.train.train_gd(net,x_train,y_train,epochs=1000,show=100) outp=net.sim(x_train) plt.subplot(2,1,1) plt.grid(True) plt.plot(train_net) plt.title('Hidden Layers: ' + str(i))\u2026","rel":"","context":"In &quot;python&quot;","block_context":{"text":"python","link":"https:\/\/gantovnik.com\/bio-tips\/category\/python\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/02\/example69_3.png?resize=350%2C200&ssl=1","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/02\/example69_3.png?resize=350%2C200&ssl=1 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/02\/example69_3.png?resize=525%2C300&ssl=1 
1.5x"},"classes":[]}],"_links":{"self":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts\/243","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/comments?post=243"}],"version-history":[{"count":1,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts\/243\/revisions"}],"predecessor-version":[{"id":2908,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts\/243\/revisions\/2908"}],"wp:attachment":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/media?parent=243"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/categories?post=243"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/tags?post=243"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}