{"id":1892,"date":"2023-07-17T01:55:23","date_gmt":"2023-07-17T08:55:23","guid":{"rendered":"https:\/\/gantovnik.com\/bio-tips\/?p=1892"},"modified":"2023-07-17T01:55:23","modified_gmt":"2023-07-17T08:55:23","slug":"363-remove-duplicate-rows-from-pandas-database","status":"publish","type":"post","link":"https:\/\/gantovnik.com\/bio-tips\/2023\/07\/363-remove-duplicate-rows-from-pandas-database\/","title":{"rendered":"#363 Remove duplicate rows from pandas database"},"content":{"rendered":"<p>data.csv<\/p>\n<pre class=\"brush: python; title: ; notranslate\" title=\"\">\r\n            first column second column\r\n0   GGAGCAGCGAGGCAACCGGG    TTCTGGCAGT\r\n1   CGAGCGTATGATAGCAACTT    TGGAGGTTGC\r\n2   CGTATGGTCGCCTTTCTCCA    ACAGGGGGCT\r\n3   AAAGTTCGTGTACCTCTATG    ACATACCTGT\r\n4   AAAGTTCGTGTACCTCTATG    ACATACCTGT\r\n5   AAAGTTCGTGTACCTCTATG    ACATACCTGT\r\n6   AAAGTTCGTGTACCTCTATG    ACATACCTGT\r\n7   AAAGTTCGTGTACCTCTATG    ACATACCTGT\r\n8   AAAGTTCGTGTACCTCTATG    ACATACCTGT\r\n9   AAAGTTCGTGTACCTCTATG    ACATACCTGT\r\n10  AAAGTTCGTGTACCTCTATG    ACATACCTGT\r\n11  AAAGTTCGTGTACCTCTATG    ACATACCTGT\r\n12  GGAGCAGCGAGGCAACCGGG    TTCTGGCAGT\r\n<\/pre>\n<pre class=\"brush: python; title: ; notranslate\" title=\"\">\r\nimport pandas as pd\r\ndf = pd.read_csv('data.csv')\r\nprint(df)\r\ndf = df.drop_duplicates()\r\nprint(df)\r\n<\/pre>\n<p>Output:<\/p>\n<pre class=\"brush: python; title: ; notranslate\" title=\"\">\r\n           first column second column\r\n0  GGAGCAGCGAGGCAACCGGG    TTCTGGCAGT\r\n1  CGAGCGTATGATAGCAACTT    TGGAGGTTGC\r\n2  CGTATGGTCGCCTTTCTCCA    ACAGGGGGCT\r\n3  AAAGTTCGTGTACCTCTATG    ACATACCTGT\r\n<\/pre>\n","protected":false},"excerpt":{"rendered":"<p>data.csv first column second column 0 GGAGCAGCGAGGCAACCGGG TTCTGGCAGT 1 CGAGCGTATGATAGCAACTT TGGAGGTTGC 2 CGTATGGTCGCCTTTCTCCA ACAGGGGGCT 3 AAAGTTCGTGTACCTCTATG ACATACCTGT 4 AAAGTTCGTGTACCTCTATG ACATACCTGT 5 AAAGTTCGTGTACCTCTATG ACATACCTGT 6 AAAGTTCGTGTACCTCTATG ACATACCTGT 7 AAAGTTCGTGTACCTCTATG ACATACCTGT 8 AAAGTTCGTGTACCTCTATG ACATACCTGT 9 AAAGTTCGTGTACCTCTATG ACATACCTGT 10 AAAGTTCGTGTACCTCTATG ACATACCTGT 11 AAAGTTCGTGTACCTCTATG ACATACCTGT 12 GGAGCAGCGAGGCAACCGGG TTCTGGCAGT import pandas as pd df = pd.read_csv(&#8216;data.csv&#8217;) print(df) df = df.drop_duplicates() [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"nf_dc_page":"","_et_pb_use_builder":"","_et_pb_old_content":"","_et_gb_content_width":"","_lmt_disableupdate":"yes","_lmt_disable":"","jetpack_post_was_ever_published":false,"_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_memberships_contains_paid_content":false,"footnotes":""},"categories":[66,2],"tags":[],"class_list":["post-1892","post","type-post","status-publish","format-standard","hentry","category-pandas","category-python"],"modified_by":"gantovnik","jetpack_featured_media_url":"","jetpack_sharing_enabled":true,"jetpack_shortlink":"https:\/\/wp.me\/p8bH0k-uw","jetpack_likes_enabled":true,"jetpack-related-posts":[{"id":2158,"url":"https:\/\/gantovnik.com\/bio-tips\/2024\/05\/421-plot-a-pandas-data-frame-for-loops-on-columns\/","url_meta":{"origin":1892,"position":0},"title":"#421 Plot a pandas data frame for loops on columns","author":"gantovnik","date":"2024-05-04","format":false,"excerpt":"[code language=\"python\"] import numpy as np import pandas as pd import matplotlib.pyplot as plt np.random.seed(2023) df = pd.DataFrame({ \"x\": np.linspace(0, 1, 10), \"A\": np.random.rand(10), \"B\": np.random.rand(10), \"C\": np.random.rand(10), }) for column in df: if column != 'x': plt.plot(df['x'], df[column],label=f\"x vs. {column}\") plt.scatter(df['x'], df[column]) plt.title(\"Performance\") plt.grid() plt.xlabel('x') plt.ylabel('Efficiency') plt.legend() plt.show() plt.savefig('ex421.png',\u2026","rel":"","context":"In &quot;matplotlib&quot;","block_context":{"text":"matplotlib","link":"https:\/\/gantovnik.com\/bio-tips\/category\/matplotlib\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2024\/05\/ex421.png?resize=350%2C200&ssl=1","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2024\/05\/ex421.png?resize=350%2C200&ssl=1 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2024\/05\/ex421.png?resize=525%2C300&ssl=1 1.5x"},"classes":[]},{"id":1752,"url":"https:\/\/gantovnik.com\/bio-tips\/2023\/01\/333-add-columns-with-max-min-values-grouped-by-values-on-the-selected-column-using-pandas\/","url_meta":{"origin":1892,"position":1},"title":"#333 Add columns with max min values grouped by values on the selected column using pandas","author":"gantovnik","date":"2023-01-05","format":false,"excerpt":"table.txt [code language=\"python\"] quad lcid eid fx fy fxy QUAD 1 23 1.2 1.0 21.0 QUAD 4 24 2.6 2.0 22.0 QUAD 1 25 3.2 3.0 23.0 QUAD 2 23 4.6 4.0 24.0 QUAD 4 24 5.6 5.0 25.0 QUAD 2 25 6.2 6.0 26.0 QUAD 3 23 7.2 7.0\u2026","rel":"","context":"In &quot;pandas&quot;","block_context":{"text":"pandas","link":"https:\/\/gantovnik.com\/bio-tips\/category\/pandas\/"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":1028,"url":"https:\/\/gantovnik.com\/bio-tips\/2021\/11\/183-creating-a-histogram-using-seaborn\/","url_meta":{"origin":1892,"position":2},"title":"#184 Creating a histogram using seaborn","author":"gantovnik","date":"2021-11-07","format":false,"excerpt":"[code language=\"python\"] import numpy as np import pandas as pd import seaborn as sns import matplotlib.pyplot as plt import os os.chdir(r'D:\\projects\\wordpress\\ex183') os.getcwd() url = \"https:\/\/archive.ics.uci.edu\/ml\/machine-learning-databases\/auto-mpg\/auto-mpg.data\" df = pd.read_csv(url) column_names = ['mpg', 'cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'year', 'origin', 'name'] df = pd.read_csv(url, names= column_names, delim_whitespace=True) print(df.head()) #Convert the horsepower and\u2026","rel":"","context":"In &quot;python&quot;","block_context":{"text":"python","link":"https:\/\/gantovnik.com\/bio-tips\/category\/python\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex183.jpg?resize=350%2C200&ssl=1","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex183.jpg?resize=350%2C200&ssl=1 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex183.jpg?resize=525%2C300&ssl=1 1.5x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex183.jpg?resize=700%2C400&ssl=1 2x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex183.jpg?resize=1050%2C600&ssl=1 3x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex183.jpg?resize=1400%2C800&ssl=1 4x"},"classes":[]},{"id":1032,"url":"https:\/\/gantovnik.com\/bio-tips\/2021\/11\/184-creating-a-boxplot-using-seaborn\/","url_meta":{"origin":1892,"position":3},"title":"#183 Creating a boxplot using seaborn","author":"gantovnik","date":"2021-11-07","format":false,"excerpt":"[code language=\"python\"] import numpy as np import pandas as pd import seaborn as sns import matplotlib.pyplot as plt import os os.chdir(r'D:\\projects\\wordpress\\ex184') os.getcwd() url = \"https:\/\/archive.ics.uci.edu\/ml\/machine-learning-databases\/auto-mpg\/auto-mpg.data\" df = pd.read_csv(url) column_names = ['mpg', 'cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'year', 'origin', 'name'] df = pd.read_csv(url, names= column_names, delim_whitespace=True) print(df.head()) #Convert the horsepower and\u2026","rel":"","context":"In &quot;python&quot;","block_context":{"text":"python","link":"https:\/\/gantovnik.com\/bio-tips\/category\/python\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex184.jpg?resize=350%2C200&ssl=1","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex184.jpg?resize=350%2C200&ssl=1 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex184.jpg?resize=525%2C300&ssl=1 1.5x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex184.jpg?resize=700%2C400&ssl=1 2x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex184.jpg?resize=1050%2C600&ssl=1 3x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex184.jpg?resize=1400%2C800&ssl=1 4x"},"classes":[]},{"id":1750,"url":"https:\/\/gantovnik.com\/bio-tips\/2023\/01\/332-remove-duplicates-by-columns-and-keep-the-row-with-the-highest-value-in-the-selected-column-by-pandas\/","url_meta":{"origin":1892,"position":4},"title":"#332 Remove duplicates by columns and keep the row with the highest value in the selected column by pandas","author":"gantovnik","date":"2023-01-05","format":false,"excerpt":"table.txt [code language=\"python\"] quad lcid eid fx fy fxy QUAD 1 23 1.2 1.0 21.0 QUAD 4 24 2.6 2.0 22.0 QUAD 1 25 3.2 3.0 23.0 QUAD 2 23 4.6 4.0 24.0 QUAD 4 24 5.6 5.0 25.0 QUAD 2 25 6.2 6.0 26.0 QUAD 3 23 7.2 7.0\u2026","rel":"","context":"In &quot;pandas&quot;","block_context":{"text":"pandas","link":"https:\/\/gantovnik.com\/bio-tips\/category\/pandas\/"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":1037,"url":"https:\/\/gantovnik.com\/bio-tips\/2021\/11\/185-creating-a-scatter-plot-using-seaborn\/","url_meta":{"origin":1892,"position":5},"title":"#185 Creating a scatter plot using seaborn","author":"gantovnik","date":"2021-11-07","format":false,"excerpt":"[code language=\"python\"] import matplotlib.pyplot as plt import numpy as np import pandas as pd import os import seaborn as sns os.chdir(r'D:\\projects\\wordpress\\ex185') os.getcwd() url = \"https:\/\/archive.ics.uci.edu\/ml\/machine-learning-databases\/auto-mpg\/auto-mpg.data\" df = pd.read_csv(url) column_names = ['mpg', 'cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'year', 'origin', 'name'] df = pd.read_csv(url, names= column_names, delim_whitespace=True) df.loc[df.horsepower == '?', 'horsepower'] =\u2026","rel":"","context":"In &quot;python&quot;","block_context":{"text":"python","link":"https:\/\/gantovnik.com\/bio-tips\/category\/python\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex185b.jpg?resize=350%2C200&ssl=1","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex185b.jpg?resize=350%2C200&ssl=1 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex185b.jpg?resize=525%2C300&ssl=1 1.5x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex185b.jpg?resize=700%2C400&ssl=1 2x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex185b.jpg?resize=1050%2C600&ssl=1 3x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex185b.jpg?resize=1400%2C800&ssl=1 4x"},"classes":[]}],"_links":{"self":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts\/1892","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/comments?post=1892"}],"version-history":[{"count":0,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts\/1892\/revisions"}],"wp:attachment":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/media?parent=1892"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/categories?post=1892"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/tags?post=1892"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}