{"id":1750,"date":"2023-01-05T12:10:55","date_gmt":"2023-01-05T20:10:55","guid":{"rendered":"https:\/\/gantovnik.com\/bio-tips\/?p=1750"},"modified":"2023-01-05T12:10:55","modified_gmt":"2023-01-05T20:10:55","slug":"332-remove-duplicates-by-columns-and-keep-the-row-with-the-highest-value-in-the-selected-column-by-pandas","status":"publish","type":"post","link":"https:\/\/gantovnik.com\/bio-tips\/2023\/01\/332-remove-duplicates-by-columns-and-keep-the-row-with-the-highest-value-in-the-selected-column-by-pandas\/","title":{"rendered":"#332 Remove duplicates by columns and keep the row with the highest value in the selected column by pandas"},"content":{"rendered":"<p>table.txt<\/p>\n<pre class=\"brush: python; title: ; notranslate\" title=\"\">\r\nquad lcid eid fx fy fxy\r\nQUAD 1 23 1.2 1.0 21.0\r\nQUAD 4 24 2.6 2.0 22.0\r\nQUAD 1 25 3.2 3.0 23.0\r\nQUAD 2 23 4.6 4.0 24.0\r\nQUAD 4 24 5.6 5.0 25.0\r\nQUAD 2 25 6.2 6.0 26.0\r\nQUAD 3 23 7.2 7.0 27.0\r\nQUAD 3 24 6.2 8.0 28.0\r\nQUAD 2 25 5.2 9.0 29.0\r\nQUAD 4 23 3.3 10.0 30.0\r\nQUAD 4 24 12.2 11.0 31.0\r\nQUAD 1 25 1.1 12.0 32.0\r\n<\/pre>\n<p>ex332.py<\/p>\n<pre class=\"brush: python; title: ; notranslate\" title=\"\">\r\nimport pandas as pd\r\ndf = pd.read_csv(&quot;table.txt&quot;, sep=&quot; &quot;)\r\n# display DataFrame\r\nprint(&quot;Input:&quot;)\r\nprint(df)\r\n# Keeping the row with the highest value\r\n# Remove duplicates by columns eid and keeping the\r\n# row with the highest value in column fx\r\nprint(&quot; &quot;)\r\n# max fx\r\ndf1=df.sort_values('fx', ascending=False).drop_duplicates('eid').sort_index()\r\ndf1=df1.sort_values('eid', ascending=True)\r\nprint(&quot;Result max fx:&quot;)\r\nprint(df1)\r\n# min fx\r\ndf1=df.sort_values('fx', ascending=True).drop_duplicates('eid').sort_index()\r\ndf1=df1.sort_values('eid', ascending=True)\r\nprint(&quot;Result min fx:&quot;)\r\nprint(df1)\r\n\r\n# max fy\r\ndf1=df.sort_values('fy', ascending=False).drop_duplicates('eid').sort_index()\r\ndf1=df1.sort_values('eid', ascending=True)\r\nprint(&quot;Result max fy:&quot;)\r\nprint(df1)\r\n# min fy\r\ndf1=df.sort_values('fy', ascending=True).drop_duplicates('eid').sort_index()\r\ndf1=df1.sort_values('eid', ascending=True)\r\nprint(&quot;Result min fy:&quot;)\r\nprint(df1)\r\n\r\n# max fxy\r\ndf1=df.sort_values('fxy', ascending=False).drop_duplicates('eid').sort_index()\r\ndf1=df1.sort_values('eid', ascending=True)\r\nprint(&quot;Result max fxy:&quot;)\r\nprint(df1)\r\n# min fxy\r\ndf1=df.sort_values('fxy', ascending=True).drop_duplicates('eid').sort_index()\r\ndf1=df1.sort_values('eid', ascending=True)\r\nprint(&quot;Result min fxy:&quot;)\r\nprint(df1)\r\n<\/pre>\n<p>Output:<\/p>\n<pre class=\"brush: python; title: ; notranslate\" title=\"\">\r\nInput:\r\n    quad  lcid  eid    fx    fy   fxy\r\n0   QUAD     1   23   1.2   1.0  21.0\r\n1   QUAD     4   24   2.6   2.0  22.0\r\n2   QUAD     1   25   3.2   3.0  23.0\r\n3   QUAD     2   23   4.6   4.0  24.0\r\n4   QUAD     4   24   5.6   5.0  25.0\r\n5   QUAD     2   25   6.2   6.0  26.0\r\n6   QUAD     3   23   7.2   7.0  27.0\r\n7   QUAD     3   24   6.2   8.0  28.0\r\n8   QUAD     2   25   5.2   9.0  29.0\r\n9   QUAD     4   23   3.3  10.0  30.0\r\n10  QUAD     4   24  12.2  11.0  31.0\r\n11  QUAD     1   25   1.1  12.0  32.0\r\n \r\nResult max fx:\r\n    quad  lcid  eid    fx    fy   fxy\r\n6   QUAD     3   23   7.2   7.0  27.0\r\n10  QUAD     4   24  12.2  11.0  31.0\r\n5   QUAD     2   25   6.2   6.0  26.0\r\nResult min fx:\r\n    quad  lcid  eid   fx    fy   fxy\r\n0   QUAD     1   23  1.2   1.0  21.0\r\n1   QUAD     4   24  2.6   2.0  22.0\r\n11  QUAD     1   25  1.1  12.0  32.0\r\nResult max fy:\r\n    quad  lcid  eid    fx    fy   fxy\r\n9   QUAD     4   23   3.3  10.0  30.0\r\n10  QUAD     4   24  12.2  11.0  31.0\r\n11  QUAD     1   25   1.1  12.0  32.0\r\nResult min fy:\r\n   quad  lcid  eid   fx   fy   fxy\r\n0  QUAD     1   23  1.2  1.0  21.0\r\n1  QUAD     4   24  2.6  2.0  22.0\r\n2  QUAD     1   25  3.2  3.0  23.0\r\nResult max fxy:\r\n    quad  lcid  eid    fx    fy   fxy\r\n9   QUAD     4   23   3.3  10.0  30.0\r\n10  QUAD     4   24  12.2  11.0  31.0\r\n11  QUAD     1   25   1.1  12.0  32.0\r\nResult min fxy:\r\n   quad  lcid  eid   fx   fy   fxy\r\n0  QUAD     1   23  1.2  1.0  21.0\r\n1  QUAD     4   24  2.6  2.0  22.0\r\n2  QUAD     1   25  3.2  3.0  23.0\r\n<\/pre>\n","protected":false},"excerpt":{"rendered":"<p>table.txt quad lcid eid fx fy fxy QUAD 1 23 1.2 1.0 21.0 QUAD 4 24 2.6 2.0 22.0 QUAD 1 25 3.2 3.0 23.0 QUAD 2 23 4.6 4.0 24.0 QUAD 4 24 5.6 5.0 25.0 QUAD 2 25 6.2 6.0 26.0 QUAD 3 23 7.2 7.0 27.0 QUAD 3 24 6.2 8.0 28.0 [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"nf_dc_page":"","_et_pb_use_builder":"","_et_pb_old_content":"","_et_gb_content_width":"","_lmt_disableupdate":"yes","_lmt_disable":"","jetpack_post_was_ever_published":false,"_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_memberships_contains_paid_content":false,"footnotes":""},"categories":[66,2],"tags":[68,67],"class_list":["post-1750","post","type-post","status-publish","format-standard","hentry","category-pandas","category-python","tag-min-max-sorting","tag-pandas"],"modified_by":"gantovnik","jetpack_featured_media_url":"","jetpack_sharing_enabled":true,"jetpack_shortlink":"https:\/\/wp.me\/p8bH0k-se","jetpack_likes_enabled":true,"jetpack-related-posts":[{"id":1752,"url":"https:\/\/gantovnik.com\/bio-tips\/2023\/01\/333-add-columns-with-max-min-values-grouped-by-values-on-the-selected-column-using-pandas\/","url_meta":{"origin":1750,"position":0},"title":"#333 Add columns with max min values grouped by values on the selected column using pandas","author":"gantovnik","date":"2023-01-05","format":false,"excerpt":"table.txt [code language=\"python\"] quad lcid eid fx fy fxy QUAD 1 23 1.2 1.0 21.0 QUAD 4 24 2.6 2.0 22.0 QUAD 1 25 3.2 3.0 23.0 QUAD 2 23 4.6 4.0 24.0 QUAD 4 24 5.6 5.0 25.0 QUAD 2 25 6.2 6.0 26.0 QUAD 3 23 7.2 7.0\u2026","rel":"","context":"In &quot;pandas&quot;","block_context":{"text":"pandas","link":"https:\/\/gantovnik.com\/bio-tips\/category\/pandas\/"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":1901,"url":"https:\/\/gantovnik.com\/bio-tips\/2023\/07\/367-extracting-columns-of-the-data-file-using-awk\/","url_meta":{"origin":1750,"position":1},"title":"#367 Extracting columns of the data file using awk.","author":"gantovnik","date":"2023-07-20","format":false,"excerpt":"example.csv [code language=\"python\"] Type,LCID,EID,Nx,Ny,Nxy,Mx,My,Mxy,Qyz,Qxz QUAD,5000001,389635,5.21044,0.9993295,3.184029,0.228490059,0.762438914,0.349926963,0.007402616,0.645762323 QUAD,5000002,389636,4.332321,-45.55149,10.13951,0.128933201,0.289045003,0.442976734,0.394346902,0.965424659 QUAD,5000003,389637,5.332321,-65.55149,12.32391,0.867613103,0.345799842,0.902593358,0.56308979,0.77084519 QUAD,5000004,389638,4.332321,-35.55149,14.27251,0.429855813,0.882431001,0.15453015,0.710809946,0.918974895 QUAD,5000005,389639,5.332455,-75.55149,15.37321,0.738401857,0.289007109,0.59902178,0.824840285,0.571976301 QUAD,5000006,389640,4.332321,-65.55149,13.27221,0.102678011,0.581496802,0.785601755,0.415283869,0.817283531 QUAD,5000007,389641,3.332321,-45.55149,12.67351,0.363658748,0.803570041,0.70688413,0.459446702,0.330453157 [\/code] ex366.txt [code language=\"python\"] awk '{ print $1, $2, $3, $4}' FS=, OFS=, example.csv [\/code] variant: [code language=\"python\"] awk 'BEGIN { FS=OFS=\",\" } NF { print $1, $2, $3, $4 }' example.csv [\/code] output: [code language=\"python\"] Type,LCID,EID,Nx QUAD,5000001,389635,5.21044 QUAD,5000002,389636,4.332321\u2026","rel":"","context":"In &quot;awk&quot;","block_context":{"text":"awk","link":"https:\/\/gantovnik.com\/bio-tips\/category\/awk\/"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":1905,"url":"https:\/\/gantovnik.com\/bio-tips\/2023\/07\/370-find-max-and-min-in-column-using-awk\/","url_meta":{"origin":1750,"position":2},"title":"#370 Find min and max in column using awk","author":"gantovnik","date":"2023-07-21","format":false,"excerpt":"example.csv [code language=\"python\"] Type,LCID,EID,Nx,Ny,Nxy,Mx,My,Mxy,Qyz,Qxz QUAD,5000001,389635,5.21044,0.9993295,3.184029,0.228490059,0.762438914,0.349926963,0.007402616,0.645762323 QUAD,5000002,389636,4.332321,-45.55149,10.13951,0.128933201,0.289045003,0.442976734,0.394346902,0.965424659 QUAD,5000003,389637,5.332321,-65.55149,12.32391,0.867613103,0.345799842,0.902593358,0.56308979,0.77084519 QUAD,5000004,389638,4.332321,-35.55149,14.27251,0.429855813,0.882431001,0.15453015,0.710809946,0.918974895 QUAD,5000005,389639,5.332455,-75.55149,15.37321,0.738401857,0.289007109,0.59902178,0.824840285,0.571976301 QUAD,5000006,389640,4.332321,-65.55149,13.27221,0.102678011,0.581496802,0.785601755,0.415283869,0.817283531 QUAD,5000007,389641,3.332321,-45.55149,12.67351,0.363658748,0.803570041,0.70688413,0.459446702,0.330453157 [\/code] ex370.txt [code language=\"python\"] awk 'NR==2 || $4 < min {min=$4} END{ print \"min=\" min}' FS=, OFS=, example.csv awk 'NR==2 || $4 > max {max=$4} END{ print \"max=\" max}' FS=, OFS=, example.csv [\/code] output: [code language=\"python\"] min=3.332321 max=50.332455 [\/code]\u2026","rel":"","context":"In &quot;awk&quot;","block_context":{"text":"awk","link":"https:\/\/gantovnik.com\/bio-tips\/category\/awk\/"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":1909,"url":"https:\/\/gantovnik.com\/bio-tips\/2023\/07\/371-search-and-replace-using-gsub-in-awk\/","url_meta":{"origin":1750,"position":3},"title":"#371 Searching and replacing using gsub in awk","author":"gantovnik","date":"2023-07-22","format":false,"excerpt":"example.csv [code language=\"python\"] Type,LCID,EID,Nx,Ny,Nxy,Mx,My,Mxy,Qyz,Qxz QUAD,5000001,389635,5.21044,0.9993295,3.184029,0.228490059,0.762438914,0.349926963,0.007402616,0.645762323 QUAD,5000002,389636,4.332321,-45.55149,10.13951,0.128933201,0.289045003,0.442976734,0.394346902,0.965424659 QUAD,5000003,389637,5.332321,-65.55149,12.32391,0.867613103,0.345799842,0.902593358,0.56308979,0.77084519 QUAD,5000004,389638,4.332321,-35.55149,14.27251,0.429855813,0.882431001,0.15453015,0.710809946,0.918974895 QUAD,5000005,389639,5.332455,-75.55149,15.37321,0.738401857,0.289007109,0.59902178,0.824840285,0.571976301 QUAD,5000006,389640,4.332321,-65.55149,13.27221,0.102678011,0.581496802,0.785601755,0.415283869,0.817283531 QUAD,5000007,389641,3.332321,-45.55149,12.67351,0.363658748,0.803570041,0.70688413,0.459446702,0.330453157 [\/code] ex370.txt [code language=\"python\"] awk '{gsub(\/^50\/,\"75\",$2);print}' FS=, OFS=, example.csv [\/code] output: [code language=\"python\"] Type,LCID,EID,Nx,Ny,Nxy,Mx,My,Mxy,Qyz,Qxz QUAD,7500001,389635,5.21044,0.9993295,3.184029,0.228490059,0.762438914,0.349926963,0.007402616,0.645762323 QUAD,7500002,389636,4.332321,-45.55149,10.13951,0.128933201,0.289045003,0.442976734,0.394346902,0.965424659 QUAD,7500003,389637,5.332321,-65.55149,12.32391,0.867613103,0.345799842,0.902593358,0.56308979,0.77084519 QUAD,7500004,389638,4.332321,-35.55149,14.27251,0.429855813,0.882431001,0.15453015,0.710809946,0.918974895 QUAD,7500005,389639,50.332455,-75.55149,15.37321,0.738401857,0.289007109,0.59902178,0.824840285,0.571976301 QUAD,7500006,389640,4.332321,-65.55149,13.27221,0.102678011,0.581496802,0.785601755,0.415283869,0.817283531 QUAD,7500007,389641,3.332321,-45.55149,12.67351,0.363658748,0.803570041,0.70688413,0.459446702,0.330453157 [\/code]","rel":"","context":"In &quot;awk&quot;","block_context":{"text":"awk","link":"https:\/\/gantovnik.com\/bio-tips\/category\/awk\/"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":1899,"url":"https:\/\/gantovnik.com\/bio-tips\/2023\/07\/366-print-lines-in-a-range-using-awk\/","url_meta":{"origin":1750,"position":4},"title":"#366 Print lines in a range using awk","author":"gantovnik","date":"2023-07-20","format":false,"excerpt":"example.csv [code language=\"python\"] Type,LCID,EID,Nx,Ny,Nxy,Mx,My,Mxy,Qyz,Qxz QUAD,5000001,389635,5.21044,0.9993295,3.184029,0.228490059,0.762438914,0.349926963,0.007402616,0.645762323 QUAD,5000002,389636,4.332321,-45.55149,10.13951,0.128933201,0.289045003,0.442976734,0.394346902,0.965424659 QUAD,5000003,389637,5.332321,-65.55149,12.32391,0.867613103,0.345799842,0.902593358,0.56308979,0.77084519 QUAD,5000004,389638,4.332321,-35.55149,14.27251,0.429855813,0.882431001,0.15453015,0.710809946,0.918974895 QUAD,5000005,389639,5.332455,-75.55149,15.37321,0.738401857,0.289007109,0.59902178,0.824840285,0.571976301 QUAD,5000006,389640,4.332321,-65.55149,13.27221,0.102678011,0.581496802,0.785601755,0.415283869,0.817283531 QUAD,5000007,389641,3.332321,-45.55149,12.67351,0.363658748,0.803570041,0.70688413,0.459446702,0.330453157 [\/code] ex366.txt [code language=\"python\"] awk 'NR>2 && NR < 5' example.csv [\/code] output: [code language=\"python\"] QUAD,5000002,389636,4.332321,-45.55149,10.13951,0.128933201,0.289045003,0.442976734,0.394346902,0.965424659 QUAD,5000003,389637,5.332321,-65.55149,12.32391,0.867613103,0.345799842,0.902593358,0.56308979,0.77084519 [\/code]","rel":"","context":"In &quot;awk&quot;","block_context":{"text":"awk","link":"https:\/\/gantovnik.com\/bio-tips\/category\/awk\/"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":1898,"url":"https:\/\/gantovnik.com\/bio-tips\/2023\/07\/365-remove-header-with-awk\/","url_meta":{"origin":1750,"position":5},"title":"#365 Remove header with awk","author":"gantovnik","date":"2023-07-20","format":false,"excerpt":"example.csv [code language=\"python\"] Type,LCID,EID,Nx,Ny,Nxy,Mx,My,Mxy,Qyz,Qxz QUAD,5000009,389635,5.21044,0.9993295,3.184029,0.228490059,0.762438914,0.349926963,0.007402616,0.645762323 QUAD,5000009,389636,4.332321,-45.55149,10.13951,0.128933201,0.289045003,0.442976734,0.394346902,0.965424659 QUAD,5000009,389637,5.332321,-65.55149,12.32391,0.867613103,0.345799842,0.902593358,0.56308979,0.77084519 QUAD,5000009,389638,4.332321,-35.55149,14.27251,0.429855813,0.882431001,0.15453015,0.710809946,0.918974895 QUAD,5000009,389639,5.332455,-75.55149,15.37321,0.738401857,0.289007109,0.59902178,0.824840285,0.571976301 QUAD,5000009,389640,4.332321,-65.55149,13.27221,0.102678011,0.581496802,0.785601755,0.415283869,0.817283531 QUAD,5000009,389641,3.332321,-45.55149,12.67351,0.363658748,0.803570041,0.70688413,0.459446702,0.330453157 [\/code] ex365.txt [code language=\"python\"] awk 'NR>1' example.csv [\/code] output: [code language=\"python\"] QUAD,5000009,389635,5.21044,0.9993295,3.184029,0.228490059,0.762438914,0.349926963,0.007402616,0.645762323 QUAD,5000009,389636,4.332321,-45.55149,10.13951,0.128933201,0.289045003,0.442976734,0.394346902,0.965424659 QUAD,5000009,389637,5.332321,-65.55149,12.32391,0.867613103,0.345799842,0.902593358,0.56308979,0.77084519 QUAD,5000009,389638,4.332321,-35.55149,14.27251,0.429855813,0.882431001,0.15453015,0.710809946,0.918974895 QUAD,5000009,389639,5.332455,-75.55149,15.37321,0.738401857,0.289007109,0.59902178,0.824840285,0.571976301 QUAD,5000009,389640,4.332321,-65.55149,13.27221,0.102678011,0.581496802,0.785601755,0.415283869,0.817283531 QUAD,5000009,389641,3.332321,-45.55149,12.67351,0.363658748,0.803570041,0.70688413,0.459446702,0.330453157 [\/code]","rel":"","context":"In &quot;awk&quot;","block_context":{"text":"awk","link":"https:\/\/gantovnik.com\/bio-tips\/category\/awk\/"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]}],"_links":{"self":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts\/1750","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/comments?post=1750"}],"version-history":[{"count":0,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts\/1750\/revisions"}],"wp:attachment":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/media?parent=1750"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/categories?post=1750"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/tags?post=1750"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}