{"id":1904,"date":"2023-07-20T14:38:21","date_gmt":"2023-07-20T21:38:21","guid":{"rendered":"https:\/\/gantovnik.com\/bio-tips\/?p=1904"},"modified":"2023-07-20T14:38:21","modified_gmt":"2023-07-20T21:38:21","slug":"369-identifying-duplicate-lines-using-awk","status":"publish","type":"post","link":"https:\/\/gantovnik.com\/bio-tips\/2023\/07\/369-identifying-duplicate-lines-using-awk\/","title":{"rendered":"#369 Identifying duplicate lines using awk."},"content":{"rendered":"<p>example.csv<\/p>\n<pre class=\"brush: python; title: ; notranslate\" title=\"\">\r\nType,LCID,EID,Nx,Ny,Nxy,Mx,My,Mxy,Qyz,Qxz\r\nQUAD,5000001,389635,5.21044,0.9993295,3.184029,0.228490059,0.762438914,0.349926963,0.007402616,0.645762323\r\nQUAD,5000002,389636,4.332321,-45.55149,10.13951,0.128933201,0.289045003,0.442976734,0.394346902,0.965424659\r\nQUAD,5000003,389637,5.332321,-65.55149,12.32391,0.867613103,0.345799842,0.902593358,0.56308979,0.77084519\r\nQUAD,5000004,389638,4.332321,-35.55149,14.27251,0.429855813,0.882431001,0.15453015,0.710809946,0.918974895\r\nQUAD,5000005,389639,5.332455,-75.55149,15.37321,0.738401857,0.289007109,0.59902178,0.824840285,0.571976301\r\nQUAD,5000002,389636,4.332321,-45.55149,10.13951,0.128933201,0.289045003,0.442976734,0.394346902,0.965424659\r\nQUAD,5000006,389640,4.332321,-65.55149,13.27221,0.102678011,0.581496802,0.785601755,0.415283869,0.817283531\r\nQUAD,5000007,389641,3.332321,-45.55149,12.67351,0.363658748,0.803570041,0.70688413,0.459446702,0.330453157\r\n<\/pre>\n<p>ex368.txt<\/p>\n<pre class=\"brush: python; title: ; notranslate\" title=\"\">\r\nawk 'a&#x5B;$0]++' example.csv\r\n<\/pre>\n<p>output:<\/p>\n<pre class=\"brush: python; title: ; notranslate\" title=\"\">\r\nQUAD,5000002,389636,4.332321,-45.55149,10.13951,0.128933201,0.289045003,0.442976734,0.394346902,0.965424659\r\n<\/pre>\n","protected":false},"excerpt":{"rendered":"<p>example.csv Type,LCID,EID,Nx,Ny,Nxy,Mx,My,Mxy,Qyz,Qxz QUAD,5000001,389635,5.21044,0.9993295,3.184029,0.228490059,0.762438914,0.349926963,0.007402616,0.645762323 QUAD,5000002,389636,4.332321,-45.55149,10.13951,0.128933201,0.289045003,0.442976734,0.394346902,0.965424659 QUAD,5000003,389637,5.332321,-65.55149,12.32391,0.867613103,0.345799842,0.902593358,0.56308979,0.77084519 QUAD,5000004,389638,4.332321,-35.55149,14.27251,0.429855813,0.882431001,0.15453015,0.710809946,0.918974895 QUAD,5000005,389639,5.332455,-75.55149,15.37321,0.738401857,0.289007109,0.59902178,0.824840285,0.571976301 QUAD,5000002,389636,4.332321,-45.55149,10.13951,0.128933201,0.289045003,0.442976734,0.394346902,0.965424659 QUAD,5000006,389640,4.332321,-65.55149,13.27221,0.102678011,0.581496802,0.785601755,0.415283869,0.817283531 QUAD,5000007,389641,3.332321,-45.55149,12.67351,0.363658748,0.803570041,0.70688413,0.459446702,0.330453157 ex368.txt awk &#8216;a&#x5B;$0]++&#8217; example.csv output: QUAD,5000002,389636,4.332321,-45.55149,10.13951,0.128933201,0.289045003,0.442976734,0.394346902,0.965424659<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"nf_dc_page":"","_et_pb_use_builder":"","_et_pb_old_content":"","_et_gb_content_width":"","_lmt_disableupdate":"yes","_lmt_disable":"","jetpack_post_was_ever_published":false,"_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_memberships_contains_paid_content":false,"footnotes":""},"categories":[18],"tags":[],"class_list":["post-1904","post","type-post","status-publish","format-standard","hentry","category-awk"],"modified_by":"gantovnik","jetpack_featured_media_url":"","jetpack_sharing_enabled":true,"jetpack_shortlink":"https:\/\/wp.me\/p8bH0k-uI","jetpack_likes_enabled":true,"jetpack-related-posts":[{"id":1899,"url":"https:\/\/gantovnik.com\/bio-tips\/2023\/07\/366-print-lines-in-a-range-using-awk\/","url_meta":{"origin":1904,"position":0},"title":"#366 Print lines in a range using awk","author":"gantovnik","date":"2023-07-20","format":false,"excerpt":"example.csv [code language=\"python\"] Type,LCID,EID,Nx,Ny,Nxy,Mx,My,Mxy,Qyz,Qxz QUAD,5000001,389635,5.21044,0.9993295,3.184029,0.228490059,0.762438914,0.349926963,0.007402616,0.645762323 QUAD,5000002,389636,4.332321,-45.55149,10.13951,0.128933201,0.289045003,0.442976734,0.394346902,0.965424659 QUAD,5000003,389637,5.332321,-65.55149,12.32391,0.867613103,0.345799842,0.902593358,0.56308979,0.77084519 QUAD,5000004,389638,4.332321,-35.55149,14.27251,0.429855813,0.882431001,0.15453015,0.710809946,0.918974895 QUAD,5000005,389639,5.332455,-75.55149,15.37321,0.738401857,0.289007109,0.59902178,0.824840285,0.571976301 QUAD,5000006,389640,4.332321,-65.55149,13.27221,0.102678011,0.581496802,0.785601755,0.415283869,0.817283531 QUAD,5000007,389641,3.332321,-45.55149,12.67351,0.363658748,0.803570041,0.70688413,0.459446702,0.330453157 [\/code] ex366.txt [code language=\"python\"] awk 'NR>2 && NR < 5' example.csv [\/code] output: [code language=\"python\"] QUAD,5000002,389636,4.332321,-45.55149,10.13951,0.128933201,0.289045003,0.442976734,0.394346902,0.965424659 QUAD,5000003,389637,5.332321,-65.55149,12.32391,0.867613103,0.345799842,0.902593358,0.56308979,0.77084519 [\/code]","rel":"","context":"In &quot;awk&quot;","block_context":{"text":"awk","link":"https:\/\/gantovnik.com\/bio-tips\/category\/awk\/"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":1901,"url":"https:\/\/gantovnik.com\/bio-tips\/2023\/07\/367-extracting-columns-of-the-data-file-using-awk\/","url_meta":{"origin":1904,"position":1},"title":"#367 Extracting columns of the data file using awk.","author":"gantovnik","date":"2023-07-20","format":false,"excerpt":"example.csv [code language=\"python\"] Type,LCID,EID,Nx,Ny,Nxy,Mx,My,Mxy,Qyz,Qxz QUAD,5000001,389635,5.21044,0.9993295,3.184029,0.228490059,0.762438914,0.349926963,0.007402616,0.645762323 QUAD,5000002,389636,4.332321,-45.55149,10.13951,0.128933201,0.289045003,0.442976734,0.394346902,0.965424659 QUAD,5000003,389637,5.332321,-65.55149,12.32391,0.867613103,0.345799842,0.902593358,0.56308979,0.77084519 QUAD,5000004,389638,4.332321,-35.55149,14.27251,0.429855813,0.882431001,0.15453015,0.710809946,0.918974895 QUAD,5000005,389639,5.332455,-75.55149,15.37321,0.738401857,0.289007109,0.59902178,0.824840285,0.571976301 QUAD,5000006,389640,4.332321,-65.55149,13.27221,0.102678011,0.581496802,0.785601755,0.415283869,0.817283531 QUAD,5000007,389641,3.332321,-45.55149,12.67351,0.363658748,0.803570041,0.70688413,0.459446702,0.330453157 [\/code] ex366.txt [code language=\"python\"] awk '{ print $1, $2, $3, $4}' FS=, OFS=, example.csv [\/code] variant: [code language=\"python\"] awk 'BEGIN { FS=OFS=\",\" } NF { print $1, $2, $3, $4 }' example.csv [\/code] output: [code language=\"python\"] Type,LCID,EID,Nx QUAD,5000001,389635,5.21044 QUAD,5000002,389636,4.332321\u2026","rel":"","context":"In &quot;awk&quot;","block_context":{"text":"awk","link":"https:\/\/gantovnik.com\/bio-tips\/category\/awk\/"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":1909,"url":"https:\/\/gantovnik.com\/bio-tips\/2023\/07\/371-search-and-replace-using-gsub-in-awk\/","url_meta":{"origin":1904,"position":2},"title":"#371 Searching and replacing using gsub in awk","author":"gantovnik","date":"2023-07-22","format":false,"excerpt":"example.csv [code language=\"python\"] Type,LCID,EID,Nx,Ny,Nxy,Mx,My,Mxy,Qyz,Qxz QUAD,5000001,389635,5.21044,0.9993295,3.184029,0.228490059,0.762438914,0.349926963,0.007402616,0.645762323 QUAD,5000002,389636,4.332321,-45.55149,10.13951,0.128933201,0.289045003,0.442976734,0.394346902,0.965424659 QUAD,5000003,389637,5.332321,-65.55149,12.32391,0.867613103,0.345799842,0.902593358,0.56308979,0.77084519 QUAD,5000004,389638,4.332321,-35.55149,14.27251,0.429855813,0.882431001,0.15453015,0.710809946,0.918974895 QUAD,5000005,389639,5.332455,-75.55149,15.37321,0.738401857,0.289007109,0.59902178,0.824840285,0.571976301 QUAD,5000006,389640,4.332321,-65.55149,13.27221,0.102678011,0.581496802,0.785601755,0.415283869,0.817283531 QUAD,5000007,389641,3.332321,-45.55149,12.67351,0.363658748,0.803570041,0.70688413,0.459446702,0.330453157 [\/code] ex370.txt [code language=\"python\"] awk '{gsub(\/^50\/,\"75\",$2);print}' FS=, OFS=, example.csv [\/code] output: [code language=\"python\"] Type,LCID,EID,Nx,Ny,Nxy,Mx,My,Mxy,Qyz,Qxz QUAD,7500001,389635,5.21044,0.9993295,3.184029,0.228490059,0.762438914,0.349926963,0.007402616,0.645762323 QUAD,7500002,389636,4.332321,-45.55149,10.13951,0.128933201,0.289045003,0.442976734,0.394346902,0.965424659 QUAD,7500003,389637,5.332321,-65.55149,12.32391,0.867613103,0.345799842,0.902593358,0.56308979,0.77084519 QUAD,7500004,389638,4.332321,-35.55149,14.27251,0.429855813,0.882431001,0.15453015,0.710809946,0.918974895 QUAD,7500005,389639,50.332455,-75.55149,15.37321,0.738401857,0.289007109,0.59902178,0.824840285,0.571976301 QUAD,7500006,389640,4.332321,-65.55149,13.27221,0.102678011,0.581496802,0.785601755,0.415283869,0.817283531 QUAD,7500007,389641,3.332321,-45.55149,12.67351,0.363658748,0.803570041,0.70688413,0.459446702,0.330453157 [\/code]","rel":"","context":"In &quot;awk&quot;","block_context":{"text":"awk","link":"https:\/\/gantovnik.com\/bio-tips\/category\/awk\/"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":1905,"url":"https:\/\/gantovnik.com\/bio-tips\/2023\/07\/370-find-max-and-min-in-column-using-awk\/","url_meta":{"origin":1904,"position":3},"title":"#370 Find min and max in column using awk","author":"gantovnik","date":"2023-07-21","format":false,"excerpt":"example.csv [code language=\"python\"] Type,LCID,EID,Nx,Ny,Nxy,Mx,My,Mxy,Qyz,Qxz QUAD,5000001,389635,5.21044,0.9993295,3.184029,0.228490059,0.762438914,0.349926963,0.007402616,0.645762323 QUAD,5000002,389636,4.332321,-45.55149,10.13951,0.128933201,0.289045003,0.442976734,0.394346902,0.965424659 QUAD,5000003,389637,5.332321,-65.55149,12.32391,0.867613103,0.345799842,0.902593358,0.56308979,0.77084519 QUAD,5000004,389638,4.332321,-35.55149,14.27251,0.429855813,0.882431001,0.15453015,0.710809946,0.918974895 QUAD,5000005,389639,5.332455,-75.55149,15.37321,0.738401857,0.289007109,0.59902178,0.824840285,0.571976301 QUAD,5000006,389640,4.332321,-65.55149,13.27221,0.102678011,0.581496802,0.785601755,0.415283869,0.817283531 QUAD,5000007,389641,3.332321,-45.55149,12.67351,0.363658748,0.803570041,0.70688413,0.459446702,0.330453157 [\/code] ex370.txt [code language=\"python\"] awk 'NR==2 || $4 < min {min=$4} END{ print \"min=\" min}' FS=, OFS=, example.csv awk 'NR==2 || $4 > max {max=$4} END{ print \"max=\" max}' FS=, OFS=, example.csv [\/code] output: [code language=\"python\"] min=3.332321 max=50.332455 [\/code]\u2026","rel":"","context":"In &quot;awk&quot;","block_context":{"text":"awk","link":"https:\/\/gantovnik.com\/bio-tips\/category\/awk\/"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":1903,"url":"https:\/\/gantovnik.com\/bio-tips\/2023\/07\/368-performing-calculations-column-wise-using-awk\/","url_meta":{"origin":1904,"position":4},"title":"#368 Performing calculations column-wise using awk","author":"gantovnik","date":"2023-07-20","format":false,"excerpt":"example.csv [code language=\"python\"] Type,LCID,EID,Nx,Ny,Nxy,Mx,My,Mxy,Qyz,Qxz QUAD,5000001,389635,5.21044,0.9993295,3.184029,0.228490059,0.762438914,0.349926963,0.007402616,0.645762323 QUAD,5000002,389636,4.332321,-45.55149,10.13951,0.128933201,0.289045003,0.442976734,0.394346902,0.965424659 QUAD,5000003,389637,5.332321,-65.55149,12.32391,0.867613103,0.345799842,0.902593358,0.56308979,0.77084519 QUAD,5000004,389638,4.332321,-35.55149,14.27251,0.429855813,0.882431001,0.15453015,0.710809946,0.918974895 QUAD,5000005,389639,5.332455,-75.55149,15.37321,0.738401857,0.289007109,0.59902178,0.824840285,0.571976301 QUAD,5000006,389640,4.332321,-65.55149,13.27221,0.102678011,0.581496802,0.785601755,0.415283869,0.817283531 QUAD,5000007,389641,3.332321,-45.55149,12.67351,0.363658748,0.803570041,0.70688413,0.459446702,0.330453157 [\/code] ex368.txt [code language=\"python\"] awk 'NR==1{''} NR>1{{ SUM=SUM+$4 }} END { print \"Sum=\" SUM }' FS=, OFS=, example.csv [\/code] output: [code language=\"python\"] Sum=32.2045 [\/code] ex368.txt [code language=\"python\"] awk 'NR==1{''} NR>1{{ SUM=SUM+$4 }} END {print \"Mean=\" SUM\/(NR-1)}' FS=, OFS=, example.csv\u2026","rel":"","context":"In &quot;awk&quot;","block_context":{"text":"awk","link":"https:\/\/gantovnik.com\/bio-tips\/category\/awk\/"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":1898,"url":"https:\/\/gantovnik.com\/bio-tips\/2023\/07\/365-remove-header-with-awk\/","url_meta":{"origin":1904,"position":5},"title":"#365 Remove header with awk","author":"gantovnik","date":"2023-07-20","format":false,"excerpt":"example.csv [code language=\"python\"] Type,LCID,EID,Nx,Ny,Nxy,Mx,My,Mxy,Qyz,Qxz QUAD,5000009,389635,5.21044,0.9993295,3.184029,0.228490059,0.762438914,0.349926963,0.007402616,0.645762323 QUAD,5000009,389636,4.332321,-45.55149,10.13951,0.128933201,0.289045003,0.442976734,0.394346902,0.965424659 QUAD,5000009,389637,5.332321,-65.55149,12.32391,0.867613103,0.345799842,0.902593358,0.56308979,0.77084519 QUAD,5000009,389638,4.332321,-35.55149,14.27251,0.429855813,0.882431001,0.15453015,0.710809946,0.918974895 QUAD,5000009,389639,5.332455,-75.55149,15.37321,0.738401857,0.289007109,0.59902178,0.824840285,0.571976301 QUAD,5000009,389640,4.332321,-65.55149,13.27221,0.102678011,0.581496802,0.785601755,0.415283869,0.817283531 QUAD,5000009,389641,3.332321,-45.55149,12.67351,0.363658748,0.803570041,0.70688413,0.459446702,0.330453157 [\/code] ex365.txt [code language=\"python\"] awk 'NR>1' example.csv [\/code] output: [code language=\"python\"] QUAD,5000009,389635,5.21044,0.9993295,3.184029,0.228490059,0.762438914,0.349926963,0.007402616,0.645762323 QUAD,5000009,389636,4.332321,-45.55149,10.13951,0.128933201,0.289045003,0.442976734,0.394346902,0.965424659 QUAD,5000009,389637,5.332321,-65.55149,12.32391,0.867613103,0.345799842,0.902593358,0.56308979,0.77084519 QUAD,5000009,389638,4.332321,-35.55149,14.27251,0.429855813,0.882431001,0.15453015,0.710809946,0.918974895 QUAD,5000009,389639,5.332455,-75.55149,15.37321,0.738401857,0.289007109,0.59902178,0.824840285,0.571976301 QUAD,5000009,389640,4.332321,-65.55149,13.27221,0.102678011,0.581496802,0.785601755,0.415283869,0.817283531 QUAD,5000009,389641,3.332321,-45.55149,12.67351,0.363658748,0.803570041,0.70688413,0.459446702,0.330453157 [\/code]","rel":"","context":"In &quot;awk&quot;","block_context":{"text":"awk","link":"https:\/\/gantovnik.com\/bio-tips\/category\/awk\/"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]}],"_links":{"self":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts\/1904","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/comments?post=1904"}],"version-history":[{"count":0,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts\/1904\/revisions"}],"wp:attachment":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/media?parent=1904"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/categories?post=1904"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/tags?post=1904"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}