Icon

kn_​forum_​38543_​rank_​deduplicate

remove duplicates with condition and add the last date

remove duplicates with condition and add the last date

SELECT t2.* /* mark the Top row per ID as the chosen one */, CASE WHEN t2."rank_id" =1 THEN 'chosen' ELSE 'duplicate' END AS "duplicate_type_classifier"FROM /* start t2 */( SELECT * , ROW_NUMBER() OVER (/* set which unique ID should be kept - all others will be deleted *//* this makes sure even if there is a *complete* duplicate only one row will remain (and be intact) */ PARTITION BY t1."sprintid", t1."issueid"/* set rules which ID should remain on Top in this case the one with the latest enry and then the last contact */ ORDER BY t1."changecreation" DESC NULLS LAST ) AS "rank_id"FROM #table# AS t1WHERE t1."issueorigin" = 'completed' AND t1."changeto" = 'In Progress' AND t1."changefield" = 'status') t2/* end t2 */ORDER BY t2."Counter" , t2."rank_id" remove duplicates with condition and add the last date DROP TABLE IF EXISTS "PUBLIC"."test_005";CREATE TABLE "PUBLIC"."test_005"AS SELECT * FROM (SELECT t6."changefield" AS "changefield",t6."changeto" AS "changeto",t6."sprintid" AS "sprintid",t6."issueid" AS "issueid",t6."issueorigin" AS"issueorigin",t6."changecreation" AS "changecreation",t6."Counter" AS "Counter",t5."max_changecreation_cond" AS "max_changecreation_cond" FROM (SELECT * FROM "PUBLIC"."test_001") t6 LEFT JOIN (SELECT "changefield", "changeto", "sprintid", "issueid", "issueorigin", "changecreation" AS "max_changecreation_cond", "Counter","rank_id", "duplicate_type_classifier" FROM (SELECT * FROM ( SELECT t2.* /* mark the Top row per ID as the chosen one */, CASE WHEN t2."rank_id" =1 THEN 'chosen' ELSE 'duplicate' END AS "duplicate_type_classifier"FROM /* start t2 */( SELECT * , ROW_NUMBER() OVER (/* set which unique ID should be kept - all others will be deleted *//* this makes sure even if there is a *complete* duplicate only one row will remain (and be intact) */ PARTITION BY t1."sprintid", t1."issueid"/* set rules which ID should remain on Top in this case the one with the latest enry and then the last contact */ ORDER BY t1."changecreation" DESC NULLS LAST ) AS "rank_id"FROM (SELECT * FROM "PUBLIC"."test_001") AS t1WHERE t1."issueorigin" = 'completed' AND t1."changeto" = 'In Progress' AND t1."changefield" = 'status') t2/* end t2 */ORDER BY t2."Counter" , t2."rank_id" ) AS t3WHERE "duplicate_type_classifier" = 'chosen') AS t4) t5 ON t6."sprintid"=t5."sprintid" AND t6."issueid"=t5."issueid") AS t7 ORDER BY "Counter"; Example data.xlsxin memorytest_001test_001test_001changecreationchangecreation$issueorigin$ = "completed" AND $changeto$ = "In Progress" AND $changefield$ = "status" => TRUETRUE => FALSEremove dupliactesbysprintidissueidwith MAX(changecreation) remainingjoin bysprintidissueidmax_changecreation_condCounter as ID$duplicate-type-classifier$ = "chosen" => TRUETRUE => FALSEthe Magic Queryto find the latest entryjoin bysprintidissueidmax_changecreation_condonly chosen ones ...SORT BY CounterPUBLIC.test_005=> do it all inone big stepPUBLIC.test_005 Excel Reader H2 Connector DB Table Creator DB Table Remover DB Writer String to Date&Time Column Rename Rule-basedRow Filter DuplicateRow Filter Joiner Column Rename Counter Generation Sorter Sorter Rule-basedRow Filter DB Query DB Reader DB Joiner DB Column Rename DB Row Filter DB Sorter DB SQL Executor DB Table Selector DB Reader SELECT t2.* /* mark the Top row per ID as the chosen one */, CASE WHEN t2."rank_id" =1 THEN 'chosen' ELSE 'duplicate' END AS "duplicate_type_classifier"FROM /* start t2 */( SELECT * , ROW_NUMBER() OVER (/* set which unique ID should be kept - all others will be deleted *//* this makes sure even if there is a *complete* duplicate only one row will remain (and be intact) */ PARTITION BY t1."sprintid", t1."issueid"/* set rules which ID should remain on Top in this case the one with the latest enry and then the last contact */ ORDER BY t1."changecreation" DESC NULLS LAST ) AS "rank_id"FROM #table# AS t1WHERE t1."issueorigin" = 'completed' AND t1."changeto" = 'In Progress' AND t1."changefield" = 'status') t2/* end t2 */ORDER BY t2."Counter" , t2."rank_id" remove duplicates with condition and add the last date DROP TABLE IF EXISTS "PUBLIC"."test_005";CREATE TABLE "PUBLIC"."test_005"AS SELECT * FROM (SELECT t6."changefield" AS "changefield",t6."changeto" AS "changeto",t6."sprintid" AS "sprintid",t6."issueid" AS "issueid",t6."issueorigin" AS"issueorigin",t6."changecreation" AS "changecreation",t6."Counter" AS "Counter",t5."max_changecreation_cond" AS "max_changecreation_cond" FROM (SELECT * FROM "PUBLIC"."test_001") t6 LEFT JOIN (SELECT "changefield", "changeto", "sprintid", "issueid", "issueorigin", "changecreation" AS "max_changecreation_cond", "Counter","rank_id", "duplicate_type_classifier" FROM (SELECT * FROM ( SELECT t2.* /* mark the Top row per ID as the chosen one */, CASE WHEN t2."rank_id" =1 THEN 'chosen' ELSE 'duplicate' END AS "duplicate_type_classifier"FROM /* start t2 */( SELECT * , ROW_NUMBER() OVER (/* set which unique ID should be kept - all others will be deleted *//* this makes sure even if there is a *complete* duplicate only one row will remain (and be intact) */ PARTITION BY t1."sprintid", t1."issueid"/* set rules which ID should remain on Top in this case the one with the latest enry and then the last contact */ ORDER BY t1."changecreation" DESC NULLS LAST ) AS "rank_id"FROM (SELECT * FROM "PUBLIC"."test_001") AS t1WHERE t1."issueorigin" = 'completed' AND t1."changeto" = 'In Progress' AND t1."changefield" = 'status') t2/* end t2 */ORDER BY t2."Counter" , t2."rank_id" ) AS t3WHERE "duplicate_type_classifier" = 'chosen') AS t4) t5 ON t6."sprintid"=t5."sprintid" AND t6."issueid"=t5."issueid") AS t7 ORDER BY "Counter"; Example data.xlsxin memorytest_001test_001test_001changecreationchangecreation$issueorigin$ = "completed" AND $changeto$ = "In Progress" AND $changefield$ = "status" => TRUETRUE => FALSEremove dupliactesbysprintidissueidwith MAX(changecreation) remainingjoin bysprintidissueidmax_changecreation_condCounter as ID$duplicate-type-classifier$ = "chosen" => TRUETRUE => FALSEthe Magic Queryto find the latest entryjoin bysprintidissueidmax_changecreation_condonly chosen ones ...SORT BY CounterPUBLIC.test_005=> do it all inone big stepPUBLIC.test_005Excel Reader H2 Connector DB Table Creator DB Table Remover DB Writer String to Date&Time Column Rename Rule-basedRow Filter DuplicateRow Filter Joiner Column Rename Counter Generation Sorter Sorter Rule-basedRow Filter DB Query DB Reader DB Joiner DB Column Rename DB Row Filter DB Sorter DB SQL Executor DB Table Selector DB Reader

Nodes

Extensions

Links