Рет қаралды 78
In this tutorial, learn how to leverage the ROW_NUMBER() function in Snowflake for advanced data ranking and deduplication. Discover practical examples and step-by-step guidance to enhance your data analysis skills with Snowflake's powerful SQL capabilities.
------------------------------
-- ROW_NUMBER()
-- The ROW_NUMBER() function in Snowflake is used to assign a unique sequential integer to rows within a partition of a result set. This is often useful for ranking rows within a group or for generating a unique identifier for rows
--ROW_NUMBER() OVER ([ query_partition_clause ] order_by_clause)
-- Use Cases for ROW_NUMBER()
-- Eliminating Duplicates
-- Top N per Group
CREATE or replace TABLE employees (
employee_id INT,
first_name VARCHAR(50),
salary DECIMAL(10, 2),
department_id INT
);
INSERT INTO employees (employee_id, first_name, salary, department_id) VALUES
(1, 'John', 50000, 10),
(2, 'Jane', 55000, 10),
(3, 'Jake', 60000, 20),
(4, 'Jill', 60000, 20),
(5, 'Jim', 70000, 30),
(6, 'Jack', 75000, 30),
(7, 'Joe', 80000, 10),
(8, 'Jen', 85000, 20);
select employee_id, first_name, salary,
ROW_NUMBER() OVER (order by salary ) row_num
from
EMPLOYEES;
select employee_id, first_name, salary,
ROW_NUMBER() OVER (order by salary ) row_num,
dense_rank() OVER (order by salary ) den_ranking,
rank() OVER (order by salary ) ranking
from
EMPLOYEES;
select employee_id, first_name, salary, department_id ,
ROW_NUMBER() OVER (partition by department_id order by Salary desc ) emp_rownum_dept
from
EMPLOYEES;
select * from (
select employee_id, first_name, salary, department_id ,
ROW_NUMBER() OVER (partition by department_id order by Salary desc ) emp_rownum_dept
from
EMPLOYEES) where emp_rownum_dept =1 ;
-- The ordering of first is applied after all other operations are completed.
select employee_id, first_name, salary,
ROW_NUMBER() OVER (order by salary desc ) row_num
from
EMPLOYEES
order by first_name;
-- Removing Duplicate row from table
INSERT INTO employees (employee_id, first_name, salary, department_id) VALUES
(1, 'John', 50000, 10),
(2, 'Jane', 55000, 10),
(3, 'Jake', 60000, 20),
(4, 'Jill', 60000, 20),
(5, 'Jim', 70000, 30),
(6, 'Jack', 75000, 30);
select * from employees order by employee_id;
select employee_id,
first_name,
salary,
department_id from (
SELECT
employee_id,
first_name,
salary,
department_id,
ROW_NUMBER() OVER (PARTITION BY employee_id ORDER BY employee_id desc) AS row_num
FROM
employees
)
WHERE
row_num = 1
order by employee_id;
-- find N salary
select * from (
select employee_id, first_name, salary,
ROW_NUMBER() OVER (order by salary desc) row_num
from
EMPLOYEES)
where row_num=2;