10 SQL Interview Questions Asked By Amazon For Data Analysts and Data Engineers

 

Sample Data Scripts -

-- 1. sales table

CREATE TABLE sales (

    order_id INT,

    user_id INT,

    product_id INT,

    sale_date DATE,

    quantity INT,

    revenue DECIMAL(10,2)

);


INSERT INTO sales (order_id, user_id, product_id, sale_date, quantity, revenue) VALUES

(101, 1, 1, '2023-08-01', 2, 2000.00),

(102, 2, 2, '2023-08-01', 3, 1500.00),

(103, 1, 1, '2023-08-02', 1, 1000.00),

(104, 3, 3, '2023-08-03', 5, 4000.00),

(105, 4, 2, '2023-08-05', 2, 1000.00),

(106, 2, 3, '2023-08-07', 1, 800.00);



-- 2. products table

CREATE TABLE products (

    product_id INT PRIMARY KEY,

    product_name VARCHAR(100),

    price DECIMAL(10,2)

);


INSERT INTO products (product_id, product_name, price) VALUES

(1, 'Laptop', 1000.00),

(2, 'Tablet', 500.00),

(3, 'Phone', 800.00);


-- 3. user_activity table

CREATE TABLE user_activity (

    user_id INT,

    activity_date DATE

);


INSERT INTO user_activity (user_id, activity_date) VALUES

(1, '2023-08-01'),

(1, '2023-08-02'),

(1, '2023-08-03'),

(2, '2023-08-01'),

(2, '2023-08-03'),

(3, '2023-08-02');



-- 4. employees table with department

CREATE TABLE employees (

    emp_id INT PRIMARY KEY,

    name VARCHAR(100),

    salary INT,

    department_id INT,

    manager_id INT

);


INSERT INTO employees (emp_id, name, salary, department_id, manager_id) VALUES

(1, 'Alice', 90000, 101, NULL),

(2, 'Bob', 85000, 101, 1),

(3, 'Charlie', 87000, 102, 1),

(4, 'Diana', 86000, 101, 2),

(5, 'Eve', 83000, 102, 3);


-- 5. users table

CREATE TABLE users (

    user_id INT PRIMARY KEY

);


INSERT INTO users (user_id) VALUES

(1), (2), (3), (4);


-- 6. orders table

CREATE TABLE orders (

    order_id INT,

    customer_id INT,

    order_date DATE,

    revenue DECIMAL(10,2)

);


INSERT INTO orders (order_id, customer_id, order_date, revenue) VALUES

(201, 1, '2023-07-01', 500.00),

(202, 1, '2023-08-01', 700.00),

(203, 2, '2023-07-15', 600.00),

(204, 2, '2023-08-01', 800.00),

(205, 3, '2023-08-03', 300.00),

(206, 1, '2023-09-01', 900.00);


-- 7. sales for median query

INSERT INTO sales (order_id, user_id, product_id, sale_date, quantity, revenue) VALUES

(107, 4, 2, '2023-08-08', 2, 1000.00),

(108, 4, 3, '2023-08-09', 1, 800.00);


-- 8. Additional order times table

CREATE TABLE order_times (

    order_id INT,

    user_id INT,

    order_time DATETIME

);



INSERT INTO order_times (order_id, user_id, order_time) VALUES

(301, 1, '2023-08-01 10:00:00'),

(302, 1, '2023-08-01 10:00:30'),

(303, 2, '2023-08-01 11:00:00'),

(304, 2, '2023-08-01 11:02:00');


-- SQL Queries 

-- 1. Running Total by Product Within a Month

--Table: sales(order_id, product_id, sale_date, quantity)

--Task: Calculate a running total of quantity sold per product in each month.


SELECT 

product_id,

MONTH(sale_date) as MTH,

sale_date,

SUM(quantity) OVER (partition by product_id, MONTH(sale_date)) as running_total

FROM sales

ORDER BY product_id, sale_date;



-- 2. Consecutive Login Streaks

--Table: user_activity(user_id, activity_date)

--Task: Identify the maximum number of consecutive login days per user.


With CTE_RN as 

(SELECT 

user_id,

activity_date,

ROW_NUMBER() OVER (Partition By User_id ORDER BY activity_date) as rn 

FROM user_activity)

, CTE_GRP AS 

(Select 

user_id,

activity_date,

DATEADD(day,-1*rn, CAST(activity_date as DATE)) as grp

FROM CTE_RN)


Select user_id, COUNT(*) as streak_length,

MIN(activity_date) as START_DATE,

MAX(activity_date) as END_DATE

FROM CTE_GRP

GROUP BY user_id, grp



-- 3. Nth Highest Salary Per Department

--Table: employees(employee_id, name, salary, department_id)

--Task: Return the 3rd highest salary per department.


With CTE_RNK as 

(Select *,

DENSE_RANK() OVER (partition by department_id ORDER BY salary DESC) as rnk

FROM employees)


Select * 

FROM CTE_RNK

where rnk = 3


-- 4. Users Who Ordered All Products

--Tables: users(user_id), sales(user_id, product_id), products(product_id)

--Task: Return users who have purchased every product.


SELECT user_id

FROM sales

GROUP BY user_id

HAVING COUNT(DISTINCT product_id) = (SELECT COUNT(*) FROM products);



-- 5. Detect Duplicate Orders Within 1 Minute

--Table: orders(order_id, user_id, order_time)

--Task: Identify users who placed more than one order within 1 minute.


Select user_id, order_id, order_time FROM 

(SELECT *,

LAG(order_time) OVER (partition by user_id ORDER By order_time) as prev_time

FROM order_times) prev 

WHERE DATEDIFF(second, prev_time, order_time) <= 60



-- 6. Rank Products by Revenue in Each Quarter

--Tables: sales(product_id, sale_date, quantity), products(product_id, price)

--Task: Rank products based on total revenue in each quarter.


With CTE_Revenue as

(Select product_id,

YEAR(sale_date) as YEAR,

DATEPART(quarter, sale_date) as QTR,

SUM(revenue) as Total_Revenue

FROM sales 

GROUP BY product_id, YEAR(sale_date) ,

DATEPART(quarter, sale_date))


Select * ,

RANK() OVER (Partition by year, QTR Order By Total_Revenue DESC) as rank 

FROM CTE_Revenue



-- 7. Recursive Hierarchy (Org Chart)

--Table: employees(emp_id, name, manager_id)

--Task: Show full reporting chain for each employee.


WITH org_chart AS (

    -- Anchor member: top-level managers

  Select 

  emp_id,

  name,

  manager_id ,

  CAST(name as VARCHAR(MAX)) as path 

  FROM employees

  WHERE manager_id is NULL

   

    UNION ALL

    

    -- Recursive member: employees reporting to those above


Select 

   emp.emp_id,

   emp.name,

   emp.manager_id ,

   CAST(oc.path + '->' + emp.name as VARCHAR(MAX)) as path 

  FROM employees emp INNER JOIN org_chart oc 

  ON emp.manager_id = oc.emp_id


)


Select * 

FROM org_chart



-- 8. Customers with Revenue Growth Month-over-Month

--Table: orders(customer_id, order_date, revenue)

--Task: Identify customers whose revenue increased month over month.


WITH monthly_revenue AS (

  SELECT customer_id,

         YEAR(order_date) as YR, MONTH(order_date) AS MTH,

         SUM(revenue) AS total

  FROM orders

  GROUP BY customer_id, YEAR(order_date) , MONTH(order_date)

),

revenue_diff AS (

  SELECT *,

         LAG(total) OVER (PARTITION BY customer_id ORDER BY YR, MTH) AS prev_total

  FROM monthly_revenue

)

SELECT customer_id, YR, MTH, total, prev_total

FROM revenue_diff

WHERE total > prev_total;


-- 9. Median Revenue Per Product

--Tables: sales(product_id, revenue)

--Task: Calculate median revenue per product.


SELECT DISTINCT

       product_id,

       PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY revenue) 

           OVER (PARTITION BY product_id) AS median_revenue

FROM sales;



-- 10. Detect Order Gaps (Missing Days)

--Table: sales(sale_date)

--Task: Find dates in a month when no sales were made.


--WITH all_dates AS (

--  SELECT generate_series('2023-08-01'::date, '2023-08-31'::date, interval '1 day') AS sale_date

--),

-- Rewrite Using Generate Series (CTE - all_dates)


With CTE_All_Dates AS 

(Select CAST('2023-08-01' AS DATE) AS sale_date 

UNION ALL 

Select DATEADD(day, 1, sale_date) 

FROM CTE_All_Dates 

WHERE sale_date < '2023-08-31')

,

CTE_sales as

(Select DISTINCT sale_date 

FROM sales)


Select AD.sale_date 

FROM CTE_All_Dates AD LEFT JOIN 

CTE_sales S 

ON AD.sale_date = S.sale_date 

WHERE S.sale_date IS NULL 



Post a Comment

Previous Post Next Post

Contact Form