Delta Lake SQL Analytics Queries
-- 1. Cumulative revenue per day
SELECT
order_date,
SUM(revenue) OVER (ORDER BY order_date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT
ROW) AS cumulative_revenue
FROM sales_table;
-- 2. First and last login per user
SELECT
user_id,
MIN(login_time) AS first_login,
MAX(login_time) AS last_login
FROM event_logs
GROUP BY user_id;
-- 3. Top 3 customers by spend in each region
SELECT *
FROM (
SELECT *,
RANK() OVER (PARTITION BY region ORDER BY spend DESC) AS rank
FROM customer_spend
) ranked
WHERE rank <= 3;
-- 4. Detect and delete duplicate records
-- Detect duplicates
SELECT *, COUNT(*) OVER (PARTITION BY txn_id, txn_time, amount) AS dup_count
FROM transactions
HAVING dup_count > 1;
-- Delete extras (keep one)
DELETE FROM transactions
WHERE row_id NOT IN (
SELECT MIN(row_id)
FROM transactions
GROUP BY txn_id, txn_time, amount
);
-- 5. Users who made purchases in 3 consecutive months
WITH monthly AS (
SELECT user_id,
DATE_TRUNC('month', purchase_date) AS month
FROM purchases
GROUP BY user_id, DATE_TRUNC('month', purchase_date)
),
consecutive AS (
SELECT user_id, month,
LAG(month, 1) OVER (PARTITION BY user_id ORDER BY month) AS prev1,
LAG(month, 2) OVER (PARTITION BY user_id ORDER BY month) AS prev2
FROM monthly
)
SELECT DISTINCT user_id
FROM consecutive
WHERE DATEDIFF(month, prev1, month) = 1
AND DATEDIFF(month, prev2, prev1) = 1;
-- 6. Skewed joins and broadcast fix
-- Without fix
SELECT * FROM large_fact f
JOIN small_dim d ON f.dim_id = d.id;
-- With broadcast hint
SELECT /*+ BROADCAST(d) */ * FROM large_fact f
JOIN small_dim d ON f.dim_id = d.id;
-- 7. 7-day moving average of product sales
SELECT
product_id,
sale_date,
AVG(sales) OVER (
PARTITION BY product_id
ORDER BY sale_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS moving_avg_7d
FROM product_sales;
-- 8. Pivot daily sales into month-wise columns
SELECT product_id,
SUM(CASE WHEN MONTH(sale_date) = 1 THEN sales ELSE 0 END) AS jan_sales,
SUM(CASE WHEN MONTH(sale_date) = 2 THEN sales ELSE 0 END) AS feb_sales,
SUM(CASE WHEN MONTH(sale_date) = 3 THEN sales ELSE 0 END) AS mar_sales,
SUM(CASE WHEN MONTH(sale_date) = 4 THEN sales ELSE 0 END) AS apr_sales,
SUM(CASE WHEN MONTH(sale_date) = 5 THEN sales ELSE 0 END) AS may_sales,
SUM(CASE WHEN MONTH(sale_date) = 6 THEN sales ELSE 0 END) AS jun_sales,
SUM(CASE WHEN MONTH(sale_date) = 7 THEN sales ELSE 0 END) AS jul_sales,
SUM(CASE WHEN MONTH(sale_date) = 8 THEN sales ELSE 0 END) AS aug_sales,
SUM(CASE WHEN MONTH(sale_date) = 9 THEN sales ELSE 0 END) AS sep_sales,
SUM(CASE WHEN MONTH(sale_date) = 10 THEN sales ELSE 0 END) AS oct_sales,
SUM(CASE WHEN MONTH(sale_date) = 11 THEN sales ELSE 0 END) AS nov_sales,
SUM(CASE WHEN MONTH(sale_date) = 12 THEN sales ELSE 0 END) AS dec_sales
FROM sales
GROUP BY product_id;
-- 9. Customers who bought products every month in a year
WITH monthly_purchase AS (
SELECT customer_id, MONTH(purchase_date) AS month
FROM purchases
WHERE YEAR(purchase_date) = 2024
GROUP BY customer_id, MONTH(purchase_date)
)
SELECT customer_id
FROM monthly_purchase
GROUP BY customer_id
HAVING COUNT(DISTINCT month) = 12;
-- 10. Rank products by sales per year
SELECT *,
RANK() OVER (PARTITION BY YEAR(sale_date) ORDER BY total_sales DESC) AS year_rank
FROM (
SELECT product_id,
YEAR(sale_date) AS year,
SUM(sales) AS total_sales
FROM sales
GROUP BY product_id, YEAR(sale_date)
);
-- 11. Employees earning more than their department average
SELECT *
FROM employees e
JOIN (
SELECT department_id, AVG(salary) AS avg_salary
FROM employees
GROUP BY department_id
) dept_avg
ON e.department_id = dept_avg.department_id
WHERE e.salary > dept_avg.avg_salary;
-- 12. Median transaction amount
WITH ranked AS (
SELECT amount,
ROW_NUMBER() OVER (ORDER BY amount) AS rn,
COUNT(*) OVER () AS cnt
FROM transactions
)
SELECT AVG(amount) AS median
FROM ranked
WHERE rn IN (FLOOR((cnt + 1)/2.0), CEIL((cnt + 1)/2.0));
-- 13. Users who placed first order in last 30 days
WITH first_order AS (
SELECT user_id, MIN(order_date) AS first_order_date
FROM orders
GROUP BY user_id
)
SELECT user_id
FROM first_order
WHERE first_order_date >= DATE_SUB(CURRENT_DATE(), 30);
-- 14. Compare price change between two dates
SELECT product_id,
MAX(CASE WHEN price_date = '2025-01-01' THEN price END) AS price_on_start,
MAX(CASE WHEN price_date = '2025-07-01' THEN price END) AS price_on_end,
MAX(CASE WHEN price_date = '2025-07-01' THEN price END)
- MAX(CASE WHEN price_date = '2025-01-01' THEN price END) AS price_diff
FROM product_prices
WHERE price_date IN ('2025-01-01', '2025-07-01')
GROUP BY product_id;
-- 15. Customers with first and last transaction on same day
WITH user_txns AS (
SELECT user_id,
MIN(txn_date) AS first_txn,
MAX(txn_date) AS last_txn
FROM transactions
GROUP BY user_id
)
SELECT user_id
FROM user_txns
WHERE first_txn = last_txn;
-- 16. Percentage of returning users per month
WITH firsts AS (
SELECT user_id, MIN(DATE_TRUNC('month', order_date)) AS first_month
FROM orders
GROUP BY user_id
),
monthly_users AS (
SELECT DISTINCT user_id, DATE_TRUNC('month', order_date) AS month
FROM orders
)
SELECT month,
COUNT(CASE WHEN first_month <> month THEN user_id END) * 100.0 / COUNT(user_id)
AS returning_user_pct
FROM monthly_users
LEFT JOIN firsts USING (user_id)
GROUP BY month;
-- 17. Products never sold
SELECT p.product_id
FROM products p
LEFT JOIN sales s ON p.product_id = s.product_id
WHERE s.product_id IS NULL;
-- 18. Detect schema drift in Delta
DESCRIBE HISTORY delta.`/path/to/delta_table`;
-- 19. Departments where >=2 employees have identical salaries
SELECT department_id
FROM employees
GROUP BY department_id, salary
HAVING COUNT(*) >= 2;
-- 20. Group users by login streaks of 3+ consecutive days
WITH ranked AS (
SELECT user_id,
login_date,
DATEDIFF(login_date, ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY
login_date)) AS grp
FROM logins
),
grouped AS (
SELECT user_id, grp, COUNT(*) AS streak_len
FROM ranked
GROUP BY user_id, grp
)
SELECT user_id, streak_len
FROM grouped
WHERE streak_len >= 3;