-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsetup-test-db.sql
More file actions
322 lines (289 loc) · 8.8 KB
/
Copy pathsetup-test-db.sql
File metadata and controls
322 lines (289 loc) · 8.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
-- Setup script for testing InstantIndex with realistic e-commerce data
-- Creates tables, generates data, and simulates query traffic
-- 1. Extensions
-- Both extensions are created only when they are not already installed in
-- this database. pg_stat_statements is the view queried for the summary at
-- the end of this script.
CREATE EXTENSION IF NOT EXISTS hypopg;
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;

-- Clear any previously collected statement statistics.
SELECT pg_stat_statements_reset();
-- 2. Schema
-- Remove tables left over from a previous run so the script can be re-run.
-- CASCADE also removes objects that depend on them (for example the foreign
-- keys declared between these tables).
DROP TABLE IF EXISTS
    order_items,
    orders,
    products,
    categories,
    users,
    sessions
CASCADE;
-- Customer accounts. Only the primary key is indexed; the closing notes of
-- this script expect the analyzer to report indexes on email and on
-- (status, country) as missing.
CREATE TABLE users (
    id          SERIAL PRIMARY KEY,
    email       VARCHAR(255) NOT NULL,
    username    VARCHAR(100) NOT NULL,
    status      VARCHAR(20)  DEFAULT 'active',
    country     VARCHAR(2),                    -- two-character code such as 'US'
    created_at  TIMESTAMP    DEFAULT NOW(),
    last_login  TIMESTAMP                      -- NULL when no login is recorded
);
-- Product categories. parent_id points back at this same table, so a row
-- with a NULL parent_id is a top-level category.
CREATE TABLE categories (
    id         SERIAL PRIMARY KEY,
    name       VARCHAR(100) NOT NULL,
    slug       VARCHAR(100) NOT NULL,
    parent_id  INT REFERENCES categories (id)
);
-- Catalogue items. Only the primary key is indexed; the closing notes of this
-- script expect indexes on (category_id, status) and on slug to be reported
-- as missing.
CREATE TABLE products (
    id           SERIAL PRIMARY KEY,
    category_id  INT REFERENCES categories (id),
    name         VARCHAR(255)   NOT NULL,
    slug         VARCHAR(255)   NOT NULL,
    price        NUMERIC(10, 2) NOT NULL,
    stock        INT            DEFAULT 0,
    status       VARCHAR(20)    DEFAULT 'active',
    created_at   TIMESTAMP      DEFAULT NOW()
);
-- Order headers, one row per order placed by a user. The foreign key on
-- user_id is declared without a supporting index.
CREATE TABLE orders (
    id          SERIAL PRIMARY KEY,
    user_id     INT REFERENCES users (id),
    status      VARCHAR(20) DEFAULT 'pending',
    total       NUMERIC(10, 2),
    created_at  TIMESTAMP   DEFAULT NOW(),
    updated_at  TIMESTAMP   DEFAULT NOW()
);
-- Order lines linking an order to a product, with the quantity and the price
-- stored on the line itself.
CREATE TABLE order_items (
    id          SERIAL PRIMARY KEY,
    order_id    INT REFERENCES orders (id),
    product_id  INT REFERENCES products (id),
    quantity    INT,
    price       NUMERIC(10, 2)
);
-- Browsing sessions per user. ended_at is nullable; page_views starts at 0.
CREATE TABLE sessions (
    id          SERIAL PRIMARY KEY,
    user_id     INT REFERENCES users (id),
    started_at  TIMESTAMP DEFAULT NOW(),
    ended_at    TIMESTAMP,
    page_views  INT       DEFAULT 0
);
-- 3. Generate realistic data
\echo 'Generating users...'
-- Inserts 50,000 users named user1..user50000.
--
-- The per-row random draws that must be reused are computed once in the
-- derived table below:
--   * status_roll: a single draw decides the status, giving a 70% / 20% / 10%
--     split between active / inactive / suspended. Calling random() separately
--     in each WHEN branch would skew the split to roughly 70% / 27% / 3%.
--   * created_at: reused so that last_login can never be earlier than the
--     account's creation time.
-- Because the derived table's select list contains a volatile function, the
-- planner keeps it as a separate subquery, so each draw happens once per row.
INSERT INTO users (email, username, status, country, created_at, last_login)
SELECT
    'user' || g.i || '@example.com',
    'user' || g.i,
    CASE
        WHEN g.status_roll < 0.7 THEN 'active'
        WHEN g.status_roll < 0.9 THEN 'inactive'
        ELSE 'suspended'
    END,
    -- floor(random() * 7 + 1) yields a subscript in 1..7.
    (ARRAY['US', 'UK', 'CA', 'DE', 'FR', 'JP', 'AU'])[floor(random() * 7 + 1)],
    g.created_at,
    -- About 60% of users have a login within the last 30 days, clamped so it
    -- is not before the account was created; the rest have never logged in.
    CASE
        WHEN random() < 0.6
            THEN GREATEST(g.created_at, NOW() - (random() * INTERVAL '30 days'))
        ELSE NULL
    END
FROM (
    SELECT
        s.i,
        random() AS status_roll,
        NOW() - (random() * INTERVAL '2 years') AS created_at
    FROM generate_series(1, 50000) AS s(i)
) AS g;
\echo 'Generating categories...'
-- Eleven categories in one statement. The parent_id literals 1 and 2 refer to
-- the ids the SERIAL column assigns to 'Electronics' and 'Clothing', which
-- holds because the table has just been recreated and rows are inserted in
-- this order.
INSERT INTO categories (name, slug, parent_id)
VALUES
    -- top-level categories
    ('Electronics',     'electronics',     NULL),
    ('Clothing',        'clothing',        NULL),
    ('Home & Garden',   'home-garden',     NULL),
    ('Sports',          'sports',          NULL),
    ('Books',           'books',           NULL),
    ('Toys',            'toys',            NULL),
    -- children of Electronics
    ('Laptops',         'laptops',         1),
    ('Phones',          'phones',          1),
    ('Cameras',         'cameras',         1),
    -- children of Clothing
    ('Mens Clothing',   'mens-clothing',   2),
    ('Womens Clothing', 'womens-clothing', 2);
\echo 'Generating products...'
-- Inserts 10,000 products named 'Product 1'..'Product 10000', each assigned
-- to a random category id in 1..11 and created within the last year.
INSERT INTO products (category_id, name, slug, price, stock, status, created_at)
SELECT
    CAST(floor(random() * 11) + 1 AS INTEGER),
    'Product ' || n,
    'product-' || n,
    CAST(random() * 1000 + 10 AS DECIMAL(10,2)),
    CAST(random() * 1000 AS INTEGER),
    -- roughly nine in ten products are active
    CASE
        WHEN random() < 0.9 THEN 'active'
        ELSE 'inactive'
    END,
    NOW() - (random() * INTERVAL '1 year')
FROM generate_series(1, 10000) AS n;
\echo 'Generating orders...'
-- Inserts 200,000 orders spread over the last year.
--
-- created_at is drawn once per row in the derived table and reused, so that
-- updated_at always falls between created_at and now. Drawing the two
-- timestamps independently would leave about half of the orders "updated"
-- before they were created.
INSERT INTO orders (user_id, status, total, created_at, updated_at)
SELECT
    (random() * 49999)::INTEGER + 1, -- 1 to 50000
    -- floor(random() * 5 + 1) yields a subscript in 1..5.
    (ARRAY['pending', 'processing', 'shipped', 'delivered', 'cancelled'])[floor(random() * 5 + 1)],
    (random() * 1000 + 20)::DECIMAL(10,2),
    o.created_at,
    -- a random point in the interval [created_at, now)
    o.created_at + random() * (NOW() - o.created_at)
FROM (
    SELECT NOW() - (random() * INTERVAL '1 year') AS created_at
    FROM generate_series(1, 200000)
) AS o;
\echo 'Generating order items...'
-- Inserts 500,000 order lines, each pointing at a random existing order and
-- product. The line price is drawn independently of the product's own price.
INSERT INTO order_items (order_id, product_id, quantity, price)
SELECT
    CAST(random() * 199999 AS INTEGER) + 1,  -- 1 to 200000
    CAST(random() * 9999 AS INTEGER) + 1,    -- 1 to 10000
    CAST(random() * 5 AS INTEGER) + 1,
    CAST(random() * 500 + 10 AS DECIMAL(10,2))
FROM generate_series(1, 500000) AS n;
\echo 'Generating sessions...'
-- Inserts 300,000 sessions started within the last 90 days.
--
-- started_at is drawn once per row in the derived table and reused, so that
-- ended_at is the session's own start plus up to two hours. Deriving ended_at
-- from a second, independent random start would make session length
-- meaningless and would often place the end before the start.
INSERT INTO sessions (user_id, started_at, ended_at, page_views)
SELECT
    (random() * 49999)::INTEGER + 1, -- 1 to 50000
    s.started_at,
    s.started_at + (random() * INTERVAL '2 hours'),
    (random() * 50)::INTEGER + 1
FROM (
    SELECT NOW() - (random() * INTERVAL '90 days') AS started_at
    FROM generate_series(1, 300000)
) AS s;
\echo 'Data generation complete!'
\echo ''
\echo 'Database statistics:'
-- One row per table with its row count, sorted by table name.
SELECT
    counts.table_name,
    counts.rows
FROM (
    SELECT 'users' AS table_name, COUNT(*) AS rows FROM users
    UNION ALL
    SELECT 'categories', COUNT(*) FROM categories
    UNION ALL
    SELECT 'products', COUNT(*) FROM products
    UNION ALL
    SELECT 'orders', COUNT(*) FROM orders
    UNION ALL
    SELECT 'order_items', COUNT(*) FROM order_items
    UNION ALL
    SELECT 'sessions', COUNT(*) FROM sessions
) AS counts
ORDER BY counts.table_name;
-- 4. Run ANALYZE to update statistics
ANALYZE;
-- 5. Reset statistics so that they describe only the simulated workload
\echo ''
\echo 'Resetting write statistics (to exclude initial data load)...'
-- Clears the cumulative statistics counters of the current database, so the
-- bulk INSERTs above do not count as application writes.
SELECT pg_stat_reset();
-- Also discard the statements recorded while creating and loading the tables;
-- otherwise the bulk INSERTs would appear in pg_stat_statements next to the
-- workload queries and distort the summary printed at the end.
SELECT pg_stat_statements_reset();
-- The workload below runs inside DO blocks. With the default setting
-- (pg_stat_statements.track = top) only the DO statements themselves would be
-- recorded, not the queries they execute. 'all' also records nested
-- statements. Changing this setting requires superuser (or an explicitly
-- granted SET privilege) and lasts for the rest of this session.
SET pg_stat_statements.track = 'all';
\echo ''
\echo 'Running realistic query workload to populate pg_stat_statements...'
\echo 'This simulates real application traffic patterns.'
\echo ''
-- Simulate common query patterns (these would benefit from indexes)
-- User login queries (very frequent)
-- Runs 1,000 single-user lookups by email.
--
-- The random email is produced inside an uncorrelated scalar subquery so that
-- it is computed once per execution. random() is volatile: written directly
-- in the WHERE clause it would be re-evaluated for every row of users (each
-- row compared against a different address) and could never be used as an
-- index condition, so the query would not model a login lookup at all.
-- floor(random() * 50000) + 1 yields 1..50000, matching the generated users.
DO $$
BEGIN
    FOR i IN 1..1000 LOOP
        PERFORM *
        FROM users
        WHERE email = (
            SELECT 'user' || (floor(random() * 50000) + 1)::int || '@example.com'
        );
    END LOOP;
END $$;
-- Active user lookups (frequent)
-- Executes the same status + country filter 500 times, capped at 100 rows.
DO $workload$
BEGIN
    FOR iteration IN 1..500 LOOP
        PERFORM *
        FROM users
        WHERE status = 'active'
          AND country = 'US'
        LIMIT 100;
    END LOOP;
END
$workload$;
-- Recent orders for user (very frequent - dashboard)
-- Runs 2,000 "latest ten orders of one user" queries.
--
-- The random user id is produced inside an uncorrelated scalar subquery so
-- that it is computed once per execution. random() is volatile: written
-- directly in the WHERE clause it would be re-evaluated for every row of
-- orders and could never be used as an index condition.
-- floor(random() * 50000) + 1 yields 1..50000, matching the generated users.
DO $$
BEGIN
    FOR i IN 1..2000 LOOP
        PERFORM *
        FROM orders
        WHERE user_id = (SELECT (floor(random() * 50000) + 1)::int)
        ORDER BY created_at DESC
        LIMIT 10;
    END LOOP;
END $$;
-- Order details with items (frequent)
-- Runs 800 "one order with its lines and product names" queries.
--
-- The random order id is produced inside an uncorrelated scalar subquery so
-- that it is computed once per execution. random() is volatile: written
-- directly in the WHERE clause it would be re-evaluated for every candidate
-- row and could not be used as an index condition, not even on the orders
-- primary key. floor(random() * 200000) + 1 yields 1..200000, matching the
-- generated orders.
DO $$
BEGIN
    FOR i IN 1..800 LOOP
        PERFORM o.*, oi.*, p.name, p.price
        FROM orders o
        JOIN order_items oi ON oi.order_id = o.id
        JOIN products p ON p.id = oi.product_id
        WHERE o.id = (SELECT (floor(random() * 200000) + 1)::int);
    END LOOP;
END $$;
-- Product search by category (very frequent)
-- Runs 1,500 "newest 20 active products of one category" queries.
--
-- The random category id is produced inside an uncorrelated scalar subquery
-- so that it is computed once per execution. random() is volatile: written
-- directly in the WHERE clause it would be re-evaluated for every row of
-- products and could never be used as an index condition.
-- floor(random() * 11) + 1 yields 1..11, matching the eleven categories.
DO $$
BEGIN
    FOR i IN 1..1500 LOOP
        PERFORM *
        FROM products
        WHERE category_id = (SELECT (floor(random() * 11) + 1)::int)
          AND status = 'active'
        ORDER BY created_at DESC
        LIMIT 20;
    END LOOP;
END $$;
-- Product search by slug (frequent)
-- Runs 600 single-product lookups by slug.
--
-- The random slug is produced inside an uncorrelated scalar subquery so that
-- it is computed once per execution. random() is volatile: written directly
-- in the WHERE clause it would be re-evaluated for every row of products and
-- could never be used as an index condition.
-- floor(random() * 10000) + 1 yields 1..10000, matching the generated slugs.
DO $$
BEGIN
    FOR i IN 1..600 LOOP
        PERFORM *
        FROM products
        WHERE slug = (
            SELECT 'product-' || (floor(random() * 10000) + 1)::int
        );
    END LOOP;
END $$;
-- Recent orders by status (admin dashboard - frequent)
-- Executes the "50 newest pending orders" query 400 times.
DO $workload$
BEGIN
    FOR iteration IN 1..400 LOOP
        PERFORM *
        FROM orders
        WHERE status = 'pending'
        ORDER BY created_at DESC
        LIMIT 50;
    END LOOP;
END
$workload$;
-- User activity in last 30 days (analytics)
-- Runs 300 "count one user's sessions of the last 30 days" queries.
--
-- The random user id is produced inside an uncorrelated scalar subquery so
-- that it is computed once per execution. random() is volatile: written
-- directly in the WHERE clause it would be re-evaluated for every row of
-- sessions and could never be used as an index condition.
-- floor(random() * 50000) + 1 yields 1..50000, matching the generated users.
DO $$
BEGIN
    FOR i IN 1..300 LOOP
        PERFORM COUNT(*)
        FROM sessions
        WHERE user_id = (SELECT (floor(random() * 50000) + 1)::int)
          AND started_at > NOW() - INTERVAL '30 days';
    END LOOP;
END $$;
-- Top selling products (complex aggregation)
-- Executes the "ten best-selling products of the last 30 days" report 200
-- times; quantities are summed per product over orders created in that window.
DO $workload$
BEGIN
    FOR iteration IN 1..200 LOOP
        PERFORM
            p.id,
            p.name,
            SUM(oi.quantity) AS total_sold
        FROM products AS p
        INNER JOIN order_items AS oi
            ON oi.product_id = p.id
        INNER JOIN orders AS o
            ON o.id = oi.order_id
        WHERE o.created_at > NOW() - INTERVAL '30 days'
        GROUP BY p.id, p.name
        ORDER BY total_sold DESC
        LIMIT 10;
    END LOOP;
END
$workload$;
-- User orders summary (join heavy)
-- Runs 500 "order count and total spend of one user" queries. The LEFT JOIN
-- keeps users without orders (order_count 0, total_spent NULL).
--
-- The random user id is produced inside an uncorrelated scalar subquery so
-- that it is computed once per execution. random() is volatile: written
-- directly in the WHERE clause it would be re-evaluated for every row of
-- users and could not be used as an index condition, not even on the users
-- primary key. floor(random() * 50000) + 1 yields 1..50000, matching the
-- generated users.
DO $$
BEGIN
    FOR i IN 1..500 LOOP
        PERFORM u.id, u.email, COUNT(o.id) as order_count, SUM(o.total) as total_spent
        FROM users u
        LEFT JOIN orders o ON o.user_id = u.id
        WHERE u.id = (SELECT (floor(random() * 50000) + 1)::int)
        GROUP BY u.id, u.email;
    END LOOP;
END $$;
-- Recent sessions by user (frequent)
-- Runs 400 "latest 20 sessions of one user" queries.
--
-- The random user id is produced inside an uncorrelated scalar subquery so
-- that it is computed once per execution. random() is volatile: written
-- directly in the WHERE clause it would be re-evaluated for every row of
-- sessions and could never be used as an index condition.
-- floor(random() * 50000) + 1 yields 1..50000, matching the generated users.
DO $$
BEGIN
    FOR i IN 1..400 LOOP
        PERFORM *
        FROM sessions
        WHERE user_id = (SELECT (floor(random() * 50000) + 1)::int)
        ORDER BY started_at DESC
        LIMIT 20;
    END LOOP;
END $$;
\echo ''
\echo 'Workload simulation complete!'
\echo ''
\echo 'Query statistics:'
-- Summarises what pg_stat_statements recorded: number of distinct statements,
-- total executions, and the unweighted average of the per-statement mean
-- execution times (milliseconds).
--
-- pg_stat_statements holds rows for every database in the cluster, so the
-- summary is restricted to the current database; statements that mention
-- pg_stat_statements themselves (such as this one) are excluded.
SELECT
    COUNT(*) AS unique_queries,
    SUM(calls) AS total_executions,
    ROUND(AVG(mean_exec_time)::numeric, 2) AS avg_time_ms
FROM pg_stat_statements
WHERE query NOT LIKE '%pg_stat_statements%'
  AND dbid = (
      SELECT oid
      FROM pg_database
      WHERE datname = current_database()
  );
-- Closing banner: prints how to run the analyzer against this database and
-- which index recommendations the workload above is expected to produce.
-- These are psql \echo meta-commands; they only print text.
\echo ''
\echo '================================================'
\echo 'Database ready for InstantIndex testing!'
\echo '================================================'
\echo ''
\echo 'To test InstantIndex, run:'
\echo ' node dist/cli.js analyze postgresql://localhost/instantindex_test'
\echo ''
-- The columns listed below correspond to the WHERE / ORDER BY columns used by
-- the workload DO blocks earlier in this script.
\echo 'Expected results:'
\echo ' - Should find ~8-12 missing indexes'
\echo ' - Should show 50-80% performance improvement'
\echo ' - Key indexes likely on:'
\echo ' * users(email) - for login'
\echo ' * users(status, country) - for filtered searches'
\echo ' * orders(user_id, created_at DESC) - for user dashboard'
\echo ' * products(category_id, status) - for product listings'
\echo ' * products(slug) - for product pages'
\echo ' * order_items(order_id) - for order details'
\echo ' * sessions(user_id, started_at DESC) - for activity'
\echo ''