diff --git a/schemachange/sc_alter_table.c b/schemachange/sc_alter_table.c index 16f90c70e5..4cbd9b2179 100644 --- a/schemachange/sc_alter_table.c +++ b/schemachange/sc_alter_table.c @@ -589,7 +589,7 @@ int do_alter_table(struct ireq *iq, struct schema_change_type *s, /* set sc_genids, 0 them if we are starting a new schema change, or * restore them to their previous values if we are resuming */ - if (init_sc_genids(newdb, s)) { + if (init_sc_genids(newdb, &db->sc_genids, s)) { sc_errf(s, "failed initilizing sc_genids\n"); delete_temp_table(iq, newdb); change_schemas_recover(s->tablename); @@ -605,19 +605,19 @@ int do_alter_table(struct ireq *iq, struct schema_change_type *s, assert(db->sharding_func); for (i = 0; i < newdb->dtastripe; i++) { for (int shard = 0; shard < db->sharding_arg->n - 1; shard++) { - if (newdb->sc_genids[i] < db->sharding_arg->cs[shard].resume_genids[i]) { + if (db->sc_genids[i] < db->sharding_arg->cs[shard].resume_genids[i]) { logmsg(LOGMSG_INFO, "%s updating %s stripe %d sc_genid from %llx (%lld) to %llx (%lld) using %s\n", - __func__, s->tablename, i, newdb->sc_genids[i], newdb->sc_genids[i], + __func__, s->tablename, i, db->sc_genids[i], db->sc_genids[i], db->sharding_arg->cs[shard].resume_genids[i], db->sharding_arg->cs[shard].resume_genids[i], db->sharding_arg->ss[shard]->tablename); - newdb->sc_genids[i] = db->sharding_arg->cs[shard].resume_genids[i]; + db->sc_genids[i] = db->sharding_arg->cs[shard].resume_genids[i]; } } } } for (i = 0; i < newdb->dtastripe; i++) { logmsg(LOGMSG_INFO, "%s: %s stripe %d result resume genid %llx (%lld)\n", __func__, s->tablename, i, - newdb->sc_genids[i], newdb->sc_genids[i]); + db->sc_genids[i], db->sc_genids[i]); } Pthread_rwlock_wrlock(&db->sc_live_lk); @@ -675,7 +675,7 @@ int do_alter_table(struct ireq *iq, struct schema_change_type *s, changed == SC_CONSTRAINT_CHANGE) { if (!s->live) gbl_readonly_sc = 1; - rc = convert_all_records(db, newdb, newdb->sc_genids, s); + rc = convert_all_records(db, newdb, db->sc_genids, s); if (rc == 1) rc = 0; } else rc = 0; @@ -719,8 +719,7 @@ int do_alter_table(struct ireq *iq, struct schema_change_type *s, live_sc_off(db); for (i = 0; i < gbl_dtastripe; i++) { - sc_errf(s, " > [%s] stripe %2d was at 0x%016llx\n", s->tablename, - i, newdb->sc_genids[i]); + sc_errf(s, " > [%s] stripe %2d was at 0x%016llx\n", s->tablename, i, db->sc_genids[i]); } while (s->logical_livesc) { @@ -798,7 +797,7 @@ static int do_merge_table(struct ireq *iq, struct schema_change_type *s, /* set sc_genids, 0 them if we are starting a new schema change, or * restore them to their previous values if we are resuming */ - if (init_sc_genids(newdb, s)) { + if (init_sc_genids(newdb, &db->sc_genids, s)) { sc_client_error(s, "Failed to initialize sc_genids"); return -1; } @@ -848,7 +847,7 @@ static int do_merge_table(struct ireq *iq, struct schema_change_type *s, /* skip converting records for fastinit and planned schema change * that doesn't require rebuilding anything. */ - rc = convert_all_records(db, newdb, newdb->sc_genids, s); + rc = convert_all_records(db, newdb, db->sc_genids, s); if (rc == 1) rc = 0; remove_ongoing_alter(s); @@ -886,8 +885,7 @@ static int do_merge_table(struct ireq *iq, struct schema_change_type *s, live_sc_off(db); for (i = 0; i < gbl_dtastripe; i++) { - sc_errf(s, " > [%s] stripe %2d was at 0x%016llx\n", s->tablename, - i, newdb->sc_genids[i]); + sc_errf(s, " > [%s] stripe %2d was at 0x%016llx\n", s->tablename, i, db->sc_genids[i]); } while (s->logical_livesc) { @@ -1259,7 +1257,7 @@ int do_upgrade_table_int(struct schema_change_type *s) sc_printf(s, "Starting FULL table upgrade.\n"); } - if (init_sc_genids(db, s)) { + if (init_sc_genids(db, &db->sc_genids, s)) { sc_errf(s, "failed initilizing sc_genids\n"); return SC_LLMETA_ERR; } diff --git a/schemachange/sc_callbacks.c b/schemachange/sc_callbacks.c index 657c20496f..69fccc7dea 100644 --- a/schemachange/sc_callbacks.c +++ b/schemachange/sc_callbacks.c @@ -284,8 +284,7 @@ int live_sc_post_update_delayed_key_adds(struct ireq *iq, void *trans, unsigned #endif /* need to check where the cursor is, even tho that check was done once in * post_update */ - int is_gen_gt_scptr = is_genid_right_of_stripe_pointer( - iq->usedb->handle, newgenid, usedb->sc_to->sc_genids); + int is_gen_gt_scptr = is_genid_right_of_stripe_pointer(iq->usedb->handle, newgenid, usedb->sc_from->sc_genids); if (is_gen_gt_scptr) { if (iq->debug) { reqprintf(iq, "%s: skip genid 0x%llx to the right of scptr", __func__, newgenid); diff --git a/schemachange/sc_logic.c b/schemachange/sc_logic.c index 45a48ced18..a5d2b73474 100644 --- a/schemachange/sc_logic.c +++ b/schemachange/sc_logic.c @@ -1771,6 +1771,10 @@ int backout_schema_changes(struct ireq *iq, tran_type *tran) backout_constraint_pointers(s->newdb, s->db); } change_schemas_recover(s->db->tablename); + if (s->db->sc_genids) { + free(s->db->sc_genids); + s->db->sc_genids = NULL; + } } /* TODO: (NC) Also delete view? */ sc_del_unused_files_tran(s->db, tran); diff --git a/schemachange/sc_records.c b/schemachange/sc_records.c index deff41fa20..1fef827070 100644 --- a/schemachange/sc_records.c +++ b/schemachange/sc_records.c @@ -196,22 +196,22 @@ static inline void lkcounter_check(struct convert_record_data *data, int now) * If success it returns 0, if failure it returns <0 */ int gbl_debug_stall_in_oplog_seed = 0; -int init_sc_genids(struct dbtable *db, struct schema_change_type *s) +int init_sc_genids(struct dbtable *db, unsigned long long **p_sc_genids, struct schema_change_type *s) { void *rec; int orglen, bdberr, stripe; - unsigned long long *sc_genids; + unsigned long long *sc_genids = *p_sc_genids; - if (db->sc_genids == NULL) { - db->sc_genids = malloc(sizeof(unsigned long long) * MAXDTASTRIPE); - if (db->sc_genids == NULL) { + if (sc_genids == NULL) { + sc_genids = malloc(sizeof(unsigned long long) * MAXDTASTRIPE); + if (sc_genids == NULL) { logmsg(LOGMSG_ERROR, "init_sc_genids: failed to allocate sc_genids\n"); return -1; } } - sc_genids = db->sc_genids; + *p_sc_genids = sc_genids; /* if we aren't resuming simply zero the genids */ if (!s->resume) { @@ -284,9 +284,10 @@ int init_sc_genids(struct dbtable *db, struct schema_change_type *s) stripe); sc_genids[stripe] = 0ULL; } - } else + } else { rc = bdb_get_high_genid(db->tablename, stripe, &sc_genids[stripe], &bdberr); + } if (rc < 0 || bdberr != BDBERR_NOERROR) { sc_errf(s, "init_sc_genids: failed to find newest genid for " "stripe: %d\n", diff --git a/schemachange/sc_records.h b/schemachange/sc_records.h index dfc7dd3fe3..489a0a2697 100644 --- a/schemachange/sc_records.h +++ b/schemachange/sc_records.h @@ -96,7 +96,7 @@ int upgrade_all_records(struct dbtable *db, unsigned long long *sc_genids, void convert_record_data_cleanup(struct convert_record_data *data); -int init_sc_genids(struct dbtable *db, struct schema_change_type *s); +int init_sc_genids(struct dbtable *db, unsigned long long **p_sc_genids, struct schema_change_type *s); void live_sc_enter_exclusive_all(bdb_state_type *, tran_type *); diff --git a/schemachange/sc_schema.c b/schemachange/sc_schema.c index 475880d812..c62e4c8270 100644 --- a/schemachange/sc_schema.c +++ b/schemachange/sc_schema.c @@ -614,8 +614,7 @@ void verify_schema_change_constraint(struct ireq *iq, void *trans, if (usedb->sc_to->n_constraints == 0) goto unlock; - if (is_genid_right_of_stripe_pointer(usedb->handle, newgenid, - usedb->sc_to->sc_genids)) { + if (is_genid_right_of_stripe_pointer(usedb->handle, newgenid, usedb->sc_from->sc_genids)) { goto unlock; } diff --git a/schemachange/schemachange.c b/schemachange/schemachange.c index 9039daafea..ae133b9044 100644 --- a/schemachange/schemachange.c +++ b/schemachange/schemachange.c @@ -592,8 +592,7 @@ int live_sc_post_delete_int(struct ireq *iq, void *trans, return 0; } - if (is_genid_right_of_stripe_pointer(iq->usedb->handle, genid, - iq->usedb->sc_to->sc_genids)) { + if (is_genid_right_of_stripe_pointer(iq->usedb->handle, genid, iq->usedb->sc_from->sc_genids)) { return 0; } @@ -690,8 +689,7 @@ int live_sc_post_add_int(struct ireq *iq, void *trans, unsigned long long genid, return 0; } - if (is_genid_right_of_stripe_pointer(iq->usedb->handle, genid, - iq->usedb->sc_to->sc_genids)) { + if (is_genid_right_of_stripe_pointer(iq->usedb->handle, genid, iq->usedb->sc_from->sc_genids)) { return 0; } @@ -779,7 +777,7 @@ int live_sc_post_update_int(struct ireq *iq, void *trans, return 0; } - unsigned long long *sc_genids = iq->usedb->sc_to->sc_genids; + unsigned long long *sc_genids = iq->usedb->sc_from->sc_genids; if (iq->debug) { reqpushprefixf(iq, "live_sc_post_update: "); } diff --git a/tests/timepart_noneres.test/Makefile b/tests/timepart_noneres.test/Makefile new file mode 100644 index 0000000000..b4c0ac1057 --- /dev/null +++ b/tests/timepart_noneres.test/Makefile @@ -0,0 +1,8 @@ +ifeq ($(TESTSROOTDIR),) + include ../testcase.mk +else + include $(TESTSROOTDIR)/testcase.mk +endif +ifeq ($(TEST_TIMEOUT),) + export TEST_TIMEOUT=5m +endif diff --git a/tests/timepart_noneres.test/README b/tests/timepart_noneres.test/README new file mode 100644 index 0000000000..7fa7a8489a --- /dev/null +++ b/tests/timepart_noneres.test/README @@ -0,0 +1 @@ +This tests resuming a time partition collapse. diff --git a/tests/timepart_noneres.test/lrl.options b/tests/timepart_noneres.test/lrl.options new file mode 100644 index 0000000000..8276b79448 --- /dev/null +++ b/tests/timepart_noneres.test/lrl.options @@ -0,0 +1,3 @@ +table t t.csc2 +init_with_genid48 0 +multitable_ddl 1 diff --git a/tests/timepart_noneres.test/output.log b/tests/timepart_noneres.test/output.log new file mode 100644 index 0000000000..4f6deb6aa8 --- /dev/null +++ b/tests/timepart_noneres.test/output.log @@ -0,0 +1,122 @@ +[ + { + "NAME" : "t", + "PERIOD" : "daily", + "RETENTION" : 7, + "SHARD0NAME": "", + "ROLLOUT" : "TRUNCATE", + "TABLES" : + [ + { + "TABLENAME" : "$0_F64CD191", + }, + { + "TABLENAME" : "$1_A2620AE4", + }, + { + "TABLENAME" : "$2_C429139B", + }, + { + "TABLENAME" : "$3_8A26EC5C", + }, + { + "TABLENAME" : "$4_3FDAE1EA", + }, + { + "TABLENAME" : "$5_9FEC23C1", + }, + { + "TABLENAME" : "$6_93A67B48", + } + ] + } +] +(name='t', period='daily', retention=7, nshards=7, version=1, shard0name='') +(name='t', shardname='$0_F64CD191') +(name='t', shardname='$1_A2620AE4') +(name='t', shardname='$2_C429139B') +(name='t', shardname='$3_8A26EC5C') +(name='t', shardname='$4_3FDAE1EA') +(name='t', shardname='$5_9FEC23C1') +(name='t', shardname='$6_93A67B48') +(rows inserted=5) +(rows inserted=10) +(rows inserted=10) +(rows inserted=10) +(rows inserted=10) +(rows inserted=10) +(rows inserted=10) +(rows inserted=5) +[] + +(count(*)=70) +(a=6, b=NULL) +(a=7, b=NULL) +(a=8, b=NULL) +(a=9, b=NULL) +(a=10, b=NULL) +(a=16, b=NULL) +(a=17, b=NULL) +(a=18, b=NULL) +(a=19, b=NULL) +(a=20, b=NULL) +(a=26, b=NULL) +(a=27, b=NULL) +(a=28, b=NULL) +(a=29, b=NULL) +(a=30, b=NULL) +(a=31, b=NULL) +(a=32, b=NULL) +(a=33, b=NULL) +(a=34, b=NULL) +(a=35, b=NULL) +(a=36, b=NULL) +(a=37, b=NULL) +(a=38, b=NULL) +(a=39, b=NULL) +(a=40, b=NULL) +(a=46, b=NULL) +(a=47, b=NULL) +(a=48, b=NULL) +(a=49, b=NULL) +(a=50, b=NULL) +(a=51, b=NULL) +(a=52, b=NULL) +(a=53, b=NULL) +(a=54, b=NULL) +(a=55, b=NULL) +(a=56, b=NULL) +(a=57, b=NULL) +(a=58, b=NULL) +(a=59, b=NULL) +(a=60, b=NULL) +(a=61, b=NULL) +(a=62, b=NULL) +(a=63, b=NULL) +(a=64, b=NULL) +(a=65, b=NULL) +(a=66, b=NULL) +(a=67, b=NULL) +(a=68, b=NULL) +(a=69, b=NULL) +(a=70, b=NULL) +(a=1021, b=NULL) +(a=1022, b=NULL) +(a=1023, b=NULL) +(a=1024, b=NULL) +(a=1025, b=NULL) +(a=1041, b=NULL) +(a=1042, b=NULL) +(a=1043, b=NULL) +(a=1044, b=NULL) +(a=1045, b=NULL) +(a=2001, b=NULL) +(a=2002, b=NULL) +(a=2003, b=NULL) +(a=2004, b=NULL) +(a=2005, b=NULL) +(a=2006, b=NULL) +(a=2007, b=NULL) +(a=2008, b=NULL) +(a=2009, b=NULL) +(a=2010, b=NULL) diff --git a/tests/timepart_noneres.test/runit b/tests/timepart_noneres.test/runit new file mode 100755 index 0000000000..29b56d84ac --- /dev/null +++ b/tests/timepart_noneres.test/runit @@ -0,0 +1,194 @@ +#!/usr/bin/env bash +bash -n "$0" | exit 1 +source ${TESTSROOTDIR}/tools/runit_common.sh + +# args +# +DBN=$1 + +OUT="run.log" +C="cdb2sql ${CDB2_OPTIONS} ${DBN} default" +CT="cdb2sql --tabs ${CDB2_OPTIONS} ${DBN} default" +master=`getmaster` +CM="cdb2sql ${CDB2_OPTIONS} ${DBN} --host ${master}" + +rm $OUT 2>/dev/null +touch $OUT + +delayed_writes() +{ + $CM "exec procedure sys.cmd.send('debg 500')" + $CM "exec procedure sys.cmd.send('ndebg 500')" + echo "Sleeping 10 seconds" + sleep 10 + $C "delete from t where a between 1 and 5 or a between 11 and 15" + echo "Sleeping 10 more seconds" + sleep 10 + $C "update t set a = a + 1000 where a between 21 and 25 or a between 41 and 45" + sleep 10 + echo "Sleeping 10 more seconds finally" + $C "insert into t(a) select * from generate_series(2001, 2010)" +} + +function timepart_stats +{ + dbn=$1 + # check the current partitions + echo $CT "exec procedure sys.cmd.send('partitions')" | egrep -v "STARTTIME|LOW|HIGH|SOURCE_ID" + $CT "exec procedure sys.cmd.send('partitions')" | egrep -v "STARTTIME|LOW|HIGH|SOURCE_ID" >> $OUT + if (( $? != 0 )) ; then + echo "FAILURE" + exit 1 + fi + + echo $C "select name, period, retention, nshards, version,shard0name from comdb2_timepartitions " + $C "select name, period, retention, nshards, version,shard0name from comdb2_timepartitions " >> $OUT + if (( $? != 0 )) ; then + echo "FAILURE" + exit 1 + fi + + echo $C "select name, shardname from comdb2_timepartshards" + $C "select name, shardname from comdb2_timepartshards" >> $OUT + if (( $? != 0 )) ; then + echo "FAILURE" + exit 1 + fi +} + +echo "Creating a 7 days time partition" +echo $C "alter table t partitioned by time period 'daily' retention 7 start '2026-01-01T'" +$C "alter table t partitioned by time period 'daily' retention 7 start '2026-01-01T'" >> $OUT +if (( $? != 0 )) ; then + echo "FAILURE partitioning the table" + exit 1 +fi + +timepart_stats + +#"$0_F64CD191", +#"$1_A2620AE4", +#"$2_C429139B", +#"$3_8A26EC5C", +#"$4_3FDAE1EA", +#"$5_9FEC23C1", +#"$6_93A67B48", + +echo "Time `${CT} 'select now()'`" + +#The idea is to populate shard 0 with some new rows; +#These rows will be processed as part of shard 0 alter in the first 10 seconds +#At this point the delayed delete triggers and try to delete them from both +#source and new table +#if sc_genids are properly set, delete should notice that the rows it deletes +#are already processed by schema change and they need to be removed from new table too + +echo "Populating shard 0 first half \$0_F64CD191" +$C "insert into '\$0_F64CD191' (a) select * from generate_series(6, 10)" >> $OUT +if (( $? != 0 )) ; then + echo "FAILURE populating shard 0 first half" + exit 1 +fi +echo "Populating shard 1 \$1_A2620AE4" +$C "insert into '\$1_A2620AE4' (a) select * from generate_series(11, 20)" >> $OUT +if (( $? != 0 )) ; then + echo "FAILURE populating shard 1" + exit 1 +fi +echo "Populating shard 2 \$2_C429139B" +$C "insert into '\$2_C429139B' (a) select * from generate_series(21, 30)" >> $OUT +if (( $? != 0 )) ; then + echo "FAILURE populating shard 2" + exit 1 +fi +echo "Populating shard 3 \$3_8A26EC5C" +$C "insert into '\$3_8A26EC5C' (a) select * from generate_series(31, 40)" >> $OUT +if (( $? != 0 )) ; then + echo "FAILURE populating shard 3" + exit 1 +fi +echo "Populating shard 4 \$4_3FDAE1EA" +$C "insert into '\$4_3FDAE1EA' (a) select * from generate_series(41, 50)" >> $OUT +if (( $? != 0 )) ; then + echo "FAILURE populating shard 4" + exit 1 +fi +echo "Populating shard 5 \$5_9FEC23C1" +$C "insert into '\$5_9FEC23C1' (a) select * from generate_series(51, 60)" >> $OUT +if (( $? != 0 )) ; then + echo "FAILURE populating shard 5" + exit 1 +fi +echo "Populating shard 6 \$6_93A67B48" +$C "insert into '\$6_93A67B48' (a) select * from generate_series(61, 70)" >> $OUT +if (( $? != 0 )) ; then + echo "FAILURE populating shard 6" + exit 1 +fi +echo "Populating shard 0 second half \$0_F64CD191" +$C "insert into '\$0_F64CD191' (a) select * from generate_series(1, 5)" >> $OUT +if (( $? != 0 )) ; then + echo "FAILURE populating shard 0 second half" + exit 1 +fi + +echo "Slow down the schema change" +$CM "exec procedure sys.cmd.send('bdb setattr SC_FORCE_DELAY 1')" +if (( $? != 0 )) ; then + echo "FAILURE to setattr sc_force_delay" + exit 1 +fi +$CM "exec procedure sys.cmd.send('scdelay 1000')" +if (( $? != 0 )) ; then + echo "FAILURE to scdelay" + exit 1 +fi + +echo "Launching delayed writes" +delayed_writes & + +echo "Time `${CT} 'select now()'`" + +echo "Partitioning the table" +$C "ALTER TABLE T PARTITIONED BY NONE" +if (( $? != 0 )) ; then + echo "FAILURE to launch schema change" + exit 1 +fi + +echo "Waiting for delayed writers to finish" +wait + +timepart_stats + +$C "select count(*) from t" >> $OUT +if (( $? != 0 )) ; then + echo "FAILURE to count total rows" + exit 1 +fi +$C "select * from t order by 1" >> $OUT +if (( $? != 0 )) ; then + echo "FAILURE to select rows" + exit 1 +fi + + +testcase_output=$(cat $OUT) +expected_output=$(cat output.log) +if [[ "$testcase_output" != "$expected_output" ]]; then + + # print message + echo " ^^^^^^^^^^^^" + echo "The above testcase (${testcase}) has failed!!!" + echo " " + echo "Use 'diff ' to see why:" + echo "> diff ${PWD}/{output.log,run.log}" + echo " " + diff output.log $OUT + echo " " + + # quit + exit 1 +fi + +echo "SUCCESS" diff --git a/tests/timepart_noneres.test/t.csc2 b/tests/timepart_noneres.test/t.csc2 new file mode 100644 index 0000000000..7d2a4dad20 --- /dev/null +++ b/tests/timepart_noneres.test/t.csc2 @@ -0,0 +1,9 @@ +schema +{ + int a + cstring b[10] null=yes +} +keys +{ + "pk" = a +}