The following code also adds two additional gourds.
- Date guard: It's impossible to delete data newer than 3 months
- Time guard: If the process takes more than 10 minutes it quite.
1. First, we create a dynamic query utility procedure.
CREATE PROCEDURE `dynamicQuery`(query_string text)
BEGIN
set @st=query_string;
PREPARE stmt FROM @st;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
END
2. For debugging we'll add a log table.
CREATE TABLE `log` (
`min` int(11) DEFAULT NULL,
`max` int(11) DEFAULT NULL,
`target` date DEFAULT NULL,
`scan` date DEFAULT NULL,
`scanid` int(11) DEFAULT NULL,
`counter` int(11) DEFAULT NULL,
`created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`procid` int(11) NOT NULL DEFAULT '0',
`object` varchar(63) DEFAULT NULL
) ENGINE=MyISAM;
3. Create delete_until procedure.
CREATE PROCEDURE `delete_until`(tbl_name varchar(63),until_date DATE)
main: BEGIN
SET @TNAME=tbl_name;
SET @TARGET=until_date;
SET @MX=0;
SET @MN=0;
SET @BATCH=7000;
SET @VALIDTARGET=1;
SET @DELETED=-100;
SET @PROC=connection_id();
SET @ETSTART=UNIX_TIMESTAMP();
SET @TIMEOUT=600; -- 10 minutes
-- 3 month guard. Its impossible to delete data for the last three month
if datediff(date(now()),@TARGET) < 91 then
select 'only delete data that is older then three month';
LEAVE main;
end if;
SET @DELETED=0;
-- Get min and max id
SET @q=CONCAT("SELECT max(id), min(id) into @MX,@MN from ",@TNAME);
call dynamicQuery(@q);
-- Check the min id is older then @TARGET
SET @cond=CONCAT("SELECT EXISTS ( SELECT id FROM ",@TNAME," WHERE id = @MN AND created_at < @TARGET ) into @VALIDTARGET;");
call dynamicQuery(@cond);
-- Loop from min to max in 7000 batch
deleteloop: WHILE @VALIDTARGET>0 DO
-- Delete rows
SET @q=CONCAT("delete from ",@TNAME," where id between @MN and @MN+@BATCH and created_at < @TARGET;");
call dynamicQuery(@q);
-- Accumulate rows for summary
SET @DELETED = @DELETED+@BATCH;
-- Promote @MN Iterator
SET @MN=@MN+@BATCH+1;
-- If the largest id in the next iteration is newer then @TARGET, exit condition is met. @VALIDTARGET=0.
call dynamicQuery(@cond);
-- insert to log
insert into log(object,procid,min,max,target,scan,scanid,counter) values(@TNAME,@PROC,@MN,@Mx,@TARGET,null,@VALIDTARGET,@DELETED);
SET @ETEND=UNIX_TIMESTAMP();
if @ETEND-@ETSTART > @TIMEOUT then -- Ten minutes timeout guard exceeded.
LEAVE deleteloop;
end if;
END WHILE;
SELECT @DELETED;
END
4. A chunk insert as a bonus.
CREATE PROCEDURE `chunk_insert`(tbl_source varchar(63),tbl_target varchar(63))
BEGIN
SET @TOTABLE=tbl_target;
SET @FROMTABLE=tbl_source;
SET @MX=0;
SET @MN=0;
SET @BATCH=7000;
SET @TOID=0;
SET @LASTLOOP=0;
SET @INSERTED=0;
SET @PROC=connection_id();
-- Get min and max id from remote table
SET @q=CONCAT("SELECT max(id), min(id) into @MX,@MN from ",@FROMTABLE);
call dynamicQuery(@q);
-- Loop from min to max in 7000 batch
WHILE @LASTLOOP=0 DO
IF @MN+@BATCH > @MX THEN
SET @LASTLOOP=1;
SET @TOID=@MX;
SET @INSERTED=@INSERTED+(@MX-@MN);
ELSE
SET @TOID=@MN+@BATCH;
SET @INSERTED=@INSERTED+@BATCH;
END IF;
insert into log (procid,min,max,target,scan,scanid,counter) values(@PROC,@MN,@Mx,null,null,@TOID,@LASTLOOP);
SET @q=CONCAT('insert into ',@TOTABLE,' select * from ',@FROMTABLE,' where id between @MN and @TOID;');
call dynamicQuery(@q);
SET @MN=@TOID+1;
END WHILE;
select @INSERTED;
END
5. Usage: Delete data that is 5 months old from table `my_huge_table_name`
call delete_until('my_huge_table_name',DATE(NOW() - INTERVAL 5 MONTH));
No comments:
Post a Comment