Wednesday, March 2, 2016

MySql huge tables chunked delete

In order to avoid table locking while deleting data from huge tables, it's recommended to delete chunks of rows.


The following code also adds two additional gourds.

  •     Date guard: It's impossible to delete data newer than 3 months
  •     Time guard: If the process takes more than 10 minutes it quite.

1. First, we create a dynamic query utility procedure.
 CREATE PROCEDURE `dynamicQuery`(query_string text)

 BEGIN

     set @st=query_string;

     PREPARE stmt FROM @st;

     EXECUTE stmt;

     DEALLOCATE PREPARE stmt;

 END



2. For debugging we'll add a log table.


 CREATE TABLE `log` ( 

  `min` int(11) DEFAULT NULL, 

  `max` int(11) DEFAULT NULL, 

  `target` date DEFAULT NULL, 

  `scan` date DEFAULT NULL, 

  `scanid` int(11) DEFAULT NULL, 

  `counter` int(11) DEFAULT NULL, 

  `created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, 

  `procid` int(11) NOT NULL DEFAULT '0', 

  `object` varchar(63) DEFAULT NULL 

 ) ENGINE=MyISAM; 



3. Create delete_until procedure.

 CREATE PROCEDURE `delete_until`(tbl_name varchar(63),until_date DATE)  
 main: BEGIN  
   SET @TNAME=tbl_name;  
   SET @TARGET=until_date;  
   SET @MX=0;  
   SET @MN=0;  
   SET @BATCH=7000;  
   SET @VALIDTARGET=1;  
   SET @DELETED=-100;  
   SET @PROC=connection_id();  
   SET @ETSTART=UNIX_TIMESTAMP();  
   SET @TIMEOUT=600; -- 10 minutes  
   -- 3 month guard. Its impossible to delete data for the last three month  
      if datediff(date(now()),@TARGET) < 91 then  
           select 'only delete data that is older then three month';  
           LEAVE main;  
   end if;  
      SET @DELETED=0;  
      -- Get min and max id  
      SET @q=CONCAT("SELECT max(id), min(id) into @MX,@MN from ",@TNAME);  
      call dynamicQuery(@q);  
      -- Check the min id is older then @TARGET  
      SET @cond=CONCAT("SELECT EXISTS ( SELECT id FROM ",@TNAME," WHERE id = @MN AND created_at < @TARGET ) into @VALIDTARGET;");  
      call dynamicQuery(@cond);  
      -- Loop from min to max in 7000 batch  
      deleteloop: WHILE @VALIDTARGET>0 DO  
           -- Delete rows   
           SET @q=CONCAT("delete from ",@TNAME," where id between @MN and @MN+@BATCH and created_at < @TARGET;");  
           call dynamicQuery(@q);  
           -- Accumulate rows for summary  
           SET @DELETED = @DELETED+@BATCH;  
           -- Promote @MN Iterator  
           SET @MN=@MN+@BATCH+1;  
           -- If the largest id in the next iteration is newer then @TARGET, exit condition is met. @VALIDTARGET=0.  
           call dynamicQuery(@cond);  
           -- insert to log  
           insert into log(object,procid,min,max,target,scan,scanid,counter) values(@TNAME,@PROC,@MN,@Mx,@TARGET,null,@VALIDTARGET,@DELETED);  
           SET @ETEND=UNIX_TIMESTAMP();  
           if @ETEND-@ETSTART > @TIMEOUT then -- Ten minutes timeout guard exceeded.  
                LEAVE deleteloop;  
           end if;  
      END WHILE;  
      SELECT @DELETED;  
 END  


4. A chunk insert as a bonus.

 CREATE PROCEDURE `chunk_insert`(tbl_source varchar(63),tbl_target varchar(63)) 

 BEGIN 

   SET @TOTABLE=tbl_target; 

   SET @FROMTABLE=tbl_source; 

   SET @MX=0; 

   SET @MN=0; 

   SET @BATCH=7000; 

   SET @TOID=0; 

   SET @LASTLOOP=0; 

   SET @INSERTED=0; 

   SET @PROC=connection_id(); 

      -- Get min and max id from remote table 

   SET @q=CONCAT("SELECT max(id), min(id) into @MX,@MN from ",@FROMTABLE); 

   call dynamicQuery(@q); 

   -- Loop from min to max in 7000 batch 

   WHILE @LASTLOOP=0 DO 

           IF @MN+@BATCH > @MX THEN 

                SET @LASTLOOP=1; 

                SET @TOID=@MX; 

       SET @INSERTED=@INSERTED+(@MX-@MN); 

           ELSE 

                SET @TOID=@MN+@BATCH; 

       SET @INSERTED=@INSERTED+@BATCH; 

           END IF; 

     insert into log (procid,min,max,target,scan,scanid,counter) values(@PROC,@MN,@Mx,null,null,@TOID,@LASTLOOP); 

           SET @q=CONCAT('insert into ',@TOTABLE,' select * from ',@FROMTABLE,' where id between @MN and @TOID;'); 

     call dynamicQuery(@q); 

     SET @MN=@TOID+1; 

   END WHILE; 

   select @INSERTED; 

 END 


5. Usage: Delete data that is 5 months old from table `my_huge_table_name`

call delete_until('my_huge_table_name',DATE(NOW() - INTERVAL 5 MONTH));

No comments: