Tuesday, March 15, 2016

Java vs Golang - Parameterized tests

Although I like Java, I think that with all its drawback it brings years of knowledge and optimization, I find too many times that when you "think in Java" you tend to creat complicated solutions. By complicated I mean hard for a developer to grasp and to remember.

In the following example we can see several differences that make the Java code look `old` and clumsy and not fun.
1. 7 imports  VS 1 in Golang.
2. Everything is a class. Fibonacci is a function in Golang.
3. Why hiding from the developer the fact that the test runs in a loop?
4. All these annotations.

Simplicity matter!
When you try to produce a simple code you end up with less dependency, less code, less magic, less moving parts and less things to know in order to produce quality code.
And as always, but especially in programming, Less Is More.


Simplicity Matters by Rich Hickey

The Java way from JUnit-team
 import static org.junit.Assert.assertEquals;  
 import java.util.Arrays;  
 import java.util.Collection;  
 import org.junit.Test;  
 import org.junit.runner.RunWith;  
 import org.junit.runners.Parameterized;  
 import org.junit.runners.Parameterized.Parameters;  
 @RunWith(Parameterized.class)  
 public class FibonacciTest {  
   @Parameters  
   public static Collection<Object[]> data() {  
     return Arrays.asList(new Object[][] {     
          { 0, 0 }, { 1, 1 }, { 2, 1 }, { 3, 2 }, { 4, 3 }, { 5, 5 }, { 6, 8 }   
       });  
   }  
   private int fInput;  
   private int fExpected;  
   public FibonacciTest(int input, int expected) {  
     fInput= input;  
     fExpected= expected;  
   }  
   @Test  
   public void test() {  
     assertEquals(fExpected, Fibonacci.compute(fInput));  
   }  
 }  
 public class Fibonacci {  
   public static int compute(int n) {  
     int result = 0;  
     if (n <= 1) {   
       result = n;   
     } else {   
       result = compute(n - 1) + compute(n - 2);   
     }  
     return result;  
   }  
 }  


The Golang way
 package Fibonacci  
 import "testing"  
 func TestFibonacci(t *testing.T) {  
      parameters := []struct {  
           input, expected int  
      }{  
           {0, 0}, {1, 1}, {2, 1}, {3, 2}, {4, 3}, {5, 5}, {6, 8},  
      }  
      for i := range parameters {  
           actual := Fibonacci(parameters[i].input)  
           if actual != parameters[i].expected {  
                t.Logf("expected%d: , actual:%d", parameters[i].expected, actual)  
                t.Fail()  
           }  
      }  
 }  
 func Fibonacci(n int) (result int) {
 if n <= 1 {
  result = n
 } else {
  result = Fibonacci(n-1) + Fibonacci(n-2)
 }
 return
}

Wednesday, March 2, 2016

MySql huge tables chunked delete

In order to avoid table locking while deleting data from huge tables, it's recommended to delete chunks of rows.


The following code also adds two additional gourds.

  •     Date guard: It's impossible to delete data newer than 3 months
  •     Time guard: If the process takes more than 10 minutes it quite.

1. First, we create a dynamic query utility procedure.
 CREATE PROCEDURE `dynamicQuery`(query_string text)

 BEGIN

     set @st=query_string;

     PREPARE stmt FROM @st;

     EXECUTE stmt;

     DEALLOCATE PREPARE stmt;

 END



2. For debugging we'll add a log table.


 CREATE TABLE `log` ( 

  `min` int(11) DEFAULT NULL, 

  `max` int(11) DEFAULT NULL, 

  `target` date DEFAULT NULL, 

  `scan` date DEFAULT NULL, 

  `scanid` int(11) DEFAULT NULL, 

  `counter` int(11) DEFAULT NULL, 

  `created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, 

  `procid` int(11) NOT NULL DEFAULT '0', 

  `object` varchar(63) DEFAULT NULL 

 ) ENGINE=MyISAM; 



3. Create delete_until procedure.

 CREATE PROCEDURE `delete_until`(tbl_name varchar(63),until_date DATE)  
 main: BEGIN  
   SET @TNAME=tbl_name;  
   SET @TARGET=until_date;  
   SET @MX=0;  
   SET @MN=0;  
   SET @BATCH=7000;  
   SET @VALIDTARGET=1;  
   SET @DELETED=-100;  
   SET @PROC=connection_id();  
   SET @ETSTART=UNIX_TIMESTAMP();  
   SET @TIMEOUT=600; -- 10 minutes  
   -- 3 month guard. Its impossible to delete data for the last three month  
      if datediff(date(now()),@TARGET) < 91 then  
           select 'only delete data that is older then three month';  
           LEAVE main;  
   end if;  
      SET @DELETED=0;  
      -- Get min and max id  
      SET @q=CONCAT("SELECT max(id), min(id) into @MX,@MN from ",@TNAME);  
      call dynamicQuery(@q);  
      -- Check the min id is older then @TARGET  
      SET @cond=CONCAT("SELECT EXISTS ( SELECT id FROM ",@TNAME," WHERE id = @MN AND created_at < @TARGET ) into @VALIDTARGET;");  
      call dynamicQuery(@cond);  
      -- Loop from min to max in 7000 batch  
      deleteloop: WHILE @VALIDTARGET>0 DO  
           -- Delete rows   
           SET @q=CONCAT("delete from ",@TNAME," where id between @MN and @MN+@BATCH and created_at < @TARGET;");  
           call dynamicQuery(@q);  
           -- Accumulate rows for summary  
           SET @DELETED = @DELETED+@BATCH;  
           -- Promote @MN Iterator  
           SET @MN=@MN+@BATCH+1;  
           -- If the largest id in the next iteration is newer then @TARGET, exit condition is met. @VALIDTARGET=0.  
           call dynamicQuery(@cond);  
           -- insert to log  
           insert into log(object,procid,min,max,target,scan,scanid,counter) values(@TNAME,@PROC,@MN,@Mx,@TARGET,null,@VALIDTARGET,@DELETED);  
           SET @ETEND=UNIX_TIMESTAMP();  
           if @ETEND-@ETSTART > @TIMEOUT then -- Ten minutes timeout guard exceeded.  
                LEAVE deleteloop;  
           end if;  
      END WHILE;  
      SELECT @DELETED;  
 END  


4. A chunk insert as a bonus.

 CREATE PROCEDURE `chunk_insert`(tbl_source varchar(63),tbl_target varchar(63)) 

 BEGIN 

   SET @TOTABLE=tbl_target; 

   SET @FROMTABLE=tbl_source; 

   SET @MX=0; 

   SET @MN=0; 

   SET @BATCH=7000; 

   SET @TOID=0; 

   SET @LASTLOOP=0; 

   SET @INSERTED=0; 

   SET @PROC=connection_id(); 

      -- Get min and max id from remote table 

   SET @q=CONCAT("SELECT max(id), min(id) into @MX,@MN from ",@FROMTABLE); 

   call dynamicQuery(@q); 

   -- Loop from min to max in 7000 batch 

   WHILE @LASTLOOP=0 DO 

           IF @MN+@BATCH > @MX THEN 

                SET @LASTLOOP=1; 

                SET @TOID=@MX; 

       SET @INSERTED=@INSERTED+(@MX-@MN); 

           ELSE 

                SET @TOID=@MN+@BATCH; 

       SET @INSERTED=@INSERTED+@BATCH; 

           END IF; 

     insert into log (procid,min,max,target,scan,scanid,counter) values(@PROC,@MN,@Mx,null,null,@TOID,@LASTLOOP); 

           SET @q=CONCAT('insert into ',@TOTABLE,' select * from ',@FROMTABLE,' where id between @MN and @TOID;'); 

     call dynamicQuery(@q); 

     SET @MN=@TOID+1; 

   END WHILE; 

   select @INSERTED; 

 END 


5. Usage: Delete data that is 5 months old from table `my_huge_table_name`

call delete_until('my_huge_table_name',DATE(NOW() - INTERVAL 5 MONTH));