diff --git a/binary_dna_conversion.py b/binary_dna_conversion.py
index d143dac7e851ebcd8aedc426e4ccc99097431282..09e0bfcbf18dd27343f51541fb42451e0b3dced3 100755
--- a/binary_dna_conversion.py
+++ b/binary_dna_conversion.py
@@ -1,4 +1,5 @@
 import sys
+import random
 import hashing
 import sequence_control
 
@@ -11,10 +12,10 @@ bit_pair_to_dna = {"00": "A", "01": "G", "10": "T", "11": "C"}
 # "_" is for missing nucleotides
 dna_to_bit_pair = {"A": "00", "G": "01", "T" :"10", "C": "11", "_" :"00"}
 
-bit_to_dna_x = {"0": "A", "1": "T"}
-bit_to_dna_y = {"0": "G", "1": "C"}
+bit_to_dna_AT = {"0": "A", "1": "T"}
+bit_to_dna_GC = {"0": "G", "1": "C"}
 
-dna_to_bit_xy = {"A": "0", "T": "1", "G": "0",  "C": "1", "_": "0"}
+dna_to_bit_ATGC = {"A": "0", "T": "1", "G": "0",  "C": "1", "_": "0"}
 
 
 def binary_to_dna(binary_string: str) -> str:
@@ -41,64 +42,7 @@ def dna_to_binary(sequence: str) -> str:
     return binary_string
 
 
-def binary_to_dna_xy(binary_string: str) -> str:
-    """
-    convert binaries into dna sequence with some properties
-    the binaries are divided into parts of 6 bits
-    for each 6 bits part, 1st bit : (0:A, 1:T)
-                          2nd bit : (0:G, 1:C)
-                          3rd 4th bits are normally converted into dna : (00:A, 01:G, 10:T, 11:C)
-                          5th 6th bits are normally converted into dna
-    this ensure that the total sequence cannot contain homopolymeres > 3,
-    the GC% is in [25%, 75%] and the conversion rate is 1.5bit/base
-    """
-    
-    sequence = ""
-    n_sextuplet, rest = divmod(len(binary_string), 6)
-    for i in range(0, n_sextuplet):
-        bits6 = binary_string[i*6:(i+1)*6]
-        nucleotide_1 = bit_to_dna_x[bits6[0]]
-        nucleotide_2 = bit_to_dna_y[bits6[1]]
-        nucleotide_3 = bit_pair_to_dna[bits6[2:4]]
-        nucleotide_4 = bit_pair_to_dna[bits6[4:6]]    
-        sequence += nucleotide_1 + nucleotide_2 + nucleotide_3 + nucleotide_4
-        
-    bit_rest = binary_string[n_sextuplet*6:n_sextuplet*6+rest] # rest is 0-2-4 bits
-    if len(bit_rest) >= 2: 
-        sequence += bit_to_dna_x[bit_rest[0]] + bit_to_dna_y[bit_rest[1]]
-        if len(bit_rest) == 4:
-            sequence += bit_pair_to_dna[bit_rest[2:4]]
-    
-    return sequence
-
-
-def dna_to_binary_xy(sequence: str) -> str:
-    """
-    convert back a sequence to binaries (opposite of binary_to_dna_xy)
-    #TODO possibility to detect and solve some errors
-    """
-    
-    binary_string = ""
-    n_quadruplet, rest = divmod(len(sequence), 4)
-    for i in range(0, n_quadruplet):
-        nucleotides4 = sequence[i*4:(i+1)*4]
-        bit_1 = dna_to_bit_xy[nucleotides4[0]] # should only be A or T #TODO CG equals error
-        bit_2 = dna_to_bit_xy[nucleotides4[1]] # should only be G or C #TODO AT equals error
-        bits_3456 = dna_to_bit_pair[nucleotides4[2]] + dna_to_bit_pair[nucleotides4[3]]
-        binary_string += bit_1 + bit_2 + bits_3456
-    
-    sequence_rest = sequence[n_quadruplet*4:n_quadruplet*4+rest]
-    if len(sequence_rest) >= 1:
-        binary_string += dna_to_bit_xy[sequence_rest[0]]
-        if len(sequence_rest) >= 2:
-            binary_string += dna_to_bit_xy[sequence_rest[1]]
-            if len(sequence_rest) >= 3:
-                binary_string += dna_to_bit_pair[sequence_rest[2]]
-    
-    return binary_string
-
-
-def binary_to_dna_z(binary_string: str) -> str:
+def binary_to_dna_abaab(binary_string: str) -> str:
     """
     convert binaries into dna sequence with some properties
     the binaries are divided into parts of 8 bits
@@ -108,9 +52,9 @@ def binary_to_dna_z(binary_string: str) -> str:
                           6th 7th bits are normally converted into dna
                           8th bit is converted depending on previous conversion : if previous in {AT}, then (0:G, 1:C), elif in {GC}, then (0:A, 1:T)
     this ensure that the total sequence cannot contain homopolymeres > 3,
-    the GC% is in [40%, 60%] and the conversion rate is 1.6bit/base
+    the GC% is in [40%, 60%] in a window of 5 and the conversion rate is 1.6 bit/base
     
-    warning : need the binary string to be a multiple of 2, or rest is inconsitant with decoding 
+    warning : need the binary string to be a multiple of 2, or rest is inconsistent with decoding 
     ex : a rest of 0 -> A, a rest of 00 -> A
     so how to decode A ?
     only allowing rests multiple of 2 removes ambiguity
@@ -126,15 +70,15 @@ def binary_to_dna_z(binary_string: str) -> str:
         bits8 = binary_string[i*8:(i+1)*8]
         nucleotide_1 = bit_pair_to_dna[bits8[0:2]]
         if nucleotide_1 in ["A", "T"]:
-            nucleotide_2 = bit_to_dna_y[bits8[2]]
+            nucleotide_2 = bit_to_dna_GC[bits8[2]]
         else:
-            nucleotide_2 = bit_to_dna_x[bits8[2]]
+            nucleotide_2 = bit_to_dna_AT[bits8[2]]
         nucleotide_3 = bit_pair_to_dna[bits8[3:5]]
         nucleotide_4 = bit_pair_to_dna[bits8[5:7]]
         if nucleotide_4 in ["A", "T"]:
-            nucleotide_5 = bit_to_dna_y[bits8[7]]
+            nucleotide_5 = bit_to_dna_GC[bits8[7]]
         else:
-            nucleotide_5 = bit_to_dna_x[bits8[7]]
+            nucleotide_5 = bit_to_dna_AT[bits8[7]]
         sequence += nucleotide_1 + nucleotide_2 + nucleotide_3 + nucleotide_4 + nucleotide_5
     # rest should be 0 because all documents contains a round number of octet
     # but some "0" can be added to fill the fragments
@@ -148,22 +92,22 @@ def binary_to_dna_z(binary_string: str) -> str:
             sequence += bit_pair_to_dna[bit_rest[2:4]] # nucleotide_2 also from a pair of bits
         elif len(bit_rest) == 6:   
             if nucleotide_1 in ["A", "T"]:
-                sequence += bit_to_dna_y[bit_rest[2]] # nucleotide_2 from a single bit and depending on nucleotide 1
+                sequence += bit_to_dna_GC[bit_rest[2]] # nucleotide_2 from a single bit and depending on nucleotide 1
             else:
-                sequence += bit_to_dna_x[bit_rest[2]] # nucleotide_2 from a single bit and depending on nucleotide 1
+                sequence += bit_to_dna_AT[bit_rest[2]] # nucleotide_2 from a single bit and depending on nucleotide 1
             
             sequence += bit_pair_to_dna[bit_rest[3:5]] # nucleotide_3 from a pair of bits
-            sequence += bit_to_dna_x[bit_rest[5]] # nucleotide_4 from a single bit
+            sequence += bit_to_dna_AT[bit_rest[5]] # nucleotide_4 from a single bit
 
     # remove the banned words
-    sequence_wo_banwords = remove_ban_words_z_encoding(sequence)
+    sequence_wo_banwords = remove_ban_words_abaab_encoding(sequence)
     
     return sequence_wo_banwords
     
 
-def dna_to_binary_z(sequence: str) -> str:
+def dna_to_binary_abaab(sequence: str) -> str:
     """
-    convert back a sequence to binaries (opposite of binary_to_dna_z)
+    convert back a sequence to binaries (opposite of binary_to_dna_abaab)
     """
     
     binary_string = ""
@@ -171,10 +115,10 @@ def dna_to_binary_z(sequence: str) -> str:
     for i in range(0, n_quintuplet):
         nucleotides5 = sequence[i*5:(i+1)*5]
         bits_12 = dna_to_bit_pair[nucleotides5[0]]
-        bit_3 = dna_to_bit_xy[nucleotides5[1]]
+        bit_3 = dna_to_bit_ATGC[nucleotides5[1]]
         bits_45 = dna_to_bit_pair[nucleotides5[2]]
         bits_67 = dna_to_bit_pair[nucleotides5[3]]
-        bit_8 = dna_to_bit_xy[nucleotides5[4]]
+        bit_8 = dna_to_bit_ATGC[nucleotides5[4]]
         binary_string += bits_12 + bit_3 + bits_45 + bits_67 + bit_8
     
     # handle rest < 5
@@ -190,29 +134,27 @@ def dna_to_binary_z(sequence: str) -> str:
             binary_string += dna_to_bit_pair[sequence_rest[1]] # just act like the rest was 2
 
         elif len(sequence_rest) == 4: # 4 bases -> 2 bits + 1 bit + 2 bits + 1 bit
-            binary_string += dna_to_bit_xy[sequence_rest[1]] # 1 base -> 1 bit
+            binary_string += dna_to_bit_ATGC[sequence_rest[1]] # 1 base -> 1 bit
             binary_string += dna_to_bit_pair[sequence_rest[2]] # 1 base -> 2 bits
-            binary_string += dna_to_bit_xy[sequence_rest[3]] # 1 base -> 1 bit
+            binary_string += dna_to_bit_ATGC[sequence_rest[3]] # 1 base -> 1 bit
     
     return binary_string
 
 
-
-
-def remove_ban_words_z_encoding(sequence: str, z_method_offset=0) -> str:
+def remove_ban_words_abaab_encoding(sequence: str, abaab_method_offset=0) -> str:
     """
-    remove banned words from a sequence encoded with the binary_conversion.binary_to_dna_z() method
-    the z method ensure a GC% in [40;60] on a window of 5 bases (&z&&z), but we the constraint is on larger windows
+    remove banned words from a sequence encoded with the binary_conversion.binary_to_dna_abaab() method
+    the abaab method ensure a GC% in [40;60] on a window of 5 bases (&z&&z), but we the constraint is on larger windows
     changing a base to avoid a banned word should not affect the whole sequence too much
-    the changed base must be a z base, and be A<=>G; T<=>C, so the decoding of the updated sequence will still return the same result
+    the changed base must be a beta base, and be A<=>G; T<=>C, so the decoding of the updated sequence will still return the same result
     
-    if the original sequence has already been fragmented, the 5 bases windows of the z method can be offset, z_method_offset is used to correct this
+    if the original sequence has already been fragmented, the 5 bases windows of the abaab method can be offset, abaab_method_offset is used to correct this
     """
     
     forbidden_sequences = ["GGTCTC", "GAGACC"]
 
-    # change a z base but keep the binary meaning for the decoding
-    dna_change_z = {"A": "G", "T": "C", "G": "A", "C": "T"}
+    # change a beta base but keep the binary meaning for the decoding
+    dna_change_beta = {"A": "G", "T": "C", "G": "A", "C": "T"}
 
     # check for banned words
     indexs_ban_words = []
@@ -225,20 +167,20 @@ def remove_ban_words_z_encoding(sequence: str, z_method_offset=0) -> str:
     if indexs_ban_words == []:
         return sequence
     
-    # indexes of z bases in the sequence (2nd and 5th base every 5 base window)
-    z_indexes = [ z_method_offset+ 5*k+i for k in range(-1, len(sequence)//5) for i in [1,4] ]
+    # indexes of beta bases in the sequence (2nd and 5th base every 5 base window)
+    beta_indexes = [ abaab_method_offset+ 5*k+i for k in range(-1, len(sequence)//5) for i in [1,4] ]
 
     sequence_wo_bans = sequence
 
     for banned_words_indexes in indexs_ban_words:
-        # indexes of z bases that can be changed to remove a banned word
-        potential_changes_index = [ k for k in z_indexes if k >= banned_words_indexes[0] and k < banned_words_indexes[1] ]
+        # indexes of beta bases that can be changed to remove a banned word
+        potential_changes_index = [ k for k in beta_indexes if k >= banned_words_indexes[0] and k < banned_words_indexes[1] ]
 
         for change_index in potential_changes_index:
             sequence_wo_bans_list = list(sequence_wo_bans) # turn into list because python strings are immutable
             
-            # change a z_base without changing the meaning of encoded data
-            sequence_wo_bans_list[change_index] = dna_change_z[sequence_wo_bans[change_index]]
+            # change a beta_base without changing the meaning of encoded data
+            sequence_wo_bans_list[change_index] = dna_change_beta[sequence_wo_bans[change_index]]
             
             index_before_ban_word = max(0, banned_words_indexes[0]-3) # test the changed part from 3 bases before to avoid creation of homopolymeres
             index_after_ban_word = min(len(sequence)-1, banned_words_indexes[1]+3) # test to 3 bases after
@@ -265,12 +207,102 @@ def remove_ban_words_z_encoding(sequence: str, z_method_offset=0) -> str:
     return sequence_wo_bans
 
 
+def binary_to_dna_baa(binary_string: str, GC_window=20) -> str:
+    """
+    convert binaries into dna sequence with some properties
+    the binaries are divided into parts of 5 bits
+    for each 5 bits part, 1st 2nd bits are normally converted into dna : (00:A, 01:G, 10:T, 11:C)
+                          3rd bit is converted depending on previous converted sequence : 
+                              (0:G, 1:C) or (0:A, 1:T), first to break potential homopolymere, if not then to adjust %GC of the previous sequence          
+                          4th 5th bits are normally converted into dna
+
+    this ensure that the total sequence cannot contain homopolymeres > 3,
+    the GC% is in [33%, 66%] in the worst case in a window of 3, but tends to 50% with the adjustments and the conversion rate is 1.66 bit/base
+    
+    warning : need the binary string to be a multiple of 5, or rest is inconsistent with decoding 
+    ex : a rest of 0 -> A, a rest of 00 -> A
+    so how to decode A ?
+    only allowing rests multiple of 5 removes ambiguity (also possible 0,2 or 3 modulo 5)
+
+    """
+    if len(binary_string) % 5 != 0:
+            print("error binary dna conversion, need a binary string multiple of 5")
+            exit(0)
+    
+    sequence = ""
+    n_quintuplets, rest = divmod(len(binary_string), 5)
+    for i in range(0, n_quintuplets):
+        bits5 = binary_string[i*5:(i+1)*5]
+        
+        nucleotide_2 = bit_pair_to_dna[bits5[1:3]]
+        nucleotide_3 = bit_pair_to_dna[bits5[3:5]]
+        
+        # make nucleotide 2
+        if len(sequence) >= 3:
+            # check 3 previous encoded bases
+            if sequence[-3] == sequence[-2] == sequence[-1] or sequence[-2] == sequence[-1] == nucleotide_2 or sequence[-1] == nucleotide_2 == nucleotide_3:
+                # break homopolymere
+                if sequence[-1] in ["A", "T"]:
+                    nucleotide_1 = bit_to_dna_GC[bits5[0]]
+                else:
+                    nucleotide_1 = bit_to_dna_AT[bits5[0]]
+            else:
+                # adjust GC%
+                check_window = sequence[-min(len(sequence), GC_window):]
+                if check_window.count("A")+check_window.count("T") > check_window.count("G")+check_window.count("C"):
+                    nucleotide_1 = bit_to_dna_GC[bits5[0]]
+                else:
+                    nucleotide_1 = bit_to_dna_AT[bits5[0]]
+    
+        else:
+            if nucleotide_2 in ["A", "T"]:
+                nucleotide_1 = bit_to_dna_GC[bits5[0]]
+            else:
+                nucleotide_1 = bit_to_dna_AT[bits5[0]]
+        
+        sequence += nucleotide_1 + nucleotide_2 + nucleotide_3
+
+    bit_rest = binary_string[n_quintuplets*5:n_quintuplets*5+rest] # rest is 0-2-3 bits
+
+    # last conversions depends on the length of the rest, 2 bits = 1 base; 3 bits = 2 bases
+    if len(bit_rest) >= 2:
+        nucleotide_1 = bit_pair_to_dna[bit_rest[0:2]] # nucleotide_1 from a pair of bits
+        sequence += nucleotide_1
+        if len(bit_rest) == 3: 
+            if nucleotide_1 in ["A", "T"]:
+                sequence += bit_to_dna_GC[bit_rest[2]] # nucleotide_2 from a single bit and depending on nucleotide 1
+            else:
+                sequence += bit_to_dna_AT[bit_rest[2]] # nucleotide_2 from a single bit and depending on nucleotide 1
+            
+    return sequence
+
+
+def test_conversion():
+    
+    binary_size = 15000
+    
+    random_binary = ""
+    for i in range(binary_size):
+         random_binary += random.Random().choice(['0', '1'])
+        
+    seq_z = binary_to_dna_abaab(random_binary)
+    seq_aba = binary_to_dna_aba(random_binary)
+    seq_baa = binary_to_dna_baa(random_binary)
+    
+    print("check z encoding")
+    sequence_control.sanity_check(seq_z)
+    print("check aba encoding")
+    sequence_control.sanity_check(seq_aba)
+    print("check baa encoding")
+    sequence_control.sanity_check(seq_baa)
+    
+    
 # =================== main ======================= #
 if __name__ == '__main__':
     #doc_path = sys.argv[1]
-    binary_string = sys.argv[1]
-    seq = binary_to_dna_z(binary_string)
+    #binary_string = sys.argv[1]
+    #seq = binary_to_dna_abaab(binary_string)
     #print(seq)
-    #print(dna_to_binary_z(seq))
-    
+    #print(dna_to_binary_abaab(seq))
+    test_conversion()
 
diff --git a/file_to_dna.py b/file_to_dna.py
index a84ab59c6ebf058e7c943f89d40547d68c98696a..d5cc0f457f7ce02a32a983e409ec4aa1739f1c13 100755
--- a/file_to_dna.py
+++ b/file_to_dna.py
@@ -90,7 +90,7 @@ def convert_file_to_bits(input_path: str) -> str:
     
 def size_of_dna_from_bit_len(bit_length):
     """
-    assume binary to dna_z conversion
+    assume binary to dna_abaab conversion
     get the size the dna sequence will be from the given binary string length
     conversion rate is 1.6 bit/base
     """
@@ -124,7 +124,7 @@ def get_max_binary_len() -> int:
 
 def size_binary_from_dna_len(dna_length):
     """
-    assume dna_z to binary conversion
+    assume dna_abaab to binary conversion
     get the length the binary string will be from the given dna sequence
     conversion rate is 1.6bit/base
     """
@@ -216,7 +216,7 @@ def encode_file(input_path: str, output_path: str) -> None:
     filtered_binary_string += binary_check_sum
         
     # convert binaries into dna sequence
-    sequence = bdc.binary_to_dna_z(filtered_binary_string)
+    sequence = bdc.binary_to_dna_abaab(filtered_binary_string)
     
     if add_A_at_end: sequence += "A"
     
@@ -274,7 +274,7 @@ def decode_file(input_path: str, output_path: str) -> None:
     sequence = "".join(sub_sequences_dict.values())
 
     # convert the dna sequence into a binary string
-    binary_from_dna_string = bdc.dna_to_binary_z(sequence)
+    binary_from_dna_string = bdc.dna_to_binary_abaab(sequence)
     
     if not binary_from_dna_string:
         print("warning file conversion, decoding an empty file")
@@ -330,8 +330,8 @@ if __name__ == '__main__':
     #    seq = encode_file("toto", i)
         #decode_file(seq, "toto")
     #encode_file("", "test")
-    #seq = binary_to_dna_z(binary_string)
+    #seq = binary_to_dna_abaab(binary_string)
     #print(seq)
-    #print(dna_to_binary_z(seq))
+    #print(dna_to_binary_abaab(seq))