# >>===================================================<<
# ||+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+||
# ||--------------------|A|L|G|-----------------------|||
# |||B|u|r|r|o|w|s|-|W|h|e|e|l|e|r|-|T|r|a|n|s|f|o|r|m|||
# ||+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+||
# >>===================================================<<

# Template by Leo Ackermann (2025)
# Code by ____

from ks import simple_kark_sort
import matplotlib.pyplot as plt
import random


# >>==========================================================================<<
# >>                          Run-length encoding                             <<
# >>==========================================================================<<

def compress_RLE(seq):
    '''
    Compress a sequence using run-length encoding, as a list of (letter, count)
    couples
    '''
    pass


def decompress_RLE(rle):
    '''
    Decompress a run-length encoded sequence into a string
    '''
    pass


def evaluate_RLE_compression(fm, plot=False):
    '''
    TODO (exo). Write a clear docstring.
    '''

    crle = compress_RLE(fm.get_string())
    cbwtrle = compress_RLE(fm.bwt)

    print()
    print(f"#runs in seq: \t {len(crle)}")
    print(f"#runs in bwt: \t {len(cbwtrle)} (-{(len(crle)-len(cbwtrle))/len(crle)*100 :.1f}%)")
    print()

    if plot:
        fig, ax = plt.subplots(figsize=(16, 10))
        ax.hist([x[1] for x in cbwtrle], bins='auto', align='left')
        for rect in ax.patches:
            height = rect.get_height()
            ax.annotate(f'{int(height)}', xy=(rect.get_x()+rect.get_width()/2, height),
                    xytext=(0, 5), textcoords='offset points', ha='center', va='bottom')
        plt.title("Distribution of run lengths")
        plt.show()


# >>==========================================================================<<
# >>                              FM-index class                              <<
# >>==========================================================================<<

class FMindex:

    # >>===[ Class constructor ]================================================

    def __init__(self, seq, verbose=False):

        # Dummy fields declaration, so that pretty printer works throughout the
        # tutorial
        # NOTE. Respective definitions are defered to their init functions
        letters = sorted(set(seq + '$')) + ['_']
        self.sa = ['_' for _ in range(len(seq) + 1)]
        self.bwt = ['_' for _ in range(len(seq) + 1)]
        self.fm_count = dict([(x, '_') for x in letters])
        self.fm_rank = ['_' for _ in range(len(seq) + 1)]
        self.fm_ranks = dict([(x, ['_' for _ in range(len(seq) + 1)]) for x in letters])
        self.next_smallest_letter = dict([(x, '_') for x in letters])


    # >>===[ Pretty printer ]===================================================

    def pretty_print(self):
        '''
        Pretty printer for the FM index
        '''
        # NOTE. Some computations are inefficient. But we don't care in this
        # context (printable => small)

        F = sorted(self.bwt)
        C_as_array = list(map(lambda x: self.fm_count[x], F))
        letters = sorted(set(self.bwt))

        print()
        print("#\t\tC\tF\tL\tR\t\t" + '\t'.join([f"R[{x}]" for x in letters]))
        print("------------------------" + "--------" * len(letters))
        for i in range(len(self.bwt)):
            print(f"{i}\t\t{C_as_array[i]}\t{F[i]}\t{self.bwt[i]}\t{self.fm_rank[i]}\t\t" \
                  + '\t\t'.join([f"{self.fm_ranks[x][i]}" for x in letters]))
        print()


    # >>===[ Attribute initialisation functions ]===============================


    # >>===[ Pattern matching functions ]=======================================


# >>==========================================================================<<
# >>                             Miscellaneous                                <<
# >>==========================================================================<<

def open_fasta_as_string(filepath):
    with open(filepath) as fastafile:
        return ''.join(fastafile.read().splitlines()[1:]).upper()

def random_dna_of_length(n):
    dna = ""
    letters = ['A', 'T', 'C', 'G']
    for _ in range(n):
        dna += random.choice(letters)
    return dna