# >>===================================================<<
# ||+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+||
# ||--------------------|B|I|F|-----------------------|||
# |||B|u|r|r|o|w|s|-|W|h|e|e|l|e|r|-|T|r|a|n|s|f|o|r|m|||
# ||+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+||
# >>===================================================<<

# Code by Leo Ackermann (2026)


def bwt(seq):
    '''
    Compute the Burrows-Wheeler transform of the input sequence
    COMPLEXITY: O(|seq|^2 · log|seq|)
    '''

    studded_seq = seq + '$' + seq
    rotations = [studded_seq[i:i+len(seq)+1] for i in range(len(seq) + 1)]
    return ''.join([row[-1] for row in sorted(rotations)])


def ibwt(bwt_seq):
    '''
    Retrieve the string whose Burrows-Wheeler transform is given as input
    COMPLEXITY: O(|bwt_seq|^3 · log|bwt_seq|)
    '''

    first_cols = ["" for _ in range(len(bwt_seq))]
    for _ in range(len(bwt_seq)):
        first_cols = sorted(map(lambda x: x[0] + x[1], zip(bwt_seq, first_cols)))
    return first_cols[0][1:]


def rle(seq):
    '''
    Compute the Run-Length-Encoding of the input sequence
    COMPLEXITY: O(|seq|)
    '''

    seq_rle = ""
    current_char = seq[0]
    counter_current_char = 0
    for c in seq:
        if c == current_char:
            counter_current_char += 1
        else:
            if counter_current_char > 1:
                seq_rle += str(counter_current_char)
            seq_rle += current_char
            current_char = c
            counter_current_char = 1
    if counter_current_char > 1:
        seq_rle += str(counter_current_char)
    seq_rle += c

    return seq_rle


def irle(rle_seq):
    '''
    Retrieve the string whose Run-Length-Encoding is given as input
    COMPLEXITY: O(|rle_seq|)
    '''

    seq = ""
    i = 0
    while i < len(rle_seq):
        str_counter = ""
        while rle_seq[i].isdigit():
            str_counter += rle_seq[i]
            i += 1
        counter = int(str_counter) if str_counter else 1
        seq += counter * rle_seq[i]
        i += 1

    return seq


def open_file_as_string(filepath):
    '''
    Open a file as a string, ignoring the first line and newline symbols
    '''

    with open(filepath) as f:
        return ''.join(f.read().splitlines()[1:])


def report_compression_capabilities():
    '''
    Compute the compression ratio for a few files
    '''
    filenames = ["random.txt", "dickens.txt", "dudh.txt"]

    print()
    for f in filenames:
        s = open_file_as_string(f)
        len_s = len(s)
        len_rle_s = len(rle(s))
        len_rle_bwt_s = len(rle(bwt(s)))
        print(f)
        print(f"  len(s):\t\t\t{len_s}")
        print(f"  len(rle(s)):\t\t{len_rle_s} ({int((len_rle_s-len_s)/len_s*100):+}%)")
        print(f"  len(bwt(rle(s))):\t{len_rle_bwt_s} ({int((len_rle_bwt_s-len_s)/len_s*100):+}%)")
        print()