import re

from utils import get_unique_items
from functions.separator import separator


def remove_unused_char(string_list, min_length=5):
    """Return the items of ``string_list`` that look like usable words.

    An item is kept only when all of the following hold:

    * it is not made up purely of digits,
    * it matches the letter pattern below from its start, i.e. it has at
      least one ASCII letter before any newline (the empty string never
      matches),
    * it is at least ``min_length`` characters long.

    Args:
        string_list: iterable of strings to filter.
        min_length: minimum number of characters an item needs in order
            to be kept. Defaults to 5, the previously hard-coded value.

    Returns:
        A new list with the surviving items in their original order.
    """
    # Raw string on purpose: "\w" inside a plain literal is an invalid
    # escape sequence and triggers a warning on recent Python versions.
    # Compiled once here instead of being looked up on every iteration.
    letter_pattern = re.compile(r".*[a-zA-Z]+\w*")

    return [
        item
        for item in string_list
        if not item.isdigit()
        and letter_pattern.match(item)
        and len(item) >= min_length
    ]


def _pruned_tokens(text):
    """Run one input string through the token pipeline used for comparison.

    Steps, in order: split on single spaces, pass the pieces through
    ``get_unique_items``, pass that result through ``separator``, sort it,
    filter it with ``remove_unused_char``, then apply ``separator`` again.
    """
    # NOTE(review): get_unique_items and separator are defined outside this
    # file; presumably they de-duplicate and further split tokens -- confirm.
    unique_words = get_unique_items(text.split(' '))
    ordered = sorted(separator(unique_words))
    return separator(remove_unused_char(ordered))


def similarity_prune_very(string1: str, string2: str):
    """Count the pruned tokens of ``string1`` that also occur in ``string2``.

    Both strings go through the same token pipeline. Each element produced
    for ``string1`` adds one to the result when it is found (via ``in``)
    in the result produced for ``string2``; repeated elements on the
    ``string1`` side are therefore counted once per occurrence.

    Returns:
        The number of matching elements as an int (0 when nothing matches).
    """
    # string1 is processed completely before string2, as before.
    tokens1 = _pruned_tokens(string1)
    tokens2 = _pruned_tokens(string2)
    return sum(1 for token in tokens1 if token in tokens2)


    # return len(set(new_spliced_string1) & set(new_spliced_string2))
