# agi_utils.py

  1. """
  2. Copyright 2023 Yingqiang Ge
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. """
  13. __author__ = "Yingqiang Ge"
  14. __copyright__ = "Copyright 2023, OpenAGI"
  15. __date__ = "2023/04/10"
  16. __license__ = "Apache 2.0"
  17. __version__ = "0.0.1"
  18. from evaluate import load
  19. import numpy as np
  20. import torch
  21. from sentence_transformers import SentenceTransformer, util
  22. def txt_eval(predictions, references, bertscore, device="cuda"):
  23. score = bertscore.compute(
  24. predictions=predictions,
  25. references=references,
  26. lang="en",
  27. model_type="microsoft/deberta-xlarge-mnli",
  28. device=device)["f1"]
  29. return score
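
# A minimal usage sketch for txt_eval (kept as a comment so importing this module
# stays side-effect free). The "bertscore" metric name comes from the Hugging Face
# `evaluate` package already imported above; running on CPU is an assumption made
# here for illustration only.
#
#   bertscore = load("bertscore")
#   f1_scores = txt_eval(["a cat sits on the mat"],
#                        ["a cat is sitting on the mat"],
#                        bertscore, device="cpu")
#   print(f1_scores)
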
def txt_loader(path):
    """Read a text file and return its lines as a list of strings."""
    text = []
    with open(path) as f:
        for line in f.readlines():
            text.append(line)
    return text
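
# Usage sketch for txt_loader (comment only; "descriptions.txt" is a hypothetical
# path standing in for whatever text file the caller loads, one entry per line):
#
#   lines = txt_loader("descriptions.txt")
#   print(len(lines), lines[0])
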
def image_similarity(im1, im2, model, extractor):
    """Embed two batches of images and return the mean distance between paired embeddings."""
    batch_size = len(im1)
    # Preprocess the two image batches
    img1 = extractor(im1, return_tensors="pt")
    img2 = extractor(im2, return_tensors="pt")
    # Compute their embeddings
    with torch.no_grad():
        emb1 = model(img1.pixel_values)[0].squeeze().numpy()
        emb2 = model(img2.pixel_values)[0].squeeze().numpy()
    # Average the Frobenius-norm distance between corresponding embeddings
    dist = np.mean(
        np.array([np.linalg.norm(emb1[i] - emb2[i], ord='fro') for i in range(batch_size)])
    )
    return dist
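
# Usage sketch for image_similarity (comment only). The ViT checkpoint below is an
# assumption, since the caller passes in whichever vision backbone and processor
# the project actually uses, and the image paths are hypothetical. Note that the
# function indexes embeddings per item, so it expects batches of two or more images.
#
#   from transformers import ViTImageProcessor, ViTModel
#   from PIL import Image
#
#   extractor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")
#   vit = ViTModel.from_pretrained("google/vit-base-patch16-224")
#   batch_a = [Image.open(p).convert("RGB") for p in ("a1.png", "a2.png")]
#   batch_b = [Image.open(p).convert("RGB") for p in ("b1.png", "b2.png")]
#   print(image_similarity(batch_a, batch_b, vit, extractor))
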
def module_seq_filter(module_seq, task_id):
    """Check that the first module's input type and the last module's output type match the task."""
    io_dict = {
        "Colorization": ['image', 'image'],
        "Image Denoising": ['image', 'image'],
        "Image Deblurring": ['image', 'image'],
        "Image Super Resolution": ['image', 'image'],
        "Image Classification": ['image', 'text'],
        "Image Captioning": ['image', 'text'],
        "Object Detection": ['image', 'text'],
        "Text Summarization": ['text', 'text'],
        "Text Generation": ['text', 'text'],
        "Machine Translation": ['text', 'text'],
        "Fill Mask": ['text', 'text'],
        "Sentiment Analysis": ['text', 'text'],
        "Text to Image Generation": ['text', 'image'],
        "Question Answering": ['text-text', 'text'],
        "Visual Question Answering": ['image-text', 'text'],
    }
    module_seq_list = module_seq.split(", ")
    input_type = io_dict[module_seq_list[0]][0]
    output_type = io_dict[module_seq_list[-1]][1]
    if input_type == "image" and output_type == "image" and 0 <= task_id <= 14:
        return True
    elif input_type == "image" and output_type == "text" and 15 <= task_id <= 104:
        return True
    elif input_type == "text" and output_type == "image" and 105 <= task_id <= 107:
        return True
    elif input_type == "text" and output_type == "text" and 108 <= task_id <= 125:
        return True
    elif input_type == "image-text" and output_type == "text" and 126 <= task_id <= 170:
        return True
    elif input_type == "text-text" and output_type == "text" and 171 <= task_id <= 188:
        return True
    else:
        return False
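
# Usage sketch for module_seq_filter (comment only). The hard-coded task-id ranges
# group OpenAGI tasks by their expected input/output types; task_id=20 below is
# just an illustrative value from the image-to-text range.
#
#   module_seq_filter("Image Deblurring, Image Captioning", task_id=20)        # True
#   module_seq_filter("Text Summarization, Machine Translation", task_id=20)   # False
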
def whole_module_seq_filter(module_seq, task_id):
    """Check that adjacent modules are type-compatible and that the overall
    input/output types of the sequence match the task."""
    io_dict = {
        "Colorization": ['image', 'image'],
        "Image Denoising": ['image', 'image'],
        "Image Deblurring": ['image', 'image'],
        "Image Super Resolution": ['image', 'image'],
        "Image Classification": ['image', 'text'],
        "Image Captioning": ['image', 'text'],
        "Object Detection": ['image', 'text'],
        "Text Summarization": ['text', 'text'],
        "Text Generation": ['text', 'text'],
        "Machine Translation": ['text', 'text'],
        "Fill Mask": ['text', 'text'],
        "Sentiment Analysis": ['text', 'text'],
        "Text to Image Generation": ['text', 'image'],
        "Question Answering": ['text-text', 'text'],
        "Visual Question Answering": ['image-text', 'text'],
    }
    module_seq_list = module_seq.split(", ")

    # Condition 1: each module's output type feeds the next module's input type
    condition_1 = None
    for i, m in enumerate(module_seq_list):
        if i < len(module_seq_list) - 1 and io_dict[m][1] != io_dict[module_seq_list[i + 1]][0]:
            condition_1 = False
            break
        else:
            condition_1 = True

    # Condition 2: the sequence's overall input/output types match the task
    condition_2 = None
    input_type = io_dict[module_seq_list[0]][0]
    output_type = io_dict[module_seq_list[-1]][1]
    if input_type == "image" and output_type == "image" and 0 <= task_id <= 14:
        condition_2 = True
    elif input_type == "image" and output_type == "text" and 15 <= task_id <= 104:
        condition_2 = True
    elif input_type == "text" and output_type == "image" and 105 <= task_id <= 107:
        condition_2 = True
    elif input_type == "text" and output_type == "text" and 108 <= task_id <= 125:
        condition_2 = True
    elif input_type == "image-text" and output_type == "text" and 126 <= task_id <= 170:
        condition_2 = True
    elif input_type == "text-text" and output_type == "text" and 171 <= task_id <= 188:
        condition_2 = True
    else:
        condition_2 = False

    return condition_1 and condition_2
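
# Usage sketch for whole_module_seq_filter (comment only). Unlike module_seq_filter,
# this also requires every adjacent pair of modules to be type-compatible, so the
# reversed chain below fails: Image Captioning's text output cannot feed Image
# Deblurring's image input.
#
#   whole_module_seq_filter("Image Deblurring, Image Captioning", task_id=20)   # True
#   whole_module_seq_filter("Image Captioning, Image Deblurring", task_id=20)   # False
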
def match_module_seq(model_steps, sentence_model):
    """Map each natural-language step to the closest module name by sentence-embedding similarity."""
    candidate_modules = ["Image Classification", "Colorization", "Object Detection",
                         "Image Super Resolution", "Image Captioning", "Image Deblurring",
                         "Image Denoising", "Text to Image Generation", "Visual Question Answering",
                         "Sentiment Analysis", "Question Answering", "Text Summarization",
                         "Text Generation", "Machine Translation", "Fill Mask"]
    module_seq = ""
    for step in model_steps:
        # Compute embeddings for the step (repeated once per candidate) and the candidate names
        sentences1 = [step] * len(candidate_modules)
        embeddings1 = sentence_model.encode(sentences1, convert_to_tensor=True)
        embeddings2 = sentence_model.encode(candidate_modules, convert_to_tensor=True)
        # Cosine similarity between the step and every candidate module name (diagonal of the pairwise matrix)
        cosine_scores = util.cos_sim(embeddings1, embeddings2)
        similarities = torch.stack([cosine_scores[j][j] for j in range(len(candidate_modules))])
        # Append the best-matching module name
        module_index = torch.argmax(similarities).item()
        module_seq += candidate_modules[module_index] + ", "
    # Drop the trailing ", "
    module_seq = module_seq.strip()[:-1]
    return module_seq
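
# Usage sketch for match_module_seq (comment only). The SentenceTransformer
# checkpoint named below is an assumption; any model exposing .encode() works.
# The matched sequence depends on the embeddings, so the output is only indicative.
#
#   sentence_model = SentenceTransformer("all-MiniLM-L6-v2")
#   steps = ["remove the blur from the image", "describe the image in one sentence"]
#   print(match_module_seq(steps, sentence_model))
#   # likely something like "Image Deblurring, Image Captioning"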