～祝！「Phi-3」リリース！(つづき) ～

・・・「Phi-3」がリリースされたのはいいが・・・

QEU:FOUNDER ： “Phi-3って、よさげだなあ。やってみたいなあ・・・。”

D先生（設定年齢65歳）： “じゃあ、finetuneしてみれば・・・？”

QEU:FOUNDER ： “T4程度のGPUでは、エラーがでました（笑）。まあ、しゃあない・・・。地道にトレーニングデータセットの開発を続けましょう。今回は、データセットにKeywordという情報を追加しましょう。”

D先生（設定年齢65歳）： “そうすると、カテゴリと情報が重複しません？”

QEU:FOUNDER ： “そういえばそうだわ・・・。データセットの編集には便利なので、生データには残すが、モデルの学習にはつかわないようにしましょう。”

# ------
# Prompt formatting function (AI assistant)
def formatting_for_AIassistant(text_begining, text_medium, reasoning, transformed_input, response, language):
    """Assemble one training text for an example handled with the AI-assistant layout.

    Reference values for the dataset columns (from the original notes):
      category: open_qa, closed_qa, classification, creative, translation,
                reasoning, brainstorm, q_to_q_
      language: English, Japanese, Chinese
      field:    technology, culture, math, other, travel, economy, language,
                history, nature, creative
      flag:     AI assistant, creator, logician, translator

    Args:
        text_begining: Leading part of the prompt, placed before ``<|reasoning|>``.
        text_medium: Part placed after the reasoning line.
        reasoning: Text placed after the ``<|reasoning|>`` tag.
        transformed_input: Text placed after ``<|transformed_input|>``; it is
            only inspected when ``language`` is not ``"English"`` and only
            emitted when its length is at least 3.
        response: Text placed after the ``<|response|>`` tag.
        language: Language label of the example.

    Returns:
        The assembled text. It starts with a newline plus indentation and ends
        with a newline plus the same indentation (8 spaces for English,
        12 spaces otherwise).
    """
    # NOTE(review): the surrounding newline/indentation is kept exactly as the
    # original templates produced it - confirm whether it is intended.
    head = f"{text_begining}<|reasoning|>{reasoning}\n{text_medium}"
    tail = f"<|response|>{response}<|endoftext|>"

    if language == "English":
        pad = " " * 8
        return f"\n{pad}{head}{tail}\n{pad}"

    pad = " " * 12
    if len(transformed_input) >= 3:
        # Non-English example that carries a usable transformed input.
        return f"\n{pad}{head}<|transformed_input|>{transformed_input}\n{tail}\n{pad}"
    return f"\n{pad}{head}{tail}\n{pad}"

# ------
# Prompt formatting function (creator)
def formatting_for_creator(text_begining, text_medium, reasoning, transformed_input, response, language):
    """Assemble one training text for an example handled with the creator layout.

    The function name previously contained a stray hyphen ("format-ting"),
    which is not a valid Python identifier; it is now spelled like the other
    ``formatting_for_*`` functions in this file.

    Args:
        text_begining: Leading part of the prompt, placed before ``<|reasoning|>``.
        text_medium: Part placed after the reasoning line.
        reasoning: Text placed after the ``<|reasoning|>`` tag.
        transformed_input: Text placed after ``<|transformed_input|>``; it is
            only inspected when ``language`` is not ``"English"`` and only
            emitted when its length is at least 3.
        response: Text placed after the ``<|response|>`` tag.
        language: Language label of the example.

    Returns:
        The assembled text. It starts with a newline plus indentation and ends
        with a newline plus the same indentation (8 spaces for English,
        12 spaces otherwise).
    """
    # NOTE(review): the surrounding newline/indentation is kept exactly as the
    # original templates produced it - confirm whether it is intended.
    head = f"{text_begining}<|reasoning|>{reasoning}\n{text_medium}"
    tail = f"<|response|>{response}<|endoftext|>"

    if language == "English":
        pad = " " * 8
        return f"\n{pad}{head}{tail}\n{pad}"

    pad = " " * 12
    if len(transformed_input) >= 3:
        # Non-English example that carries a usable transformed input.
        return f"\n{pad}{head}<|transformed_input|>{transformed_input}\n{tail}\n{pad}"
    return f"\n{pad}{head}{tail}\n{pad}"

# ------
# Prompt formatting function (translator)
def formatting_for_translator(text_begining, text_medium, reasoning, response):
    """Assemble one training text for an example handled with the translator layout.

    Args:
        text_begining: Leading part of the prompt, placed before ``<|reasoning|>``.
        text_medium: Part placed after the reasoning line.
        reasoning: Text placed after the ``<|reasoning|>`` tag.
        response: Text placed after the ``<|response|>`` tag.

    Returns:
        The assembled text, wrapped in a leading newline plus 4 spaces and a
        trailing newline plus 4 spaces.
    """
    # NOTE(review): the surrounding newline/indentation is kept exactly as the
    # original template produced it - confirm whether it is intended.
    pad = " " * 4
    body = f"{text_begining}<|reasoning|>{reasoning}\n{text_medium}<|response|>{response}<|endoftext|>"
    return "\n" + pad + body + "\n" + pad

# ------
# Prompt formatting function (logician)
def formatting_for_logician(text_begining, text_medium, reasoning, transformed_input, response, language):
    """Assemble one training text for an example handled with the logician layout.

    The function name previously contained a stray hyphen ("format-ting"),
    which is not a valid Python identifier; it is now spelled like the other
    ``formatting_for_*`` functions in this file.

    Args:
        text_begining: Leading part of the prompt, placed before ``<|reasoning|>``.
        text_medium: Part placed after the reasoning line.
        reasoning: Text placed after the ``<|reasoning|>`` tag.
        transformed_input: Text placed after ``<|transformed_input|>``; it is
            only inspected when ``language`` is not ``"English"`` and only
            emitted when its length is at least 3.
        response: Text placed after the ``<|response|>`` tag.
        language: Language label of the example.

    Returns:
        The assembled text. It starts with a newline plus indentation and ends
        with a newline plus the same indentation (8 spaces for English,
        12 spaces otherwise).
    """
    # NOTE(review): the surrounding newline/indentation is kept exactly as the
    # original templates produced it - confirm whether it is intended.
    head = f"{text_begining}<|reasoning|>{reasoning}\n{text_medium}"
    tail = f"<|response|>{response}<|endoftext|>"

    if language == "English":
        pad = " " * 8
        return f"\n{pad}{head}{tail}\n{pad}"

    pad = " " * 12
    if len(transformed_input) >= 3:
        # Non-English example that carries a usable transformed input.
        return f"\n{pad}{head}<|transformed_input|>{transformed_input}\n{tail}\n{pad}"
    return f"\n{pad}{head}{tail}\n{pad}"

# ------
# The basic prompt layout is as follows (reasoning is all you need.)
#<|system|>sys_message\n<|prompt|>prompt\n<|reasoning|>reasoning\n<|response|>response<|endoftext|>
# ------
# Prompt formatting function (Batch: For Reasoning)
def formatting_prompts_func(examples):
    """Convert a batch of dataset rows into a list of training texts.

    ``examples`` is indexed as ``examples[column][i]``. The columns read are
    ``instruction``, ``input``, ``language``, ``field``, ``keywords``,
    ``flag``, ``reasoning``, ``transformed_input`` and ``output``. The
    ``category`` column is not used for building the text and is no longer
    read.

    For every row the function picks a system message by ``flag``, builds the
    leading part (system / instruction / optional context) and the middle part
    (optional keywords / field / flag / language), and hands them to the
    ``formatting_for_*`` function matching ``flag``.

    Args:
        examples: Column-oriented batch; every column holds one value per row.

    Returns:
        A list with one assembled text per row, in row order.
    """
    # System messages keyed by flag. Hyphenation artifacts that had crept into
    # these literals ("Eng-lish", "at-tract", "pro-cedures") are removed.
    sys_message_AIassistant = "You are an AI assistant who is good at providing answers in multiple languages. Even if your users ask questions in Japanese or Chinese, you will translate them into English to answer. You will answer appropriately by referring information called 'Reasoning'."
    system_messages = {
        "translator": "You are translator who is fluent in multiple languages.You will answer appropriately by referring information called 'Reasoning'.",
        "creator": "You are creator who create amazing content with your free ideas. You attract readers with your beautiful writing such as novel or advertisement. Even if users ask questions in Japanese or Chinese, you will translate their instructions into English to answer properly. You will answer appropriately by referring information called 'Reasoning'.",
        "logician": "You are an excellent logician and are instructing users on \"thinking procedures for providing better quality reasoning to user-provided questions.\". ",
    }

    # NOTE(review): the newline/indentation wrapped around each part is kept
    # exactly as the original triple-quoted templates produced it - confirm
    # whether it is intended.
    begin_pad = " " * 16
    medium_pad = " " * 20

    output_text = []
    for i in range(len(examples["instruction"])):
        instruction = examples["instruction"][i]
        input_text = examples["input"][i]
        language = examples["language"][i]
        field = examples["field"][i]
        keywords = examples["keywords"][i]
        flag = examples["flag"][i]
        reasoning = examples["reasoning"][i]
        transformed_input = examples["transformed_input"][i]
        response = examples["output"][i]

        # Values shorter than 3 characters are treated as absent.
        has_keywords = len(keywords) >= 3
        has_context = len(input_text) >= 3

        # Weave the keywords into the reasoning sentence when they exist.
        if has_keywords:
            reasoning = reasoning.replace(
                ", referring to relevant field information, before",
                f", referring to relevant field information such as {keywords}, before",
            )

        # Any flag without a dedicated message falls back to the AI assistant.
        sys_message = system_messages.get(flag, sys_message_AIassistant)

        # Leading part: system message, instruction and (optionally) context.
        context_part = f"<|context|>{input_text}\n" if has_context else ""
        text_begining = (
            f"\n{begin_pad}<|system|>{sys_message}\n<|instruction|>{instruction}\n"
            f"{context_part}\n{begin_pad}"
        )

        # Middle part: it does not depend on whether a context is present.
        keywords_part = f"<|keywords|>{keywords}\n" if has_keywords else ""
        text_medium = (
            f"\n{medium_pad}{keywords_part}<|field|>{field}\n<|flag|>{flag}\n<|language|>{language}\n"
            f"\n{medium_pad}"
        )

        # Choose the formatter by flag. The calls were previously spelled
        # "format-ting_for_...", which Python reads as a subtraction and
        # which fails with NameError at run time.
        if flag == "logician":
            text = formatting_for_logician(text_begining, text_medium, reasoning, transformed_input, response, language)
        elif flag == "creator":
            text = formatting_for_creator(text_begining, text_medium, reasoning, transformed_input, response, language)
        elif flag == "translator":
            text = formatting_for_translator(text_begining, text_medium, reasoning, response)
        else:
            text = formatting_for_AIassistant(text_begining, text_medium, reasoning, transformed_input, response, language)

        output_text.append(text)
    return output_text