Firefly Qwen 7B
A Chinese dialogue model fine-tuned from Qwen-7B, combining the MOSS dialogue dataset with school math problem data
Downloads 23
Release Time: 8/17/2023
Model Overview
An open-source large language model optimized for Chinese dialogue scenarios, supporting single-turn and multi-turn conversational interactions
Model Features
Chinese dialogue optimization
Fine-tuned specifically for Chinese-language contexts, producing more fluent dialogue than the original Qwen-7B
Enhanced math capabilities
Incorporates 20,000 school math problems to improve mathematical reasoning
Multi-turn dialogue support
Maintains a dialogue history of up to 1000 tokens to keep multi-turn conversations coherent (see the Advanced Usage example below)
Model Capabilities
Open-domain dialogue
Math problem solving
Context understanding
Text generation
Use Cases
Educational applications
Math tutoring
Solving and explaining primary/secondary school math problems step-by-step
Accuracy improved by approximately 15% compared to the base model
Intelligent customer service
Multi-turn consultation
Handling complex user inquiry scenarios
Context retention accuracy exceeds 80%
🚀 Firefly Qwen-7B Fine-tuning Project
This project uses the Firefly framework to fine-tune the Tongyi Qianwen Qwen-7B model. The training data consists of roughly one million dialogue turns, combining the MOSS data shared by the project with 20,000 school math problems.
For more details, please refer to the Firefly project.
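For orientation, the sketch below shows what one multi-turn training sample might look like in the Firefly project's jsonl layout. The field names (conversation_id, category, conversation, human, assistant) are assumed from the moss-style data released with that project, and the dialogue content is invented for illustration; check the Firefly repository for the authoritative schema.

import json

# One jsonl line = one multi-turn dialogue sample (illustrative content only;
# field names assumed from the moss-style data used by the Firefly project)
sample = {
    "conversation_id": 1,
    "category": "school_math",
    "conversation": [
        {
            "human": "A class has 12 boys and 15 girls. How many students are there in total?",
            "assistant": "12 + 15 = 27, so there are 27 students in total."
        },
        {
            "human": "If 3 students are absent today, how many are present?",
            "assistant": "27 - 3 = 24, so 24 students are present."
        }
    ]
}
print(json.dumps(sample, ensure_ascii=False))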
💻 Usage Examples
Basic Usage
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

"""
Single-turn dialogue, without memory of the dialogue history
"""


def main():
    model_name = 'YeungNLP/firefly-qwen-7b'

    max_new_tokens = 500
    top_p = 0.9
    temperature = 0.35
    repetition_penalty = 1.0
    device = 'cuda'
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        torch_dtype=torch.float16,
        device_map='auto'
    ).to(device).eval()
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True,
        # llama does not support the fast tokenizer
        use_fast=False if model.config.model_type == 'llama' else True
    )
    # QWenTokenizer is special: pad_token_id, bos_token_id, and eos_token_id are all None,
    # and the token corresponding to eod_id is <|endoftext|>
    if tokenizer.__class__.__name__ == 'QWenTokenizer':
        tokenizer.pad_token_id = tokenizer.eod_id
        tokenizer.bos_token_id = tokenizer.eod_id
        tokenizer.eos_token_id = tokenizer.eod_id

    text = input('User:')
    while True:
        text = text.strip()
        # chatglm uses the official data organization format
        if model.config.model_type == 'chatglm':
            text = '[Round 1]\n\n问:{}\n\n答:'.format(text)
            input_ids = tokenizer(text, return_tensors="pt", add_special_tokens=False).input_ids.to(device)
        # For compatibility with qwen-7b: tokenizing its eos_token does not yield the corresponding
        # eos_token_id, so the bos/eos token ids are concatenated manually
        else:
            input_ids = tokenizer(text, return_tensors="pt", add_special_tokens=False).input_ids.to(device)
            bos_token_id = torch.tensor([[tokenizer.bos_token_id]], dtype=torch.long).to(device)
            eos_token_id = torch.tensor([[tokenizer.eos_token_id]], dtype=torch.long).to(device)
            input_ids = torch.concat([bos_token_id, input_ids, eos_token_id], dim=1)
        with torch.no_grad():
            outputs = model.generate(
                input_ids=input_ids, max_new_tokens=max_new_tokens, do_sample=True,
                top_p=top_p, temperature=temperature, repetition_penalty=repetition_penalty,
                eos_token_id=tokenizer.eos_token_id
            )
        outputs = outputs.tolist()[0][len(input_ids[0]):]
        response = tokenizer.decode(outputs)
        response = response.strip().replace(tokenizer.eos_token, "").strip()
        print("Firefly:{}".format(response))
        text = input('User:')


if __name__ == '__main__':
    main()
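Note on the input layout: because tokenizing qwen-7b's eos_token string does not return the special eos_token_id, the script builds the prompt at the token level as bos_token_id + query tokens + eos_token_id, i.e. effectively <|endoftext|>query<|endoftext|>, and generation stops once the model emits the next <|endoftext|>.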
Advanced Usage
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch


def main():
    model_name = 'YeungNLP/firefly-qwen-7b'

    device = 'cuda'
    max_new_tokens = 500    # Maximum number of tokens generated per dialogue round
    history_max_len = 1000  # Maximum number of history tokens the model remembers
    top_p = 0.9
    temperature = 0.35
    repetition_penalty = 1.0

    # Load the model
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        torch_dtype=torch.float16,
        device_map='auto'
    ).to(device).eval()
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True,
        # llama does not support the fast tokenizer
        use_fast=False if model.config.model_type == 'llama' else True
    )
    # QWenTokenizer is special: pad_token_id, bos_token_id, and eos_token_id are all None,
    # and the token corresponding to eod_id is <|endoftext|>
    if tokenizer.__class__.__name__ == 'QWenTokenizer':
        tokenizer.pad_token_id = tokenizer.eod_id
        tokenizer.bos_token_id = tokenizer.eod_id
        tokenizer.eos_token_id = tokenizer.eod_id

    # Accumulate the full dialogue history
    if model.config.model_type != 'chatglm':
        history_token_ids = torch.tensor([[tokenizer.bos_token_id]], dtype=torch.long)
    else:
        history_token_ids = torch.tensor([[]], dtype=torch.long)

    # Start the dialogue
    utterance_id = 0    # Current dialogue round, used to fit the data organization format of chatglm
    user_input = input('User:')
    while True:
        utterance_id += 1
        # chatglm uses the official data organization format
        if model.config.model_type == 'chatglm':
            user_input = '[Round {}]\n\n问:{}\n\n答:'.format(utterance_id, user_input)
            user_input_ids = tokenizer(user_input, return_tensors="pt", add_special_tokens=False).input_ids
        # Firefly's data organization format.
        # For compatibility with qwen-7b: tokenizing its eos_token does not yield the corresponding
        # eos_token_id, so the eos token id is concatenated manually
        else:
            input_ids = tokenizer(user_input, return_tensors="pt", add_special_tokens=False).input_ids
            eos_token_id = torch.tensor([[tokenizer.eos_token_id]], dtype=torch.long)
            user_input_ids = torch.concat([input_ids, eos_token_id], dim=1)
        history_token_ids = torch.concat((history_token_ids, user_input_ids), dim=1)
        model_input_ids = history_token_ids[:, -history_max_len:].to(device)
        with torch.no_grad():
            outputs = model.generate(
                input_ids=model_input_ids, max_new_tokens=max_new_tokens, do_sample=True, top_p=top_p,
                temperature=temperature, repetition_penalty=repetition_penalty, eos_token_id=tokenizer.eos_token_id
            )
        model_input_ids_len = model_input_ids.size(1)
        response_ids = outputs[:, model_input_ids_len:]
        history_token_ids = torch.concat((history_token_ids, response_ids.cpu()), dim=1)
        response = tokenizer.batch_decode(response_ids)
        print("Firefly:" + response[0].strip().replace(tokenizer.eos_token, ""))
        user_input = input('User:')


if __name__ == '__main__':
    main()
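Note on history handling: the full dialogue history is accumulated on the CPU in history_token_ids, and only the most recent history_max_len (1000) tokens are moved to the GPU and passed to generate each round. This sliding window is what the "context memory up to 1000 tokens" feature above refers to; older turns are silently dropped once the limit is exceeded.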