Llama 2 医療相談：オープンソースの医療相談モデル - 医療に関する質問への専門的な回答を無料で提供

ホーム

Llama 2 Medical Consultation

Ashishkrによって開発

Llama-2-7b-chat-hfを基に微調整した医療相談モデルで、医療関連の質問に回答するために特別に設計されています

大規模言語モデル

TensorBoard

#医療Q&A微調整 #神経科症状分析 #術後リハビリ相談

ダウンロード数 364

リリース日 : 8/23/2023

モデル概要

このモデルは医療相談シナリオ向けに微調整された大規模言語モデルで、患者の説明に基づいて医療アドバイスを提供できます

モデル特徴

医療分野微調整

医療相談シナリオ向けに最適化されており、医療用語や患者の説明を理解できます

リソース効率

T4 GPU(16GB VRAM)やCPU(32GB RAM)で動作可能で、様々な展開環境に適しています

4ビット量子化

4ビット量子化技術をサポートし、メモリ要件を低減しながらモデル性能を維持します

モデル能力

医療質問回答

症状分析

医療用語理解

患者相談対応

使用事例

医療相談

症状分析

患者が説明した症状に基づいて初期医療アドバイスを提供

考えられる病因と対処法を患者が理解できるよう支援

術後相談

患者の術後回復に関する質問に回答

術後ケアのアドバイスと注意事項を提供

🚀 llama-2-7b-chat-hf 医療相談用ファインチューニングモデル

このモデルはmeta-llama/Llama-2-7b-chat-hfをベースに、医療相談用にファインチューニングされたものです。T4 GPU（16GB VRAM）やCPU（32GB RAM）で動作します。

🚀 クイックスタート

GPUでの実行方法

import transformers
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from torch import cuda, bfloat16

# Base checkpoint that the medical-consultation adapter is applied on top of.
base_model_id = 'meta-llama/Llama-2-7b-chat-hf'

# Device used later for the tokenized inputs: the current CUDA device when
# CUDA is available, otherwise the CPU.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# 4-bit NF4 quantization with double quantization; compute runs in bfloat16.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)


# Placeholder: replace with your own Hugging Face access token.
# NOTE(review): newer transformers releases prefer `token=` over
# `use_auth_token=` -- confirm against the installed version.
hf_auth = "your-huggingface-access-token"
model_config = transformers.AutoConfig.from_pretrained(
    base_model_id,
    use_auth_token=hf_auth
)

# device_map='auto' lets the loader place the quantized weights on the
# available device(s).
model = transformers.AutoModelForCausalLM.from_pretrained(
    base_model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth
)

config = PeftConfig.from_pretrained("Ashishkr/llama-2-medical-consultation")
# Wrap the base model with the fine-tuned adapter. The weights were already
# placed by device_map='auto' above, so the wrapper is NOT moved with
# `.to(device)`: relocating a quantized, auto-dispatched model is redundant on
# a single GPU and can undo the placement when several devices are in use.
model = PeftModel.from_pretrained(model, "Ashishkr/llama-2-medical-consultation")

# Inference only.
model.eval()
print(f"Model loaded on {device}")

tokenizer = transformers.AutoTokenizer.from_pretrained(
    base_model_id,
    use_auth_token=hf_auth
)

def llama_generate(
    model: AutoModelForCausalLM,
    tokenizer: AutoTokenizer,
    prompt: str,
    max_new_tokens: int = 128,
    temperature: float = 0.92):
    """Generate a continuation of ``prompt`` and return only the new text.

    Args:
        model: Causal language model used for generation.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        prompt: Full prompt text fed to the model.
        max_new_tokens: Upper bound on the number of tokens to generate.
        temperature: Temperature value forwarded to ``model.generate``.

    Returns:
        The decoded generated text, without the prompt.
    """
    # `device` is the module-level device string chosen at load time.
    inputs = tokenizer(
        [prompt],
        return_tensors="pt",
        return_token_type_ids=False,
    ).to(
        device
    )

    # A single, unpadded prompt is encoded, so the second dimension of
    # input_ids is exactly the number of prompt tokens.
    prompt_token_count = inputs["input_ids"].shape[1]

    # Check if bfloat16 is supported, otherwise use float16
    dtype_to_use = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16

    with torch.autocast("cuda", dtype=dtype_to_use):
        response = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            return_dict_in_generate=True,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )

    # The returned sequence starts with the prompt tokens. Drop them by token
    # count before decoding: slicing the decoded string by len(prompt) is
    # fragile, because decoding does not have to reproduce the prompt text
    # character for character (special tokens are skipped, whitespace may be
    # normalized), which can cut the answer at the wrong place.
    generated_ids = response["sequences"][0][prompt_token_count:]

    return tokenizer.decode(
        generated_ids,
        skip_special_tokens=True,
    )

# Example patient query, laid out as instruction / input / response sections.
prompt = """
 instruction: "If you are a doctor, please answer the medical questions based on the patient's description." \n

input: "Hi, I had a subarachnoid bleed and coiling of brain aneurysm last year.
I am having some major bilateral temple pain along with numbness that comes and
goes in my left arm/hand/fingers. I have had headaches since the aneurysm,
but this is different. Also, my moods have been horrible for the past few weeks.\n

response:  """

# Run generation on the example prompt, limited to 100 new tokens.
response = llama_generate(
    model=model,
    tokenizer=tokenizer,
    prompt=prompt,
    temperature=0.92,
    max_new_tokens=100,
)

# Show the text returned by llama_generate.
print(response)

CPUでの実行方法

import torch
import transformers
from torch import cuda, bfloat16
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer


# Checkpoint that the adapter below is layered on.
base_model_id = 'meta-llama/Llama-2-7b-chat-hf'

# Use the current CUDA device when one is available; otherwise stay on the CPU.
if cuda.is_available():
    device = f'cuda:{cuda.current_device()}'
else:
    device = 'cpu'

# Quantization settings: only the fp32 CPU-offload switch is turned on.
bnb_config = transformers.BitsAndBytesConfig(llm_int8_enable_fp32_cpu_offload=True)

import torch

# Placeholder: put your own Hugging Face access token here.
hf_auth = "YOUR-HUGGINGFACE-ACCESS-TOKEN"

model_config = transformers.AutoConfig.from_pretrained(base_model_id, use_auth_token=hf_auth)

# No device_map is passed here; the model is moved explicitly with .to(device)
# after the adapter has been attached.
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    config=model_config,
    quantization_config=bnb_config,
    trust_remote_code=True,
    use_auth_token=hf_auth,
)

config = PeftConfig.from_pretrained("Ashishkr/llama-2-medical-consultation")

# Attach the fine-tuned adapter, then place the wrapped model on the device.
model = PeftModel.from_pretrained(model, "Ashishkr/llama-2-medical-consultation")
model = model.to(device)

# Inference only.
model.eval()
print(f"Model loaded on {device}")

tokenizer = AutoTokenizer.from_pretrained(base_model_id, use_auth_token=hf_auth)

def llama_generate(
    model: AutoModelForCausalLM,
    tokenizer: AutoTokenizer,
    prompt: str,
    max_new_tokens: int = 128,
    temperature: float = 0.92):
    """Generate a continuation of ``prompt`` and return only the new text.

    Args:
        model: Causal language model used for generation.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        prompt: Full prompt text fed to the model.
        max_new_tokens: Upper bound on the number of tokens to generate.
        temperature: Temperature value forwarded to ``model.generate``.

    Returns:
        The decoded generated text, without the prompt.
    """
    # `device` is the module-level device string chosen at load time.
    inputs = tokenizer(
        [prompt],
        return_tensors="pt",
        return_token_type_ids=False,
    ).to(
        device
    )

    # A single, unpadded prompt is encoded, so the second dimension of
    # input_ids is exactly the number of prompt tokens.
    prompt_token_count = inputs["input_ids"].shape[1]

    # Generation runs in the model's own precision. The previous
    # torch.autocast("cuda", dtype=torch.float32) wrapper was dropped: float32
    # is not a reduced-precision autocast dtype and this is the CPU path, so
    # it had no useful effect beyond emitting warnings.
    response = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        return_dict_in_generate=True,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
    )

    # The returned sequence starts with the prompt tokens. Drop them by token
    # count before decoding: slicing the decoded string by len(prompt) is
    # fragile, because decoding does not have to reproduce the prompt text
    # character for character (special tokens are skipped, whitespace may be
    # normalized), which can cut the answer at the wrong place.
    generated_ids = response["sequences"][0][prompt_token_count:]

    return tokenizer.decode(
        generated_ids,
        skip_special_tokens=True,
    )

# Example patient query, laid out as instruction / input / response sections.
prompt = """
 instruction: "If you are a doctor, please answer the medical questions based on the patient's description." \n

input: "Hi, I had a subarachnoid bleed and coiling of brain aneurysm last year.
I am having some major bilateral temple pain along with numbness that comes and
goes in my left arm/hand/fingers. I have had headaches since the aneurysm,
but this is different. Also, my moods have been horrible for the past few weeks.\n

response:  """

# Run generation on the example prompt, limited to 100 new tokens.
response = llama_generate(
    model=model,
    tokenizer=tokenizer,
    prompt=prompt,
    temperature=0.92,
    max_new_tokens=100,
)

# Show the text returned by llama_generate.
print(response)