from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Load the model and tokenizer (CPU)
model_name = "gpt2"  # a lightweight model
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Input text
input_text = "The way artificial intelligence is changing the world is"
# Tokenize the text and build the model inputs
inputs = tokenizer(input_text, return_tensors="pt")
# Generate text (runs on the CPU)
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_length=100,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        do_sample=True,  # sampling must be enabled for temperature to take effect
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,  # GPT-2 has no pad token, so reuse EOS
    )
# Decode the result
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)
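Because sampling is stochastic, two runs of the block above will usually produce different continuations. A minimal sketch of making a run repeatable with the set_seed utility from transformers, reusing the model, tokenizer, and inputs defined above (the seed value 42 is an arbitrary choice):

from transformers import set_seed

set_seed(42)  # seeds the Python, NumPy, and PyTorch RNGs in one call
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_length=100,
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,
    )
print(tokenizer.decode(outputs[0], skip_special_tokens=True))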
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Check whether a GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
# Load the model and tokenizer (on the GPU when available)
model_name = "gpt2"  # a lightweight model
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Input text
input_text = "The way artificial intelligence is changing the world is"
# Tokenize and move the model inputs to the selected device
inputs = tokenizer(input_text, return_tensors="pt").to(device)
# Generate text (runs on the GPU when available)
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_length=100,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        do_sample=True,  # sampling must be enabled for temperature to take effect
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id
    )
# Decode the result (move the output tensor back to the CPU first)
generated_text = tokenizer.decode(outputs[0].cpu(), skip_special_tokens=True)
print(generated_text)
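To get a rough sense of what the GPU buys you, the sketch below times a single generate call, reusing the model, tokenizer, inputs, and device from the block above. timed_generate is a hypothetical helper introduced only for illustration; torch.cuda.synchronize makes the timer wait for the asynchronous CUDA kernels to finish before the measurement ends.

import time

def timed_generate(model, inputs, **gen_kwargs):
    # Synchronize before and after so the timing covers the full GPU work
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    start = time.perf_counter()
    with torch.no_grad():
        out = model.generate(**inputs, **gen_kwargs)
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return out, time.perf_counter() - start

outputs, elapsed = timed_generate(
    model, inputs,
    max_new_tokens=50,
    do_sample=True,
    temperature=0.7,
    pad_token_id=tokenizer.eos_token_id,
)
print(f"generation took {elapsed:.2f}s on {device}")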
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
# 4-bit quantization settings (to save memory)
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)
# Load the model with 4-bit quantization applied
model_name = "EleutherAI/polyglot-ko-1.3b"  # a Korean-language model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto"  # automatically place layers on GPU/CPU
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Run generation through a pipeline
from transformers import pipeline
pipe = pipeline(
"text-generation",
model=model,
tokenizer=tokenizer,
device=0 if torch.cuda.is_available() else -1
)
# Generate text
result = pipe(
"์ค๋ ๋ ์จ๊ฐ ์ข์์",
max_length=100,
do_sample=True,
temperature=0.7,
top_p=0.9
)
print(result[0]['generated_text'])
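To check what 4-bit loading actually saves, a quick sketch using the quantized model loaded above; get_memory_footprint is a standard transformers model method that returns the memory used by the model's parameters and buffers in bytes. The fp16 figure in the comment is a rough back-of-the-envelope estimate, not a measured value.

footprint_gb = model.get_memory_footprint() / (1024 ** 3)
print(f"Quantized model footprint: {footprint_gb:.2f} GiB")
# Rough comparison: a 1.3B-parameter model in fp16 needs about 2.6 GB of weights
# (2 bytes per parameter), before activations and the KV cache.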
Prompt engineering
Using base models
RAG (retrieval-augmented generation) hands-on
Model fine-tuning
Application development