RAG 시스템 실전 구축 (v19)
요약
RAG(검색 증강 생성) 시스템의 기본 개념과 핵심 루프 구조를 설명합니다. 특히 효율적인 정보 검색을 위한 의미적 청킹(Semantic Chunking) 전략과 구현 코드를 상세히 다룹니다.
핵심 포인트
- RAG의 4단계 루프: 질문, 검색, 증강, 생성 과정 이해
- 임베딩 모델을 활용한 벡터 DB 검색 메커니즘
- 문장 유사도를 기반으로 한 의미적 청킹 전략 구현
RAG 시스템 실전 구축 (v19)
1. RAG 시스템 기본 개념
RAG(Retrieval-Augmented Generation, 검색 증강 생성)는 LLM이 답변을 생성하기 전에 외부 지식을 검색해 프롬프트에 포함시키는 기법으로, 더 정확하고 최신의 정보를 제공할 수 있게 합니다. 핵심 루프는 다음과 같습니다:
사용자 질문 → 검색 (Retrieval) → 증강 (Augmentation) → 생성 (Generation)
RAG 루프 구조
# 단순 RAG 루프 예시
class SimpleRAG:
    """Minimal retrieve-augment-generate loop built from three collaborators.

    The collaborators are duck-typed:

    * ``embedder.encode(question)`` produces the query representation.
    * ``vector_db.search(query_embedding, k=5)`` returns the retrieved items.
    * ``generator.generate(question, context)`` produces the final answer.
    """

    def __init__(self, embedder, vector_db, generator):
        self.embedder = embedder
        self.vector_db = vector_db
        self.generator = generator

    def query(self, question):
        """Answer *question* using retrieved context.

        Args:
            question: The user question, passed unchanged to the embedder and
                to the generator.

        Returns:
            Whatever ``generator.generate`` returns.
        """
        # 1. Embed the question.
        query_embedding = self.embedder.encode(question)
        # 2. Retrieve the five closest items.
        relevant_docs = self.vector_db.search(query_embedding, k=5)
        # 3. Augment: build the context string. Retrieved items may be plain
        #    strings or objects exposing ``.content``; accept both so a store
        #    that returns raw text does not fail with AttributeError.
        context = " ".join(
            doc if isinstance(doc, str) else doc.content
            for doc in relevant_docs
        )
        # 4. Generate the answer from the question plus context.
        return self.generator.generate(question, context)
2. 청킹 전략 (Chunking Strategies)
2.1 의미적 청킹 (Semantic Chunking)
from sentence_transformers import SentenceTransformer
import numpy as np
class SemanticChunker:
    """Group consecutive sentences into chunks by embedding similarity."""

    def __init__(self, model_name="all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def split_into_sentences(self, text):
        """Split *text* into sentences on whitespace following ``.``, ``!`` or ``?``.

        This is a deliberately simple splitter; it does not handle
        abbreviations or quoted punctuation.

        Returns:
            A list of non-empty sentence strings (empty for blank input).
        """
        import re

        pieces = re.split(r"(?<=[.!?])\s+", text.strip())
        return [piece for piece in pieces if piece]

    def chunk_by_semantic(self, text, threshold=0.75):
        """Split *text* into chunks of semantically similar sentences.

        Each sentence is compared with the embedding of the FIRST sentence of
        the chunk currently being built. While the cosine similarity stays
        above *threshold* the sentence joins that chunk; otherwise the chunk
        is closed and the sentence starts a new one.

        Args:
            text: Input text.
            threshold: Similarity that must be exceeded to extend a chunk.

        Returns:
            A list of chunk strings (sentences joined by a single space).
            Empty when *text* contains no sentences.
        """
        sentences = self.split_into_sentences(text)
        if not sentences:
            # Nothing to embed; indexing sentences[0] below would fail.
            return []

        embeddings = self.model.encode(sentences)
        chunks = []
        current_chunk = [sentences[0]]
        current_embedding = embeddings[0]

        for sentence, embedding in zip(sentences[1:], embeddings[1:]):
            similarity = self.cosine_similarity(current_embedding, embedding)
            if similarity > threshold:
                current_chunk.append(sentence)
            else:
                chunks.append(" ".join(current_chunk))
                current_chunk = [sentence]
                # The new chunk's first sentence becomes the comparison anchor.
                current_embedding = embedding

        chunks.append(" ".join(current_chunk))
        return chunks

    def cosine_similarity(self, a, b):
        """Return the cosine similarity of vectors *a* and *b*.

        Returns 0.0 when either vector has zero norm, instead of dividing by
        zero and producing NaN.
        """
        denominator = np.linalg.norm(a) * np.linalg.norm(b)
        if denominator == 0:
            return 0.0
        return np.dot(a, b) / denominator
2.2 재귀적 청킹 (Recursive Chunking)
class RecursiveChunker:
    """Split text into chunks of at most ``max_chunk_size`` characters.

    Separators are tried from coarsest to finest; a piece that is still too
    long after splitting on one separator is split again with the next one.
    """

    def __init__(self, max_chunk_size=512):
        self.max_chunk_size = max_chunk_size

    def recursive_chunk(self, text, separators=None):
        """Chunk *text*, descending through *separators* only where needed.

        Args:
            text: The text to split.
            separators: Ordered separators, coarsest first. Defaults to
                ``["\\n\\n", "\\n", " ", ""]``. An empty-string separator (or
                running out of separators) means "cut every
                ``max_chunk_size`` characters".

        Returns:
            A list of chunks. Text that already fits is returned unchanged as
            a single chunk; chunks produced by splitting are stripped of
            surrounding whitespace and empty pieces are dropped. Blank input
            yields an empty list.

        Raises:
            ValueError: If ``max_chunk_size`` is smaller than 1.
        """
        if separators is None:
            # Built per call: a list literal in the signature would be one
            # shared mutable object across all calls.
            separators = ["\n\n", "\n", " ", ""]

        limit = self.max_chunk_size
        if limit < 1:
            raise ValueError("max_chunk_size must be a positive integer")

        if len(text) <= limit:
            return [text] if text.strip() else []

        chunks = []

        def emit(piece):
            piece = piece.strip()
            if piece:
                chunks.append(piece)

        def hard_slice(piece):
            # Last resort: fixed-width cuts with no regard for word boundaries.
            for start in range(0, len(piece), limit):
                emit(piece[start:start + limit])

        def split_recursive(piece, level=0):
            if len(piece) <= limit:
                emit(piece)
                return
            # str.split("") raises ValueError, so "" is handled as a hard cut.
            if level >= len(separators) or separators[level] == "":
                hard_slice(piece)
                return

            separator = separators[level]
            current = ""
            for part in piece.split(separator):
                if len(part) > limit:
                    # This part can never fit: flush what is buffered, then
                    # break the part down with the next, finer separator.
                    emit(current)
                    current = ""
                    split_recursive(part, level + 1)
                    continue
                candidate = current + separator + part if current else part
                if len(candidate) <= limit:
                    current = candidate
                else:
                    emit(current)
                    current = part
            emit(current)

        split_recursive(text)
        return chunks
2.3 에이전트 기반 청킹 (Agentic Chunking)
class AgenticChunker:
    """Chunk markdown-style text by heading, splitting oversize sections.

    NOTE: ``prompt_template`` is stored but not used by any method here, and
    no summarisation or model call is performed.
    """

    def __init__(self, prompt_template):
        self.prompt_template = prompt_template

    def chunk_with_agent(self, text, max_tokens=1000):
        """Return heading-delimited sections of *text* as chunks.

        Args:
            text: The document to chunk.
            max_tokens: Maximum chunk size. It is compared against
                ``len(section)``, i.e. it is measured in characters.

        Returns:
            A list of chunk strings; sections longer than *max_tokens* are
            replaced by the pieces produced by ``split_section``.
        """
        chunks = []
        for section in self.divide_into_sections(text):
            if len(section) > max_tokens:
                chunks.extend(self.split_section(section, max_tokens))
            else:
                chunks.append(section)
        return chunks

    def split_section(self, section, max_tokens):
        """Split *section* into pieces of at most *max_tokens* characters.

        Words are packed greedily and joined by single spaces, so the original
        whitespace (including newlines) is normalised. A single word longer
        than *max_tokens* is cut into fixed-width slices.

        Raises:
            ValueError: If *max_tokens* is smaller than 1.
        """
        if max_tokens < 1:
            raise ValueError("max_tokens must be a positive integer")

        pieces = []
        current = ""
        for word in section.split():
            while len(word) > max_tokens:
                if current:
                    pieces.append(current)
                    current = ""
                pieces.append(word[:max_tokens])
                word = word[max_tokens:]
            if not word:
                continue
            candidate = f"{current} {word}" if current else word
            if len(candidate) <= max_tokens:
                current = candidate
            else:
                pieces.append(current)
                current = word
        if current:
            pieces.append(current)
        return pieces

    def divide_into_sections(self, text):
        """Split *text* at markdown headings (``#`` to ``######``).

        Each heading together with the body that follows it forms one
        section; text before the first heading is kept as its own section.
        Whitespace-only fragments are dropped.
        """
        import re

        # Anchored at line start so a mid-line "# " (e.g. "C# for") is not
        # mistaken for a heading.
        sections = re.split(
            r'(^#{1,6}\s+.*?)(?=\n#{1,6}\s|\Z)',
            text,
            flags=re.DOTALL | re.MULTILINE,
        )
        return [s for s in sections if s.strip()]
3. 임베딩 모델 선택 및 비교
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
class EmbeddingBenchmark:
    """Compare several sentence-transformer models on the same texts/queries."""

    def __init__(self):
        # Every model is instantiated eagerly when the benchmark is created.
        model_ids = (
            "all-MiniLM-L6-v2",
            "all-mpnet-base-v2",
            "multi-qa-MiniLM-L6-v2",
        )
        self.models = {
            model_id: SentenceTransformer(model_id) for model_id in model_ids
        }

    def compare_models(self, texts, queries):
        """Compute query-to-text similarity statistics for each model.

        Args:
            texts: Candidate texts to encode.
            queries: Queries to encode.

        Returns:
            A dict keyed by model name. Each value holds the mean and the
            standard deviation over the whole similarity matrix, plus
            ``"top_k_similarities"``: the row-wise maximum of that matrix
            (queries are passed as the first argument, so presumably one
            value per query).
        """
        report = {}
        for name, encoder in self.models.items():
            # Encode both sides with the same model.
            text_vectors = encoder.encode(texts)
            query_vectors = encoder.encode(queries)
            # Pairwise similarity between queries and texts.
            scores = cosine_similarity(query_vectors, text_vectors)
            stats = {
                "mean_similarity": np.mean(scores),
                "std_similarity": np.std(scores),
                "top_k_similarities": np.max(scores, axis=1),
            }
            report[name] = stats
        return report
# Usage example: compare every configured model on two sample texts and two
# sample queries, then print the per-model statistics.
# Creating EmbeddingBenchmark instantiates all three SentenceTransformer models.
benchmark = EmbeddingBenchmark()
texts = ["Python은 인기 있는 프로그래밍 언어입니다.", "JavaScript는 웹 개발에 사용됩니다."]
queries = ["Python에 대해 설명해 주세요.", "웹 개발 언어는 무엇인가요?"]
results = benchmark.compare_models(texts, queries)
print(results)
4. 벡터 데이터베이스 비교
4.1 Chroma (로컬)
import chromadb
from chromadb.utils import embedding_functions
class ChromaVectorDB:
    """Thin wrapper around a single Chroma collection."""

    def __init__(self, collection_name="rag_collection"):
        self.client = chromadb.Client()
        default_embedder = embedding_functions.DefaultEmbeddingFunction()
        self.collection = self.client.get_or_create_collection(
            name=collection_name,
            embedding_function=default_embedder,
        )

    def add_documents(self, documents, ids):
        """Add *documents* to the collection under the given *ids*."""
        self.collection.add(documents=documents, ids=ids)

    def search(self, query, k=5):
        """Query the collection with the text *query*, asking for *k* results.

        Returns:
            The first entry of the response's ``'documents'`` field, i.e. the
            matches for the single query text submitted.
        """
        response = self.collection.query(query_texts=[query], n_results=k)
        matches = response['documents']
        return matches[0]
4.2 Qdrant
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Filter, FieldCondition, MatchValue
class QdrantVectorDB:
    """Wrapper around one Qdrant collection named ``"rag_collection"``."""

    def __init__(self, host="localhost", port=6333):
        self.client = QdrantClient(host=host, port=port)
        self.collection_name = "rag_collection"
        # Create the collection on first use only.
        if not self.client.collection_exists(collection_name=self.collection_name):
            self.client.create_collection(
                collection_name=self.collection_name,
                # NOTE(review): size=384 is hard-coded and must equal the
                # embedding dimension of the model in use — confirm.
                vectors_config=VectorParams(size=384, distance="Cosine")
            )

    def add_documents(self, documents, ids):
        """Upsert *documents* under the caller-supplied *ids*.

        Args:
            documents: Objects exposing ``.embedding`` and ``.content``.
            ids: One point id per document, in the same order. ``None`` falls
                back to positional indices ``0..n-1``.

        Raises:
            ValueError: If *ids* and *documents* differ in length.
        """
        documents = list(documents)
        # Honour the supplied ids: numbering every batch from 0 would
        # overwrite the points written by earlier calls.
        ids = list(range(len(documents))) if ids is None else list(ids)
        if len(ids) != len(documents):
            raise ValueError(
                f"got {len(ids)} ids for {len(documents)} documents"
            )

        self.client.upsert(
            collection_name=self.collection_name,
            points=[
                {
                    "id": point_id,
                    "vector": doc.embedding,
                    "payload": {"content": doc.content}
                } for point_id, doc in zip(ids, documents)
            ]
        )

    def search(self, query_vector, k=5):
        """Return the ``content`` payload of up to *k* hits for *query_vector*."""
        results = self.client.search(
            collection_name=self.collection_name,
            query_vector=query_vector,
            limit=k
        )
        return [hit.payload['content'] for hit in results]
4.3 pgvector (PostgreSQL 확장)
import psycopg2
import numpy as np
class PGVectorDB:
    """Store and search documents in a PostgreSQL table with a vector column."""

    def __init__(self, connection_string):
        self.conn = psycopg2.connect(connection_string)
        self.setup_table()

    def setup_table(self):
        """Create the ``rag_documents`` table and its cosine index if missing.

        NOTE(review): the ``VECTOR`` type presumably requires the pgvector
        extension to already be enabled in the target database — confirm.
        """
        with self.conn.cursor() as cur:
            cur.execute("""
                CREATE TABLE IF NOT EXISTS rag_documents (
                    id SERIAL PRIMARY KEY,
                    content TEXT,
                    embedding VECTOR(384)
                )
            """)
            cur.execute("""
                CREATE INDEX IF NOT EXISTS idx_embedding ON rag_documents
                USING ivfflat (embedding vector_cosine_ops)
            """)
        self.conn.commit()

    def add_documents(self, documents):
        """Insert *documents* and commit.

        Args:
            documents: Objects exposing ``.content`` and an ``.embedding``
                that supports ``.tolist()``.
        """
        with self.conn.cursor() as cur:
            for doc in documents:
                cur.execute(
                    "INSERT INTO rag_documents (content, embedding) VALUES (%s, %s)",
                    (doc.content, doc.embedding.tolist())
                )
        self.conn.commit()

    def search(self, query_vector, k=5):
        """Return the content of the *k* rows ordered first by ``<=>`` distance.

        Args:
            query_vector: Query embedding supporting ``.tolist()``.
            k: Maximum number of rows to return.

        Returns:
            A list of ``content`` strings in the order returned by the query.
        """
        with self.conn.cursor() as cur:
            # psycopg2 adapts a Python list to a SQL array, and there is no
            # ``vector <=> numeric[]`` operator, so cast the parameter.
            cur.execute("""
                SELECT content FROM rag_documents
                ORDER BY embedding <=> %s::vector
                LIMIT %s
            """, (query_vector.tolist(), k))
            return [row[0] for row in cur.fetchall()]
5. 완전한 RAG 파이프라인 구현
python
import os
from typing import List, Dict, Any
from
---
📥 **Get the full guide on Gumroad**: https://gumroad.com/l/auto ($7)
AI 자동 생성 콘텐츠
본 콘텐츠는 Dev.to AI tag의 원문을 AI가 자동으로 요약·번역·분석한 것입니다. 원 저작권은 원저작자에게 있으며, 정확한 내용은 반드시 원문을 확인해 주세요.
원문 바로가기