RAG 시스템 실전 구축 (v48)

현업 ML 엔지니어와 백엔드 개발자가 RAG 시스템을 빠르게 구축하고 운영할 수 있도록 돕는 실전 가이드입니다.

1. RAG 기초 원리 (Retrieval → Augmentation → Generation 루프)

RAG (Retrieval-Augmented Generation)는 다음과 같은 세 가지 핵심 단계로 구성됩니다:

Retrieval (검색): 입력 쿼리와 가장 관련성이 높은 문서/조각을 찾습니다
Augmentation (보완): 검색된 정보를 프롬프트에 포함시켜 LLM의 컨텍스트를 확장합니다
Generation (생성): 확장된 컨텍스트를 기반으로 정확한 응답을 생성합니다

쿼리 → [검색] → 문서 조각 → [보완] → 프롬프트 → [생성] → 응답

이 루프는 대규모 언어 모델(LLM)의 지식 한계(학습 이후의 최신 정보나 비공개 도메인 지식의 부재)를 보완하고, 특정 도메인 지식을 활용할 수 있게 해줍니다.

2. Chunking 전략 (Semantic, Recursive, Agentic)

2.1 Semantic Chunking

Semantic Chunking은 의미 단위로 텍스트를 분할하여 의미적 연결이 강한 문장들을 묶습니다.

from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.cluster import KMeans

class SemanticChunker:
    """Group the sentences of a text into chunks by clustering their embeddings.

    Sentences are embedded with a SentenceTransformer model and clustered with
    KMeans; each cluster becomes one chunk. Clustering ignores position, so a
    chunk may contain sentences that were not adjacent in the original text.
    """

    def __init__(self, model_name="all-MiniLM-L6-v2"):
        """Load the sentence-embedding model named ``model_name``."""
        self.model = SentenceTransformer(model_name)

    @staticmethod
    def _split_sentences(text):
        """Split ``text`` into non-empty sentences, keeping end punctuation."""
        import re  # local import so the block does not rely on file-level imports

        # Split on whitespace that follows '.', '!' or '?'. The lookbehind keeps
        # the punctuation attached to its sentence instead of discarding it.
        pieces = re.split(r"(?<=[.!?])\s+", text.strip())
        return [piece for piece in pieces if piece]

    def chunk_semantically(self, text, threshold=0.7):
        """Return a list of chunks, each a space-joined group of sentences.

        Args:
            text: Text to split. Empty or whitespace-only text yields ``[]``.
            threshold: Currently unused; accepted so existing callers keep
                working.

        Returns:
            One string per cluster, ordered by the first sentence each cluster
            contains. The number of clusters is ``max(1, sentences // 3)``.
        """
        if not text:
            return []

        sentences = self._split_sentences(text)
        if not sentences:
            return []

        n_clusters = max(1, len(sentences) // 3)
        if n_clusters == 1:
            # A single cluster always contains every sentence, so embedding
            # and clustering would only cost time.
            return [' '.join(sentences)]

        embeddings = self.model.encode(sentences)

        # A fixed seed and explicit n_init make the chunks reproducible.
        kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
        labels = kmeans.fit_predict(embeddings)

        # dict preserves insertion order, so clusters stay ordered by the
        # position of their first sentence.
        clusters = {}
        for sentence, cluster_id in zip(sentences, labels):
            clusters.setdefault(int(cluster_id), []).append(sentence)

        return [' '.join(group) for group in clusters.values()]

# Usage example: build a chunker with the default embedding model and split a
# short three-sentence Korean sample into chunks.
chunker = SemanticChunker()
text = "RAG 시스템은 검색과 생성을 결합한 아키텍처입니다. 이 시스템은 대규모 언어 모델의 지식 제한을 극복합니다. Semantic Chunking은 의미 단위로 텍스트를 분할합니다."
chunks = chunker.chunk_semantically(text)

2.2 Recursive Chunking

문단, 줄, 단어처럼 큰 구분자에서 작은 구분자 순으로 텍스트를 재귀적으로 분할하여, 각 chunk가 지정한 최대 크기를 넘지 않도록 합니다.

class RecursiveChunker:
    """Split text on progressively finer separators until chunks fit a limit.

    The text is split on the first separator it contains and neighbouring
    parts are merged while they fit in ``max_chunk_size``. A part that is
    still too long is split again with the remaining, finer separators. The
    empty-string separator means "cut every ``max_chunk_size`` characters".
    """

    # Coarse-to-fine order: paragraphs, lines, words, then raw characters.
    DEFAULT_SEPARATORS = ("\n\n", "\n", " ", "")

    def __init__(self, max_chunk_size=500, min_chunk_size=100):
        """Store the size limits.

        Args:
            max_chunk_size: Upper bound, in characters, used when merging and
                when cutting text that has no usable separator.
            min_chunk_size: Stored for callers; not enforced by the splitter.
        """
        self.max_chunk_size = max_chunk_size
        self.min_chunk_size = min_chunk_size

    def chunk_recursive(self, text, separators=None):
        """Return ``text`` split into a list of chunks.

        Args:
            text: Text to split. Empty text yields ``[]``.
            separators: Separators to try, coarsest first. Defaults to
                ``DEFAULT_SEPARATORS``. The sequence is never modified.

        Returns:
            Whitespace-stripped, non-empty chunks. A piece that no separator
            can break down is returned whole, even if it exceeds the limit.
        """
        if not text:
            return []
        if separators is None:
            separators = self.DEFAULT_SEPARATORS
        return self._split(text, list(separators))

    def _split(self, text, separators):
        """Split ``text`` on the first applicable separator in ``separators``."""
        for index, separator in enumerate(separators):
            if separator == "":
                # str.split("") raises ValueError, so cut by length instead.
                step = max(1, self.max_chunk_size)
                return [text[start:start + step]
                        for start in range(0, len(text), step)]
            if separator in text:
                return self._merge(
                    text.split(separator), separator, separators[index + 1:]
                )
        # No separator applies: nothing further can be done with this text.
        return [text]

    def _merge(self, parts, separator, finer_separators):
        """Greedily pack ``parts`` into chunks, re-splitting oversized parts."""
        chunks = []
        current = ""

        for part in parts:
            piece = part + separator
            if len(current) + len(piece) <= self.max_chunk_size:
                current += piece
                continue

            # The running chunk is full: emit it unless it is only whitespace.
            stripped = current.strip()
            if stripped:
                chunks.append(stripped)

            if len(part) > self.max_chunk_size:
                # This part alone is too long; break it down further.
                chunks.extend(self._split(part, finer_separators))
                current = ""
            else:
                current = piece

        stripped = current.strip()
        if stripped:
            chunks.append(stripped)
        return chunks

# Usage example: chunk a short two-sentence Korean sample with the default
# size limits (max_chunk_size=500, min_chunk_size=100).
recursive_chunker = RecursiveChunker()
text = "이 문서는 RAG 시스템 구축에 대한 실전 가이드입니다. 각 단계별로 코드 예제와 설명을 포함합니다."
chunks = recursive_chunker.chunk_recursive(text)

2.3 Agentic Chunking

LLM을 사용하여 chunking을 자동화하고, 특정 목적에 맞춘 chunk를 생성합니다.

from openai import OpenAI
import json

class AgenticChunker:
    """Ask an OpenAI chat model to split text into meaning-preserving chunks."""

    # JSON mode (response_format={"type": "json_object"}) is only accepted by
    # some chat models, so the default must be one that supports it.
    DEFAULT_MODEL = "gpt-4o"

    def __init__(self, api_key, model=None):
        """Create the API client.

        Args:
            api_key: OpenAI API key.
            model: Chat model to use; falls back to ``DEFAULT_MODEL``. It must
                support JSON mode.
        """
        self.client = OpenAI(api_key=api_key)
        self.model = model or self.DEFAULT_MODEL

    def generate_chunking_prompt(self, text, target_chunks=5):
        """Build the Korean instruction prompt asking for ``target_chunks`` chunks.

        The prompt requests a JSON object with a ``"chunks"`` list, which is
        what ``chunk_agentic`` parses.
        """
        prompt = f"""
        다음 텍스트를 {target_chunks}개의 의미 단위로 분할하세요.
        각 chunk는 최대 1000자 이내여야 하며, 의미가 완전히 끝나야 합니다.
        JSON 형식으로 반환하세요:
        {{
            "chunks": ["chunk1", "chunk2", "..."]
        }}

        텍스트: {text}
        """
        return prompt

    def chunk_agentic(self, text, target_chunks=5):
        """Return the chunks the model produced for ``text``.

        Args:
            text: Text to split. Empty or whitespace-only text yields ``[]``
                without calling the API.
            target_chunks: Number of chunks requested in the prompt.

        Returns:
            The value of the ``"chunks"`` key in the model's JSON reply.

        Raises:
            ValueError: If the reply has no content or is not valid JSON
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
            KeyError: If the JSON reply has no ``"chunks"`` key.
        """
        if not text or not text.strip():
            # Nothing to split; avoid a pointless API round trip.
            return []

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": self.generate_chunking_prompt(text, target_chunks)}],
            response_format={"type": "json_object"}
        )

        content = response.choices[0].message.content
        if content is None:
            # json.loads(None) would raise an unhelpful TypeError.
            raise ValueError("The model returned no content; cannot parse chunks")

        result = json.loads(content)
        return result["chunks"]

3. Embedding 모델 선택 및 비교

3.1 주요 임베딩 모델

# 다양한 임베딩 모델 비교
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModel
import torch

class EmbeddingEvaluator:
    """Load several sentence-embedding models and compare them on the same texts."""

    # Display name -> identifier passed to SentenceTransformer.
    DEFAULT_MODELS = {
        'all-MiniLM-L6-v2': 'all-MiniLM-L6-v2',
        'all-mpnet-base-v2': 'all-mpnet-base-v2',
        'sentence-t5-xxl': 'sentence-t5-xxl',
        'bge-small-en': 'BAAI/bge-small-en',
    }

    def __init__(self, model_names=None):
        """Load the models to evaluate.

        Args:
            model_names: Optional iterable of names to load. A name found in
                ``DEFAULT_MODELS`` is mapped to its identifier; any other name
                is passed to SentenceTransformer unchanged. ``None`` loads
                every entry of ``DEFAULT_MODELS``.
        """
        if model_names is None:
            selected = self.DEFAULT_MODELS
        else:
            selected = {
                name: self.DEFAULT_MODELS.get(name, name) for name in model_names
            }
        self.models = {
            name: SentenceTransformer(model_id)
            for name, model_id in selected.items()
        }

    def get_embedding(self, text, model_name):
        """Return the embedding of ``text`` produced by the model ``model_name``."""
        model = self.models[model_name]
        return model.encode(text)

    def compare_models(self, texts, reference_embedding):
        """Return each model's mean cosine similarity between texts and a reference.

        Args:
            texts: A string or a sequence of strings to score.
            reference_embedding: Preferably the reference *text* (``str``),
                which is encoded separately by every model. A precomputed
                vector is still accepted, but it is only comparable with
                models that share its vector space and size.

        Returns:
            Dict mapping model name to the mean cosine similarity. Empty
            ``texts`` gives ``nan`` for every model.

        Raises:
            ValueError: If a precomputed reference vector does not match the
                output size of one of the models.
        """
        texts = [texts] if isinstance(texts, str) else list(texts)
        if not texts:
            return {name: float("nan") for name in self.models}

        results = {}
        for name, model in self.models.items():
            embeddings = np.atleast_2d(np.asarray(model.encode(texts)))

            if isinstance(reference_embedding, str):
                # Encode the reference with the same model so both sides live
                # in the same vector space.
                reference = np.asarray(model.encode(reference_embedding))
            else:
                reference = np.asarray(reference_embedding)

            if reference.ndim != 1 or reference.shape[0] != embeddings.shape[1]:
                raise ValueError(
                    f"Reference embedding has shape {reference.shape} but model "
                    f"'{name}' produces {embeddings.shape[1]}-dimensional vectors; "
                    "pass the reference text instead so it is encoded per model."
                )

            # Cosine similarity for all texts at once.
            dots = embeddings @ reference
            denominators = np.linalg.norm(embeddings, axis=1) * np.linalg.norm(reference)
            similarities = np.divide(
                dots,
                denominators,
                out=np.zeros_like(dots, dtype=float),
                where=denominators > 0,  # a zero vector scores 0 instead of nan
            )
            results[name] = np.mean(similarities)
        return results

# Usage example: the reference is passed as text so that every model encodes
# it itself; a vector from one model cannot be scored against the others.
evaluator = EmbeddingEvaluator()
texts = ["RAG 시스템은 검색 기반 생성 모델입니다.", "검색을 통해 관련 정보를 가져와 생성합니다."]
reference_text = "RAG 시스템의 핵심 개념"
reference = evaluator.get_embedding(reference_text, "all-MiniLM-L6-v2")
results = evaluator.compare_models(texts, reference_text)

4. Vector Database 비교 (Chroma, Qdrant, pgvector, Milvus)

4.1 Chroma

import chromadb
from chromadb.config import Settings
import numpy as np

class ChromaVectorDB:
    """Minimal wrapper around a single Chroma collection."""

    def __init__(self, collection_name="rag_collection"):
        """Create a Chroma client with telemetry disabled and open the collection."""
        telemetry_off = Settings(anonymized_telemetry=False)
        self.client = chromadb.Client(telemetry_off)
        self.collection = self.client.get_or_create_collection(collection_name)

    def add_documents(self, documents, embeddings, ids):
        """Add ``documents`` with their ``embeddings`` and ``ids`` to the collection."""
        payload = {
            "embeddings": embeddings,
            "documents": documents,
            "ids": ids,
        }
        self.collection.add(**payload)

    def search(self, query_embedding, n_results=5):
        """Query the collection with one embedding and return Chroma's raw result."""
        # Chroma expects a batch of query embeddings, hence the one-item list.
        return self.collection.query(
            query_embeddings=[query_embedding], n_results=n_results
        )

4.2 Qdrant

from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Filter, FieldCondition, MatchValue

class QdrantVectorDB:
    """Minimal wrapper around a single Qdrant collection."""

    def __init__(self, host="localhost", port=6333, collection_name="rag_collection"):
        """Connect to Qdrant at ``host``:``port`` and remember the collection name."""
        self.client = QdrantClient(host=host, port=port)
        self.collection_name = collection_name

    @staticmethod
    def _to_list(vector):
        """Return ``vector`` as a plain list, for arrays and sequences alike."""
        return vector.tolist() if hasattr(vector, "tolist") else list(vector)

    def create_collection(self, vector_size=384):
        """Create the collection with cosine distance.

        Args:
            vector_size: Dimensionality of the stored vectors; it must match
                the embedding model in use. Defaults to 384.
        """
        self.client.create_collection(
            collection_name=self.collection_name,
            vectors_config=VectorParams(size=vector_size, distance="Cosine")
        )

    def add_documents(self, documents, embeddings, ids):
        """Upsert one point per document.

        Args:
            documents: Document texts, stored under the ``"document"`` payload key.
            embeddings: One vector per document (arrays or plain sequences).
            ids: One point id per document.

        Raises:
            ValueError: If the three inputs differ in length; ``zip`` would
                otherwise drop the surplus items silently.
        """
        documents = list(documents)
        embeddings = list(embeddings)
        ids = list(ids)
        if not (len(documents) == len(embeddings) == len(ids)):
            raise ValueError(
                "documents, embeddings and ids must have the same length "
                f"(got {len(documents)}, {len(embeddings)}, {len(ids)})"
            )
        if not documents:
            return  # nothing to upsert

        points = [
            {
                "id": point_id,
                "vector": self._to_list(embedding),
                "payload": {"document": doc}
            }
            for point_id, doc, embedding in zip(ids, documents, embeddings)
        ]
        self.client.upsert(collection_name=self.collection_name, points=points)

    def search(self, query_embedding, n_results=5):
        """Return ``(id, document)`` pairs for the ``n_results`` nearest points."""
        results = self.client.search(
            collection_name=self.collection_name,
            query_vector=self._to_list(query_embedding),
            limit=n_results
        )
        return [(hit.id, hit.payload["document"]) for hit in results]

4.3 pgvector


python
import psycopg2
from psycopg2.extras import Json
import numpy as np

class PGVectorDB:
    def __init

---

📥 **Get the full guide on Gumroad**: https://gumroad.com/l/auto ($7)

RAG 시스템 실전 구축 (v48)

요약

핵심 포인트

RAG 시스템 실전 구축 (v48)

1. RAG 기초 원리 (Retrieval → Augmentation → Generation 루프)

2. Chunking 전략 (Semantic, Recursive, Agentic)

2.1 Semantic Chunking

2.2 Recursive Chunking

2.3 Agentic Chunking

3. Embedding 모델 선택 및 비교

3.1 주요 임베딩 모델

4. Vector Database 비교 (Chroma, Qdrant, pgvector, Milvus)

4.1 Chroma

4.2 Qdrant

4.3 pgvector

댓글