YouTube 文件載入器
將 YouTube 影片的文字記錄轉換為文件格式
from llama_index.core import download_loader

# Load environment variables
from dotenv import load_dotenv

load_dotenv()

# Look up the YouTube transcript reader by name and create an instance of it.
YoutubeTranscriptReader = download_loader("YoutubeTranscriptReader")
loader = YoutubeTranscriptReader()

# Load the transcript of every linked video; the result is kept in
# youtube_documents for the indexing step further down.
youtube_documents = loader.load_data(
    ytlinks=[
        "https://www.youtube.com/watch?v=nHcbHdgVUJg&ab_channel=WintWealth",
    ],
)
會得到如下格式的 Document 物件串列(此為 Python 物件的輸出表示,並非 JSON 資料):
[Document(id_='nHcbHdgVUJg', embedding=None, metadata={'video_id': 'nHcbHdgVUJg'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text="I'm a Commerce graduate.......", start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n')]
載入PDF文件
使用下面的程式碼可以讀取本機的 PDF 文件,執行前請先確認 ./sample.pdf 這個檔案存在。
from pathlib import Path
from llama_index.core import download_loader

# Look up the PDF reader by name and create an instance of it.
PDFReader = download_loader("PDFReader")
loader = PDFReader()

# The path is relative, so it is resolved against the current working
# directory; the file has to exist before load_data is called.
pdf_path = Path("./sample.pdf")
pdf_document = loader.load_data(file=pdf_path)
會得到如下的 Document 物件串列(輸出為 Python 物件的表示,並不會產生 JSON 檔案):
[Document(id_='c63920fc-1f19-4112-ab6a-d18aa193c037', embedding=None, metadata={'page_label': '1', 'file_name': 'sample.pdf'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text=' \n \n \n \n \n \nLangChain 技术解密: \n构建大模型应用的全景指南 \n \n \n王浩帆 编著 \n \n \n \n \n \n \n \n \n \n \n \n \n', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), ......]
載入Notion筆記本
import os
from llama_index.core import download_loader

# Placeholder values: replace both with a real Notion integration token and
# the ID of the database to read before running this snippet.
integration_token = "your-notion-token"
database_id = "your-database-id"

# Look up the Notion page reader by name, authenticate it with the token,
# and load the pages of the given database.
NotionPageReader = download_loader("NotionPageReader")
reader = NotionPageReader(integration_token=integration_token)
notion_documents = reader.load_data(database_id=database_id)
建立對話機器人
import os
from llama_index.core import VectorStoreIndex

# Combine the YouTube and PDF documents into a single new list.
# NOTE(review): notion_documents is not part of this list; confirm whether
# leaving the Notion pages out of the index is intentional.
all_documents = [*youtube_documents, *pdf_document]

# Build a vector store index from the combined documents.
index = VectorStoreIndex.from_documents(all_documents)
對話測試
# Question sent to the index (asks for an introduction to LangChain).
question = "介紹LangChain"

# Create a query engine from the index, run the question, and print the answer.
query_engine = index.as_query_engine()
response = query_engine.query(question)
print(response)
LangChain是一个基于大语言模型的应用程序开发框架,旨在简化创建大模型应用程序的过程。它提供了一套完整的工具、组件和接口,使开发者能够轻松地利用大语….
執行後可得到類似以上的回應(此處僅節錄開頭部分,實際內容會因模型而略有不同)。