```python
from typing import List, Optional

from pydantic import BaseModel, Field


class Person(BaseModel):
    """Information about a person."""

    name: Optional[str] = Field(default=None, description="The name of the person")
    hair_color: Optional[str] = Field(
        default=None, description="The color of the person's hair if known"
    )
    height_in_meters: Optional[str] = Field(
        default=None, description="Height measured in meters"
    )


class Data(BaseModel):
    """Extracted data about people."""

    people: List[Person]
```
There are a few key design points here:

- All fields are optional (`Optional`), which lets the model return a null value when it cannot determine an attribute (see the short sketch after this list).
- Every field has a clear description; these descriptions help the language model extract information more accurately.
- The nested model structure supports extracting multiple entities.
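To see why the optional fields matter, here is a small sketch (an addition to the tutorial; the sample values are made up) of what extracted data looks like when some attributes are missing. Pydantic simply fills the omitted fields with `None`:

```python
# Hypothetical example values -- only to illustrate the optional fields.
example = Data(
    people=[
        Person(name="Alan Smith"),       # only the name was found in the text
        Person(hair_color="black"),      # name unknown, hair color extracted
    ]
)

print(example.people[0].hair_color)  # -> None
print(example.model_dump())          # Pydantic v2; use .dict() on Pydantic v1
```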
2. Configure the Prompt Template

Next, we need to set up a prompt template that instructs the language model how to extract information:
```python
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        "You are an expert extraction algorithm. "
        "Only extract relevant information from the text. "
        "If you do not know the value of an attribute asked to extract, "
        "return null for the attribute's value.",
    ),
    ("human", "{text}"),
])
```
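As a quick check (this snippet is an addition, not part of the original tutorial; the sample sentence is only an illustration), you can render the template with a piece of text and inspect the exact messages that will be sent to the model:

```python
# Render the prompt with a sample sentence; no model call happens here.
prompt_value = prompt.invoke(
    {"text": "Alan Smith is 1.83 meters tall and has blond hair."}
)

for message in prompt_value.to_messages():
    print(f"{message.type}: {message.content}")
```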
Putting the two pieces together, the complete, annotated code for the schema and prompt looks like this:

```python
from typing import List, Optional

from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI


class Person(BaseModel):
    """Information about a person."""

    # ^ Doc-string for the entity Person.
    # This doc-string is sent to the LLM as the description of the schema Person,
    # and it can help to improve extraction results.

    # Note that:
    # 1. Each field is an `optional` -- this allows the model to decline to extract it!
    # 2. Each field has a `description` -- this description is used by the LLM.
    # Having a good description can help improve extraction results.
    name: Optional[str] = Field(default=None, description="The name of the person")
    hair_color: Optional[str] = Field(
        default=None, description="The color of the person's hair if known"
    )
    height_in_meters: Optional[str] = Field(
        default=None, description="Height measured in meters"
    )


class Data(BaseModel):
    """Extracted data about people."""

    # Creates a model so that we can extract multiple entities.
    people: List[Person]


# Define a custom prompt to provide instructions and any additional context.
# 1) You can add examples into the prompt template to improve extraction quality
# 2) Introduce additional parameters to take context into account (e.g., include metadata
#    about the document from which the text was extracted.)
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert extraction algorithm. "
            "Only extract relevant information from the text. "
            "If you do not know the value of an attribute asked to extract, "
            "return null for the attribute's value.",
        ),
        # Please see the how-to about improving performance with
        # reference examples.
        # MessagesPlaceholder('examples'),
        ("human", "{text}"),
    ]
)
```
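With the schema and prompt in place, a minimal end-to-end sketch looks like the following. This is an illustration rather than part of the tutorial text: it assumes `langchain-openai` is installed and `OPENAI_API_KEY` is set, and uses `gpt-4o-mini` as a stand-in model name. `with_structured_output` binds the `Data` schema so the model's reply is parsed into a validated Pydantic object:

```python
# Minimal sketch -- the model name and sample text are assumptions for illustration.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Bind the Pydantic schema so the model's reply comes back as a Data instance.
structured_llm = llm.with_structured_output(schema=Data)

text = "Alan Smith is 1.83 meters tall and has blond hair."
result = structured_llm.invoke(prompt.invoke({"text": text}))

print(result)
# Roughly: people=[Person(name='Alan Smith', hair_color='blond', height_in_meters='1.83')]
# (actual values depend on the model)
```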