- From Page or Locator
- From Content
Copy
Ask AI
/**
 * Extracts structured data from a Playwright page or locator using an AI
 * model, shaped by the supplied schema.
 *
 * @param options - Configuration object containing the extraction parameters.
 * @returns A promise resolving to the extracted data. It is declared as
 *   `any`, so the result's shape is not checked at compile time.
 */
export declare function extractStructuredData(options: {
/** Page to extract from the entire page, or Locator to extract from a specific element. */
source: Page | Locator;
/** Structure of the data to extract, given as a JsonSchema or a ZodSchema. */
dataSchema: JsonSchema | z.ZodSchema;
/** Optional prompt to guide the extraction and provide more context. */
prompt?: string;
/** Type of extraction. Defaults to "HTML". */
strategy?: "IMAGE" | "MARKDOWN" | "HTML";
/**
 * Whether to match extracted results with DOM elements. Defaults to false.
 * When true, all types in the schema must be strings.
 */
enableDomMatching?: boolean;
/** Whether to cache the extracted data. Defaults to true. */
enableCache?: boolean;
/** Maximum number of retry attempts on failures (validation, API, output errors, etc.). Defaults to 3. */
maxRetries?: number;
/** AI model to use for extraction. Defaults to "claude-3-5-haiku-latest". */
model?: SUPPORTED_MODELS;
/** Optional API key for AI extraction; when provided, usage is not billed to your account. */
apiKey?: string;
}): Promise<any>;
Examples
Copy
Ask AI
import { extractStructuredData } from '@intuned/browser/ai';
/**
 * Example handler: navigates to the Books to Scrape site and extracts one
 * product's details from the page using `extractStructuredData`.
 *
 * @param params - Handler input; not used in this example.
 * @param page - Playwright page used for navigation and as the extraction source.
 * @param context - Not used in this example.
 */
export default async function handler(params, page, context){
  await page.goto("https://books.toscrape.com/");

  const product = await extractStructuredData({
    // Passing the page (rather than a locator) extracts from the entire page.
    source: page,
    strategy: "HTML",
    // Explicit model choice; omit this to use the library default.
    model: "gpt-4o",
    dataSchema: {
      type: "object",
      properties: {
        name: { type: "string" },
        price: { type: "string" },
        description: { type: "string" },
        inStock: { type: "boolean" }
      },
      // Only name and price are mandatory; the other fields may be absent.
      required: ["name", "price"]
    },
    prompt: "Extract product details from this e-commerce page"
  });

  console.log(`Found book: ${product.name} - ${product.price}`);
}
Arguments
Configuration object containing extraction parameters
Show options
Show options
Playwright Page object to extract data from the entire page or Locator object to extract data from a specific element
Schema defining the structure of the data to extract. This can be either a JsonSchema or a ZodSchema
Type of extraction: “HTML”, “IMAGE”, or “MARKDOWN”. Defaults to “HTML”
Optional prompt to guide the extraction process and provide more context
Whether to enable DOM element matching during extraction. Defaults to false. When set to true, all types in the schema must be strings to match with the DOM elements. The extracted results will be matched with the DOM elements and returned, then cached in a smart fashion so that the next time the same data is extracted, the result will be returned from the cache even if the DOM has minor changes.
Whether to enable caching of the extracted data. Defaults to true
Maximum number of retry attempts on failures. Failures can be validation errors, API errors, output errors, etc. Defaults to 3
AI model to use for extraction. See SUPPORTED_MODELS for all supported models. Defaults to “claude-3-5-haiku-latest”
Optional API key for AI extraction. If provided, the extraction will not be billed to your account