Copy
Ask AI
/**
 * Extracts a single structured object (an "entity") from a web page using
 * AI-based extraction, shaped by the supplied schema.
 *
 * @deprecated This function is deprecated and will be removed in the future.
 *
 * @param page - The Playwright Page object from which to extract the data.
 * @param options - Extraction settings:
 *   - `label`: a label for this extraction process, used for billing and
 *     monitoring.
 *   - `entityName`: the name of the entity being extracted. It must be between
 *     1 and 50 characters long and can only contain letters, digits, periods,
 *     underscores, and hyphens.
 *   - `entitySchema`: the schema of the entity being extracted.
 *   - `strategy` (optional): the extraction strategy. If omitted, the HTML
 *     strategy with Claude Haiku is used.
 *   - `prompt` (optional): a prompt to guide the extraction process.
 *   - `optionalPropertiesInvalidator` (optional): a function to invalidate
 *     optional properties. It receives the extraction result (or null) and
 *     returns a list of strings.
 *     NOTE(review): presumably these are the names of the properties to
 *     invalidate; confirm against the implementation.
 *   - `variantKey` (optional): a variant key for the extraction process.
 *   - `apiKey` (optional): an API key to use for the AI extraction.
 *     Extractions made with your own API key are not billed to your account.
 * @returns A promise resolving to a record of string keys to string-or-null
 *   values, or to null.
 *   NOTE(review): the conditions under which null is returned are not stated
 *   here; confirm before relying on them.
 */
export declare function extractObjectFromPage(
page: Page,
options: {
label: string;
entityName: string;
entitySchema: SimpleObjectSchema;
strategy?: ImageStrategy | HtmlStrategy;
prompt?: string;
optionalPropertiesInvalidator?: (
result: Record<string, string | null> | null
) => string[];
variantKey?: string;
apiKey?: string;
}
): Promise<Record<string, string | null> | null>;
Deprecated: This function is deprecated and will be removed in the future.
Examples
Copy
Ask AI
import { extractObjectFromPage } from "@intuned/sdk/optimized-extractors";
// Navigate to the page that contains the entity to extract.
await page.goto("https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html")
// Extract one "book" object from the current page. Only the required options
// (entityName, label, entitySchema) are passed; the optional ones are omitted.
const book = await extractObjectFromPage(page,
{
entityName: "book",
label: "book-extraction",
// The schema is an inline object literal listing the properties to extract.
entitySchema: {
type: "object",
// All three declared properties are marked as required.
required: ["name","price","reviews"],
properties: {
name: {
type: "string",
description: "book name",
},
price: {
type: "string",
description: "book price"
},
reviews: {
type: "string",
description: "Number of reviews"
}
}
}
},
)
console.log(book)
// output:
// { name: 'A Light in the Attic', price: '£51.77', reviews: '0' }
Arguments
The Playwright Page object from which to extract the data.
Show options
Show options
A label for this extraction process, used for billing and monitoring.
The name of the entity being extracted. It must be between 1 and 50 characters long and can only contain letters, digits, periods, underscores, and hyphens.
The schema of the entity being extracted.
Optional. The strategy to use for extraction. If not provided, the HTML strategy with Claude Haiku will be used.
Optional. A prompt to guide the extraction process.
Optional. A function to invalidate optional properties.
Optional. A variant key for the extraction process.
Optional. An API key to use for the AI extraction. Extractions made with your API key will not be billed to your account.