Official Python SDK for ScrapeGraphAI
pip install scrapegraph-py
from scrapegraph_py import Client

# Create a client, passing the API key explicitly.
client = Client(api_key="your-api-key-here")
Alternatively, set the `SGAI_API_KEY` environment variable and initialize the client without parameters:
# Construct the client without an explicit api_key argument.
# NOTE(review): presumably the key is then taken from the SGAI_API_KEY
# environment variable — confirm against the SDK documentation.
client = Client()
# Ask SmartScraper to extract content from a page using a natural-language prompt.
response = client.smartscraper(
    website_url="https://example.com",
    user_prompt="Extract the main heading and description",
)
Basic Schema Example
from pydantic import BaseModel, Field


# Schema listing the fields to extract from an article page.
# (A comment is used instead of a docstring so the generated schema is unchanged.)
class ArticleData(BaseModel):
    title: str = Field(description="The article title")
    author: str = Field(description="The author's name")
    publish_date: str = Field(description="Article publication date")
    content: str = Field(description="Main article content")
    category: str = Field(description="Article category")


# Pass the model as output_schema to request output in that shape.
response = client.smartscraper(
    website_url="https://example.com/blog/article",
    user_prompt="Extract the article information",
    output_schema=ArticleData,
)

# Read the extracted fields as attributes of the response.
print(f"Title: {response.title}")
print(f"Author: {response.author}")
print(f"Published: {response.publish_date}")
Advanced Schema Example
from typing import List

from pydantic import BaseModel, Field


# One entry in CompanyData.employees.
class Employee(BaseModel):
    name: str = Field(description="Employee's full name")
    position: str = Field(description="Job title")
    department: str = Field(description="Department name")
    email: str = Field(description="Email address")


# One entry in CompanyData.offices.
class Office(BaseModel):
    location: str = Field(description="Office location/city")
    address: str = Field(description="Full address")
    phone: str = Field(description="Contact number")


# Top-level schema; nests lists of Employee and Office models.
class CompanyData(BaseModel):
    name: str = Field(description="Company name")
    description: str = Field(description="Company description")
    industry: str = Field(description="Industry sector")
    founded_year: int = Field(description="Year company was founded")
    employees: List[Employee] = Field(description="List of key employees")
    offices: List[Office] = Field(description="Company office locations")
    website: str = Field(description="Company website URL")


# Extract comprehensive company information
response = client.smartscraper(
    website_url="https://example.com/about",
    user_prompt="Extract detailed company information including employees and offices",
    output_schema=CompanyData,
)

# Access nested data
print(f"Company: {response.name}")

print("\nKey Employees:")
for employee in response.employees:
    print(f"- {employee.name} ({employee.position})")

print("\nOffice Locations:")
for office in response.offices:
    print(f"- {office.location}: {office.address}")
# Run a search-based query; only a prompt is supplied, no target URL.
response = client.searchscraper(
    user_prompt="What are the key features and pricing of ChatGPT Plus?",
)
from typing import List

from pydantic import BaseModel, Field


# Schema listing the product fields to extract from search results.
# (A comment is used instead of a docstring so the generated schema is unchanged.)
class ProductInfo(BaseModel):
    name: str = Field(description="Product name")
    description: str = Field(description="Product description")
    price: str = Field(description="Product price")
    features: List[str] = Field(description="List of key features")
    availability: str = Field(description="Availability information")


# Pass the model as output_schema to request output in that shape.
response = client.searchscraper(
    user_prompt="Find information about iPhone 15 Pro",
    output_schema=ProductInfo,
)

print(f"Product: {response.name}")
print(f"Price: {response.price}")

print("\nFeatures:")
for feature in response.features:
    print(f"- {feature}")
from typing import List

from pydantic import BaseModel, Field


# One entry in MarketAnalysis.key_players.
class MarketPlayer(BaseModel):
    name: str = Field(description="Company name")
    market_share: str = Field(description="Market share percentage")
    key_products: List[str] = Field(description="Main products in market")
    strengths: List[str] = Field(description="Company's market strengths")


# One entry in MarketAnalysis.trends.
class MarketTrend(BaseModel):
    name: str = Field(description="Trend name")
    description: str = Field(description="Trend description")
    impact: str = Field(description="Expected market impact")
    timeframe: str = Field(description="Trend timeframe")


# Top-level schema; nests lists of MarketPlayer and MarketTrend models.
class MarketAnalysis(BaseModel):
    market_size: str = Field(description="Total market size")
    growth_rate: str = Field(description="Annual growth rate")
    key_players: List[MarketPlayer] = Field(description="Major market players")
    trends: List[MarketTrend] = Field(description="Market trends")
    challenges: List[str] = Field(description="Industry challenges")
    opportunities: List[str] = Field(description="Market opportunities")


# Perform comprehensive market research
response = client.searchscraper(
    user_prompt="Analyze the current AI chip market landscape",
    output_schema=MarketAnalysis,
)

# Access structured market data
print(f"Market Size: {response.market_size}")
print(f"Growth Rate: {response.growth_rate}")

print("\nKey Players:")
for player in response.key_players:
    print(f"\n{player.name}")
    print(f"Market Share: {player.market_share}")
    print("Key Products:")
    for product in player.key_products:
        print(f"- {product}")

print("\nMarket Trends:")
for trend in response.trends:
    print(f"\n{trend.name}")
    print(f"Impact: {trend.impact}")
    print(f"Timeframe: {trend.timeframe}")
# Call the markdownify endpoint for the given page URL.
response = client.markdownify(
    website_url="https://example.com",
)
import asyncio

from scrapegraph_py import AsyncClient


async def main():
    """Run one SmartScraper request through the async client and print the response."""
    # The client is used as an async context manager for the duration of the request.
    async with AsyncClient() as client:
        response = await client.smartscraper(
            website_url="https://example.com",
            user_prompt="Extract the main content",
        )
        print(response)


asyncio.run(main())
# Send a rating and free-text feedback for a previous request, identified by its ID.
client.submit_feedback(
    request_id="your-request-id",
    rating=5,
    feedback_text="Great results!",
)
License
Was this page helpful?