|
| 1 | +import json |
| 2 | +import os |
| 3 | +from importlib.metadata import version |
| 4 | +from platform import architecture, python_version |
| 5 | +from typing import Any, List, Type |
| 6 | + |
| 7 | +from crewai.tools import BaseTool |
| 8 | +from pydantic import BaseModel, ConfigDict, Field |
| 9 | + |
| 10 | +try: |
| 11 | + from oxylabs import RealtimeClient |
| 12 | + from oxylabs.sources.response import Response as OxylabsResponse |
| 13 | + |
| 14 | + OXYLABS_AVAILABLE = True |
| 15 | +except ImportError: |
| 16 | + RealtimeClient = Any |
| 17 | + OxylabsResponse = Any |
| 18 | + |
| 19 | + OXYLABS_AVAILABLE = False |
| 20 | + |
| 21 | + |
| 22 | +__all__ = ["OxylabsAmazonProductScraperTool", "OxylabsAmazonProductScraperConfig"] |
| 23 | + |
| 24 | + |
class OxylabsAmazonProductScraperArgs(BaseModel):
    # Input schema for the tool: a single required string argument.
    # (Kept as a comment rather than a docstring so the generated
    # schema for this model is unchanged.)
    query: str = Field(..., description="Amazon product ASIN")
| 27 | + |
| 28 | + |
class OxylabsAmazonProductScraperConfig(BaseModel):
    """
    Optional settings for the Amazon Product Scraper.

    Every field defaults to ``None``. Reference:
    https://developers.oxylabs.io/scraper-apis/web-scraper-api/targets/amazon/product
    """

    domain: str | None = Field(
        default=None,
        description="The domain to limit the search results to.",
    )
    geo_location: str | None = Field(
        default=None,
        description="The Deliver to location.",
    )
    user_agent_type: str | None = Field(
        default=None,
        description="Device type and browser.",
    )
    render: str | None = Field(
        default=None,
        description="Enables JavaScript rendering.",
    )
    callback_url: str | None = Field(
        default=None,
        description="URL to your callback endpoint.",
    )
    context: list | None = Field(
        default=None,
        description="Additional advanced settings and controls for specialized requirements.",
    )
    parse: bool | None = Field(
        default=None,
        description="True will return structured data.",
    )
    parsing_instructions: dict | None = Field(
        default=None,
        description="Instructions for parsing the results.",
    )
| 50 | + |
| 51 | + |
class OxylabsAmazonProductScraperTool(BaseTool):
    """
    Scrape Amazon product pages with OxylabsAmazonProductScraperTool.

    Get Oxylabs account:
    https://dashboard.oxylabs.io/en

    Args:
        username (str): Oxylabs username.
        password (str): Oxylabs password.
            If either ``username`` or ``password`` is omitted, both are read
            from the ``OXYLABS_USERNAME`` / ``OXYLABS_PASSWORD`` environment
            variables.
        config: Configuration options. See ``OxylabsAmazonProductScraperConfig``.
            May be given as a config instance or a plain dict. When omitted,
            a fresh default configuration is created for this instance.

    Raises:
        ValueError: If credentials are neither passed nor available in the
            environment.
        ImportError: If the ``oxylabs`` package is missing and was not (or
            could not be) installed.
    """

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        validate_assignment=True,
    )
    name: str = "Oxylabs Amazon Product Scraper tool"
    description: str = "Scrape Amazon product pages with Oxylabs Amazon Product Scraper"
    args_schema: Type[BaseModel] = OxylabsAmazonProductScraperArgs

    oxylabs_api: RealtimeClient
    config: OxylabsAmazonProductScraperConfig
    package_dependencies: List[str] = ["oxylabs"]

    def __init__(
        self,
        username: str | None = None,
        password: str | None = None,
        config: OxylabsAmazonProductScraperConfig | dict | None = None,
        **kwargs,
    ) -> None:
        # Build the default config per call: a default evaluated in the
        # signature would be one instance shared by every tool.
        if config is None:
            config = OxylabsAmazonProductScraperConfig()

        bits, _ = architecture()
        sdk_type = (
            f"oxylabs-crewai-sdk-python/"
            f"{version('crewai')} "
            f"({python_version()}; {bits})"
        )

        if username is None or password is None:
            username, password = self._get_credentials_from_env()

        if not OXYLABS_AVAILABLE:
            # Raises ImportError unless the package ends up installed.
            self._install_oxylabs()

        # Import here so the real class is bound in this scope even when the
        # module-level import failed and left a placeholder behind.
        from oxylabs import RealtimeClient

        kwargs["oxylabs_api"] = RealtimeClient(
            username=username,
            password=password,
            sdk_type=sdk_type,
        )

        super().__init__(config=config, **kwargs)

    def _install_oxylabs(self) -> None:
        """Offer to install the missing ``oxylabs`` package via ``uv add``.

        Raises:
            ImportError: If the user declines the prompt or the install
                command exits with a non-zero status.
        """
        import click

        if not click.confirm(
            "You are missing the 'oxylabs' package. Would you like to install it?"
        ):
            raise ImportError(
                "`oxylabs` package not found, please run `uv add oxylabs`"
            )

        import importlib
        import subprocess

        try:
            subprocess.run(["uv", "add", "oxylabs"], check=True)
        except subprocess.CalledProcessError as exc:
            # Chain the cause so the failing command stays in the traceback.
            raise ImportError("Failed to install oxylabs package") from exc

        # The package appeared after interpreter start-up; make sure the
        # import system's finders notice it.
        importlib.invalidate_caches()

    def _get_credentials_from_env(self) -> tuple[str, str]:
        """Read the Oxylabs credentials from the environment.

        Returns:
            The ``(username, password)`` pair taken from ``OXYLABS_USERNAME``
            and ``OXYLABS_PASSWORD``.

        Raises:
            ValueError: If either variable is unset or empty.
        """
        username = os.environ.get("OXYLABS_USERNAME")
        password = os.environ.get("OXYLABS_PASSWORD")
        if not username or not password:
            raise ValueError(
                "You must pass oxylabs username and password when instantiating the tool "
                "or specify OXYLABS_USERNAME and OXYLABS_PASSWORD environment variables"
            )
        return username, password

    def _run(self, query: str) -> str:
        """Scrape one Amazon product and return the first result's content.

        Args:
            query: Amazon product ASIN, passed to ``amazon.scrape_product``
                together with every config option that is not ``None``.

        Returns:
            The content of the first result; a dict is serialized to a JSON
            string, anything else is returned as received.
        """
        response = self.oxylabs_api.amazon.scrape_product(
            query,
            **self.config.model_dump(exclude_none=True),
        )

        content = response.results[0].content

        if isinstance(content, dict):
            return json.dumps(content)

        return content
0 commit comments