Skip to content

Commit 9a21d05

Browse files
Add Oxylabs Web Scraping tools (crewAIInc#312)
* Add Oxylabs tools * Review updates * Add package_dependencies attribute
1 parent ccbb3f4 commit 9a21d05

File tree

13 files changed

+1041
-57
lines changed

13 files changed

+1041
-57
lines changed

crewai_tools/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@
3737
MultiOnTool,
3838
MySQLSearchTool,
3939
NL2SQLTool,
40+
OxylabsUniversalScraperTool,
41+
OxylabsGoogleSearchScraperTool,
42+
OxylabsAmazonProductScraperTool,
43+
OxylabsAmazonSearchScraperTool,
4044
PatronusEvalTool,
4145
PatronusLocalEvaluatorTool,
4246
PatronusPredefinedCriteriaEvalTool,

crewai_tools/tools/__init__.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,18 @@
3232
from .multion_tool.multion_tool import MultiOnTool
3333
from .mysql_search_tool.mysql_search_tool import MySQLSearchTool
3434
from .nl2sql.nl2sql_tool import NL2SQLTool
35+
from .oxylabs_universal_scraper_tool.oxylabs_universal_scraper_tool import (
36+
OxylabsUniversalScraperTool,
37+
)
38+
from .oxylabs_google_search_scraper_tool.oxylabs_google_search_scraper_tool import (
39+
OxylabsGoogleSearchScraperTool,
40+
)
41+
from .oxylabs_amazon_product_scraper_tool.oxylabs_amazon_product_scraper_tool import (
42+
OxylabsAmazonProductScraperTool,
43+
)
44+
from .oxylabs_amazon_search_scraper_tool.oxylabs_amazon_search_scraper_tool import (
45+
OxylabsAmazonSearchScraperTool,
46+
)
3547
from .patronus_eval_tool import (
3648
PatronusEvalTool,
3749
PatronusLocalEvaluatorTool,
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# OxylabsAmazonProductScraperTool
2+
3+
Scrape Amazon product pages with `OxylabsAmazonProductScraperTool`
4+
5+
## Installation
6+
7+
```
8+
pip install 'crewai[tools]' oxylabs
9+
```
10+
11+
## Example
12+
13+
```python
14+
from crewai_tools import OxylabsAmazonProductScraperTool
15+
16+
# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set
17+
tool = OxylabsAmazonProductScraperTool()
18+
19+
result = tool.run(query="AAAAABBBBCC")
20+
21+
print(result)
22+
```
23+
24+
## Arguments
25+
26+
- `username`: Oxylabs username.
27+
- `password`: Oxylabs password.
28+
29+
Get the credentials by creating an Oxylabs Account [here](https://oxylabs.io).
30+
31+
## Advanced example
32+
33+
Check out the Oxylabs [documentation](https://developers.oxylabs.io/scraper-apis/web-scraper-api/targets/amazon/product) to get the full list of parameters.
34+
35+
```python
36+
from crewai_tools import OxylabsAmazonProductScraperTool
37+
38+
# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set
39+
tool = OxylabsAmazonProductScraperTool(
40+
config={
41+
"domain": "com",
42+
"parse": True,
43+
"context": [
44+
{
45+
"key": "autoselect_variant",
46+
"value": True
47+
}
48+
]
49+
}
50+
)
51+
52+
result = tool.run(query="AAAAABBBBCC")
53+
54+
print(result)
55+
```
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
import json
2+
import os
3+
from importlib.metadata import version
4+
from platform import architecture, python_version
5+
from typing import Any, List, Type
6+
7+
from crewai.tools import BaseTool
8+
from pydantic import BaseModel, ConfigDict, Field
9+
10+
try:
11+
from oxylabs import RealtimeClient
12+
from oxylabs.sources.response import Response as OxylabsResponse
13+
14+
OXYLABS_AVAILABLE = True
15+
except ImportError:
16+
RealtimeClient = Any
17+
OxylabsResponse = Any
18+
19+
OXYLABS_AVAILABLE = False
20+
21+
22+
# Names exported by a star-import of this module: the tool class and its
# configuration model. OxylabsAmazonProductScraperArgs is not listed here.
__all__ = ["OxylabsAmazonProductScraperTool", "OxylabsAmazonProductScraperConfig"]
23+
24+
25+
# Argument schema for OxylabsAmazonProductScraperTool: a single required
# string field holding the ASIN of the product to scrape.
# (Documented with comments rather than a docstring so the generated JSON
# schema of the model stays unchanged.)
class OxylabsAmazonProductScraperArgs(BaseModel):
    query: str = Field(
        ...,
        description="Amazon product ASIN",
    )
27+
28+
29+
class OxylabsAmazonProductScraperConfig(BaseModel):
    """
    Optional request settings for the Oxylabs Amazon Product Scraper.

    Every field defaults to ``None``. Full parameter reference:
    https://developers.oxylabs.io/scraper-apis/web-scraper-api/targets/amazon/product
    """

    domain: str | None = Field(
        default=None,
        description="The domain to limit the search results to.",
    )
    geo_location: str | None = Field(
        default=None,
        description="The Deliver to location.",
    )
    user_agent_type: str | None = Field(
        default=None,
        description="Device type and browser.",
    )
    render: str | None = Field(
        default=None,
        description="Enables JavaScript rendering.",
    )
    callback_url: str | None = Field(
        default=None,
        description="URL to your callback endpoint.",
    )
    context: list | None = Field(
        default=None,
        description="Additional advanced settings and controls for specialized requirements.",
    )
    parse: bool | None = Field(
        default=None,
        description="True will return structured data.",
    )
    parsing_instructions: dict | None = Field(
        default=None,
        description="Instructions for parsing the results.",
    )
50+
51+
52+
class OxylabsAmazonProductScraperTool(BaseTool):
    """
    Scrape Amazon product pages with OxylabsAmazonProductScraperTool.

    Get Oxylabs account:
    https://dashboard.oxylabs.io/en

    Args:
        username (str): Oxylabs username. Read from ``OXYLABS_USERNAME`` when
            credentials are not passed explicitly.
        password (str): Oxylabs password. Read from ``OXYLABS_PASSWORD`` when
            credentials are not passed explicitly.
        config: Configuration options, either as an
            ``OxylabsAmazonProductScraperConfig`` or a plain dict with the same
            keys. When omitted, a fresh default configuration is created for
            this instance.

    Raises:
        ValueError: If credentials are neither passed nor available in the
            environment.
        ImportError: If the ``oxylabs`` package is missing and is not (or
            cannot be) installed.
    """

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        validate_assignment=True,
    )
    name: str = "Oxylabs Amazon Product Scraper tool"
    description: str = "Scrape Amazon product pages with Oxylabs Amazon Product Scraper"
    args_schema: Type[BaseModel] = OxylabsAmazonProductScraperArgs

    # Client used by ``_run`` to call the Oxylabs API; built in ``__init__``.
    oxylabs_api: RealtimeClient
    # Request options forwarded to ``scrape_product`` (``None`` values dropped).
    config: OxylabsAmazonProductScraperConfig
    package_dependencies: List[str] = ["oxylabs"]

    def __init__(
        self,
        username: str | None = None,
        password: str | None = None,
        config: OxylabsAmazonProductScraperConfig | dict | None = None,
        **kwargs: Any,
    ) -> None:
        # A model instance used as a default argument is created once, at
        # definition time, and would be shared (and mutated) across every tool
        # built without an explicit config. Build a new one per instance.
        if config is None:
            config = OxylabsAmazonProductScraperConfig()

        bits, _ = architecture()
        sdk_type = (
            f"oxylabs-crewai-sdk-python/"
            f"{version('crewai')} "
            f"({python_version()}; {bits})"
        )

        # NOTE(review): when only one of username/password is passed, BOTH are
        # replaced by the environment values. Kept as-is to preserve behavior;
        # confirm whether explicit arguments should take precedence.
        if username is None or password is None:
            username, password = self._get_credentials_from_env()

        if not OXYLABS_AVAILABLE:
            self._install_oxylabs()

        # Import locally: at module level the name may be the ``Any``
        # placeholder if the package was missing at import time.
        from oxylabs import RealtimeClient

        kwargs["oxylabs_api"] = RealtimeClient(
            username=username,
            password=password,
            sdk_type=sdk_type,
        )

        super().__init__(config=config, **kwargs)

    @staticmethod
    def _install_oxylabs() -> None:
        """Ask the user for permission and install ``oxylabs`` with ``uv``.

        Raises:
            ImportError: If the user declines or the install command fails.
        """
        import click

        if not click.confirm(
            "You are missing the 'oxylabs' package. Would you like to install it?"
        ):
            raise ImportError(
                "`oxylabs` package not found, please run `uv add oxylabs`"
            )

        import subprocess

        try:
            subprocess.run(["uv", "add", "oxylabs"], check=True)
        except subprocess.CalledProcessError as exc:
            # Chain the cause so the failing command stays visible.
            raise ImportError("Failed to install oxylabs package") from exc

    def _get_credentials_from_env(self) -> tuple[str, str]:
        """Return ``(username, password)`` from the environment.

        Raises:
            ValueError: If ``OXYLABS_USERNAME`` or ``OXYLABS_PASSWORD`` is
                unset or empty.
        """
        username = os.environ.get("OXYLABS_USERNAME")
        password = os.environ.get("OXYLABS_PASSWORD")
        if not username or not password:
            raise ValueError(
                "You must pass oxylabs username and password when instantiating the tool "
                "or specify OXYLABS_USERNAME and OXYLABS_PASSWORD environment variables"
            )
        return username, password

    def _run(self, query: str) -> str:
        """Scrape the product identified by ``query`` (an ASIN).

        Returns the content of the first result. Structured content (a dict or
        a list) is serialized to a JSON string; anything else is returned
        unchanged.
        """
        response = self.oxylabs_api.amazon.scrape_product(
            query,
            **self.config.model_dump(exclude_none=True),
        )

        content = response.results[0].content

        # Lists are serialized as well so the declared ``str`` return type
        # also holds when the content is a JSON array.
        if isinstance(content, (dict, list)):
            return json.dumps(content)

        return content
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# OxylabsAmazonSearchScraperTool
2+
3+
Scrape Amazon search results with `OxylabsAmazonSearchScraperTool`
4+
5+
## Installation
6+
7+
```
8+
pip install 'crewai[tools]' oxylabs
9+
```
10+
11+
## Example
12+
13+
```python
14+
from crewai_tools import OxylabsAmazonSearchScraperTool
15+
16+
# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set
17+
tool = OxylabsAmazonSearchScraperTool()
18+
19+
result = tool.run(query="headsets")
20+
21+
print(result)
22+
```
23+
24+
## Arguments
25+
26+
- `username`: Oxylabs username.
27+
- `password`: Oxylabs password.
28+
29+
Get the credentials by creating an Oxylabs Account [here](https://oxylabs.io).
30+
31+
## Advanced example
32+
33+
Check out the Oxylabs [documentation](https://developers.oxylabs.io/scraper-apis/web-scraper-api/targets/amazon/search) to get the full list of parameters.
34+
35+
```python
36+
from crewai_tools import OxylabsAmazonSearchScraperTool
37+
38+
# make sure OXYLABS_USERNAME and OXYLABS_PASSWORD variables are set
39+
tool = OxylabsAmazonSearchScraperTool(
40+
config={
41+
"domain": 'nl',
42+
"start_page": 2,
43+
"pages": 2,
44+
"parse": True,
45+
"context": [
46+
{'key': 'category_id', 'value': 16391693031}
47+
],
48+
}
49+
)
50+
51+
result = tool.run(query='nirvana tshirt')
52+
53+
print(result)
54+
```

0 commit comments

Comments
 (0)