# Supercharging Web Crawling with Crawl4AI

To use the `crawl4ai` integration in AG2, follow these steps.

Install AG2 with the `crawl4ai` extra:

pip install -U ag2[openai,crawl4ai]
Note: If you have been using `autogen` or `ag2`, all you need to do is upgrade it using:

pip install -U autogen[openai,crawl4ai]

or

pip install -U ag2[openai,crawl4ai]

as `autogen` and `ag2` are aliases for the same PyPI package.
# Installs Playwright and browsers for all OS
playwright install
# Additional command, mandatory for Linux only
playwright install-deps
You will also need to install `nest_asyncio` to allow nested event loops:

pip install nest_asyncio
import os
import nest_asyncio
from pydantic import BaseModel
from autogen import AssistantAgent, UserProxyAgent
from autogen.tools.experimental import Crawl4AITool

# Allow nested asyncio event loops — Crawl4AI runs async code under the hood.
nest_asyncio.apply()

# One OpenAI model entry; the API key is read from the environment.
config_list = [
    {
        "api_type": "openai",
        "model": "gpt-4o-mini",
        "api_key": os.environ["OPENAI_API_KEY"],
    }
]
llm_config = {"config_list": config_list}

# Tool-executor agent (never prompts a human) and the LLM-backed assistant.
user_proxy = UserProxyAgent(name="user_proxy", human_input_mode="NEVER")
assistant = AssistantAgent(name="assistant", llm_config=llm_config)

# Default Crawl4AITool: plain crawling, no LLM-driven extraction.
crawlai_tool = Crawl4AITool()
crawlai_tool.register_for_execution(user_proxy)  # user_proxy actually runs the tool
crawlai_tool.register_for_llm(assistant)  # assistant may propose tool calls

# Start a short two-turn chat that crawls the requested page.
result = user_proxy.initiate_chat(
    recipient=assistant,
    message="Get info from https://docs.ag2.ai/docs/Home",
    max_turns=2,
)
Note: `Crawl4AI` is built on top of LiteLLM and supports the same models as LiteLLM. We had great experience with `OpenAI`, `Anthropic`, `Gemini` and `Ollama`. However, as of this writing, `DeepSeek` is encountering some issues.
# One OpenAI model entry; the API key is read from the environment.
config_list = [
    {
        "api_type": "openai",
        "model": "gpt-4o-mini",
        "api_key": os.environ["OPENAI_API_KEY"],
    }
]
llm_config = {"config_list": config_list}

# Tool-executor agent (never prompts a human) and the LLM-backed assistant.
user_proxy = UserProxyAgent(name="user_proxy", human_input_mode="NEVER")
assistant = AssistantAgent(name="assistant", llm_config=llm_config)

# Passing llm_config enables Crawl4AI's LLM-assisted processing of crawled pages.
crawlai_tool = Crawl4AITool(llm_config=llm_config)
crawlai_tool.register_for_execution(user_proxy)  # user_proxy actually runs the tool
crawlai_tool.register_for_llm(assistant)  # assistant may propose tool calls

# Start a short two-turn chat that crawls the requested page.
result = user_proxy.initiate_chat(
    recipient=assistant,
    message="Get info from https://docs.ag2.ai/docs/Home",
    max_turns=2,
)
# One OpenAI model entry; the API key is read from the environment.
config_list = [
    {
        "api_type": "openai",
        "model": "gpt-4o-mini",
        "api_key": os.environ["OPENAI_API_KEY"],
    }
]
llm_config = {"config_list": config_list}

# Tool-executor agent (never prompts a human) and the LLM-backed assistant.
user_proxy = UserProxyAgent(name="user_proxy", human_input_mode="NEVER")
assistant = AssistantAgent(name="assistant", llm_config=llm_config)


class Blog(BaseModel):
    """Schema describing one blog post extracted from the crawled page."""

    title: str
    url: str


# Supplying both llm_config and extraction_model makes the crawl return
# data structured according to the Blog schema.
crawlai_tool = Crawl4AITool(llm_config=llm_config, extraction_model=Blog)
crawlai_tool.register_for_execution(user_proxy)  # user_proxy actually runs the tool
crawlai_tool.register_for_llm(assistant)  # assistant may propose tool calls

message = "Extract all blog posts from https://docs.ag2.ai/blog"

# Start a short two-turn chat that performs the structured extraction.
result = user_proxy.initiate_chat(
    recipient=assistant,
    message=message,
    max_turns=2,
)