Firecrawl MCP Server
Prompts
Reviews
Write Your Review
Detailed Ratings
Statistics
{
"tools": [
{
"name": "firecrawl_scrape",
"description": "\nScrape content from a single URL with advanced options.\n\n**Best for:** Single page content extraction, when you know exactly which page contains the information.\n**Not recommended for:** Multiple pages (use batch_scrape), unknown page (use search), structured data (use extract).\n**Common mistakes:** Using scrape for a list of URLs (use batch_scrape instead).\n**Prompt Example:** \"Get the content of the page at https://example.com.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_scrape\",\n \"arguments\": {\n \"url\": \"https://example.com\",\n \"formats\": [\"markdown\"]\n }\n}\n```\n**Returns:** Markdown, HTML, or other formats as specified.\n",
"inputSchema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to scrape"
},
"formats": {
"type": "array",
"items": {
"type": "string",
"enum": [
"markdown",
"html",
"rawHtml",
"screenshot",
"links",
"screenshot@fullPage",
"extract"
]
},
"default": [
"markdown"
],
"description": "Content formats to extract (default: ['markdown'])"
},
"onlyMainContent": {
"type": "boolean",
"description": "Extract only the main content, filtering out navigation, footers, etc."
},
"includeTags": {
"type": "array",
"items": {
"type": "string"
},
"description": "HTML tags to specifically include in extraction"
},
"excludeTags": {
"type": "array",
"items": {
"type": "string"
},
"description": "HTML tags to exclude from extraction"
},
"waitFor": {
"type": "number",
"description": "Time in milliseconds to wait for dynamic content to load"
},
"timeout": {
"type": "number",
"description": "Maximum time in milliseconds to wait for the page to load"
},
"actions": {
"type": "array",
"items": {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": [
"wait",
"click",
"screenshot",
"write",
"press",
"scroll",
"scrape",
"executeJavascript"
],
"description": "Type of action to perform"
},
"selector": {
"type": "string",
"description": "CSS selector for the target element"
},
"milliseconds": {
"type": "number",
"description": "Time to wait in milliseconds (for wait action)"
},
"text": {
"type": "string",
"description": "Text to write (for write action)"
},
"key": {
"type": "string",
"description": "Key to press (for press action)"
},
"direction": {
"type": "string",
"enum": [
"up",
"down"
],
"description": "Scroll direction"
},
"script": {
"type": "string",
"description": "JavaScript code to execute"
},
"fullPage": {
"type": "boolean",
"description": "Take full page screenshot"
}
},
"required": [
"type"
]
},
"description": "List of actions to perform before scraping"
},
"extract": {
"type": "object",
"properties": {
"schema": {
"type": "object",
"description": "Schema for structured data extraction"
},
"systemPrompt": {
"type": "string",
"description": "System prompt for LLM extraction"
},
"prompt": {
"type": "string",
"description": "User prompt for LLM extraction"
}
},
"description": "Configuration for structured data extraction"
},
"mobile": {
"type": "boolean",
"description": "Use mobile viewport"
},
"skipTlsVerification": {
"type": "boolean",
"description": "Skip TLS certificate verification"
},
"removeBase64Images": {
"type": "boolean",
"description": "Remove base64 encoded images from output"
},
"location": {
"type": "object",
"properties": {
"country": {
"type": "string",
"description": "Country code for geolocation"
},
"languages": {
"type": "array",
"items": {
"type": "string"
},
"description": "Language codes for content"
}
},
"description": "Location settings for scraping"
}
},
"required": [
"url"
]
}
},
{
"name": "firecrawl_map",
"description": "\nMap a website to discover all indexed URLs on the site.\n\n**Best for:** Discovering URLs on a website before deciding what to scrape; finding specific sections of a website.\n**Not recommended for:** When you already know which specific URL you need (use scrape or batch_scrape); when you need the content of the pages (use scrape after mapping).\n**Common mistakes:** Using crawl to discover URLs instead of map.\n**Prompt Example:** \"List all URLs on example.com.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_map\",\n \"arguments\": {\n \"url\": \"https://example.com\"\n }\n}\n```\n**Returns:** Array of URLs found on the site.\n",
"inputSchema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "Starting URL for URL discovery"
},
"search": {
"type": "string",
"description": "Optional search term to filter URLs"
},
"ignoreSitemap": {
"type": "boolean",
"description": "Skip sitemap.xml discovery and only use HTML links"
},
"sitemapOnly": {
"type": "boolean",
"description": "Only use sitemap.xml for discovery, ignore HTML links"
},
"includeSubdomains": {
"type": "boolean",
"description": "Include URLs from subdomains in results"
},
"limit": {
"type": "number",
"description": "Maximum number of URLs to return"
}
},
"required": [
"url"
]
}
},
{
"name": "firecrawl_crawl",
"description": "\nStarts an asynchronous crawl job on a website and extracts content from all pages.\n\n**Best for:** Extracting content from multiple related pages, when you need comprehensive coverage.\n**Not recommended for:** Extracting content from a single page (use scrape); when token limits are a concern (use map + batch_scrape); when you need fast results (crawling can be slow).\n**Warning:** Crawl responses can be very large and may exceed token limits. Limit the crawl depth and number of pages, or use map + batch_scrape for better control.\n**Common mistakes:** Setting limit or maxDepth too high (causes token overflow); using crawl for a single page (use scrape instead).\n**Prompt Example:** \"Get all blog posts from the first two levels of example.com/blog.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_crawl\",\n \"arguments\": {\n \"url\": \"https://example.com/blog/*\",\n \"maxDepth\": 2,\n \"limit\": 100,\n \"allowExternalLinks\": false,\n \"deduplicateSimilarURLs\": true\n }\n}\n```\n**Returns:** Operation ID for status checking; use firecrawl_check_crawl_status to check progress.\n",
"inputSchema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "Starting URL for the crawl"
},
"excludePaths": {
"type": "array",
"items": {
"type": "string"
},
"description": "URL paths to exclude from crawling"
},
"includePaths": {
"type": "array",
"items": {
"type": "string"
},
"description": "Only crawl these URL paths"
},
"maxDepth": {
"type": "number",
"description": "Maximum link depth to crawl"
},
"ignoreSitemap": {
"type": "boolean",
"description": "Skip sitemap.xml discovery"
},
"limit": {
"type": "number",
"description": "Maximum number of pages to crawl"
},
"allowBackwardLinks": {
"type": "boolean",
"description": "Allow crawling links that point to parent directories"
},
"allowExternalLinks": {
"type": "boolean",
"description": "Allow crawling links to external domains"
},
"webhook": {
"oneOf": [
{
"type": "string",
"description": "Webhook URL to notify when crawl is complete"
},
{
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "Webhook URL"
},
"headers": {
"type": "object",
"description": "Custom headers for webhook requests"
}
},
"required": [
"url"
]
}
]
},
"deduplicateSimilarURLs": {
"type": "boolean",
"description": "Remove similar URLs during crawl"
},
"ignoreQueryParameters": {
"type": "boolean",
"description": "Ignore query parameters when comparing URLs"
},
"scrapeOptions": {
"type": "object",
"properties": {
"formats": {
"type": "array",
"items": {
"type": "string",
"enum": [
"markdown",
"html",
"rawHtml",
"screenshot",
"links",
"screenshot@fullPage",
"extract"
]
}
},
"onlyMainContent": {
"type": "boolean"
},
"includeTags": {
"type": "array",
"items": {
"type": "string"
}
},
"excludeTags": {
"type": "array",
"items": {
"type": "string"
}
},
"waitFor": {
"type": "number"
}
},
"description": "Options for scraping each page"
}
},
"required": [
"url"
]
}
},
{
"name": "firecrawl_check_crawl_status",
"description": "\nCheck the status of a crawl job.\n\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_check_crawl_status\",\n \"arguments\": {\n \"id\": \"550e8400-e29b-41d4-a716-446655440000\"\n }\n}\n```\n**Returns:** Status and progress of the crawl job, including results if available.\n",
"inputSchema": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Crawl job ID to check"
}
},
"required": [
"id"
]
}
},
{
"name": "firecrawl_search",
"description": "\nSearch the web and optionally extract content from search results.\n\n**Best for:** Finding specific information across multiple websites, when you don't know which website has the information; when you need the most relevant content for a query.\n**Not recommended for:** When you already know which website to scrape (use scrape); when you need comprehensive coverage of a single website (use map or crawl).\n**Common mistakes:** Using crawl or map for open-ended questions (use search instead).\n**Prompt Example:** \"Find the latest research papers on AI published in 2023.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_search\",\n \"arguments\": {\n \"query\": \"latest AI research papers 2023\",\n \"limit\": 5,\n \"lang\": \"en\",\n \"country\": \"us\",\n \"scrapeOptions\": {\n \"formats\": [\"markdown\"],\n \"onlyMainContent\": true\n }\n }\n}\n```\n**Returns:** Array of search results (with optional scraped content).\n",
"inputSchema": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Search query string"
},
"limit": {
"type": "number",
"description": "Maximum number of results to return (default: 5)"
},
"lang": {
"type": "string",
"description": "Language code for search results (default: en)"
},
"country": {
"type": "string",
"description": "Country code for search results (default: us)"
},
"tbs": {
"type": "string",
"description": "Time-based search filter"
},
"filter": {
"type": "string",
"description": "Search filter"
},
"location": {
"type": "object",
"properties": {
"country": {
"type": "string",
"description": "Country code for geolocation"
},
"languages": {
"type": "array",
"items": {
"type": "string"
},
"description": "Language codes for content"
}
},
"description": "Location settings for search"
},
"scrapeOptions": {
"type": "object",
"properties": {
"formats": {
"type": "array",
"items": {
"type": "string",
"enum": [
"markdown",
"html",
"rawHtml"
]
},
"description": "Content formats to extract from search results"
},
"onlyMainContent": {
"type": "boolean",
"description": "Extract only the main content from results"
},
"waitFor": {
"type": "number",
"description": "Time in milliseconds to wait for dynamic content"
}
},
"description": "Options for scraping search results"
}
},
"required": [
"query"
]
}
},
{
"name": "firecrawl_extract",
"description": "\nExtract structured information from web pages using LLM capabilities. Supports both cloud AI and self-hosted LLM extraction.\n\n**Best for:** Extracting specific structured data like prices, names, details.\n**Not recommended for:** When you need the full content of a page (use scrape); when you're not looking for specific structured data.\n**Arguments:**\n- urls: Array of URLs to extract information from\n- prompt: Custom prompt for the LLM extraction\n- systemPrompt: System prompt to guide the LLM\n- schema: JSON schema for structured data extraction\n- allowExternalLinks: Allow extraction from external links\n- enableWebSearch: Enable web search for additional context\n- includeSubdomains: Include subdomains in extraction\n**Prompt Example:** \"Extract the product name, price, and description from these product pages.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_extract\",\n \"arguments\": {\n \"urls\": [\"https://example.com/page1\", \"https://example.com/page2\"],\n \"prompt\": \"Extract product information including name, price, and description\",\n \"systemPrompt\": \"You are a helpful assistant that extracts product information\",\n \"schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": { \"type\": \"string\" },\n \"price\": { \"type\": \"number\" },\n \"description\": { \"type\": \"string\" }\n },\n \"required\": [\"name\", \"price\"]\n },\n \"allowExternalLinks\": false,\n \"enableWebSearch\": false,\n \"includeSubdomains\": false\n }\n}\n```\n**Returns:** Extracted structured data as defined by your schema.\n",
"inputSchema": {
"type": "object",
"properties": {
"urls": {
"type": "array",
"items": {
"type": "string"
},
"description": "List of URLs to extract information from"
},
"prompt": {
"type": "string",
"description": "Prompt for the LLM extraction"
},
"systemPrompt": {
"type": "string",
"description": "System prompt for LLM extraction"
},
"schema": {
"type": "object",
"description": "JSON schema for structured data extraction"
},
"allowExternalLinks": {
"type": "boolean",
"description": "Allow extraction from external links"
},
"enableWebSearch": {
"type": "boolean",
"description": "Enable web search for additional context"
},
"includeSubdomains": {
"type": "boolean",
"description": "Include subdomains in extraction"
}
},
"required": [
"urls"
]
}
},
{
"name": "firecrawl_deep_research",
"description": "\nConduct deep web research on a query using intelligent crawling, search, and LLM analysis.\n\n**Best for:** Complex research questions requiring multiple sources, in-depth analysis.\n**Not recommended for:** Simple questions that can be answered with a single search; when you need very specific information from a known page (use scrape); when you need results quickly (deep research can take time).\n**Arguments:**\n- query (string, required): The research question or topic to explore.\n- maxDepth (number, optional): Maximum recursive depth for crawling/search (default: 3).\n- timeLimit (number, optional): Time limit in seconds for the research session (default: 120).\n- maxUrls (number, optional): Maximum number of URLs to analyze (default: 50).\n**Prompt Example:** \"Research the environmental impact of electric vehicles versus gasoline vehicles.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_deep_research\",\n \"arguments\": {\n \"query\": \"What are the environmental impacts of electric vehicles compared to gasoline vehicles?\",\n \"maxDepth\": 3,\n \"timeLimit\": 120,\n \"maxUrls\": 50\n }\n}\n```\n**Returns:** Final analysis generated by an LLM based on research. (data.finalAnalysis); may also include structured activities and sources used in the research process.\n",
"inputSchema": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to research"
},
"maxDepth": {
"type": "number",
"description": "Maximum depth of research iterations (1-10)"
},
"timeLimit": {
"type": "number",
"description": "Time limit in seconds (30-300)"
},
"maxUrls": {
"type": "number",
"description": "Maximum number of URLs to analyze (1-1000)"
}
},
"required": [
"query"
]
}
},
{
"name": "firecrawl_generate_llmstxt",
"description": "\nGenerate a standardized llms.txt (and optionally llms-full.txt) file for a given domain. This file defines how large language models should interact with the site.\n\n**Best for:** Creating machine-readable permission guidelines for AI models.\n**Not recommended for:** General content extraction or research.\n**Arguments:**\n- url (string, required): The base URL of the website to analyze.\n- maxUrls (number, optional): Max number of URLs to include (default: 10).\n- showFullText (boolean, optional): Whether to include llms-full.txt contents in the response.\n**Prompt Example:** \"Generate an LLMs.txt file for example.com.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_generate_llmstxt\",\n \"arguments\": {\n \"url\": \"https://example.com\",\n \"maxUrls\": 20,\n \"showFullText\": true\n }\n}\n```\n**Returns:** LLMs.txt file contents (and optionally llms-full.txt).\n",
"inputSchema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to generate LLMs.txt from"
},
"maxUrls": {
"type": "number",
"description": "Maximum number of URLs to process (1-100, default: 10)"
},
"showFullText": {
"type": "boolean",
"description": "Whether to show the full LLMs-full.txt in the response"
}
},
"required": [
"url"
]
}
}
]
}
{ "mcpServers": { "firecrawl-mcp": { "command": "npx", "args": ["-y", "firecrawl-mcp"], "env": { "FIRECRAWL_API_KEY": "YOUR-API-KEY" } } } }
{"mcpServers": {"firecrawl-mcp": {"command": "npx", "args": ["-y", "firecrawl-mcp"], "env": {"FIRECRAWL_API_KEY": "YOUR-API-KEY"}}}}
{
"tools": [
{
"name": "firecrawl_scrape",
"description": "\nScrape content from a single URL with advanced options.\n\n**Best for:** Single page content extraction, when you know exactly which page contains the information.\n**Not recommended for:** Multiple pages (use batch_scrape), unknown page (use search), structured data (use extract).\n**Common mistakes:** Using scrape for a list of URLs (use batch_scrape instead).\n**Prompt Example:** \"Get the content of the page at https://example.com.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_scrape\",\n \"arguments\": {\n \"url\": \"https://example.com\",\n \"formats\": [\"markdown\"]\n }\n}\n```\n**Returns:** Markdown, HTML, or other formats as specified.\n",
"inputSchema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to scrape"
},
"formats": {
"type": "array",
"items": {
"type": "string",
"enum": [
"markdown",
"html",
"rawHtml",
"screenshot",
"links",
"screenshot@fullPage",
"extract"
]
},
"default": [
"markdown"
],
"description": "Content formats to extract (default: ['markdown'])"
},
"onlyMainContent": {
"type": "boolean",
"description": "Extract only the main content, filtering out navigation, footers, etc."
},
"includeTags": {
"type": "array",
"items": {
"type": "string"
},
"description": "HTML tags to specifically include in extraction"
},
"excludeTags": {
"type": "array",
"items": {
"type": "string"
},
"description": "HTML tags to exclude from extraction"
},
"waitFor": {
"type": "number",
"description": "Time in milliseconds to wait for dynamic content to load"
},
"timeout": {
"type": "number",
"description": "Maximum time in milliseconds to wait for the page to load"
},
"actions": {
"type": "array",
"items": {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": [
"wait",
"click",
"screenshot",
"write",
"press",
"scroll",
"scrape",
"executeJavascript"
],
"description": "Type of action to perform"
},
"selector": {
"type": "string",
"description": "CSS selector for the target element"
},
"milliseconds": {
"type": "number",
"description": "Time to wait in milliseconds (for wait action)"
},
"text": {
"type": "string",
"description": "Text to write (for write action)"
},
"key": {
"type": "string",
"description": "Key to press (for press action)"
},
"direction": {
"type": "string",
"enum": [
"up",
"down"
],
"description": "Scroll direction"
},
"script": {
"type": "string",
"description": "JavaScript code to execute"
},
"fullPage": {
"type": "boolean",
"description": "Take full page screenshot"
}
},
"required": [
"type"
]
},
"description": "List of actions to perform before scraping"
},
"extract": {
"type": "object",
"properties": {
"schema": {
"type": "object",
"description": "Schema for structured data extraction"
},
"systemPrompt": {
"type": "string",
"description": "System prompt for LLM extraction"
},
"prompt": {
"type": "string",
"description": "User prompt for LLM extraction"
}
},
"description": "Configuration for structured data extraction"
},
"mobile": {
"type": "boolean",
"description": "Use mobile viewport"
},
"skipTlsVerification": {
"type": "boolean",
"description": "Skip TLS certificate verification"
},
"removeBase64Images": {
"type": "boolean",
"description": "Remove base64 encoded images from output"
},
"location": {
"type": "object",
"properties": {
"country": {
"type": "string",
"description": "Country code for geolocation"
},
"languages": {
"type": "array",
"items": {
"type": "string"
},
"description": "Language codes for content"
}
},
"description": "Location settings for scraping"
}
},
"required": [
"url"
]
}
},
{
"name": "firecrawl_map",
"description": "\nMap a website to discover all indexed URLs on the site.\n\n**Best for:** Discovering URLs on a website before deciding what to scrape; finding specific sections of a website.\n**Not recommended for:** When you already know which specific URL you need (use scrape or batch_scrape); when you need the content of the pages (use scrape after mapping).\n**Common mistakes:** Using crawl to discover URLs instead of map.\n**Prompt Example:** \"List all URLs on example.com.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_map\",\n \"arguments\": {\n \"url\": \"https://example.com\"\n }\n}\n```\n**Returns:** Array of URLs found on the site.\n",
"inputSchema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "Starting URL for URL discovery"
},
"search": {
"type": "string",
"description": "Optional search term to filter URLs"
},
"ignoreSitemap": {
"type": "boolean",
"description": "Skip sitemap.xml discovery and only use HTML links"
},
"sitemapOnly": {
"type": "boolean",
"description": "Only use sitemap.xml for discovery, ignore HTML links"
},
"includeSubdomains": {
"type": "boolean",
"description": "Include URLs from subdomains in results"
},
"limit": {
"type": "number",
"description": "Maximum number of URLs to return"
}
},
"required": [
"url"
]
}
},
{
"name": "firecrawl_crawl",
"description": "\nStarts an asynchronous crawl job on a website and extracts content from all pages.\n\n**Best for:** Extracting content from multiple related pages, when you need comprehensive coverage.\n**Not recommended for:** Extracting content from a single page (use scrape); when token limits are a concern (use map + batch_scrape); when you need fast results (crawling can be slow).\n**Warning:** Crawl responses can be very large and may exceed token limits. Limit the crawl depth and number of pages, or use map + batch_scrape for better control.\n**Common mistakes:** Setting limit or maxDepth too high (causes token overflow); using crawl for a single page (use scrape instead).\n**Prompt Example:** \"Get all blog posts from the first two levels of example.com/blog.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_crawl\",\n \"arguments\": {\n \"url\": \"https://example.com/blog/*\",\n \"maxDepth\": 2,\n \"limit\": 100,\n \"allowExternalLinks\": false,\n \"deduplicateSimilarURLs\": true\n }\n}\n```\n**Returns:** Operation ID for status checking; use firecrawl_check_crawl_status to check progress.\n",
"inputSchema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "Starting URL for the crawl"
},
"excludePaths": {
"type": "array",
"items": {
"type": "string"
},
"description": "URL paths to exclude from crawling"
},
"includePaths": {
"type": "array",
"items": {
"type": "string"
},
"description": "Only crawl these URL paths"
},
"maxDepth": {
"type": "number",
"description": "Maximum link depth to crawl"
},
"ignoreSitemap": {
"type": "boolean",
"description": "Skip sitemap.xml discovery"
},
"limit": {
"type": "number",
"description": "Maximum number of pages to crawl"
},
"allowBackwardLinks": {
"type": "boolean",
"description": "Allow crawling links that point to parent directories"
},
"allowExternalLinks": {
"type": "boolean",
"description": "Allow crawling links to external domains"
},
"webhook": {
"oneOf": [
{
"type": "string",
"description": "Webhook URL to notify when crawl is complete"
},
{
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "Webhook URL"
},
"headers": {
"type": "object",
"description": "Custom headers for webhook requests"
}
},
"required": [
"url"
]
}
]
},
"deduplicateSimilarURLs": {
"type": "boolean",
"description": "Remove similar URLs during crawl"
},
"ignoreQueryParameters": {
"type": "boolean",
"description": "Ignore query parameters when comparing URLs"
},
"scrapeOptions": {
"type": "object",
"properties": {
"formats": {
"type": "array",
"items": {
"type": "string",
"enum": [
"markdown",
"html",
"rawHtml",
"screenshot",
"links",
"screenshot@fullPage",
"extract"
]
}
},
"onlyMainContent": {
"type": "boolean"
},
"includeTags": {
"type": "array",
"items": {
"type": "string"
}
},
"excludeTags": {
"type": "array",
"items": {
"type": "string"
}
},
"waitFor": {
"type": "number"
}
},
"description": "Options for scraping each page"
}
},
"required": [
"url"
]
}
},
{
"name": "firecrawl_check_crawl_status",
"description": "\nCheck the status of a crawl job.\n\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_check_crawl_status\",\n \"arguments\": {\n \"id\": \"550e8400-e29b-41d4-a716-446655440000\"\n }\n}\n```\n**Returns:** Status and progress of the crawl job, including results if available.\n",
"inputSchema": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Crawl job ID to check"
}
},
"required": [
"id"
]
}
},
{
"name": "firecrawl_search",
"description": "\nSearch the web and optionally extract content from search results.\n\n**Best for:** Finding specific information across multiple websites, when you don't know which website has the information; when you need the most relevant content for a query.\n**Not recommended for:** When you already know which website to scrape (use scrape); when you need comprehensive coverage of a single website (use map or crawl).\n**Common mistakes:** Using crawl or map for open-ended questions (use search instead).\n**Prompt Example:** \"Find the latest research papers on AI published in 2023.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_search\",\n \"arguments\": {\n \"query\": \"latest AI research papers 2023\",\n \"limit\": 5,\n \"lang\": \"en\",\n \"country\": \"us\",\n \"scrapeOptions\": {\n \"formats\": [\"markdown\"],\n \"onlyMainContent\": true\n }\n }\n}\n```\n**Returns:** Array of search results (with optional scraped content).\n",
"inputSchema": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Search query string"
},
"limit": {
"type": "number",
"description": "Maximum number of results to return (default: 5)"
},
"lang": {
"type": "string",
"description": "Language code for search results (default: en)"
},
"country": {
"type": "string",
"description": "Country code for search results (default: us)"
},
"tbs": {
"type": "string",
"description": "Time-based search filter"
},
"filter": {
"type": "string",
"description": "Search filter"
},
"location": {
"type": "object",
"properties": {
"country": {
"type": "string",
"description": "Country code for geolocation"
},
"languages": {
"type": "array",
"items": {
"type": "string"
},
"description": "Language codes for content"
}
},
"description": "Location settings for search"
},
"scrapeOptions": {
"type": "object",
"properties": {
"formats": {
"type": "array",
"items": {
"type": "string",
"enum": [
"markdown",
"html",
"rawHtml"
]
},
"description": "Content formats to extract from search results"
},
"onlyMainContent": {
"type": "boolean",
"description": "Extract only the main content from results"
},
"waitFor": {
"type": "number",
"description": "Time in milliseconds to wait for dynamic content"
}
},
"description": "Options for scraping search results"
}
},
"required": [
"query"
]
}
},
{
"name": "firecrawl_extract",
"description": "\nExtract structured information from web pages using LLM capabilities. Supports both cloud AI and self-hosted LLM extraction.\n\n**Best for:** Extracting specific structured data like prices, names, details.\n**Not recommended for:** When you need the full content of a page (use scrape); when you're not looking for specific structured data.\n**Arguments:**\n- urls: Array of URLs to extract information from\n- prompt: Custom prompt for the LLM extraction\n- systemPrompt: System prompt to guide the LLM\n- schema: JSON schema for structured data extraction\n- allowExternalLinks: Allow extraction from external links\n- enableWebSearch: Enable web search for additional context\n- includeSubdomains: Include subdomains in extraction\n**Prompt Example:** \"Extract the product name, price, and description from these product pages.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_extract\",\n \"arguments\": {\n \"urls\": [\"https://example.com/page1\", \"https://example.com/page2\"],\n \"prompt\": \"Extract product information including name, price, and description\",\n \"systemPrompt\": \"You are a helpful assistant that extracts product information\",\n \"schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": { \"type\": \"string\" },\n \"price\": { \"type\": \"number\" },\n \"description\": { \"type\": \"string\" }\n },\n \"required\": [\"name\", \"price\"]\n },\n \"allowExternalLinks\": false,\n \"enableWebSearch\": false,\n \"includeSubdomains\": false\n }\n}\n```\n**Returns:** Extracted structured data as defined by your schema.\n",
"inputSchema": {
"type": "object",
"properties": {
"urls": {
"type": "array",
"items": {
"type": "string"
},
"description": "List of URLs to extract information from"
},
"prompt": {
"type": "string",
"description": "Prompt for the LLM extraction"
},
"systemPrompt": {
"type": "string",
"description": "System prompt for LLM extraction"
},
"schema": {
"type": "object",
"description": "JSON schema for structured data extraction"
},
"allowExternalLinks": {
"type": "boolean",
"description": "Allow extraction from external links"
},
"enableWebSearch": {
"type": "boolean",
"description": "Enable web search for additional context"
},
"includeSubdomains": {
"type": "boolean",
"description": "Include subdomains in extraction"
}
},
"required": [
"urls"
]
}
},
{
"name": "firecrawl_deep_research",
"description": "\nConduct deep web research on a query using intelligent crawling, search, and LLM analysis.\n\n**Best for:** Complex research questions requiring multiple sources, in-depth analysis.\n**Not recommended for:** Simple questions that can be answered with a single search; when you need very specific information from a known page (use scrape); when you need results quickly (deep research can take time).\n**Arguments:**\n- query (string, required): The research question or topic to explore.\n- maxDepth (number, optional): Maximum recursive depth for crawling/search (default: 3).\n- timeLimit (number, optional): Time limit in seconds for the research session (default: 120).\n- maxUrls (number, optional): Maximum number of URLs to analyze (default: 50).\n**Prompt Example:** \"Research the environmental impact of electric vehicles versus gasoline vehicles.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_deep_research\",\n \"arguments\": {\n \"query\": \"What are the environmental impacts of electric vehicles compared to gasoline vehicles?\",\n \"maxDepth\": 3,\n \"timeLimit\": 120,\n \"maxUrls\": 50\n }\n}\n```\n**Returns:** Final analysis generated by an LLM based on research. (data.finalAnalysis); may also include structured activities and sources used in the research process.\n",
"inputSchema": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to research"
},
"maxDepth": {
"type": "number",
"description": "Maximum depth of research iterations (1-10)"
},
"timeLimit": {
"type": "number",
"description": "Time limit in seconds (30-300)"
},
"maxUrls": {
"type": "number",
"description": "Maximum number of URLs to analyze (1-1000)"
}
},
"required": [
"query"
]
}
},
{
"name": "firecrawl_generate_llmstxt",
"description": "\nGenerate a standardized llms.txt (and optionally llms-full.txt) file for a given domain. This file defines how large language models should interact with the site.\n\n**Best for:** Creating machine-readable permission guidelines for AI models.\n**Not recommended for:** General content extraction or research.\n**Arguments:**\n- url (string, required): The base URL of the website to analyze.\n- maxUrls (number, optional): Max number of URLs to include (default: 10).\n- showFullText (boolean, optional): Whether to include llms-full.txt contents in the response.\n**Prompt Example:** \"Generate an LLMs.txt file for example.com.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_generate_llmstxt\",\n \"arguments\": {\n \"url\": \"https://example.com\",\n \"maxUrls\": 20,\n \"showFullText\": true\n }\n}\n```\n**Returns:** LLMs.txt file contents (and optionally llms-full.txt).\n",
"inputSchema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to generate LLMs.txt from"
},
"maxUrls": {
"type": "number",
"description": "Maximum number of URLs to process (1-100, default: 10)"
},
"showFullText": {
"type": "boolean",
"description": "Whether to show the full LLMs-full.txt in the response"
}
},
"required": [
"url"
]
}
}
]
}
{
"tools": [
{
"name": "firecrawl_scrape",
"description": "\nScrape content from a single URL with advanced options.\n\n**Best for:** Single page content extraction, when you know exactly which page contains the information.\n**Not recommended for:** Multiple pages (use batch_scrape), unknown page (use search), structured data (use extract).\n**Common mistakes:** Using scrape for a list of URLs (use batch_scrape instead).\n**Prompt Example:** \"Get the content of the page at https://example.com.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_scrape\",\n \"arguments\": {\n \"url\": \"https://example.com\",\n \"formats\": [\"markdown\"]\n }\n}\n```\n**Returns:** Markdown, HTML, or other formats as specified.\n",
"inputSchema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to scrape"
},
"formats": {
"type": "array",
"items": {
"type": "string",
"enum": [
"markdown",
"html",
"rawHtml",
"screenshot",
"links",
"screenshot@fullPage",
"extract"
]
},
"default": [
"markdown"
],
"description": "Content formats to extract (default: ['markdown'])"
},
"onlyMainContent": {
"type": "boolean",
"description": "Extract only the main content, filtering out navigation, footers, etc."
},
"includeTags": {
"type": "array",
"items": {
"type": "string"
},
"description": "HTML tags to specifically include in extraction"
},
"excludeTags": {
"type": "array",
"items": {
"type": "string"
},
"description": "HTML tags to exclude from extraction"
},
"waitFor": {
"type": "number",
"description": "Time in milliseconds to wait for dynamic content to load"
},
"timeout": {
"type": "number",
"description": "Maximum time in milliseconds to wait for the page to load"
},
"actions": {
"type": "array",
"items": {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": [
"wait",
"click",
"screenshot",
"write",
"press",
"scroll",
"scrape",
"executeJavascript"
],
"description": "Type of action to perform"
},
"selector": {
"type": "string",
"description": "CSS selector for the target element"
},
"milliseconds": {
"type": "number",
"description": "Time to wait in milliseconds (for wait action)"
},
"text": {
"type": "string",
"description": "Text to write (for write action)"
},
"key": {
"type": "string",
"description": "Key to press (for press action)"
},
"direction": {
"type": "string",
"enum": [
"up",
"down"
],
"description": "Scroll direction"
},
"script": {
"type": "string",
"description": "JavaScript code to execute"
},
"fullPage": {
"type": "boolean",
"description": "Take full page screenshot"
}
},
"required": [
"type"
]
},
"description": "List of actions to perform before scraping"
},
"extract": {
"type": "object",
"properties": {
"schema": {
"type": "object",
"description": "Schema for structured data extraction"
},
"systemPrompt": {
"type": "string",
"description": "System prompt for LLM extraction"
},
"prompt": {
"type": "string",
"description": "User prompt for LLM extraction"
}
},
"description": "Configuration for structured data extraction"
},
"mobile": {
"type": "boolean",
"description": "Use mobile viewport"
},
"skipTlsVerification": {
"type": "boolean",
"description": "Skip TLS certificate verification"
},
"removeBase64Images": {
"type": "boolean",
"description": "Remove base64 encoded images from output"
},
"location": {
"type": "object",
"properties": {
"country": {
"type": "string",
"description": "Country code for geolocation"
},
"languages": {
"type": "array",
"items": {
"type": "string"
},
"description": "Language codes for content"
}
},
"description": "Location settings for scraping"
}
},
"required": [
"url"
]
}
},
{
"name": "firecrawl_map",
"description": "\nMap a website to discover all indexed URLs on the site.\n\n**Best for:** Discovering URLs on a website before deciding what to scrape; finding specific sections of a website.\n**Not recommended for:** When you already know which specific URL you need (use scrape or batch_scrape); when you need the content of the pages (use scrape after mapping).\n**Common mistakes:** Using crawl to discover URLs instead of map.\n**Prompt Example:** \"List all URLs on example.com.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_map\",\n \"arguments\": {\n \"url\": \"https://example.com\"\n }\n}\n```\n**Returns:** Array of URLs found on the site.\n",
"inputSchema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "Starting URL for URL discovery"
},
"search": {
"type": "string",
"description": "Optional search term to filter URLs"
},
"ignoreSitemap": {
"type": "boolean",
"description": "Skip sitemap.xml discovery and only use HTML links"
},
"sitemapOnly": {
"type": "boolean",
"description": "Only use sitemap.xml for discovery, ignore HTML links"
},
"includeSubdomains": {
"type": "boolean",
"description": "Include URLs from subdomains in results"
},
"limit": {
"type": "number",
"description": "Maximum number of URLs to return"
}
},
"required": [
"url"
]
}
},
{
"name": "firecrawl_crawl",
"description": "\nStarts an asynchronous crawl job on a website and extracts content from all pages.\n\n**Best for:** Extracting content from multiple related pages, when you need comprehensive coverage.\n**Not recommended for:** Extracting content from a single page (use scrape); when token limits are a concern (use map + batch_scrape); when you need fast results (crawling can be slow).\n**Warning:** Crawl responses can be very large and may exceed token limits. Limit the crawl depth and number of pages, or use map + batch_scrape for better control.\n**Common mistakes:** Setting limit or maxDepth too high (causes token overflow); using crawl for a single page (use scrape instead).\n**Prompt Example:** \"Get all blog posts from the first two levels of example.com/blog.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_crawl\",\n \"arguments\": {\n \"url\": \"https://example.com/blog/*\",\n \"maxDepth\": 2,\n \"limit\": 100,\n \"allowExternalLinks\": false,\n \"deduplicateSimilarURLs\": true\n }\n}\n```\n**Returns:** Operation ID for status checking; use firecrawl_check_crawl_status to check progress.\n",
"inputSchema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "Starting URL for the crawl"
},
"excludePaths": {
"type": "array",
"items": {
"type": "string"
},
"description": "URL paths to exclude from crawling"
},
"includePaths": {
"type": "array",
"items": {
"type": "string"
},
"description": "Only crawl these URL paths"
},
"maxDepth": {
"type": "number",
"description": "Maximum link depth to crawl"
},
"ignoreSitemap": {
"type": "boolean",
"description": "Skip sitemap.xml discovery"
},
"limit": {
"type": "number",
"description": "Maximum number of pages to crawl"
},
"allowBackwardLinks": {
"type": "boolean",
"description": "Allow crawling links that point to parent directories"
},
"allowExternalLinks": {
"type": "boolean",
"description": "Allow crawling links to external domains"
},
"webhook": {
"oneOf": [
{
"type": "string",
"description": "Webhook URL to notify when crawl is complete"
},
{
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "Webhook URL"
},
"headers": {
"type": "object",
"description": "Custom headers for webhook requests"
}
},
"required": [
"url"
]
}
]
},
"deduplicateSimilarURLs": {
"type": "boolean",
"description": "Remove similar URLs during crawl"
},
"ignoreQueryParameters": {
"type": "boolean",
"description": "Ignore query parameters when comparing URLs"
},
"scrapeOptions": {
"type": "object",
"properties": {
"formats": {
"type": "array",
"items": {
"type": "string",
"enum": [
"markdown",
"html",
"rawHtml",
"screenshot",
"links",
"screenshot@fullPage",
"extract"
]
}
},
"onlyMainContent": {
"type": "boolean"
},
"includeTags": {
"type": "array",
"items": {
"type": "string"
}
},
"excludeTags": {
"type": "array",
"items": {
"type": "string"
}
},
"waitFor": {
"type": "number"
}
},
"description": "Options for scraping each page"
}
},
"required": [
"url"
]
}
},
{
"name": "firecrawl_check_crawl_status",
"description": "\nCheck the status of a crawl job.\n\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_check_crawl_status\",\n \"arguments\": {\n \"id\": \"550e8400-e29b-41d4-a716-446655440000\"\n }\n}\n```\n**Returns:** Status and progress of the crawl job, including results if available.\n",
"inputSchema": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Crawl job ID to check"
}
},
"required": [
"id"
]
}
},
{
"name": "firecrawl_search",
"description": "\nSearch the web and optionally extract content from search results.\n\n**Best for:** Finding specific information across multiple websites, when you don't know which website has the information; when you need the most relevant content for a query.\n**Not recommended for:** When you already know which website to scrape (use scrape); when you need comprehensive coverage of a single website (use map or crawl).\n**Common mistakes:** Using crawl or map for open-ended questions (use search instead).\n**Prompt Example:** \"Find the latest research papers on AI published in 2023.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_search\",\n \"arguments\": {\n \"query\": \"latest AI research papers 2023\",\n \"limit\": 5,\n \"lang\": \"en\",\n \"country\": \"us\",\n \"scrapeOptions\": {\n \"formats\": [\"markdown\"],\n \"onlyMainContent\": true\n }\n }\n}\n```\n**Returns:** Array of search results (with optional scraped content).\n",
"inputSchema": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Search query string"
},
"limit": {
"type": "number",
"description": "Maximum number of results to return (default: 5)"
},
"lang": {
"type": "string",
"description": "Language code for search results (default: en)"
},
"country": {
"type": "string",
"description": "Country code for search results (default: us)"
},
"tbs": {
"type": "string",
"description": "Time-based search filter"
},
"filter": {
"type": "string",
"description": "Search filter"
},
"location": {
"type": "object",
"properties": {
"country": {
"type": "string",
"description": "Country code for geolocation"
},
"languages": {
"type": "array",
"items": {
"type": "string"
},
"description": "Language codes for content"
}
},
"description": "Location settings for search"
},
"scrapeOptions": {
"type": "object",
"properties": {
"formats": {
"type": "array",
"items": {
"type": "string",
"enum": [
"markdown",
"html",
"rawHtml"
]
},
"description": "Content formats to extract from search results"
},
"onlyMainContent": {
"type": "boolean",
"description": "Extract only the main content from results"
},
"waitFor": {
"type": "number",
"description": "Time in milliseconds to wait for dynamic content"
}
},
"description": "Options for scraping search results"
}
},
"required": [
"query"
]
}
},
{
"name": "firecrawl_extract",
"description": "\nExtract structured information from web pages using LLM capabilities. Supports both cloud AI and self-hosted LLM extraction.\n\n**Best for:** Extracting specific structured data like prices, names, details.\n**Not recommended for:** When you need the full content of a page (use scrape); when you're not looking for specific structured data.\n**Arguments:**\n- urls: Array of URLs to extract information from\n- prompt: Custom prompt for the LLM extraction\n- systemPrompt: System prompt to guide the LLM\n- schema: JSON schema for structured data extraction\n- allowExternalLinks: Allow extraction from external links\n- enableWebSearch: Enable web search for additional context\n- includeSubdomains: Include subdomains in extraction\n**Prompt Example:** \"Extract the product name, price, and description from these product pages.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_extract\",\n \"arguments\": {\n \"urls\": [\"https://example.com/page1\", \"https://example.com/page2\"],\n \"prompt\": \"Extract product information including name, price, and description\",\n \"systemPrompt\": \"You are a helpful assistant that extracts product information\",\n \"schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": { \"type\": \"string\" },\n \"price\": { \"type\": \"number\" },\n \"description\": { \"type\": \"string\" }\n },\n \"required\": [\"name\", \"price\"]\n },\n \"allowExternalLinks\": false,\n \"enableWebSearch\": false,\n \"includeSubdomains\": false\n }\n}\n```\n**Returns:** Extracted structured data as defined by your schema.\n",
"inputSchema": {
"type": "object",
"properties": {
"urls": {
"type": "array",
"items": {
"type": "string"
},
"description": "List of URLs to extract information from"
},
"prompt": {
"type": "string",
"description": "Prompt for the LLM extraction"
},
"systemPrompt": {
"type": "string",
"description": "System prompt for LLM extraction"
},
"schema": {
"type": "object",
"description": "JSON schema for structured data extraction"
},
"allowExternalLinks": {
"type": "boolean",
"description": "Allow extraction from external links"
},
"enableWebSearch": {
"type": "boolean",
"description": "Enable web search for additional context"
},
"includeSubdomains": {
"type": "boolean",
"description": "Include subdomains in extraction"
}
},
"required": [
"urls"
]
}
},
{
"name": "firecrawl_deep_research",
"description": "\nConduct deep web research on a query using intelligent crawling, search, and LLM analysis.\n\n**Best for:** Complex research questions requiring multiple sources, in-depth analysis.\n**Not recommended for:** Simple questions that can be answered with a single search; when you need very specific information from a known page (use scrape); when you need results quickly (deep research can take time).\n**Arguments:**\n- query (string, required): The research question or topic to explore.\n- maxDepth (number, optional): Maximum recursive depth for crawling/search (default: 3).\n- timeLimit (number, optional): Time limit in seconds for the research session (default: 120).\n- maxUrls (number, optional): Maximum number of URLs to analyze (default: 50).\n**Prompt Example:** \"Research the environmental impact of electric vehicles versus gasoline vehicles.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_deep_research\",\n \"arguments\": {\n \"query\": \"What are the environmental impacts of electric vehicles compared to gasoline vehicles?\",\n \"maxDepth\": 3,\n \"timeLimit\": 120,\n \"maxUrls\": 50\n }\n}\n```\n**Returns:** Final analysis generated by an LLM based on research. (data.finalAnalysis); may also include structured activities and sources used in the research process.\n",
"inputSchema": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to research"
},
"maxDepth": {
"type": "number",
"description": "Maximum depth of research iterations (1-10)"
},
"timeLimit": {
"type": "number",
"description": "Time limit in seconds (30-300)"
},
"maxUrls": {
"type": "number",
"description": "Maximum number of URLs to analyze (1-1000)"
}
},
"required": [
"query"
]
}
},
{
"name": "firecrawl_generate_llmstxt",
"description": "\nGenerate a standardized llms.txt (and optionally llms-full.txt) file for a given domain. This file defines how large language models should interact with the site.\n\n**Best for:** Creating machine-readable permission guidelines for AI models.\n**Not recommended for:** General content extraction or research.\n**Arguments:**\n- url (string, required): The base URL of the website to analyze.\n- maxUrls (number, optional): Max number of URLs to include (default: 10).\n- showFullText (boolean, optional): Whether to include llms-full.txt contents in the response.\n**Prompt Example:** \"Generate an LLMs.txt file for example.com.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_generate_llmstxt\",\n \"arguments\": {\n \"url\": \"https://example.com\",\n \"maxUrls\": 20,\n \"showFullText\": true\n }\n}\n```\n**Returns:** LLMs.txt file contents (and optionally llms-full.txt).\n",
"inputSchema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to generate LLMs.txt from"
},
"maxUrls": {
"type": "number",
"description": "Maximum number of URLs to process (1-100, default: 10)"
},
"showFullText": {
"type": "boolean",
"description": "Whether to show the full LLMs-full.txt in the response"
}
},
"required": [
"url"
]
}
}
]
}
{
"mcpServers": {
"firecrawl-mcp": {
"command": "npx",
"args": ["-y", "firecrawl-mcp"],
"env": {
"FIRECRAWL_API_KEY": "YOUR-API-KEY"
}
}
}
}
{
"tools": [
{
"name": "firecrawl_scrape",
"description": "\nScrape content from a single URL with advanced options.\n\n**Best for:** Single page content extraction, when you know exactly which page contains the information.\n**Not recommended for:** Multiple pages (use batch_scrape), unknown page (use search), structured data (use extract).\n**Common mistakes:** Using scrape for a list of URLs (use batch_scrape instead).\n**Prompt Example:** \"Get the content of the page at https://example.com.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_scrape\",\n \"arguments\": {\n \"url\": \"https://example.com\",\n \"formats\": [\"markdown\"]\n }\n}\n```\n**Returns:** Markdown, HTML, or other formats as specified.\n",
"inputSchema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to scrape"
},
"formats": {
"type": "array",
"items": {
"type": "string",
"enum": [
"markdown",
"html",
"rawHtml",
"screenshot",
"links",
"screenshot@fullPage",
"extract"
]
},
"default": [
"markdown"
],
"description": "Content formats to extract (default: ['markdown'])"
},
"onlyMainContent": {
"type": "boolean",
"description": "Extract only the main content, filtering out navigation, footers, etc."
},
"includeTags": {
"type": "array",
"items": {
"type": "string"
},
"description": "HTML tags to specifically include in extraction"
},
"excludeTags": {
"type": "array",
"items": {
"type": "string"
},
"description": "HTML tags to exclude from extraction"
},
"waitFor": {
"type": "number",
"description": "Time in milliseconds to wait for dynamic content to load"
},
"timeout": {
"type": "number",
"description": "Maximum time in milliseconds to wait for the page to load"
},
"actions": {
"type": "array",
"items": {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": [
"wait",
"click",
"screenshot",
"write",
"press",
"scroll",
"scrape",
"executeJavascript"
],
"description": "Type of action to perform"
},
"selector": {
"type": "string",
"description": "CSS selector for the target element"
},
"milliseconds": {
"type": "number",
"description": "Time to wait in milliseconds (for wait action)"
},
"text": {
"type": "string",
"description": "Text to write (for write action)"
},
"key": {
"type": "string",
"description": "Key to press (for press action)"
},
"direction": {
"type": "string",
"enum": [
"up",
"down"
],
"description": "Scroll direction"
},
"script": {
"type": "string",
"description": "JavaScript code to execute"
},
"fullPage": {
"type": "boolean",
"description": "Take full page screenshot"
}
},
"required": [
"type"
]
},
"description": "List of actions to perform before scraping"
},
"extract": {
"type": "object",
"properties": {
"schema": {
"type": "object",
"description": "Schema for structured data extraction"
},
"systemPrompt": {
"type": "string",
"description": "System prompt for LLM extraction"
},
"prompt": {
"type": "string",
"description": "User prompt for LLM extraction"
}
},
"description": "Configuration for structured data extraction"
},
"mobile": {
"type": "boolean",
"description": "Use mobile viewport"
},
"skipTlsVerification": {
"type": "boolean",
"description": "Skip TLS certificate verification"
},
"removeBase64Images": {
"type": "boolean",
"description": "Remove base64 encoded images from output"
},
"location": {
"type": "object",
"properties": {
"country": {
"type": "string",
"description": "Country code for geolocation"
},
"languages": {
"type": "array",
"items": {
"type": "string"
},
"description": "Language codes for content"
}
},
"description": "Location settings for scraping"
}
},
"required": [
"url"
]
}
},
{
"name": "firecrawl_map",
"description": "\nMap a website to discover all indexed URLs on the site.\n\n**Best for:** Discovering URLs on a website before deciding what to scrape; finding specific sections of a website.\n**Not recommended for:** When you already know which specific URL you need (use scrape or batch_scrape); when you need the content of the pages (use scrape after mapping).\n**Common mistakes:** Using crawl to discover URLs instead of map.\n**Prompt Example:** \"List all URLs on example.com.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_map\",\n \"arguments\": {\n \"url\": \"https://example.com\"\n }\n}\n```\n**Returns:** Array of URLs found on the site.\n",
"inputSchema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "Starting URL for URL discovery"
},
"search": {
"type": "string",
"description": "Optional search term to filter URLs"
},
"ignoreSitemap": {
"type": "boolean",
"description": "Skip sitemap.xml discovery and only use HTML links"
},
"sitemapOnly": {
"type": "boolean",
"description": "Only use sitemap.xml for discovery, ignore HTML links"
},
"includeSubdomains": {
"type": "boolean",
"description": "Include URLs from subdomains in results"
},
"limit": {
"type": "number",
"description": "Maximum number of URLs to return"
}
},
"required": [
"url"
]
}
},
{
"name": "firecrawl_crawl",
"description": "\nStarts an asynchronous crawl job on a website and extracts content from all pages.\n\n**Best for:** Extracting content from multiple related pages, when you need comprehensive coverage.\n**Not recommended for:** Extracting content from a single page (use scrape); when token limits are a concern (use map + batch_scrape); when you need fast results (crawling can be slow).\n**Warning:** Crawl responses can be very large and may exceed token limits. Limit the crawl depth and number of pages, or use map + batch_scrape for better control.\n**Common mistakes:** Setting limit or maxDepth too high (causes token overflow); using crawl for a single page (use scrape instead).\n**Prompt Example:** \"Get all blog posts from the first two levels of example.com/blog.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_crawl\",\n \"arguments\": {\n \"url\": \"https://example.com/blog/*\",\n \"maxDepth\": 2,\n \"limit\": 100,\n \"allowExternalLinks\": false,\n \"deduplicateSimilarURLs\": true\n }\n}\n```\n**Returns:** Operation ID for status checking; use firecrawl_check_crawl_status to check progress.\n",
"inputSchema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "Starting URL for the crawl"
},
"excludePaths": {
"type": "array",
"items": {
"type": "string"
},
"description": "URL paths to exclude from crawling"
},
"includePaths": {
"type": "array",
"items": {
"type": "string"
},
"description": "Only crawl these URL paths"
},
"maxDepth": {
"type": "number",
"description": "Maximum link depth to crawl"
},
"ignoreSitemap": {
"type": "boolean",
"description": "Skip sitemap.xml discovery"
},
"limit": {
"type": "number",
"description": "Maximum number of pages to crawl"
},
"allowBackwardLinks": {
"type": "boolean",
"description": "Allow crawling links that point to parent directories"
},
"allowExternalLinks": {
"type": "boolean",
"description": "Allow crawling links to external domains"
},
"webhook": {
"oneOf": [
{
"type": "string",
"description": "Webhook URL to notify when crawl is complete"
},
{
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "Webhook URL"
},
"headers": {
"type": "object",
"description": "Custom headers for webhook requests"
}
},
"required": [
"url"
]
}
]
},
"deduplicateSimilarURLs": {
"type": "boolean",
"description": "Remove similar URLs during crawl"
},
"ignoreQueryParameters": {
"type": "boolean",
"description": "Ignore query parameters when comparing URLs"
},
"scrapeOptions": {
"type": "object",
"properties": {
"formats": {
"type": "array",
"items": {
"type": "string",
"enum": [
"markdown",
"html",
"rawHtml",
"screenshot",
"links",
"screenshot@fullPage",
"extract"
]
}
},
"onlyMainContent": {
"type": "boolean"
},
"includeTags": {
"type": "array",
"items": {
"type": "string"
}
},
"excludeTags": {
"type": "array",
"items": {
"type": "string"
}
},
"waitFor": {
"type": "number"
}
},
"description": "Options for scraping each page"
}
},
"required": [
"url"
]
}
},
{
"name": "firecrawl_check_crawl_status",
"description": "\nCheck the status of a crawl job.\n\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_check_crawl_status\",\n \"arguments\": {\n \"id\": \"550e8400-e29b-41d4-a716-446655440000\"\n }\n}\n```\n**Returns:** Status and progress of the crawl job, including results if available.\n",
"inputSchema": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Crawl job ID to check"
}
},
"required": [
"id"
]
}
},
{
"name": "firecrawl_search",
"description": "\nSearch the web and optionally extract content from search results.\n\n**Best for:** Finding specific information across multiple websites, when you don't know which website has the information; when you need the most relevant content for a query.\n**Not recommended for:** When you already know which website to scrape (use scrape); when you need comprehensive coverage of a single website (use map or crawl).\n**Common mistakes:** Using crawl or map for open-ended questions (use search instead).\n**Prompt Example:** \"Find the latest research papers on AI published in 2023.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_search\",\n \"arguments\": {\n \"query\": \"latest AI research papers 2023\",\n \"limit\": 5,\n \"lang\": \"en\",\n \"country\": \"us\",\n \"scrapeOptions\": {\n \"formats\": [\"markdown\"],\n \"onlyMainContent\": true\n }\n }\n}\n```\n**Returns:** Array of search results (with optional scraped content).\n",
"inputSchema": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Search query string"
},
"limit": {
"type": "number",
"description": "Maximum number of results to return (default: 5)"
},
"lang": {
"type": "string",
"description": "Language code for search results (default: en)"
},
"country": {
"type": "string",
"description": "Country code for search results (default: us)"
},
"tbs": {
"type": "string",
"description": "Time-based search filter"
},
"filter": {
"type": "string",
"description": "Search filter"
},
"location": {
"type": "object",
"properties": {
"country": {
"type": "string",
"description": "Country code for geolocation"
},
"languages": {
"type": "array",
"items": {
"type": "string"
},
"description": "Language codes for content"
}
},
"description": "Location settings for search"
},
"scrapeOptions": {
"type": "object",
"properties": {
"formats": {
"type": "array",
"items": {
"type": "string",
"enum": [
"markdown",
"html",
"rawHtml"
]
},
"description": "Content formats to extract from search results"
},
"onlyMainContent": {
"type": "boolean",
"description": "Extract only the main content from results"
},
"waitFor": {
"type": "number",
"description": "Time in milliseconds to wait for dynamic content"
}
},
"description": "Options for scraping search results"
}
},
"required": [
"query"
]
}
},
{
"name": "firecrawl_extract",
"description": "\nExtract structured information from web pages using LLM capabilities. Supports both cloud AI and self-hosted LLM extraction.\n\n**Best for:** Extracting specific structured data like prices, names, details.\n**Not recommended for:** When you need the full content of a page (use scrape); when you're not looking for specific structured data.\n**Arguments:**\n- urls: Array of URLs to extract information from\n- prompt: Custom prompt for the LLM extraction\n- systemPrompt: System prompt to guide the LLM\n- schema: JSON schema for structured data extraction\n- allowExternalLinks: Allow extraction from external links\n- enableWebSearch: Enable web search for additional context\n- includeSubdomains: Include subdomains in extraction\n**Prompt Example:** \"Extract the product name, price, and description from these product pages.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_extract\",\n \"arguments\": {\n \"urls\": [\"https://example.com/page1\", \"https://example.com/page2\"],\n \"prompt\": \"Extract product information including name, price, and description\",\n \"systemPrompt\": \"You are a helpful assistant that extracts product information\",\n \"schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": { \"type\": \"string\" },\n \"price\": { \"type\": \"number\" },\n \"description\": { \"type\": \"string\" }\n },\n \"required\": [\"name\", \"price\"]\n },\n \"allowExternalLinks\": false,\n \"enableWebSearch\": false,\n \"includeSubdomains\": false\n }\n}\n```\n**Returns:** Extracted structured data as defined by your schema.\n",
"inputSchema": {
"type": "object",
"properties": {
"urls": {
"type": "array",
"items": {
"type": "string"
},
"description": "List of URLs to extract information from"
},
"prompt": {
"type": "string",
"description": "Prompt for the LLM extraction"
},
"systemPrompt": {
"type": "string",
"description": "System prompt for LLM extraction"
},
"schema": {
"type": "object",
"description": "JSON schema for structured data extraction"
},
"allowExternalLinks": {
"type": "boolean",
"description": "Allow extraction from external links"
},
"enableWebSearch": {
"type": "boolean",
"description": "Enable web search for additional context"
},
"includeSubdomains": {
"type": "boolean",
"description": "Include subdomains in extraction"
}
},
"required": [
"urls"
]
}
},
{
"name": "firecrawl_deep_research",
"description": "\nConduct deep web research on a query using intelligent crawling, search, and LLM analysis.\n\n**Best for:** Complex research questions requiring multiple sources, in-depth analysis.\n**Not recommended for:** Simple questions that can be answered with a single search; when you need very specific information from a known page (use scrape); when you need results quickly (deep research can take time).\n**Arguments:**\n- query (string, required): The research question or topic to explore.\n- maxDepth (number, optional): Maximum recursive depth for crawling/search (default: 3).\n- timeLimit (number, optional): Time limit in seconds for the research session (default: 120).\n- maxUrls (number, optional): Maximum number of URLs to analyze (default: 50).\n**Prompt Example:** \"Research the environmental impact of electric vehicles versus gasoline vehicles.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_deep_research\",\n \"arguments\": {\n \"query\": \"What are the environmental impacts of electric vehicles compared to gasoline vehicles?\",\n \"maxDepth\": 3,\n \"timeLimit\": 120,\n \"maxUrls\": 50\n }\n}\n```\n**Returns:** Final analysis generated by an LLM based on research. (data.finalAnalysis); may also include structured activities and sources used in the research process.\n",
"inputSchema": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to research"
},
"maxDepth": {
"type": "number",
"description": "Maximum depth of research iterations (1-10)"
},
"timeLimit": {
"type": "number",
"description": "Time limit in seconds (30-300)"
},
"maxUrls": {
"type": "number",
"description": "Maximum number of URLs to analyze (1-1000)"
}
},
"required": [
"query"
]
}
},
{
"name": "firecrawl_generate_llmstxt",
"description": "\nGenerate a standardized llms.txt (and optionally llms-full.txt) file for a given domain. This file defines how large language models should interact with the site.\n\n**Best for:** Creating machine-readable permission guidelines for AI models.\n**Not recommended for:** General content extraction or research.\n**Arguments:**\n- url (string, required): The base URL of the website to analyze.\n- maxUrls (number, optional): Max number of URLs to include (default: 10).\n- showFullText (boolean, optional): Whether to include llms-full.txt contents in the response.\n**Prompt Example:** \"Generate an LLMs.txt file for example.com.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_generate_llmstxt\",\n \"arguments\": {\n \"url\": \"https://example.com\",\n \"maxUrls\": 20,\n \"showFullText\": true\n }\n}\n```\n**Returns:** LLMs.txt file contents (and optionally llms-full.txt).\n",
"inputSchema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to generate LLMs.txt from"
},
"maxUrls": {
"type": "number",
"description": "Maximum number of URLs to process (1-100, default: 10)"
},
"showFullText": {
"type": "boolean",
"description": "Whether to show the full LLMs-full.txt in the response"
}
},
"required": [
"url"
]
}
}
]
}
{"mcpServers": {"mcp-server-firecrawl": {"command": "npx", "args": ["-y", "firecrawl-mcp"], "env": {"FIRECRAWL_API_KEY": "YOUR_API_KEY_HERE", "FIRECRAWL_RETRY_MAX_ATTEMPTS": "5", "FIRECRAWL_RETRY_INITIAL_DELAY": "2000", "FIRECRAWL_RETRY_MAX_DELAY": "30000", "FIRECRAWL_RETRY_BACKOFF_FACTOR": "3", "FIRECRAWL_CREDIT_WARNING_THRESHOLD": "2000", "FIRECRAWL_CREDIT_CRITICAL_THRESHOLD": "500"}}}}
{"mcp": {"inputs": [{"type": "promptString", "id": "apiKey", "description": "Firecrawl API Key", "password": true}], "servers": {"firecrawl": {"command": "npx", "args": ["-y", "firecrawl-mcp"], "env": {"FIRECRAWL_API_KEY": "${input:apiKey}"}}}}}
{
"tools": [
{
"name": "firecrawl_scrape",
"description": "\nScrape content from a single URL with advanced options.\n\n**Best for:** Single page content extraction, when you know exactly which page contains the information.\n**Not recommended for:** Multiple pages (use batch_scrape), unknown page (use search), structured data (use extract).\n**Common mistakes:** Using scrape for a list of URLs (use batch_scrape instead).\n**Prompt Example:** \"Get the content of the page at https://example.com.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_scrape\",\n \"arguments\": {\n \"url\": \"https://example.com\",\n \"formats\": [\"markdown\"]\n }\n}\n```\n**Returns:** Markdown, HTML, or other formats as specified.\n",
"inputSchema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to scrape"
},
"formats": {
"type": "array",
"items": {
"type": "string",
"enum": [
"markdown",
"html",
"rawHtml",
"screenshot",
"links",
"screenshot@fullPage",
"extract"
]
},
"default": [
"markdown"
],
"description": "Content formats to extract (default: ['markdown'])"
},
"onlyMainContent": {
"type": "boolean",
"description": "Extract only the main content, filtering out navigation, footers, etc."
},
"includeTags": {
"type": "array",
"items": {
"type": "string"
},
"description": "HTML tags to specifically include in extraction"
},
"excludeTags": {
"type": "array",
"items": {
"type": "string"
},
"description": "HTML tags to exclude from extraction"
},
"waitFor": {
"type": "number",
"description": "Time in milliseconds to wait for dynamic content to load"
},
"timeout": {
"type": "number",
"description": "Maximum time in milliseconds to wait for the page to load"
},
"actions": {
"type": "array",
"items": {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": [
"wait",
"click",
"screenshot",
"write",
"press",
"scroll",
"scrape",
"executeJavascript"
],
"description": "Type of action to perform"
},
"selector": {
"type": "string",
"description": "CSS selector for the target element"
},
"milliseconds": {
"type": "number",
"description": "Time to wait in milliseconds (for wait action)"
},
"text": {
"type": "string",
"description": "Text to write (for write action)"
},
"key": {
"type": "string",
"description": "Key to press (for press action)"
},
"direction": {
"type": "string",
"enum": [
"up",
"down"
],
"description": "Scroll direction"
},
"script": {
"type": "string",
"description": "JavaScript code to execute"
},
"fullPage": {
"type": "boolean",
"description": "Take full page screenshot"
}
},
"required": [
"type"
]
},
"description": "List of actions to perform before scraping"
},
"extract": {
"type": "object",
"properties": {
"schema": {
"type": "object",
"description": "Schema for structured data extraction"
},
"systemPrompt": {
"type": "string",
"description": "System prompt for LLM extraction"
},
"prompt": {
"type": "string",
"description": "User prompt for LLM extraction"
}
},
"description": "Configuration for structured data extraction"
},
"mobile": {
"type": "boolean",
"description": "Use mobile viewport"
},
"skipTlsVerification": {
"type": "boolean",
"description": "Skip TLS certificate verification"
},
"removeBase64Images": {
"type": "boolean",
"description": "Remove base64 encoded images from output"
},
"location": {
"type": "object",
"properties": {
"country": {
"type": "string",
"description": "Country code for geolocation"
},
"languages": {
"type": "array",
"items": {
"type": "string"
},
"description": "Language codes for content"
}
},
"description": "Location settings for scraping"
}
},
"required": [
"url"
]
}
},
{
"name": "firecrawl_map",
"description": "\nMap a website to discover all indexed URLs on the site.\n\n**Best for:** Discovering URLs on a website before deciding what to scrape; finding specific sections of a website.\n**Not recommended for:** When you already know which specific URL you need (use scrape or batch_scrape); when you need the content of the pages (use scrape after mapping).\n**Common mistakes:** Using crawl to discover URLs instead of map.\n**Prompt Example:** \"List all URLs on example.com.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_map\",\n \"arguments\": {\n \"url\": \"https://example.com\"\n }\n}\n```\n**Returns:** Array of URLs found on the site.\n",
"inputSchema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "Starting URL for URL discovery"
},
"search": {
"type": "string",
"description": "Optional search term to filter URLs"
},
"ignoreSitemap": {
"type": "boolean",
"description": "Skip sitemap.xml discovery and only use HTML links"
},
"sitemapOnly": {
"type": "boolean",
"description": "Only use sitemap.xml for discovery, ignore HTML links"
},
"includeSubdomains": {
"type": "boolean",
"description": "Include URLs from subdomains in results"
},
"limit": {
"type": "number",
"description": "Maximum number of URLs to return"
}
},
"required": [
"url"
]
}
},
{
"name": "firecrawl_crawl",
"description": "\nStarts an asynchronous crawl job on a website and extracts content from all pages.\n\n**Best for:** Extracting content from multiple related pages, when you need comprehensive coverage.\n**Not recommended for:** Extracting content from a single page (use scrape); when token limits are a concern (use map + batch_scrape); when you need fast results (crawling can be slow).\n**Warning:** Crawl responses can be very large and may exceed token limits. Limit the crawl depth and number of pages, or use map + batch_scrape for better control.\n**Common mistakes:** Setting limit or maxDepth too high (causes token overflow); using crawl for a single page (use scrape instead).\n**Prompt Example:** \"Get all blog posts from the first two levels of example.com/blog.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_crawl\",\n \"arguments\": {\n \"url\": \"https://example.com/blog/*\",\n \"maxDepth\": 2,\n \"limit\": 100,\n \"allowExternalLinks\": false,\n \"deduplicateSimilarURLs\": true\n }\n}\n```\n**Returns:** Operation ID for status checking; use firecrawl_check_crawl_status to check progress.\n",
"inputSchema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "Starting URL for the crawl"
},
"excludePaths": {
"type": "array",
"items": {
"type": "string"
},
"description": "URL paths to exclude from crawling"
},
"includePaths": {
"type": "array",
"items": {
"type": "string"
},
"description": "Only crawl these URL paths"
},
"maxDepth": {
"type": "number",
"description": "Maximum link depth to crawl"
},
"ignoreSitemap": {
"type": "boolean",
"description": "Skip sitemap.xml discovery"
},
"limit": {
"type": "number",
"description": "Maximum number of pages to crawl"
},
"allowBackwardLinks": {
"type": "boolean",
"description": "Allow crawling links that point to parent directories"
},
"allowExternalLinks": {
"type": "boolean",
"description": "Allow crawling links to external domains"
},
"webhook": {
"oneOf": [
{
"type": "string",
"description": "Webhook URL to notify when crawl is complete"
},
{
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "Webhook URL"
},
"headers": {
"type": "object",
"description": "Custom headers for webhook requests"
}
},
"required": [
"url"
]
}
]
},
"deduplicateSimilarURLs": {
"type": "boolean",
"description": "Remove similar URLs during crawl"
},
"ignoreQueryParameters": {
"type": "boolean",
"description": "Ignore query parameters when comparing URLs"
},
"scrapeOptions": {
"type": "object",
"properties": {
"formats": {
"type": "array",
"items": {
"type": "string",
"enum": [
"markdown",
"html",
"rawHtml",
"screenshot",
"links",
"screenshot@fullPage",
"extract"
]
}
},
"onlyMainContent": {
"type": "boolean"
},
"includeTags": {
"type": "array",
"items": {
"type": "string"
}
},
"excludeTags": {
"type": "array",
"items": {
"type": "string"
}
},
"waitFor": {
"type": "number"
}
},
"description": "Options for scraping each page"
}
},
"required": [
"url"
]
}
},
{
"name": "firecrawl_check_crawl_status",
"description": "\nCheck the status of a crawl job.\n\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_check_crawl_status\",\n \"arguments\": {\n \"id\": \"550e8400-e29b-41d4-a716-446655440000\"\n }\n}\n```\n**Returns:** Status and progress of the crawl job, including results if available.\n",
"inputSchema": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Crawl job ID to check"
}
},
"required": [
"id"
]
}
},
{
"name": "firecrawl_search",
"description": "\nSearch the web and optionally extract content from search results.\n\n**Best for:** Finding specific information across multiple websites, when you don't know which website has the information; when you need the most relevant content for a query.\n**Not recommended for:** When you already know which website to scrape (use scrape); when you need comprehensive coverage of a single website (use map or crawl).\n**Common mistakes:** Using crawl or map for open-ended questions (use search instead).\n**Prompt Example:** \"Find the latest research papers on AI published in 2023.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_search\",\n \"arguments\": {\n \"query\": \"latest AI research papers 2023\",\n \"limit\": 5,\n \"lang\": \"en\",\n \"country\": \"us\",\n \"scrapeOptions\": {\n \"formats\": [\"markdown\"],\n \"onlyMainContent\": true\n }\n }\n}\n```\n**Returns:** Array of search results (with optional scraped content).\n",
"inputSchema": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Search query string"
},
"limit": {
"type": "number",
"description": "Maximum number of results to return (default: 5)"
},
"lang": {
"type": "string",
"description": "Language code for search results (default: en)"
},
"country": {
"type": "string",
"description": "Country code for search results (default: us)"
},
"tbs": {
"type": "string",
"description": "Time-based search filter"
},
"filter": {
"type": "string",
"description": "Search filter"
},
"location": {
"type": "object",
"properties": {
"country": {
"type": "string",
"description": "Country code for geolocation"
},
"languages": {
"type": "array",
"items": {
"type": "string"
},
"description": "Language codes for content"
}
},
"description": "Location settings for search"
},
"scrapeOptions": {
"type": "object",
"properties": {
"formats": {
"type": "array",
"items": {
"type": "string",
"enum": [
"markdown",
"html",
"rawHtml"
]
},
"description": "Content formats to extract from search results"
},
"onlyMainContent": {
"type": "boolean",
"description": "Extract only the main content from results"
},
"waitFor": {
"type": "number",
"description": "Time in milliseconds to wait for dynamic content"
}
},
"description": "Options for scraping search results"
}
},
"required": [
"query"
]
}
},
{
"name": "firecrawl_extract",
"description": "\nExtract structured information from web pages using LLM capabilities. Supports both cloud AI and self-hosted LLM extraction.\n\n**Best for:** Extracting specific structured data like prices, names, details.\n**Not recommended for:** When you need the full content of a page (use scrape); when you're not looking for specific structured data.\n**Arguments:**\n- urls: Array of URLs to extract information from\n- prompt: Custom prompt for the LLM extraction\n- systemPrompt: System prompt to guide the LLM\n- schema: JSON schema for structured data extraction\n- allowExternalLinks: Allow extraction from external links\n- enableWebSearch: Enable web search for additional context\n- includeSubdomains: Include subdomains in extraction\n**Prompt Example:** \"Extract the product name, price, and description from these product pages.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_extract\",\n \"arguments\": {\n \"urls\": [\"https://example.com/page1\", \"https://example.com/page2\"],\n \"prompt\": \"Extract product information including name, price, and description\",\n \"systemPrompt\": \"You are a helpful assistant that extracts product information\",\n \"schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": { \"type\": \"string\" },\n \"price\": { \"type\": \"number\" },\n \"description\": { \"type\": \"string\" }\n },\n \"required\": [\"name\", \"price\"]\n },\n \"allowExternalLinks\": false,\n \"enableWebSearch\": false,\n \"includeSubdomains\": false\n }\n}\n```\n**Returns:** Extracted structured data as defined by your schema.\n",
"inputSchema": {
"type": "object",
"properties": {
"urls": {
"type": "array",
"items": {
"type": "string"
},
"description": "List of URLs to extract information from"
},
"prompt": {
"type": "string",
"description": "Prompt for the LLM extraction"
},
"systemPrompt": {
"type": "string",
"description": "System prompt for LLM extraction"
},
"schema": {
"type": "object",
"description": "JSON schema for structured data extraction"
},
"allowExternalLinks": {
"type": "boolean",
"description": "Allow extraction from external links"
},
"enableWebSearch": {
"type": "boolean",
"description": "Enable web search for additional context"
},
"includeSubdomains": {
"type": "boolean",
"description": "Include subdomains in extraction"
}
},
"required": [
"urls"
]
}
},
{
"name": "firecrawl_deep_research",
"description": "\nConduct deep web research on a query using intelligent crawling, search, and LLM analysis.\n\n**Best for:** Complex research questions requiring multiple sources, in-depth analysis.\n**Not recommended for:** Simple questions that can be answered with a single search; when you need very specific information from a known page (use scrape); when you need results quickly (deep research can take time).\n**Arguments:**\n- query (string, required): The research question or topic to explore.\n- maxDepth (number, optional): Maximum recursive depth for crawling/search (default: 3).\n- timeLimit (number, optional): Time limit in seconds for the research session (default: 120).\n- maxUrls (number, optional): Maximum number of URLs to analyze (default: 50).\n**Prompt Example:** \"Research the environmental impact of electric vehicles versus gasoline vehicles.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_deep_research\",\n \"arguments\": {\n \"query\": \"What are the environmental impacts of electric vehicles compared to gasoline vehicles?\",\n \"maxDepth\": 3,\n \"timeLimit\": 120,\n \"maxUrls\": 50\n }\n}\n```\n**Returns:** Final analysis generated by an LLM based on research. (data.finalAnalysis); may also include structured activities and sources used in the research process.\n",
"inputSchema": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to research"
},
"maxDepth": {
"type": "number",
"description": "Maximum depth of research iterations (1-10)"
},
"timeLimit": {
"type": "number",
"description": "Time limit in seconds (30-300)"
},
"maxUrls": {
"type": "number",
"description": "Maximum number of URLs to analyze (1-1000)"
}
},
"required": [
"query"
]
}
},
{
"name": "firecrawl_generate_llmstxt",
"description": "\nGenerate a standardized llms.txt (and optionally llms-full.txt) file for a given domain. This file defines how large language models should interact with the site.\n\n**Best for:** Creating machine-readable permission guidelines for AI models.\n**Not recommended for:** General content extraction or research.\n**Arguments:**\n- url (string, required): The base URL of the website to analyze.\n- maxUrls (number, optional): Max number of URLs to include (default: 10).\n- showFullText (boolean, optional): Whether to include llms-full.txt contents in the response.\n**Prompt Example:** \"Generate an LLMs.txt file for example.com.\"\n**Usage Example:**\n```json\n{\n \"name\": \"firecrawl_generate_llmstxt\",\n \"arguments\": {\n \"url\": \"https://example.com\",\n \"maxUrls\": 20,\n \"showFullText\": true\n }\n}\n```\n**Returns:** LLMs.txt file contents (and optionally llms-full.txt).\n",
"inputSchema": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to generate LLMs.txt from"
},
"maxUrls": {
"type": "number",
"description": "Maximum number of URLs to process (1-100, default: 10)"
},
"showFullText": {
"type": "boolean",
"description": "Whether to show the full LLMs-full.txt in the response"
}
},
"required": [
"url"
]
}
}
]
}
{ "mcpServers": { "firecrawl-mcp": { "command": "npx", "args": ["-y", "firecrawl-mcp"], "env": { "FIRECRAWL_API_KEY": "YOUR_API_KEY" } } }}
{"mcpServers": {"mcp-server-firecrawl": {"command": "npx", "args": ["-y", "firecrawl-mcp"], "env": {"FIRECRAWL_API_KEY": "YOUR_API_KEY"}}}}
Upload Files
-
Community
Reply