diff --git a/config.py b/config.py
index ace406fb..14643216 100644
--- a/config.py
+++ b/config.py
@@ -58,4 +58,5 @@ class Config:
     AI_MIDDLEWARE_PAUTH_KEY = os.getenv('AI_MIDDLEWARE_PAUTH_KEY')
     OPENAI_API_KEY_GPT_5_NANO = os.getenv('OPENAI_API_KEY_GPT_5_NANO')
     HIPPOCAMPUS_API_KEY = os.getenv('HIPPOCAMPUS_API_KEY')
-    HIPPOCAMPUS_COLLECTION_ID = os.getenv('HIPPOCAMPUS_COLLECTION_ID')
\ No newline at end of file
+    HIPPOCAMPUS_COLLECTION_ID = os.getenv('HIPPOCAMPUS_COLLECTION_ID')
+    FIRECRAWL_API_KEY = os.getenv('FIRECRAWL_API_KEY')
\ No newline at end of file
diff --git a/src/configs/constant.py b/src/configs/constant.py
index 74d7b528..d4c392cb 100644
--- a/src/configs/constant.py
+++ b/src/configs/constant.py
@@ -71,4 +71,8 @@
     "gemini":"gemini-2.5-flash",
     "ai_ml": "gpt-oss-20b",
     "grok": "grok-4-fast"
+}
+
+inbuild_tools = {
+    "Gtwy_Web_Search":"Gtwy_Web_Search"
 }
\ No newline at end of file
diff --git a/src/services/commonServices/baseService/utils.py b/src/services/commonServices/baseService/utils.py
index 199815d0..bee3ed4b 100644
--- a/src/services/commonServices/baseService/utils.py
+++ b/src/services/commonServices/baseService/utils.py
@@ -12,9 +12,10 @@
 from globals import *
 from src.db_services.ConfigurationServices import get_bridges_without_tools, update_bridge
 from src.services.utils.ai_call_util import call_gtwy_agent
+from src.services.utils.built_in_tools.firecrawl import call_firecrawl_scrape
 from globals import *
 from src.services.cache_service import store_in_cache, find_in_cache, client, REDIS_PREFIX
-from src.configs.constant import redis_keys
+from src.configs.constant import redis_keys, inbuild_tools
 
 def clean_json(data):
     """Recursively remove keys with empty string, empty list, or empty dictionary."""
@@ -252,6 +253,8 @@ async def process_data_and_run_tools(codes_mapping, self):
                     agent_args["bridge_configurations"] = self.bridge_configurations
 
                 task = call_gtwy_agent(agent_args)
+            elif self.tool_id_and_name_mapping[name].get('type') == inbuild_tools["Gtwy_Web_Search"]:
+                task = call_firecrawl_scrape(tool_data.get("args"))
             else:
                 task = axios_work(tool_data.get("args"), self.tool_id_and_name_mapping[name])
             tasks.append((tool_call_key, tool_data, task))
@@ -509,5 +512,3 @@ async def save_files_to_redis(thread_id, sub_thread_id, bridge_id, files):
         await store_in_cache(cache_key, files, 604800)
     else:
         await store_in_cache(cache_key, files, 604800)
-
-
diff --git a/src/services/utils/built_in_tools/firecrawl.py b/src/services/utils/built_in_tools/firecrawl.py
new file mode 100644
index 00000000..311b782f
--- /dev/null
+++ b/src/services/utils/built_in_tools/firecrawl.py
@@ -0,0 +1,91 @@
+"""Built-in web search tool: scrape a single URL through the Firecrawl API."""
+from config import Config
+from src.services.utils.apiservice import fetch
+from src.services.utils.logger import logger
+
+FIRECRAWL_SCRAPE_URL = "https://api.firecrawl.dev/v2/scrape"
+
+
+def _error_result(message):
+    """Build the failure envelope (status 0) returned to the tool runner."""
+    return {
+        'response': {'error': message},
+        'metadata': {'type': 'function'},
+        'status': 0
+    }
+
+
+def _normalize_formats(formats):
+    """Coerce a truthy ``formats`` argument into a list for the request payload."""
+    if isinstance(formats, (list, tuple)):
+        return list(formats)
+    if isinstance(formats, str):
+        return [formats]
+    return [str(formats)]
+
+
+async def call_firecrawl_scrape(args):
+    """Scrape one URL through Firecrawl and wrap the outcome for the tool runner.
+
+    Args:
+        args: Tool-call arguments. Expected to be a dict with a required
+            ``url`` and an optional ``formats`` (a list or a single value);
+            anything that is not a dict is treated as "no arguments".
+
+    Returns:
+        A dict with ``response``, ``metadata`` and ``status`` keys. ``status``
+        is 1 on success and 0 on failure; request errors are reported through
+        the payload instead of being raised.
+    """
+    if not isinstance(args, dict):
+        args = {}
+
+    url = args.get('url')
+    if not url:
+        return _error_result('url is required for web_crawling tool')
+
+    api_key = Config.FIRECRAWL_API_KEY
+    if not api_key:
+        return _error_result('web_crawling tool is not configured')
+
+    payload = {'url': url}
+    formats = args.get('formats')
+    if formats:
+        payload['formats'] = _normalize_formats(formats)
+
+    request_headers = {
+        'Content-Type': 'application/json',
+        'Authorization': f'Bearer {api_key}'
+    }
+
+    try:
+        response, headers = await fetch(
+            FIRECRAWL_SCRAPE_URL,
+            'POST',
+            request_headers,
+            None,
+            payload
+        )
+    except Exception as exc:
+        # Any request error is handed back as an error payload so a failed
+        # scrape does not propagate out of the tool call.
+        logger.error(f"Firecrawl scrape failed: {exc}")
+        return _error_result(str(exc))
+
+    # Firecrawl bodies carry a "success" flag; an explicit false must not be
+    # reported as a successful tool call.
+    if isinstance(response, dict) and response.get('success') is False:
+        message = response.get('error') or 'Firecrawl scrape failed'
+        logger.error(f"Firecrawl scrape failed: {message}")
+        return _error_result(message)
+
+    # Unwrap the "data" envelope when present; otherwise pass the body through.
+    data = response.get('data') if isinstance(response, dict) and 'data' in response else response
+    return {
+        'response': data,
+        'metadata': {
+            'type': 'function',
+            'flowHitId': headers.get('flowHitId') if isinstance(headers, dict) else None
+        },
+        'status': 1
+    }
diff --git a/src/services/utils/getConfiguration.py b/src/services/utils/getConfiguration.py
index 40f638e2..57ada568 100644
--- a/src/services/utils/getConfiguration.py
+++ b/src/services/utils/getConfiguration.py
@@ -6,7 +6,7 @@
 from .getConfiguration_utils import (
     validate_bridge, get_bridge_data, setup_configuration, setup_tool_choice,
     setup_tools, setup_api_key, setup_pre_tools, add_rag_tool,
-    add_anthropic_json_schema, add_connected_agents
+    add_anthropic_json_schema, add_connected_agents, add_web_crawling_tool
 )
 from .update_and_check_cost import check_bridge_api_folder_limits
 
@@ -132,6 +132,9 @@ async def _prepare_configuration_response(configuration, service, bridge_id, api
     )
 
     add_rag_tool(tools, tool_id_and_name_mapping, rag_data)
+
+    gtwy_web_search_filters = web_search_filters or result.get('bridges', {}).get('gtwy_web_search_filters') or []
+    add_web_crawling_tool(tools, tool_id_and_name_mapping, built_in_tools or result.get('bridges', {}).get('built_in_tools'), gtwy_web_search_filters)
     add_anthropic_json_schema(service, configuration, tools)
 
     if rag_data:
diff --git a/src/services/utils/getConfiguration_utils.py b/src/services/utils/getConfiguration_utils.py
index 8e4a8868..97d2559c 100644
--- a/src/services/utils/getConfiguration_utils.py
+++ b/src/services/utils/getConfiguration_utils.py
@@ -4,6 +4,7 @@
 from src.services.commonServices.baseService.utils import makeFunctionName
 from src.services.utils.service_config_utils import tool_choice_function_name_formatter
 from config import Config
+from src.configs.constant import inbuild_tools
 
 apiCallModel = db['apicalls']
 from globals import *
@@ -275,6 +276,59 @@ def add_rag_tool(tools, tool_id_and_name_mapping, rag_data):
         "type": "RAG"
     }
 
+def _should_enable_web_crawling_tool(built_in_tools):
+    """Return True when the Gtwy web search tool is listed in ``built_in_tools``."""
+    if not built_in_tools:
+        return False
+    return inbuild_tools["Gtwy_Web_Search"] in built_in_tools
+
+def add_web_crawling_tool(tools, tool_id_and_name_mapping, built_in_tools, gtwy_web_search_filters=None):
+    """Add Firecrawl-based web crawling tool when requested via built-in tools.
+
+    Appends the tool schema to ``tools`` and registers the tool in
+    ``tool_id_and_name_mapping``; both are mutated in place. Does nothing
+    when the tool is not listed in ``built_in_tools``.
+    """
+    if not _should_enable_web_crawling_tool(built_in_tools):
+        return
+
+    tool_name = inbuild_tools["Gtwy_Web_Search"]
+    # NOTE(review): this enum only constrains what the model may pass; nothing
+    # in this change re-checks the URL when the tool is executed - confirm
+    # whether the filters must also be enforced server-side.
+    allowed_urls = gtwy_web_search_filters or []
+
+    tools.append({
+        'type': 'function',
+        'name': tool_name,
+        'description': 'Search and extract content from any website URL. This tool scrapes web pages and returns their content in various formats. Use this when you need to: fetch real-time information from websites, extract article content, retrieve documentation, access public web data, or get current information not in your training data. If enum is provided for URL, only use URLs from those allowed domains.',
+        'properties': {
+            'url': {
+                'description': 'The complete URL of the website to scrape (must start with http:// or https://). Example: https://example.com/page',
+                'type': 'string',
+                'enum': allowed_urls,
+                'required_params': [],
+                'parameter': {}
+            },
+            'formats': {
+                'description': 'Optional list of output formats. Available formats include: "markdown" (default, clean text), "html" (raw HTML), "screenshot" (visual capture), "links" (extracted URLs). If not specified, returns markdown format.',
+                'type': 'array',
+                'items': {
+                    'type': 'string'
+                },
+                'enum': [],
+                'required_params': [],
+                'parameter': {}
+            }
+        },
+        'required': ['url']
+    })
+
+    tool_id_and_name_mapping[tool_name] = {
+        'type': tool_name,
+        'name': tool_name
+    }
+
 def add_anthropic_json_schema(service, configuration, tools):
     """Add JSON schema response format for Anthropic service"""
     if (service != 'anthropic' or