
Commit dfd0215

Merge pull request #57 from lthomas122/main
Add SearXNG Support
2 parents d49c58a + 8bb105d commit dfd0215

File tree

5 files changed: +73 −17 lines changed


.env.example

Lines changed: 7 additions & 3 deletions
@@ -1,8 +1,12 @@
-OLLAMA_BASE_URL=http://localhost:11434 # the endpoint of the Ollama service, defaults to http://localhost:11434 if not set
-OLLAMA_MODEL=deepseek-r1:1.5b # the name of the model to use, defaults to 'llama3.2' if not set
+
+OLLAMA_BASE_URL=http://localhost:11434 # the endpoint of the Ollama service, defaults to http://localhost:11434 if not set
+OLLAMA_MODEL=llama3.2 # the name of the model to use, defaults to 'llama3.2' if not set
 
-# Which search service to use, either 'duckduckgo' or 'tavily' or 'perplexity'
+# Which search service to use, either 'duckduckgo', 'tavily', 'perplexity', Searxng
 SEARCH_API='duckduckgo'
+# For Searxng search, defaults to http://localhost:8888
+SEARXNG_URL=
+
 # Web Search API Keys (choose one or both)
 TAVILY_API_KEY=tvly-xxxxx # Get your key at https://tavily.com
 PERPLEXITY_API_KEY=pplx-xxxxx # Get your key at https://www.perplexity.ai
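
For context, a minimal sketch of how the new variable is picked up on the Python side: the utils.py change in this commit reads SEARXNG_URL with os.environ.get and falls back to the documented default. Everything beyond that one call is illustrative.

```python
import os

# Resolve the SearXNG endpoint the same way the commit's utils.py does:
# use SEARXNG_URL if it is set, otherwise the documented default.
host = os.environ.get("SEARXNG_URL", "http://localhost:8888")
print(f"Using SearXNG instance at {host}")
```

Note that os.environ.get only falls back when the variable is absent; if the .env file is loaded with SEARXNG_URL= left empty (as in the example above), the resolved value may be an empty string rather than the default.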

README.md

Lines changed: 1 addition & 1 deletion
@@ -32,7 +32,7 @@ cd ollama-deep-researcher
 
 4. Select a web search tool:
 
-By default, it will use [DuckDuckGo](https://duckduckgo.com/) for web search, which does not require an API key. But you can also use [Tavily](https://tavily.com/) or [Perplexity](https://www.perplexity.ai/hub/blog/introducing-the-sonar-pro-api) by adding their API keys to the environment file:
+By default, it will use [DuckDuckGo](https://duckduckgo.com/) for web search, which does not require an API key. But you can also use [SearXNG](https://docs.searxng.org/), [Tavily](https://tavily.com/) or [Perplexity](https://www.perplexity.ai/hub/blog/introducing-the-sonar-pro-api) by adding their API keys to the environment file:
 ```bash
 cp .env.example .env
 ```

src/assistant/configuration.py

Lines changed: 1 addition & 0 deletions
@@ -11,6 +11,7 @@ class SearchAPI(Enum):
     PERPLEXITY = "perplexity"
     TAVILY = "tavily"
     DUCKDUCKGO = "duckduckgo"
+    SEARXNG = "searxng"
 
 @dataclass(kw_only=True)
 class Configuration:
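
As a quick illustration of what the new enum member enables, the sketch below copies the SearchAPI shape shown in this hunk and resolves a configuration string (such as the SEARCH_API value from .env) to a member by value. The rest of the Configuration class is not shown in the diff, so nothing else is assumed.

```python
from enum import Enum

# Illustrative copy of the enum as it looks after this change
# (the real definition lives in src/assistant/configuration.py).
class SearchAPI(Enum):
    PERPLEXITY = "perplexity"
    TAVILY = "tavily"
    DUCKDUCKGO = "duckduckgo"
    SEARXNG = "searxng"

# Enum lookup by value: "searxng" now resolves instead of raising ValueError.
api = SearchAPI("searxng")
assert api is SearchAPI.SEARXNG
```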

src/assistant/graph.py

Lines changed: 4 additions & 1 deletion
@@ -8,7 +8,7 @@
 from langgraph.graph import START, END, StateGraph
 
 from assistant.configuration import Configuration, SearchAPI
-from assistant.utils import deduplicate_and_format_sources, tavily_search, format_sources, perplexity_search, duckduckgo_search
+from assistant.utils import deduplicate_and_format_sources, tavily_search, format_sources, perplexity_search, duckduckgo_search, searxng_search
 from assistant.state import SummaryState, SummaryStateInput, SummaryStateOutput
 from assistant.prompts import query_writer_instructions, summarizer_instructions, reflection_instructions
 
@@ -54,6 +54,9 @@ def web_research(state: SummaryState, config: RunnableConfig):
     elif search_api == "duckduckgo":
         search_results = duckduckgo_search(state.search_query, max_results=3, fetch_full_page=configurable.fetch_full_page)
         search_str = deduplicate_and_format_sources(search_results, max_tokens_per_source=1000, include_raw_content=True)
+    elif search_api == "searxng":
+        search_results = searxng_search(state.search_query, max_results=3, fetch_full_page=configurable.fetch_full_page)
+        search_str = deduplicate_and_format_sources(search_results, max_tokens_per_source=1000, include_raw_content=False)
     else:
         raise ValueError(f"Unsupported search API: {configurable.search_api}")
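
The new branch mirrors the existing DuckDuckGo path, except that it passes include_raw_content=False when formatting sources. The same two calls can also be exercised directly, outside the graph; the sketch below assumes a reachable SearXNG instance and uses a made-up query string, while the function names and parameters are taken from this hunk.

```python
from assistant.utils import searxng_search, deduplicate_and_format_sources

# Run the same two steps web_research performs for the "searxng" branch,
# outside of the LangGraph state machine (query string is illustrative).
search_results = searxng_search("ollama deep researcher", max_results=3, fetch_full_page=False)
search_str = deduplicate_and_format_sources(search_results, max_tokens_per_source=1000, include_raw_content=False)
print(search_str)
```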

src/assistant/utils.py

Lines changed: 60 additions & 12 deletions
@@ -4,6 +4,7 @@
 from langsmith import traceable
 from tavily import TavilyClient
 from duckduckgo_search import DDGS
+from langchain_community.utilities import SearxSearchWrapper
 
 def deduplicate_and_format_sources(search_response, max_tokens_per_source, include_raw_content=False):
     """
@@ -71,6 +72,18 @@ def format_sources(search_results):
         f"* {source['title']} : {source['url']}"
         for source in search_results['results']
     )
+def fetch_raw_content(url):
+    try:
+        # Try to fetch the full page content using curl
+        import urllib.request
+        from bs4 import BeautifulSoup
+        response = urllib.request.urlopen(url)
+        html = response.read()
+        soup = BeautifulSoup(html, 'html.parser')
+        return soup.get_text()
+
+    except Exception as e:
+        print(f"Warning: Failed to fetch full page content for {url}: {str(e)}")
 
 @traceable
 def duckduckgo_search(query: str, max_results: int = 3, fetch_full_page: bool = False) -> Dict[str, List[Dict[str, str]]]:
@@ -104,18 +117,7 @@ def duckduckgo_search(query: str, max_results: int = 3, fetch_full_page: bool =
 
                 raw_content = content
                 if fetch_full_page:
-                    try:
-                        # Try to fetch the full page content using curl
-                        import urllib.request
-                        from bs4 import BeautifulSoup
-
-                        response = urllib.request.urlopen(url)
-                        html = response.read()
-                        soup = BeautifulSoup(html, 'html.parser')
-                        raw_content = soup.get_text()
-
-                    except Exception as e:
-                        print(f"Warning: Failed to fetch full page content for {url}: {str(e)}")
+                    raw_content = fetch_raw_content(url)
 
                 # Add result to list
                 result = {
@@ -132,6 +134,52 @@ def duckduckgo_search(query: str, max_results: int = 3, fetch_full_page: bool =
         print(f"Full error details: {type(e).__name__}")
         return {"results": []}
 
+@traceable
+def searxng_search(query: str, max_results: int = 3, fetch_full_page = False):
+    """Search the web using SearXNG.
+
+    Args:
+        query (str): The search query to execute
+        max_results (int): Maximum number of results to return
+        fetch_full_page: Fetch page content from results urls
+
+    Returns:
+        dict: Search response containing:
+            - results (list): List of search result dictionaries, each containing:
+                - title (str): Title of the search result
+                - url (str): URL of the search result
+                - content (str): Snippet/summary of the content
+                - raw_content (str): Same as content since SearXNG doesn't provide full page content, unless fetch_full_page is true
+    """
+    host=os.environ.get("SEARXNG_URL", "http://localhost:8888")
+    s = SearxSearchWrapper(searx_host=host)
+
+    results = []
+    search_results = s.results(query, num_results=max_results)
+    for r in search_results:
+        url = r.get('link')
+        title = r.get('title')
+        content = r.get('snippet')
+
+        if not all([url, title, content]):
+            print(f"Warning: Incomplete result from SearXNG: {r}")
+            continue
+
+        raw_content = content
+        if fetch_full_page:
+            raw_content = fetch_raw_content(url)
+
+        # Add result to list
+        result = {
+            "title": title,
+            "url": url,
+            "content": content,
+            "raw_content": raw_content
+        }
+        results.append(result)
+    return {"results": results}
+
+
 @traceable
 def tavily_search(query, include_raw_content=True, max_results=3):
     """ Search the web using the Tavily API.
