Commit 40f4eeb

Merge pull request #45 from mpnikhil/main
Add lm studio as a LLM provider
2 parents dfd0215 + 1b8ce54 · commit 40f4eeb

File tree

8 files changed (+281 lines, −28 lines)

.env.example

Lines changed: 6 additions & 1 deletion
@@ -11,5 +11,10 @@ SEARXNG_URL=
 TAVILY_API_KEY=tvly-xxxxx # Get your key at https://tavily.com
 PERPLEXITY_API_KEY=pplx-xxxxx # Get your key at https://www.perplexity.ai
 
+# LLM Configuration
+LLM_PROVIDER=lmstudio # Options: ollama, lmstudio
+LOCAL_LLM=qwen_qwq-32b # Model name in LMStudio
+LMSTUDIO_BASE_URL=http://localhost:1234/v1 # LMStudio OpenAI-compatible API URL
+
 MAX_WEB_RESEARCH_LOOPS=3
-FETCH_FULL_PAGE=True
+FETCH_FULL_PAGE=True

README.md

Lines changed: 22 additions & 3 deletions
@@ -1,6 +1,6 @@
 # Ollama Deep Researcher
 
-Ollama Deep Researcher is a fully local web research assistant that uses any LLM hosted by [Ollama](https://ollama.com/search). Give it a topic and it will generate a web search query, gather web search results (via [Tavily](https://www.tavily.com/) by default), summarize the results of web search, reflect on the summary to examine knowledge gaps, generate a new search query to address the gaps, search, and improve the summary for a user-defined number of cycles. It will provide the user a final markdown summary with all sources used.
+Ollama Deep Researcher is a fully local web research assistant that uses any LLM hosted by [Ollama](https://ollama.com/search) or [LMStudio](https://lmstudio.ai/). Give it a topic and it will generate a web search query, gather web search results (via [Tavily](https://www.tavily.com/) by default), summarize the results of web search, reflect on the summary to examine knowledge gaps, generate a new search query to address the gaps, search, and improve the summary for a user-defined number of cycles. It will provide the user a final markdown summary with all sources used.
 
 ![research-rabbit](https://github.com/user-attachments/assets/4308ee9c-abf3-4abb-9d1e-83e7c2c3f187)
 
@@ -15,7 +15,7 @@ See it in action or build it yourself? Check out these helpful video tutorials:
 
 ## 🚀 Quickstart
 
-### Mac
+### Using Ollama
 
 1. Download the Ollama app for Mac [here](https://ollama.com/download).
 
@@ -24,7 +24,26 @@ See it in action or build it yourself? Check out these helpful video tutorials:
    ollama pull deepseek-r1:8b
   ```
 
-3. Clone the repository:
+### Using LMStudio
+
+1. Download and install LMStudio from [here](https://lmstudio.ai/).
+
+2. In LMStudio:
+   - Download and load your preferred model (e.g., qwen_qwq-32b)
+   - Go to the "Local Server" tab
+   - Start the server with the OpenAI-compatible API
+   - Note the server URL (default: http://localhost:1234/v1)
+
+3. Update your `.env` file with the following settings:
+   ```
+   LLM_PROVIDER=lmstudio
+   LOCAL_LLM=qwen_qwq-32b  # Use the exact model name as shown in LMStudio
+   LMSTUDIO_BASE_URL=http://localhost:1234/v1
+   ```
+
+### Installation
+
+1. Clone the repository:
 ```bash
 git clone https://github.com/langchain-ai/ollama-deep-researcher.git
 cd ollama-deep-researcher
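Before running the assistant against LMStudio, it can help to confirm the local server is reachable and to see the exact model identifier to copy into `LOCAL_LLM`. A minimal sketch, assuming LMStudio's OpenAI-compatible server is running on the default port and exposes the standard /v1/models listing endpoint:

```python
import json
import urllib.request

# List the models the local LMStudio server is serving; the "id" field is
# the exact string that LOCAL_LLM in .env should match.
with urllib.request.urlopen("http://localhost:1234/v1/models") as resp:
    models = json.load(resp)

for m in models.get("data", []):
    print(m["id"])
```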

pyproject.toml

Lines changed: 2 additions & 0 deletions
@@ -15,6 +15,8 @@ dependencies = [
     "langchain-ollama>=0.2.1",
     "duckduckgo-search>=7.3.0",
     "beautifulsoup4>=4.13.3",
+    "langchain-openai>=0.1.1",
+    "openai>=1.12.0",
 ]
 
 [project.optional-dependencies]

src/assistant/configuration.py

Lines changed: 52 additions & 13 deletions
@@ -1,9 +1,8 @@
 import os
-from dataclasses import dataclass, fields
-from typing import Any, Optional
+from typing import Any, Optional, Dict, List, Literal
+from pydantic import BaseModel, Field
 
 from langchain_core.runnables import RunnableConfig
-from dataclasses import dataclass
 
 from enum import Enum
 
@@ -16,11 +15,46 @@ class SearchAPI(Enum):
 @dataclass(kw_only=True)
 class Configuration:
     """The configurable fields for the research assistant."""
-    max_web_research_loops: int = int(os.environ.get("MAX_WEB_RESEARCH_LOOPS", "3"))
-    local_llm: str = os.environ.get("OLLAMA_MODEL", "llama3.2")
-    search_api: SearchAPI = SearchAPI(os.environ.get("SEARCH_API", SearchAPI.DUCKDUCKGO.value)) # Default to DUCKDUCKGO
-    fetch_full_page: bool = os.environ.get("FETCH_FULL_PAGE", "False").lower() in ("true", "1", "t")
-    ollama_base_url: str = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434/")
+    max_web_research_loops: int = Field(
+        default=3,
+        title="Research Depth",
+        description="Number of research iterations to perform"
+    )
+    local_llm: str = Field(
+        default="llama3.2",
+        title="LLM Model Name",
+        description="Name of the LLM model to use"
+    )
+    llm_provider: Literal["ollama", "lmstudio"] = Field(
+        default="ollama",
+        title="LLM Provider",
+        description="Provider for the LLM (Ollama or LMStudio)"
+    )
+    search_api: Literal["perplexity", "tavily", "duckduckgo"] = Field(
+        default="duckduckgo",
+        title="Search API",
+        description="Web search API to use"
+    )
+    fetch_full_page: bool = Field(
+        default=False,
+        title="Fetch Full Page",
+        description="Whether to fetch the full page content (DuckDuckGo only)"
+    )
+    ollama_base_url: str = Field(
+        default="http://localhost:11434/",
+        title="Ollama Base URL",
+        description="Base URL for Ollama API"
+    )
+    lmstudio_base_url: str = Field(
+        default="http://localhost:1234/v1",
+        title="LMStudio Base URL",
+        description="Base URL for LMStudio OpenAI-compatible API"
+    )
+    strip_thinking_tokens: bool = Field(
+        default=False,
+        title="Strip Thinking Tokens",
+        description="Whether to strip <think> tokens from model responses"
+    )
 
     @classmethod
     def from_runnable_config(
@@ -30,9 +64,14 @@ def from_runnable_config(
         configurable = (
             config["configurable"] if config and "configurable" in config else {}
         )
-        values: dict[str, Any] = {
-            f.name: os.environ.get(f.name.upper(), configurable.get(f.name))
-            for f in fields(cls)
-            if f.init
+
+        # Get raw values from environment or config
+        raw_values: dict[str, Any] = {
+            name: os.environ.get(name.upper(), configurable.get(name))
+            for name in cls.model_fields.keys()
         }
-        return cls(**{k: v for k, v in values.items() if v})
+
+        # Filter out None values
+        values = {k: v for k, v in raw_values.items() if v is not None}
+
+        return cls(**values)
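As a quick illustration of the resolution order implemented in `from_runnable_config` above (environment variable, then the runtime `configurable` dict, then the field default), here is a minimal sketch. The field names come from this diff; the concrete values are illustrative, and the expected results assume the fields resolve as the commit intends:

```python
import os

from assistant.configuration import Configuration

# Environment variables win over runtime config; unset fields fall back to defaults.
os.environ["LLM_PROVIDER"] = "lmstudio"
os.environ["LMSTUDIO_BASE_URL"] = "http://localhost:1234/v1"

config = Configuration.from_runnable_config(
    {"configurable": {"local_llm": "qwen_qwq-32b", "max_web_research_loops": 2}}
)

print(config.llm_provider)   # expected: "lmstudio"    (from the environment)
print(config.local_llm)      # expected: "qwen_qwq-32b" (from the runnable config)
print(config.search_api)     # expected: "duckduckgo"   (field default)
```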

src/assistant/graph.py

Lines changed: 74 additions & 10 deletions
@@ -8,25 +8,54 @@
 from langgraph.graph import START, END, StateGraph
 
 from assistant.configuration import Configuration, SearchAPI
-from assistant.utils import deduplicate_and_format_sources, tavily_search, format_sources, perplexity_search, duckduckgo_search, searxng_search
+from assistant.utils import deduplicate_and_format_sources, tavily_search, format_sources, perplexity_search, duckduckgo_search, searxng_search, strip_thinking_tokens
+
 from assistant.state import SummaryState, SummaryStateInput, SummaryStateOutput
-from assistant.prompts import query_writer_instructions, summarizer_instructions, reflection_instructions
+from assistant.prompts import query_writer_instructions, summarizer_instructions, reflection_instructions, get_current_date
+from assistant.lmstudio import ChatLMStudio
 
 # Nodes
 def generate_query(state: SummaryState, config: RunnableConfig):
     """ Generate a query for web search """
 
     # Format the prompt
-    query_writer_instructions_formatted = query_writer_instructions.format(research_topic=state.research_topic)
+    current_date = get_current_date()
+    formatted_prompt = query_writer_instructions.format(
+        current_date=current_date,
+        research_topic=state.research_topic
+    )
 
     # Generate a query
     configurable = Configuration.from_runnable_config(config)
-    llm_json_mode = ChatOllama(base_url=configurable.ollama_base_url, model=configurable.local_llm, temperature=0, format="json")
+
+    # Choose the appropriate LLM based on the provider
+    if configurable.llm_provider == "lmstudio":
+        llm_json_mode = ChatLMStudio(
+            base_url=configurable.lmstudio_base_url,
+            model=configurable.local_llm,
+            temperature=0,
+            format="json"
+        )
+    else: # Default to Ollama
+        llm_json_mode = ChatOllama(
+            base_url=configurable.ollama_base_url,
+            model=configurable.local_llm,
+            temperature=0,
+            format="json"
+        )
+
     result = llm_json_mode.invoke(
-        [SystemMessage(content=query_writer_instructions_formatted),
+        [SystemMessage(content=formatted_prompt),
         HumanMessage(content=f"Generate a query for web search:")]
     )
-    query = json.loads(result.content)
+    print(result.content)
+
+    # Strip thinking tokens if configured
+    content = result.content
+    if configurable.strip_thinking_tokens:
+        content = strip_thinking_tokens(content)
+
+    query = json.loads(content)
 
     return {"search_query": query['query']}
 
@@ -86,7 +115,21 @@ def summarize_sources(state: SummaryState, config: RunnableConfig):
 
     # Run the LLM
     configurable = Configuration.from_runnable_config(config)
-    llm = ChatOllama(base_url=configurable.ollama_base_url, model=configurable.local_llm, temperature=0)
+
+    # Choose the appropriate LLM based on the provider
+    if configurable.llm_provider == "lmstudio":
+        llm = ChatLMStudio(
+            base_url=configurable.lmstudio_base_url,
+            model=configurable.local_llm,
+            temperature=0
+        )
+    else: # Default to Ollama
+        llm = ChatOllama(
+            base_url=configurable.ollama_base_url,
+            model=configurable.local_llm,
+            temperature=0
+        )
+
     result = llm.invoke(
         [SystemMessage(content=summarizer_instructions),
         HumanMessage(content=human_message_content)]
@@ -108,19 +151,40 @@ def reflect_on_summary(state: SummaryState, config: RunnableConfig):
 
     # Generate a query
     configurable = Configuration.from_runnable_config(config)
-    llm_json_mode = ChatOllama(base_url=configurable.ollama_base_url, model=configurable.local_llm, temperature=0, format="json")
+
+    # Choose the appropriate LLM based on the provider
+    if configurable.llm_provider == "lmstudio":
+        llm_json_mode = ChatLMStudio(
+            base_url=configurable.lmstudio_base_url,
+            model=configurable.local_llm,
+            temperature=0,
+            format="json"
+        )
+    else: # Default to Ollama
+        llm_json_mode = ChatOllama(
+            base_url=configurable.ollama_base_url,
+            model=configurable.local_llm,
+            temperature=0,
+            format="json"
+        )
+
     result = llm_json_mode.invoke(
         [SystemMessage(content=reflection_instructions.format(research_topic=state.research_topic)),
         HumanMessage(content=f"Identify a knowledge gap and generate a follow-up web search query based on our existing knowledge: {state.running_summary}")]
     )
-    follow_up_query = json.loads(result.content)
+
+    # Strip thinking tokens if configured
+    content = result.content
+    if configurable.strip_thinking_tokens:
+        content = strip_thinking_tokens(content)
+
+    follow_up_query = json.loads(content)
 
     # Get the follow-up query
     query = follow_up_query.get('follow_up_query')
 
     # JSON mode can fail in some cases
     if not query:
-
         # Fallback to a placeholder query
         return {"search_query": f"Tell me more about {state.research_topic}"}
 
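The graph nodes above call `strip_thinking_tokens` from `assistant.utils`, whose body is not part of this diff. A plausible minimal sketch of such a helper (the function name and purpose come from the commit; the regex-based implementation is an assumption):

```python
import re

def strip_thinking_tokens(text: str) -> str:
    """Drop <think>...</think> reasoning blocks emitted by models such as
    qwen_qwq-32b so the remaining text can be parsed as JSON.
    Sketch only: the real helper lives in assistant/utils.py and may differ."""
    return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
```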

src/assistant/lmstudio.py

Lines changed: 99 additions & 0 deletions
@@ -0,0 +1,99 @@
+"""LMStudio integration for the research assistant."""
+
+import json
+import logging
+from typing import Any, Dict, List, Optional, Union
+
+from langchain_core.callbacks.manager import CallbackManagerForLLMRun
+from langchain_core.language_models.chat_models import BaseChatModel
+from langchain_core.messages import (
+    AIMessage,
+    BaseMessage,
+    ChatMessage,
+    HumanMessage,
+    SystemMessage,
+)
+from langchain_core.outputs import ChatGeneration, ChatResult
+from langchain_openai import ChatOpenAI
+from pydantic import Field
+
+# Set up logging
+logger = logging.getLogger(__name__)
+
+class ChatLMStudio(ChatOpenAI):
+    """Chat model that uses LMStudio's OpenAI-compatible API."""
+
+    format: Optional[str] = Field(default=None, description="Format for the response (e.g., 'json')")
+
+    def __init__(
+        self,
+        base_url: str = "http://localhost:1234/v1",
+        model: str = "qwen_qwq-32b",
+        temperature: float = 0.7,
+        format: Optional[str] = None,
+        api_key: str = "not-needed-for-local-models",
+        **kwargs: Any,
+    ):
+        """Initialize the ChatLMStudio.
+
+        Args:
+            base_url: Base URL for LMStudio's OpenAI-compatible API
+            model: Model name to use
+            temperature: Temperature for sampling
+            format: Format for the response (e.g., "json")
+            api_key: API key (not actually used, but required by OpenAI client)
+            **kwargs: Additional arguments to pass to the OpenAI client
+        """
+        # Initialize the base class
+        super().__init__(
+            base_url=base_url,
+            model=model,
+            temperature=temperature,
+            api_key=api_key,
+            **kwargs,
+        )
+        self.format = format
+
+    def _generate(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> ChatResult:
+        """Generate a chat response using LMStudio's OpenAI-compatible API."""
+        if self.format == "json":
+            # Set response_format for JSON mode
+            kwargs["response_format"] = {"type": "json_object"}
+            logger.info(f"Using response_format={kwargs['response_format']}")
+
+        # Call the parent class's _generate method
+        result = super()._generate(messages, stop, run_manager, **kwargs)
+
+        # If JSON format is requested, try to clean up the response
+        if self.format == "json" and result.generations:
+            try:
+                # Get the raw text
+                raw_text = result.generations[0][0].text
+                logger.info(f"Raw model response: {raw_text}")
+
+                # Try to find JSON in the response
+                json_start = raw_text.find('{')
+                json_end = raw_text.rfind('}') + 1
+
+                if json_start >= 0 and json_end > json_start:
+                    # Extract just the JSON part
+                    json_text = raw_text[json_start:json_end]
+                    # Validate it's proper JSON
+                    json.loads(json_text)
+                    logger.info(f"Cleaned JSON: {json_text}")
+                    # Update the generation with the cleaned JSON
+                    result.generations[0][0].text = json_text
+                else:
+                    logger.warning("Could not find JSON in response")
+            except Exception as e:
+                logger.error(f"Error processing JSON response: {str(e)}")
+                # If any error occurs during cleanup, just use the original response
+                pass
+
+        return result
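For reference, a minimal usage sketch of the class above, mirroring how graph.py constructs it in JSON mode (assumes an LMStudio server is running locally with the named model loaded):

```python
from langchain_core.messages import HumanMessage, SystemMessage

from assistant.lmstudio import ChatLMStudio

# In JSON mode the wrapper sets response_format={"type": "json_object"} and
# trims the reply down to the first {...} block it can validate.
llm = ChatLMStudio(
    base_url="http://localhost:1234/v1",
    model="qwen_qwq-32b",
    temperature=0,
    format="json",
)

result = llm.invoke(
    [SystemMessage(content="Respond with a JSON object containing a 'query' key."),
     HumanMessage(content="Generate a query for web search:")]
)
print(result.content)  # e.g. {"query": "..."}
```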

src/assistant/prompts.py

Lines changed: 11 additions & 1 deletion
@@ -1,5 +1,15 @@
+from datetime import datetime
+
+# Get current date in a readable format
+def get_current_date():
+    return datetime.now().strftime("%B %d, %Y")
+
 query_writer_instructions="""Your goal is to generate a targeted web search query.
-The query will gather information related to a specific topic.
+
+<CONTEXT>
+Current date: {current_date}
+Please ensure your queries account for the most current information available as of this date.
+</CONTEXT>
 
 <TOPIC>
 {research_topic}
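As a quick illustration of how the new date helper feeds the prompt (the printed date is whatever day the code runs):

```python
from datetime import datetime

def get_current_date():
    # Same helper as added in prompts.py above
    return datetime.now().strftime("%B %d, %Y")

# graph.generate_query fills the {current_date} placeholder with this value
print("Current date: {current_date}".format(current_date=get_current_date()))
```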
