Commit 40f4eeb

Merge pull request #45 from mpnikhil/main
Add lm studio as a LLM provider
2 parents dfd0215 + 1b8ce54 · commit 40f4eeb

File tree

8 files changed (+281 lines, −28 lines)

.env.example

Lines changed: 6 additions & 1 deletion
@@ -11,5 +11,10 @@ SEARXNG_URL=
 TAVILY_API_KEY=tvly-xxxxx # Get your key at https://tavily.com
 PERPLEXITY_API_KEY=pplx-xxxxx # Get your key at https://www.perplexity.ai
 
+# LLM Configuration
+LLM_PROVIDER=lmstudio # Options: ollama, lmstudio
+LOCAL_LLM=qwen_qwq-32b # Model name in LMStudio
+LMSTUDIO_BASE_URL=http://localhost:1234/v1 # LMStudio OpenAI-compatible API URL
+
 MAX_WEB_RESEARCH_LOOPS=3
-FETCH_FULL_PAGE=True
+FETCH_FULL_PAGE=True

README.md

Lines changed: 22 additions & 3 deletions
@@ -1,6 +1,6 @@
 # Ollama Deep Researcher
 
-Ollama Deep Researcher is a fully local web research assistant that uses any LLM hosted by [Ollama](https://ollama.com/search). Give it a topic and it will generate a web search query, gather web search results (via [Tavily](https://www.tavily.com/) by default), summarize the results of web search, reflect on the summary to examine knowledge gaps, generate a new search query to address the gaps, search, and improve the summary for a user-defined number of cycles. It will provide the user a final markdown summary with all sources used.
+Ollama Deep Researcher is a fully local web research assistant that uses any LLM hosted by [Ollama](https://ollama.com/search) or [LMStudio](https://lmstudio.ai/). Give it a topic and it will generate a web search query, gather web search results (via [Tavily](https://www.tavily.com/) by default), summarize the results of web search, reflect on the summary to examine knowledge gaps, generate a new search query to address the gaps, search, and improve the summary for a user-defined number of cycles. It will provide the user a final markdown summary with all sources used.
 
 ![research-rabbit](https://github.com/user-attachments/assets/4308ee9c-abf3-4abb-9d1e-83e7c2c3f187)
 
@@ -15,7 +15,7 @@ See it in action or build it yourself? Check out these helpful video tutorials:
 
 ## 🚀 Quickstart
 
-### Mac
+### Using Ollama
 
 1. Download the Ollama app for Mac [here](https://ollama.com/download).
 
@@ -24,7 +24,26 @@ See it in action or build it yourself? Check out these helpful video tutorials:
    ollama pull deepseek-r1:8b
   ```
 
-3. Clone the repository:
+### Using LMStudio
+
+1. Download and install LMStudio from [here](https://lmstudio.ai/).
+
+2. In LMStudio:
+   - Download and load your preferred model (e.g., qwen_qwq-32b)
+   - Go to the "Local Server" tab
+   - Start the server with the OpenAI-compatible API
+   - Note the server URL (default: http://localhost:1234/v1)
+
+3. Update your `.env` file with the following settings:
+   ```
+   LLM_PROVIDER=lmstudio
+   LOCAL_LLM=qwen_qwq-32b  # Use the exact model name as shown in LMStudio
+   LMSTUDIO_BASE_URL=http://localhost:1234/v1
+   ```
+
+### Installation
+
+1. Clone the repository:
 ```bash
 git clone https://github.com/langchain-ai/ollama-deep-researcher.git
 cd ollama-deep-researcher
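Before running the assistant against LMStudio, it can help to confirm the local server is reachable and to see the exact model identifier to copy into `LOCAL_LLM`. A minimal sketch, assuming LMStudio's OpenAI-compatible server is running on the default port and exposes the standard /v1/models listing endpoint:

```python
import json
import urllib.request

# List the models the local LMStudio server is serving; the "id" field is
# the exact string that LOCAL_LLM in .env should match.
with urllib.request.urlopen("http://localhost:1234/v1/models") as resp:
    models = json.load(resp)

for m in models.get("data", []):
    print(m["id"])
```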

pyproject.toml

Lines changed: 2 additions & 0 deletions
@@ -15,6 +15,8 @@ dependencies = [
     "langchain-ollama>=0.2.1",
     "duckduckgo-search>=7.3.0",
     "beautifulsoup4>=4.13.3",
+    "langchain-openai>=0.1.1",
+    "openai>=1.12.0",
 ]
 
 [project.optional-dependencies]

src/assistant/configuration.py

Lines changed: 52 additions & 13 deletions
@@ -1,9 +1,8 @@
 import os
-from dataclasses import dataclass, fields
-from typing import Any, Optional
+from typing import Any, Optional, Dict, List, Literal
+from pydantic import BaseModel, Field
 
 from langchain_core.runnables import RunnableConfig
-from dataclasses import dataclass
 
 from enum import Enum
 
@@ -16,11 +15,46 @@ class SearchAPI(Enum):
 @dataclass(kw_only=True)
 class Configuration:
     """The configurable fields for the research assistant."""
-    max_web_research_loops: int = int(os.environ.get("MAX_WEB_RESEARCH_LOOPS", "3"))
-    local_llm: str = os.environ.get("OLLAMA_MODEL", "llama3.2")
-    search_api: SearchAPI = SearchAPI(os.environ.get("SEARCH_API", SearchAPI.DUCKDUCKGO.value)) # Default to DUCKDUCKGO
-    fetch_full_page: bool = os.environ.get("FETCH_FULL_PAGE", "False").lower() in ("true", "1", "t")
-    ollama_base_url: str = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434/")
+    max_web_research_loops: int = Field(
+        default=3,
+        title="Research Depth",
+        description="Number of research iterations to perform"
+    )
+    local_llm: str = Field(
+        default="llama3.2",
+        title="LLM Model Name",
+        description="Name of the LLM model to use"
+    )
+    llm_provider: Literal["ollama", "lmstudio"] = Field(
+        default="ollama",
+        title="LLM Provider",
+        description="Provider for the LLM (Ollama or LMStudio)"
+    )
+    search_api: Literal["perplexity", "tavily", "duckduckgo"] = Field(
+        default="duckduckgo",
+        title="Search API",
+        description="Web search API to use"
+    )
+    fetch_full_page: bool = Field(
+        default=False,
+        title="Fetch Full Page",
+        description="Whether to fetch the full page content (DuckDuckGo only)"
+    )
+    ollama_base_url: str = Field(
+        default="http://localhost:11434/",
+        title="Ollama Base URL",
+        description="Base URL for Ollama API"
+    )
+    lmstudio_base_url: str = Field(
+        default="http://localhost:1234/v1",
+        title="LMStudio Base URL",
+        description="Base URL for LMStudio OpenAI-compatible API"
+    )
+    strip_thinking_tokens: bool = Field(
+        default=False,
+        title="Strip Thinking Tokens",
+        description="Whether to strip <think> tokens from model responses"
+    )
 
     @classmethod
     def from_runnable_config(
@@ -30,9 +64,14 @@ def from_runnable_config(
         configurable = (
             config["configurable"] if config and "configurable" in config else {}
         )
-        values: dict[str, Any] = {
-            f.name: os.environ.get(f.name.upper(), configurable.get(f.name))
-            for f in fields(cls)
-            if f.init
+
+        # Get raw values from environment or config
+        raw_values: dict[str, Any] = {
+            name: os.environ.get(name.upper(), configurable.get(name))
+            for name in cls.model_fields.keys()
         }
-        return cls(**{k: v for k, v in values.items() if v})
+
+        # Filter out None values
+        values = {k: v for k, v in raw_values.items() if v is not None}
+
+        return cls(**values)
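As a quick illustration of the resolution order implemented in `from_runnable_config` above (environment variable, then the runtime `configurable` dict, then the field default), here is a minimal sketch. The field names come from this diff; the concrete values are illustrative, and the expected results assume the fields resolve as the commit intends:

```python
import os

from assistant.configuration import Configuration

# Environment variables win over runtime config; unset fields fall back to defaults.
os.environ["LLM_PROVIDER"] = "lmstudio"
os.environ["LMSTUDIO_BASE_URL"] = "http://localhost:1234/v1"

config = Configuration.from_runnable_config(
    {"configurable": {"local_llm": "qwen_qwq-32b", "max_web_research_loops": 2}}
)

print(config.llm_provider)   # expected: "lmstudio"    (from the environment)
print(config.local_llm)      # expected: "qwen_qwq-32b" (from the runnable config)
print(config.search_api)     # expected: "duckduckgo"   (field default)
```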

src/assistant/graph.py

Lines changed: 74 additions & 10 deletions
@@ -8,25 +8,54 @@
 from langgraph.graph import START, END, StateGraph
 
 from assistant.configuration import Configuration, SearchAPI
-from assistant.utils import deduplicate_and_format_sources, tavily_search, format_sources, perplexity_search, duckduckgo_search, searxng_search
+from assistant.utils import deduplicate_and_format_sources, tavily_search, format_sources, perplexity_search, duckduckgo_search, searxng_search, strip_thinking_tokens
+
 from assistant.state import SummaryState, SummaryStateInput, SummaryStateOutput
-from assistant.prompts import query_writer_instructions, summarizer_instructions, reflection_instructions
+from assistant.prompts import query_writer_instructions, summarizer_instructions, reflection_instructions, get_current_date
+from assistant.lmstudio import ChatLMStudio
 
 # Nodes
 def generate_query(state: SummaryState, config: RunnableConfig):
     """ Generate a query for web search """
 
     # Format the prompt
-    query_writer_instructions_formatted = query_writer_instructions.format(research_topic=state.research_topic)
+    current_date = get_current_date()
+    formatted_prompt = query_writer_instructions.format(
+        current_date=current_date,
+        research_topic=state.research_topic
+    )
 
     # Generate a query
     configurable = Configuration.from_runnable_config(config)
-    llm_json_mode = ChatOllama(base_url=configurable.ollama_base_url, model=configurable.local_llm, temperature=0, format="json")
+
+    # Choose the appropriate LLM based on the provider
+    if configurable.llm_provider == "lmstudio":
+        llm_json_mode = ChatLMStudio(
+            base_url=configurable.lmstudio_base_url,
+            model=configurable.local_llm,
+            temperature=0,
+            format="json"
+        )
+    else: # Default to Ollama
+        llm_json_mode = ChatOllama(
+            base_url=configurable.ollama_base_url,
+            model=configurable.local_llm,
+            temperature=0,
+            format="json"
+        )
+
     result = llm_json_mode.invoke(
-        [SystemMessage(content=query_writer_instructions_formatted),
+        [SystemMessage(content=formatted_prompt),
         HumanMessage(content=f"Generate a query for web search:")]
     )
-    query = json.loads(result.content)
+    print(result.content)
+
+    # Strip thinking tokens if configured
+    content = result.content
+    if configurable.strip_thinking_tokens:
+        content = strip_thinking_tokens(content)
+
+    query = json.loads(content)
 
     return {"search_query": query['query']}
 
@@ -86,7 +115,21 @@ def summarize_sources(state: SummaryState, config: RunnableConfig):
 
     # Run the LLM
     configurable = Configuration.from_runnable_config(config)
-    llm = ChatOllama(base_url=configurable.ollama_base_url, model=configurable.local_llm, temperature=0)
+
+    # Choose the appropriate LLM based on the provider
+    if configurable.llm_provider == "lmstudio":
+        llm = ChatLMStudio(
+            base_url=configurable.lmstudio_base_url,
+            model=configurable.local_llm,
+            temperature=0
+        )
+    else: # Default to Ollama
+        llm = ChatOllama(
+            base_url=configurable.ollama_base_url,
+            model=configurable.local_llm,
+            temperature=0
+        )
+
     result = llm.invoke(
         [SystemMessage(content=summarizer_instructions),
         HumanMessage(content=human_message_content)]
@@ -108,19 +151,40 @@ def reflect_on_summary(state: SummaryState, config: RunnableConfig):
 
     # Generate a query
     configurable = Configuration.from_runnable_config(config)
-    llm_json_mode = ChatOllama(base_url=configurable.ollama_base_url, model=configurable.local_llm, temperature=0, format="json")
+
+    # Choose the appropriate LLM based on the provider
+    if configurable.llm_provider == "lmstudio":
+        llm_json_mode = ChatLMStudio(
+            base_url=configurable.lmstudio_base_url,
+            model=configurable.local_llm,
+            temperature=0,
+            format="json"
+        )
+    else: # Default to Ollama
+        llm_json_mode = ChatOllama(
+            base_url=configurable.ollama_base_url,
+            model=configurable.local_llm,
+            temperature=0,
+            format="json"
+        )
+
     result = llm_json_mode.invoke(
         [SystemMessage(content=reflection_instructions.format(research_topic=state.research_topic)),
         HumanMessage(content=f"Identify a knowledge gap and generate a follow-up web search query based on our existing knowledge: {state.running_summary}")]
     )
-    follow_up_query = json.loads(result.content)
+
+    # Strip thinking tokens if configured
+    content = result.content
+    if configurable.strip_thinking_tokens:
+        content = strip_thinking_tokens(content)
+
+    follow_up_query = json.loads(content)
 
     # Get the follow-up query
     query = follow_up_query.get('follow_up_query')
 
     # JSON mode can fail in some cases
     if not query:
-
         # Fallback to a placeholder query
         return {"search_query": f"Tell me more about {state.research_topic}"}
 
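The graph nodes above call `strip_thinking_tokens` from `assistant.utils`, whose body is not part of this diff. A plausible minimal sketch of such a helper (the function name and purpose come from the commit; the regex-based implementation is an assumption):

```python
import re

def strip_thinking_tokens(text: str) -> str:
    """Drop <think>...</think> reasoning blocks emitted by models such as
    qwen_qwq-32b so the remaining text can be parsed as JSON.
    Sketch only: the real helper lives in assistant/utils.py and may differ."""
    return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
```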

src/assistant/lmstudio.py

Lines changed: 99 additions & 0 deletions
@@ -0,0 +1,99 @@
+"""LMStudio integration for the research assistant."""
+
+import json
+import logging
+from typing import Any, Dict, List, Optional, Union
+
+from langchain_core.callbacks.manager import CallbackManagerForLLMRun
+from langchain_core.language_models.chat_models import BaseChatModel
+from langchain_core.messages import (
+    AIMessage,
+    BaseMessage,
+    ChatMessage,
+    HumanMessage,
+    SystemMessage,
+)
+from langchain_core.outputs import ChatGeneration, ChatResult
+from langchain_openai import ChatOpenAI
+from pydantic import Field
+
+# Set up logging
+logger = logging.getLogger(__name__)
+
+class ChatLMStudio(ChatOpenAI):
+    """Chat model that uses LMStudio's OpenAI-compatible API."""
+
+    format: Optional[str] = Field(default=None, description="Format for the response (e.g., 'json')")
+
+    def __init__(
+        self,
+        base_url: str = "http://localhost:1234/v1",
+        model: str = "qwen_qwq-32b",
+        temperature: float = 0.7,
+        format: Optional[str] = None,
+        api_key: str = "not-needed-for-local-models",
+        **kwargs: Any,
+    ):
+        """Initialize the ChatLMStudio.
+
+        Args:
+            base_url: Base URL for LMStudio's OpenAI-compatible API
+            model: Model name to use
+            temperature: Temperature for sampling
+            format: Format for the response (e.g., "json")
+            api_key: API key (not actually used, but required by OpenAI client)
+            **kwargs: Additional arguments to pass to the OpenAI client
+        """
+        # Initialize the base class
+        super().__init__(
+            base_url=base_url,
+            model=model,
+            temperature=temperature,
+            api_key=api_key,
+            **kwargs,
+        )
+        self.format = format
+
+    def _generate(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> ChatResult:
+        """Generate a chat response using LMStudio's OpenAI-compatible API."""
+        if self.format == "json":
+            # Set response_format for JSON mode
+            kwargs["response_format"] = {"type": "json_object"}
+            logger.info(f"Using response_format={kwargs['response_format']}")
+
+        # Call the parent class's _generate method
+        result = super()._generate(messages, stop, run_manager, **kwargs)
+
+        # If JSON format is requested, try to clean up the response
+        if self.format == "json" and result.generations:
+            try:
+                # Get the raw text
+                raw_text = result.generations[0][0].text
+                logger.info(f"Raw model response: {raw_text}")
+
+                # Try to find JSON in the response
+                json_start = raw_text.find('{')
+                json_end = raw_text.rfind('}') + 1
+
+                if json_start >= 0 and json_end > json_start:
+                    # Extract just the JSON part
+                    json_text = raw_text[json_start:json_end]
+                    # Validate it's proper JSON
+                    json.loads(json_text)
+                    logger.info(f"Cleaned JSON: {json_text}")
+                    # Update the generation with the cleaned JSON
+                    result.generations[0][0].text = json_text
+                else:
+                    logger.warning("Could not find JSON in response")
+            except Exception as e:
+                logger.error(f"Error processing JSON response: {str(e)}")
+                # If any error occurs during cleanup, just use the original response
+                pass
+
+        return result
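For reference, a minimal usage sketch of the class above, mirroring how graph.py constructs it in JSON mode (assumes an LMStudio server is running locally with the named model loaded):

```python
from langchain_core.messages import HumanMessage, SystemMessage

from assistant.lmstudio import ChatLMStudio

# In JSON mode the wrapper sets response_format={"type": "json_object"} and
# trims the reply down to the first {...} block it can validate.
llm = ChatLMStudio(
    base_url="http://localhost:1234/v1",
    model="qwen_qwq-32b",
    temperature=0,
    format="json",
)

result = llm.invoke(
    [SystemMessage(content="Respond with a JSON object containing a 'query' key."),
     HumanMessage(content="Generate a query for web search:")]
)
print(result.content)  # e.g. {"query": "..."}
```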

src/assistant/prompts.py

Lines changed: 11 additions & 1 deletion
@@ -1,5 +1,15 @@
+from datetime import datetime
+
+# Get current date in a readable format
+def get_current_date():
+    return datetime.now().strftime("%B %d, %Y")
+
 query_writer_instructions="""Your goal is to generate a targeted web search query.
-The query will gather information related to a specific topic.
+
+<CONTEXT>
+Current date: {current_date}
+Please ensure your queries account for the most current information available as of this date.
+</CONTEXT>
 
 <TOPIC>
 {research_topic}
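As a quick illustration of how the new date helper feeds the prompt (the printed date is whatever day the code runs):

```python
from datetime import datetime

def get_current_date():
    # Same helper as added in prompts.py above
    return datetime.now().strftime("%B %d, %Y")

# graph.generate_query fills the {current_date} placeholder with this value
print("Current date: {current_date}".format(current_date=get_current_date()))
```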
