docs-agent: fix typos in docs, comments, and log messages; correct script usage docstrings (#612)

Rahul-Lashkari · web-flow · commit b7d8a07b31d0 · 2025-11-25T09:30:45.000-08:00
diff --git a/examples/gemini/python/docs-agent/docs/concepts.md b/examples/gemini/python/docs-agent/docs/concepts.md
@@ -24,7 +24,7 @@ in the information that you provide and control.
 The key features of the Docs Agent chat app are:
 
 - Add contextual information to user questions to augment prompts for AI language models.
-- Process documents into embeddings and store them in a vector database for semnatic retrieval.
+- Process documents into embeddings and store them in a vector database for semantic retrieval.
 
 ![Docs Agent flow](./images/docs-agent-architecture-02.png)
 
@@ -50,7 +50,7 @@ shown in Figure 3 to augment the user question with a preset **condition** and a
 [`config.yaml`][config-yaml] file.) Then the Docs Agent server sends this prompt to a
 language model using the Gemini API and receives a response generated by the model.
 
-![Docs Agent prompt strcture](./images/docs-agent-prompt-structure-01.png)
+![Docs Agent prompt structure](./images/docs-agent-prompt-structure-01.png)
 
 **Figure 3**. Prompt structure for augmenting a user question with related context
 (Context source: [eventhorizontelescope.org][context-source-01])
@@ -244,7 +244,7 @@ db_type: "google_semantic_retriever"
 The setup above uses both the Semantic Retrieval API and the AQA model.
 
 **Note**: At the moment, when `db_type` is set to `google_semantic_retriever`, running the
-`populate_vector_database.py` script will also create and popluate a local vector database using
+`populate_vector_database.py` script will also create and populate a local vector database using
 Chroma as well as creating and populating an online corpus using the Semantic Retrieval API.
 
 However, if you want to use only the AQA model without using an online corpus, update the
diff --git a/examples/gemini/python/docs-agent/docs/create-a-new-task.md b/examples/gemini/python/docs-agent/docs/create-a-new-task.md
@@ -308,7 +308,7 @@ A step that runs the `tellme` command:
 
 Using the `tellme` command requires **a vector database setup**.
 
-<!-- Referene links -->
+<!-- Reference links -->
 
 [model-code]: https://ai.google.dev/gemini-api/docs/models/gemini
 [tasks-dir]: ../tasks
diff --git a/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/chatui.py b/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/chatui.py
@@ -55,7 +55,7 @@ def construct_blueprint(
         if product_config.secondary_db_type == "chroma":
             docs_agent = DocsAgent(config=product_config, init_chroma=True)
         else:
-            # A local Chroma DB is not needed for the Semantic Retreiver only mode.
+            # A local Chroma DB is not needed for the Semantic Retriever only mode.
             docs_agent = DocsAgent(config=product_config, init_chroma=False)
     elif product_config.db_type == "none":
         docs_agent = DocsAgent(
diff --git a/examples/gemini/python/docs-agent/docs_agent/interfaces/cli/cli_admin.py b/examples/gemini/python/docs-agent/docs_agent/interfaces/cli/cli_admin.py
@@ -208,7 +208,7 @@ def delete_corpus(
         click.echo("Deleting " + name)
         if click.confirm("Do you want to continue?", abort=True):
             semantic.delete_a_corpus(corpus_name=name, force=True)
-            click.echo("Successfuly deleted " + name)
+            click.echo("Successfully deleted " + name)
             corpora_response = semantic.list_existing_corpora()
             click.echo(f"Corpora list:\n{corpora_response}")
 
diff --git a/examples/gemini/python/docs-agent/docs_agent/interfaces/cli/cli_helpme.py b/examples/gemini/python/docs-agent/docs_agent/interfaces/cli/cli_helpme.py
@@ -284,7 +284,7 @@ def helpme(
 
     # Select the mode.
     if helpme_mode == "PREVIOUS_EXCHANGES":
-        # Continue mode, which uses the previous exchangs as the main context.
+        # Continue mode, which uses the previous exchanges as the main context.
         this_output = console.ask_model_with_file(
             question.strip(),
             product_config,
diff --git a/examples/gemini/python/docs-agent/docs_agent/preprocess/splitters/fidl_splitter.py b/examples/gemini/python/docs-agent/docs_agent/preprocess/splitters/fidl_splitter.py
@@ -59,11 +59,11 @@ def construct_chunks(library_name: str, protocol_name: str, lines):
     contents = []
     buffer_size = get_byte_size(lines)
     if int(buffer_size) > 5000:
-        # If the protocol is larget than 5KB, divide it into two.
+        # If the protocol is larger than 5KB, divide it into two.
         logging.info(
             "Found a text chunk ("
             + str(protocol_name)
-            + ") is greater than 6KB (size: "
+            + ") is greater than 5KB (size: "
             + str(buffer_size)
             + ")."
         )
@@ -116,11 +116,11 @@ def split_file_to_protocols(this_file):
             # print("MATCHED [End bracket]")
             line_buffer.append(line)
             if library_name != "" and protocol_name != "":
-                # Prepre a captured FIDL protocl into small text chunks.
+                # Split a captured FIDL protocol into small text chunks.
                 contents = construct_chunks(library_name, protocol_name, line_buffer)
                 for content in contents:
                     protocols.append(content)
-            # Clear the line butter and protocol name when an end bracket is found.
+            # Clear the line buffer and protocol name when an end bracket is found.
             line_buffer.clear()
             protocol_name = ""
         else:
diff --git a/examples/gemini/python/docs-agent/docs_agent/preprocess/splitters/markdown_splitter.py b/examples/gemini/python/docs-agent/docs_agent/preprocess/splitters/markdown_splitter.py
@@ -629,7 +629,7 @@ def process_document_into_sections(markdown_text):
     return sections
 
 
-# Process an array of Markdwon text into an array of string buffers
+# Process an array of Markdown text into an array of string buffers
 # whose size is smaller than 5KB.
 def construct_chunks(lines):
     contents = []
diff --git a/examples/gemini/python/docs-agent/docs_agent/storage/google_semantic_retriever.py b/examples/gemini/python/docs-agent/docs_agent/storage/google_semantic_retriever.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 #
 
-"""Semantic Retrievel module for using the Semantic Retrieval API with AQA"""
+"""Semantic Retrieval module for using the Semantic Retrieval API with AQA"""
 
 import google.ai.generativelanguage as glm
 from absl import logging
@@ -133,7 +133,7 @@ def create_a_doc(
             # Set the `document_resource_name` for subsequent sections.
             document_resource_name = create_document_response.name
         except:
-            logging.error(f"Cannot create a new doucment: {page_title}")
+            logging.error(f"Cannot create a new document: {page_title}")
             exit(1)
         return document_resource_name
 
@@ -144,7 +144,7 @@ def retrieve_a_doc(self, document_resource_name: str):
             # Make the request
             response = self.retriever_service_client.get_document(get_document_request)
         except:
-            logging.error(f"Cannot retrieve a doucment: {document_resource_name}")
+            logging.error(f"Cannot retrieve a document: {document_resource_name}")
         return response
 
     def create_a_chunk(
@@ -241,10 +241,10 @@ def create_a_doc_chunk(
                 )
                 return chunk
             except:
-                logging.error("Error in creaing a doc chunk: " + page_title)
+                logging.error("Error in creating a doc chunk: " + page_title)
                 return None
         except:
-            logging.error("Error in creaing a doc chunk: " + page_title)
+            logging.error("Error in creating a doc chunk: " + page_title)
             return None
 
     def get_all_docs(self, corpus_name: str, print_output: bool = False):
diff --git a/examples/gemini/python/docs-agent/docs_agent/utilities/config.py b/examples/gemini/python/docs-agent/docs_agent/utilities/config.py
@@ -581,7 +581,7 @@ def __init__(self, yaml_path_input: str | None = None):
             sys.exit(1)
 
     def __str__(self):
-        # Returns the absoulte path to the config file or provides an error message
+        # Returns the absolute path to the config file or provides an error message
         return getattr(self, "yaml_path", "Config path not determined")
 
     def returnProducts(self, product: typing.Optional[str] = None) -> ConfigFile:
diff --git a/examples/gemini/python/docs-agent/scripts/extract_replace_image_alt_text.py b/examples/gemini/python/docs-agent/scripts/extract_replace_image_alt_text.py
@@ -17,12 +17,12 @@
 This script extracts image paths from markdown, html, or directory of files.
 
 Usage:
-  python extract_image_files.py <input_file>
+  python extract_replace_image_alt_text.py <input_file>
 
 Example:
-  python extract_image_files.py my_document.md
-  python extract_image_files.py my_document.html
-  python extract_image_files.py my_documents_folder
+  python extract_replace_image_alt_text.py my_document.md
+  python extract_replace_image_alt_text.py my_document.html
+  python extract_replace_image_alt_text.py my_documents_folder
 """
 
 import os
diff --git a/examples/gemini/python/docs-agent/scripts/update_files_from_yaml.py b/examples/gemini/python/docs-agent/scripts/update_files_from_yaml.py
@@ -17,10 +17,10 @@
 This script updates markdown files with image paths and alt text from a YAML file.
 
 Usage:
-  python update_files_from_yaml.py
+  python update_files_from_yaml.py <yaml_file>
 
 Example:
-  python update_files_from_yaml.py
+  python update_files_from_yaml.py agent_out/file_alt_text.yaml
 """
 
 import re
diff --git a/examples/gemini/python/docs-agent/tasks/describe-images-and-replace.yaml b/examples/gemini/python/docs-agent/tasks/describe-images-and-replace.yaml
@@ -15,8 +15,8 @@ tasks:
       - prompt: "create_file_dictionary.py"
         function: "script"
         description: >
-          This script extract all image files found in the input file and
-          store the list of image file names in the
+          This script extracts all image files found in the input file and
+          stores the list of image file names in the
           agent_out/files_alt_text.yaml file.
         flags:
           script_input: "<INPUT>"
diff --git a/examples/gemini/python/docs-agent/tasks/describe-images-from-doc-replace.yaml b/examples/gemini/python/docs-agent/tasks/describe-images-from-doc-replace.yaml
@@ -12,7 +12,7 @@ tasks:
       - prompt: "extract_replace_image_alt_text.py"
         function: "script"
         description: >
-          This script extract all image files found in the input file and store the list of image file
+          This script extracts all image files found in the input file and stores the list of image file
           names in the agent_out/image_paths.txt file.
         flags:
           script_input: "<INPUT>"
diff --git a/examples/gemini/python/docs-agent/tasks/describe-images-from-doc.yaml b/examples/gemini/python/docs-agent/tasks/describe-images-from-doc.yaml
@@ -18,7 +18,7 @@ tasks:
       - prompt: "extract_image_files.py"
         function: "script"
         description: >
-          This script extract all image files found in the input file and store the list of image
+          This script extracts all image files found in the input file and stores the list of image
           file names in the agent_out/image_paths.txt file.
         flags:
           script_input: "<INPUT>"