Skip to content

Commit 541e311

Browse files
committed
pushing everything together
1 parent 364a2b5 commit 541e311

29 files changed

+788
-269
lines changed
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
"""extend collection table for provider agnostic support
2+
3+
Revision ID: 041
4+
Revises: 040
5+
Create Date: 2026-01-15 16:53:19.495583
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
import sqlmodel.sql.sqltypes
11+
from sqlalchemy.dialects import postgresql
12+
13+
14+
# revision identifiers, used by Alembic.
revision = "041"
down_revision = "040"
branch_labels = None
depends_on = None


# PostgreSQL enum type used by the ``collection.provider`` column.
# ``create_type=False`` keeps SQLAlchemy from emitting CREATE TYPE implicitly;
# ``upgrade()`` creates the type explicitly instead.
provider_type = postgresql.ENUM(
    "OPENAI",
    # aws
    # gemini
    create_type=False,
    name="providertype",
)
27+
28+
29+
def upgrade():
    """Extend the ``collection`` table for provider-agnostic support.

    Steps, in order:

    * create the ``providertype`` enum if it does not exist yet;
    * add ``provider`` (backfilled with ``'OPENAI'``, then made NOT NULL);
    * add the nullable ``name`` and ``description`` columns;
    * reword the column comment on ``llm_service_name``;
    * add a unique constraint on ``name``;
    * drop ``organization_id`` together with its foreign key.
    """
    provider_type.create(op.get_bind(), checkfirst=True)

    # Add the column as nullable first: adding a NOT NULL column without a
    # default fails as soon as the table already contains rows, which would
    # abort the migration before the backfill below could run.
    op.add_column(
        "collection",
        sa.Column(
            "provider",
            provider_type,
            nullable=True,
            comment="LLM provider used for this collection",
        ),
    )
    op.execute("UPDATE collection SET provider = 'OPENAI' WHERE provider IS NULL")
    # Every row now has a provider, so the constraint can be enforced.
    op.alter_column(
        "collection",
        "provider",
        existing_type=provider_type,
        nullable=False,
    )

    op.add_column(
        "collection",
        sa.Column(
            "name",
            sqlmodel.sql.sqltypes.AutoString(),
            nullable=True,
            comment="Name of the collection",
        ),
    )
    op.add_column(
        "collection",
        sa.Column(
            "description",
            sqlmodel.sql.sqltypes.AutoString(),
            nullable=True,
            comment="Description of the collection",
        ),
    )
    op.alter_column(
        "collection",
        "llm_service_name",
        existing_type=sa.VARCHAR(),
        comment="Name of the LLM service",
        existing_comment="Name of the LLM service provider",
        existing_nullable=False,
    )
    # Name the constraint explicitly so it matches the name that downgrade()
    # drops, regardless of any naming convention configured on the metadata.
    op.create_unique_constraint(op.f("collection_name_key"), "collection", ["name"])
    op.drop_constraint(
        op.f("collection_organization_id_fkey"), "collection", type_="foreignkey"
    )
    op.drop_column("collection", "organization_id")
72+
73+
74+
def downgrade():
    """Revert the ``collection`` table to its revision-040 shape.

    Restores ``organization_id`` (backfilled from the owning project) and its
    foreign key, removes the unique constraint on ``name``, restores the
    previous ``llm_service_name`` comment and drops the ``description``,
    ``name`` and ``provider`` columns.

    NOTE(review): the ``providertype`` enum itself is not dropped here --
    confirm whether that is intentional (e.g. the type is shared elsewhere).
    """
    table = "collection"
    org_fk = op.f("collection_organization_id_fkey")

    # Re-create the column as nullable so existing rows can be backfilled
    # before NOT NULL is enforced.
    organization_column = sa.Column(
        "organization_id",
        sa.INTEGER(),
        autoincrement=False,
        nullable=True,
        comment="Reference to the organization",
    )
    op.add_column(table, organization_column)
    op.execute(
        """UPDATE collection SET organization_id = (SELECT organization_id FROM project
WHERE project.id = collection.project_id)"""
    )
    op.alter_column(table, "organization_id", nullable=False)
    op.create_foreign_key(
        org_fk,
        table,
        "organization",
        ["organization_id"],
        ["id"],
        ondelete="CASCADE",
    )

    op.drop_constraint("collection_name_key", table, type_="unique")
    op.alter_column(
        table,
        "llm_service_name",
        existing_type=sa.VARCHAR(),
        comment="Name of the LLM service provider",
        existing_comment="Name of the LLM service",
        existing_nullable=False,
    )

    # Drop the columns added by upgrade(), newest first.
    for column_name in ("description", "name", "provider"):
        op.drop_column(table, column_name)

backend/app/api/routes/collections.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
CollectionPublic,
2929
)
3030
from app.utils import APIResponse, load_description, validate_callback_url
31+
from app.services.collections.helpers import ensure_unique_name
3132
from app.services.collections import (
3233
create_collection as create_service,
3334
delete_collection as delete_service,
@@ -87,6 +88,9 @@ def create_collection(
8788
if request.callback_url:
8889
validate_callback_url(str(request.callback_url))
8990

91+
if request.name:
92+
ensure_unique_name(session, current_user.project_.id, request.name)
93+
9094
collection_job_crud = CollectionJobCrud(session, current_user.project_.id)
9195
collection_job = collection_job_crud.create(
9296
CollectionJobCreate(

backend/app/crud/collection/collection.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,16 @@ def read_all(self):
9393
collections = self.session.exec(statement).all()
9494
return collections
9595

96+
def exists_by_name(self, collection_name: str) -> bool:
    """Return True when this project has a live collection with that name.

    Only rows whose ``project_id`` equals ``self.project_id`` and whose
    ``deleted_at`` is NULL are considered.
    """
    query = select(Collection.id).where(
        Collection.project_id == self.project_id,
        Collection.name == collection_name,
        Collection.deleted_at.is_(None),
    )
    first_match = self.session.exec(query).first()
    return first_match is not None
105+
96106
def delete_by_id(self, collection_id: UUID) -> Collection:
97107
coll = self.read_one(collection_id)
98108
coll.deleted_at = now()

backend/app/models/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,12 @@
88

99
from .collection import (
1010
Collection,
11-
CreateCollectionParams,
12-
CreateCollectionResult,
1311
CreationRequest,
1412
CollectionPublic,
1513
CollectionIDPublic,
1614
CollectionWithDocsPublic,
1715
DeletionRequest,
16+
ProviderType,
1817
)
1918
from .collection_job import (
2019
CollectionActionType,

backend/app/models/collection.py

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
from datetime import datetime
2+
from enum import Enum
3+
from typing import Any, Literal
4+
from uuid import UUID, uuid4
5+
6+
from pydantic import HttpUrl, model_validator
7+
from sqlmodel import Field, Relationship, SQLModel
8+
9+
from app.core.util import now
10+
from app.models.document import DocumentPublic
11+
from .project import Project
12+
13+
14+
class ProviderType(str, Enum):
    """LLM providers a collection can be backed by.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    OPENAI = "OPENAI"
    # Not enabled yet:
    # BEDROCK = "bedrock"
    # GEMINI = "gemini"
20+
21+
22+
class Collection(SQLModel, table=True):
    """Database model for Collection operations.

    A collection belongs to one project and points at a resource held by an
    external LLM service (``llm_service_id`` / ``llm_service_name``).
    ``deleted_at`` is a soft-delete marker: ``None`` means the row is live.
    """

    id: UUID = Field(
        default_factory=uuid4,
        primary_key=True,
        description="Unique identifier for the collection",
        sa_column_kwargs={"comment": "Unique identifier for the collection"},
    )
    # Must be a plain ``Field(...)``: wrapping it in parentheses with a
    # trailing comma would make the default a one-element tuple, and
    # ``sa_column_kwargs`` has to be a dict keyed by "comment", not a set.
    provider: ProviderType = Field(
        nullable=False,
        description="LLM provider used for this collection (e.g., 'openai', 'bedrock', 'gemini', etc)",
        sa_column_kwargs={"comment": "LLM provider used for this collection"},
    )
    llm_service_id: str = Field(
        nullable=False,
        description="External LLM service identifier (e.g., OpenAI vector store ID)",
        sa_column_kwargs={
            "comment": "External LLM service identifier (e.g., OpenAI vector store ID)"
        },
    )
    llm_service_name: str = Field(
        nullable=False,
        description="Name of the LLM service",
        sa_column_kwargs={"comment": "Name of the LLM service"},
    )
    # NOTE(review): ``unique=True`` is a table-wide constraint; confirm whether
    # names should instead be unique per project and/or ignore soft-deleted rows.
    name: str | None = Field(
        default=None,
        nullable=True,
        unique=True,
        description="Name of the collection",
        sa_column_kwargs={"comment": "Name of the collection"},
    )
    description: str | None = Field(
        default=None,
        nullable=True,
        description="Description of the collection",
        sa_column_kwargs={"comment": "Description of the collection"},
    )
    project_id: int = Field(
        foreign_key="project.id",
        nullable=False,
        ondelete="CASCADE",
        description="Project the collection belongs to",
        sa_column_kwargs={"comment": "Reference to the project"},
    )
    inserted_at: datetime = Field(
        default_factory=now,
        description="Timestamp when the collection was created",
        sa_column_kwargs={"comment": "Timestamp when the collection was created"},
    )
    updated_at: datetime = Field(
        default_factory=now,
        description="Timestamp when the collection was updated",
        sa_column_kwargs={"comment": "Timestamp when the collection was last updated"},
    )
    deleted_at: datetime | None = Field(
        default=None,
        description="Timestamp when the collection was deleted",
        sa_column_kwargs={"comment": "Timestamp when the collection was deleted"},
    )
    project: Project = Relationship(back_populates="collections")
84+
85+
86+
# Request models
87+
class CollectionOptions(SQLModel):
    """Request options describing the collection to build.

    ``documents`` is de-duplicated after validation; the first occurrence of
    each ID is kept and the original ordering is preserved.
    """

    name: str | None = Field(default=None, description="Name of the collection")
    description: str | None = Field(
        default=None, description="Description of the collection"
    )
    documents: list[UUID] = Field(
        description="List of document IDs",
    )
    batch_size: int = Field(
        default=1,
        description=(
            "Number of documents to send to OpenAI in a single "
            "transaction. See the `file_ids` parameter in the "
            "vector store [create batch](https://platform.openai.com/docs/api-reference/vector-stores-file-batches/createBatch)."
        ),
    )

    def model_post_init(self, __context: Any) -> None:
        """Drop duplicate document IDs once the model has been validated."""
        # dict.fromkeys removes duplicates but, unlike set(), keeps the
        # caller's ordering, so the resulting list is deterministic.
        self.documents = list(dict.fromkeys(self.documents))
106+
107+
108+
class AssistantOptions(SQLModel):
    """Deprecated assistant settings accepted alongside a creation request.

    ``model`` and ``instructions`` must be supplied together or not at all;
    blank / whitespace-only strings count as "not supplied".
    """

    # Fields to be passed along to OpenAI. They must be a subset of
    # parameters accepted by the OpenAI.client.beta.assistants.create
    # API.
    model: str | None = Field(
        default=None,
        description=(
            "**[Deprecated]** "
            "OpenAI model to attach to this assistant. The model "
            "must be compatable with the assistants API; see the "
            "OpenAI [model documentation](https://platform.openai.com/docs/models/compare) for more."
        ),
    )

    instructions: str | None = Field(
        default=None,
        description=(
            "**[Deprecated]** "
            "Assistant instruction. Sometimes referred to as the "
            '"system" prompt.'
        ),
    )
    temperature: float = Field(
        default=1e-6,
        description=(
            "**[Deprecated]** "
            "Model temperature. The default is slightly "
            "greater-than zero because it is [unknown how OpenAI "
            "handles zero](https://community.openai.com/t/clarifications-on-setting-temperature-0/886447/5)."
        ),
    )

    @model_validator(mode="before")
    @classmethod
    def _assistant_fields_all_or_none(cls, values: Any) -> Any:
        """Require ``model`` and ``instructions`` together, or neither.

        String values are stripped and empty strings become ``None`` before
        the check; the normalized values are written back to the payload.

        Raises:
            ValueError: if exactly one of ``model`` / ``instructions`` is set.
        """
        # A "before" validator receives the raw input, which is not
        # guaranteed to be a dict. Leave anything else untouched so that
        # regular validation reports the problem instead of ``.get`` raising
        # AttributeError here.
        if not isinstance(values, dict):
            return values

        def norm(x: Any) -> Any:
            if x is None:
                return None
            if isinstance(x, str):
                s = x.strip()
                return s if s else None
            return x  # let Pydantic handle non-strings

        model = norm(values.get("model"))
        instructions = norm(values.get("instructions"))

        if (model is None) ^ (instructions is None):
            raise ValueError(
                "To create an Assistant, provide BOTH 'model' and 'instructions'. "
                "If you only want a vector store, remove both fields."
            )

        # Work on a shallow copy so the caller's payload is not mutated.
        normalized = dict(values)
        normalized["model"] = model
        normalized["instructions"] = instructions
        return normalized
162+
163+
164+
class CallbackRequest(SQLModel):
    """Mixin adding an optional callback URL to a request payload."""

    callback_url: HttpUrl | None = Field(
        description="URL to call to report endpoint status",
        default=None,
    )
169+
170+
171+
class ProviderOptions(SQLModel):
    """Request-side selection of the LLM provider.

    Only the literal value ``"openai"`` is accepted, and it is also the
    default when the field is omitted.
    """

    provider: Literal["openai"] = Field(
        description="LLM provider to use for this collection",
        default="openai",
    )
177+
178+
179+
class CreationRequest(
    AssistantOptions,
    CollectionOptions,
    ProviderOptions,
    CallbackRequest,
):
    """Full collection-creation payload, composed from the option mixins."""

    def extract_super_type(self, cls: "CreationRequest"):
        """Yield ``(field_name, value)`` for every field declared on ``cls``.

        Field names come from ``cls.model_fields`` and the values are read
        from this instance.
        """
        for attr in cls.model_fields:
            yield (attr, getattr(self, attr))
189+
190+
191+
class DeletionRequest(CallbackRequest):
    """Payload for deleting a collection, with an optional callback URL."""

    collection_id: UUID = Field(
        description="Collection to delete",
    )
193+
194+
195+
# Response models
196+
197+
198+
class CollectionIDPublic(SQLModel):
    """Response model carrying only a collection's identifier."""

    id: UUID
200+
201+
202+
class CollectionPublic(SQLModel):
    """Response model describing a stored collection.

    NOTE(review): the ``provider``, ``name`` and ``description`` columns of
    ``Collection`` are not exposed here -- confirm that this is intentional.
    """

    # Identity and external-service linkage
    id: UUID
    llm_service_id: str
    llm_service_name: str
    project_id: int

    # Lifecycle timestamps; ``deleted_at`` defaults to None
    inserted_at: datetime
    updated_at: datetime
    deleted_at: datetime | None = None
211+
212+
213+
class CollectionWithDocsPublic(CollectionPublic):
    """``CollectionPublic`` plus an optional list of documents."""

    # Defaults to None when no documents are supplied.
    documents: list[DocumentPublic] | None = None

backend/app/models/organization.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,6 @@ class Organization(OrganizationBase, table=True):
7474
assistants: list["Assistant"] = Relationship(
7575
back_populates="organization", cascade_delete=True
7676
)
77-
collections: list["Collection"] = Relationship(
78-
back_populates="organization", cascade_delete=True
79-
)
8077
openai_conversations: list["OpenAIConversation"] = Relationship(
8178
back_populates="organization", cascade_delete=True
8279
)

0 commit comments

Comments
 (0)