Coverage for src/idx_api/embeddings/brokerages.py: 23%
47 statements
« prev ^ index » next coverage.py v7.13.1, created at 2025-12-28 11:12 -0700
« prev ^ index » next coverage.py v7.13.1, created at 2025-12-28 11:12 -0700
1"""Brokerage embedding functions for semantic firm search with PostgreSQL/pgvector."""
3from sqlalchemy import text
4from sqlalchemy.orm import Session
6from idx_api.embeddings.base import embedding_to_pgvector, generate_embedding_sync
def _brokerage_field(brokerage, field: str):
    """Return ``field`` from a brokerage given as an object or a dict-like.

    Attribute access is tried first; otherwise the value is read through the
    object's ``get`` method. ``None`` is returned when the field cannot be
    found either way, so a partially populated object never raises.
    """
    if hasattr(brokerage, field):
        return getattr(brokerage, field)
    getter = getattr(brokerage, "get", None)
    return getter(field) if callable(getter) else None


def build_brokerage_text(brokerage) -> str:
    """Build rich text representation of a brokerage for embedding.

    The text is assembled from, in order: the name, the tagline, the franchise
    affiliation, and fixed phrases for the ``military_specialist`` and
    ``va_loans`` flags. Falsy or missing fields are skipped.

    Args:
        brokerage: Brokerage ORM model (attribute access) or dict
            (``.get`` access).

    Returns:
        The present parts joined with ``" - "``; an empty string when no
        field is set.
    """
    parts = []

    # Free-text fields are included verbatim.
    name = _brokerage_field(brokerage, "name")
    if name:
        parts.append(name)

    tagline = _brokerage_field(brokerage, "tagline")
    if tagline:
        parts.append(tagline)

    franchise = _brokerage_field(brokerage, "franchise_affiliation")
    if franchise:
        parts.append(f"affiliated with {franchise}")

    # Boolean specializations are turned into searchable phrases.
    if _brokerage_field(brokerage, "military_specialist"):
        parts.append("military relocation specialist")

    if _brokerage_field(brokerage, "va_loans"):
        parts.append("VA loan expert")

    return " - ".join(parts)
def index_brokerage(session: Session, brokerage_id: int, text_content: str) -> None:
    """Index a brokerage for vector search using pgvector.

    Generates an embedding for ``text_content`` and writes it to the
    ``embedding`` column of the matching ``brokerages`` row. The statement is
    executed on ``session`` but not committed here; the caller owns the
    transaction.

    Args:
        session: Database session
        brokerage_id: ID of the brokerage to index
        text_content: Text representation of the brokerage
    """
    embedding = generate_embedding_sync(text_content)

    if embedding is None:
        # Best-effort: report the failure and leave the row untouched.
        print(f"Failed to generate embedding for brokerage {brokerage_id}")
        return

    # CAST(... AS vector) is used instead of the ``:embedding::vector``
    # shorthand: SQLAlchemy's text() bind-parameter parser does not accept a
    # parameter name that is immediately followed by a colon, so the
    # shorthand is mis-parsed and the statement fails at execution time.
    session.execute(
        text("""
            UPDATE brokerages
            SET embedding = CAST(:embedding AS vector)
            WHERE id = :brokerage_id
        """),
        {
            "brokerage_id": brokerage_id,
            "embedding": embedding_to_pgvector(embedding),
        },
    )
def search_brokerages(
    session: Session,
    query: str,
    limit: int = 10,
) -> list[dict]:
    """Search brokerages by semantic similarity using pgvector.

    Embeds ``query`` and ranks brokerages that have a stored embedding by the
    pgvector ``<=>`` distance to it, closest first.

    Args:
        session: Database session
        query: Search query (e.g., "military relocation specialist")
        limit: Maximum results to return

    Returns:
        List of dicts with ``id``, ``slug``, ``name``, ``tagline``,
        ``military_specialist``, ``va_loans``, ``franchise_affiliation``,
        ``logo_url`` and ``similarity`` (``1.0 - distance``). Empty when the
        query embedding could not be generated.
    """
    query_embedding = generate_embedding_sync(query)

    if query_embedding is None:
        return []

    # CAST(... AS vector) is used instead of ``:query_embedding::vector``:
    # SQLAlchemy's text() bind-parameter parser does not accept a parameter
    # name that is immediately followed by a colon, so the ``::`` shorthand
    # is mis-parsed and the statement fails at execution time.
    result = session.execute(
        text("""
            SELECT
                b.id,
                b.slug,
                b.name,
                b.tagline,
                b.military_specialist,
                b.va_loans,
                b.franchise_affiliation,
                b.logo_url,
                b.embedding <=> CAST(:query_embedding AS vector) AS similarity_distance
            FROM brokerages b
            WHERE b.embedding IS NOT NULL
            ORDER BY b.embedding <=> CAST(:query_embedding AS vector)
            LIMIT :limit
        """),
        {
            "query_embedding": embedding_to_pgvector(query_embedding),
            "limit": limit,
        },
    )

    return [
        {
            "id": row.id,
            "slug": row.slug,
            "name": row.name,
            "tagline": row.tagline,
            "military_specialist": row.military_specialist,
            "va_loans": row.va_loans,
            "franchise_affiliation": row.franchise_affiliation,
            "logo_url": row.logo_url,
            # Convert distance to a score where larger means more similar.
            "similarity": 1.0 - row.similarity_distance,
        }
        for row in result.fetchall()
    ]
def reindex_all_brokerages(session: Session) -> int:
    """Reindex all brokerages for vector search.

    Loads every brokerage whose ``disabled_at`` is NULL, rebuilds its text
    representation, re-indexes it, and commits once after the loop.

    Args:
        session: Database session

    Returns:
        Number of brokerages processed. Rows whose embedding could not be
        generated are still counted, because ``index_brokerage`` does not
        report failure to its caller.
    """
    # Select only the columns build_brokerage_text reads (plus the id) so the
    # existing embedding vectors are not fetched just to be overwritten.
    result = session.execute(
        text("""
            SELECT
                id,
                name,
                tagline,
                franchise_affiliation,
                military_specialist,
                va_loans
            FROM brokerages
            WHERE disabled_at IS NULL
        """)
    )

    count = 0
    # fetchall() drains the result before further statements run on the session.
    for row in result.fetchall():
        row_dict = row._asdict()
        text_content = build_brokerage_text(row_dict)
        index_brokerage(session, row_dict["id"], text_content)
        count += 1

    session.commit()
    return count
# Legacy aliases for backward compatibility.
# Each ``*_legacy`` name is bound to the same function object as its
# non-suffixed counterpart, so both names behave identically.
build_brokerage_text_legacy = build_brokerage_text
index_brokerage_legacy = index_brokerage
search_brokerages_legacy = search_brokerages
reindex_all_brokerages_legacy = reindex_all_brokerages