Coverage for src / idx_api / embeddings / broker_contacts.py: 15%
48 statements
« prev ^ index » next coverage.py v7.13.1, created at 2025-12-28 11:12 -0700
« prev ^ index » next coverage.py v7.13.1, created at 2025-12-28 11:12 -0700
1"""Broker contact embedding functions for semantic person search with PostgreSQL/pgvector."""
3from sqlalchemy import text
4from sqlalchemy.orm import Session
6from idx_api.embeddings.base import embedding_to_pgvector, generate_embedding_sync
def _contact_field(contact, key):
    """Read ``key`` from an attribute-style object or a dict-like record."""
    if hasattr(contact, key):
        return getattr(contact, key)
    # Only dict-like inputs expose ``.get``; plain objects that simply lack
    # the attribute yield None instead of raising AttributeError.
    getter = getattr(contact, "get", None)
    return getter(key) if callable(getter) else None


def build_broker_contact_text(contact) -> str:
    """Build the text representation of a broker contact used for embedding.

    The result joins, in order and skipping empty values: name, email,
    ``"<license_type> license"``, ``"primary contact"`` and
    ``"at <brokerage name>"``, separated by ``" - "``.

    Args:
        contact: Either an object exposing ``name``, ``email``,
            ``license_type``, ``is_primary`` and ``brokerage`` attributes,
            or a dict-like record with the same keys plus ``brokerage_name``.
            Missing fields are treated as empty.

    Returns:
        The joined text, or an empty string when no field has a value.
    """
    parts = []

    name = _contact_field(contact, "name")
    if name:
        parts.append(name)

    email = _contact_field(contact, "email")
    if email:
        parts.append(email)

    license_type = _contact_field(contact, "license_type")
    if license_type:
        parts.append(f"{license_type} license")

    if _contact_field(contact, "is_primary"):
        parts.append("primary contact")

    # Prefer the related brokerage object when one is attached; otherwise
    # fall back to a flat ``brokerage_name`` field (the shape produced by
    # joined SQL rows). An object whose ``brokerage`` is None no longer
    # crashes on a non-existent ``.get``.
    brokerage = getattr(contact, "brokerage", None)
    if brokerage:
        brokerage_name = brokerage.name
    else:
        brokerage_name = _contact_field(contact, "brokerage_name")
    if brokerage_name:
        parts.append(f"at {brokerage_name}")

    return " - ".join(parts)
def index_broker_contact(session: Session, contact_id: int, text_content: str) -> None:
    """Generate an embedding for a broker contact and store it on its row.

    The UPDATE is executed on ``session`` but is not committed here;
    committing is left to the caller. If no embedding can be generated the
    failure is printed and the row is left untouched.

    Args:
        session: Database session used to run the UPDATE.
        contact_id: ID of the ``brokers`` row to update.
        text_content: Text representation of the contact to embed.
    """
    embedding = generate_embedding_sync(text_content)

    if embedding is None:
        print(f"Failed to generate embedding for broker contact {contact_id}")
        return

    # CAST(... AS vector) is used instead of the ``::vector`` shorthand so the
    # bind parameter's colon syntax can never be confused with PostgreSQL's
    # ``::`` cast operator when SQLAlchemy parses the statement.
    session.execute(
        text("""
            UPDATE brokers
            SET embedding = CAST(:embedding AS vector)
            WHERE id = :contact_id
        """),
        {
            "contact_id": contact_id,
            "embedding": embedding_to_pgvector(embedding),
        },
    )
def search_broker_contacts(
    session: Session,
    query: str,
    brokerage_id: int | None = None,
    limit: int = 10,
) -> list[dict]:
    """Search broker contacts by semantic similarity using pgvector.

    Only rows with a stored embedding are considered. Results are ordered by
    ascending ``<=>`` distance to the query embedding (closest first).

    Args:
        session: Database session.
        query: Search query (e.g., "primary contact" or "licensed broker").
        brokerage_id: When not None, restrict results to this brokerage.
            Any integer, including 0, is treated as a real filter value.
        limit: Maximum number of results to return.

    Returns:
        A list of dicts with ``id``, ``brokerage_id``, ``name``, ``email``,
        ``phone``, ``license_type``, ``is_primary`` and ``similarity``
        (``1.0 - distance``). Empty when the query embedding could not be
        generated.
    """
    query_embedding = generate_embedding_sync(query)
    if query_embedding is None:
        return []

    params = {
        "query_embedding": embedding_to_pgvector(query_embedding),
        "limit": limit,
    }

    # The optional filter is a constant SQL fragment; the brokerage value
    # itself is always passed as a bind parameter, never interpolated.
    brokerage_filter = ""
    if brokerage_id is not None:
        brokerage_filter = "AND bc.brokerage_id = :brokerage_id"
        params["brokerage_id"] = brokerage_id

    # CAST(... AS vector) is used instead of the ``::vector`` shorthand so the
    # bind parameter's colon syntax can never be confused with PostgreSQL's
    # ``::`` cast operator when SQLAlchemy parses the statement.
    sql = text(f"""
        SELECT
            bc.id,
            bc.brokerage_id,
            bc.name,
            bc.email,
            bc.phone,
            bc.license_type,
            bc.is_primary,
            bc.embedding <=> CAST(:query_embedding AS vector) AS similarity_distance
        FROM brokers bc
        WHERE bc.embedding IS NOT NULL
        {brokerage_filter}
        ORDER BY bc.embedding <=> CAST(:query_embedding AS vector)
        LIMIT :limit
    """)

    result = session.execute(sql, params)

    return [
        {
            "id": row.id,
            "brokerage_id": row.brokerage_id,
            "name": row.name,
            "email": row.email,
            "phone": row.phone,
            "license_type": row.license_type,
            "is_primary": row.is_primary,
            # Convert distance to a "higher is better" score.
            "similarity": 1.0 - row.similarity_distance,
        }
        for row in result
    ]
def reindex_all_broker_contacts(session: Session) -> int:
    """Rebuild the stored embedding for every non-disabled broker contact.

    Selects all ``brokers`` rows whose ``disabled_at`` is NULL (joined with
    the brokerage name), builds the embedding text for each one, indexes it,
    and commits once at the end.

    Args:
        session: Database session used for the query, the updates and the
            final commit.

    Returns:
        Number of broker contact rows processed. A row is counted even when
        its embedding could not be generated.
    """
    active_contacts_sql = text("""
        SELECT bc.*, b.name as brokerage_name
        FROM brokers bc
        LEFT JOIN brokerages b ON b.id = bc.brokerage_id
        WHERE bc.disabled_at IS NULL
    """)
    rows = session.execute(active_contacts_sql).fetchall()

    for row in rows:
        record = row._asdict()
        embedding_text = build_broker_contact_text(record)
        index_broker_contact(session, record["id"], embedding_text)

    # Single commit covering every update issued above.
    session.commit()
    return len(rows)