Coverage for src / idx_api / embeddings / agents.py: 57%
37 statements
« prev ^ index » next coverage.py v7.13.1, created at 2025-12-28 11:12 -0700
« prev ^ index » next coverage.py v7.13.1, created at 2025-12-28 11:12 -0700
1"""Agent embedding functions for semantic agent search with PostgreSQL/pgvector."""
3from sqlalchemy import text
4from sqlalchemy.orm import Session
6from idx_api.embeddings.base import embedding_to_pgvector, generate_embedding_sync
def build_agent_text(agent) -> str:
    """Build rich text representation of an agent for embedding.

    The agent's name, brokerage and bio are joined with " - ". Missing or
    empty pieces are skipped, so an agent with no usable fields yields an
    empty string.

    Args:
        agent: Agent ORM model or dict (with brokerage_name if joining)

    Returns:
        Text representation for semantic search
    """

    def read_field(key: str):
        """Read *key* from an attribute-style or mapping-style agent."""
        if hasattr(agent, key):
            return getattr(agent, key)
        # Only mapping-like agents expose .get(); attribute-style objects
        # (e.g. ORM models) that lack the field simply have no value for it.
        getter = getattr(agent, "get", None)
        return getter(key) if callable(getter) else None

    parts = []

    # Always include name
    name = read_field("name")
    if name:
        parts.append(name)

    # Add brokerage context. Prefer the related brokerage object when present;
    # otherwise fall back to a flat "brokerage_name" field (joined rows/dicts).
    # An ORM agent whose brokerage is None must not be treated as a dict.
    brokerage = getattr(agent, "brokerage", None)
    brokerage_name = brokerage.name if brokerage else read_field("brokerage_name")
    if brokerage_name:
        parts.append(f"works at {brokerage_name}")

    # Add bio for rich context
    bio = read_field("bio")
    if bio:
        parts.append(bio)

    return " - ".join(parts)
def index_agent(session: Session, agent_id: int, text_content: str) -> None:
    """Index an agent for vector search using pgvector.

    Generates an embedding for ``text_content`` and writes it to the matching
    row of the ``agents`` table. The UPDATE is executed on ``session`` but not
    committed here; committing is left to the caller. If no embedding could be
    generated, a message is printed and the row is left untouched.

    Args:
        session: Database session
        agent_id: ID of the agent to index
        text_content: Text representation of the agent
    """
    # Generate embedding via Ollama API
    embedding = generate_embedding_sync(text_content)

    if embedding is None:
        print(f"Failed to generate embedding for agent {agent_id}")
        return

    # Store embedding directly in agents table.
    # CAST(... AS vector) is used instead of the "::vector" shorthand because
    # a colon directly after ":embedding" clashes with text()'s ":name"
    # bind-parameter syntax; the two cast forms are equivalent in PostgreSQL.
    session.execute(
        text("""
            UPDATE agents
            SET embedding = CAST(:embedding AS vector)
            WHERE id = :agent_id
        """),
        {
            "agent_id": agent_id,
            "embedding": embedding_to_pgvector(embedding),
        },
    )
def search_agents(
    session: Session,
    query: str,
    limit: int = 10,
) -> list[dict]:
    """Search agents by semantic similarity using pgvector.

    Embeds ``query`` and ranks agents that have a stored embedding by the
    pgvector ``<=>`` distance to it, closest first. Bios longer than 200
    characters are cut to 200 characters followed by "...".

    Args:
        session: Database session
        query: Search query (e.g., "native Idaho real estate")
        limit: Maximum results to return

    Returns:
        List of matching agents with similarity scores. Each entry is a dict
        with the keys id, name, email, phone, bio, brokerage_name and
        distance (smaller means more similar). Empty if the query could not
        be embedded.
    """
    # Generate query embedding
    query_embedding = generate_embedding_sync(query)

    if query_embedding is None:
        return []

    # CAST(... AS vector) is used instead of the "::vector" shorthand because
    # a colon directly after ":query_embedding" clashes with text()'s ":name"
    # bind-parameter syntax; the two cast forms are equivalent in PostgreSQL.
    result = session.execute(
        text("""
            SELECT
                a.id,
                a.name,
                a.email,
                a.phone,
                a.bio,
                b.name as brokerage_name,
                a.embedding <=> CAST(:query_embedding AS vector) AS similarity_distance
            FROM agents a
            LEFT JOIN brokerages b ON b.id = a.brokerage_id
            WHERE a.embedding IS NOT NULL
            ORDER BY a.embedding <=> CAST(:query_embedding AS vector)
            LIMIT :limit
        """),
        {
            "query_embedding": embedding_to_pgvector(query_embedding),
            "limit": limit,
        },
    )

    return [
        {
            "id": row.id,
            "name": row.name,
            "email": row.email,
            "phone": row.phone,
            # Keep long bios short for result listings; None/short bios pass through.
            "bio": row.bio[:200] + "..." if row.bio and len(row.bio) > 200 else row.bio,
            "brokerage_name": row.brokerage_name,
            "distance": row.similarity_distance,
        }
        for row in result.fetchall()
    ]
def reindex_all_agents(session: Session) -> int:
    """Rebuild the vector-search embedding of every active agent.

    Loads each agent whose status is 'active' together with its brokerage
    name, builds the agent's text and passes it to ``index_agent``. All
    updates are committed once, after the last agent has been handled.

    Args:
        session: Database session

    Returns:
        Number of active agents processed. An agent is counted even when
        ``index_agent`` could not produce an embedding for it.
    """
    active_agents = session.execute(
        text("""
            SELECT a.id, a.name, a.bio, b.name as brokerage_name
            FROM agents a
            LEFT JOIN brokerages b ON b.id = a.brokerage_id
            WHERE a.status = 'active'
        """)
    ).fetchall()

    processed = 0
    for processed, record in enumerate(active_agents, start=1):
        # build_agent_text reads the joined row as a plain dict.
        fields = record._asdict()
        agent_text = build_agent_text(fields)
        index_agent(session, fields['id'], agent_text)

    # Single commit for the whole batch.
    session.commit()
    return processed