Coverage for src / idx_api / embeddings / agents.py: 57%

37 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2025-12-28 11:12 -0700

1"""Agent embedding functions for semantic agent search with PostgreSQL/pgvector.""" 

2 

3from sqlalchemy import text 

4from sqlalchemy.orm import Session 

5 

6from idx_api.embeddings.base import embedding_to_pgvector, generate_embedding_sync 

7 

8 

def build_agent_text(agent) -> str:
    """Build the text representation of an agent that gets embedded.

    The result is the agent's name, an optional "works at <brokerage>"
    phrase and the bio, joined with " - ". Missing or empty pieces are
    skipped, so an agent with no usable fields yields an empty string.

    Args:
        agent: Either an object exposing ``name`` / ``bio`` / ``brokerage``
            attributes (e.g. an ORM model), or a mapping with ``name``,
            ``bio`` and ``brokerage_name`` keys (e.g. a joined query row
            converted to a dict).

    Returns:
        Text representation for semantic search.
    """

    def read(field):
        # Attribute access first (objects, row-like results), then
        # mapping-style lookup (dicts). Objects offering neither simply
        # have no value instead of raising AttributeError.
        if hasattr(agent, field):
            return getattr(agent, field)
        if hasattr(agent, "get"):
            return agent.get(field)
        return None

    parts = []

    # Always include name
    name = read("name")
    if name:
        parts.append(name)

    # Add brokerage context. Prefer the related brokerage object when it is
    # present; otherwise fall back to a flat ``brokerage_name`` field. The
    # fallback goes through read() so an object whose brokerage is None no
    # longer crashes on a missing ``.get`` method.
    brokerage = getattr(agent, "brokerage", None)
    brokerage_name = brokerage.name if brokerage else read("brokerage_name")
    if brokerage_name:
        parts.append(f"works at {brokerage_name}")

    # Add bio for rich context
    bio = read("bio")
    if bio:
        parts.append(bio)

    return " - ".join(parts)

38 

39 

def index_agent(session: Session, agent_id: int, text_content: str) -> None:
    """Index an agent for vector search using pgvector.

    Generates an embedding for ``text_content`` and writes it to the
    ``embedding`` column of the matching ``agents`` row. The statement is
    executed on ``session`` but not committed here; committing is left to
    the caller.

    If no embedding could be generated, a message is printed and the row is
    left untouched.

    Args:
        session: Database session
        agent_id: ID of the agent to index
        text_content: Text representation of the agent
    """
    embedding = generate_embedding_sync(text_content)

    if embedding is None:
        print(f"Failed to generate embedding for agent {agent_id}")
        return

    # Store embedding directly in agents table.
    # CAST(... AS vector) is used instead of the ``::vector`` shorthand:
    # colons are significant to SQLAlchemy's text() parser, and a bind
    # parameter immediately followed by ``::`` is not reliably recognised
    # as a parameter.
    session.execute(
        text("""
            UPDATE agents
            SET embedding = CAST(:embedding AS vector)
            WHERE id = :agent_id
        """),
        {
            "agent_id": agent_id,
            "embedding": embedding_to_pgvector(embedding),
        },
    )

67 

68 

def search_agents(
    session: Session,
    query: str,
    limit: int = 10,
) -> list[dict]:
    """Search agents by semantic similarity using pgvector.

    The query text is embedded and compared against every agent row that
    has a stored embedding, using pgvector's ``<=>`` distance operator
    (cosine distance per the pgvector documentation). Rows are returned
    nearest first.

    Args:
        session: Database session
        query: Search query (e.g., "native Idaho real estate")
        limit: Maximum results to return

    Returns:
        List of dicts with ``id``, ``name``, ``email``, ``phone``, ``bio``
        (cut to 200 characters plus "..." when longer), ``brokerage_name``
        and ``distance`` (smaller means closer). Empty when no embedding
        could be generated for the query.
    """
    max_bio_chars = 200

    query_embedding = generate_embedding_sync(query)
    if query_embedding is None:
        return []

    # CAST(... AS vector) is used instead of the ``::vector`` shorthand:
    # colons are significant to SQLAlchemy's text() parser, and a bind
    # parameter immediately followed by ``::`` is not reliably recognised
    # as a parameter.
    result = session.execute(
        text("""
            SELECT
                a.id,
                a.name,
                a.email,
                a.phone,
                a.bio,
                b.name as brokerage_name,
                a.embedding <=> CAST(:query_embedding AS vector) AS similarity_distance
            FROM agents a
            LEFT JOIN brokerages b ON b.id = a.brokerage_id
            WHERE a.embedding IS NOT NULL
            ORDER BY a.embedding <=> CAST(:query_embedding AS vector)
            LIMIT :limit
        """),
        {
            "query_embedding": embedding_to_pgvector(query_embedding),
            "limit": limit,
        },
    )

    def shorten(bio):
        # Only long bios are cut; None and short bios pass through as-is.
        if bio and len(bio) > max_bio_chars:
            return bio[:max_bio_chars] + "..."
        return bio

    return [
        {
            "id": row.id,
            "name": row.name,
            "email": row.email,
            "phone": row.phone,
            "bio": shorten(row.bio),
            "brokerage_name": row.brokerage_name,
            "distance": row.similarity_distance,
        }
        for row in result.fetchall()
    ]

124 

125 

def reindex_all_agents(session: Session) -> int:
    """Rebuild the stored embedding of every active agent.

    Loads each agent with status 'active' together with its brokerage name,
    builds the embedding text for it and hands it to ``index_agent``. All
    updates are committed in one go after the last agent.

    Returns:
        Number of agents processed. Every fetched row is counted, including
        ones for which ``index_agent`` returned without storing anything.
    """
    active_agents_sql = text("""
        SELECT a.id, a.name, a.bio, b.name as brokerage_name
        FROM agents a
        LEFT JOIN brokerages b ON b.id = a.brokerage_id
        WHERE a.status = 'active'
    """)
    rows = session.execute(active_agents_sql).fetchall()

    processed = 0
    for processed, row in enumerate(rows, start=1):
        fields = row._asdict()
        index_agent(session, fields['id'], build_agent_text(fields))

    # Single commit covering every update issued above.
    session.commit()
    return processed