Coverage for src / idx_api / embeddings / brokerages.py: 23%

47 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2025-12-28 11:12 -0700

1"""Brokerage embedding functions for semantic firm search with PostgreSQL/pgvector.""" 

2 

3from sqlalchemy import text 

4from sqlalchemy.orm import Session 

5 

6from idx_api.embeddings.base import embedding_to_pgvector, generate_embedding_sync 

7 

8 

def _brokerage_field(brokerage, field: str):
    """Read ``field`` from an attribute-style object or a dict-like mapping.

    Attribute access is tried first, then a ``.get(field)`` lookup. Returns
    ``None`` when the field is absent in both forms, so optional fields never
    raise.
    """
    if hasattr(brokerage, field):
        return getattr(brokerage, field)
    getter = getattr(brokerage, "get", None)
    return getter(field) if callable(getter) else None


def build_brokerage_text(brokerage) -> str:
    """Build rich text representation of a brokerage for embedding.

    The result joins, in order: the name, the tagline, the franchise
    affiliation, and fixed phrases for the ``military_specialist`` and
    ``va_loans`` flags. Falsy or missing fields are skipped.

    Args:
        brokerage: Brokerage ORM model (attribute access) or dict
            (``.get`` access).

    Returns:
        Text representation for semantic search, with parts separated by
        ``" - "``; an empty string when no field is populated.
    """
    parts = []

    # Always include name
    name = _brokerage_field(brokerage, "name")
    if name:
        parts.append(name)

    # Add tagline if present
    tagline = _brokerage_field(brokerage, "tagline")
    if tagline:
        parts.append(tagline)

    # Add franchise affiliation
    franchise = _brokerage_field(brokerage, "franchise_affiliation")
    if franchise:
        parts.append(f"affiliated with {franchise}")

    # Add specializations (boolean flags become descriptive phrases)
    if _brokerage_field(brokerage, "military_specialist"):
        parts.append("military relocation specialist")

    if _brokerage_field(brokerage, "va_loans"):
        parts.append("VA loan expert")

    return " - ".join(parts)

53 

54 

def index_brokerage(session: Session, brokerage_id: int, text_content: str) -> None:
    """Index a brokerage for vector search using pgvector.

    Generates an embedding for ``text_content`` and writes it to the
    ``embedding`` column of the matching ``brokerages`` row. The statement is
    executed on ``session`` but not committed here; the caller owns the
    transaction.

    Args:
        session: Database session
        brokerage_id: ID of the brokerage to index
        text_content: Text representation of the brokerage
    """
    # Generate embedding (original note: via Ollama API)
    embedding = generate_embedding_sync(text_content)

    if embedding is None:
        # Best-effort: report the failure and leave the row untouched.
        print(f"Failed to generate embedding for brokerage {brokerage_id}")
        return

    # Store embedding directly in brokerages table.
    # CAST(... AS vector) is used instead of ``:embedding::vector`` because
    # SQLAlchemy's text() does not recognise a bind parameter that is
    # immediately followed by a colon.
    session.execute(
        text("""
            UPDATE brokerages
            SET embedding = CAST(:embedding AS vector)
            WHERE id = :brokerage_id
        """),
        {
            "brokerage_id": brokerage_id,
            "embedding": embedding_to_pgvector(embedding),
        },
    )

82 

83 

def search_brokerages(
    session: Session,
    query: str,
    limit: int = 10,
) -> list[dict]:
    """Search brokerages by semantic similarity using pgvector.

    Embeds ``query``, then orders brokerages that have a stored embedding by
    the pgvector ``<=>`` distance to it, nearest first.

    Args:
        session: Database session
        query: Search query (e.g., "military relocation specialist")
        limit: Maximum results to return

    Returns:
        List of matching brokerages as dicts with keys ``id``, ``slug``,
        ``name``, ``tagline``, ``military_specialist``, ``va_loans``,
        ``franchise_affiliation``, ``logo_url`` and ``similarity``
        (``1.0 - distance``). Empty list when the query embedding could not
        be generated.
    """
    # Generate query embedding
    query_embedding = generate_embedding_sync(query)

    if query_embedding is None:
        return []

    # CAST(... AS vector) is used instead of ``:query_embedding::vector``
    # because SQLAlchemy's text() does not recognise a bind parameter that is
    # immediately followed by a colon.
    result = session.execute(
        text("""
            SELECT
                b.id,
                b.slug,
                b.name,
                b.tagline,
                b.military_specialist,
                b.va_loans,
                b.franchise_affiliation,
                b.logo_url,
                b.embedding <=> CAST(:query_embedding AS vector) AS similarity_distance
            FROM brokerages b
            WHERE b.embedding IS NOT NULL
            ORDER BY b.embedding <=> CAST(:query_embedding AS vector)
            LIMIT :limit
        """),
        {
            "query_embedding": embedding_to_pgvector(query_embedding),
            "limit": limit,
        },
    )

    return [
        {
            "id": row.id,
            "slug": row.slug,
            "name": row.name,
            "tagline": row.tagline,
            "military_specialist": row.military_specialist,
            "va_loans": row.va_loans,
            "franchise_affiliation": row.franchise_affiliation,
            "logo_url": row.logo_url,
            # Convert distance to a "higher is better" score.
            "similarity": 1.0 - row.similarity_distance,
        }
        for row in result.fetchall()
    ]

142 

143 

def reindex_all_brokerages(session: Session) -> int:
    """Reindex all brokerages for vector search.

    Loads every brokerage whose ``disabled_at`` is NULL, rebuilds its text
    representation, re-indexes it, and commits once at the end.

    Args:
        session: Database session

    Returns:
        Number of brokerages processed. ``index_brokerage`` returns ``None``
        whether or not embedding generation succeeded, so rows whose
        embedding failed are still counted.
    """
    # Select only the columns that build_brokerage_text and index_brokerage
    # read; this avoids pulling the existing embedding vectors for every row.
    result = session.execute(
        text("""
            SELECT
                id,
                name,
                tagline,
                franchise_affiliation,
                military_specialist,
                va_loans
            FROM brokerages
            WHERE disabled_at IS NULL
        """)
    )

    count = 0
    for row in result.fetchall():
        row_dict = row._asdict()
        text_content = build_brokerage_text(row_dict)
        index_brokerage(session, row_dict["id"], text_content)
        count += 1

    # Single commit so all embedding updates land in one transaction.
    session.commit()
    return count

167 

168 

# Backward-compatible ``*_legacy`` names: each one is bound to the same
# function object as its current counterpart, so older call sites keep working.
reindex_all_brokerages_legacy = reindex_all_brokerages
search_brokerages_legacy = search_brokerages
index_brokerage_legacy = index_brokerage
build_brokerage_text_legacy = build_brokerage_text