Coverage for src / idx_api / embeddings / images.py: 14%

104 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2025-12-28 11:16 -0700

1"""Property image embedding functions for multi-modal search with PostgreSQL/pgvector. 

2 

3Handles image indexing, description embeddings, visual embeddings, 

4and inverse footnote linking. 

5""" 

6 

7import json 

8import re 

9 

10from sqlalchemy import text 

11from sqlalchemy.orm import Session 

12 

13from idx_api.config import settings 

14from idx_api.embeddings.base import embedding_to_pgvector, generate_embedding_sync 

15 

16 

17def index_property_image( 

18 session: Session, 

19 listing_id: str, 

20 image_url: str, 

21 image_index: int, 

22 description: str, 

23 room_type: str | None = None, 

24) -> int: 

25 """Index a single property image with its description. 

26 

27 Args: 

28 session: Database session 

29 listing_id: MLS listing ID 

30 image_url: URL of the image 

31 image_index: Position in the photo array (0 = primary) 

32 description: AI-generated description of the image 

33 room_type: Detected room type (kitchen, bathroom, etc.) 

34 

35 Returns: 

36 ID of the inserted/updated property_images row 

37 """ 

38 # Generate description embedding via Ollama API 

39 description_embedding = generate_embedding_sync(description) if description else None 

40 

41 # Insert or update the image metadata with embedding 

42 session.execute( 

43 text(""" 

44 INSERT INTO property_images (listing_id, image_url, image_index, description, room_type, description_embedding) 

45 VALUES (:listing_id, :image_url, :image_index, :description, :room_type, :embedding::vector) 

46 ON CONFLICT(listing_id, image_url) DO UPDATE SET 

47 description = EXCLUDED.description, 

48 room_type = EXCLUDED.room_type, 

49 description_embedding = EXCLUDED.description_embedding, 

50 created_at = CURRENT_TIMESTAMP 

51 """), 

52 { 

53 "listing_id": listing_id, 

54 "image_url": image_url, 

55 "image_index": image_index, 

56 "description": description, 

57 "room_type": room_type, 

58 "embedding": embedding_to_pgvector(description_embedding) if description_embedding else None, 

59 }, 

60 ) 

61 

62 # Get the image ID 

63 result = session.execute( 

64 text("SELECT id FROM property_images WHERE listing_id = :listing_id AND image_url = :image_url"), 

65 {"listing_id": listing_id, "image_url": image_url}, 

66 ) 

67 row = result.fetchone() 

68 return row[0] 

69 

70 

71def index_property_image_visual( 

72 session: Session, 

73 image_id: int, 

74 visual_embedding: list[float], 

75) -> None: 

76 """Store a SigLIP visual embedding for a property image (Approach B). 

77 

78 Args: 

79 session: Database session 

80 image_id: ID from property_images table 

81 visual_embedding: 1024-dimensional SigLIP embedding 

82 """ 

83 session.execute( 

84 text(""" 

85 UPDATE property_images 

86 SET visual_embedding = :embedding::vector 

87 WHERE id = :image_id 

88 """), 

89 { 

90 "image_id": image_id, 

91 "embedding": embedding_to_pgvector(visual_embedding), 

92 }, 

93 ) 

94 

95 

96def search_image_descriptions( 

97 session: Session, 

98 query: str, 

99 limit: int = 20, 

100) -> list[dict]: 

101 """Search property images by description similarity (Approach A). 

102 

103 Args: 

104 session: Database session 

105 query: Search query (e.g., "granite countertops") 

106 limit: Maximum results to return 

107 

108 Returns: 

109 List of matching images with their properties 

110 """ 

111 # Generate query embedding 

112 query_embedding = generate_embedding_sync(query) 

113 

114 if query_embedding is None: 

115 return [] 

116 

117 result = session.execute( 

118 text(""" 

119 SELECT 

120 i.id as image_id, 

121 i.listing_id, 

122 i.image_url, 

123 i.image_index, 

124 i.description, 

125 i.room_type, 

126 p.city, 

127 p.state_or_province, 

128 p.list_price, 

129 p.bedrooms_total, 

130 p.bathrooms_total_integer, 

131 i.description_embedding <=> :query_embedding::vector AS similarity_distance 

132 FROM property_images i 

133 JOIN properties p ON p.listing_id = i.listing_id 

134 WHERE i.description_embedding IS NOT NULL 

135 AND p.mlg_can_view = true 

136 ORDER BY i.description_embedding <=> :query_embedding::vector 

137 LIMIT :limit 

138 """), 

139 { 

140 "query_embedding": embedding_to_pgvector(query_embedding), 

141 "limit": limit, 

142 }, 

143 ) 

144 

145 return [ 

146 { 

147 "image_id": row.image_id, 

148 "listing_id": row.listing_id, 

149 "image_url": row.image_url, 

150 "image_index": row.image_index, 

151 "description": row.description, 

152 "room_type": row.room_type, 

153 "city": row.city, 

154 "state": row.state_or_province, 

155 "price": row.list_price, 

156 "beds": row.bedrooms_total, 

157 "baths": row.bathrooms_total_integer, 

158 "similarity": 1.0 - row.similarity_distance, 

159 } 

160 for row in result.fetchall() 

161 ] 

162 

163 

164def search_image_visuals( 

165 session: Session, 

166 query_embedding: list[float], 

167 limit: int = 20, 

168) -> list[dict]: 

169 """Search property images by visual similarity (Approach B). 

170 

171 Args: 

172 session: Database session 

173 query_embedding: SigLIP text embedding for the query 

174 limit: Maximum results to return 

175 

176 Returns: 

177 List of matching images with their properties 

178 """ 

179 result = session.execute( 

180 text(""" 

181 SELECT 

182 i.id as image_id, 

183 i.listing_id, 

184 i.image_url, 

185 i.image_index, 

186 i.description, 

187 i.room_type, 

188 p.city, 

189 p.state_or_province, 

190 p.list_price, 

191 p.bedrooms_total, 

192 p.bathrooms_total_integer, 

193 i.visual_embedding <=> :query_embedding::vector AS similarity_distance 

194 FROM property_images i 

195 JOIN properties p ON p.listing_id = i.listing_id 

196 WHERE i.visual_embedding IS NOT NULL 

197 AND p.mlg_can_view = true 

198 ORDER BY i.visual_embedding <=> :query_embedding::vector 

199 LIMIT :limit 

200 """), 

201 { 

202 "query_embedding": embedding_to_pgvector(query_embedding), 

203 "limit": limit, 

204 }, 

205 ) 

206 

207 return [ 

208 { 

209 "image_id": row.image_id, 

210 "listing_id": row.listing_id, 

211 "image_url": row.image_url, 

212 "image_index": row.image_index, 

213 "description": row.description, 

214 "room_type": row.room_type, 

215 "city": row.city, 

216 "state": row.state_or_province, 

217 "price": row.list_price, 

218 "beds": row.bedrooms_total, 

219 "baths": row.bathrooms_total_integer, 

220 "similarity": 1.0 - row.similarity_distance, 

221 } 

222 for row in result.fetchall() 

223 ] 

224 

225 

226def get_unindexed_property_images( 

227 session: Session, 

228 limit: int = 100, 

229) -> list[dict]: 

230 """Get property images that haven't been indexed yet. 

231 

232 Returns properties with photos but no entries in property_images table. 

233 

234 Args: 

235 session: Database session 

236 limit: Maximum properties to return 

237 

238 Returns: 

239 List of dicts with listing_id and photos array 

240 """ 

241 result = session.execute( 

242 text(""" 

243 SELECT p.listing_id, p.photos, p.primary_photo_url 

244 FROM properties p 

245 LEFT JOIN property_images pi ON pi.listing_id = p.listing_id 

246 WHERE p.mlg_can_view = true 

247 AND p.standard_status = 'Active' 

248 AND p.photos IS NOT NULL 

249 AND pi.id IS NULL 

250 LIMIT :limit 

251 """), 

252 {"limit": limit}, 

253 ) 

254 

255 properties = [] 

256 for row in result.fetchall(): 

257 photos = [] 

258 if row.photos: 

259 try: 

260 photos = json.loads(row.photos) 

261 except json.JSONDecodeError: 

262 pass 

263 elif row.primary_photo_url: 

264 photos = [row.primary_photo_url] 

265 

266 if photos: 

267 properties.append({ 

268 "listing_id": row.listing_id, 

269 "photos": photos[:settings.vision_max_photos_per_property], 

270 }) 

271 

272 return properties 

273 

274 

275def get_image_index_stats(session: Session) -> dict: 

276 """Get statistics about image indexing progress. 

277 

278 Returns: 

279 Dict with counts for total properties, indexed images, etc. 

280 """ 

281 result = session.execute( 

282 text(""" 

283 SELECT 

284 (SELECT COUNT(*) FROM properties WHERE mlg_can_view = true AND standard_status = 'Active') as total_properties, 

285 (SELECT COUNT(DISTINCT listing_id) FROM property_images) as properties_with_images, 

286 (SELECT COUNT(*) FROM property_images) as total_images, 

287 (SELECT COUNT(*) FROM property_images WHERE description_embedding IS NOT NULL) as description_embeddings, 

288 (SELECT COUNT(*) FROM property_images WHERE visual_embedding IS NOT NULL) as visual_embeddings 

289 """) 

290 ) 

291 row = result.fetchone() 

292 return { 

293 "total_properties": row[0], 

294 "properties_with_images": row[1], 

295 "total_images": row[2], 

296 "description_embeddings": row[3], 

297 "visual_embeddings": row[4], 

298 } 

299 

300 

301# ======================================== 

302# Feature Tags (for inverse footnotes) 

303# ======================================== 

304 

305 

306def index_image_feature_tags( 

307 session: Session, 

308 image_id: int, 

309 tags: dict, 

310) -> int: 

311 """Index structured feature tags for an image. 

312 

313 These tags enable "inverse footnote" linking from listing text to photos. 

314 

315 Args: 

316 session: Database session 

317 image_id: ID from property_images table 

318 tags: Dict with feature_type → list of values, e.g.: 

319 {"room_type": ["kitchen"], "features": ["island", "granite countertops"], 

320 "materials": ["granite", "stainless steel"], "style": ["modern"]} 

321 

322 Returns: 

323 Number of tags indexed 

324 """ 

325 count = 0 

326 for feature_type, values in tags.items(): 

327 if not values: 

328 continue 

329 # Handle both single values and lists 

330 if isinstance(values, str): 

331 values = [values] 

332 

333 for value in values: 

334 if not value: 

335 continue 

336 # Normalize the value (lowercase, strip whitespace) 

337 normalized_value = value.lower().strip() 

338 try: 

339 session.execute( 

340 text(""" 

341 INSERT INTO image_feature_tags (image_id, feature_type, feature_value) 

342 VALUES (:image_id, :feature_type, :feature_value) 

343 ON CONFLICT (image_id, feature_type, feature_value) DO NOTHING 

344 """), 

345 { 

346 "image_id": image_id, 

347 "feature_type": feature_type, 

348 "feature_value": normalized_value, 

349 }, 

350 ) 

351 count += 1 

352 except Exception: 

353 pass # Ignore duplicate constraint violations 

354 

355 return count 

356 

357 

358def find_images_by_feature( 

359 session: Session, 

360 listing_id: str, 

361 feature_terms: list[str], 

362) -> list[dict]: 

363 """Find images for a listing that match given feature terms. 

364 

365 This is the core of "inverse footnotes" - given terms from listing text, 

366 find which photos show those features. 

367 

368 Args: 

369 session: Database session 

370 listing_id: MLS listing ID to search within 

371 feature_terms: List of terms to search for (e.g., ["shop", "tub", "pool"]) 

372 

373 Returns: 

374 List of matching images with their matching terms 

375 """ 

376 if not feature_terms: 

377 return [] 

378 

379 # Build a query that finds images matching any of the terms 

380 # Uses LIKE for partial matching (e.g., "shop" matches "workshop") 

381 conditions = [] 

382 params = {"listing_id": listing_id} 

383 

384 for i, term in enumerate(feature_terms): 

385 normalized = term.lower().strip() 

386 param_name = f"term_{i}" 

387 params[param_name] = f"%{normalized}%" 

388 conditions.append(f"ft.feature_value LIKE :{param_name}") 

389 

390 if not conditions: 

391 return [] 

392 

393 where_clause = " OR ".join(conditions) 

394 

395 result = session.execute( 

396 text(f""" 

397 SELECT DISTINCT 

398 i.id as image_id, 

399 i.image_url, 

400 i.image_index, 

401 i.description, 

402 i.room_type, 

403 STRING_AGG(DISTINCT ft.feature_value, ',') as matched_features 

404 FROM property_images i 

405 JOIN image_feature_tags ft ON ft.image_id = i.id 

406 WHERE i.listing_id = :listing_id 

407 AND ({where_clause}) 

408 GROUP BY i.id, i.image_url, i.image_index, i.description, i.room_type 

409 ORDER BY i.image_index 

410 """), 

411 params, 

412 ) 

413 

414 return [ 

415 { 

416 "image_id": row.image_id, 

417 "image_url": row.image_url, 

418 "image_index": row.image_index, 

419 "description": row.description, 

420 "room_type": row.room_type, 

421 "matched_features": row.matched_features.split(",") if row.matched_features else [], 

422 } 

423 for row in result.fetchall() 

424 ] 

425 

426 

427def get_inverse_footnotes( 

428 session: Session, 

429 listing_id: str, 

430 listing_text: str, 

431 min_term_length: int = 3, 

432) -> dict: 

433 """Generate inverse footnotes linking terms in listing text to photos. 

434 

435 Scans the listing text for known feature terms and returns a mapping 

436 of terms to their corresponding image evidence. 

437 

438 Args: 

439 session: Database session 

440 listing_id: MLS listing ID 

441 listing_text: The public remarks or description text 

442 min_term_length: Minimum term length to consider (filters noise) 

443 

444 Returns: 

445 Dict with: 

446 - "terms": Dict mapping term → list of image_ids 

447 - "images": Dict mapping image_id → image metadata 

448 - "annotated_text": Original text with [[term:image_ids]] markers 

449 """ 

450 # Get all feature tags for this listing's images 

451 result = session.execute( 

452 text(""" 

453 SELECT 

454 i.id as image_id, 

455 i.image_url, 

456 i.image_index, 

457 i.room_type, 

458 ft.feature_type, 

459 ft.feature_value 

460 FROM property_images i 

461 JOIN image_feature_tags ft ON ft.image_id = i.id 

462 WHERE i.listing_id = :listing_id 

463 """), 

464 {"listing_id": listing_id}, 

465 ) 

466 

467 # Build a mapping of feature terms to images 

468 term_to_images = {} # term → set of image_ids 

469 image_metadata = {} # image_id → image info 

470 

471 for row in result.fetchall(): 

472 term = row.feature_value 

473 image_id = row.image_id 

474 

475 if term not in term_to_images: 

476 term_to_images[term] = set() 

477 term_to_images[term].add(image_id) 

478 

479 if image_id not in image_metadata: 

480 image_metadata[image_id] = { 

481 "image_id": image_id, 

482 "image_url": row.image_url, 

483 "image_index": row.image_index, 

484 "room_type": row.room_type, 

485 } 

486 

487 # Find terms that appear in the listing text 

488 listing_lower = listing_text.lower() 

489 found_terms = {} # term → list of image_ids (sorted) 

490 

491 for term, image_ids in term_to_images.items(): 

492 if len(term) < min_term_length: 

493 continue 

494 

495 # Check if term appears in text (word boundary aware for longer terms) 

496 if len(term) >= 4: 

497 # Use word boundary for longer terms 

498 pattern = r'\b' + re.escape(term) + r'\b' 

499 if re.search(pattern, listing_lower): 

500 found_terms[term] = sorted(image_ids) 

501 elif term in listing_lower: 

502 # For short terms, require exact substring 

503 found_terms[term] = sorted(image_ids) 

504 

505 # Generate annotated text with inverse footnote markers 

506 annotated = listing_text 

507 for term in sorted(found_terms.keys(), key=len, reverse=True): 

508 # Find all occurrences (case-insensitive) 

509 image_ids = found_terms[term] 

510 marker = f"[[{term}:{','.join(map(str, image_ids))}]]" 

511 

512 # Replace first occurrence only to avoid over-annotation 

513 pattern = re.compile(re.escape(term), re.IGNORECASE) 

514 annotated = pattern.sub(f"{term}{marker}", annotated, count=1) 

515 

516 return { 

517 "terms": found_terms, 

518 "images": image_metadata, 

519 "annotated_text": annotated, 

520 "total_terms_found": len(found_terms), 

521 "total_images_linked": len(set().union(*found_terms.values())) if found_terms else 0, 

522 }