Coverage for src/idx_api/embeddings/images.py: 14%

1"""Property image embedding functions for multi-modal search with PostgreSQL/pgvector.

3Handles image indexing, description embeddings, visual embeddings,

4and inverse footnote linking.

5"""

7import json

8import re

10from sqlalchemy import text

11from sqlalchemy.orm import Session

13from idx_api.config import settings

14from idx_api.embeddings.base import embedding_to_pgvector, generate_embedding_sync

17def index_property_image(

18 session: Session,

19 listing_id: str,

20 image_url: str,

21 image_index: int,

22 description: str,

23 room_type: str | None = None,

24) -> int:

25 """Index a single property image with its description.

27 Args:

28 session: Database session

29 listing_id: MLS listing ID

30 image_url: URL of the image

31 image_index: Position in the photo array (0 = primary)

32 description: AI-generated description of the image

33 room_type: Detected room type (kitchen, bathroom, etc.)

35 Returns:

36 ID of the inserted/updated property_images row

37 """

38 # Generate description embedding via Ollama API

39 description_embedding = generate_embedding_sync(description) if description else None

41 # Insert or update the image metadata with embedding

42 session.execute(

43 text("""

44 INSERT INTO property_images (listing_id, image_url, image_index, description, room_type, description_embedding)

45 VALUES (:listing_id, :image_url, :image_index, :description, :room_type, :embedding::vector)

46 ON CONFLICT(listing_id, image_url) DO UPDATE SET

47 description = EXCLUDED.description,

48 room_type = EXCLUDED.room_type,

49 description_embedding = EXCLUDED.description_embedding,

50 created_at = CURRENT_TIMESTAMP

51 """),

52 {

53 "listing_id": listing_id,

54 "image_url": image_url,

55 "image_index": image_index,

56 "description": description,

57 "room_type": room_type,

58 "embedding": embedding_to_pgvector(description_embedding) if description_embedding else None,

59 },

60 )

62 # Get the image ID

63 result = session.execute(

64 text("SELECT id FROM property_images WHERE listing_id = :listing_id AND image_url = :image_url"),

65 {"listing_id": listing_id, "image_url": image_url},

66 )

67 row = result.fetchone()

68 return row[0]

71def index_property_image_visual(

72 session: Session,

73 image_id: int,

74 visual_embedding: list[float],

75) -> None:

76 """Store a SigLIP visual embedding for a property image (Approach B).

78 Args:

79 session: Database session

80 image_id: ID from property_images table

81 visual_embedding: 1024-dimensional SigLIP embedding

82 """

83 session.execute(

84 text("""

85 UPDATE property_images

86 SET visual_embedding = :embedding::vector

87 WHERE id = :image_id

88 """),

89 {

90 "image_id": image_id,

91 "embedding": embedding_to_pgvector(visual_embedding),

92 },

93 )

96def search_image_descriptions(

97 session: Session,

98 query: str,

99 limit: int = 20,

100) -> list[dict]:

101 """Search property images by description similarity (Approach A).

102

103 Args:

104 session: Database session

105 query: Search query (e.g., "granite countertops")

106 limit: Maximum results to return

107

108 Returns:

109 List of matching images with their properties

110 """

111 # Generate query embedding

112 query_embedding = generate_embedding_sync(query)

113

114 if query_embedding is None:

115 return []

116

117 result = session.execute(

118 text("""

119 SELECT

120 i.id as image_id,

121 i.listing_id,

122 i.image_url,

123 i.image_index,

124 i.description,

125 i.room_type,

126 p.city,

127 p.state_or_province,

128 p.list_price,

129 p.bedrooms_total,

130 p.bathrooms_total_integer,

131 i.description_embedding <=> :query_embedding::vector AS similarity_distance

132 FROM property_images i

133 JOIN properties p ON p.listing_id = i.listing_id

134 WHERE i.description_embedding IS NOT NULL

135 AND p.mlg_can_view = true

136 ORDER BY i.description_embedding <=> :query_embedding::vector

137 LIMIT :limit

138 """),

139 {

140 "query_embedding": embedding_to_pgvector(query_embedding),

141 "limit": limit,

142 },

143 )

144

145 return [

146 {

147 "image_id": row.image_id,

148 "listing_id": row.listing_id,

149 "image_url": row.image_url,

150 "image_index": row.image_index,

151 "description": row.description,

152 "room_type": row.room_type,

153 "city": row.city,

154 "state": row.state_or_province,

155 "price": row.list_price,

156 "beds": row.bedrooms_total,

157 "baths": row.bathrooms_total_integer,

158 "similarity": 1.0 - row.similarity_distance,

159 }

160 for row in result.fetchall()

161 ]

162

163

164def search_image_visuals(

165 session: Session,

166 query_embedding: list[float],

167 limit: int = 20,

168) -> list[dict]:

169 """Search property images by visual similarity (Approach B).

170

171 Args:

172 session: Database session

173 query_embedding: SigLIP text embedding for the query

174 limit: Maximum results to return

175

176 Returns:

177 List of matching images with their properties

178 """

179 result = session.execute(

180 text("""

181 SELECT

182 i.id as image_id,

183 i.listing_id,

184 i.image_url,

185 i.image_index,

186 i.description,

187 i.room_type,

188 p.city,

189 p.state_or_province,

190 p.list_price,

191 p.bedrooms_total,

192 p.bathrooms_total_integer,

193 i.visual_embedding <=> :query_embedding::vector AS similarity_distance

194 FROM property_images i

195 JOIN properties p ON p.listing_id = i.listing_id

196 WHERE i.visual_embedding IS NOT NULL

197 AND p.mlg_can_view = true

198 ORDER BY i.visual_embedding <=> :query_embedding::vector

199 LIMIT :limit

200 """),

201 {

202 "query_embedding": embedding_to_pgvector(query_embedding),

203 "limit": limit,

204 },

205 )

206

207 return [

208 {

209 "image_id": row.image_id,

210 "listing_id": row.listing_id,

211 "image_url": row.image_url,

212 "image_index": row.image_index,

213 "description": row.description,

214 "room_type": row.room_type,

215 "city": row.city,

216 "state": row.state_or_province,

217 "price": row.list_price,

218 "beds": row.bedrooms_total,

219 "baths": row.bathrooms_total_integer,

220 "similarity": 1.0 - row.similarity_distance,

221 }

222 for row in result.fetchall()

223 ]

224

225

226def get_unindexed_property_images(

227 session: Session,

228 limit: int = 100,

229) -> list[dict]:

230 """Get property images that haven't been indexed yet.

231

232 Returns properties with photos but no entries in property_images table.

233

234 Args:

235 session: Database session

236 limit: Maximum properties to return

237

238 Returns:

239 List of dicts with listing_id and photos array

240 """

241 result = session.execute(

242 text("""

243 SELECT p.listing_id, p.photos, p.primary_photo_url

244 FROM properties p

245 LEFT JOIN property_images pi ON pi.listing_id = p.listing_id

246 WHERE p.mlg_can_view = true

247 AND p.standard_status = 'Active'

248 AND p.photos IS NOT NULL

249 AND pi.id IS NULL

250 LIMIT :limit

251 """),

252 {"limit": limit},

253 )

254

255 properties = []

256 for row in result.fetchall():

257 photos = []

258 if row.photos:

259 try:

260 photos = json.loads(row.photos)

261 except json.JSONDecodeError:

262 pass

263 elif row.primary_photo_url:

264 photos = [row.primary_photo_url]

265

266 if photos:

267 properties.append({

268 "listing_id": row.listing_id,

269 "photos": photos[:settings.vision_max_photos_per_property],

270 })

271

272 return properties

273

274

275def get_image_index_stats(session: Session) -> dict:

276 """Get statistics about image indexing progress.

277

278 Returns:

279 Dict with counts for total properties, indexed images, etc.

280 """

281 result = session.execute(

282 text("""

283 SELECT

284 (SELECT COUNT(*) FROM properties WHERE mlg_can_view = true AND standard_status = 'Active') as total_properties,

285 (SELECT COUNT(DISTINCT listing_id) FROM property_images) as properties_with_images,

286 (SELECT COUNT(*) FROM property_images) as total_images,

287 (SELECT COUNT(*) FROM property_images WHERE description_embedding IS NOT NULL) as description_embeddings,

288 (SELECT COUNT(*) FROM property_images WHERE visual_embedding IS NOT NULL) as visual_embeddings

289 """)

290 )

291 row = result.fetchone()

292 return {

293 "total_properties": row[0],

294 "properties_with_images": row[1],

295 "total_images": row[2],

296 "description_embeddings": row[3],

297 "visual_embeddings": row[4],

298 }

299

300

301# ========================================

302# Feature Tags (for inverse footnotes)

303# ========================================

304

305

306def index_image_feature_tags(

307 session: Session,

308 image_id: int,

309 tags: dict,

310) -> int:

311 """Index structured feature tags for an image.

312

313 These tags enable "inverse footnote" linking from listing text to photos.

314

315 Args:

316 session: Database session

317 image_id: ID from property_images table

318 tags: Dict with feature_type → list of values, e.g.:

319 {"room_type": ["kitchen"], "features": ["island", "granite countertops"],

320 "materials": ["granite", "stainless steel"], "style": ["modern"]}

321

322 Returns:

323 Number of tags indexed

324 """

325 count = 0

326 for feature_type, values in tags.items():

327 if not values:

328 continue

329 # Handle both single values and lists

330 if isinstance(values, str):

331 values = [values]

332

333 for value in values:

334 if not value:

335 continue

336 # Normalize the value (lowercase, strip whitespace)

337 normalized_value = value.lower().strip()

338 try:

339 session.execute(

340 text("""

341 INSERT INTO image_feature_tags (image_id, feature_type, feature_value)

342 VALUES (:image_id, :feature_type, :feature_value)

343 ON CONFLICT (image_id, feature_type, feature_value) DO NOTHING

344 """),

345 {

346 "image_id": image_id,

347 "feature_type": feature_type,

348 "feature_value": normalized_value,

349 },

350 )

351 count += 1

352 except Exception:

353 pass # Ignore duplicate constraint violations

354

355 return count

356

357

358def find_images_by_feature(

359 session: Session,

360 listing_id: str,

361 feature_terms: list[str],

362) -> list[dict]:

363 """Find images for a listing that match given feature terms.

364

365 This is the core of "inverse footnotes" - given terms from listing text,

366 find which photos show those features.

367

368 Args:

369 session: Database session

370 listing_id: MLS listing ID to search within

371 feature_terms: List of terms to search for (e.g., ["shop", "tub", "pool"])

372

373 Returns:

374 List of matching images with their matching terms

375 """

376 if not feature_terms:

377 return []

378

379 # Build a query that finds images matching any of the terms

380 # Uses LIKE for partial matching (e.g., "shop" matches "workshop")

381 conditions = []

382 params = {"listing_id": listing_id}

383

384 for i, term in enumerate(feature_terms):

385 normalized = term.lower().strip()

386 param_name = f"term_{i}"

387 params[param_name] = f"%{normalized}%"

388 conditions.append(f"ft.feature_value LIKE :{param_name}")

389

390 if not conditions:

391 return []

392

393 where_clause = " OR ".join(conditions)

394

395 result = session.execute(

396 text(f"""

397 SELECT DISTINCT

398 i.id as image_id,

399 i.image_url,

400 i.image_index,

401 i.description,

402 i.room_type,

403 STRING_AGG(DISTINCT ft.feature_value, ',') as matched_features

404 FROM property_images i

405 JOIN image_feature_tags ft ON ft.image_id = i.id

406 WHERE i.listing_id = :listing_id

407 AND ({where_clause})

408 GROUP BY i.id, i.image_url, i.image_index, i.description, i.room_type

409 ORDER BY i.image_index

410 """),

411 params,

412 )

413

414 return [

415 {

416 "image_id": row.image_id,

417 "image_url": row.image_url,

418 "image_index": row.image_index,

419 "description": row.description,

420 "room_type": row.room_type,

421 "matched_features": row.matched_features.split(",") if row.matched_features else [],

422 }

423 for row in result.fetchall()

424 ]

425

426

427def get_inverse_footnotes(

428 session: Session,

429 listing_id: str,

430 listing_text: str,

431 min_term_length: int = 3,

432) -> dict:

433 """Generate inverse footnotes linking terms in listing text to photos.

434

435 Scans the listing text for known feature terms and returns a mapping

436 of terms to their corresponding image evidence.

437

438 Args:

439 session: Database session

440 listing_id: MLS listing ID

441 listing_text: The public remarks or description text

442 min_term_length: Minimum term length to consider (filters noise)

443

444 Returns:

445 Dict with:

446 - "terms": Dict mapping term → list of image_ids

447 - "images": Dict mapping image_id → image metadata

448 - "annotated_text": Original text with [[term:image_ids]] markers

449 """

450 # Get all feature tags for this listing's images

451 result = session.execute(

452 text("""

453 SELECT

454 i.id as image_id,

455 i.image_url,

456 i.image_index,

457 i.room_type,

458 ft.feature_type,

459 ft.feature_value

460 FROM property_images i

461 JOIN image_feature_tags ft ON ft.image_id = i.id

462 WHERE i.listing_id = :listing_id

463 """),

464 {"listing_id": listing_id},

465 )

466

467 # Build a mapping of feature terms to images

468 term_to_images = {} # term → set of image_ids

469 image_metadata = {} # image_id → image info

470

471 for row in result.fetchall():

472 term = row.feature_value

473 image_id = row.image_id

474

475 if term not in term_to_images:

476 term_to_images[term] = set()

477 term_to_images[term].add(image_id)

478

479 if image_id not in image_metadata:

480 image_metadata[image_id] = {

481 "image_id": image_id,

482 "image_url": row.image_url,

483 "image_index": row.image_index,

484 "room_type": row.room_type,

485 }

486

487 # Find terms that appear in the listing text

488 listing_lower = listing_text.lower()

489 found_terms = {} # term → list of image_ids (sorted)

490

491 for term, image_ids in term_to_images.items():

492 if len(term) < min_term_length:

493 continue

494

495 # Check if term appears in text (word boundary aware for longer terms)

496 if len(term) >= 4:

497 # Use word boundary for longer terms

498 pattern = r'\b' + re.escape(term) + r'\b'

499 if re.search(pattern, listing_lower):

500 found_terms[term] = sorted(image_ids)

501 elif term in listing_lower:

502 # For short terms, require exact substring

503 found_terms[term] = sorted(image_ids)

504

505 # Generate annotated text with inverse footnote markers

506 annotated = listing_text

507 for term in sorted(found_terms.keys(), key=len, reverse=True):

508 # Find all occurrences (case-insensitive)

509 image_ids = found_terms[term]

510 marker = f"[[{term}:{','.join(map(str, image_ids))}]]"

511

512 # Replace first occurrence only to avoid over-annotation

513 pattern = re.compile(re.escape(term), re.IGNORECASE)

514 annotated = pattern.sub(f"{term}{marker}", annotated, count=1)

515

516 return {

517 "terms": found_terms,

518 "images": image_metadata,

519 "annotated_text": annotated,

520 "total_terms_found": len(found_terms),

521 "total_images_linked": len(set().union(*found_terms.values())) if found_terms else 0,

522 }

Coverage for src / idx_api / embeddings / images.py: 14%

104 statements