#!/usr/bin/env python

"""
A simple script that removes big JPEG blobs from the contact cards on your
Nextcloud db.

:author: Fabio Manganiello
"""

import re

import sqlalchemy as sa
from sqlalchemy.orm import sessionmaker

try:
    # Location of ``declarative_base`` since SQLAlchemy 1.4.
    from sqlalchemy.orm import declarative_base
except ImportError:
    # Older SQLAlchemy releases only provide it here.
    from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

# Connection URL of the database to clean up; edit before running the script.
db_url = "mysql+pymysql://user:pwd@dbhost/nextclouddb"

# Matches the start of a PHOTO content line: an optional ``group.`` prefix,
# the property name in any letter case, then either ";" (parameters follow)
# or ":" (the value follows directly).
_PHOTO_PROPERTY = re.compile(rb"(?:[A-Za-z0-9-]+\.)?PHOTO[;:]", re.IGNORECASE)

# A content line that starts with one of these is the continuation of the
# previous (folded) line.
_FOLD_PREFIXES = (b" ", b"\t")


class Card(Base):
    """ORM mapping of the ``oc_cards`` table."""

    __tablename__ = "oc_cards"

    id = sa.Column(sa.BigInteger, primary_key=True)
    addressbookid = sa.Column(sa.BigInteger)
    # Raw card payload; this script treats it as vCard text.
    carddata = sa.Column(sa.LargeBinary)
    uri = sa.Column(sa.String)
    lastmodified = sa.Column(sa.BigInteger)
    etag = sa.Column(sa.String)
    # Kept equal to ``len(carddata)`` whenever this script rewrites a card.
    size = sa.Column(sa.BigInteger)
    uid = sa.Column(sa.String)


def remove_photo_attributes(vcard_data):
    """
    Strip every PHOTO property (and its folded continuation lines) from a
    vCard.

    The filtering is done on the raw bytes, so every byte that does not belong
    to a PHOTO property is returned untouched, including byte sequences that
    are not valid UTF-8.

    :param vcard_data: The vCard as ``bytes`` (or another bytes-like object),
        as ``str`` (encoded as UTF-8 first), or ``None``.
    :return: The vCard without PHOTO properties as ``bytes``, or ``None`` if
        ``vcard_data`` is ``None``.
    """
    if vcard_data is None:
        return None

    if isinstance(vcard_data, str):
        raw = vcard_data.encode("utf-8")
    else:
        # Normalizes bytearray / memoryview inputs as well.
        raw = bytes(vcard_data)

    kept_lines = []
    in_photo = False

    for line in raw.split(b"\n"):
        # Folded continuation of a PHOTO property that is being dropped.
        if in_photo and line.startswith(_FOLD_PREFIXES):
            continue

        in_photo = _PHOTO_PROPERTY.match(line) is not None
        if not in_photo:
            kept_lines.append(line)

    return b"\n".join(kept_lines)


def update_cards(batch_size=100):
    """
    Remove the PHOTO properties from all the cards stored on the database.

    Cards are loaded in batches ordered by primary key, and each batch is
    committed on its own. On any error the current batch is rolled back, the
    error is printed and the processing stops.

    :param batch_size: Number of cards loaded and committed per batch.
    :raises ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    engine = sa.create_engine(db_url)
    Session = sessionmaker(bind=engine)
    session = Session()
    processed = 0
    last_id = None

    try:
        while True:
            # Keyset pagination: resume after the last id already seen rather
            # than using an ever-growing OFFSET.
            query = session.query(Card)
            if last_id is not None:
                query = query.filter(Card.id > last_id)

            cards = query.order_by(Card.id).limit(batch_size).all()
            if not cards:
                break

            for card in cards:
                old_carddata = card.carddata
                new_carddata = remove_photo_attributes(old_carddata)
                if new_carddata == old_carddata:
                    continue

                # The size column may be NULL; fall back to the payload length.
                old_size = card.size if card.size is not None else len(old_carddata)
                print(
                    f"Card: id={card.id} old_size={old_size / 1000}KB new_size={len(new_carddata) / 1000}KB"
                )

                card.carddata = new_carddata
                # Update size field to reflect new size
                card.size = len(new_carddata)
                # NOTE(review): the etag is left unchanged. Whether Nextcloud
                # needs a fresh etag after this rewrite depends on how it
                # generates etags - TODO confirm.

            # Read the id before committing, since a commit expires the loaded
            # attributes and reading it afterwards would trigger a reload.
            last_id = cards[-1].id
            processed += len(cards)

            session.commit()
            print(f"Processed {processed} records")

    except Exception as e:
        session.rollback()
        print(f"Error: {e}")
    finally:
        session.close()
        engine.dispose()


if __name__ == "__main__":
    update_cards()