fabio revised this gist . Go to revision
No changes
fabio revised this gist . Go to revision
1 file changed, 0 insertions, 0 deletions
cleanup_jpeg_from_contacts.py renamed to cleanup_jpeg_from_nc_contacts.py
File renamed without changes
Fabio Manganiello revised this gist . Go to revision
1 file changed, 108 insertions
cleanup_jpeg_from_contacts.py(file created)
| @@ -0,0 +1,108 @@ | |||
| 1 | + | #!/usr/bin/env python | |
| 2 | + | ||
| 3 | + | """ | |
| 4 | + | A simple script that removes big JPEG blobs from the contact cards on your | |
| 5 | + | Nextcloud db. | |
| 6 | + | ||
| 7 | + | :author: Fabio Manganiello <info@fabiomanganiello.com> | |
| 8 | + | """ | |
| 9 | + | ||
import os
import re

import sqlalchemy as sa
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
| 14 | + | ||
# Declarative base class that the ORM models in this script derive from.
Base = declarative_base()

# SQLAlchemy connection URL of the Nextcloud database.  It can be supplied
# through the NEXTCLOUD_DB_URL environment variable so that real credentials
# do not have to be written into the script; the placeholder below is used
# when the variable is not set.
db_url = os.environ.get(
    "NEXTCLOUD_DB_URL", "mysql+pymysql://user:pwd@dbhost/nextclouddb"
)
| 18 | + | ||
| 19 | + | ||
class Card(Base):
    """ORM mapping of the ``oc_cards`` table (one row per contact card)."""

    __tablename__ = "oc_cards"

    # Row identity.
    id = sa.Column(sa.BigInteger, primary_key=True)
    addressbookid = sa.Column(sa.BigInteger)

    # Raw vCard payload; this is the column the script rewrites.
    carddata = sa.Column(sa.LargeBinary)

    # Remaining columns of the table.  Only ``size`` is written by this
    # script (set to the byte length of the rewritten ``carddata``).
    uri = sa.Column(sa.String)
    lastmodified = sa.Column(sa.BigInteger)
    etag = sa.Column(sa.String)
    size = sa.Column(sa.BigInteger)
    uid = sa.Column(sa.String)
| 31 | + | ||
| 32 | + | ||
# Matches the start of a PHOTO content line: an optional "group." prefix, the
# property name in any letter case, then either ";" (parameters follow) or
# ":" (value follows directly, e.g. a vCard 4.0 "PHOTO:data:image/jpeg;...").
_PHOTO_LINE = re.compile(rb"(?:[A-Za-z0-9-]+\.)?PHOTO[;:]", re.IGNORECASE)


def remove_photo_attributes(vcard_data):
    """Return *vcard_data* with every PHOTO property removed, as bytes.

    A PHOTO property consists of the line that starts it plus all folded
    continuation lines that follow (lines beginning with a space or a tab).
    All other lines, including their original line endings, are kept
    untouched.

    The card is processed as bytes rather than decoded text, so bytes that
    are not valid UTF-8 are passed through unchanged instead of being
    replaced, and a card without a PHOTO compares equal to its input.

    :param vcard_data: The vCard as ``bytes`` (or ``bytearray``), as ``str``
        (encoded to UTF-8 first), or ``None``.
    :return: The cleaned vCard as ``bytes``, or ``None`` if the input was
        ``None``.
    """
    if vcard_data is None:
        return None
    if isinstance(vcard_data, str):
        vcard_data = vcard_data.encode("utf-8")

    kept_lines = []
    in_photo = False
    for line in vcard_data.split(b"\n"):
        # Folded continuation of the PHOTO currently being dropped.
        if in_photo and line[:1] in (b" ", b"\t"):
            continue

        in_photo = _PHOTO_LINE.match(line) is not None
        if not in_photo:
            kept_lines.append(line)

    return b"\n".join(kept_lines)
| 61 | + | ||
| 62 | + | ||
def update_cards():
    """Strip PHOTO properties from every card in the database.

    Cards are loaded in batches ordered by ``id``.  For each card whose
    payload changes after ``remove_photo_attributes``, ``carddata`` and
    ``size`` are updated, and each batch is committed separately.

    Any exception rolls back the current batch and is reported on stdout;
    batches committed before the failure stay committed.
    """
    engine = sa.create_engine(db_url)
    Session = sessionmaker(bind=engine, autocommit=False)
    session = Session()

    try:
        # Process in batches to avoid memory issues
        batch_size = 100
        last_id = None
        processed = 0

        while True:
            # Page by primary key instead of OFFSET: each batch resumes right
            # after the last id seen, so rows inserted or deleted while the
            # script runs cannot shift the window and cause cards to be
            # skipped, and the database does not rescan skipped rows.
            query = session.query(Card)
            if last_id is not None:
                query = query.filter(Card.id > last_id)
            cards = query.order_by(Card.id).limit(batch_size).all()
            if not cards:
                break

            for card in cards:
                if card.carddata is None:
                    # Nothing to clean on a card without a payload.
                    continue

                new_carddata = remove_photo_attributes(card.carddata)
                if new_carddata == card.carddata:
                    continue

                # ``size`` may be NULL; avoid dividing None.
                if card.size is None:
                    old_size = "unknown"
                else:
                    old_size = f"{card.size / 1000}KB"
                print(
                    f"Card: id={card.id} old_size={old_size} new_size={len(new_carddata) / 1000}KB"
                )

                card.carddata = new_carddata
                # Update size field to reflect new size
                card.size = len(new_carddata)
                # NOTE: etag is deliberately left unchanged here; whether it
                # must be regenerated depends on how Nextcloud derives etags.

            # Read the id before committing: commit expires loaded objects.
            last_id = cards[-1].id
            session.commit()
            processed += len(cards)
            print(f"Processed {processed} records")

    except Exception as e:
        session.rollback()
        print(f"Error: {e}")
    finally:
        session.close()
        # Release the pooled database connections held by the engine.
        engine.dispose()
| 105 | + | ||
| 106 | + | ||
if __name__ == "__main__":
    # Run the cleanup only when executed as a script, not when imported.
    update_cards()