cleanup_jpeg_from_nc_contacts.py
· 2.9 KiB · Python
Raw
#!/usr/bin/env python
"""
A simple script that removes big JPEG blobs from the contact cards on your
Nextcloud db.
:author: Fabio Manganiello <info@fabiomanganiello.com>
"""
import re
import sqlalchemy as sa
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
# Declarative base class that the ORM model(s) in this script inherit from.
Base = declarative_base()
# SQLAlchemy connection URL of the Nextcloud database. The credentials, host
# and database name look like placeholders -- replace them before running.
db_url = "mysql+pymysql://user:pwd@dbhost/nextclouddb"
class Card(Base):
    """ORM mapping for rows of the ``oc_cards`` table.

    Only the columns declared below are mapped; ``carddata`` and ``size`` are
    the ones this script rewrites.
    """

    __tablename__ = "oc_cards"

    # Primary key; update_cards() orders its batches by it.
    id = sa.Column(sa.BigInteger, primary_key=True)
    # Presumably the id of the owning address book -- not used by this script.
    addressbookid = sa.Column(sa.BigInteger)
    # Raw card payload as a binary blob; passed to remove_photo_attributes().
    carddata = sa.Column(sa.LargeBinary)
    # Not used by this script (presumably the card's resource name -- confirm).
    uri = sa.Column(sa.String)
    # Not used by this script (presumably a modification timestamp -- confirm).
    lastmodified = sa.Column(sa.BigInteger)
    # Not modified by this script; see the etag note in update_cards().
    etag = sa.Column(sa.String)
    # Stored payload size; update_cards() sets it to len() of the new carddata.
    size = sa.Column(sa.BigInteger)
    # Not used by this script (presumably the card's UID -- confirm).
    uid = sa.Column(sa.String)
# Matches the start of a PHOTO content line: an optional "group." prefix, the
# case-insensitive property name, then either ";" (parameters follow) or ":"
# (the value follows directly, e.g. "PHOTO:data:image/jpeg;base64,...").
_PHOTO_PROPERTY = re.compile(rb"(?:[A-Za-z0-9-]+\.)?PHOTO[;:]", re.IGNORECASE)


def remove_photo_attributes(vcard_data):
    """Return *vcard_data* with every PHOTO property stripped out.

    The payload is processed as raw bytes, so any byte sequence that is not
    part of a PHOTO property -- including invalid UTF-8 -- is returned
    unchanged. A card without a photo therefore compares equal to its input.

    :param vcard_data: The vCard payload as ``bytes`` (``str`` is accepted and
        encoded as UTF-8). ``None`` is passed through unchanged.
    :return: The payload as ``bytes`` without PHOTO lines and their folded
        continuation lines, or ``None`` if the input was ``None``.
    """
    if vcard_data is None:
        # Nothing to clean; returning None keeps "changed?" comparisons false.
        return None
    if isinstance(vcard_data, str):
        vcard_data = vcard_data.encode("utf-8")

    kept_lines = []
    in_photo = False
    # Splitting on "\n" only leaves any "\r" attached to its line, so the
    # original line endings are restored verbatim by the join below.
    for line in vcard_data.split(b"\n"):
        # Folded continuation lines start with a space or a tab; drop them
        # only while they belong to a PHOTO property being skipped.
        if in_photo and line[:1] in (b" ", b"\t"):
            continue
        in_photo = _PHOTO_PROPERTY.match(line) is not None
        if not in_photo:
            kept_lines.append(line)

    return b"\n".join(kept_lines)
def update_cards(batch_size=100):
    """Strip PHOTO properties from every stored card, one batch at a time.

    Rows are read in primary-key order, at most *batch_size* per round, and
    each batch is committed on its own. For every card whose payload changes,
    ``carddata`` and ``size`` are rewritten and a summary line is printed.
    Any error is printed after rolling back the current batch; batches that
    were already committed stay committed.

    :param batch_size: Maximum number of cards loaded per query.
    :raises ValueError: If *batch_size* is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    engine = sa.create_engine(db_url)
    Session = sessionmaker(bind=engine, autocommit=False)
    session = Session()

    try:
        processed = 0
        last_id = None

        while True:
            # Keyset pagination: resume after the last id seen instead of
            # using OFFSET, which makes the database rescan all skipped rows.
            query = session.query(Card)
            if last_id is not None:
                query = query.filter(Card.id > last_id)
            cards = query.order_by(Card.id).limit(batch_size).all()
            if not cards:
                break

            for card in cards:
                old_carddata = card.carddata
                new_carddata = remove_photo_attributes(old_carddata)
                if new_carddata == old_carddata:
                    continue

                # The stored size may be NULL; fall back to the real length.
                old_size = card.size if card.size is not None else len(old_carddata)
                new_size = len(new_carddata)
                print(
                    f"Card: id={card.id} old_size={old_size / 1000}KB "
                    f"new_size={new_size / 1000}KB"
                )
                card.carddata = new_carddata
                # Update size field to reflect new size
                card.size = new_size
                # NOTE: the etag column is left untouched; whether it has to
                # be regenerated depends on how Nextcloud generates etags.

            # Read the id before committing: commit expires loaded objects,
            # so touching it afterwards would trigger an extra SELECT.
            last_id = cards[-1].id
            session.commit()

            # Count the rows actually seen so the last, partial batch is not
            # over-reported.
            processed += len(cards)
            print(f"Processed {processed} records")

    except Exception as e:
        session.rollback()
        print(f"Error: {e}")
    finally:
        session.close()
        # Release the pooled connections held by the engine.
        engine.dispose()
# Run the cleanup only when executed as a script, not when imported.
if __name__ == "__main__":
    update_cards()
| 1 | #!/usr/bin/env python |
| 2 | |
| 3 | """ |
| 4 | A simple script that removes big JPEG blobs from the contact cards on your |
| 5 | Nextcloud db. |
| 6 | |
| 7 | :author: Fabio Manganiello <info@fabiomanganiello.com> |
| 8 | """ |
| 9 | |
| 10 | import re |
| 11 | import sqlalchemy as sa |
| 12 | from sqlalchemy.ext.declarative import declarative_base |
| 13 | from sqlalchemy.orm import sessionmaker |
| 14 | |
| 15 | Base = declarative_base() |
| 16 | |
| 17 | db_url = "mysql+pymysql://user:pwd@dbhost/nextclouddb" |
| 18 | |
| 19 | |
| 20 | class Card(Base): |
| 21 | __tablename__ = "oc_cards" |
| 22 | |
| 23 | id = sa.Column(sa.BigInteger, primary_key=True) |
| 24 | addressbookid = sa.Column(sa.BigInteger) |
| 25 | carddata = sa.Column(sa.LargeBinary) |
| 26 | uri = sa.Column(sa.String) |
| 27 | lastmodified = sa.Column(sa.BigInteger) |
| 28 | etag = sa.Column(sa.String) |
| 29 | size = sa.Column(sa.BigInteger) |
| 30 | uid = sa.Column(sa.String) |
| 31 | |
| 32 | |
| 33 | def remove_photo_attributes(vcard_data): |
| 34 | # Convert binary to string |
| 35 | if isinstance(vcard_data, bytes): |
| 36 | vcard_data = vcard_data.decode("utf-8", errors="replace") |
| 37 | |
| 38 | # Split into lines |
| 39 | lines = vcard_data.split("\n") |
| 40 | result_lines = [] |
| 41 | |
| 42 | i = 0 |
| 43 | while i < len(lines): |
| 44 | line = lines[i] |
| 45 | |
| 46 | # Check if this is a PHOTO attribute |
| 47 | if line.startswith("PHOTO;"): |
| 48 | # Skip this line and any continuation lines (starting with space) |
| 49 | i += 1 |
| 50 | while i < len(lines) and ( |
| 51 | lines[i].startswith(" ") or lines[i].startswith("\t") |
| 52 | ): |
| 53 | i += 1 |
| 54 | else: |
| 55 | # Keep this line |
| 56 | result_lines.append(line) |
| 57 | i += 1 |
| 58 | |
| 59 | # Join back into a string |
| 60 | return "\n".join(result_lines).encode("utf-8") |
| 61 | |
| 62 | |
| 63 | def update_cards(): |
| 64 | engine = sa.create_engine(db_url) |
| 65 | Session = sessionmaker(bind=engine, autocommit=False) |
| 66 | session = Session() |
| 67 | |
| 68 | try: |
| 69 | # Process in batches to avoid memory issues |
| 70 | batch_size = 100 |
| 71 | offset = 0 |
| 72 | |
| 73 | while True: |
| 74 | cards = ( |
| 75 | session.query(Card) |
| 76 | .order_by(Card.id) |
| 77 | .offset(offset) |
| 78 | .limit(batch_size) |
| 79 | .all() |
| 80 | ) |
| 81 | if not cards: |
| 82 | break |
| 83 | |
| 84 | for card in cards: |
| 85 | new_carddata = remove_photo_attributes(card.carddata) |
| 86 | if new_carddata != card.carddata: |
| 87 | card.carddata = new_carddata |
| 88 | # Update size field to reflect new size |
| 89 | print( |
| 90 | f"Card: id={card.id} old_size={card.size / 1000}KB new_size={len(new_carddata) / 1000}KB" |
| 91 | ) |
| 92 | card.size = len(new_carddata) |
| 93 | # Update etag (optional, depends on how Nextcloud generates etags) |
| 94 | # card.etag = generate_new_etag() |
| 95 | |
| 96 | session.commit() |
| 97 | offset += batch_size |
| 98 | print(f"Processed {offset} records") |
| 99 | |
| 100 | except Exception as e: |
| 101 | session.rollback() |
| 102 | print(f"Error: {e}") |
| 103 | finally: |
| 104 | session.close() |
| 105 | |
| 106 | |
| 107 | if __name__ == "__main__": |
| 108 | update_cards() |
| 109 |