cleanup_jpeg_from_contacts.py
· 2.9 KiB · Python
Raw
#!/usr/bin/env python
"""
A simple script that strips embedded PHOTO properties (typically big JPEG
blobs) from the contact cards stored in your Nextcloud database.
:author: Fabio Manganiello <info@fabiomanganiello.com>
"""
import re
import sqlalchemy as sa
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
# Declarative base class that the ORM model(s) in this file inherit from.
Base = declarative_base()
# SQLAlchemy connection URL passed to create_engine(). The user, password,
# host and database name look like placeholders -- replace before running.
db_url = "mysql+pymysql://user:pwd@dbhost/nextclouddb"
class Card(Base):
    """ORM mapping for one row of the ``oc_cards`` table.

    Only ``carddata`` and ``size`` are written by this script; the remaining
    columns are mapped but not modified here.
    """

    __tablename__ = "oc_cards"

    # Primary key; also used to order the table when iterating over it.
    id = sa.Column("id", sa.BigInteger, primary_key=True)
    # Presumably the owning address book's id -- verify against the schema.
    addressbookid = sa.Column("addressbookid", sa.BigInteger)
    # Raw card payload (vCard text stored as a binary blob).
    carddata = sa.Column("carddata", sa.LargeBinary)
    uri = sa.Column("uri", sa.String)
    # NOTE(review): looks like a Unix timestamp -- confirm.
    lastmodified = sa.Column("lastmodified", sa.BigInteger)
    etag = sa.Column("etag", sa.String)
    # Byte length of ``carddata``; kept in sync when the payload is rewritten.
    size = sa.Column("size", sa.BigInteger)
    uid = sa.Column("uid", sa.String)
# Matches the start of a PHOTO content line: an optional "group." prefix, the
# property name in any letter case, then either ";" (parameters follow) or ":"
# (value follows directly, e.g. "PHOTO:data:image/jpeg;base64,...").
_PHOTO_PROPERTY = re.compile(rb"(?:[A-Za-z0-9-]+\.)?PHOTO[;:]", re.IGNORECASE)


def remove_photo_attributes(vcard_data):
    """Return *vcard_data* with every PHOTO property removed.

    A PHOTO property is dropped together with its folded continuation lines
    (the following lines that start with a space or a tab). All other lines,
    including their original ``\\r`` line-ending bytes, are passed through
    untouched.

    The card is processed as raw bytes, so byte sequences that are not valid
    UTF-8 are preserved as-is instead of being replaced.

    Args:
        vcard_data: The card as ``bytes`` (or ``bytearray``/``memoryview``),
            or as ``str``, which is encoded to UTF-8 first.

    Returns:
        The cleaned card as ``bytes``.

    Raises:
        TypeError: If *vcard_data* is neither bytes-like nor ``str``.
    """
    if isinstance(vcard_data, str):
        raw = vcard_data.encode("utf-8")
    elif isinstance(vcard_data, (bytes, bytearray, memoryview)):
        raw = bytes(vcard_data)
    else:
        raise TypeError(
            f"vcard_data must be bytes or str, not {type(vcard_data).__name__}"
        )

    kept = []
    in_photo = False
    for line in raw.split(b"\n"):
        # Folded continuation of the PHOTO property being dropped.
        if in_photo and line.startswith((b" ", b"\t")):
            continue
        in_photo = _PHOTO_PROPERTY.match(line) is not None
        if not in_photo:
            kept.append(line)

    return b"\n".join(kept)
def update_cards(batch_size=100):
    """Strip PHOTO properties from every stored contact card, batch by batch.

    Walks the ``oc_cards`` table in ascending ``id`` order. Each card whose
    data changes after ``remove_photo_attributes`` is rewritten and its
    ``size`` column set to the new byte length. One commit is issued per
    batch.

    Args:
        batch_size: Maximum number of cards loaded and committed per
            transaction. Defaults to 100.

    Any exception rolls back the current, uncommitted batch and is printed
    rather than re-raised; batches committed earlier stay committed. The
    session is always closed.
    """
    engine = sa.create_engine(db_url)
    Session = sessionmaker(bind=engine, autocommit=False)
    session = Session()

    try:
        processed = 0
        last_id = None

        while True:
            # Keyset pagination: resume after the last id seen instead of using
            # OFFSET, so the database never re-scans already processed rows and
            # rows inserted or deleted meanwhile cannot shift the window.
            query = session.query(Card).order_by(Card.id)
            if last_id is not None:
                query = query.filter(Card.id > last_id)
            cards = query.limit(batch_size).all()
            if not cards:
                break

            # Read before commit(): with the session defaults, commit expires
            # loaded attributes and reading them afterwards costs a reload.
            last_id = cards[-1].id

            for card in cards:
                old_carddata = card.carddata
                if old_carddata is None:
                    # Nothing to clean on a row without a payload.
                    continue

                new_carddata = remove_photo_attributes(old_carddata)
                if new_carddata == old_carddata:
                    continue

                # Fall back to the real payload length if the size column is NULL.
                old_size = card.size if card.size is not None else len(old_carddata)
                print(
                    f"Card: id={card.id} old_size={old_size / 1000}KB new_size={len(new_carddata) / 1000}KB"
                )
                card.carddata = new_carddata
                # Keep the size column consistent with the rewritten payload.
                card.size = len(new_carddata)
                # NOTE(review): etag and lastmodified are left untouched, so
                # sync clients may not notice the change -- confirm whether
                # Nextcloud needs them refreshed.

            session.commit()
            # Count the rows actually fetched so the last, partial batch is
            # not over-reported.
            processed += len(cards)
            print(f"Processed {processed} records")

    except Exception as e:
        session.rollback()
        print(f"Error: {e}")
    finally:
        session.close()
# Script entry point: run the cleanup only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    update_cards()
1 | #!/usr/bin/env python |
2 | |
3 | """ |
4 | A simple script that removes big JPEG blobs from the contact cards on your |
5 | Nextcloud db. |
6 | |
7 | :author: Fabio Manganiello <info@fabiomanganiello.com> |
8 | """ |
9 | |
10 | import re |
11 | import sqlalchemy as sa |
12 | from sqlalchemy.ext.declarative import declarative_base |
13 | from sqlalchemy.orm import sessionmaker |
14 | |
15 | Base = declarative_base() |
16 | |
17 | db_url = "mysql+pymysql://user:pwd@dbhost/nextclouddb" |
18 | |
19 | |
20 | class Card(Base): |
21 | __tablename__ = "oc_cards" |
22 | |
23 | id = sa.Column(sa.BigInteger, primary_key=True) |
24 | addressbookid = sa.Column(sa.BigInteger) |
25 | carddata = sa.Column(sa.LargeBinary) |
26 | uri = sa.Column(sa.String) |
27 | lastmodified = sa.Column(sa.BigInteger) |
28 | etag = sa.Column(sa.String) |
29 | size = sa.Column(sa.BigInteger) |
30 | uid = sa.Column(sa.String) |
31 | |
32 | |
33 | def remove_photo_attributes(vcard_data): |
34 | # Convert binary to string |
35 | if isinstance(vcard_data, bytes): |
36 | vcard_data = vcard_data.decode("utf-8", errors="replace") |
37 | |
38 | # Split into lines |
39 | lines = vcard_data.split("\n") |
40 | result_lines = [] |
41 | |
42 | i = 0 |
43 | while i < len(lines): |
44 | line = lines[i] |
45 | |
46 | # Check if this is a PHOTO attribute |
47 | if line.startswith("PHOTO;"): |
48 | # Skip this line and any continuation lines (starting with space) |
49 | i += 1 |
50 | while i < len(lines) and ( |
51 | lines[i].startswith(" ") or lines[i].startswith("\t") |
52 | ): |
53 | i += 1 |
54 | else: |
55 | # Keep this line |
56 | result_lines.append(line) |
57 | i += 1 |
58 | |
59 | # Join back into a string |
60 | return "\n".join(result_lines).encode("utf-8") |
61 | |
62 | |
63 | def update_cards(): |
64 | engine = sa.create_engine(db_url) |
65 | Session = sessionmaker(bind=engine, autocommit=False) |
66 | session = Session() |
67 | |
68 | try: |
69 | # Process in batches to avoid memory issues |
70 | batch_size = 100 |
71 | offset = 0 |
72 | |
73 | while True: |
74 | cards = ( |
75 | session.query(Card) |
76 | .order_by(Card.id) |
77 | .offset(offset) |
78 | .limit(batch_size) |
79 | .all() |
80 | ) |
81 | if not cards: |
82 | break |
83 | |
84 | for card in cards: |
85 | new_carddata = remove_photo_attributes(card.carddata) |
86 | if new_carddata != card.carddata: |
87 | card.carddata = new_carddata |
88 | # Update size field to reflect new size |
89 | print( |
90 | f"Card: id={card.id} old_size={card.size / 1000}KB new_size={len(new_carddata) / 1000}KB" |
91 | ) |
92 | card.size = len(new_carddata) |
93 | # Update etag (optional, depends on how Nextcloud generates etags) |
94 | # card.etag = generate_new_etag() |
95 | |
96 | session.commit() |
97 | offset += batch_size |
98 | print(f"Processed {offset} records") |
99 | |
100 | except Exception as e: |
101 | session.rollback() |
102 | print(f"Error: {e}") |
103 | finally: |
104 | session.close() |
105 | |
106 | |
107 | if __name__ == "__main__": |
108 | update_cards() |
109 |