fabio revised this gist . Go to revision
No changes
fabio revised this gist . Go to revision
1 file changed, 0 insertions, 0 deletions
cleanup_jpeg_from_contacts.py renamed to cleanup_jpeg_from_nc_contacts.py
File renamed without changes
Fabio Manganiello revised this gist . Go to revision
1 file changed, 108 insertions
cleanup_jpeg_from_contacts.py(file created)
@@ -0,0 +1,108 @@ | |||
1 | + | #!/usr/bin/env python | |
2 | + | ||
3 | + | """ | |
4 | + | A simple script that removes big JPEG blobs from the contact cards on your | |
5 | + | Nextcloud db. | |
6 | + | ||
7 | + | :author: Fabio Manganiello <info@fabiomanganiello.com> | |
8 | + | """ | |
9 | + | ||
import os
import re

import sqlalchemy as sa
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
14 | + | ||
# Declarative base class that the ORM model class in this file inherits from.
Base = declarative_base()
16 | + | ||
# SQLAlchemy URL of the Nextcloud database. Set the NEXTCLOUD_DB_URL
# environment variable to keep real credentials out of this file; the literal
# below is only a placeholder fallback.
db_url = os.environ.get(
    "NEXTCLOUD_DB_URL", "mysql+pymysql://user:pwd@dbhost/nextclouddb"
)
18 | + | ||
19 | + | ||
class Card(Base):
    """ORM mapping for a row of the ``oc_cards`` table.

    ``carddata`` holds the raw card payload as bytes and ``size`` holds its
    length; the remaining columns are mapped so rows load completely.
    """

    __tablename__ = "oc_cards"

    # Primary key; also used to order rows when iterating over the table.
    id = sa.Column(sa.BigInteger(), primary_key=True)
    addressbookid = sa.Column(sa.BigInteger())
    # Raw card payload (binary blob).
    carddata = sa.Column(sa.LargeBinary())
    uri = sa.Column(sa.String())
    lastmodified = sa.Column(sa.BigInteger())
    etag = sa.Column(sa.String())
    # Byte length of ``carddata``.
    size = sa.Column(sa.BigInteger())
    uid = sa.Column(sa.String())
31 | + | ||
32 | + | ||
# Matches the start of a PHOTO content line: an optional "group." prefix, the
# property name in any letter case, then ";" (parameters) or ":" (value).
_PHOTO_LINE_RE = re.compile(rb"(?:[A-Za-z0-9-]+\.)?PHOTO[;:]", re.IGNORECASE)


def remove_photo_attributes(vcard_data):
    """Return ``vcard_data`` with every PHOTO property removed.

    The card is processed as raw bytes, so any byte that is not valid UTF-8 is
    passed through untouched instead of being rewritten; a card without a
    PHOTO property is therefore returned byte-for-byte identical.

    A PHOTO property is a line starting with ``PHOTO;`` or ``PHOTO:`` (any
    letter case, optionally prefixed by ``group.``) together with all of its
    folded continuation lines, i.e. the following lines that begin with a
    space or a tab.

    :param vcard_data: the card as ``bytes`` or ``str`` (``str`` is encoded as
        UTF-8 first); ``None`` is returned unchanged.
    :return: the filtered card as ``bytes``, or ``None`` if the input was
        ``None``.
    """
    if vcard_data is None:
        return None
    if isinstance(vcard_data, str):
        vcard_data = vcard_data.encode("utf-8")

    kept_lines = []
    in_photo = False
    # Splitting on LF only keeps any trailing CR on each line, so the original
    # line endings survive the final join unchanged.
    for line in vcard_data.split(b"\n"):
        if in_photo and line.startswith((b" ", b"\t")):
            # Folded continuation of the PHOTO value being dropped.
            continue
        in_photo = _PHOTO_LINE_RE.match(line) is not None
        if not in_photo:
            kept_lines.append(line)

    return b"\n".join(kept_lines)
61 | + | ||
62 | + | ||
def update_cards(batch_size=100):
    """Strip PHOTO properties from every row of ``oc_cards`` and save the result.

    Rows are read in ascending ``id`` order, ``batch_size`` at a time, and each
    batch is committed separately so memory use stays bounded. For every card
    whose payload changes, ``carddata`` and ``size`` are rewritten and a
    summary line is printed.

    Any exception rolls back the current (uncommitted) batch and is reported
    on stdout; batches committed before the failure stay committed.

    :param batch_size: maximum number of cards loaded and committed per batch.
    """
    engine = sa.create_engine(db_url)
    Session = sessionmaker(bind=engine)
    session = Session()

    try:
        processed = 0
        last_id = None

        while True:
            query = session.query(Card)
            if last_id is not None:
                # Keyset pagination: resume after the last id seen instead of
                # using OFFSET, which makes the database re-scan every
                # previously processed row on each batch.
                query = query.filter(Card.id > last_id)

            cards = query.order_by(Card.id).limit(batch_size).all()
            if not cards:
                break

            for card in cards:
                if card.carddata is None:
                    # Nothing to clean on a row without a payload.
                    continue

                new_carddata = remove_photo_attributes(card.carddata)
                if new_carddata != card.carddata:
                    # A NULL size must not abort the run; report it as 0.
                    old_size = card.size or 0
                    print(
                        f"Card: id={card.id} old_size={old_size / 1000}KB new_size={len(new_carddata) / 1000}KB"
                    )
                    card.carddata = new_carddata
                    # Keep the size column in sync with the new payload.
                    card.size = len(new_carddata)
                    # NOTE(review): etag and lastmodified are left untouched;
                    # confirm how Nextcloud derives etags before changing them.

            # Read the id before committing: commit expires loaded objects, so
            # reading it afterwards would cost an extra SELECT.
            last_id = cards[-1].id
            session.commit()
            # Count the rows actually seen so a short final batch is not
            # reported as a full one.
            processed += len(cards)
            print(f"Processed {processed} records")

    except Exception as e:
        session.rollback()
        print(f"Error: {e}")
    finally:
        session.close()
        # Release pooled database connections held by the engine.
        engine.dispose()
105 | + | ||
106 | + | ||
# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    update_cards()