1# -*- coding: utf-8 -*-
2#
3# gPodder - A media aggregator and podcast client
4# Copyright (c) 2005-2018 The gPodder Team
5#
6# gPodder is free software; you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation; either version 3 of the License, or
9# (at your option) any later version.
10#
11# gPodder is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program.  If not, see <http://www.gnu.org/licenses/>.
18#
19
20# gpodder.schema - Database schema update and migration facility
21# Thomas Perl <thp@gpodder.org>; 2011-02-01
22
23import logging
24import shutil
25import time
26from sqlite3 import dbapi2 as sqlite
27
28from gpodder import util
29
30logger = logging.getLogger(__name__)
31
32
33EpisodeColumns = (
34    'podcast_id',
35    'title',
36    'description',
37    'url',
38    'published',
39    'guid',
40    'link',
41    'file_size',
42    'mime_type',
43    'state',
44    'is_new',
45    'archive',
46    'download_filename',
47    'total_time',
48    'current_position',
49    'current_position_updated',
50    'last_playback',
51    'payment_url',
52    'description_html',
53)
54
55PodcastColumns = (
56    'title',
57    'url',
58    'link',
59    'description',
60    'cover_url',
61    'auth_username',
62    'auth_password',
63    'http_last_modified',
64    'http_etag',
65    'auto_archive_episodes',
66    'download_folder',
67    'pause_subscription',
68    'section',
69    'payment_url',
70    'download_strategy',
71    'sync_to_mp3_player',
72    'cover_thumb',
73)
74
75CURRENT_VERSION = 7
76
77
78# SQL commands to upgrade old database versions to new ones
79# Each item is a tuple (old_version, new_version, sql_commands) that should be
80# applied to the database to migrate from old_version to new_version.
81UPGRADE_SQL = [
82        # Version 2: Section labels for the podcast list
83        (1, 2, """
84        ALTER TABLE podcast ADD COLUMN section TEXT NOT NULL DEFAULT ''
85        """),
86
87        # Version 3: Flattr integration (+ invalidate http_* fields to force
88        # a feed update, so that payment URLs are parsed during the next check)
89        (2, 3, """
90        ALTER TABLE podcast ADD COLUMN payment_url TEXT NULL DEFAULT NULL
91        ALTER TABLE episode ADD COLUMN payment_url TEXT NULL DEFAULT NULL
92        UPDATE podcast SET http_last_modified=NULL, http_etag=NULL
93        """),
94
95        # Version 4: Per-podcast download strategy management
96        (3, 4, """
97        ALTER TABLE podcast ADD COLUMN download_strategy INTEGER NOT NULL DEFAULT 0
98        """),
99
100        # Version 5: Per-podcast MP3 player device synchronization option
101        (4, 5, """
102        ALTER TABLE podcast ADD COLUMN sync_to_mp3_player INTEGER NOT NULL DEFAULT 1
103        """),
104
105        # Version 6: Add thumbnail for cover art
106        (5, 6, """
107        ALTER TABLE podcast ADD COLUMN cover_thumb BLOB NULL DEFAULT NULL
108        """),
109
110        # Version 7: Add HTML description
111        (6, 7, """
112        ALTER TABLE episode ADD COLUMN description_html TEXT NOT NULL DEFAULT ''
113        UPDATE episode SET description_html=description WHERE is_html(description)
114        UPDATE episode SET description=remove_html_tags(description_html) WHERE is_html(description)
115        UPDATE podcast SET http_last_modified=NULL, http_etag=NULL
116        """),
117]
118
119
120def initialize_database(db):
121    # Create table for podcasts
122    db.execute("""
123    CREATE TABLE podcast (
124        id INTEGER PRIMARY KEY NOT NULL,
125        title TEXT NOT NULL DEFAULT '',
126        url TEXT NOT NULL DEFAULT '',
127        link TEXT NOT NULL DEFAULT '',
128        description TEXT NOT NULL DEFAULT '',
129        cover_url TEXT NULL DEFAULT NULL,
130        auth_username TEXT NULL DEFAULT NULL,
131        auth_password TEXT NULL DEFAULT NULL,
132        http_last_modified TEXT NULL DEFAULT NULL,
133        http_etag TEXT NULL DEFAULT NULL,
134        auto_archive_episodes INTEGER NOT NULL DEFAULT 0,
135        download_folder TEXT NOT NULL DEFAULT '',
136        pause_subscription INTEGER NOT NULL DEFAULT 0,
137        section TEXT NOT NULL DEFAULT '',
138        payment_url TEXT NULL DEFAULT NULL,
139        download_strategy INTEGER NOT NULL DEFAULT 0,
140        sync_to_mp3_player INTEGER NOT NULL DEFAULT 1,
141        cover_thumb BLOB NULL DEFAULT NULL
142    )
143    """)
144
145    INDEX_SQL = """
146    CREATE UNIQUE INDEX idx_podcast_url ON podcast (url)
147    CREATE UNIQUE INDEX idx_podcast_download_folder ON podcast (download_folder)
148    """
149
150    for sql in INDEX_SQL.strip().split('\n'):
151        db.execute(sql)
152
153    # Create table for episodes
154    db.execute("""
155    CREATE TABLE episode (
156        id INTEGER PRIMARY KEY NOT NULL,
157        podcast_id INTEGER NOT NULL,
158        title TEXT NOT NULL DEFAULT '',
159        description TEXT NOT NULL DEFAULT '',
160        url TEXT NOT NULL,
161        published INTEGER NOT NULL DEFAULT 0,
162        guid TEXT NOT NULL,
163        link TEXT NOT NULL DEFAULT '',
164        file_size INTEGER NOT NULL DEFAULT 0,
165        mime_type TEXT NOT NULL DEFAULT 'application/octet-stream',
166        state INTEGER NOT NULL DEFAULT 0,
167        is_new INTEGER NOT NULL DEFAULT 0,
168        archive INTEGER NOT NULL DEFAULT 0,
169        download_filename TEXT NULL DEFAULT NULL,
170        total_time INTEGER NOT NULL DEFAULT 0,
171        current_position INTEGER NOT NULL DEFAULT 0,
172        current_position_updated INTEGER NOT NULL DEFAULT 0,
173        last_playback INTEGER NOT NULL DEFAULT 0,
174        payment_url TEXT NULL DEFAULT NULL,
175        description_html TEXT NOT NULL DEFAULT ''
176    )
177    """)
178
179    INDEX_SQL = """
180    CREATE INDEX idx_episode_podcast_id ON episode (podcast_id)
181    CREATE UNIQUE INDEX idx_episode_download_filename ON episode (podcast_id, download_filename)
182    CREATE UNIQUE INDEX idx_episode_guid ON episode (podcast_id, guid)
183    CREATE INDEX idx_episode_state ON episode (state)
184    CREATE INDEX idx_episode_is_new ON episode (is_new)
185    CREATE INDEX idx_episode_archive ON episode (archive)
186    CREATE INDEX idx_episode_published ON episode (published)
187    """
188
189    for sql in INDEX_SQL.strip().split('\n'):
190        db.execute(sql)
191
192    # Create table for version info / metadata + insert initial data
193    db.execute("""CREATE TABLE version (version integer)""")
194    db.execute("INSERT INTO version (version) VALUES (%d)" % CURRENT_VERSION)
195    db.commit()
196
197
198def upgrade(db, filename):
199    if not list(db.execute('PRAGMA table_info(version)')):
200        initialize_database(db)
201        return
202
203    db.create_function('is_html', 1, util.is_html)
204    db.create_function('remove_html_tags', 1, util.remove_html_tags)
205
206    version = db.execute('SELECT version FROM version').fetchone()[0]
207    if version == CURRENT_VERSION:
208        return
209
210    # We are trying an upgrade - save the current version of the DB
211    backup = '%s_upgraded-v%d_%d' % (filename, int(version), int(time.time()))
212    try:
213        shutil.copy(filename, backup)
214    except Exception as e:
215        raise Exception('Cannot create DB backup before upgrade: ' + e)
216
217    db.execute("DELETE FROM version")
218
219    for old_version, new_version, upgrade in UPGRADE_SQL:
220        if version == old_version:
221            for sql in upgrade.strip().split('\n'):
222                db.execute(sql)
223            version = new_version
224
225    assert version == CURRENT_VERSION
226
227    db.execute("INSERT INTO version (version) VALUES (%d)" % version)
228    db.commit()
229
230    if version != CURRENT_VERSION:
231        raise Exception('Database schema version unknown')
232
233
234def convert_gpodder2_db(old_db, new_db):
235    """Convert gPodder 2.x databases to the new format
236
237    Both arguments should be SQLite3 connections to the
238    corresponding databases.
239    """
240
241    old_db = sqlite.connect(old_db)
242    new_db_filename = new_db
243    new_db = sqlite.connect(new_db)
244    upgrade(new_db, new_db_filename)
245
246    # Copy data for podcasts
247    old_cur = old_db.cursor()
248    columns = [x[1] for x in old_cur.execute('PRAGMA table_info(channels)')]
249    for row in old_cur.execute('SELECT * FROM channels'):
250        row = dict(list(zip(columns, row)))
251        values = (
252                row['id'],
253                row['override_title'] or row['title'],
254                row['url'],
255                row['link'],
256                row['description'],
257                row['image'],
258                row['username'] or None,
259                row['password'] or None,
260                row['last_modified'] or None,
261                row['etag'] or None,
262                row['channel_is_locked'],
263                row['foldername'],
264                not row['feed_update_enabled'],
265                '',
266                None,
267                0,
268                row['sync_to_devices'],
269                None,
270        )
271        new_db.execute("""
272        INSERT INTO podcast VALUES (%s)
273        """ % ', '.join('?' * len(values)), values)
274    old_cur.close()
275
276    # Copy data for episodes
277    old_cur = old_db.cursor()
278    columns = [x[1] for x in old_cur.execute('PRAGMA table_info(episodes)')]
279    for row in old_cur.execute('SELECT * FROM episodes'):
280        row = dict(list(zip(columns, row)))
281        values = (
282                row['id'],
283                row['channel_id'],
284                row['title'],
285                row['description'],
286                row['url'],
287                row['pubDate'],
288                row['guid'],
289                row['link'],
290                row['length'],
291                row['mimetype'],
292                row['state'],
293                not row['played'],
294                row['locked'],
295                row['filename'],
296                row['total_time'],
297                row['current_position'],
298                row['current_position_updated'],
299                0,
300                None,
301                '',
302        )
303        new_db.execute("""
304        INSERT INTO episode VALUES (%s)
305        """ % ', '.join('?' * len(values)), values)
306        # do 6 -> 7 upgrade (description_html)
307        new_db.create_function('is_html', 1, util.is_html)
308        new_db.create_function('remove_html_tags', 1, util.remove_html_tags)
309        new_db.execute("UPDATE episode SET description_html=description WHERE is_html(description)")
310        new_db.execute("UPDATE episode SET description=remove_html_tags(description_html) WHERE is_html(description)")
311        new_db.execute("UPDATE podcast SET http_last_modified=NULL, http_etag=NULL")
312
313    old_cur.close()
314
315    old_db.close()
316    new_db.commit()
317    new_db.close()
318
319
320def check_data(db):
321    # All episodes must be assigned to a podcast
322    orphan_episodes = db.get('SELECT COUNT(id) FROM episode '
323            'WHERE podcast_id NOT IN (SELECT id FROM podcast)')
324    if orphan_episodes > 0:
325        logger.error('Orphaned episodes found in database')
326