1#!/usr/local/bin/python3.8
2# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
3
4
5__license__   = 'GPL v3'
6__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
7__docformat__ = 'restructuredtext en'
8
9import os, calendar, zipfile
10from threading import RLock
11from datetime import timedelta
12
13from lxml import etree
14from lxml.builder import ElementMaker
15
16from calibre import force_unicode
17from calibre.utils.xml_parse import safe_xml_fromstring
18from calibre.constants import numeric_version
19from calibre.utils.iso8601 import parse_iso8601
20from calibre.utils.date import now as nowf, utcnow, local_tz, isoformat, EPOCH, UNDEFINED_DATE
21from calibre.utils.recycle_bin import delete_file
22from polyglot.builtins import iteritems
23
# XML namespace that every element of a recipe collection document lives in.
NS = 'http://calibre-ebook.com/recipe_collection'
# Element factory producing elements in NS, with NS declared as the default
# (unprefixed) namespace of the generated XML.
E = ElementMaker(namespace=NS, nsmap={None:NS})
26
27
def iterate_over_builtin_recipe_files():
    """Yield ``(recipe_id, path)`` for each builtin ``.recipe`` file.

    The files are read from the ``recipes`` directory located six directory
    levels above this module.  Entries whose extension is not ``.recipe`` and
    the explicitly excluded recipes are skipped.  ``recipe_id`` is the path of
    the file relative to that directory, using ``/`` as separator and with the
    extension removed; ``path`` is the full path of the file.
    """
    skipped = ['craigslist', 'toronto_sun']
    top = os.path.abspath(__file__)
    for _level in range(6):
        top = os.path.dirname(top)
    recipes_dir = os.path.join(top, 'recipes')
    for entry in os.listdir(recipes_dir):
        stem, suffix = os.path.splitext(entry)
        if suffix == '.recipe' and stem not in skipped:
            path = os.path.join(recipes_dir, entry)
            relative = os.path.relpath(path, recipes_dir).replace(os.sep, '/')
            yield os.path.splitext(relative)[0], path
40
41
def serialize_recipe(urn, recipe_class):
    """Return a ``<recipe>`` element describing ``recipe_class``.

    :param urn: Identifier of the recipe; it is stored as the ``id``
        attribute.  A urn starting with ``custom:`` changes the default
        author from "Unknown" to "You".
    :param recipe_class: Object whose ``title``, ``__author__``,
        ``language``, ``needs_subscription`` and ``description`` attributes
        are read.  Missing attributes fall back to defaults and ``bytes``
        values are decoded as UTF-8 (undecodable bytes are replaced).

    NOTE(review): ``_`` is not defined in this file; presumably it is the
    translation function calibre installs as a builtin -- confirm.
    """

    def read(name, fallback):
        value = getattr(recipe_class, name, fallback)
        return value.decode('utf-8', 'replace') if isinstance(value, bytes) else value

    is_custom = urn.startswith('custom:')
    default_author = _('You') if is_custom else _('Unknown')

    # Falsy values become 'no', the literal True becomes 'yes'; any other
    # value is written unchanged.
    subscription = read('needs_subscription', False)
    if not subscription:
        subscription = 'no'
    elif subscription is True:
        subscription = 'yes'

    attributes = {}
    attributes['id'] = str(urn)
    attributes['title'] = read('title', _('Unknown'))
    attributes['author'] = read('__author__', default_author)
    attributes['language'] = read('language', 'und')
    attributes['needs_subscription'] = subscription
    attributes['description'] = read('description', '')
    return E.recipe(attributes)
64
65
def serialize_collection(mapping_of_recipe_classes):
    """Serialize a mapping of recipe classes to a recipe collection document.

    :param mapping_of_recipe_classes: Mapping of urn -> recipe class.
    :return: UTF-8 encoded, pretty printed XML (``bytes``) with an XML
        declaration.  The root ``recipe_collection`` element has one
        ``recipe`` child per successfully serialized recipe, ordered by
        recipe title, and a ``count`` attribute with the number of children.

    A recipe that fails to serialize is reported with a traceback and left
    out of the collection instead of aborting the whole serialization.
    """

    def sort_key(urn):
        # Recipes without a title sort under 'zzz'.
        title = getattr(mapping_of_recipe_classes[urn], 'title', 'zzz')
        return force_unicode(title, 'utf-8')

    collection = E.recipe_collection()
    for urn in sorted(mapping_of_recipe_classes, key=sort_key):
        try:
            recipe = serialize_recipe(urn, mapping_of_recipe_classes[urn])
        except Exception:
            # Only ordinary errors are skipped; KeyboardInterrupt and
            # SystemExit are allowed to propagate.
            import traceback
            traceback.print_exc()
            continue
        collection.append(recipe)
    collection.set('count', str(len(collection)))
    return etree.tostring(collection, encoding='utf-8', xml_declaration=True,
            pretty_print=True)
87
88
def serialize_builtin_recipes():
    """Compile every builtin recipe file and serialize the resulting collection.

    Each recipe is registered under the urn ``builtin:<recipe id>``.  Files
    for which ``compile_recipe`` returns ``None`` are left out.  A file that
    fails to compile is reported on stdout and the error is re-raised.

    :return: The output of :func:`serialize_collection` for the compiled
        recipes.
    """
    from calibre.web.feeds.recipes import compile_recipe
    compiled = {}
    for recipe_id, path in iterate_over_builtin_recipe_files():
        with open(path, 'rb') as src:
            try:
                klass = compile_recipe(src.read())
            except:
                # Name the offending file, then let the error propagate.
                print('Failed to compile: %s'%path)
                raise
        if klass is None:
            continue
        compiled['builtin:'+recipe_id] = klass

    return serialize_collection(compiled)
103
104
def get_builtin_recipe_collection():
    """Parse ``builtin_recipes.xml`` and return its root element.

    NOTE(review): ``P`` is not defined in this file; presumably it is
    calibre's resource path lookup installed as a builtin -- confirm.
    """
    xml_path = P('builtin_recipes.xml', allow_user_override=False)
    tree = etree.parse(xml_path)
    return tree.getroot()
107
108
def get_custom_recipe_collection(*args):
    """Return the root element of a collection describing all custom recipes.

    Every entry of ``custom_recipes`` is read from the directory holding the
    ``custom_recipes`` file, compiled and registered under the urn
    ``custom:<id>``.  A recipe that cannot be read or compiled is reported on
    stdout with a traceback and skipped.

    :param args: Ignored.
    :return: Parsed root element of the serialized collection.
    """
    from calibre.web.feeds.recipes import compile_recipe, \
            custom_recipes
    bdir = os.path.dirname(custom_recipes.file_path)
    rmap = {}
    for id_, x in iteritems(custom_recipes):
        title, fname = x
        path = os.path.join(bdir, fname)
        try:
            with open(path, 'rb') as f:
                source = f.read().decode('utf-8')
            recipe_class = compile_recipe(source)
            if recipe_class is not None:
                rmap['custom:%s'%id_] = recipe_class
        except Exception:
            # One broken recipe must not hide the others, but
            # KeyboardInterrupt/SystemExit are not swallowed.
            print('Failed to load recipe from: %r'%fname)
            import traceback
            traceback.print_exc()
            continue
    return safe_xml_fromstring(serialize_collection(rmap), recover=False)
129
130
def update_custom_recipe(id_, title, script):
    """Update a single custom recipe; see :func:`update_custom_recipes`."""
    entry = (id_, title, script)
    update_custom_recipes([entry])
133
134
def update_custom_recipes(script_ids):
    """Write the given custom recipe scripts to disk and register them.

    :param script_ids: Iterable of ``(id, title, script)`` tuples.  ``id``
        must be convertible to ``int``.  ``script`` may be ``str`` (encoded as
        UTF-8 before writing) or ``bytes``.

    An id that is already registered keeps its existing file name; a new id
    gets a name from ``custom_recipe_filename``.  The script is written
    before the registry entry is updated, so a failed write does not leave
    the registry pointing at a missing file.
    """
    from calibre.web.feeds.recipes import custom_recipes, \
            custom_recipe_filename

    bdir = os.path.dirname(custom_recipes.file_path)
    for id_, title, script in script_ids:

        id_ = str(int(id_))
        existing = custom_recipes.get(id_, None)

        if existing is None:
            fname = custom_recipe_filename(id_, title)
        else:
            fname = existing[1]
        if isinstance(script, str):
            script = script.encode('utf-8')

        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(bdir, exist_ok=True)

        with open(os.path.join(bdir, fname), 'wb') as f:
            f.write(script)

        custom_recipes[id_] = (title, fname)
159
160
def add_custom_recipe(title, script):
    """Add a single custom recipe; see :func:`add_custom_recipes`."""
    single = {title: script}
    add_custom_recipes(single)
163
164
def add_custom_recipes(script_map):
    """Add new custom recipes, assigning each a fresh numeric id.

    :param script_map: Mapping of title -> script.  A ``str`` script is
        encoded as UTF-8 before being written; ``bytes`` are written as is.

    Ids continue from the largest registered id plus one, or start at 1000
    when nothing is registered yet.  Each script is written to the directory
    holding the ``custom_recipes`` file before its registry entry is added,
    so a failed write does not leave an entry without a file.
    """
    from calibre.web.feeds.recipes import custom_recipes, \
            custom_recipe_filename
    id_ = 1000
    keys = tuple(map(int, custom_recipes))
    if keys:
        id_ = max(keys)+1
    bdir = os.path.dirname(custom_recipes.file_path)
    with custom_recipes:
        for title, script in iteritems(script_map):
            fid = str(id_)

            fname = custom_recipe_filename(fid, title)
            if isinstance(script, str):
                script = script.encode('utf-8')

            # exist_ok avoids the check-then-create race of a separate
            # os.path.exists() test.
            os.makedirs(bdir, exist_ok=True)

            with open(os.path.join(bdir, fname), 'wb') as f:
                f.write(script)

            custom_recipes[fid] = (title, fname)
            id_ += 1
189
190
def remove_custom_recipe(id_):
    """Unregister a custom recipe and delete its script file.

    :param id_: Recipe id; must be convertible to ``int``.  An id that is not
        registered is ignored.

    Deleting the file is best effort: a failure is reported with a traceback
    but does not undo the removal from the registry and is not raised.
    """
    from calibre.web.feeds.recipes import custom_recipes
    id_ = str(int(id_))
    existing = custom_recipes.get(id_, None)
    if existing is not None:
        bdir = os.path.dirname(custom_recipes.file_path)
        fname = existing[1]
        del custom_recipes[id_]
        try:
            delete_file(os.path.join(bdir, fname))
        except Exception:
            # Keep going, but leave a trace instead of hiding the failure.
            import traceback
            traceback.print_exc()
203
204
def get_custom_recipe(id_):
    """Return the source of a custom recipe as text.

    :param id_: Recipe id; must be convertible to ``int``.
    :return: The recipe file decoded as UTF-8, or ``None`` when the id is not
        registered.
    """
    from calibre.web.feeds.recipes import custom_recipes
    key = str(int(id_))
    entry = custom_recipes.get(key, None)
    if entry is None:
        return None
    folder = os.path.dirname(custom_recipes.file_path)
    with open(os.path.join(folder, entry[1]), 'rb') as stream:
        raw = stream.read()
        return raw.decode('utf-8')
214
215
def get_builtin_recipe_titles():
    """Return the ``title`` attribute of every builtin recipe, in document order."""
    titles = []
    for recipe in get_builtin_recipe_collection():
        titles.append(recipe.get('title'))
    return titles
218
219
def download_builtin_recipe(urn):
    """Download the current source of a builtin recipe and validate it.

    The bz2 compressed source is fetched over HTTPS, decompressed and decoded
    as UTF-8.  It is then compiled to make sure it is usable.

    :param urn: Recipe name appended to the download URL.
    :return: The recipe source as text.
    :raises ValueError: If no recipe object is found in the downloaded source
        or the recipe requires a newer calibre than the running one.
    """
    from calibre.utils.config_base import prefs
    from calibre.utils.https import get_https_resource_securely
    import bz2
    url = 'https://code.calibre-ebook.com/recipe-compressed/'+urn
    headers = {'CALIBRE-INSTALL-UUID':prefs['installation_uuid']}
    compressed = get_https_resource_securely(url, headers=headers)
    recipe_source = bz2.decompress(compressed).decode('utf-8')
    from calibre.web.feeds.recipes import compile_recipe
    # Compile only to validate; the caller receives the source text.
    compiled = compile_recipe(recipe_source)
    if compiled is None:
        raise ValueError('Failed to find recipe object in downloaded recipe: ' + urn)
    needed = compiled.requires_version
    if needed > numeric_version:
        raise ValueError('Downloaded recipe for {} requires calibre >= {}'.format(urn, compiled.requires_version))
    return recipe_source
234
235
def get_builtin_recipe(urn):
    """Return the source of the bundled recipe ``urn`` as text.

    The member ``<urn>.recipe`` is read from ``builtin_recipes.zip`` and
    decoded as UTF-8.
    """
    archive_path = P('builtin_recipes.zip', allow_user_override=False)
    with zipfile.ZipFile(archive_path, 'r') as archive:
        raw = archive.read(urn+'.recipe')
        return raw.decode('utf-8')
239
240
def _load_builtin_recipe_source(urn, log, download_recipe):
    """Return the source of builtin recipe ``urn``, preferring a fresh download.

    When ``download_recipe`` is true the latest version is downloaded first.
    If that fails, the failure is reported (through ``log.exception`` when a
    log is given, otherwise as a printed traceback) and the bundled version
    is returned instead.
    """
    if download_recipe:
        try:
            if log is not None:
                log('Trying to get latest version of recipe:', urn)
            return download_builtin_recipe(urn)
        except Exception:
            if log is None:
                import traceback
                traceback.print_exc()
            else:
                log.exception(
                'Failed to download recipe, using builtin version')
    return get_builtin_recipe(urn)


def get_builtin_recipe_by_title(title, log=None, download_recipe=False):
    """Return the source of the first builtin recipe titled ``title``.

    :param title: Title to match exactly against the ``title`` attribute.
    :param log: Optional callable log object that also has ``exception()``.
    :param download_recipe: Try to download the latest version first and fall
        back to the bundled one on failure.
    :return: Recipe source as text, or ``None`` if no recipe has that title.
    """
    for x in get_builtin_recipe_collection():
        if x.get('title') == title:
            # Builtin ids have the form 'builtin:<urn>'; drop the prefix.
            urn = x.get('id')[len('builtin:'):]
            return _load_builtin_recipe_source(urn, log, download_recipe)


def get_builtin_recipe_by_id(id_, log=None, download_recipe=False):
    """Return the source of the builtin recipe whose ``id`` attribute is ``id_``.

    :param id_: Full recipe id, including the ``builtin:`` prefix.
    :param log: Optional callable log object that also has ``exception()``.
    :param download_recipe: Try to download the latest version first and fall
        back to the bundled one on failure.
    :return: Recipe source as text, or ``None`` if no recipe has that id.
    """
    for x in get_builtin_recipe_collection():
        if x.get('id') == id_:
            # Builtin ids have the form 'builtin:<urn>'; drop the prefix.
            urn = x.get('id')[len('builtin:'):]
            return _load_builtin_recipe_source(urn, log, download_recipe)
277
278
class SchedulerConfig:
    """Recipe download schedule stored in ``scheduler.xml``.

    ``self.root`` is the XML root element.  Its children, all in the ``NS``
    namespace, are ``scheduled_recipe``, ``account_info`` and
    ``recipe_customization`` elements.  Methods that modify the tree hold
    ``self.lock`` and rewrite the file on disk afterwards.
    """

    def __init__(self):
        """Load ``scheduler.xml`` from the config directory.

        If the file is not readable but an old ``scheduler.pickle`` exists,
        the old configuration is migrated.  A file that cannot be parsed is
        reported and an empty collection is used instead.
        """
        from calibre.utils.config import config_dir
        from calibre.utils.lock import ExclusiveFile
        self.conf_path = os.path.join(config_dir, 'scheduler.xml')
        old_conf_path  = os.path.join(config_dir, 'scheduler.pickle')
        self.root = E.recipe_collection()
        self.lock = RLock()
        if os.access(self.conf_path, os.R_OK):
            with ExclusiveFile(self.conf_path) as f:
                try:
                    self.root = safe_xml_fromstring(f.read(), recover=False)
                except Exception:
                    print('Failed to read recipe scheduler config')
                    import traceback
                    traceback.print_exc()
        elif os.path.exists(old_conf_path):
            self.migrate_old_conf(old_conf_path)

    def _iter_children(self, local_name):
        """Yield the direct children of the root whose tag is ``{NS}local_name``."""
        wanted = '{%s}%s' % (NS, local_name)
        for x in self.root:
            if x.tag == wanted:
                yield x

    def iter_recipes(self):
        """Yield every ``scheduled_recipe`` element."""
        return self._iter_children('scheduled_recipe')

    def iter_accounts(self):
        """Yield every ``account_info`` element."""
        return self._iter_children('account_info')

    def iter_customization(self):
        """Yield every ``recipe_customization`` element."""
        return self._iter_children('recipe_customization')

    def schedule_recipe(self, recipe, schedule_type, schedule, last_downloaded=None):
        """Create or replace the schedule of ``recipe`` and save the file.

        :param recipe: Object with a ``get()`` method providing ``id`` and
            ``title``.
        :param schedule_type: One of the types accepted by
            :meth:`serialize_schedule`.
        :param schedule: Schedule value matching ``schedule_type``.
        :param last_downloaded: Datetime of the last download.  When ``None``
            the value of an existing entry is kept if it can be parsed,
            otherwise ``EPOCH`` is used.
        """
        with self.lock:
            for x in list(self.iter_recipes()):
                if x.get('id', False) == recipe.get('id'):
                    ld = x.get('last_downloaded', None)
                    if ld and last_downloaded is None:
                        try:
                            last_downloaded = parse_iso8601(ld)
                        except Exception:
                            # Unparsable timestamp: fall back to EPOCH below.
                            pass
                    self.root.remove(x)
                    break
            if last_downloaded is None:
                last_downloaded = EPOCH
            sr = E.scheduled_recipe({
                'id' : recipe.get('id'),
                'title': recipe.get('title'),
                'last_downloaded':isoformat(last_downloaded),
                }, self.serialize_schedule(schedule_type, schedule))
            self.root.append(sr)
            self.write_scheduler_file()

    def customize_recipe(self, urn, add_title_tag, custom_tags, keep_issues):
        """Replace the customization entry of recipe ``urn`` and save the file.

        :param urn: Recipe id.
        :param add_title_tag: Stored as ``'yes'`` or ``'no'``.
        :param custom_tags: Iterable of strings, stored comma separated.
        :param keep_issues: Recipe-specific number of copies to keep; stored
            as a string, so both ``int`` and ``str`` values are accepted.
        """
        with self.lock:
            for x in list(self.iter_customization()):
                if x.get('id') == urn:
                    self.root.remove(x)
            cs = E.recipe_customization({
                'keep_issues' : str(keep_issues),
                'id' : urn,
                'add_title_tag' : 'yes' if add_title_tag else 'no',
                'custom_tags' : ','.join(custom_tags),
                })
            self.root.append(cs)
            self.write_scheduler_file()

    def un_schedule_recipe(self, recipe_id):
        """Remove the schedule of ``recipe_id`` (if any) and save the file."""
        with self.lock:
            for x in list(self.iter_recipes()):
                if x.get('id', False) == recipe_id:
                    self.root.remove(x)
                    break
            self.write_scheduler_file()

    def update_last_downloaded(self, recipe_id):
        """Record that ``recipe_id`` has just been downloaded and save the file."""
        with self.lock:
            now = utcnow()
            for x in self.iter_recipes():
                if x.get('id', False) == recipe_id:
                    try:
                        typ, sch, last_downloaded = self.un_serialize_schedule(x)
                    except Exception:
                        # Missing or malformed schedule: still record the
                        # download time instead of failing.
                        typ = None
                    if typ == 'interval':
                        # If the download happened within an hour of its
                        # nominal due time, record the nominal time rather
                        # than the actual one (presumably so that interval
                        # schedules do not drift).
                        actual_interval = now - last_downloaded
                        nominal_interval = timedelta(days=sch)
                        if abs(actual_interval - nominal_interval) < \
                                timedelta(hours=1):
                            now = last_downloaded + nominal_interval
                    x.set('last_downloaded', isoformat(now))
                    break
            self.write_scheduler_file()

    def get_to_be_downloaded_recipes(self):
        """Return the ids of all scheduled recipes that are due for download."""
        with self.lock:
            return [recipe.get('id') for recipe in self.iter_recipes()
                    if self.recipe_needs_to_be_downloaded(recipe)]

    def write_scheduler_file(self):
        """Serialize the tree to ``self.conf_path``, replacing its contents."""
        from calibre.utils.lock import ExclusiveFile
        # Hand-made layout: one blank line between the children of the root.
        self.root.text = '\n\n\t'
        for x in self.root:
            x.tail = '\n\n\t'
        if len(self.root) > 0:
            self.root[-1].tail = '\n\n'
        with ExclusiveFile(self.conf_path) as f:
            f.seek(0)
            f.truncate()
            f.write(etree.tostring(self.root, encoding='utf-8',
                xml_declaration=True, pretty_print=False))

    def serialize_schedule(self, typ, schedule):
        """Return a ``<schedule>`` element for the given schedule.

        :param typ: ``'interval'``, ``'day/time'``, ``'days_of_week'`` or
            ``'days_of_month'``.
        :param schedule: See :meth:`_schedule_text`.
        :raises ValueError: If ``typ`` is not one of the known types.
        """
        text = self._schedule_text(typ, schedule)
        s = E.schedule({'type':typ})
        s.text = text
        return s

    def _schedule_text(self, typ, schedule):
        """Return the text form of ``schedule`` stored in the XML.

        * ``'interval'``: a number of days, raised to at least 0.04.
        * ``'day/time'``: a sequence ``(day, hour, minute)``.
        * ``'days_of_week'`` / ``'days_of_month'``: a sequence
          ``(days, hour, minute)`` where ``days`` is an iterable of ints.

        :raises ValueError: If ``typ`` is not one of the known types.
        """
        if typ == 'interval':
            if schedule < 0.04:
                schedule = 0.04
            return '%f'%schedule
        if typ == 'day/time':
            # tuple() so that a list (as built by add_old_recipe) formats
            # correctly; '%' treats a list as a single value.
            return '%d:%d:%d'%tuple(schedule)
        if typ in ('days_of_week', 'days_of_month'):
            dw = ','.join(map(str, map(int, schedule[0])))
            return '%s:%d:%d'%(dw, schedule[1], schedule[2])
        raise ValueError('Unknown schedule type: %r'%typ)

    def un_serialize_schedule(self, recipe):
        """Parse the schedule stored below a ``scheduled_recipe`` element.

        :return: ``(type, schedule, last_downloaded)`` for the first
            descendant whose tag contains ``schedule``, or ``None`` when
            there is no such descendant.  ``schedule`` is a float for
            ``'interval'``, ``[day, hour, minute]`` for ``'day/time'`` and
            ``[days, hour, minute]`` for the ``days_of_*`` types; for any
            other type the raw text is returned.  ``last_downloaded`` is
            ``UNDEFINED_DATE`` when the stored value cannot be parsed.
        """
        for x in recipe.iterdescendants():
            if 'schedule' in x.tag:
                sch, typ = x.text, x.get('type')
                if typ == 'interval':
                    sch = float(sch)
                elif typ == 'day/time':
                    sch = list(map(int, sch.split(':')))
                elif typ in ('days_of_week', 'days_of_month'):
                    parts = sch.split(':')
                    days = [int(d.strip()) for d in parts[0].split(',')]
                    sch = [days, int(parts[1]), int(parts[2])]
                try:
                    ld = parse_iso8601(recipe.get('last_downloaded'))
                except Exception:
                    ld = UNDEFINED_DATE
                return typ, sch, ld

    def recipe_needs_to_be_downloaded(self, recipe):
        """Return True if the scheduled ``recipe`` element is due for download.

        A recipe whose schedule is missing, malformed or of unknown type is
        never due.  Interval schedules compare against the current UTC time;
        all other types use the local time and are due at most once per
        local calendar day, at or after the scheduled hour and minute.
        """
        try:
            typ, sch, ld = self.un_serialize_schedule(recipe)
        except Exception:
            return False

        def is_time(now, hour, minute):
            return now.hour > hour or \
                    (now.hour == hour and now.minute >= minute)

        def is_weekday(day, now):
            # A day outside 0..6 matches every weekday.
            return day < 0 or day > 6 or \
                    day == calendar.weekday(now.year, now.month, now.day)

        def was_downloaded_already_today(ld_local, now):
            return ld_local.date() == now.date()

        if typ == 'interval':
            return utcnow() - ld > timedelta(sch)
        if typ not in ('day/time', 'days_of_week', 'days_of_month'):
            return False

        now = nowf()
        try:
            ld_local = ld.astimezone(local_tz)
        except Exception:
            return False
        days, hour, minute = sch
        if typ == 'day/time':
            # Here ``days`` is a single weekday number.
            have_day = is_weekday(days, now)
        elif typ == 'days_of_week':
            have_day = any(is_weekday(day, now) for day in days)
        else:
            have_day = now.day in days
        return have_day and \
                not was_downloaded_already_today(ld_local, now) and \
                is_time(now, hour, minute)

    def set_account_info(self, urn, un, pw):
        """Store the username and password for recipe ``urn`` and save the file.

        NOTE: the password is written to the XML file as given, in plain text.
        """
        with self.lock:
            for x in list(self.iter_accounts()):
                if x.get('id', False) == urn:
                    self.root.remove(x)
                    break
            ac = E.account_info({'id':urn, 'username':un, 'password':pw})
            self.root.append(ac)
            self.write_scheduler_file()

    def get_account_info(self, urn):
        """Return ``(username, password)`` for ``urn`` or ``None`` if not stored."""
        with self.lock:
            for x in self.iter_accounts():
                if x.get('id', False) == urn:
                    return x.get('username', ''), x.get('password', '')

    def clear_account_info(self, urn):
        """Remove the stored account of ``urn``; the file is saved only if one existed."""
        with self.lock:
            for x in list(self.iter_accounts()):
                if x.get('id', False) == urn:
                    self.root.remove(x)
                    self.write_scheduler_file()
                    break

    def get_customize_info(self, urn):
        """Return ``(add_title_tag, custom_tags, keep_issues)`` for ``urn``.

        Without a stored customization the defaults ``(True, [], 0)`` are
        returned.  For a stored one ``keep_issues`` is the attribute string
        (``'0'`` when absent) and ``custom_tags`` is the list of non-empty,
        stripped tags.
        """
        keep_issues = 0
        add_title_tag = True
        custom_tags = []
        with self.lock:
            for x in self.iter_customization():
                if x.get('id', False) == urn:
                    keep_issues = x.get('keep_issues', '0')
                    add_title_tag = x.get('add_title_tag', 'yes') == 'yes'
                    # ''.split(',') is [''], so empty entries are dropped to
                    # avoid returning an empty tag.
                    stripped = (i.strip() for i in
                            x.get('custom_tags', '').split(','))
                    custom_tags = [i for i in stripped if i]
                    break
        return add_title_tag, custom_tags, keep_issues

    def get_schedule_info(self, urn):
        """Return ``[type, schedule, last_downloaded]`` for ``urn`` or ``None``."""
        with self.lock:
            for x in self.iter_recipes():
                if x.get('id', False) == urn:
                    ans = list(self.un_serialize_schedule(x))
                    return ans

    def migrate_old_conf(self, old_conf_path):
        """Import schedules and accounts from the old ``scheduler`` config.

        Entries that cannot be converted are skipped.  Afterwards the new
        file is written and ``old_conf_path`` is removed (best effort).
        """
        from calibre.utils.config import DynamicConfig
        c = DynamicConfig('scheduler')
        for r in c.get('scheduled_recipes', []):
            try:
                self.add_old_recipe(r)
            except Exception:
                continue
        for k in c.keys():
            if k.startswith('recipe_account_info'):
                try:
                    urn = k.replace('recipe_account_info_', '')
                    if urn.startswith('recipe_'):
                        urn = 'builtin:'+urn[7:]
                    else:
                        urn = 'custom:%d'%int(urn)
                    try:
                        username, password = c[k]
                    except Exception:
                        username = password = ''
                    self.set_account_info(urn, str(username),
                            str(password))
                except Exception:
                    continue
        del c
        self.write_scheduler_file()
        try:
            os.remove(old_conf_path)
        except OSError:
            # Leaving the old file behind is harmless.
            pass

    def add_old_recipe(self, r):
        """Schedule one recipe described by an old-style config entry.

        :param r: Mapping with the keys ``builtin``, ``id``, ``schedule``,
            ``title`` and ``last_downloaded``.

        Entries whose id cannot be mapped to a urn are ignored.  A schedule
        value above 1e5 is decoded as a day/time schedule from its decimal
        digits; any other value is used as an interval.
        """
        urn = None
        if r['builtin'] and r['id'].startswith('recipe_'):
            urn = 'builtin:'+r['id'][7:]
        elif not r['builtin']:
            try:
                urn = 'custom:%d'%int(r['id'])
            except (TypeError, ValueError):
                return
        if urn is None:
            # Builtin entry with an unrecognised id: nothing to schedule.
            return
        schedule = r['schedule']
        typ = 'interval'
        if schedule > 1e5:
            typ = 'day/time'
            raw = '%d'%int(schedule)
            # Digit 0 is the day, digits 2-3 the hour and the last two the
            # minute, each stored one higher than the actual value.
            day = int(raw[0]) - 1
            hour = int(raw[2:4]) - 1
            minute = int(raw[-2:]) - 1
            if day >= 7:
                day = -1
            schedule = [day, hour, minute]
        recipe = {'id':urn, 'title':r['title']}
        self.schedule_recipe(recipe, typ, schedule,
        last_downloaded=r['last_downloaded'])