1
"""Does the synchronization. Called by "manage-plnt.py sync"."""
2
from datetime import datetime
5
from markupsafe import escape
7
from .database import Blog
8
from .database import Entry
9
from .database import session
10
from .utils import nl2p
11
from .utils import strip_tags
14
# MIME types treated as HTML markup. The sync code compares the "type" of a
# feed entry's title and text against this set to decide between tag
# stripping / raw use and HTML-escaping.
HTML_MIMETYPES = {"text/html", "application/xhtml+xml"}
19
Performs a synchronization. Articles that are already synchronized aren't
22
for blog in Blog.query.all():
23
# parse the feed. feedparser.parse will never raise an exception
24
# but the bozo bit might be set.
25
feed = feedparser.parse(blog.feed_url)
27
for entry in feed.entries:
28
# get the guid. either the id if specified, otherwise the link.
29
# if none is available we skip the entry.
30
guid = entry.get("id") or entry.get("link")
34
# get an old entry for the guid to check if we need to update
35
# or recreate the item
36
old_entry = Entry.query.filter_by(guid=guid).first()
38
# get title, url and text. skip if no title or no text is
39
# given. if the link is missing we use the blog link.
40
if "title_detail" in entry:
41
title = entry.title_detail.get("value") or ""
42
if entry.title_detail.get("type") in HTML_MIMETYPES:
43
title = strip_tags(title)
47
title = entry.get("title")
48
url = entry.get("link") or blog.blog_url
50
entry.content[0] if "content" in entry else entry.get("summary_detail")
53
if not title or not text:
56
# if we have an html text we use that, otherwise we HTML
57
# escape the text and use that one. We also handle XHTML
58
# with our tag soup parser for the moment.
59
if text.get("type") not in HTML_MIMETYPES:
60
text = escape(nl2p(text.get("value") or ""))
62
text = text.get("value") or ""
68
# get the pub date and updated date. This is rather complex
69
# because different feeds do different stuff
71
entry.get("published_parsed")
72
or entry.get("created_parsed")
73
or entry.get("date_parsed")
75
updated = entry.get("updated_parsed") or pub_date
76
pub_date = pub_date or updated
78
# if we don't have a pub_date we skip.
82
# convert the time tuples to datetime objects.
83
pub_date = datetime(*pub_date[:6])
84
updated = datetime(*updated[:6])
85
if old_entry and updated <= old_entry.last_update:
88
# create a new entry object based on the data collected or
90
entry = old_entry or Entry()
96
entry.pub_date = pub_date
97
entry.last_update = updated