7 gadi atpakaļ · dc7ea6a635
--- a/content/extra/robots.txt
+++ b/content/extra/robots.txt
@@ -0,0 +1 @@
 
				+User-agent: *
			
--- a/pelican-plugins/sitemap/Readme.rst
+++ b/pelican-plugins/sitemap/Readme.rst
@@ -0,0 +1,78 @@
 
				+Sitemap
			
 
				+-------
			
 
				+
			
 
				+This plugin generates plain-text or XML sitemaps. You can use the ``SITEMAP``
			
 
				+variable in your settings file to configure the behavior of the plugin.
			
 
				+
			
 
				+The ``SITEMAP`` variable must be a Python dictionary and can contain these keys:
			
 
				+
			
 
				+- ``format``, which sets the output format of the plugin (``xml`` or ``txt``)
			
 
				+
			
 
				+- ``priorities``, which is a dictionary with three keys:
			
 
				+
			
 
				+  - ``articles``, the priority for the URLs of the articles and their
			
 
				+    translations
			
 
				+
			
 
				+  - ``pages``, the priority for the URLs of the static pages
			
 
				+
			
 
				+  - ``indexes``, the priority for the URLs of the index pages, such as tags,
			
 
				+     author pages, categories indexes, archives, etc...
			
 
				+
			
 
				+  All the values of this dictionary must be decimal numbers between ``0`` and ``1``.
			
 
				+
			
 
				+- ``changefreqs``, which is a dictionary with three items:
			
 
				+
			
 
				+  - ``articles``, the update frequency of the articles
			
 
				+
			
 
				+  - ``pages``, the update frequency of the pages
			
 
				+
			
 
				+  - ``indexes``, the update frequency of the index pages
			
 
				+
			
 
				+  Valid frequency values are ``always``, ``hourly``, ``daily``, ``weekly``, ``monthly``,
			
 
				+  ``yearly`` and ``never``.
			
 
				+
			
 
				+You can exclude URLs from being included in the sitemap via regular expressions.
			
 
				+For example, to exclude all URLs containing ``tag/`` or ``category/`` you can
			
 
				+use the following ``SITEMAP`` setting.
			
 
				+
			
 
				+.. code-block:: python
			
 
				+
			
 
				+    SITEMAP = {
			
 
				+        'exclude': ['tag/', 'category/']
			
 
				+    }
			
 
				+
			
 
				+If a key is missing or a value is incorrect, it will be replaced with the
			
 
				+default value.
			
 
				+
			
 
				+You can also exclude an individual URL by adding metadata to it setting ``private``
			
 
				+to ``True``.
			
 
				+
			
 
				+The sitemap is saved in ``<output_path>/sitemap.<format>``.
			
 
				+
			
 
				+.. note::
			
 
				+   ``priorities`` and ``changefreqs`` are information for search engines.
			
 
				+   They are only used in the XML sitemaps.
			
 
				+   For more information: <http://www.sitemaps.org/protocol.html#xmlTagDefinitions>
			
 
				+
			
 
				+**Example**
			
 
				+
			
 
				+Here is an example configuration (it's also the default settings):
			
 
				+
			
 
				+.. code-block:: python
			
 
				+    # Where your plug-ins reside
			
 
				+    PLUGIN_PATHS = ['/where/you/cloned/it/pelican-plugins/', ]
			
 
				+    PLUGINS=['sitemap',]
			
 
				+
			
 
				+    SITEMAP = {
			
 
				+        'format': 'xml',
			
 
				+        'priorities': {
			
 
				+            'articles': 0.5,
			
 
				+            'indexes': 0.5,
			
 
				+            'pages': 0.5
			
 
				+        },
			
 
				+        'changefreqs': {
			
 
				+            'articles': 'monthly',
			
 
				+            'indexes': 'daily',
			
 
				+            'pages': 'monthly'
			
 
				+        }
			
 
				+    }
			
--- a/pelican-plugins/sitemap/__init__.py
+++ b/pelican-plugins/sitemap/__init__.py
@@ -0,0 +1 @@
 
				+from .sitemap import *
			
--- a/pelican-plugins/sitemap/sitemap.py
+++ b/pelican-plugins/sitemap/sitemap.py
@@ -0,0 +1,271 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+'''
			
 
				+Sitemap
			
 
				+-------
			
 
				+
			
 
				+The sitemap plugin generates plain-text or XML sitemaps.
			
 
				+'''
			
 
				+
			
 
				+from __future__ import unicode_literals
			
 
				+
			
 
				+import re
			
 
				+import collections
			
 
				+import os.path
			
 
				+
			
 
				+from datetime import datetime
			
 
				+from logging import warning, info
			
 
				+from codecs import open
			
 
				+from pytz import timezone
			
 
				+
			
 
				+from pelican import signals, contents
			
 
				+from pelican.utils import get_date
			
 
				+
			
 
				+TXT_HEADER = """{0}/index.html
			
 
				+{0}/archives.html
			
 
				+{0}/tags.html
			
 
				+{0}/categories.html
			
 
				+"""
			
 
				+
			
 
				+XML_HEADER = """<?xml version="1.0" encoding="utf-8"?>
			
 
				+<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
			
 
				+xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
			
 
				+xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
			
 
				+"""
			
 
				+
			
 
				+XML_URL = """
			
 
				+<url>
			
 
				+<loc>{0}/{1}</loc>
			
 
				+<lastmod>{2}</lastmod>
			
 
				+<changefreq>{3}</changefreq>
			
 
				+<priority>{4}</priority>
			
 
				+</url>
			
 
				+"""
			
 
				+
			
 
				+XML_FOOTER = """
			
 
				+</urlset>
			
 
				+"""
			
 
				+
			
 
				+
			
 
				+def format_date(date):
			
 
				+    if date.tzinfo:
			
 
				+        tz = date.strftime('%z')
			
 
				+        tz = tz[:-2] + ':' + tz[-2:]
			
 
				+    else:
			
 
				+        tz = "-00:00"
			
 
				+    return date.strftime("%Y-%m-%dT%H:%M:%S") + tz
			
 
				+
			
 
				+class SitemapGenerator(object):
			
 
				+
			
 
				+    def __init__(self, context, settings, path, theme, output_path, *null):
			
 
				+
			
 
				+        self.output_path = output_path
			
 
				+        self.context = context
			
 
				+        self.now = datetime.now()
			
 
				+        self.siteurl = settings.get('SITEURL')
			
 
				+
			
 
				+
			
 
				+        self.default_timezone = settings.get('TIMEZONE', 'UTC')
			
 
				+        self.timezone = getattr(self, 'timezone', self.default_timezone)
			
 
				+        self.timezone = timezone(self.timezone)
			
 
				+
			
 
				+        self.format = 'xml'
			
 
				+
			
 
				+        self.changefreqs = {
			
 
				+            'articles': 'monthly',
			
 
				+            'indexes': 'daily',
			
 
				+            'pages': 'monthly'
			
 
				+        }
			
 
				+
			
 
				+        self.priorities = {
			
 
				+            'articles': 0.5,
			
 
				+            'indexes': 0.5,
			
 
				+            'pages': 0.5
			
 
				+        }
			
 
				+
			
 
				+        self.sitemapExclude = []
			
 
				+
			
 
				+        config = settings.get('SITEMAP', {})
			
 
				+
			
 
				+        if not isinstance(config, dict):
			
 
				+            warning("sitemap plugin: the SITEMAP setting must be a dict")
			
 
				+        else:
			
 
				+            fmt = config.get('format')
			
 
				+            pris = config.get('priorities')
			
 
				+            chfreqs = config.get('changefreqs')
			
 
				+            self.sitemapExclude = config.get('exclude', [])
			
 
				+
			
 
				+            if fmt not in ('xml', 'txt'):
			
 
				+                warning("sitemap plugin: SITEMAP['format'] must be `txt' or `xml'")
			
 
				+                warning("sitemap plugin: Setting SITEMAP['format'] on `xml'")
			
 
				+            elif fmt == 'txt':
			
 
				+                self.format = fmt
			
 
				+                return
			
 
				+
			
 
				+            valid_keys = ('articles', 'indexes', 'pages')
			
 
				+            valid_chfreqs = ('always', 'hourly', 'daily', 'weekly', 'monthly',
			
 
				+                    'yearly', 'never')
			
 
				+
			
 
				+            if isinstance(pris, dict):
			
 
				+                # We use items for Py3k compat. .iteritems() otherwise
			
 
				+                for k, v in pris.items():
			
 
				+                    if k in valid_keys and not isinstance(v, (int, float)):
			
 
				+                        default = self.priorities[k]
			
 
				+                        warning("sitemap plugin: priorities must be numbers")
			
 
				+                        warning("sitemap plugin: setting SITEMAP['priorities']"
			
 
				+                                "['{0}'] on {1}".format(k, default))
			
 
				+                        pris[k] = default
			
 
				+                self.priorities.update(pris)
			
 
				+            elif pris is not None:
			
 
				+                warning("sitemap plugin: SITEMAP['priorities'] must be a dict")
			
 
				+                warning("sitemap plugin: using the default values")
			
 
				+
			
 
				+            if isinstance(chfreqs, dict):
			
 
				+                # .items() for py3k compat.
			
 
				+                for k, v in chfreqs.items():
			
 
				+                    if k in valid_keys and v not in valid_chfreqs:
			
 
				+                        default = self.changefreqs[k]
			
 
				+                        warning("sitemap plugin: invalid changefreq `{0}'".format(v))
			
 
				+                        warning("sitemap plugin: setting SITEMAP['changefreqs']"
			
 
				+                                "['{0}'] on '{1}'".format(k, default))
			
 
				+                        chfreqs[k] = default
			
 
				+                self.changefreqs.update(chfreqs)
			
 
				+            elif chfreqs is not None:
			
 
				+                warning("sitemap plugin: SITEMAP['changefreqs'] must be a dict")
			
 
				+                warning("sitemap plugin: using the default values")
			
 
				+
			
 
				+    def write_url(self, page, fd):
			
 
				+
			
 
				+        if getattr(page, 'status', 'published') != 'published':
			
 
				+            return
			
 
				+           
			
 
				+        if getattr(page, 'private', 'False') == 'True':
			
 
				+            return
			
 
				+
			
 
				+        # We can disable categories/authors/etc by using False instead of ''
			
 
				+        if not page.save_as:
			
 
				+            return
			
 
				+
			
 
				+        page_path = os.path.join(self.output_path, page.save_as)
			
 
				+        if not os.path.exists(page_path):
			
 
				+            return
			
 
				+
			
 
				+        lastdate = getattr(page, 'date', self.now)
			
 
				+        try:
			
 
				+            lastdate = self.get_date_modified(page, lastdate)
			
 
				+        except ValueError:
			
 
				+            warning("sitemap plugin: " + page.save_as + " has invalid modification date,")
			
 
				+            warning("sitemap plugin: using date value as lastmod.")
			
 
				+        lastmod = format_date(lastdate)
			
 
				+
			
 
				+        if isinstance(page, contents.Article):
			
 
				+            pri = self.priorities['articles']
			
 
				+            chfreq = self.changefreqs['articles']
			
 
				+        elif isinstance(page, contents.Page):
			
 
				+            pri = self.priorities['pages']
			
 
				+            chfreq = self.changefreqs['pages']
			
 
				+        else:
			
 
				+            pri = self.priorities['indexes']
			
 
				+            chfreq = self.changefreqs['indexes']
			
 
				+
			
 
				+        pageurl = '' if page.url == 'index.html' else page.url
			
 
				+
			
 
				+        #Exclude URLs from the sitemap:
			
 
				+        if self.format == 'xml':
			
 
				+            flag = False
			
 
				+            for regstr in self.sitemapExclude:
			
 
				+                if re.match(regstr, pageurl):
			
 
				+                    flag = True
			
 
				+                    break
			
 
				+            if not flag:
			
 
				+                fd.write(XML_URL.format(self.siteurl, pageurl, lastmod, chfreq, pri))
			
 
				+        else:
			
 
				+            fd.write(self.siteurl + '/' + pageurl + '\n')
			
 
				+
			
 
				+    def get_date_modified(self, page, default):
			
 
				+        if hasattr(page, 'modified'):
			
 
				+            if isinstance(page.modified, datetime):
			
 
				+                return page.modified
			
 
				+            return get_date(page.modified)
			
 
				+        else:
			
 
				+            return default
			
 
				+
			
 
				+    def set_url_wrappers_modification_date(self, wrappers):
			
 
				+        for (wrapper, articles) in wrappers:
			
 
				+            lastmod = datetime.min.replace(tzinfo=self.timezone)
			
 
				+            for article in articles:
			
 
				+                lastmod = max(lastmod, article.date.replace(tzinfo=self.timezone))
			
 
				+                try:
			
 
				+                    modified = self.get_date_modified(article, datetime.min).replace(tzinfo=self.timezone)
			
 
				+                    lastmod = max(lastmod, modified)
			
 
				+                except ValueError:
			
 
				+                    # Supressed: user will be notified.
			
 
				+                    pass
			
 
				+            setattr(wrapper, 'modified', str(lastmod))
			
 
				+
			
 
				+    def generate_output(self, writer):
			
 
				+        path = os.path.join(self.output_path, 'sitemap.{0}'.format(self.format))
			
 
				+
			
 
				+        pages = self.context['pages'] + self.context['articles'] \
			
 
				+                + [ c for (c, a) in self.context['categories']] \
			
 
				+                + [ t for (t, a) in self.context['tags']] \
			
 
				+                + [ a for (a, b) in self.context['authors']]
			
 
				+
			
 
				+        self.set_url_wrappers_modification_date(self.context['categories'])
			
 
				+        self.set_url_wrappers_modification_date(self.context['tags'])
			
 
				+        self.set_url_wrappers_modification_date(self.context['authors'])
			
 
				+
			
 
				+        for article in self.context['articles']:
			
 
				+            pages += article.translations
			
 
				+
			
 
				+        info('writing {0}'.format(path))
			
 
				+
			
 
				+        with open(path, 'w', encoding='utf-8') as fd:
			
 
				+
			
 
				+            if self.format == 'xml':
			
 
				+                fd.write(XML_HEADER)
			
 
				+            else:
			
 
				+                fd.write(TXT_HEADER.format(self.siteurl))
			
 
				+
			
 
				+            FakePage = collections.namedtuple('FakePage',
			
 
				+                                              ['status',
			
 
				+                                               'date',
			
 
				+                                               'url',
			
 
				+                                               'save_as'])
			
 
				+
			
 
				+            for standard_page_url in ['index.html',
			
 
				+                                      'archives.html',
			
 
				+                                      'tags.html',
			
 
				+                                      'categories.html']:
			
 
				+                fake = FakePage(status='published',
			
 
				+                                date=self.now,
			
 
				+                                url=standard_page_url,
			
 
				+                                save_as=standard_page_url)
			
 
				+                self.write_url(fake, fd)
			
 
				+
			
 
				+            # add template pages
			
 
				+            # We use items for Py3k compat. .iteritems() otherwise
			
 
				+            for path, template_page_url in self.context['TEMPLATE_PAGES'].items():
			
 
				+
			
 
				+                # don't add duplicate entry for index page
			
 
				+                if template_page_url == 'index.html':
			
 
				+                    continue
			
 
				+
			
 
				+                fake = FakePage(status='published',
			
 
				+                                date=self.now,
			
 
				+                                url=template_page_url,
			
 
				+                                save_as=template_page_url)
			
 
				+                self.write_url(fake, fd)
			
 
				+
			
 
				+            for page in pages:
			
 
				+                self.write_url(page, fd)
			
 
				+
			
 
				+            if self.format == 'xml':
			
 
				+                fd.write(XML_FOOTER)
			
 
				+
			
 
				+
			
 
				+def get_generators(generators):
			
 
				+    return SitemapGenerator
			
 
				+
			
 
				+
			
 
				+def register():
			
 
				+    signals.get_generators.connect(get_generators)
			
--- a/pelicanconf.py
+++ b/pelicanconf.py
@@ -13,17 +13,21 @@ FAVICON = '/images/favicon.ico'
 
				 
			
 
				 BROWSER_COLOR = '#333'
			
 
				 
			
 
				-ROBOTS = 'index, follow'
			
 
				-
			
 
				 PATH = 'content'
			
 
				 
			
 
				 TIMEZONE = 'Europe/Paris'
			
 
				 
			
 
				 PLUGIN_PATHS = [u'pelican-plugins']
			
 
				-PLUGINS = ['i18n_subsites', 'representative_image']
			
 
				+PLUGINS = ['i18n_subsites', 'representative_image', 'sitemap']
			
 
				 
			
 
				 JINJA_ENVIRONMENT = {'extensions': ['jinja2.ext.i18n']}
			
 
				 
			
 
				+ROBOTS = 'index, follow'
			
 
				+SITEMAP = {
			
 
				+    'format': 'xml',
			
 
				+    'exclude': ['tag/', 'category/']
			
 
				+}
			
 
				+
			
 
				 I18N_TEMPLATES_LANG = 'en'
			
 
				 DEFAULT_LANG = 'fr'
			
 
				 OG_LOCALE = 'fr_FR'
			
@@ -74,7 +78,10 @@ CC_LICENSE = {
 
				 DEFAULT_PAGINATION = False
			
 
				 THEME = 'themes/Flex'
			
 
				 DEFAULT_DATE = 'fs'
			
 
				-STATIC_PATHS = ['images']
			
 
				+STATIC_PATHS = ['images', 'extra/robots.txt']
			
 
				+EXTRA_PATH_METADATA = {
			
 
				+    'extra/robots.txt': {'path': 'robots.txt'},
			
 
				+}
			
 
				 THEME_STATIC_DIR = 'themes'
			
 
				 
			
 
				 # Uncomment following line if you want document-relative URLs when developing