Browse Source

FIX tipue search

boyska 6 years ago
parent
commit
4bd0e12be2

+ 67 - 0
plugins/tipue_search/README.md

@@ -0,0 +1,67 @@
+Tipue Search
+============
+
+A Pelican plugin that serializes generated HTML to JSON for use by the jQuery plugin Tipue Search.
+
+Copyright (c) Talha Mansoor
+
+Author          | Talha Mansoor
+----------------|-----
+Author Email    | talha131@gmail.com 
+Author Homepage | http://onCrashReboot.com 
+Github Account  | https://github.com/talha131 
+
+Why do you need it?
+===================
+
+Static sites do not offer a search feature out of the box. [Tipue Search](http://www.tipue.com/search/)
+is a jQuery plugin that searches a static site without using any third-party service, such as DuckDuckGo or Google.
+
+Tipue Search offers 4 search modes. Its [JSON search mode](http://www.tipue.com/search/docs/json/) is the best search mode
+especially for large sites.
+
+Tipue's JSON search mode requires the textual content of the site in JSON format.
+
+Requirements
+============
+
+Tipue Search requires BeautifulSoup.
+
+```bash
+pip install beautifulsoup4
+```
+
+How Tipue Search works
+=========================
+
+Tipue Search serializes the generated HTML into JSON. The format of the JSON is as follows:
+
+```python
+{
+    "pages": [
+        { 
+            "text": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Integer nec odio. Praesent libero. Sed cursus ante dapibus diam. Sed nisi. Nulla quis sem at nibh elementum imperdiet. Duis sagittis ipsum. Praesent mauris. Fusce nec tellus sed augue semper porta. Mauris massa. Vestibulum lacinia arcu eget nulla. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Curabitur sodales ligula in libero.",
+            "tags": "Example Category",
+            "url" : "http://oncrashreboot.com/plugin-example.html",
+            "title": "Everything you want to know about Lorem Ipsum"
+        },
+        { 
+            "text": "Sed dignissim lacinia nunc. Curabitur tortor. Pellentesque nibh. Aenean quam. In scelerisque sem at dolor. Maecenas mattis. Sed convallis tristique sem. Proin ut ligula vel nunc egestas porttitor. Morbi lectus risus, iaculis vel, suscipit quis, luctus non, massa. Fusce ac turpis quis ligula lacinia aliquet. Mauris ipsum. Nulla metus metus, ullamcorper vel, tincidunt sed, euismod in, nibh.",
+            "tags": "Example Category",
+            "url" : "http://oncrashreboot.com/plugin-example-2.html",
+            "title": "Review of the book Lorem Ipsum"
+        }
+    ]
+}
+```
+
+The JSON is written to the file `tipuesearch_content.json`, which is created in the root of the `output` directory.
+
+How to use
+==========
+
+To utilize JSON Search mode, your theme needs to have Tipue Search properly configured in it. [Official documentation](http://www.tipue.com/search/docs/#json) has the required details.
+
+Pelican [Elegant Theme](https://github.com/talha131/pelican-elegant) and [Plumage
+theme](https://github.com/kdeldycke/plumage) have Tipue Search configured. You can view their
+code to understand the configuration.

+ 1 - 0
plugins/tipue_search/__init__.py

@@ -0,0 +1 @@
+from .tipue_search import *

+ 107 - 0
plugins/tipue_search/tipue_search.py

@@ -0,0 +1,107 @@
+# -*- coding: utf-8 -*-
+"""
+Tipue Search
+============
+
+A Pelican plugin to serialize generated HTML to JSON
+that can be used by jQuery plugin - Tipue Search.
+
+Copyright (c) Talha Mansoor
+"""
+
+from __future__ import unicode_literals
+
+import os.path
+import json
+from bs4 import BeautifulSoup
+from codecs import open
+try:
+    from urlparse import urljoin
+except ImportError:
+    from urllib.parse import urljoin
+
+from pelican import signals
+
+
class Tipue_Search_JSON_Generator(object):
    """Serialize site content into the JSON index consumed by Tipue Search.

    One node is collected per published page/article (and per article
    translation) plus one per entry of the ``TEMPLATE_PAGES`` setting.  The
    nodes are written to ``tipuesearch_content.json`` in the output directory
    as ``{"pages": [...]}``.

    Every node carries its address under both ``url`` and ``loc`` so that all
    nodes share one schema regardless of which key the theme's Tipue Search
    script reads.
    """

    # Typographic characters rewritten to plain ASCII in titles and text.
    _QUOTE_REPLACEMENTS = (('“', '"'), ('”', '"'), ('’', "'"))

    def __init__(self, context, settings, path, theme, output_path, *null):
        """Store the pieces of the Pelican configuration the generator needs.

        :param context: mapping read later for its ``pages`` and ``articles``
            entries.
        :param settings: mapping read for ``SITEURL``, ``RELATIVE_URLS`` and
            ``TEMPLATE_PAGES``.
        :param path: unused; accepted to match the generator call signature.
        :param theme: unused; accepted to match the generator call signature.
        :param output_path: directory the JSON file is written to and template
            pages are read from.
        :param null: any further positional arguments are ignored.
        """
        self.output_path = output_path
        self.context = context
        # Fall back to empty values so a missing setting cannot break string
        # concatenation or iteration later on.
        self.siteurl = settings.get('SITEURL') or ''
        self.relative_urls = settings.get('RELATIVE_URLS')
        self.tpages = settings.get('TEMPLATE_PAGES') or {}
        self.json_nodes = []

    @classmethod
    def _plain_quotes(cls, text):
        """Return *text* with curly quotes/apostrophes replaced by ASCII ones."""
        for fancy, plain in cls._QUOTE_REPLACEMENTS:
            text = text.replace(fancy, plain)
        return text

    @staticmethod
    def _make_node(title, text, tags, url):
        """Build one search node; the address is stored as ``url`` and ``loc``."""
        return {'title': title,
                'text': text,
                'tags': tags,
                'url': url,
                'loc': url}

    def create_json_node(self, page):
        """Append a search node for *page* unless it is not published.

        :param page: content object providing ``title``, ``content`` and
            ``url``; ``status`` and ``category`` are optional.
        """
        if getattr(page, 'status', 'published') != 'published':
            return

        # Turn non-breaking spaces (entity or character) into regular spaces
        # before stripping markup from the title.
        raw_title = page.title.replace('&nbsp;', ' ').replace('\xa0', ' ')
        soup_title = BeautifulSoup(raw_title, 'html.parser')
        page_title = self._plain_quotes(soup_title.get_text(' ', strip=True))

        soup_text = BeautifulSoup(page.content, 'html.parser')
        page_text = self._plain_quotes(soup_text.get_text(' ', strip=True))
        # Drop pilcrow marks, then collapse every whitespace run to one space.
        page_text = ' '.join(page_text.replace('¶', ' ').split())

        category = getattr(page, 'category', None)
        page_category = category.name if category is not None else ''

        page_url = '.'
        if page.url:
            if self.relative_urls:
                page_url = page.url
            else:
                page_url = self.siteurl + '/' + page.url

        self.json_nodes.append(
            self._make_node(page_title, page_text, page_category, page_url))

    def create_tpage_node(self, srclink):
        """Append a search node for the template page keyed by *srclink*.

        The rendered file is read from the output directory using the
        destination stored in ``self.tpages[srclink]``.

        :param srclink: key into the ``TEMPLATE_PAGES`` mapping.
        """
        destination = self.tpages[srclink]
        with open(os.path.join(self.output_path, destination),
                  encoding='utf-8') as srcfile:
            soup = BeautifulSoup(srcfile, 'html.parser')

        # get_text() yields '' for an empty or nested <title>, where .string
        # would yield None and end up as JSON null.
        page_title = soup.title.get_text() if soup.title is not None else ''
        # Same whitespace normalisation as for regular pages.
        page_text = ' '.join(soup.get_text().split())

        # Template pages have no category.
        page_category = ''
        page_url = urljoin(self.siteurl, destination)

        self.json_nodes.append(
            self._make_node(page_title, page_text, page_category, page_url))

    def generate_output(self, writer):
        """Collect all nodes and write ``tipuesearch_content.json``.

        :param writer: unused; accepted to match the generator interface.
        """
        # TODO(review): an earlier note suggested iterating
        # self.context['PAGES'] instead of 'pages' -- confirm which key is
        # populated by the Pelican version in use.
        path = os.path.join(self.output_path, 'tipuesearch_content.json')

        # Concatenation builds a new list, so the context lists stay untouched
        # when translations are added below.
        pages = self.context['pages'] + self.context['articles']
        for article in self.context['articles']:
            pages += article.translations

        for srclink in self.tpages:
            self.create_tpage_node(srclink)

        for page in pages:
            self.create_json_node(page)

        root_node = {'pages': self.json_nodes}

        with open(path, 'w', encoding='utf-8') as fd:
            json.dump(root_node, fd, separators=(',', ':'), ensure_ascii=False)
+
+
def get_generators(generators):
    """Signal handler: hand the Tipue Search generator class to the caller.

    :param generators: argument supplied with the signal; not used here.
    :returns: the ``Tipue_Search_JSON_Generator`` class itself, not an
        instance.
    """
    generator_class = Tipue_Search_JSON_Generator
    return generator_class
+
+
def register():
    """Plugin entry point: connect ``get_generators`` to the matching signal."""
    generators_signal = signals.get_generators
    generators_signal.connect(get_generators)