dotfiles/.config/sublime-text-3/Packages/mdpopups/tests/spellcheck.py

"""Spell check with aspell."""
from __future__ import unicode_literals
import subprocess
import os
import sys
import codecs
import bs4
import yaml
import re
from collections import namedtuple

# True when the interpreter is Python 3 or newer (bytes/text handling differs from Python 2).
PY3 = sys.version_info >= (3, 0)


def yaml_load(source, loader=yaml.Loader):
    """
    Parse YAML text using a private subclass of `loader`.

    A fresh subclass is created on every call so that registering the string
    constructor below never touches the loader class that was passed in.
    String nodes are returned as their plain scalar value, i.e. as Unicode
    (background: http://stackoverflow.com/a/2967461/3609487).

    NOTE(review): `yaml.load` with the full `yaml.Loader` can build arbitrary
    Python objects from tagged input - only use this on trusted files.
    """

    class Loader(loader):
        """Private loader; constructor registration stays local to this call."""

    def unicode_scalar(this, node):
        """Return the scalar value of a YAML string node unchanged."""
        return this.construct_scalar(node)

    # Route every plain YAML string through the Unicode-preserving constructor.
    Loader.add_constructor('tag:yaml.org,2002:str', unicode_scalar)

    return yaml.load(source, Loader)


def read_config(file_name):
    """
    Read the YAML configuration stored in `file_name`.

    The file is decoded as UTF-8 and parsed with `yaml_load`.  An empty
    document parses to `None`; in that case an empty dict is returned so
    callers can always use mapping methods such as `.get()` on the result.
    """

    with codecs.open(file_name, 'r', encoding='utf-8') as f:
        config = yaml_load(f.read())
    return {} if config is None else config


def console(cmd, input_file=None, input_text=None):
    """
    Run `cmd`, optionally feeding it data on stdin, and return its output.

    Parameters:
        cmd: argument list handed to `subprocess.Popen` (no shell is involved).
        input_file: optional path; the file's raw bytes are sent to stdin first.
        input_text: optional byte string sent to stdin after any file content.

    Returns:
        The process output (stderr is merged into stdout).  Byte output is
        decoded as UTF-8; on Python 2 the native byte string is returned as is.

    Raises:
        AssertionError: if the process exits with a non-zero return code.
    """

    popen_kwargs = dict(
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        stdin=subprocess.PIPE,
        shell=False
    )
    if sys.platform.startswith('win'):
        startupinfo = subprocess.STARTUPINFO()
        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        popen_kwargs['startupinfo'] = startupinfo

    # Gather all stdin data before spawning so a missing input file does not
    # leave an orphaned child process behind.
    payload = None
    if input_file is not None:
        with open(input_file, 'rb') as f:
            payload = f.read()
    if input_text is not None:
        payload = input_text if payload is None else payload + input_text

    process = subprocess.Popen(cmd, **popen_kwargs)
    # Hand the data to communicate() instead of writing to process.stdin
    # directly: communicate() interleaves writing and reading, so large inputs
    # cannot deadlock against a full stdout pipe.
    output = process.communicate(payload)[0]

    # Only Python 3 yields `bytes` that differ from `str`; Python 2 byte
    # strings pass through untouched.
    if not isinstance(output, str):
        output = output.decode('utf-8')

    # Raised explicitly (not via `assert`) so the check survives `python -O`;
    # the exception type stays AssertionError for existing callers.
    if process.returncode != 0:
        raise AssertionError("Runtime Error: %s" % output.rstrip())

    return output


class IgnoreRule(namedtuple('IgnoreRule', ('tag', 'id', 'classes'))):
    """Immutable record for one ignore selector: `tag`, `id`, and `classes` fields."""


class Spelling(object):
    """
    Spell check HTML documents with aspell.

    Settings come from a YAML configuration file; the keys read are `docs`,
    `dictionary`, `attributes`, and `ignores`.
    """

    # File name of the compiled custom dictionary (resolved against the current directory).
    DICTIONARY = 'dictionary.bin'
    # One selector token: an optional `#` or `.` prefix followed by a name.
    RE_SELECTOR = re.compile(r'(\#|\.)?[-\w]+')

    def __init__(self, config_file):
        """Load the settings found in `config_file`."""

        # An empty configuration file can load as None; fall back to defaults.
        config = read_config(config_file) or {}
        self.docs = config.get('docs', [])
        self.dictionary = ('\n'.join(config.get('dictionary', []))).encode('utf-8')
        self.attributes = set(config.get('attributes', []))
        self.ignores = self.ignore_rules(*config.get('ignores', []))
        self.dict_bin = os.path.abspath(self.DICTIONARY)

    def ignore_rules(self, *args):
        """
        Turn selector strings into `IgnoreRule` objects.

        Each selector is lower-cased and split into at most one tag name, at
        most one `#id`, and any number of `.class` tokens.  Selectors that
        yield no tokens are dropped.

        Raises `ValueError` when a selector names more than one tag or more
        than one id.
        """

        ignores = []

        for arg in args:
            tag = None
            tag_id = None
            classes = set()

            for m in self.RE_SELECTOR.finditer(arg.lower()):
                token = m.group(0)
                if token.startswith('.'):
                    classes.add(token[1:])
                elif token.startswith('#'):
                    # Reject a second id here; letting it fall through would
                    # store the `#name` token as the tag name.
                    if tag_id is not None:
                        raise ValueError('Bad selector!')
                    tag_id = token[1:]
                elif tag is None:
                    tag = token
                else:
                    raise ValueError('Bad selector!')

            if tag or tag_id or classes:
                ignores.append(IgnoreRule(tag, tag_id, tuple(classes)))

        return ignores

    def compile_dictionaries(self):
        """Rebuild the custom aspell dictionary from the configured word list."""

        # Always start from a clean slate so stale words never linger.
        if os.path.exists(self.dict_bin):
            os.remove(self.dict_bin)
        print("Compiling Dictionary...")
        print(
            console(
                [
                    'aspell',
                    '--lang=en',
                    '--encoding=utf-8',
                    'create',
                    'master',
                    os.path.abspath(self.dict_bin)
                ],
                input_text=self.dictionary
            )
        )

    def skip_tag(self, el):
        """
        Return True when `el` matches any ignore rule.

        A rule matches when every part it specifies (tag, id, classes) agrees
        with the element; comparisons are case-insensitive on the element side.
        """

        for rule in self.ignores:
            if rule.tag and el.name.lower() != rule.tag:
                continue
            if rule.id and rule.id != el.attrs.get('id', '').lower():
                continue
            if rule.classes:
                current_classes = set(c.lower() for c in el.attrs.get('class', []))
                # Every class named by the rule must be present on the element.
                if not current_classes.issuperset(rule.classes):
                    continue
            return True
        return False

    def html_to_text(self, tree, root=True):
        """
        Collect the text of `tree` and its descendants.

        Elements matching an ignore rule contribute nothing (neither text nor
        attributes).  For other elements the configured attributes are included
        ahead of the child content.

        Returns a single space-joined string when `root` is true, otherwise the
        list of collected fragments (used by the recursive calls).
        """

        text = []

        if not self.skip_tag(tree):
            for attr in self.attributes:
                value = tree.attrs.get(attr)
                if not value:
                    continue
                # Multi-valued attributes (e.g. `class`) arrive as lists;
                # flatten them so the final join only ever sees strings.
                if isinstance(value, (list, tuple)):
                    text.extend(value)
                else:
                    text.append(value)

            for child in tree:
                if isinstance(child, bs4.element.Tag):
                    # NOTE(review): tags without contents are skipped entirely, so
                    # their attributes are never scanned - confirm this is intended.
                    if child.contents:
                        text.extend(self.html_to_text(child, False))
                else:
                    text.append(str(child))

        return ' '.join(text) if root else text

    def check_spelling(self, html_file):
        """
        Spell check one HTML file.

        Prints the sorted, de-duplicated list of words aspell reports and
        returns True when there is at least one, False otherwise.
        """

        with codecs.open(html_file, 'r', encoding='utf-8') as file_obj:
            html = bs4.BeautifulSoup(file_obj.read(), "html5lib")
            text = self.html_to_text(html.html)

        wordlist = console(
            [
                'aspell',
                'list',
                '--lang=en',
                '--mode=url',
                '--encoding=utf-8',
                '--extra-dicts',
                self.dict_bin
            ],
            input_text=text.encode('utf-8')
        )
        words = sorted(w for w in set(wordlist.split('\n')) if w)

        if not words:
            return False

        print('Misspelled words in %s' % html_file)
        print('-' * 80)
        for word in words:
            print(word)
        print('-' * 80)
        print('\n')
        return True

    def check(self):
        """
        Compile the dictionary, then spell check every configured document.

        Directory entries are walked recursively for `.html` files; file
        entries are checked when they end in `.html`.  Returns True if any
        file had misspellings.
        """

        self.compile_dictionaries()

        print('Spell Checking...')
        fail = False
        for doc in self.docs:
            if os.path.isdir(doc):
                for base, _dirs, files in os.walk(doc):
                    for f in files:
                        if f.lower().endswith('.html'):
                            if self.check_spelling(os.path.join(base, f)):
                                fail = True
            elif doc.lower().endswith('.html'):
                if self.check_spelling(doc):
                    fail = True
        return fail


def main():
    """Run the spell check configured by `.spelling.yml` and return its failure flag."""

    checker = Spelling('.spelling.yml')
    failed = checker.check()
    return failed


# Script entry point: main()'s result becomes the exit status, so a True
# result (misspellings found) exits with status 1 and False exits with 0.
if __name__ == "__main__":
    sys.exit(main())
Sublime Text configuration 2018-05-16 00:30:13 +02:00			`"""Spell check with aspell."""`
			`from __future__ import unicode_literals`
			`import subprocess`
			`import os`
			`import sys`
			`import codecs`
			`import bs4`
			`import yaml`
			`import re`
			`from collections import namedtuple`

			`PY3 = sys.version_info >= (3, 0)`


			`def yaml_load(source, loader=yaml.Loader):`
			`"""`
			`Wrap PyYaml's loader so we can extend it to suit our needs.`

			`Load all strings as unicode: http://stackoverflow.com/a/2967461/3609487.`
			`"""`

			`def construct_yaml_str(self, node):`
			`"""Override the default string handling function to always return Unicode objects."""`
			`return self.construct_scalar(node)`

			`class Loader(loader):`
			`"""Define a custom loader to leave the global loader unaltered."""`

			`# Attach our unicode constructor to our custom loader ensuring all strings`
			`# will be unicode on translation.`
			`Loader.add_constructor('tag:yaml.org,2002:str', construct_yaml_str)`

			`return yaml.load(source, Loader)`


			`def read_config(file_name):`
			`"""Read configuration."""`

			`config = {}`
			`with codecs.open(file_name, 'r', encoding='utf-8') as f:`
			`config = yaml_load(f.read())`
			`return config`


			`def console(cmd, input_file=None, input_text=None):`
			`"""Call with arguments."""`

			`returncode = None`
			`output = None`

			`if sys.platform.startswith('win'):`
			`startupinfo = subprocess.STARTUPINFO()`
			`startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW`
			`process = subprocess.Popen(`
			`cmd,`
			`startupinfo=startupinfo,`
			`stdout=subprocess.PIPE,`
			`stderr=subprocess.STDOUT,`
			`stdin=subprocess.PIPE,`
			`shell=False`
			`)`
			`else:`
			`process = subprocess.Popen(`
			`cmd,`
			`stdout=subprocess.PIPE,`
			`stderr=subprocess.STDOUT,`
			`stdin=subprocess.PIPE,`
			`shell=False`
			`)`

			`if input_file is not None:`
			`with open(input_file, 'rb') as f:`
			`process.stdin.write(f.read())`
			`if input_text is not None:`
			`process.stdin.write(input_text)`
			`output = process.communicate()`
			`returncode = process.returncode`

			`assert returncode == 0, "Runtime Error: %s" % (`
			`output[0].rstrip().decode('utf-8') if PY3 else output[0]`
			`)`

			`return output[0].decode('utf-8') if PY3 else output[0]`


			`class IgnoreRule (namedtuple('IgnoreRule', ['tag', 'id', 'classes'])):`
			`"""Ignore rule."""`


			`class Spelling(object):`
			`"""Spell check object."""`

			`DICTIONARY = 'dictionary.bin'`
			`RE_SELECTOR = re.compile(r'(\#|\.)?[-\w]+')`

			`def __init__(self, config_file):`
			`"""Initialize."""`

			`config = read_config(config_file)`
			`self.docs = config.get('docs', [])`
			`self.dictionary = ('\n'.join(config.get('dictionary', []))).encode('utf-8')`
			`self.attributes = set(config.get('attributes', []))`
			`self.ignores = self.ignore_rules(*config.get('ignores', []))`
			`self.dict_bin = os.path.abspath(self.DICTIONARY)`

			`def ignore_rules(self, *args):`
			`"""`
			`Process ignore rules.`

			`Split ignore selector string into tag, id, and classes.`
			`"""`

			`ignores = []`

			`for arg in args:`
			`selector = arg.lower()`
			`tag = None`
			`tag_id = None`
			`classes = set()`

			`for m in self.RE_SELECTOR.finditer(selector):`
			`selector = m.group(0)`
			`if selector.startswith('.'):`
			`classes.add(selector[1:])`
			`elif selector.startswith('#') and tag_id is None:`
			`tag_id = selector[1:]`
			`elif tag is None:`
			`tag = selector`
			`else:`
			`raise ValueError('Bad selector!')`

			`if tag or tag_id or classes:`
			`ignores.append(IgnoreRule(tag, tag_id, tuple(classes)))`

			`return ignores`

			`def compile_dictionaries(self):`
			`"""Compile user dictionary."""`

			`if os.path.exists(self.dict_bin):`
			`os.remove(self.dict_bin)`
			`print("Compiling Dictionary...")`
			`print(`
			`console(`
			`[`
			`'aspell',`
			`'--lang=en',`
			`'--encoding=utf-8',`
			`'create',`
			`'master',`
			`os.path.abspath(self.dict_bin)`
			`],`
			`input_text=self.dictionary`
			`)`
			`)`

			`def skip_tag(self, el):`
			`"""Determine if tag should be skipped."""`

			`skip = False`
			`for rule in self.ignores:`
			`if rule.tag and el.name.lower() != rule.tag:`
			`continue`
			`if rule.id and rule.id != el.attrs.get('id', '').lower():`
			`continue`
			`if rule.classes:`
			`current_classes = [c.lower() for c in el.attrs.get('class', [])]`
			`found = True`
			`for c in rule.classes:`
			`if c not in current_classes:`
			`found = False`
			`break`
			`if not found:`
			`continue`
			`skip = True`
			`break`
			`return skip`

			`def html_to_text(self, tree, root=True):`
			`"""`
			`Parse the HTML creating a buffer with each tags content.`

			`Skip any selectors specified and include attributes if specified.`
			`Ignored tags will not have their attributes scanned either.`
			`"""`

			`text = []`

			`if not self.skip_tag(tree):`
			`for attr in self.attributes:`
			`value = tree.attrs.get(attr)`
			`if value:`
			`text.append(value)`

			`for child in tree:`
			`if isinstance(child, bs4.element.Tag):`
			`if child.contents:`
			`text.extend(self.html_to_text(child, False))`
			`else:`
			`text.append(str(child))`

			`return ' '.join(text) if root else text`

			`def check_spelling(self, html_file):`
			`"""Check spelling."""`

			`fail = False`
			`with codecs.open(html_file, 'r', encoding='utf-8') as file_obj:`
			`html = bs4.BeautifulSoup(file_obj.read(), "html5lib")`
			`text = self.html_to_text(html.html)`

			`wordlist = console(`
			`[`
			`'aspell',`
			`'list',`
			`'--lang=en',`
			`'--mode=url',`
			`'--encoding=utf-8',`
			`'--extra-dicts',`
			`self.dict_bin`
			`],`
			`input_text=text.encode('utf-8')`
			`)`
			`words = [w for w in sorted(set(wordlist.split('\n'))) if w]`

			`if words:`
			`fail = True`
			`print('Misspelled words in %s' % html_file)`
			`print('-' * 80)`
			`for word in words:`
			`print(word)`
			`print('-' * 80)`
			`print('\n')`
			`return fail`

			`def check(self):`
			`"""Walk documents and initiate spell check."""`

			`self.compile_dictionaries()`

			`print('Spell Checking...')`
			`fail = False`
			`for doc in self.docs:`
			`if os.path.isdir(doc):`
			`for base, dirs, files in os.walk(doc):`
			`# Remove child folders based on exclude rules`
			`for f in files:`
			`if f.lower().endswith('.html'):`
			`file_name = os.path.join(base, f)`
			`if self.check_spelling(file_name):`
			`fail = True`
			`elif doc.lower().endswith('.html'):`
			`if self.check_spelling(doc):`
			`fail = True`
			`return fail`


			`def main():`
			`"""Main."""`

			`spelling = Spelling('.spelling.yml')`
			`return spelling.check()`


			`if __name__ == "__main__":`
			`sys.exit(main())`