summaryrefslogtreecommitdiff
path: root/yarnlib/mdparser.py
blob: 4bd59d38d434029749788734e5353098986a03ee (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# Copyright 2013  Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# =*= License: GPL-3+ =*=


import logging
import markdown
import StringIO
from markdown.treeprocessors import Treeprocessor


#
# Classes for Markdown parsing. See python-markdown documentation
# for details. We want to find all top level code blocks (indented
# four spaces in the Markdown), which we'll parse for scenario test
# stuff later on. We create a Python markdown extension and use a
# "tree processor" to analyse the parsed ElementTree at the right
# moment for top level <pre> blocks.
#

# This is a Treeprocessor that iterates over the parsed Markdown,
# as an ElementTree, and finds all top level code blocks.

class GatherCodeBlocks(Treeprocessor):

    # Tree processor that appends the text of every top level
    # <pre><code> element to a caller-supplied list.

    def __init__(self, blocks):
        # The list is owned by the caller; run() appends to it in place,
        # so the caller sees the gathered blocks through its own reference.
        self.blocks = blocks

    def run(self, root):
        # Only direct children of the root are examined, so code blocks
        # nested inside other elements are not gathered.
        #
        # Iterate over the element itself rather than calling
        # getchildren(): that method is deprecated in ElementTree and
        # no longer exists in newer Python versions.
        for child in root:
            if child.tag != 'pre':
                continue
            code = child.find('code')
            # find() returns None when there is no <code> child; skip
            # such a <pre> instead of failing on the attribute access.
            if code is not None:
                self.blocks.append(code.text)
        # The tree itself is handed back unmodified.
        return root

# This is the Python Markdown extension to call the code block
# gatherer at the right time. It stores the list of top level
# code blocks as the blocks attribute.

class ParseScenarioTestBlocks(markdown.extensions.Extension):

    # Hook a GatherCodeBlocks tree processor into the given Markdown
    # instance. After a document has been converted, the gathered code
    # block texts are available in the blocks attribute; the processor
    # itself is kept in the gatherer attribute. md_globals is not used.

    def extendMarkdown(self, md, md_globals):
        collected = []
        processor = GatherCodeBlocks(collected)

        # The extension and the processor share one list object, so
        # whatever the processor appends shows up in self.blocks.
        self.blocks = collected
        self.gatherer = processor

        # Registered under the name 'gathercode' at the '_end' location
        # of the tree processor collection.
        md.treeprocessors.add('gathercode', processor, '_end')


class MarkdownParser(object):

    # Parses Markdown text and collects its top level code blocks.
    # The blocks attribute accumulates the code blocks of every text
    # parsed through this object, in the order they were parsed.

    def __init__(self):
        self.blocks = []

    # Parse a Markdown string. Returns the list of code blocks found
    # in this particular text, and also appends them to self.blocks.
    def parse_string(self, text):
        # A fresh extension per call, so ext.blocks only ever holds the
        # blocks of the text being parsed right now.
        ext = ParseScenarioTestBlocks()
        f = StringIO.StringIO()
        # The value returned by markdown.markdown() is ignored; only the
        # blocks gathered by the extension are of interest here.
        markdown.markdown(text, output=f, extensions=[ext])
        self.blocks.extend(ext.blocks)
        return ext.blocks

    # Read a UTF-8 encoded Markdown file and parse it with
    # parse_string(). Returns whatever parse_string() returns.
    def parse_file(self, filename): # pragma: no cover
        # Open in binary mode: the contents are decoded explicitly
        # below, so we want the raw bytes, without any platform
        # specific newline translation applied by text mode.
        with open(filename, 'rb') as f:
            binary = f.read()
        # The file is closed by now; decoding and parsing do not need it.
        text = binary.decode('utf-8')
        return self.parse_string(text)