A Simple Markdown Parser

Subscribe Send me a message home page tags


#java  #markdown  #parser 

It turns out it's not very hard to write a simple Markdown parser. The following code converts Markdown text to HTML. The implementation is easy to extend and customize.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212

package lab;

import lombok.Builder;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.StringJoiner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Converts a small subset of Markdown into HTML.
 *
 * <p>Supported constructs, recognised line by line:
 * <ul>
 *   <li>headings: a line whose trimmed form starts with one or more {@code #};</li>
 *   <li>bullet lists: consecutive lines whose trimmed form starts with {@code -}, nested by
 *       their leading indentation;</li>
 *   <li>raw HTML: lines between {@code \begin{html}} and {@code \end{html}} are copied through
 *       unchanged;</li>
 *   <li>paragraphs: every other non-empty line, one paragraph per line.</li>
 * </ul>
 * Inside paragraphs and list items, images, links and {@code **bold**} spans are converted.
 * Text is not HTML-escaped, so inline HTML in the input reaches the output as-is.
 */
public class MarkdownParser {

    /** Highest heading level that HTML defines ({@code <h6>}). */
    private static final int MAX_HEADER_LEVEL = 6;

    /** Line separator; the optional {@code \r} keeps Windows line endings out of the output. */
    private static final Pattern LINE_BREAK = Pattern.compile("\r?\n");

    /** {@code ![alt](src)}; the alt text may be empty. */
    private static final Pattern IMAGE = Pattern.compile("!\\[(?<alt>.*?)\\]\\((?<src>.+?)\\)");

    /** {@code [text](href)}. */
    private static final Pattern LINK = Pattern.compile("\\[(?<text>.+?)\\]\\((?<href>.+?)\\)");

    /** {@code **bold**}. */
    private static final Pattern BOLD = Pattern.compile("\\*\\*(.+?)\\*\\*");

    /**
     * Builds a run of spaces, four per level. Not called by the parser itself; kept as a
     * helper for extensions that want indented output.
     *
     * @param level number of indentation levels
     * @return a string of {@code level * 4} spaces
     */
    private static String createIndentation(int level) {
        return " ".repeat(level * 4);
    }

    /**
     * Replaces inline Markdown (images, links, bold) in one line with the matching HTML.
     *
     * @param line text of a paragraph or list item
     * @return the line with inline markup converted
     */
    private static String decorate(final String line) {
        // Images go first: the link pattern would otherwise match the "[alt](src)" tail of an image.
        String value = IMAGE.matcher(line).replaceAll("<img src=\"${src}\" alt=\"${alt}\">");
        value = LINK.matcher(value).replaceAll("<a href=\"${href}\">${text}</a>");
        return BOLD.matcher(value).replaceAll("<b>$1</b>");
    }

    /** A parsed block that can render itself as HTML. */
    private interface HtmlGenerator {
        String generate();
    }

    /** Lines of a {@code \begin{html}} block, emitted without any processing. */
    private static class RawLines implements HtmlGenerator {
        final List<String> lines;

        RawLines() {
            this.lines = new ArrayList<>();
        }

        public void add(final String line) {
            this.lines.add(line);
        }

        @Override
        public String generate() {
            return String.join("\n", this.lines);
        }
    }

    /** One line of text rendered as {@code <div class="paragraph">}. */
    private static class Paragraph implements HtmlGenerator {
        private final String content;

        Paragraph(final String content) {
            this.content = content;
        }

        @Override
        public String generate() {
            final StringJoiner sj = new StringJoiner("\n");
            sj.add("<div class=\"paragraph\">");
            sj.add(decorate(this.content));
            sj.add("</div>");
            return sj.toString();
        }
    }

    /** A run of consecutive bullet lines rendered as (possibly nested) {@code <ul>} lists. */
    private static class ListItems implements HtmlGenerator {

        /** A bullet's leading-whitespace width and its text with the marker removed. */
        private static final class Item {
            final int indent;
            final String text;

            Item(final int indent, final String text) {
                this.indent = indent;
                this.text = text;
            }
        }

        private final List<Item> items;

        public ListItems() {
            this.items = new ArrayList<>();
        }

        /**
         * Adds one bullet line.
         *
         * @param line the untrimmed source line; its first non-blank character is the
         *             {@code -} marker (guaranteed by the caller)
         */
        public void add(final String line) {
            // Skip exactly what String.trim() skips, so the marker is found behind tabs too.
            // Every leading character, tab included, counts as one column of indentation.
            int indent = 0;
            while (indent < line.length() && line.charAt(indent) <= ' ') {
                ++indent;
            }
            final int textStart = Math.min(indent + 1, line.length());
            this.items.add(new Item(indent, line.substring(textStart).trim()));
        }

        /**
         * Maps each item's raw indentation to a nesting depth starting at 1. An item deeper
         * than the previous one goes exactly one level down, whatever the indentation jump;
         * a shallower item returns to the nearest enclosing level that is still open.
         */
        private int[] depths() {
            final int[] depths = new int[this.items.size()];
            final List<Integer> openIndents = new ArrayList<>();
            for (int i = 0; i < depths.length; i++) {
                final int indent = this.items.get(i).indent;
                while (!openIndents.isEmpty()
                        && indent < openIndents.get(openIndents.size() - 1)) {
                    openIndents.remove(openIndents.size() - 1);
                }
                if (openIndents.isEmpty() || indent > openIndents.get(openIndents.size() - 1)) {
                    openIndents.add(indent);
                }
                depths[i] = openIndents.size();
            }
            return depths;
        }

        @Override
        public String generate() {
            final StringJoiner sj = new StringJoiner("\n");
            sj.add("<div class=\"listing\">");

            final int[] depths = depths();
            int openLists = 0;
            for (int i = 0; i < depths.length; i++) {
                final int depth = depths[i];
                if (depth > openLists) {
                    // depths() never descends more than one level at a time.
                    sj.add("<ul>");
                    openLists = depth;
                }
                while (openLists > depth) {
                    // Close the nested list and the parent item that was left open for it.
                    sj.add("</ul>");
                    sj.add("</li>");
                    --openLists;
                }
                final boolean hasChildren = i + 1 < depths.length && depths[i + 1] > depth;
                final String opened = "<li>" + decorate(this.items.get(i).text);
                // An item with children stays open so its nested <ul> sits inside the <li>.
                sj.add(hasChildren ? opened : opened + "</li>");
            }
            while (openLists > 1) {
                sj.add("</ul>");
                sj.add("</li>");
                --openLists;
            }
            if (openLists == 1) {
                sj.add("</ul>");
            }

            sj.add("</div>");
            return sj.toString();
        }
    }

    /** A heading rendered as {@code <hN>}. */
    private static class Header implements HtmlGenerator {
        private final int level;
        private final String content;

        Header(final int level, final String content) {
            this.level = level;
            this.content = content;
        }

        @Override
        public String generate() {
            final StringJoiner sj = new StringJoiner("\n");
            sj.add("<h" + this.level + ">");
            sj.add(this.content);
            sj.add("</h" + this.level + ">");
            return sj.toString();
        }
    }

    /**
     * Builds a heading from a trimmed line that starts with {@code #}.
     *
     * @param trimmedLine the heading line without surrounding whitespace
     * @return a header whose level is the number of leading {@code #}, capped at 6
     */
    private static Header parseHeader(final String trimmedLine) {
        int hashes = 0;
        while (hashes < trimmedLine.length() && trimmedLine.charAt(hashes) == '#') {
            hashes++;
        }
        // Take everything after the hashes and trim it, instead of assuming exactly one
        // separator character: "#" alone yields an empty title and "#title" keeps its "t".
        final String title = trimmedLine.substring(hashes).trim();
        return new Header(Math.min(hashes, MAX_HEADER_LEVEL), title);
    }

    /**
     * Converts Markdown text to HTML.
     *
     * @param content the Markdown source; lines may end in {@code \n} or {@code \r\n}
     * @return the generated HTML, with blocks separated by a blank line; empty if the input
     *         contains no content
     */
    public static String toHtml(final String content) {
        final List<String> lines = Arrays.asList(LINE_BREAK.split(content));
        return toHtml(lines);
    }

    /**
     * Groups the lines into blocks and renders them in order.
     *
     * @param lines the source lines without line terminators
     * @return the rendered blocks joined by a blank line
     */
    private static String toHtml(final List<String> lines) {
        final List<HtmlGenerator> results = new ArrayList<>();

        int k = 0;
        while (k < lines.size()) {
            final String line = lines.get(k);
            final String trimmedLine = line.trim();

            if (trimmedLine.startsWith("#")) {
                results.add(parseHeader(trimmedLine));
                ++k;
            } else if (trimmedLine.startsWith("-")) {
                // Consume the whole run of bullet lines as a single list.
                final ListItems listItems = new ListItems();
                while (k < lines.size() && lines.get(k).trim().startsWith("-")) {
                    listItems.add(lines.get(k));
                    ++k;
                }
                results.add(listItems);
            } else if (trimmedLine.startsWith("\\begin{html}")) {
                final RawLines rl = new RawLines();
                ++k;
                while (k < lines.size() && !lines.get(k).trim().startsWith("\\end{html}")) {
                    rl.add(lines.get(k));
                    ++k;
                }
                results.add(rl);
                ++k; // step over the \end{html} marker (or past the end if it is missing)
            } else {
                if (!trimmedLine.isEmpty()) {
                    results.add(new Paragraph(trimmedLine));
                }
                ++k;
            }
        }

        final StringJoiner sj = new StringJoiner("\n\n");
        for (var gen : results) {
            sj.add(gen.generate());
        }
        return sj.toString();
    }
}


----- END -----

If you have questions about this post, you can find me on Discord.
Send me a message Subscribe to blog updates

Want some fun stuff?

/static/shopping_demo.png