It turns out that writing a simple Markdown parser is not very hard. The following code converts Markdown text to HTML, and the implementation is easy to extend and customize.
package lab;
import lombok.Builder;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Deque;
import java.util.List;
import java.util.StringJoiner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Minimal line-oriented Markdown-to-HTML converter.
 *
 * <p>Supported constructs:
 * <ul>
 *   <li>headers: a line starting with one or more {@code #};</li>
 *   <li>bullet lists: consecutive lines starting with {@code -}, nested by indentation;</li>
 *   <li>raw HTML: lines between {@code \begin{html}} and {@code \end{html}} are copied verbatim;</li>
 *   <li>paragraphs: every other non-blank line becomes its own paragraph;</li>
 *   <li>inline images, links and {@code **bold**} inside paragraphs and list items.</li>
 * </ul>
 *
 * <p>NOTE(review): text is inserted into the output without HTML escaping, so the
 * input must come from a trusted author (raw HTML blocks are passed through anyway).
 */
public class MarkdownParser {

    // Inline patterns are compiled once; Pattern instances are immutable and reusable.
    private static final Pattern IMAGE_PATTERN =
            Pattern.compile("!\\[(?<alt>.+?)\\]\\((?<src>.+?)\\)");
    private static final Pattern LINK_PATTERN =
            Pattern.compile("\\[(?<text>.+?)\\]\\((?<href>.+?)\\)");
    private static final Pattern BOLD_PATTERN = Pattern.compile("\\*\\*(.+?)\\*\\*");

    /** Any line terminator, so that {@code \r\n} input does not leave stray {@code \r}. */
    private static final Pattern LINE_BREAK = Pattern.compile("\\R");

    /** HTML defines only {@code <h1>} to {@code <h6>}. */
    private static final int MAX_HEADER_LEVEL = 6;

    private static final String RAW_HTML_BEGIN = "\\begin{html}";
    private static final String RAW_HTML_END = "\\end{html}";

    /**
     * Applies inline formatting (images, links, bold) to one piece of text.
     *
     * @param line text of a paragraph or list item
     * @return the text with inline Markdown replaced by HTML tags
     */
    private static String decorate(final String line) {
        // Images must be handled first: "![alt](src)" contains the link syntax "[alt](src)".
        String value = IMAGE_PATTERN.matcher(line)
                .replaceAll("<img src=\"${src}\" alt=\"${alt}\">");
        value = LINK_PATTERN.matcher(value)
                .replaceAll("<a href=\"${href}\">${text}</a>");
        return BOLD_PATTERN.matcher(value).replaceAll("<b>$1</b>");
    }

    /** A parsed block that can render itself as HTML. */
    private interface HtmlGenerator {
        String generate();
    }

    /** Lines copied to the output verbatim (content of a raw HTML block). */
    private static class RawLines implements HtmlGenerator {
        private final List<String> lines = new ArrayList<>();

        public void add(final String line) {
            this.lines.add(line);
        }

        @Override
        public String generate() {
            return String.join("\n", this.lines);
        }
    }

    /** A single paragraph, rendered as {@code <div class="paragraph">}. */
    private static class Paragraph implements HtmlGenerator {
        private final String content;

        Paragraph(final String content) {
            this.content = content;
        }

        @Override
        public String generate() {
            return String.join("\n",
                    "<div class=\"paragraph\">",
                    decorate(this.content),
                    "</div>");
        }
    }

    /** A bullet list, possibly nested, rendered inside {@code <div class="listing">}. */
    private static class ListItems implements HtmlGenerator {
        private static final String UL_START = "<ul>";
        private static final String UL_END = "</ul>";

        /** One bullet: the column of its marker and the text after the marker. */
        private static final class Item {
            private final int indent;
            private final String text;

            Item(final int indent, final String text) {
                this.indent = indent;
                this.text = text;
            }
        }

        private final List<Item> items = new ArrayList<>();

        /**
         * Adds one list line.
         *
         * @param line the untrimmed source line; its first non-whitespace character
         *             is expected to be the {@code -} marker
         */
        public void add(final String line) {
            // The marker position doubles as the nesting depth. Using indexOf instead of
            // counting spaces also handles tab-indented items.
            final int marker = line.indexOf('-');
            if (marker < 0) {
                // Defensive: not a list line, keep the text at top level.
                this.items.add(new Item(0, line.trim()));
                return;
            }
            this.items.add(new Item(marker, line.substring(marker + 1).trim()));
        }

        @Override
        public String generate() {
            final StringJoiner html = new StringJoiner("\n");
            html.add("<div class=\"listing\">");
            // Indents of the currently open <ul> elements, innermost on top.
            final Deque<Integer> openIndents = new ArrayDeque<>();
            for (final Item item : this.items) {
                // Close every list that is deeper than this item.
                while (!openIndents.isEmpty() && openIndents.peek() > item.indent) {
                    openIndents.pop();
                    html.add(UL_END);
                }
                // Open a new list when the item is deeper than the innermost open one.
                if (openIndents.isEmpty() || openIndents.peek() < item.indent) {
                    openIndents.push(item.indent);
                    html.add(UL_START);
                }
                html.add("<li>" + decorate(item.text) + "</li>");
            }
            // Close whatever is still open so the tags are always balanced.
            while (!openIndents.isEmpty()) {
                openIndents.pop();
                html.add(UL_END);
            }
            html.add("</div>");
            return html.toString();
        }
    }

    /** A header rendered as {@code <hN>}; the content is emitted without inline formatting. */
    private static class Header implements HtmlGenerator {
        private final int level;
        private final String content;

        Header(final int level, final String content) {
            this.level = level;
            this.content = content;
        }

        @Override
        public String generate() {
            return String.join("\n",
                    "<h" + this.level + ">",
                    this.content,
                    "</h" + this.level + ">");
        }
    }

    /**
     * Converts Markdown text to HTML.
     *
     * @param content the Markdown source; lines may be separated by {@code \n},
     *                {@code \r\n} or {@code \r}
     * @return the generated HTML, with blocks separated by a blank line; an empty
     *         string when the input has no content
     * @throws NullPointerException if {@code content} is {@code null}
     */
    public static String toHtml(final String content) {
        final List<String> lines = Arrays.asList(LINE_BREAK.split(content));
        return toHtml(lines);
    }

    /** Groups the lines into blocks and renders them in order. */
    private static String toHtml(final List<String> lines) {
        final List<HtmlGenerator> blocks = new ArrayList<>();
        int k = 0;
        while (k < lines.size()) {
            final String trimmed = lines.get(k).trim();
            if (trimmed.startsWith("#")) {
                blocks.add(parseHeader(trimmed));
                ++k;
            } else if (trimmed.startsWith("-")) {
                // Consume the whole run of consecutive list lines.
                final ListItems items = new ListItems();
                while (k < lines.size() && lines.get(k).trim().startsWith("-")) {
                    items.add(lines.get(k));
                    ++k;
                }
                blocks.add(items);
            } else if (trimmed.startsWith(RAW_HTML_BEGIN)) {
                final RawLines raw = new RawLines();
                ++k; // skip the begin marker
                while (k < lines.size() && !lines.get(k).trim().startsWith(RAW_HTML_END)) {
                    raw.add(lines.get(k));
                    ++k;
                }
                blocks.add(raw);
                ++k; // skip the end marker (an unterminated block simply runs to the end)
            } else {
                if (!trimmed.isEmpty()) {
                    blocks.add(new Paragraph(trimmed));
                }
                ++k;
            }
        }
        final StringJoiner html = new StringJoiner("\n\n");
        for (final HtmlGenerator block : blocks) {
            html.add(block.generate());
        }
        return html.toString();
    }

    /** Builds a header from a trimmed line that starts with at least one {@code #}. */
    private static Header parseHeader(final String trimmedLine) {
        int hashes = 0;
        while (hashes < trimmedLine.length() && trimmedLine.charAt(hashes) == '#') {
            hashes++;
        }
        // substring(hashes) is always in range, even for a line made only of '#';
        // trim() removes the optional space between the marker and the title.
        final String title = trimmedLine.substring(hashes).trim();
        return new Header(Math.min(hashes, MAX_HEADER_LEVEL), title);
    }
}
----- END -----
©2019 - 2023 all rights reserved