将Markdown转换为HTML算法面试问题-最佳方法



我最近在一次技术面试中遇到了这个问题,时间不够了。

任务是编写一个Markdown到HTML的转换器。给定以下输入:

This is a paragraph with a soft
line break.

This is another paragraph that has
> Some text that
> is in a
> block quote.

This is another paragraph with a ~~strikethrough~~ word.

产生以下输出:

<p>This is a paragraph with a soft<br />line break.</p>
<p>This is another paragraph that has <br />
<blockquote>Some text that<br />is in a<br />block quote.</blockquote>
</p>
<p>This is another paragraph with a <del>strikethrough</del> word.</p>

输出的格式并不重要,只需要是有效的HTML即可。

我想不出一个好的方法来做这件事。我唯一能想到的就是用双换行符分割得到段落,然后逐个遍历每个段落,检查其中是否存在各种特殊符号,并把它们替换为对应的HTML等价物。不过,这感觉很粗糙,我相信有一种更结构化的方法来处理这类问题——要知道这是一个时间限制约为30分钟的面试环境。

我欢迎任何建议。

这是我在一小时内的尝试。

代码对输入进行多轮处理,每一轮都在可能的情况下合并相邻的行。若要提高可读性和性能,还需要额外的时间进行调整。另外,也可以再加入一些额外的错误检查。

using System;
using System.Collections.Generic;
namespace akMdConverter
{
    /// <summary>
    /// Minimal Markdown-to-HTML converter supporting paragraphs (separated by
    /// blank lines), soft line breaks, block quotes (lines starting with
    /// <c>&gt;</c>) and <c>~~strikethrough~~</c>.
    /// The conversion is a pipeline of passes, each merging adjacent lines.
    /// </summary>
    class Program
    {
        /// <summary>HTML emitted wherever a soft line break occurs.</summary>
        const string Break = "<br />";

        /// <summary>
        /// Demo entry point: converts a fixed sample document and prints the
        /// resulting HTML lines to standard output.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var md = new string[]
            {
                "This is a paragraph with a soft",
                "line break.",
                "",
                "This is another paragraph that has",
                "> Some text that",
                "> is in a",
                "> block quote.",
                "",
                "This is another paragraph with a ~~strikethrough~~ word."
            };

            foreach (var m in Convert(md))
            {
                Console.WriteLine(m);
            }
        }

        /// <summary>
        /// Runs the whole conversion pipeline over the given Markdown lines.
        /// </summary>
        /// <param name="md">Markdown source, one element per input line.</param>
        /// <returns>
        /// A new array of HTML lines. Blank separator lines are preserved as
        /// empty strings. The input array itself is not modified.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="md"/> is null.</exception>
        internal static string[] Convert(string[] md)
        {
            if (md == null)
            {
                throw new ArgumentNullException(nameof(md));
            }

            // Order matters: quotes are collapsed first so the later passes see
            // each quote as a single line that still starts with '>'.
            md = MergeBlockQuotes(md);
            md = MergeSoftLineBreaks(md);
            md = MergeParagraphs(md);
            ReplaceStrikeThroughs(md);
            return md;
        }

        /// <summary>
        /// Collapses each run of consecutive <c>&gt;</c> lines into one line.
        /// The merged line keeps the leading <c>&gt;</c> of the first quote line
        /// (so later passes can still recognise it) and joins the remaining
        /// lines with <see cref="Break"/>.
        /// </summary>
        /// <param name="md">Input lines.</param>
        /// <returns>Lines with every block quote merged into a single line.</returns>
        static string[] MergeBlockQuotes(string[] md)
        {
            var lines = new List<string>();
            string quote = "";

            foreach (string line in md)
            {
                if (line.StartsWith(">", StringComparison.Ordinal))
                {
                    quote = quote == ""
                        ? line.Trim()
                        : quote + Break + line.Substring(1).Trim();
                    continue;
                }

                if (quote != "")
                {
                    lines.Add(quote);
                    quote = "";
                }
                lines.Add(line);
            }

            // Flush a quote that runs to the end of the input; without this a
            // trailing block quote would be silently dropped.
            if (quote != "")
            {
                lines.Add(quote);
            }

            return lines.ToArray();
        }

        /// <summary>
        /// Joins consecutive plain-text lines with <see cref="Break"/>. Blank
        /// lines and block-quote lines end the current run and are passed
        /// through unchanged.
        /// </summary>
        /// <param name="md">Input lines (block quotes already merged).</param>
        /// <returns>Lines with soft line breaks folded into single lines.</returns>
        static string[] MergeSoftLineBreaks(string[] md)
        {
            var lines = new List<string>();
            string text = "";

            foreach (string line in md)
            {
                bool endsRun = line == "" || line.StartsWith(">", StringComparison.Ordinal);
                if (endsRun)
                {
                    if (text != "")
                    {
                        lines.Add(text);
                        text = "";
                    }
                    lines.Add(line);
                }
                else
                {
                    if (text != "")
                    {
                        text += Break;
                    }
                    text += line;
                }
            }

            if (text != "")
            {
                lines.Add(text);
            }

            return lines.ToArray();
        }

        /// <summary>
        /// Wraps each group of non-blank lines in a <c>&lt;p&gt;</c> element.
        /// Block-quote lines inside a group are wrapped in
        /// <c>&lt;blockquote&gt;</c> and placed on their own output line.
        /// Blank lines are emitted as empty strings.
        /// </summary>
        /// <param name="md">Input lines (quotes and soft breaks already merged).</param>
        /// <returns>One HTML paragraph per group, separated by empty strings.</returns>
        static string[] MergeParagraphs(string[] md)
        {
            var lines = new List<string>();
            string paragraph = "";

            foreach (string line in md)
            {
                if (line == "")
                {
                    if (paragraph != "")
                    {
                        lines.Add(Bracket(paragraph, "p"));
                        paragraph = "";
                    }
                    lines.Add("");
                }
                else if (line.StartsWith(">", StringComparison.Ordinal))
                {
                    if (paragraph != "")
                    {
                        // Real newline so the blockquote starts on its own line.
                        paragraph += Break + "\n";
                    }
                    paragraph += Bracket(line.Substring(1), "blockquote");
                }
                else
                {
                    if (paragraph != "")
                    {
                        paragraph += Break;
                    }
                    paragraph += line;
                }
            }

            if (paragraph != "")
            {
                lines.Add(Bracket(paragraph, "p"));
            }

            return lines.ToArray();
        }

        /// <summary>
        /// Wraps trimmed text in an HTML element.
        /// </summary>
        /// <param name="s">Inner content; leading/trailing whitespace is removed.</param>
        /// <param name="bracket">Element name, e.g. <c>p</c> or <c>blockquote</c>.</param>
        /// <returns><c>&lt;bracket&gt;content&lt;/bracket&gt;</c>.</returns>
        static string Bracket(string s, string bracket)
        {
            // The end tag is "</name>"; "<name/>" is not a closing tag.
            return "<" + bracket + ">" + s.Trim() + "</" + bracket + ">";
        }

        /// <summary>
        /// Replaces every complete <c>~~text~~</c> pair with
        /// <c>&lt;del&gt;text&lt;/del&gt;</c>, in place. An unmatched opening
        /// <c>~~</c> is left untouched.
        /// </summary>
        /// <param name="md">Lines to rewrite; elements are overwritten.</param>
        static void ReplaceStrikeThroughs(string[] md)
        {
            const string Marker = "~~";

            for (int i = 0; i < md.Length; i++)
            {
                string s = md[i];
                int open = s.IndexOf(Marker, StringComparison.Ordinal);

                while (open >= 0)
                {
                    int close = s.IndexOf(Marker, open + Marker.Length, StringComparison.Ordinal);
                    if (close < 0)
                    {
                        // No closing marker: leave the remainder as-is.
                        break;
                    }

                    string head = s.Substring(0, open)
                        + "<del>"
                        + s.Substring(open + Marker.Length, close - open - Marker.Length)
                        + "</del>";
                    s = head + s.Substring(close + Marker.Length);

                    // Everything in 'head' is already processed; resume after it.
                    open = s.IndexOf(Marker, head.Length, StringComparison.Ordinal);
                }

                md[i] = s;
            }
        }
    }
}