`manyCharsTill` 的文档中写道(着重号为我所加):
`manyCharsTill cp endp` 使用字符解析器 `cp` 逐个解析字符,直到解析器 `endp` 成功为止。它在 `endp` **之后**停止,并将已解析的字符作为字符串返回。
所以你不应该把 `between` 与 `manyCharsTill` 结合使用(`manyCharsTill` 已经消费了结束分隔符,`between` 的结束解析器将找不到它);你应该写成类似这样:
pstring "\"\"\"" >>. manyCharsTill anyChar (pstring "\"\"\"")
不过碰巧的是,我可以帮你省下不少工夫。我一直在业余时间用 FParsec 开发一个 TOML 解析器。它还远未完成,但字符串部分已经可以正常工作,并且能正确处理反斜杠转义(据我所知是这样:我做过比较充分的测试,但还谈不上详尽)。我唯一缺少的是“如果换行符紧跟在起始分隔符之后,则去掉这第一个换行符”这条规则,而这一点你已经用 `optional newline` 处理了。所以只要把这一部分加到下面的代码中,你就应该得到一个可用的 TOML 字符串解析器。
顺便说一句,我计划(如果能完成的话)以 MIT 许可证发布我的代码。因此,下面的代码块我在此以 MIT 许可证发布。如果对你有用,欢迎在你的项目中使用。
// Parses a four-hex-digit code point: anything from 0000 to FFFF *except* the
// surrogate range D800-DFFF.
// NOTE(review): `exactly` is not defined in this excerpt; it is assumed to
// parse exactly N characters satisfying the predicate and return them as a
// string -- confirm against its definition.
let pShortCodepointInHex =
    // After a leading 'd'/'D', only 0-7 may follow. Anything else fails once the
    // 'd' has been consumed, so the generic four-digit alternative below is not
    // tried and surrogates are rejected.
    let pSecondDigitBelowSurrogates =
        anyOf "01234567" <?> "a Unicode scalar value (range D800-DFFF not allowed)"
    let pDPrefixed =
        (anyOf "dD" >>. pSecondDigitBelowSurrogates) .>>. exactly 2 isHex
        |>> fun (second, rest) -> sprintf "d%c%s" second rest
    // Any other four hex digits are accepted as they are.
    let pAnyFourDigits = exactly 4 isHex <?> "a Unicode scalar value"
    pDPrefixed <|> pAnyFourDigits
// Parses an eight-hex-digit code point: anything from 00000000 to 0010FFFF
// *except* the surrogate range D800-DFFF.
// NOTE(review): `exactly` is not defined in this excerpt; it is assumed to
// parse exactly N characters satisfying the predicate -- confirm.
let pLongCodepointInHex =
    // "0000" + four digits: delegate so the surrogate check is applied.
    let pFourZeroPrefix = pstring "0000" >>. pShortCodepointInHex
    // "000" + five digits. Inputs starting with "0000" never get here because
    // the alternative above has already matched that prefix.
    let pThreeZeroPrefix = pstring "000" >>. exactly 5 isHex
    // "0010" + four digits: the top of the range, 00100000 to 0010FFFF.
    let pTopOfRange = pstring "0010" >>. exactly 4 isHex |>> fun s -> "0010" + s
    choice [ pFourZeroPrefix; pThreeZeroPrefix; pTopOfRange ]
    <?> "a Unicode scalar value (i.e., in range 00000000 to 0010FFFF)"
// Wraps a hex-digit parser `p` so that it yields the decoded text instead.
// The raw text matched by `p` is parsed as a hexadecimal integer and passed to
// System.Char.ConvertFromUtf32; the value `p` itself produced is ignored.
let toCharOrSurrogatePair p =
    let decodeHex (hexDigits: string) =
        let scalar = System.Int32.Parse(hexDigits, System.Globalization.NumberStyles.HexNumber)
        System.Char.ConvertFromUtf32 scalar
    withSkippedString (fun codePoint _ -> decodeHex codePoint) p
// Parses the single character that follows a backslash in a standard escape
// (one of: \ " b f n r t) and yields the text that the escape stands for.
let pStandardBackslashEscape =
    let unescape escapeChar =
        match escapeChar with
        | 'b' -> "\b"      // U+0008 BACKSPACE
        | 'f' -> "\u000c"  // U+000C FORM FEED
        | 'n' -> "\n"      // U+000A LINE FEED
        | 'r' -> "\r"      // U+000D CARRIAGE RETURN
        | 't' -> "\t"      // U+0009 CHARACTER TABULATION, a.k.a. Tab or Horizontal Tab
        | literal -> string literal // backslash and double quote stand for themselves
    anyOf "\\\"bfnrt" |>> unescape
// Parses the part of a Unicode escape that follows the backslash:
// 'u' plus a four-digit code point, or 'U' plus an eight-digit code point.
// Yields the decoded character(s) as a string.
let pUnicodeEscape =
    let pFourDigitForm = pchar 'u' >>. toCharOrSurrogatePair pShortCodepointInHex
    let pEightDigitForm = pchar 'U' >>. toCharOrSurrogatePair pLongCodepointInHex
    pFourDigitForm <|> pEightDigitForm
// Parses a complete backslash escape (standard or Unicode) and yields the text
// it stands for. The backslash itself is discarded.
let pEscapedChar =
    let pEscapeBody = pStandardBackslashEscape <|> pUnicodeEscape
    pstring "\\" >>. pEscapeBody

// Parses a single double-quote character.
let quote = pchar '"'
// True for characters that may appear unescaped in a basic string: everything
// above U+001F except backslash, double quote, and U+007F.
let isBasicStrChar c =
    match c with
    | '\\' | '"' | '\u007f' -> false
    | _ -> c > '\u001f'
// Parses a (possibly empty) run of characters that need no escaping.
let pBasicStrChars = manySatisfy isBasicStrChar

// Parses a double-quoted basic string: runs of plain characters separated by
// backslash escapes, concatenated into one string. The surrounding quotes are
// not part of the result.
let pBasicStr =
    between quote quote (stringsSepBy pBasicStrChars pEscapedChar)
// Skips a backslash, the newline immediately after it, and then any
// whitespace that follows. Produces no value.
let pEscapedNewline = skipChar '\\' >>. skipNewline >>. spaces

// True for characters that may appear unescaped in a multi-line string: a
// newline, or anything isBasicStrChar accepts.
let isMultilineStrChar c =
    match c with
    | '\n' -> true
    | _ -> isBasicStrChar c

// Parses a (possibly empty) run of unescaped multi-line string characters.
let pMultilineStrChars = manySatisfy isMultilineStrChar

// Parses the three-double-quote delimiter of a multi-line string.
let pTripleQuote = pstring "\"\"\""
// Parses a triple-quoted multi-line basic string and yields its contents with
// escapes resolved. The delimiters are not part of the result.
//
// Compared with a plain "text between triple quotes" parser, this handles:
//   * a newline directly after the opening delimiter, which is dropped;
//   * a line-ending backslash, which removes the backslash, the newline and
//     all whitespace that follows (via pEscapedNewline);
//   * one or two consecutive double quotes inside the body, which are kept as
//     content; only three in a row close the string.
let pMultilineStr =
    // `attempt` is needed: pEscapedNewline consumes the backslash before it can
    // find out that no newline follows, and without backtracking the ordinary
    // escapes (\n, \t, \uXXXX, ...) would never get a chance to run.
    let pLineContinuation = attempt pEscapedNewline >>% ""
    // A quote that does not start the closing delimiter is ordinary content.
    let pEmbeddedQuote = notFollowedByString "\"\"\"" >>. pstring "\""
    let pSeparator = pLineContinuation <|> pEscapedChar <|> pEmbeddedQuote
    // Opening delimiter, with the optional first newline trimmed.
    let pOpen = pTripleQuote >>. optional newline
    stringsSepBy pMultilineStrChars pSeparator |> between pOpen pTripleQuote