/*---------------------------------------------------------------------------
  Copyright 2012-2021, Microsoft Research, Daan Leijen.

  This is free software; you can redistribute it and/or modify it under the
  terms of the Apache License, Version 2.0. A copy of the License can be
  found in the LICENSE file at the root of this distribution.
---------------------------------------------------------------------------*/

/* Regular expressions.

   The regular expressions conform to the regular expressions of JavaScript
   as described at <https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions>
*/
// Module header: JavaScript-style regular expressions.
module std/text/regex

import std/num/int32

extern import
  c { conan="pcre2/[>=10.37]"; vcpkg="pcre2"; library="pcre2-8" }

extern import
  cs file "regex-inline.cs"
  js file "regex-inline.js"
  c  file "regex-inline.c"


// Abstract type of a regular expression object.
// `obj` holds the opaque backend matcher returned by `regex-create`;
// `src` keeps the pattern text the expression was built from (see `source`).
abstract value struct regex( obj: any, src : string )

// Return the pattern of the regular expression `r` as a string.
pub fun source( r : regex ) : string
  r.src

// Backend primitive used by `regex` to obtain the opaque matcher object
// for the pattern `source` with the given case and multi-line flags.
extern regex-create(source : string, ignore-case : bool, multi-line : bool) : any
  c  "kk_regex_create"
  js "$regexCreate"
  cs "RegEx.Create"

// Backend primitive behind `exec`: run the matcher object `regex` over `str`
// beginning at offset `start`. Callers treat a result that is not a `Cons`
// as "no match".
extern regex-exec( regex : any, str : string, start : ssize_t ) : list<sslice>
  c  "kk_regex_exec"
  js "$regexExec"
  cs "RegEx.Exec"

// Backend primitive behind `exec-all`: run the matcher object `regex` over
// `str` beginning at offset `start`, with `atmost` as the match limit
// (`exec-all` passes `-1` by default).
extern regex-exec-all( regex : any, str : string, start : ssize_t, atmost : ssize_t ) : list<list<sslice>>
  c  "kk_regex_exec_all"
  js "$regexExecAll"
  cs "RegEx.ExecAll"


// How many groups are captured by this regex?
// Works by deleting everything matched by `rx-nongroup` from the pattern
// source and counting the characters that are left over.
pub fun groups-count( r : regex ) : int
  r.source.replace-all(rx-nongroup,"").count

// Pattern used by `groups-count` to strip the parts of a pattern source that
// do not open a capture group: plain text, escape sequences, `(?` openers,
// and bracketed character classes.
val rx-nongroup = regex("[^\\\\\\[(]+|\\\\[\\s\\S]?|\\(\\?|\\[(?:[^\\\\\\]]|\\\\.)*\\]")

// Create a new regular expression. Takes two optional parameters. Set `ignorecase` to `True`
// to ignore uppercase/lowercase distinction. If `multiline` is set to `True`, then `^` and `$`
// match also the beginning and end of every line (instead of the entire input).
pub fun regex( regex :string, ignorecase :bool = False, multiline : bool = False ) : regex
  Regex(regex-create(regex,ignorecase,multiline), regex)

// Find a match for a regular expression.
// The search starts at offset 0 of `s`.
// See also `find` and `contains`
pub fun exec( regex :regex, s : string ) : list<sslice>
  regex-exec(regex.obj,s,0.ssize_t)

// Match a regular expression `regex` over a string `s`.
// Matches at most `atmost` times (and matches all by default).
// Returns always an odd number of elements where every even
// element is a match and the odd ones the string parts between the
// matches.
// See also `find-all` and `strings`.
pub fun exec-all( regex : regex, s : string, atmost : int = -1 ) : list<list<sslice>>
  regex-exec-all(regex.obj,s,0.ssize_t,atmost.ssize_t)

// Return the full matched string of a capture group
// (the first slice of `matched`), or the empty string if the list is empty.
pub fun captured( matched : list<sslice> ) : string
  match matched
    Cons(s) -> s.string
    _ -> ""

// Return the full matched string part for a list of matched capture groups.
pub fun captures( xs : list<list<sslice>> ) : list<string>
  xs.map(captured)

// Find a match for a regular expression.
// Returns `Nothing` if `exec` yields no slices.
// See also `exec`
pub fun find( s : string, r : regex ) : maybe<string>
  match r.exec(s)
    Cons(m) -> Just(m.string)
    _ -> Nothing

// Does a regular expression pattern occur in a string `s`?
// (note: called `test` in javascript)
pub fun contains( s : string, r : regex ) : bool
  r.exec(s).is-cons

// Filter only for the matched parts.
// Takes the alternating list produced by `exec-all` and keeps every second
// element (the matches), dropping the parts in between.
fun filter-matches( xs : list<list<sslice>> ) : list<list<sslice>>
  match xs
    Cons(_,Cons(m,mm)) -> Cons(m,filter-matches(mm))
    _ -> Nil

// Find all matches for a regular expression in a string.
// `atmost` is passed on to `exec-all` (default `-1`).
pub fun find-all( s : string, r : regex, atmost : int = -1 ) : list<string>
  r.exec-all(s,atmost).filter-matches.captures
// Walk the alternating list from `exec-all` (`pre`, match, `pre`, match, ..., `post`),
// replacing each match by `repl(match)` and keeping the in-between parts as is.
// Pieces are pushed onto `acc` in reverse order and joined at the end.
fun concat-replace( matches : list<list<sslice>>, repl : (list<sslice>) -> e string, acc : list<string> ) : e string
  match matches
    Cons(pre,Cons(m,mm)) -> concat-replace( mm, repl, Cons(repl(m), Cons(pre.captured,acc)) )
    Cons(post,Nil)       -> Cons(post.captured,acc).reverse-join
    Nil                  -> acc.reverse-join

// Replace all occurrences of the regular expression `r` by the result of the
// replacement fun `repl` in a string `s`. At most `atmost` matches are
// replaced (all by default).
pub fun replace-all( s : string, r : regex, repl : (list<sslice>) -> e string, atmost : int = -1 ) : e string
  r.exec-all( s, atmost ).concat-replace(repl,[])

// Replace the first occurrence of the regular expression `r` by the result of
// the replacement fun `repl` in a string `s`.
pub fun replace( s : string, r : regex, repl : (list<sslice>) -> e string ) : e string
  s.replace-all( r, repl, 1 )

// Check if a capture group was matched.
pub fun matched( s : sslice ) : bool
  s.is-valid

// Pattern for the `$n`, `$&` and `$$` forms in a replacement string.
// Kept at the top level so it is built once rather than on every call of
// `replace-captures` (which runs once per match).
val rx-replace-ref = regex("\\$(?:(\\d)|(\\&)|(\\$))")

// Replace using a replacement string that can contain `$$` for a `$` sign, `$n` for a capture group,
// `$&` for the entire match `==$0`.
// `caps` are the capture slices of the current match; a reference to a group
// that is not present in `caps` expands to the empty string.
fun replace-captures( caps : list<sslice>, repl : string ) : string
  replace-all( repl, rx-replace-ref ) fn(cap)
    match cap
      [_,digit,amp,dollar] ->
        if dollar.is-valid then "$" else
          // `$&` selects group 0; otherwise use the digit (0 if it does not parse)
          val grp = if amp.is-valid then 0 else parse-int(digit.string).default(0)
          match caps[grp]
            Nothing -> ""
            Just(s) -> s.string
      // unexpected capture shape: emit a plain `$`
      _ -> "$"
// Replace all occurrences of `regex` with the replacement string `repl` in a string `s`.
// The replacement string can contain `$$` for a `$` sign, `$n` for a capture group,
// `$&` for the entire match `==$0`.
pub fun string/replace-all( s : string, regex : regex, repl : string, atmost : int = -1 ) : string
  // only expand references when the replacement actually contains a `$`
  if repl.contains("$")
    then replace-all(s, regex, fn(caps){ replace-captures(caps,repl) }, atmost)
    else replace-all(s, regex, fn(_){ repl }, atmost)

// Replace the first occurrence of `regex` with a replacement string `repl` in a string `s`.
// The replacement string can contain `$$` for a `$` sign, `$n` for a capture group,
// `$&` for the entire match `==$0`.
pub fun string/replace( s : string, regex : regex, repl : string ) : string
  replace-all( s, regex, repl, 1 )

// Filter only for the non-matched parts.
// Keeps the first, third, ... element of the alternating list from `exec-all`.
fun filter-non-matches( xs : list<list<sslice>> ) : list<list<sslice>>
  match xs
    Cons(s,Cons(_,xx)) -> Cons(s,filter-non-matches(xx))
    Cons(s,Nil)        -> [s]
    _                  -> []

// Split a string `s` in at most `atmost` parts using a regular expression `r` as separator.
pub fun split( s : string, r : regex, atmost : int = -1 ) : list<string>
  r.exec-all(s,atmost).filter-non-matches.captures
// Does `s` contain a match for the pattern `[ab]+c`?
pub fun testabc(s) : bool
  s.contains(regex("[ab]+c"))

/*
// Split a string `s` over separator `sep` where `sep` does not occur in
// _tokens_ matching `exclude`.
// For example: ``split-exclude("comma,'sep,arated',values", regex(","),regex("'[^']*'|[^',]"))``
pub fun split-exclude( s : string, sep : regex, exclude : regex ) : list<string>
  if s=="" then [] else
    val splitr = regex( r"^(?:((?:" ++ exclude.source ++ ")+)|(" ++ sep.source ++ "))")
    s.split-excludex(splitr,"")

fun split-excludex( s : string, splitr : regex, acc : string ) : list<string>
  if s=="" return [acc]
  match( s.find(splitr) )
    Nothing -> split-excludex( pretend-decreasing(s.tail), splitr, acc ++ s.head) // todo: improve efficiency?
    Just(cap) ->
      if (cap.groups.matched(1))
        split-excludex( pretend-decreasing(cap.after1), splitr, acc ++ cap.matched)
      else
        Cons(acc, split-excludex( pretend-decreasing(cap.after1), splitr, "")) // todo: make tail recursive
*/