Module

Data.String.CodePoints

Package
purescript-strings
Repository
purescript/purescript-strings

These functions allow PureScript strings to be treated as if they were sequences of Unicode code points instead of their true underlying implementation (sequences of UTF-16 code units). For nearly all uses of strings, these functions should be preferred over the ones in Data.String.

#CodePoint Source

newtype CodePoint

CodePoint is an Int bounded between 0 and 0x10FFFF, corresponding to Unicode code points.

Instances

#codePointAt Source

codePointAt :: Int -> String -> Maybe CodePoint

Returns the first code point of the string after dropping the given number of code points from the beginning, if there is such a code point. Operates in constant space and in time linear to the given index.

>>> codePointAt 1 "𝐀𝐀𝐀𝐀"
Just (CodePoint 0x1D400) -- represents "𝐀"
-- compare to Data.String:
>>> charAt 1 "𝐀𝐀𝐀𝐀"
Just 'οΏ½'

#codePointFromInt Source

codePointFromInt :: Int -> Maybe CodePoint
>>> it = codePointFromInt 0x1D400 -- U+1D400 MATHEMATICAL BOLD CAPITAL A
Just (CodePoint 0x1D400)

>>> map singleton it
Just "𝐀"

>>> codePointFromInt 0x110000 -- does not correspond to a Unicode code point
Nothing

#codePointToInt Source

codePointToInt :: CodePoint -> Int
>>> codePointToInt (codePointFromChar 'B')
66

>>> boldA = codePointFromInt 0x1D400
>>> boldA
Just (CodePoint 0x1D400)
>>> map codePointToInt boldA
Just 119808 -- is the same as 0x1D400

#codePointFromChar Source

codePointFromChar :: Char -> CodePoint

Creates a CodePoint from a given Char.

>>> codePointFromChar 'B'
CodePoint 0x42 -- represents 'B'

#count Source

count :: (CodePoint -> Boolean) -> String -> Int

Returns the number of code points in the leading sequence of code points which all match the given predicate. Operates in constant space and in time linear to the length of the string.

>>> count (\c -> codePointToInt c == 0x1D400) "𝐀𝐀 b c 𝐀"
2

#drop Source

drop :: Int -> String -> String

Drops the given number of code points from the beginning of the string. If the string does not have that many code points, returns the empty string. Operates in constant space and in time linear to the given number.

>>> drop 5 "𝐀𝐀 b c"
"c"
-- compare to Data.String:
>>> drop 5 "𝐀𝐀 b c"
"b c" -- because "𝐀" occupies 2 code units

#dropWhile Source

dropWhile :: (CodePoint -> Boolean) -> String -> String

Drops the leading sequence of code points which all match the given predicate from the string. Operates in constant space and in time linear to the length of the string.

>>> dropWhile (\c -> codePointToInt c == 0x1D400) "𝐀𝐀 b c 𝐀"
" b c 𝐀"

#fromCodePointArray Source

fromCodePointArray :: Array CodePoint -> String

Creates a string from an array of code points. Operates in space and time linear to the length of the array.

>>> codePointArray = toCodePointArray "c 𝐀"
>>> codePointArray
[CodePoint 0x63, CodePoint 0x20, CodePoint 0x1D400]
>>> fromCodePointArray codePointArray
"c 𝐀"

#indexOf Source

indexOf :: Pattern -> String -> Maybe Int

Returns the number of code points preceding the first match of the given pattern in the string. Returns Nothing when no matches are found.

>>> indexOf (Pattern "𝐀") "b 𝐀𝐀 c 𝐀"
Just 2
>>> indexOf (Pattern "o") "b 𝐀𝐀 c 𝐀"
Nothing

#indexOf' Source

indexOf' :: Pattern -> Int -> String -> Maybe Int

Returns the number of code points preceding the first match of the given pattern in the string. Pattern matches preceding the given index will be ignored. Returns Nothing when no matches are found.

>>> indexOf' (Pattern "𝐀") 4 "b 𝐀𝐀 c 𝐀"
Just 7
>>> indexOf' (Pattern "o") 4 "b 𝐀𝐀 c 𝐀"
Nothing

#lastIndexOf Source

lastIndexOf :: Pattern -> String -> Maybe Int

Returns the number of code points preceding the last match of the given pattern in the string. Returns Nothing when no matches are found.

>>> lastIndexOf (Pattern "𝐀") "b 𝐀𝐀 c 𝐀"
Just 7
>>> lastIndexOf (Pattern "o") "b 𝐀𝐀 c 𝐀"
Nothing

#lastIndexOf' Source

lastIndexOf' :: Pattern -> Int -> String -> Maybe Int

Returns the number of code points preceding the last match of the given pattern in the string. Pattern matches following the given index will be ignored. Returns Nothing when no matches are found.

>>> lastIndexOf' (Pattern "𝐀") 5 "b 𝐀𝐀 c 𝐀"
Just 3
>>> lastIndexOf' (Pattern "o") 5 "b 𝐀𝐀 c 𝐀"
Nothing

#length Source

length :: String -> Int

Returns the number of code points in the string. Operates in constant space and in time linear to the length of the string.

>>> length "b 𝐀𝐀 c 𝐀"
8
-- compare to Data.String:
>>> length "b 𝐀𝐀 c 𝐀"
11

#singleton Source

singleton :: CodePoint -> String

Creates a string containing just the given code point. Operates in constant space and time.

>>> map singleton (codePointFromInt 0x1D400)
Just "𝐀"

#splitAt Source

splitAt :: Int -> String -> Maybe { before :: String, after :: String }

Returns a record with strings created from the code points on either side of the given index. If the index is not within the string, Nothing is returned.

>>> splitAt 3 "b 𝐀𝐀 c 𝐀"
Just { before: "b 𝐀", after: "𝐀 c 𝐀" }

#take Source

take :: Int -> String -> String

Returns a string containing the given number of code points from the beginning of the given string. If the string does not have that many code points, returns the whole string. Operates in constant space and in time linear to the given number.

>>> take 3 "b 𝐀𝐀 c 𝐀"
"b 𝐀"
-- compare to Data.String:
>>> take 3 "b 𝐀𝐀 c 𝐀"
"b οΏ½"

#takeWhile Source

takeWhile :: (CodePoint -> Boolean) -> String -> String

Returns a string containing the leading sequence of code points which all match the given predicate from the string. Operates in constant space and in time linear to the length of the string.

>>> takeWhile (\c -> codePointToInt c == 0x1D400) "𝐀𝐀 b c 𝐀"
"𝐀𝐀"

#toCodePointArray Source

toCodePointArray :: String -> Array CodePoint

Creates an array of code points from a string. Operates in space and time linear to the length of the string.

>>> codePointArray = toCodePointArray "b 𝐀𝐀"
>>> codePointArray
[CodePoint 0x62, CodePoint 0x20, CodePoint 0x1D400, CodePoint 0x1D400]
>>> map singleton codePointArray
["b", " ", "𝐀", "𝐀"]

#uncons Source

uncons :: String -> Maybe { head :: CodePoint, tail :: String }

Returns a record with the first code point and the remaining code points of the string. Returns Nothing if the string is empty. Operates in constant space and time.

>>> uncons "𝐀𝐀 c 𝐀"
Just { head: CodePoint 0x1D400, tail: "𝐀 c 𝐀" }
>>> uncons ""
Nothing

Re-exports from Data.String

#Replacement Source

newtype Replacement

A newtype used in cases where a replacement for a pattern is to be specified.

Constructors

Instances

#Pattern Source

newtype Pattern

A newtype used in cases where there is a string to be matched.

pursPattern = Pattern ".purs"
--can be used like this:
contains pursPattern "Test.purs"
   == true

Constructors

Instances

#trim Source

trim :: String -> String

Removes whitespace from the beginning and end of a string, including whitespace characters and line terminators.

trim "   Hello  \n World\n\t    " == "Hello  \n World"

#toUpper Source

toUpper :: String -> String

Returns the argument converted to uppercase.

toUpper "Hello" == "HELLO"

#toLower Source

toLower :: String -> String

Returns the argument converted to lowercase.

toLower "hElLo" == "hello"

#toCharArray Source

toCharArray :: String -> Array Char

Converts the string into an array of characters.

toCharArray "Hello☺\n" == ['H','e','l','l','o','☺','\n']

#toChar Source

toChar :: String -> Maybe Char

Converts the string to a character, if the length of the string is exactly 1.

toChar "l" == Just 'l'
toChar "Hi" == Nothing -- since length is not 1

#stripSuffix Source

stripSuffix :: Pattern -> String -> Maybe String

If the string ends with the given suffix, return the portion of the string left after removing it, as a Just value. Otherwise, return Nothing.

stripSuffix (Pattern ".exe") "psc.exe" == Just "psc"
stripSuffix (Pattern ".exe") "psc" == Nothing

#stripPrefix Source

stripPrefix :: Pattern -> String -> Maybe String

If the string starts with the given prefix, return the portion of the string left after removing it, as a Just value. Otherwise, return Nothing.

stripPrefix (Pattern "http:") "http://purescript.org" == Just "//purescript.org"
stripPrefix (Pattern "http:") "https://purescript.org" == Nothing

#split Source

split :: Pattern -> String -> Array String

Returns the substrings of the second string separated along occurrences of the first string.

split (Pattern " ") "hello world" == ["hello", "world"]

#replaceAll Source

replaceAll :: Pattern -> Replacement -> String -> String

Replaces all occurrences of the pattern with the replacement string.

replaceAll (Pattern "<=") (Replacement "≤") "a <= b <= c" == "a ≤ b ≤ c"

#replace Source

replace :: Pattern -> Replacement -> String -> String

Replaces the first occurrence of the pattern with the replacement string.

replace (Pattern "<=") (Replacement "≤") "a <= b <= c" == "a ≤ b <= c"

#null Source

null :: String -> Boolean

Returns true if the given string is empty.

null "" == true
null "Hi" == false

#localeCompare Source

localeCompare :: String -> String -> Ordering

Compare two strings in a locale-aware fashion. This is in contrast to the Ord instance on String which treats strings as arrays of code units:

"ä" `localeCompare` "b" == LT
"ä" `compare` "b" == GT

#joinWith Source

joinWith :: String -> Array String -> String

Joins the strings in the array together, inserting the first argument as separator between them.

joinWith ", " ["apple", "banana", "orange"] == "apple, banana, orange"

#fromCharArray Source

fromCharArray :: Array Char -> String

Converts an array of characters into a string.

fromCharArray ['H', 'e', 'l', 'l', 'o'] == "Hello"

#contains Source

contains :: Pattern -> String -> Boolean

Checks whether the pattern appears in the given string.

contains (Pattern "needle") "haystack with needle" == true
contains (Pattern "needle") "haystack" == false

#charCodeAt Source

charCodeAt :: Int -> String -> Maybe Int

Returns the numeric Unicode value of the character at the given index, if the index is within bounds.

charCodeAt 2 "5 €" == Just 0x20AC
charCodeAt 10 "5 €" == Nothing

#charAt Source

charAt :: Int -> String -> Maybe Char

Returns the character at the given index, if the index is within bounds.

charAt 2 "Hello" == Just 'l'
charAt 10 "Hello" == Nothing