module PostgresqlTypes.Tsvector
( Tsvector,
toLexemeList,
refineFromLexemeList,
normalizeFromLexemeList,
Weight (..),
)
where
import qualified Data.Attoparsec.Text as Attoparsec
import qualified Data.ByteString as ByteString
import qualified Data.List as List
import qualified Data.Map.Strict as Map
import qualified Data.Text as Text
import qualified Data.Text.Encoding as Text.Encoding
import qualified Data.Vector as Vector
import PostgresqlTypes.Algebra
import PostgresqlTypes.Prelude
import PostgresqlTypes.Via
import qualified PtrPeeker
import qualified PtrPoker.Write as Write
import qualified Test.QuickCheck as QuickCheck
import qualified TextBuilder
data Weight = AWeight | BWeight | CWeight | DWeight
deriving stock (Weight -> Weight -> Bool
(Weight -> Weight -> Bool)
-> (Weight -> Weight -> Bool) -> Eq Weight
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Weight -> Weight -> Bool
== :: Weight -> Weight -> Bool
$c/= :: Weight -> Weight -> Bool
/= :: Weight -> Weight -> Bool
Eq, Eq Weight
Eq Weight =>
(Weight -> Weight -> Ordering)
-> (Weight -> Weight -> Bool)
-> (Weight -> Weight -> Bool)
-> (Weight -> Weight -> Bool)
-> (Weight -> Weight -> Bool)
-> (Weight -> Weight -> Weight)
-> (Weight -> Weight -> Weight)
-> Ord Weight
Weight -> Weight -> Bool
Weight -> Weight -> Ordering
Weight -> Weight -> Weight
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: Weight -> Weight -> Ordering
compare :: Weight -> Weight -> Ordering
$c< :: Weight -> Weight -> Bool
< :: Weight -> Weight -> Bool
$c<= :: Weight -> Weight -> Bool
<= :: Weight -> Weight -> Bool
$c> :: Weight -> Weight -> Bool
> :: Weight -> Weight -> Bool
$c>= :: Weight -> Weight -> Bool
>= :: Weight -> Weight -> Bool
$cmax :: Weight -> Weight -> Weight
max :: Weight -> Weight -> Weight
$cmin :: Weight -> Weight -> Weight
min :: Weight -> Weight -> Weight
Ord, Int -> Weight -> ShowS
[Weight] -> ShowS
Weight -> String
(Int -> Weight -> ShowS)
-> (Weight -> String) -> ([Weight] -> ShowS) -> Show Weight
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Weight -> ShowS
showsPrec :: Int -> Weight -> ShowS
$cshow :: Weight -> String
show :: Weight -> String
$cshowList :: [Weight] -> ShowS
showList :: [Weight] -> ShowS
Show, ReadPrec [Weight]
ReadPrec Weight
Int -> ReadS Weight
ReadS [Weight]
(Int -> ReadS Weight)
-> ReadS [Weight]
-> ReadPrec Weight
-> ReadPrec [Weight]
-> Read Weight
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
$creadsPrec :: Int -> ReadS Weight
readsPrec :: Int -> ReadS Weight
$creadList :: ReadS [Weight]
readList :: ReadS [Weight]
$creadPrec :: ReadPrec Weight
readPrec :: ReadPrec Weight
$creadListPrec :: ReadPrec [Weight]
readListPrec :: ReadPrec [Weight]
Read, Int -> Weight
Weight -> Int
Weight -> [Weight]
Weight -> Weight
Weight -> Weight -> [Weight]
Weight -> Weight -> Weight -> [Weight]
(Weight -> Weight)
-> (Weight -> Weight)
-> (Int -> Weight)
-> (Weight -> Int)
-> (Weight -> [Weight])
-> (Weight -> Weight -> [Weight])
-> (Weight -> Weight -> [Weight])
-> (Weight -> Weight -> Weight -> [Weight])
-> Enum Weight
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
$csucc :: Weight -> Weight
succ :: Weight -> Weight
$cpred :: Weight -> Weight
pred :: Weight -> Weight
$ctoEnum :: Int -> Weight
toEnum :: Int -> Weight
$cfromEnum :: Weight -> Int
fromEnum :: Weight -> Int
$cenumFrom :: Weight -> [Weight]
enumFrom :: Weight -> [Weight]
$cenumFromThen :: Weight -> Weight -> [Weight]
enumFromThen :: Weight -> Weight -> [Weight]
$cenumFromTo :: Weight -> Weight -> [Weight]
enumFromTo :: Weight -> Weight -> [Weight]
$cenumFromThenTo :: Weight -> Weight -> Weight -> [Weight]
enumFromThenTo :: Weight -> Weight -> Weight -> [Weight]
Enum, Weight
Weight -> Weight -> Bounded Weight
forall a. a -> a -> Bounded a
$cminBound :: Weight
minBound :: Weight
$cmaxBound :: Weight
maxBound :: Weight
Bounded)
instance Arbitrary Weight where
arbitrary :: Gen Weight
arbitrary = [Weight] -> Gen Weight
forall a. HasCallStack => [a] -> Gen a
QuickCheck.elements [Weight
AWeight, Weight
BWeight, Weight
CWeight, Weight
DWeight]
instance Hashable Weight where
hashWithSalt :: Int -> Weight -> Int
hashWithSalt Int
salt = Int -> Int -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt Int
salt (Int -> Int) -> (Weight -> Int) -> Weight -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Weight -> Int
forall a. Enum a => a -> Int
fromEnum
data Tsvector = Tsvector (Vector (Text, Vector (Word16, Weight)))
deriving stock (Tsvector -> Tsvector -> Bool
(Tsvector -> Tsvector -> Bool)
-> (Tsvector -> Tsvector -> Bool) -> Eq Tsvector
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Tsvector -> Tsvector -> Bool
== :: Tsvector -> Tsvector -> Bool
$c/= :: Tsvector -> Tsvector -> Bool
/= :: Tsvector -> Tsvector -> Bool
Eq, Eq Tsvector
Eq Tsvector =>
(Tsvector -> Tsvector -> Ordering)
-> (Tsvector -> Tsvector -> Bool)
-> (Tsvector -> Tsvector -> Bool)
-> (Tsvector -> Tsvector -> Bool)
-> (Tsvector -> Tsvector -> Bool)
-> (Tsvector -> Tsvector -> Tsvector)
-> (Tsvector -> Tsvector -> Tsvector)
-> Ord Tsvector
Tsvector -> Tsvector -> Bool
Tsvector -> Tsvector -> Ordering
Tsvector -> Tsvector -> Tsvector
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: Tsvector -> Tsvector -> Ordering
compare :: Tsvector -> Tsvector -> Ordering
$c< :: Tsvector -> Tsvector -> Bool
< :: Tsvector -> Tsvector -> Bool
$c<= :: Tsvector -> Tsvector -> Bool
<= :: Tsvector -> Tsvector -> Bool
$c> :: Tsvector -> Tsvector -> Bool
> :: Tsvector -> Tsvector -> Bool
$c>= :: Tsvector -> Tsvector -> Bool
>= :: Tsvector -> Tsvector -> Bool
$cmax :: Tsvector -> Tsvector -> Tsvector
max :: Tsvector -> Tsvector -> Tsvector
$cmin :: Tsvector -> Tsvector -> Tsvector
min :: Tsvector -> Tsvector -> Tsvector
Ord)
deriving (Int -> Tsvector -> ShowS
[Tsvector] -> ShowS
Tsvector -> String
(Int -> Tsvector -> ShowS)
-> (Tsvector -> String) -> ([Tsvector] -> ShowS) -> Show Tsvector
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Tsvector -> ShowS
showsPrec :: Int -> Tsvector -> ShowS
$cshow :: Tsvector -> String
show :: Tsvector -> String
$cshowList :: [Tsvector] -> ShowS
showList :: [Tsvector] -> ShowS
Show, ReadPrec [Tsvector]
ReadPrec Tsvector
Int -> ReadS Tsvector
ReadS [Tsvector]
(Int -> ReadS Tsvector)
-> ReadS [Tsvector]
-> ReadPrec Tsvector
-> ReadPrec [Tsvector]
-> Read Tsvector
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
$creadsPrec :: Int -> ReadS Tsvector
readsPrec :: Int -> ReadS Tsvector
$creadList :: ReadS [Tsvector]
readList :: ReadS [Tsvector]
$creadPrec :: ReadPrec Tsvector
readPrec :: ReadPrec Tsvector
$creadListPrec :: ReadPrec [Tsvector]
readListPrec :: ReadPrec [Tsvector]
Read, String -> Tsvector
(String -> Tsvector) -> IsString Tsvector
forall a. (String -> a) -> IsString a
$cfromString :: String -> Tsvector
fromString :: String -> Tsvector
IsString) via (ViaIsScalar Tsvector)
instance Hashable Tsvector where
hashWithSalt :: Int -> Tsvector -> Int
hashWithSalt Int
salt (Tsvector Vector (Text, Vector (Word16, Weight))
lexemes) =
(Int -> (Text, Vector (Word16, Weight)) -> Int)
-> Int -> Vector (Text, Vector (Word16, Weight)) -> Int
forall a b. (a -> b -> a) -> a -> Vector b -> a
Vector.foldl' (\Int
s (Text
t, Vector (Word16, Weight)
ps) -> (Int -> (Word16, Weight) -> Int)
-> Int -> Vector (Word16, Weight) -> Int
forall a b. (a -> b -> a) -> a -> Vector b -> a
Vector.foldl' (\Int
s' (Word16
p, Weight
w) -> Int
s' Int -> Word16 -> Int
forall a. Hashable a => Int -> a -> Int
`hashWithSalt` Word16
p Int -> Int -> Int
forall a. Hashable a => Int -> a -> Int
`hashWithSalt` Weight -> Int
forall a. Enum a => a -> Int
fromEnum Weight
w) (Int
s Int -> Text -> Int
forall a. Hashable a => Int -> a -> Int
`hashWithSalt` Text
t) Vector (Word16, Weight)
ps) Int
salt Vector (Text, Vector (Word16, Weight))
lexemes
instance Arbitrary Tsvector where
arbitrary :: Gen Tsvector
arbitrary = do
Int
size <- Gen Int
QuickCheck.getSize
Int
numLexemes <- (Int, Int) -> Gen Int
forall a. Random a => (a, a) -> Gen a
QuickCheck.choose (Int
0, Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
0 Int
size)
Map Text [(Word16, Weight)]
lexemeMap <-
[(Text, [(Word16, Weight)])] -> Map Text [(Word16, Weight)]
forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList ([(Text, [(Word16, Weight)])] -> Map Text [(Word16, Weight)])
-> Gen [(Text, [(Word16, Weight)])]
-> Gen (Map Text [(Word16, Weight)])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int
-> Gen (Text, [(Word16, Weight)])
-> Gen [(Text, [(Word16, Weight)])]
forall a. Int -> Gen a -> Gen [a]
QuickCheck.vectorOf Int
numLexemes do
Text
token <-
String -> Text
Text.pack
(String -> Text) -> Gen String -> Gen Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Gen Char -> Gen String
forall a. Gen a -> Gen [a]
QuickCheck.listOf1
(Gen Char -> (Char -> Bool) -> Gen Char
forall a. Gen a -> (a -> Bool) -> Gen a
QuickCheck.suchThat Gen Char
forall a. Arbitrary a => Gen a
arbitrary (\Char
c -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\NUL'))
Int
numPositions <- (Int, Int) -> Gen Int
forall a. Random a => (a, a) -> Gen a
QuickCheck.choose (Int
0, Int
3)
[(Word16, Weight)]
positions <-
[(Word16, Weight)] -> [(Word16, Weight)]
sortAndDedupPositions ([(Word16, Weight)] -> [(Word16, Weight)])
-> Gen [(Word16, Weight)] -> Gen [(Word16, Weight)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> Gen (Word16, Weight) -> Gen [(Word16, Weight)]
forall a. Int -> Gen a -> Gen [a]
QuickCheck.vectorOf Int
numPositions do
Word16
pos <- (Word16, Word16) -> Gen Word16
forall a. Random a => (a, a) -> Gen a
QuickCheck.choose (Word16
1, Word16
16383)
Weight
weight <- Gen Weight
forall a. Arbitrary a => Gen a
arbitrary
pure (Word16
pos, Weight
weight)
pure (Text
token, [(Word16, Weight)]
positions)
let sorted :: [(Text, [(Word16, Weight)])]
sorted = Map Text [(Word16, Weight)] -> [(Text, [(Word16, Weight)])]
forall k a. Map k a -> [(k, a)]
Map.toAscList Map Text [(Word16, Weight)]
lexemeMap
Tsvector -> Gen Tsvector
forall a. a -> Gen a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Vector (Text, Vector (Word16, Weight)) -> Tsvector
Tsvector ([(Text, Vector (Word16, Weight))]
-> Vector (Text, Vector (Word16, Weight))
forall a. [a] -> Vector a
Vector.fromList (((Text, [(Word16, Weight)]) -> (Text, Vector (Word16, Weight)))
-> [(Text, [(Word16, Weight)])]
-> [(Text, Vector (Word16, Weight))]
forall a b. (a -> b) -> [a] -> [b]
map (\(Text
t, [(Word16, Weight)]
ps) -> (Text
t, [(Word16, Weight)] -> Vector (Word16, Weight)
forall a. [a] -> Vector a
Vector.fromList [(Word16, Weight)]
ps)) [(Text, [(Word16, Weight)])]
sorted)))
shrink :: Tsvector -> [Tsvector]
shrink (Tsvector Vector (Text, Vector (Word16, Weight))
lexemes) =
([(Text, [(Word16, Weight)])] -> Tsvector)
-> [[(Text, [(Word16, Weight)])]] -> [Tsvector]
forall a b. (a -> b) -> [a] -> [b]
map (\[(Text, [(Word16, Weight)])]
ls -> [(Text, Vector (Word16, Weight))] -> Tsvector
normalizeLexemes (((Text, [(Word16, Weight)]) -> (Text, Vector (Word16, Weight)))
-> [(Text, [(Word16, Weight)])]
-> [(Text, Vector (Word16, Weight))]
forall a b. (a -> b) -> [a] -> [b]
map (\(Text
t, [(Word16, Weight)]
ps) -> (Text
t, [(Word16, Weight)] -> Vector (Word16, Weight)
forall a. [a] -> Vector a
Vector.fromList [(Word16, Weight)]
ps)) [(Text, [(Word16, Weight)])]
ls)) ([[(Text, [(Word16, Weight)])]] -> [Tsvector])
-> [[(Text, [(Word16, Weight)])]] -> [Tsvector]
forall a b. (a -> b) -> a -> b
$
((Text, [(Word16, Weight)]) -> [(Text, [(Word16, Weight)])])
-> [(Text, [(Word16, Weight)])] -> [[(Text, [(Word16, Weight)])]]
forall a. (a -> [a]) -> [a] -> [[a]]
QuickCheck.shrinkList
( \(Text
tok, [(Word16, Weight)]
positions) -> do
Text
shrunkenTok <- (String -> Text) -> (Text -> String) -> Text -> [Text]
forall a b. Arbitrary a => (a -> b) -> (b -> a) -> b -> [b]
QuickCheck.shrinkMap String -> Text
Text.pack Text -> String
Text.unpack Text
tok
[(Word16, Weight)]
shrunkenPositions <- ((Word16, Weight) -> [(Word16, Weight)])
-> [(Word16, Weight)] -> [[(Word16, Weight)]]
forall a. (a -> [a]) -> [a] -> [[a]]
QuickCheck.shrinkList (Word16, Weight) -> [(Word16, Weight)]
forall a. Arbitrary a => a -> [a]
shrink [(Word16, Weight)]
positions
pure (Text
shrunkenTok, [(Word16, Weight)]
shrunkenPositions)
)
(((Text, Vector (Word16, Weight)) -> (Text, [(Word16, Weight)]))
-> [(Text, Vector (Word16, Weight))]
-> [(Text, [(Word16, Weight)])]
forall a b. (a -> b) -> [a] -> [b]
map (\(Text
t, Vector (Word16, Weight)
ps) -> (Text
t, Vector (Word16, Weight) -> [(Word16, Weight)]
forall a. Vector a -> [a]
Vector.toList Vector (Word16, Weight)
ps)) (Vector (Text, Vector (Word16, Weight))
-> [(Text, Vector (Word16, Weight))]
forall a. Vector a -> [a]
Vector.toList Vector (Text, Vector (Word16, Weight))
lexemes))
normalizeLexemes :: [(Text, Vector (Word16, Weight))] -> Tsvector
normalizeLexemes :: [(Text, Vector (Word16, Weight))] -> Tsvector
normalizeLexemes [(Text, Vector (Word16, Weight))]
lexemes =
let m :: Map Text [(Word16, Weight)]
m = ([(Word16, Weight)] -> [(Word16, Weight)] -> [(Word16, Weight)])
-> [(Text, [(Word16, Weight)])] -> Map Text [(Word16, Weight)]
forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
Map.fromListWith [(Word16, Weight)] -> [(Word16, Weight)] -> [(Word16, Weight)]
forall a. Semigroup a => a -> a -> a
(<>) (((Text, Vector (Word16, Weight)) -> (Text, [(Word16, Weight)]))
-> [(Text, Vector (Word16, Weight))]
-> [(Text, [(Word16, Weight)])]
forall a b. (a -> b) -> [a] -> [b]
map (\(Text
t, Vector (Word16, Weight)
ps) -> (Text
t, Vector (Word16, Weight) -> [(Word16, Weight)]
forall a. Vector a -> [a]
Vector.toList Vector (Word16, Weight)
ps)) [(Text, Vector (Word16, Weight))]
lexemes)
sorted :: [(Text, [(Word16, Weight)])]
sorted = Map Text [(Word16, Weight)] -> [(Text, [(Word16, Weight)])]
forall k a. Map k a -> [(k, a)]
Map.toAscList Map Text [(Word16, Weight)]
m
in Vector (Text, Vector (Word16, Weight)) -> Tsvector
Tsvector ([(Text, Vector (Word16, Weight))]
-> Vector (Text, Vector (Word16, Weight))
forall a. [a] -> Vector a
Vector.fromList (((Text, [(Word16, Weight)]) -> (Text, Vector (Word16, Weight)))
-> [(Text, [(Word16, Weight)])]
-> [(Text, Vector (Word16, Weight))]
forall a b. (a -> b) -> [a] -> [b]
map (\(Text
t, [(Word16, Weight)]
ps) -> (Text
t, [(Word16, Weight)] -> Vector (Word16, Weight)
forall a. [a] -> Vector a
Vector.fromList ([(Word16, Weight)] -> [(Word16, Weight)]
sortAndDedupPositions [(Word16, Weight)]
ps))) [(Text, [(Word16, Weight)])]
sorted))
sortAndDedupPositions :: [(Word16, Weight)] -> [(Word16, Weight)]
sortAndDedupPositions :: [(Word16, Weight)] -> [(Word16, Weight)]
sortAndDedupPositions =
([(Word16, Weight)] -> (Word16, Weight))
-> [[(Word16, Weight)]] -> [(Word16, Weight)]
forall a b. (a -> b) -> [a] -> [b]
map (((Word16, Weight) -> (Word16, Weight) -> (Word16, Weight))
-> [(Word16, Weight)] -> (Word16, Weight)
forall a. (a -> a -> a) -> [a] -> a
forall (t :: * -> *) a. Foldable t => (a -> a -> a) -> t a -> a
foldr1 (\(Word16
p, Weight
w1) (Word16
_, Weight
w2) -> (Word16
p, Weight -> Weight -> Weight
forall a. Ord a => a -> a -> a
min Weight
w1 Weight
w2)))
([[(Word16, Weight)]] -> [(Word16, Weight)])
-> ([(Word16, Weight)] -> [[(Word16, Weight)]])
-> [(Word16, Weight)]
-> [(Word16, Weight)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ((Word16, Weight) -> (Word16, Weight) -> Bool)
-> [(Word16, Weight)] -> [[(Word16, Weight)]]
forall a. (a -> a -> Bool) -> [a] -> [[a]]
List.groupBy (\(Word16, Weight)
a (Word16, Weight)
b -> (Word16, Weight) -> Word16
forall a b. (a, b) -> a
fst (Word16, Weight)
a Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
== (Word16, Weight) -> Word16
forall a b. (a, b) -> a
fst (Word16, Weight)
b)
([(Word16, Weight)] -> [[(Word16, Weight)]])
-> ([(Word16, Weight)] -> [(Word16, Weight)])
-> [(Word16, Weight)]
-> [[(Word16, Weight)]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ((Word16, Weight) -> Word16)
-> [(Word16, Weight)] -> [(Word16, Weight)]
forall b a. Ord b => (a -> b) -> [a] -> [a]
List.sortOn (Word16, Weight) -> Word16
forall a b. (a, b) -> a
fst
instance IsScalar Tsvector where
schemaName :: Tagged Tsvector (Maybe Text)
schemaName = Maybe Text -> Tagged Tsvector (Maybe Text)
forall {k} (s :: k) b. b -> Tagged s b
Tagged Maybe Text
forall a. Maybe a
Nothing
typeName :: Tagged Tsvector Text
typeName = Text -> Tagged Tsvector Text
forall {k} (s :: k) b. b -> Tagged s b
Tagged Text
"tsvector"
baseOid :: Tagged Tsvector (Maybe Word32)
baseOid = Maybe Word32 -> Tagged Tsvector (Maybe Word32)
forall {k} (s :: k) b. b -> Tagged s b
Tagged (Word32 -> Maybe Word32
forall a. a -> Maybe a
Just Word32
3614)
arrayOid :: Tagged Tsvector (Maybe Word32)
arrayOid = Maybe Word32 -> Tagged Tsvector (Maybe Word32)
forall {k} (s :: k) b. b -> Tagged s b
Tagged (Word32 -> Maybe Word32
forall a. a -> Maybe a
Just Word32
3643)
typeParams :: Tagged Tsvector [Text]
typeParams = [Text] -> Tagged Tsvector [Text]
forall {k} (s :: k) b. b -> Tagged s b
Tagged []
binaryEncoder :: Tsvector -> Write
binaryEncoder (Tsvector Vector (Text, Vector (Word16, Weight))
lexemes) =
Int32 -> Write
Write.bInt32 (Int -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Vector (Text, Vector (Word16, Weight)) -> Int
forall a. Vector a -> Int
Vector.length Vector (Text, Vector (Word16, Weight))
lexemes))
Write -> Write -> Write
forall a. Semigroup a => a -> a -> a
<> ((Text, Vector (Word16, Weight)) -> Write)
-> Vector (Text, Vector (Word16, Weight)) -> Write
forall m a. Monoid m => (a -> m) -> Vector a -> m
Vector.foldMap (Text, Vector (Word16, Weight)) -> Write
encodeLexeme Vector (Text, Vector (Word16, Weight))
lexemes
where
encodeLexeme :: (Text, Vector (Word16, Weight)) -> Write
encodeLexeme (Text
token, Vector (Word16, Weight)
positions) =
let tokenBytes :: ByteString
tokenBytes = Text -> ByteString
Text.Encoding.encodeUtf8 Text
token
in ByteString -> Write
Write.byteString ByteString
tokenBytes
Write -> Write -> Write
forall a. Semigroup a => a -> a -> a
<> Word8 -> Write
Write.word8 Word8
0
Write -> Write -> Write
forall a. Semigroup a => a -> a -> a
<> Word16 -> Write
Write.bWord16 (Int -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Vector (Word16, Weight) -> Int
forall a. Vector a -> Int
Vector.length Vector (Word16, Weight)
positions))
Write -> Write -> Write
forall a. Semigroup a => a -> a -> a
<> ((Word16, Weight) -> Write) -> Vector (Word16, Weight) -> Write
forall m a. Monoid m => (a -> m) -> Vector a -> m
Vector.foldMap (Word16, Weight) -> Write
encodePosition Vector (Word16, Weight)
positions
encodePosition :: (Word16, Weight) -> Write
encodePosition (Word16
pos, Weight
weight) =
let weightBits :: Word16
weightBits = case Weight
weight of
Weight
AWeight -> Word16
3
Weight
BWeight -> Word16
2
Weight
CWeight -> Word16
1
Weight
DWeight -> Word16
0
posClamped :: Word16
posClamped = Word16 -> Word16 -> Word16
forall a. Ord a => a -> a -> a
max Word16
1 (Word16 -> Word16 -> Word16
forall a. Ord a => a -> a -> a
min Word16
16383 Word16
pos)
in Word16 -> Write
Write.bWord16 ((Word16
weightBits Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
`shiftL` Int
14) Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.|. Word16
posClamped)
binaryDecoder :: Variable (Either DecodingError Tsvector)
binaryDecoder = ExceptT DecodingError Variable Tsvector
-> Variable (Either DecodingError Tsvector)
forall e (m :: * -> *) a. ExceptT e m a -> m (Either e a)
runExceptT do
Int32
numLexemes <- Variable Int32 -> ExceptT DecodingError Variable Int32
forall (m :: * -> *) a. Monad m => m a -> ExceptT DecodingError m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (Variable Int32 -> ExceptT DecodingError Variable Int32)
-> Variable Int32 -> ExceptT DecodingError Variable Int32
forall a b. (a -> b) -> a -> b
$ Fixed Int32 -> Variable Int32
forall a. Fixed a -> Variable a
PtrPeeker.fixed Fixed Int32
PtrPeeker.beSignedInt4
Bool
-> ExceptT DecodingError Variable ()
-> ExceptT DecodingError Variable ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int32
numLexemes Int32 -> Int32 -> Bool
forall a. Ord a => a -> a -> Bool
< Int32
0) do
DecodingError -> ExceptT DecodingError Variable ()
forall a. DecodingError -> ExceptT DecodingError Variable a
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError
( DecodingError
{ location :: [Text]
location = [Text
"tsvector", Text
"lexemeCount"],
reason :: DecodingErrorReason
reason =
Text -> ByteString -> DecodingErrorReason
ParsingDecodingErrorReason
(String -> Text
forall a. IsString a => String -> a
fromString String
"Negative lexeme count in tsvector binary data")
ByteString
ByteString.empty
}
)
Vector (Text, Vector (Word16, Weight))
lexemes <- [(Text, Vector (Word16, Weight))]
-> Vector (Text, Vector (Word16, Weight))
forall a. [a] -> Vector a
Vector.fromList ([(Text, Vector (Word16, Weight))]
-> Vector (Text, Vector (Word16, Weight)))
-> ExceptT DecodingError Variable [(Text, Vector (Word16, Weight))]
-> ExceptT
DecodingError Variable (Vector (Text, Vector (Word16, Weight)))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int
-> ExceptT DecodingError Variable (Text, Vector (Word16, Weight))
-> ExceptT DecodingError Variable [(Text, Vector (Word16, Weight))]
forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
replicateM (Int32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int32
numLexemes) ExceptT DecodingError Variable (Text, Vector (Word16, Weight))
forall {t :: (* -> *) -> * -> *}.
(MonadTrans t, MonadError DecodingError (t Variable)) =>
t Variable (Text, Vector (Word16, Weight))
decodeLexeme
pure (Vector (Text, Vector (Word16, Weight)) -> Tsvector
Tsvector Vector (Text, Vector (Word16, Weight))
lexemes)
where
decodeLexeme :: t Variable (Text, Vector (Word16, Weight))
decodeLexeme = do
ByteString
tokenBytes <- Variable ByteString -> t Variable ByteString
forall (m :: * -> *) a. Monad m => m a -> t m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift Variable ByteString
PtrPeeker.nullTerminatedStringAsByteString
case ByteString -> Either UnicodeException Text
Text.Encoding.decodeUtf8' ByteString
tokenBytes of
Left UnicodeException
e ->
DecodingError -> t Variable (Text, Vector (Word16, Weight))
forall a. DecodingError -> t Variable a
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError
( DecodingError
{ location :: [Text]
location = [Text
"tsvector", Text
"lexeme"],
reason :: DecodingErrorReason
reason = Text -> ByteString -> DecodingErrorReason
ParsingDecodingErrorReason (String -> Text
forall a. IsString a => String -> a
fromString (UnicodeException -> String
forall a. Show a => a -> String
show UnicodeException
e)) ByteString
tokenBytes
}
)
Right Text
token
| Text -> Bool
Text.null Text
token ->
DecodingError -> t Variable (Text, Vector (Word16, Weight))
forall a. DecodingError -> t Variable a
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError
( DecodingError
{ location :: [Text]
location = [Text
"tsvector", Text
"lexeme"],
reason :: DecodingErrorReason
reason =
Text -> ByteString -> DecodingErrorReason
ParsingDecodingErrorReason
(String -> Text
forall a. IsString a => String -> a
fromString String
"empty lexeme is not allowed in tsvector")
ByteString
tokenBytes
}
)
| Bool
otherwise -> do
Word16
numPositions <- Variable Word16 -> t Variable Word16
forall (m :: * -> *) a. Monad m => m a -> t m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (Variable Word16 -> t Variable Word16)
-> Variable Word16 -> t Variable Word16
forall a b. (a -> b) -> a -> b
$ Fixed Word16 -> Variable Word16
forall a. Fixed a -> Variable a
PtrPeeker.fixed Fixed Word16
PtrPeeker.beUnsignedInt2
Vector (Word16, Weight)
positions <-
[(Word16, Weight)] -> Vector (Word16, Weight)
forall a. [a] -> Vector a
Vector.fromList ([(Word16, Weight)] -> Vector (Word16, Weight))
-> t Variable [(Word16, Weight)]
-> t Variable (Vector (Word16, Weight))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> t Variable (Word16, Weight) -> t Variable [(Word16, Weight)]
forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
replicateM (Word16 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word16
numPositions) do
Word16
posWord <- Variable Word16 -> t Variable Word16
forall (m :: * -> *) a. Monad m => m a -> t m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (Variable Word16 -> t Variable Word16)
-> Variable Word16 -> t Variable Word16
forall a b. (a -> b) -> a -> b
$ Fixed Word16 -> Variable Word16
forall a. Fixed a -> Variable a
PtrPeeker.fixed Fixed Word16
PtrPeeker.beUnsignedInt2
let weightBits :: Word16
weightBits = (Word16
posWord Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
`shiftR` Int
14) Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.&. Word16
0x3
let weight :: Weight
weight = case Word16
weightBits of
Word16
3 -> Weight
AWeight
Word16
2 -> Weight
BWeight
Word16
1 -> Weight
CWeight
Word16
_ -> Weight
DWeight
let pos :: Word16
pos = Word16
posWord Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.&. Word16
0x3FFF
(Word16, Weight) -> t Variable (Word16, Weight)
forall a. a -> t Variable a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Word16
pos, Weight
weight)
pure (Text
token, Vector (Word16, Weight)
positions)
textualEncoder :: Tsvector -> TextBuilder
textualEncoder (Tsvector Vector (Text, Vector (Word16, Weight))
lexemes) =
TextBuilder
-> ((Text, Vector (Word16, Weight)) -> TextBuilder)
-> [(Text, Vector (Word16, Weight))]
-> TextBuilder
forall (f :: * -> *) a.
Foldable f =>
TextBuilder -> (a -> TextBuilder) -> f a -> TextBuilder
TextBuilder.intercalateMap TextBuilder
" " (Text, Vector (Word16, Weight)) -> TextBuilder
forall {a}. Show a => (Text, Vector (a, Weight)) -> TextBuilder
encodeLexeme (Vector (Text, Vector (Word16, Weight))
-> [(Text, Vector (Word16, Weight))]
forall a. Vector a -> [a]
Vector.toList Vector (Text, Vector (Word16, Weight))
lexemes)
where
encodeLexeme :: (Text, Vector (a, Weight)) -> TextBuilder
encodeLexeme (Text
token, Vector (a, Weight)
positions) =
Char -> TextBuilder
TextBuilder.char Char
'\''
TextBuilder -> TextBuilder -> TextBuilder
forall a. Semigroup a => a -> a -> a
<> Text -> TextBuilder
TextBuilder.text (Text -> Text
escapeToken Text
token)
TextBuilder -> TextBuilder -> TextBuilder
forall a. Semigroup a => a -> a -> a
<> Char -> TextBuilder
TextBuilder.char Char
'\''
TextBuilder -> TextBuilder -> TextBuilder
forall a. Semigroup a => a -> a -> a
<> if Vector (a, Weight) -> Bool
forall a. Vector a -> Bool
Vector.null Vector (a, Weight)
positions
then TextBuilder
forall a. Monoid a => a
mempty
else Char -> TextBuilder
TextBuilder.char Char
':' TextBuilder -> TextBuilder -> TextBuilder
forall a. Semigroup a => a -> a -> a
<> TextBuilder
-> ((a, Weight) -> TextBuilder) -> [(a, Weight)] -> TextBuilder
forall (f :: * -> *) a.
Foldable f =>
TextBuilder -> (a -> TextBuilder) -> f a -> TextBuilder
TextBuilder.intercalateMap TextBuilder
"," (a, Weight) -> TextBuilder
forall {a}. Show a => (a, Weight) -> TextBuilder
encodePosition (Vector (a, Weight) -> [(a, Weight)]
forall a. Vector a -> [a]
Vector.toList Vector (a, Weight)
positions)
encodePosition :: (a, Weight) -> TextBuilder
encodePosition (a
pos, Weight
weight) =
String -> TextBuilder
TextBuilder.string (a -> String
forall a. Show a => a -> String
show a
pos)
TextBuilder -> TextBuilder -> TextBuilder
forall a. Semigroup a => a -> a -> a
<> case Weight
weight of
Weight
AWeight -> Char -> TextBuilder
TextBuilder.char Char
'A'
Weight
BWeight -> Char -> TextBuilder
TextBuilder.char Char
'B'
Weight
CWeight -> Char -> TextBuilder
TextBuilder.char Char
'C'
Weight
DWeight -> TextBuilder
forall a. Monoid a => a
mempty
escapeToken :: Text -> Text
escapeToken = (Char -> Text) -> Text -> Text
Text.concatMap Char -> Text
escapeChar
escapeChar :: Char -> Text
escapeChar Char
c = case Char
c of
Char
'\'' -> Text
"''"
Char
'\\' -> Text
"\\\\"
Char
_ -> Char -> Text
Text.singleton Char
c
textualDecoder :: Parser Tsvector
textualDecoder = do
Parser Text ()
Attoparsec.skipSpace
[(Text, [(Word16, Weight)])]
lexemes <- Parser Text (Text, [(Word16, Weight)])
lexemeParser Parser Text (Text, [(Word16, Weight)])
-> Parser Text () -> Parser Text [(Text, [(Word16, Weight)])]
forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`Attoparsec.sepBy` Parser Text ()
space1
Parser Text ()
Attoparsec.skipSpace
let Tsvector Vector (Text, Vector (Word16, Weight))
normalized = [(Text, Vector (Word16, Weight))] -> Tsvector
normalizeLexemes (((Text, [(Word16, Weight)]) -> (Text, Vector (Word16, Weight)))
-> [(Text, [(Word16, Weight)])]
-> [(Text, Vector (Word16, Weight))]
forall a b. (a -> b) -> [a] -> [b]
map (\(Text
t, [(Word16, Weight)]
ps) -> (Text
t, [(Word16, Weight)] -> Vector (Word16, Weight)
forall a. [a] -> Vector a
Vector.fromList [(Word16, Weight)]
ps)) [(Text, [(Word16, Weight)])]
lexemes)
pure (Vector (Text, Vector (Word16, Weight)) -> Tsvector
Tsvector Vector (Text, Vector (Word16, Weight))
normalized)
where
space1 :: Parser Text ()
space1 = do
Text
_ <- (Char -> Bool) -> Parser Text Text
Attoparsec.takeWhile1 (\Char
c -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
' ' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\t' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n')
pure ()
lexemeParser :: Parser Text (Text, [(Word16, Weight)])
lexemeParser = do
Char
_ <- Char -> Parser Text Char
Attoparsec.char Char
'\''
Text
token <- Parser Text Text
parseToken
Char
_ <- Char -> Parser Text Char
Attoparsec.char Char
'\''
[(Word16, Weight)]
positions <- Parser Text [(Word16, Weight)]
parsePositions Parser Text [(Word16, Weight)]
-> Parser Text [(Word16, Weight)] -> Parser Text [(Word16, Weight)]
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> [(Word16, Weight)] -> Parser Text [(Word16, Weight)]
forall a. a -> Parser Text a
forall (f :: * -> *) a. Applicative f => a -> f a
pure []
pure (Text
token, [(Word16, Weight)]
positions)
parseToken :: Parser Text Text
parseToken = do
String
chars <- Parser Text Char -> Parser Text String
forall a. Parser Text a -> Parser Text [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (Parser Text Char
escapedQuote Parser Text Char -> Parser Text Char -> Parser Text Char
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text Char
escapedBackslash Parser Text Char -> Parser Text Char -> Parser Text Char
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text Char
normalChar)
pure (String -> Text
Text.pack String
chars)
escapedQuote :: Parser Text Char
escapedQuote = do
Text
_ <- Text -> Parser Text Text
Attoparsec.string Text
"''"
pure Char
'\''
escapedBackslash :: Parser Text Char
escapedBackslash = do
Text
_ <- Text -> Parser Text Text
Attoparsec.string Text
"\\\\"
pure Char
'\\'
normalChar :: Parser Text Char
normalChar = (Char -> Bool) -> Parser Text Char
Attoparsec.satisfy (\Char
c -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\'' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\\')
parsePositions :: Parser Text [(Word16, Weight)]
parsePositions = do
Char
_ <- Char -> Parser Text Char
Attoparsec.char Char
':'
Parser Text (Word16, Weight)
parsePosition Parser Text (Word16, Weight)
-> Parser Text Char -> Parser Text [(Word16, Weight)]
forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`Attoparsec.sepBy1` Char -> Parser Text Char
Attoparsec.char Char
','
parsePosition :: Parser Text (Word16, Weight)
parsePosition = do
Integer
pos <- forall a. Integral a => Parser a
Attoparsec.decimal @Integer
Bool -> Parser Text () -> Parser Text ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Integer
pos Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
< Integer
1 Bool -> Bool -> Bool
|| Integer
pos Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
> Integer
16383) do
String -> Parser Text ()
forall a. HasCallStack => String -> Parser Text a
forall (m :: * -> *) a.
(MonadFail m, HasCallStack) =>
String -> m a
fail (String
"tsvector position out of range 1..16383: " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Integer -> String
forall a. Show a => a -> String
show Integer
pos)
let pos' :: Word16
pos' = Integer -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
pos :: Word16
Weight
weight <-
[Parser Text Weight] -> Parser Text Weight
forall (t :: * -> *) (f :: * -> *) a.
(Foldable t, Alternative f) =>
t (f a) -> f a
asum
[ Char -> Parser Text Char
Attoparsec.char Char
'A' Parser Text Char -> Weight -> Parser Text Weight
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> Weight
AWeight,
Char -> Parser Text Char
Attoparsec.char Char
'B' Parser Text Char -> Weight -> Parser Text Weight
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> Weight
BWeight,
Char -> Parser Text Char
Attoparsec.char Char
'C' Parser Text Char -> Weight -> Parser Text Weight
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> Weight
CWeight,
Char -> Parser Text Char
Attoparsec.char Char
'D' Parser Text Char -> Weight -> Parser Text Weight
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> Weight
DWeight,
Weight -> Parser Text Weight
forall a. a -> Parser Text a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Weight
DWeight
]
pure (Word16
pos', Weight
weight)
toLexemeList :: Tsvector -> [(Text, [(Word16, Weight)])]
toLexemeList :: Tsvector -> [(Text, [(Word16, Weight)])]
toLexemeList (Tsvector Vector (Text, Vector (Word16, Weight))
lexemes) =
((Text, Vector (Word16, Weight)) -> (Text, [(Word16, Weight)]))
-> [(Text, Vector (Word16, Weight))]
-> [(Text, [(Word16, Weight)])]
forall a b. (a -> b) -> [a] -> [b]
map (\(Text
t, Vector (Word16, Weight)
ps) -> (Text
t, Vector (Word16, Weight) -> [(Word16, Weight)]
forall a. Vector a -> [a]
Vector.toList Vector (Word16, Weight)
ps)) (Vector (Text, Vector (Word16, Weight))
-> [(Text, Vector (Word16, Weight))]
forall a. Vector a -> [a]
Vector.toList Vector (Text, Vector (Word16, Weight))
lexemes)
refineFromLexemeList :: [(Text, [(Word16, Weight)])] -> Maybe Tsvector
refineFromLexemeList :: [(Text, [(Word16, Weight)])] -> Maybe Tsvector
refineFromLexemeList [(Text, [(Word16, Weight)])]
lexemes =
if ((Text, [(Word16, Weight)]) -> Bool)
-> [(Text, [(Word16, Weight)])] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (\(Text
t, [(Word16, Weight)]
ps) -> Text -> Bool
Text.null Text
t Bool -> Bool -> Bool
|| Char -> Text -> Bool
Text.elem Char
'\NUL' Text
t Bool -> Bool -> Bool
|| ((Word16, Weight) -> Bool) -> [(Word16, Weight)] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (\(Word16
p, Weight
_) -> Word16
p Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
< Word16
1 Bool -> Bool -> Bool
|| Word16
p Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
> Word16
16383) [(Word16, Weight)]
ps) [(Text, [(Word16, Weight)])]
lexemes
then Maybe Tsvector
forall a. Maybe a
Nothing
else Tsvector -> Maybe Tsvector
forall a. a -> Maybe a
Just ([(Text, Vector (Word16, Weight))] -> Tsvector
normalizeLexemes (((Text, [(Word16, Weight)]) -> (Text, Vector (Word16, Weight)))
-> [(Text, [(Word16, Weight)])]
-> [(Text, Vector (Word16, Weight))]
forall a b. (a -> b) -> [a] -> [b]
map (\(Text
t, [(Word16, Weight)]
ps) -> (Text
t, [(Word16, Weight)] -> Vector (Word16, Weight)
forall a. [a] -> Vector a
Vector.fromList [(Word16, Weight)]
ps)) [(Text, [(Word16, Weight)])]
lexemes))
normalizeFromLexemeList :: [(Text, [(Word16, Weight)])] -> Tsvector
normalizeFromLexemeList :: [(Text, [(Word16, Weight)])] -> Tsvector
normalizeFromLexemeList [(Text, [(Word16, Weight)])]
lexemes =
let cleaned :: [(Text, [(Word16, Weight)])]
cleaned = ((Text, [(Word16, Weight)]) -> Bool)
-> [(Text, [(Word16, Weight)])] -> [(Text, [(Word16, Weight)])]
forall a. (a -> Bool) -> [a] -> [a]
filter (\(Text
t, [(Word16, Weight)]
_) -> Bool -> Bool
not (Text -> Bool
Text.null Text
t)) ([(Text, [(Word16, Weight)])] -> [(Text, [(Word16, Weight)])])
-> [(Text, [(Word16, Weight)])] -> [(Text, [(Word16, Weight)])]
forall a b. (a -> b) -> a -> b
$ ((Text, [(Word16, Weight)]) -> (Text, [(Word16, Weight)]))
-> [(Text, [(Word16, Weight)])] -> [(Text, [(Word16, Weight)])]
forall a b. (a -> b) -> [a] -> [b]
map (\(Text
t, [(Word16, Weight)]
ps) -> (HasCallStack => Text -> Text -> Text -> Text
Text -> Text -> Text -> Text
Text.replace Text
"\NUL" Text
"" Text
t, [(Word16, Weight)]
ps)) [(Text, [(Word16, Weight)])]
lexemes
in [(Text, Vector (Word16, Weight))] -> Tsvector
normalizeLexemes (((Text, [(Word16, Weight)]) -> (Text, Vector (Word16, Weight)))
-> [(Text, [(Word16, Weight)])]
-> [(Text, Vector (Word16, Weight))]
forall a b. (a -> b) -> [a] -> [b]
map (\(Text
t, [(Word16, Weight)]
ps) -> (Text
t, [(Word16, Weight)] -> Vector (Word16, Weight)
forall a. [a] -> Vector a
Vector.fromList [(Word16, Weight)]
ps)) [(Text, [(Word16, Weight)])]
cleaned)