diff --git a/package.yaml b/package.yaml index 6c3077b78..0853fdd38 100644 --- a/package.yaml +++ b/package.yaml @@ -109,6 +109,7 @@ dependencies: - clientsession - monad-memo - xss-sanitize + - text-metrics other-extensions: - GeneralizedNewtypeDeriving diff --git a/src/Model/Types.hs b/src/Model/Types.hs index 75b86551a..d196988f2 100644 --- a/src/Model/Types.hs +++ b/src/Model/Types.hs @@ -23,7 +23,8 @@ import Data.Monoid (Sum(..)) import Data.Maybe (fromJust) import Data.Universe import Data.Universe.Helpers -import Data.UUID.Types +import Data.UUID.Types (UUID) +import qualified Data.UUID.Types as UUID import Data.Default @@ -74,11 +75,11 @@ import System.Random (Random(..)) import Data.Data (Data) import Model.Types.Wordlist - +import Data.Text.Metrics (damerauLevenshtein) instance PathPiece UUID where - fromPathPiece = Data.UUID.Types.fromString . unpack - toPathPiece = pack . toString + fromPathPiece = UUID.fromString . unpack + toPathPiece = pack . UUID.toString instance (CI.FoldCase s, PathPiece s) => PathPiece (CI s) where fromPathPiece = fmap CI.mk . fromPathPiece @@ -439,10 +440,10 @@ data StudyFieldType = FieldPrimary | FieldSecondary derivePersistField "StudyFieldType" instance PersistField UUID where - toPersistValue = PersistDbSpecific . toASCIIBytes - fromPersistValue (PersistText t) = maybe (Left "Failed to parse UUID") Right $ fromText t - fromPersistValue (PersistByteString bs) = maybe (Left "Failed to parse UUID") Right $ fromASCIIBytes bs - fromPersistValue (PersistDbSpecific bs) = maybe (Left "Failed to parse UUID") Right $ fromASCIIBytes bs + toPersistValue = PersistDbSpecific . UUID.toASCIIBytes + fromPersistValue (PersistText t) = maybe (Left "Failed to parse UUID") Right $ UUID.fromText t + fromPersistValue (PersistByteString bs) = maybe (Left "Failed to parse UUID") Right $ UUID.fromASCIIBytes bs + fromPersistValue (PersistDbSpecific bs) = maybe (Left "Failed to parse UUID") Right $ UUID.fromASCIIBytes bs fromPersistValue x = Left $ "Expected UUID, received: " <> tshow x instance PersistFieldSql UUID where @@ -620,7 +621,8 @@ instance ToJSON Pseudonym where toJSON = toJSON . (review pseudonymWords :: Pseudonym -> [PseudonymWord]) pseudonymWordlist :: [PseudonymWord] -pseudonymWordlist = $(wordlist "config/wordlist.txt") +pseudonymCharacters :: Set Char +(pseudonymWordlist, pseudonymCharacters) = $(wordlist "config/wordlist.txt") pseudonymWords :: Prism' [PseudonymWord] Pseudonym pseudonymWords = prism' pToWords pFromWords @@ -643,14 +645,32 @@ pseudonymWords = prism' pToWords pFromWords maxWord = 0b111111111111 pseudonymText :: Prism' Text Pseudonym -pseudonymText = iso tFromWords tToWords . pseudonymWords +pseudonymText = prism' tToWords tFromWords . pseudonymWords where - tFromWords :: Text -> [PseudonymWord] - tFromWords = map CI.mk . Text.words + tFromWords :: Text -> Maybe [PseudonymWord] + tFromWords = mapM (disambiguate . CI.mk) . filter (not . null) . Text.split (\c -> not $ Set.member c pseudonymCharacters) + + disambiguate :: CI Text -> Maybe PseudonymWord + disambiguate inp + | [choice] <- inp ^.. pseudonymWord = Just choice + | otherwise = Nothing tToWords :: [PseudonymWord] -> Text tToWords = Text.unwords . map CI.original +pseudonymWord :: Fold (CI Text) PseudonymWord +pseudonymWord = folding disambiguate + where + disambiguate :: CI Text -> [PseudonymWord] + disambiguate inp + | [other] <- filter (== inp) pseudonymWordlist = [other] + | otherwise = do + other <- pseudonymWordlist + let distance = (damerauLevenshtein `on` CI.foldedCase) inp other + guard $ distance <= 2 -- Smallest distance in vocabulary is just 1, be slightly more generous + return other + + data AuthTag = AuthFree diff --git a/src/Model/Types/Wordlist.hs b/src/Model/Types/Wordlist.hs index 5e35d7f25..17aa73a06 100644 --- a/src/Model/Types/Wordlist.hs +++ b/src/Model/Types/Wordlist.hs @@ -9,13 +9,21 @@ import Data.Text (Text) import qualified Data.Text as Text import qualified Data.Text.IO as Text +import qualified Data.Set as Set + import qualified Data.CaseInsensitive as CI +import qualified Data.Char as Char + wordlist :: FilePath -> ExpQ wordlist file = do qAddDependentFile file wordlist' <- runIO $ filter ((||) <$> not . isComment <*> isWord) . Text.lines <$> Text.readFile file - listE $ map (\(Text.unpack -> word) -> [e|CI.mk $ Text.pack $(lift word)|]) wordlist' + let usedChars = Set.unions $ map (Set.fromList . (>>= (\c -> [Char.toUpper c, Char.toLower c])) . Text.unpack) wordlist' + tupE + [ listE $ map (\(Text.unpack -> word) -> [e|CI.mk $ Text.pack $(lift word)|]) wordlist' + , [e|Set.fromList $(lift $ Set.toList usedChars)|] + ] isWord :: Text -> Bool isWord t diff --git a/test/Model/TypesSpec.hs b/test/Model/TypesSpec.hs index 568412251..ad830582b 100644 --- a/test/Model/TypesSpec.hs +++ b/test/Model/TypesSpec.hs @@ -7,6 +7,9 @@ module Model.TypesSpec where import TestImport +import Control.Lens (review, preview) + + instance Arbitrary Season where arbitrary = elements [minBound..maxBound] shrink = genericShrink @@ -18,6 +21,9 @@ instance Arbitrary TermIdentifier where return $ TermIdentifier{..} shrink = genericShrink +instance Arbitrary Pseudonym where + arbitrary = Pseudonym <$> arbitraryBoundedIntegral + spec :: Spec spec = do describe "TermIdentifier" $ do @@ -28,6 +34,13 @@ spec = do , (TermIdentifier 1995 Winter, "W95") , (TermIdentifier 3068 Winter, "W3068") ] + describe "Pseudonym" $ do + it "has sufficient vocabulary" $ + (length pseudonymWordlist ^ 2) `shouldBe` (succ $ fromIntegral (maxBound - minBound :: Pseudonym)) + it "has compatible encoding/decoding to/from Text" . property $ + \pseudonym -> preview pseudonymText (review pseudonymText pseudonym) == Just pseudonym + it "encodes to Text injectively" . property $ + \p1 p2 -> p1 /= p2 ==> ((/=) `on` review pseudonymText) p1 p2 termExample :: (TermIdentifier, Text) -> Expectation termExample (term, encoded) = example $ do