Merge branch 'master' into 'live'

Cleanup fuzzy pseudonym handling

See merge request !109
This commit is contained in:
Gregor Kleen 2018-11-26 17:36:32 +01:00
commit 23e6385f6a
11 changed files with 65 additions and 58 deletions

View File

@ -414,9 +414,11 @@ NotificationTriggerCorrectionsAssigned: Mir wurden Abgaben zur Korrektur zugetei
CorrCreate: Abgaben erstellen
UnknownPseudonymWord pseudonymWord@Text: Unbekanntes Pseudonym-Wort "#{pseudonymWord}"
InvalidPseudonym pseudonym@Text: Invalides Pseudonym "#{pseudonym}"
InvalidPseudonymSubmissionIgnored oPseudonyms@Text iPseudonym@Text: Abgabe mit Pseudonymen „#{oPseudonyms}“ wurde ignoriert, da „#{iPseudonym}“ nicht automatisiert zu einem validen Pseudonym korrigiert werden konnte.
PseudonymAutocorrections: Korrekturvorschläge:
UnknownPseudonym pseudonym@Text: Unbekanntes Pseudonym "#{pseudonym}"
CorrectionPseudonyms: Abgaben-Pseudonyme
CorrectionPseudonymsTip: Eine Abgabe pro Zeile, bei Gruppenabgaben mehrere Pseudonyme (komma-separiert) innerhalb einer Zeile
CorrectionPseudonymsTip: Eine Abgabe pro Zeile, bei Gruppenabgaben mehrere Pseudonyme (komma-separiert) innerhalb einer Zeile. Kleine Schreibfehler werden u.U. automatisch korrigiert.
PseudonymSheet: Übungsblatt
CorrectionPseudonymSheet termDesc@Text csh@CourseShorthand shn@SheetName: #{termDesc} » #{csh} » #{shn}
SheetGroupTooLarge sheetGroupDesc@Text: Abgabegruppe zu groß: #{sheetGroupDesc}

View File

@ -4,7 +4,7 @@ module Data.CaseInsensitive.Instances
(
) where
import ClassyPrelude.Yesod
import ClassyPrelude.Yesod hiding (lift)
import Data.CaseInsensitive (CI)
import qualified Data.CaseInsensitive as CI
@ -16,6 +16,8 @@ import Text.Blaze (ToMarkup(..))
import Data.Text (Text)
import qualified Data.Text.Encoding as Text
import Language.Haskell.TH.Syntax (Lift(..))
instance PersistField (CI Text) where
toPersistValue ciText = PersistDbSpecific . Text.encodeUtf8 $ CI.original ciText
@ -51,3 +53,6 @@ instance ToWidget site a => ToWidget site (CI a) where
instance RenderMessage site a => RenderMessage site (CI a) where
renderMessage f ls msg = renderMessage f ls $ CI.original msg
instance Lift t => Lift (CI t) where
lift (CI.original -> orig) = [e|CI.mk $(lift orig)|]

View File

@ -1309,7 +1309,7 @@ pageActions (CorrectionsR) =
, menuItemLabel = MsgMenuCorrectionsCreate
, menuItemIcon = Nothing
, menuItemRoute = SomeRoute CorrectionsCreateR
, menuItemModal = True
, menuItemModal = False
, menuItemAccessCallback' = runDB $ do
uid <- liftHandlerT requireAuthId
[E.Value sheetCount] <- E.select . E.from $ \(course `E.InnerJoin` sheet) -> do
@ -1349,7 +1349,7 @@ pageActions (CorrectionsGradeR) =
, menuItemLabel = MsgMenuCorrectionsCreate
, menuItemIcon = Nothing
, menuItemRoute = SomeRoute CorrectionsCreateR
, menuItemModal = True
, menuItemModal = False
, menuItemAccessCallback' = runDB $ do
uid <- liftHandlerT requireAuthId
[E.Value sheetCount] <- E.select . E.from $ \(course `E.InnerJoin` sheet) -> do

View File

@ -124,7 +124,7 @@ colSubmittors = sortable Nothing (i18nCell MsgSubmissionUsers) $ \DBRow{ dbrOutp
protoCell = listCell (Map.toList users) $ \(userId, (User{..}, mPseudo)) ->
anchorCellM (link <$> encrypt userId) $ case mPseudo of
Nothing -> nameWidget userDisplayName userSurname
Just p -> [whamlet|^{nameWidget userDisplayName userSurname} (#{review pseudonymText p})|]
Just p -> [whamlet|^{nameWidget userDisplayName userSurname} (#{review _PseudonymText p})|]
in protoCell & cellAttrs <>~ [("class", "list--inline list--comma-separated")]
colSMatrikel :: IsDBTable m a => Colonnade _ CorrectionTableData (DBCell m a)
@ -154,7 +154,7 @@ colRated = sortable (Just "ratingtime") (i18nCell MsgRatingTime) $ \DBRow{ dbrOu
colPseudonyms :: IsDBTable m a => Colonnade _ CorrectionTableData (DBCell m a)
colPseudonyms = sortable Nothing (i18nCell MsgPseudonyms) $ \DBRow{ dbrOutput=(_, _, _, _, users) } -> let
lCell = listCell (catMaybes $ snd . snd <$> Map.toList users) $ \pseudo ->
cell [whamlet|#{review pseudonymText pseudo}|]
cell [whamlet|#{review _PseudonymText pseudo}|]
in lCell & cellAttrs <>~ [("class", "list--inline list--comma-separated")]
colRatedField :: Colonnade _ CorrectionTableData (DBCell _ (FormResult (DBFormResult CorrectionTableData SubmissionId (Bool, a, b))))
@ -631,17 +631,19 @@ postCorrectionsCreateR = do
}
((pseudonymRes, pseudonymWidget), pseudonymEncoding) <- runFormPost . renderAForm FormStandard $ (,)
<$> areq (selectField sheetOptions) (fslI MsgPseudonymSheet) Nothing
<*> areq (checkMMap textToList textFromList textareaField) (fslpI MsgCorrectionPseudonyms "Pseudonyme" & setTooltip MsgCorrectionPseudonymsTip) Nothing
<*> (textToList <$> areq textareaField (fslpI MsgCorrectionPseudonyms "Pseudonyme" & setTooltip MsgCorrectionPseudonymsTip) Nothing)
<* submitButton
case pseudonymRes of
FormMissing -> return ()
FormFailure errs -> forM_ errs $ addMessage Error . toHtml
FormSuccess (sid, pss) -> do
FormSuccess (sid, (pss, invalids)) -> do
forM_ (Map.toList invalids) $ \((oPseudonyms, iPseudonym), alts) -> $(addMessageFile Warning "templates/messages/ignoredInvalidPseudonym.hamlet")
runDB $ do
Sheet{..} <- get404 sid
(sps, unknown) <- fmap partitionEithers' . forM pss . mapM $ \p -> maybe (Left p) Right <$> getBy (UniqueSheetPseudonym sid p)
forM_ unknown $ addMessageI Error . MsgUnknownPseudonym . review pseudonymText
forM_ unknown $ addMessageI Error . MsgUnknownPseudonym . review _PseudonymText
now <- liftIO getCurrentTime
let
sps' :: [[SheetPseudonym]]
@ -671,7 +673,7 @@ postCorrectionsCreateR = do
let sps'' = filter (not . null) $ filter (\spGroup -> not . flip any spGroup $ \SheetPseudonym{sheetPseudonymUser} -> sheetPseudonymUser `elem` map (submissionUserUser . entityVal) existingSubUsers) sps'
forM_ sps'' $ \spGroup
-> let
sheetGroupDesc = Text.intercalate ", " $ map (review pseudonymText . sheetPseudonymPseudonym) spGroup
sheetGroupDesc = Text.intercalate ", " $ map (review _PseudonymText . sheetPseudonymPseudonym) spGroup
in case sheetGrouping of
Arbitrary maxSize
| genericLength sps > maxSize
@ -727,17 +729,13 @@ postCorrectionsCreateR = do
partitionEithers' :: [[Either a b]] -> ([[b]], [a])
partitionEithers' = runWriter . mapM (WriterT . Identity . swap . partitionEithers)
textToList :: Textarea -> Handler (Either UniWorXMessage [[Pseudonym]])
textToList (map (map Text.strip . Text.splitOn ",") . filter (not . Text.null) . Text.lines . unTextarea -> ws)
= let
invalid :: [Text]
valid :: [[Pseudonym]]
(valid, invalid) = partitionEithers' $ map (map $ \w -> maybe (Left w) Right $ w ^? pseudonymText) ws
in case invalid of
(i:_) -> return . Left $ MsgInvalidPseudonym i
[] -> return $ Right valid
textFromList :: [[Pseudonym]] -> Textarea
textFromList = Textarea . Text.unlines . map (Text.intercalate ", " . map (review pseudonymText))
textToList :: Textarea -> ([[Pseudonym]], Map (Text, Text) [Pseudonym])
textToList (map (map Text.strip . Text.splitOn ",") . filter (not . Text.null) . map Text.strip . Text.lines . unTextarea -> ws)
= runWriter . fmap (mapMaybe sequence) $ mapM (\ws' -> mapM (toPseudonym ws') ws') ws
where
toPseudonym w' w
| Just res <- w ^? _PseudonymText = return $ Just res
| otherwise = Nothing <$ tell (Map.singleton (Text.intercalate ", " w', w) $ w ^.. pseudonymFragments . _PseudonymWords)
getCorrectionsGradeR, postCorrectionsGradeR :: Handler Html
getCorrectionsGradeR = postCorrectionsGradeR

View File

@ -46,7 +46,6 @@ import Data.Monoid (Sum(..), Any(..))
-- import Control.Lens
import Utils.Lens
import qualified Data.Text as Text
--import qualified Data.Aeson as Aeson
import Control.Monad.Random.Class (MonadRandom(..))
@ -318,7 +317,7 @@ getSShowR tid ssh csh shn = do
mPseudonym <- runMaybeT $ do
uid <- MaybeT maybeAuthId
Entity _ SheetPseudonym{sheetPseudonymPseudonym} <- MaybeT . runDB . getBy $ UniqueSheetPseudonymUser sid uid
return . Text.unwords . map CI.original $ review pseudonymWords sheetPseudonymPseudonym
return $ review _PseudonymText sheetPseudonymPseudonym
(generateWidget, generateEnctype) <- generateFormPost $ \csrf ->
over _2 ((toWidget csrf <>) . fvInput) <$> mreq (buttonField BtnGenerate) "" Nothing
defaultLayout $ do
@ -348,9 +347,8 @@ postSPseudonymR tid ssh csh shn = do
Right (Just ps) -> return ps
Left ps -> return ps
ps <- genPseudonym
let ps' = Text.unwords . map CI.original $ review pseudonymWords ps
selectRep $ do
provideRep $ return ps'
provideRep . return $ review _PseudonymText ps
provideJson ps
provideRep (redirect $ CSheetR tid ssh csh shn SShowR :#: ("pseudonym" :: Text) :: Handler Html)

View File

@ -607,25 +607,25 @@ instance FromJSON Pseudonym where
-> return $ fromIntegral w
| otherwise
-> fail "Pseudonym out auf range"
parseJSON (Aeson.String (map CI.mk . Text.words -> ws))
= case preview pseudonymWords ws of
parseJSON (Aeson.String t)
= case t ^? _PseudonymText of
Just p -> return p
Nothing -> fail "Could not parse pseudonym"
parseJSON v = flip (Aeson.withArray "Pseudonym") v $ \ws -> do
ws' <- toList . map CI.mk <$> mapM parseJSON ws
case preview pseudonymWords ws' of
case ws' ^? _PseudonymWords of
Just p -> return p
Nothing -> fail "Could not parse pseudonym words"
instance ToJSON Pseudonym where
toJSON = toJSON . (review pseudonymWords :: Pseudonym -> [PseudonymWord])
toJSON = toJSON . (review _PseudonymWords :: Pseudonym -> [PseudonymWord])
pseudonymWordlist :: [PseudonymWord]
pseudonymCharacters :: Set Char
pseudonymCharacters :: Set (CI Char)
(pseudonymWordlist, pseudonymCharacters) = $(wordlist "config/wordlist.txt")
pseudonymWords :: Prism' [PseudonymWord] Pseudonym
pseudonymWords = prism' pToWords pFromWords
_PseudonymWords :: Prism' [PseudonymWord] Pseudonym
_PseudonymWords = prism' pToWords pFromWords
where
pFromWords :: [PseudonymWord] -> Maybe Pseudonym
pFromWords [w1, w2]
@ -644,32 +644,30 @@ pseudonymWords = prism' pToWords pFromWords
maxWord :: Num a => a
maxWord = 0b111111111111
pseudonymText :: Prism' Text Pseudonym
pseudonymText = prism' tToWords tFromWords . pseudonymWords
_PseudonymText :: Prism' Text Pseudonym
_PseudonymText = prism' tToWords tFromWords . _PseudonymWords
where
tFromWords :: Text -> Maybe [PseudonymWord]
tFromWords = mapM (disambiguate . CI.mk) . filter (not . null) . Text.split (\c -> not $ Set.member c pseudonymCharacters)
disambiguate :: CI Text -> Maybe PseudonymWord
disambiguate inp
| [choice] <- inp ^.. pseudonymWord = Just choice
| otherwise = Nothing
tFromWords input
| [result] <- input ^.. pseudonymFragments
= Just result
| otherwise
= Nothing
tToWords :: [PseudonymWord] -> Text
tToWords = Text.unwords . map CI.original
pseudonymWord :: Fold (CI Text) PseudonymWord
pseudonymWord = folding disambiguate
pseudonymWords :: Fold Text PseudonymWord
pseudonymWords = folding
$ \(CI.mk -> input) -> map (view _2) . unsafeHead . groupBy ((==) `on` view _1) . sortBy (comparing $ view _1) . filter ((<= distanceCutoff) . view _1) $ map (distance input &&& id) pseudonymWordlist
where
disambiguate :: CI Text -> [PseudonymWord]
disambiguate inp
| [other] <- filter (== inp) pseudonymWordlist = [other]
| otherwise = do
other <- pseudonymWordlist
let distance = (damerauLevenshtein `on` CI.foldedCase) inp other
guard $ distance <= 2 -- Smallest distance in vocabulary is just 1, be slightly more generous
return other
distance = damerauLevenshtein `on` CI.foldedCase
-- | Arbitrary cutoff point, for reference: ispell cuts off at 1
distanceCutoff = 2
pseudonymFragments :: Fold Text [PseudonymWord]
pseudonymFragments = folding
$ mapM (toListOf pseudonymWords) . (\l -> guard (length l == 2) *> l) . filter (not . null) . Text.split (\(CI.mk -> c) -> not $ Set.member c pseudonymCharacters)
data AuthTag

View File

@ -12,17 +12,16 @@ import qualified Data.Text.IO as Text
import qualified Data.Set as Set
import qualified Data.CaseInsensitive as CI
import qualified Data.Char as Char
import Data.CaseInsensitive.Instances ()
wordlist :: FilePath -> ExpQ
wordlist file = do
qAddDependentFile file
wordlist' <- runIO $ filter ((||) <$> not . isComment <*> isWord) . Text.lines <$> Text.readFile file
let usedChars = Set.unions $ map (Set.fromList . (>>= (\c -> [Char.toUpper c, Char.toLower c])) . Text.unpack) wordlist'
let usedChars = Set.unions $ map (Set.fromList . map CI.mk . Text.unpack) wordlist'
tupE
[ listE $ map (\(Text.unpack -> word) -> [e|CI.mk $ Text.pack $(lift word)|]) wordlist'
, [e|Set.fromList $(lift $ Set.toList usedChars)|]
, [e|Set.fromDistinctAscList $(lift $ Set.toAscList usedChars)|]
]
isWord :: Text -> Bool

View File

@ -0,0 +1,7 @@
<p>_{MsgInvalidPseudonymSubmissionIgnored oPseudonyms iPseudonym}
$if not (null alts)
<div>
<p>_{MsgPseudonymAutocorrections}
<ul>
$forall v <- alts
<li>#{review _PseudonymText v}

View File

@ -3,4 +3,4 @@ _{MsgSheetDuplicatePseudonym}
<ul>
$forall p <- duplicate
<li .pseudonym>
#{review pseudonymText p}
#{review _PseudonymText p}

View File

@ -6,4 +6,4 @@ _{MsgSheetCreateExisting}
<dd>
<ul>
$forall p <- pseudos
<li .pseudonym>#{review pseudonymText p}
<li .pseudonym>#{review _PseudonymText p}

View File

@ -38,9 +38,9 @@ spec = do
it "has sufficient vocabulary" $
(length pseudonymWordlist ^ 2) `shouldBe` (succ $ fromIntegral (maxBound - minBound :: Pseudonym))
it "has compatible encoding/decoding to/from Text" . property $
\pseudonym -> preview pseudonymText (review pseudonymText pseudonym) == Just pseudonym
\pseudonym -> preview _PseudonymText (review _PseudonymText pseudonym) == Just pseudonym
it "encodes to Text injectively" . property $
\p1 p2 -> p1 /= p2 ==> ((/=) `on` review pseudonymText) p1 p2
\p1 p2 -> p1 /= p2 ==> ((/=) `on` review _PseudonymText) p1 p2
termExample :: (TermIdentifier, Text) -> Expectation
termExample (term, encoded) = example $ do