From c4aab6248a963228579168186d4e010c7e0e1ab1 Mon Sep 17 00:00:00 2001 From: Steffen Jost Date: Wed, 20 Mar 2019 13:15:23 +0100 Subject: [PATCH] inference for studyterms works now --- src/Handler/Admin.hs | 4 ++-- src/Handler/Utils/TermCandidates.hs | 20 +++++++++----------- test/Database.hs | 26 +++++++++++++++++++++++++- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/src/Handler/Admin.hs b/src/Handler/Admin.hs index bf5a29f6d..3df9b1b9d 100644 --- a/src/Handler/Admin.hs +++ b/src/Handler/Admin.hs @@ -315,7 +315,7 @@ postAdminFeaturesR = do dbtFilter = Map.fromList [ ("key", FilterColumn $ mkExactFilter (E.^. StudyTermCandidateKey)) , ("name", FilterColumn $ mkContainsFilter (E.^. StudyTermCandidateName)) - , ("incidence", FilterColumn $ mkExactFilter (E.^. StudyTermCandidateIncidence)) -- TODO containts filter desired here + , ("incidence", FilterColumn $ mkExactFilter (E.^. StudyTermCandidateIncidence)) -- contains filter desired, but impossible here ] dbtFilterUI mPrev = mconcat -- [ prismAForm (singletonFilter "key") mPrev $ aopt intField (fslI MsgStudyTermsKey) -- Typing problem exactFilter suffices here @@ -324,6 +324,6 @@ postAdminFeaturesR = do , prismAForm (singletonFilter "incidence") mPrev $ aopt (searchField False) (fslI MsgStudyCandidateIncidence) ] dbtParams = def - psValidator = def & defaultSorting [SortAscBy "key", SortAscBy "name"] + psValidator = def & defaultSorting [SortAscBy "incidence", SortAscBy "key", SortAscBy "name"] in dbTable psValidator DBTable{..} diff --git a/src/Handler/Utils/TermCandidates.hs b/src/Handler/Utils/TermCandidates.hs index 48fdec8cb..fdf91a7f9 100644 --- a/src/Handler/Utils/TermCandidates.hs +++ b/src/Handler/Utils/TermCandidates.hs @@ -88,13 +88,13 @@ as a fix we simply eliminate all observations that have the same name twice, see -- which may happen in rare cases removeAmbiguous :: DB [TermCandidateIncidence] removeAmbiguous = do - ambiList <- E.select $ E.from $ \(candA `E.InnerJoin` candB) -> do - -- Either an innerJoin with itself or an exists-sub-select - E.on $ (candA E.^. StudyTermCandidateIncidence E.==. candB E.^. StudyTermCandidateIncidence) - E.&&. (candA E.^. StudyTermCandidateKey E.!=. candB E.^. StudyTermCandidateKey) - E.&&. (candA E.^. StudyTermCandidateName E.==. candB E.^. StudyTermCandidateName) - E.&&. (candA E.^. StudyTermCandidateId E.!=. candB E.^. StudyTermCandidateId) -- should not be needed, but does not hurt either - return $ candA E.^. StudyTermCandidateIncidence + ambiList <- E.select $ E.from $ \candidate -> do + E.groupBy ( candidate E.^. StudyTermCandidateIncidence + , candidate E.^. StudyTermCandidateKey + , candidate E.^. StudyTermCandidateName + ) + E.having $ E.countRows E.!=. E.val (1 :: Int64) + return $ candidate E.^. StudyTermCandidateIncidence let ambiSet = E.unValue <$> List.nub ambiList -- Most SQL dialects won't allow deletion and queries on the same table at once, hence we delete in two steps. deleteWhere [StudyTermCandidateIncidence <-. ambiSet] @@ -125,7 +125,7 @@ removeRedundant = do -- we load the table into Haskell and operate there. Memory usage problem? StudyTermsCandidate may become huge. acceptSingletons :: DB [(STKey,Text)] acceptSingletons = do - knownKeys <- fmap unStudyTermsKey <$> selectKeysList [] [Asc StudyTermsKey] + knownKeys <- fmap unStudyTermsKey <$> selectKeysList [StudyTermsName !=. Nothing] [Asc StudyTermsKey] -- let knownKeysSet = Set.fromAscList knownKeys -- In case of memory problems, change next lines to conduit proper: incidences <- fmap entityVal <$> selectList [StudyTermCandidateKey /<-. knownKeys] [] -- LimitTo might be dangerous here, if we get a partial incidence. Possibly first select N incidences, then retrieving all those only. @@ -161,9 +161,7 @@ acceptSingletons = do -- registerFixed :: (STKey, Text) -> DB (Key StudyTerms) registerFixed :: (STKey, Text) -> DB () - registerFixed (key, name) = - -- insertKey (StudyTermsKey key) $ StudyTerms key (Just $ shortenStudyTerm name) (Just name) -- name clash! - void . insert $ StudyTerms key (Just $ shortenStudyTerm name) (Just name) + registerFixed (key, name) = repsert (StudyTermsKey' key) $ StudyTerms key (Just $ shortenStudyTerm name) (Just name) -- register newly fixed candidates forM_ fixedKeys registerFixed diff --git a/test/Database.hs b/test/Database.hs index 11b14a157..8df98f6e9 100755 --- a/test/Database.hs +++ b/test/Database.hs @@ -231,6 +231,8 @@ fillDb = do sdBiol = StudyTermsKey' 26 sdChem1 = StudyTermsKey' 61 sdChem2 = StudyTermsKey' 113 + sdBWL = StudyTermsKey' 21 + sdDeut = StudyTermsKey' 103 repsert sdInf $ StudyTerms 79 (Just "Inf") (Just "Informatikk") repsert sdMath $ StudyTerms 105 (Just "Math" ) (Just "Mathematik") repsert sdMedi $ StudyTerms 121 Nothing (Just "Fehler hier") @@ -240,6 +242,8 @@ fillDb = do repsert sdBiol $ StudyTerms 26 Nothing Nothing -- intentionally left unknown repsert sdChem1 $ StudyTerms 61 Nothing Nothing -- intentionally left unknown repsert sdChem2 $ StudyTerms 113 Nothing Nothing -- intentionally left unknown + repsert sdBWL $ StudyTerms 21 Nothing Nothing -- intentionally left unknown + repsert sdDeut $ StudyTerms 103 Nothing Nothing -- intentionally left unknown incidence1 <- liftIO getRandom void . insert $ StudyTermCandidate incidence1 221 "Bioinformatik" void . insert $ StudyTermCandidate incidence1 221 "Mathematik" @@ -252,7 +256,7 @@ fillDb = do void . insert $ StudyTermCandidate incidence2 61 "Chemie" incidence3 <- liftIO getRandom void . insert $ StudyTermCandidate incidence3 113 "Chemie" - incidence4 <- liftIO getRandom + incidence4 <- liftIO getRandom -- ambiguous incidence void . insert $ StudyTermCandidate incidence4 221 "Bioinformatik" void . insert $ StudyTermCandidate incidence4 221 "Chemie" void . insert $ StudyTermCandidate incidence4 221 "Biologie" @@ -285,6 +289,26 @@ fillDb = do void . insert $ StudyTermCandidate incidence8 121 "Medieninformatik" incidence9 <- liftIO getRandom void . insert $ StudyTermCandidate incidence9 79 "Informatik" + incidence10 <- liftIO getRandom + void . insert $ StudyTermCandidate incidence10 103 "Deutsch" + void . insert $ StudyTermCandidate incidence10 103 "Betriebswirtschafslehre" + void . insert $ StudyTermCandidate incidence10 21 "Deutsch" + void . insert $ StudyTermCandidate incidence10 21 "Betriebswirtschafslehre" + incidence11 <- liftIO getRandom + void . insert $ StudyTermCandidate incidence11 221 "Bioinformatik" + void . insert $ StudyTermCandidate incidence11 221 "Chemie" + void . insert $ StudyTermCandidate incidence11 221 "Biologie" + void . insert $ StudyTermCandidate incidence11 61 "Bioinformatik" + void . insert $ StudyTermCandidate incidence11 61 "Chemie" + void . insert $ StudyTermCandidate incidence11 61 "Biologie" + void . insert $ StudyTermCandidate incidence11 26 "Bioinformatik" + void . insert $ StudyTermCandidate incidence11 26 "Chemie" + void . insert $ StudyTermCandidate incidence11 26 "Biologie" + incidence12 <- liftIO getRandom + void . insert $ StudyTermCandidate incidence12 103 "Deutsch" + void . insert $ StudyTermCandidate incidence12 103 "Betriebswirtschafslehre" + void . insert $ StudyTermCandidate incidence12 21 "Deutsch" + void . insert $ StudyTermCandidate incidence12 21 "Betriebswirtschafslehre" sfMMp <- insert $ StudyFeatures -- keyword type prevents record syntax here maxMuster