From c4aab6248a963228579168186d4e010c7e0e1ab1 Mon Sep 17 00:00:00 2001
From: Steffen Jost <jost@tcs.ifi.lmu.de>
Date: Wed, 20 Mar 2019 13:15:23 +0100
Subject: [PATCH] inference for studyterms works now

---
 src/Handler/Admin.hs                |  4 ++--
 src/Handler/Utils/TermCandidates.hs | 20 +++++++++-----------
 test/Database.hs                    | 26 +++++++++++++++++++++++++-
 3 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/src/Handler/Admin.hs b/src/Handler/Admin.hs
index bf5a29f6d..3df9b1b9d 100644
--- a/src/Handler/Admin.hs
+++ b/src/Handler/Admin.hs
@@ -315,7 +315,7 @@ postAdminFeaturesR = do
           dbtFilter     = Map.fromList
             [ ("key",       FilterColumn $ mkExactFilter    (E.^. StudyTermCandidateKey))
             , ("name",      FilterColumn $ mkContainsFilter (E.^. StudyTermCandidateName))
-            , ("incidence", FilterColumn $ mkExactFilter (E.^. StudyTermCandidateIncidence)) -- TODO containts filter desired here
+            , ("incidence", FilterColumn $ mkExactFilter    (E.^. StudyTermCandidateIncidence)) -- contains filter desired, but impossible here
             ]
           dbtFilterUI mPrev = mconcat
             -- [ prismAForm (singletonFilter "key")       mPrev $ aopt intField            (fslI MsgStudyTermsKey) -- Typing problem exactFilter suffices here
@@ -324,6 +324,6 @@ postAdminFeaturesR = do
             , prismAForm (singletonFilter "incidence") mPrev $ aopt (searchField False) (fslI MsgStudyCandidateIncidence)
             ]
           dbtParams     = def
-          psValidator   = def & defaultSorting [SortAscBy "key", SortAscBy "name"]
+          psValidator   = def & defaultSorting [SortAscBy "incidence", SortAscBy "key", SortAscBy "name"]
       in dbTable psValidator DBTable{..}
 
diff --git a/src/Handler/Utils/TermCandidates.hs b/src/Handler/Utils/TermCandidates.hs
index 48fdec8cb..fdf91a7f9 100644
--- a/src/Handler/Utils/TermCandidates.hs
+++ b/src/Handler/Utils/TermCandidates.hs
@@ -88,13 +88,13 @@ as a fix we simply eliminate all observations that have the same name twice, see
 --   which may happen in rare cases
 removeAmbiguous :: DB [TermCandidateIncidence]
 removeAmbiguous = do
-  ambiList <- E.select $ E.from $ \(candA `E.InnerJoin` candB) -> do
-    -- Either an innerJoin with itself or an exists-sub-select
-    E.on $    (candA E.^. StudyTermCandidateIncidence E.==. candB E.^. StudyTermCandidateIncidence)
-        E.&&. (candA E.^. StudyTermCandidateKey       E.!=. candB E.^. StudyTermCandidateKey)
-        E.&&. (candA E.^. StudyTermCandidateName      E.==. candB E.^. StudyTermCandidateName)
-        E.&&. (candA E.^. StudyTermCandidateId        E.!=. candB E.^. StudyTermCandidateId) -- should not be needed, but does not hurt either
-    return $ candA E.^. StudyTermCandidateIncidence
+  ambiList <- E.select $ E.from $ \candidate -> do
+    E.groupBy ( candidate E.^. StudyTermCandidateIncidence
+              , candidate E.^. StudyTermCandidateKey
+              , candidate E.^. StudyTermCandidateName
+              )
+    E.having $ E.countRows E.!=. E.val (1 :: Int64)
+    return $ candidate E.^. StudyTermCandidateIncidence
   let ambiSet = E.unValue <$> List.nub ambiList
   -- Most SQL dialects won't allow deletion and queries on the same table at once, hence we delete in two steps.
   deleteWhere [StudyTermCandidateIncidence <-. ambiSet]
@@ -125,7 +125,7 @@ removeRedundant = do
 --   we load the table into Haskell and operate there. Memory usage problem? StudyTermsCandidate may become huge.
 acceptSingletons :: DB [(STKey,Text)]
 acceptSingletons = do
-  knownKeys <- fmap unStudyTermsKey <$> selectKeysList [] [Asc StudyTermsKey]
+  knownKeys <- fmap unStudyTermsKey <$> selectKeysList [StudyTermsName !=. Nothing] [Asc StudyTermsKey]
   -- let knownKeysSet = Set.fromAscList knownKeys
   -- In case of memory problems, change next lines to conduit proper:
   incidences <- fmap entityVal <$> selectList [StudyTermCandidateKey /<-. knownKeys] [] -- LimitTo might be dangerous here, if we get a partial incidence. Possibly first select N incidences, then retrieving all those only.
@@ -161,9 +161,7 @@ acceptSingletons = do
 
       -- registerFixed :: (STKey, Text) -> DB (Key StudyTerms)
       registerFixed :: (STKey, Text) -> DB ()
-      registerFixed (key, name) =
-        -- insertKey (StudyTermsKey key) $ StudyTerms key (Just $ shortenStudyTerm name) (Just name) -- name clash!
-        void . insert $ StudyTerms key (Just $ shortenStudyTerm name) (Just name)
+      registerFixed (key, name) = repsert (StudyTermsKey' key) $ StudyTerms key (Just $ shortenStudyTerm name) (Just name)
 
   -- register newly fixed candidates
   forM_ fixedKeys registerFixed
diff --git a/test/Database.hs b/test/Database.hs
index 11b14a157..8df98f6e9 100755
--- a/test/Database.hs
+++ b/test/Database.hs
@@ -231,6 +231,8 @@ fillDb = do
     sdBiol  = StudyTermsKey' 26
     sdChem1 = StudyTermsKey' 61
     sdChem2 = StudyTermsKey' 113
+    sdBWL   = StudyTermsKey' 21
+    sdDeut  = StudyTermsKey' 103
   repsert sdInf   $ StudyTerms  79 (Just "Inf")   (Just "Informatikk")
   repsert sdMath  $ StudyTerms 105 (Just "Math" ) (Just "Mathematik")
   repsert sdMedi  $ StudyTerms 121  Nothing       (Just "Fehler hier")
@@ -240,6 +242,8 @@ fillDb = do
   repsert sdBiol  $ StudyTerms 26   Nothing        Nothing                  -- intentionally left unknown
   repsert sdChem1 $ StudyTerms 61   Nothing        Nothing                  -- intentionally left unknown
   repsert sdChem2 $ StudyTerms 113  Nothing        Nothing                  -- intentionally left unknown
+  repsert sdBWL   $ StudyTerms 21   Nothing        Nothing                  -- intentionally left unknown
+  repsert sdDeut  $ StudyTerms 103  Nothing        Nothing                  -- intentionally left unknown
   incidence1 <- liftIO getRandom
   void . insert $ StudyTermCandidate incidence1 221 "Bioinformatik"
   void . insert $ StudyTermCandidate incidence1 221 "Mathematik"
@@ -252,7 +256,7 @@ fillDb = do
   void . insert $ StudyTermCandidate incidence2 61  "Chemie"
   incidence3 <- liftIO getRandom
   void . insert $ StudyTermCandidate incidence3 113 "Chemie"
-  incidence4 <- liftIO getRandom
+  incidence4 <- liftIO getRandom  -- ambiguous incidence
   void . insert $ StudyTermCandidate incidence4 221 "Bioinformatik"
   void . insert $ StudyTermCandidate incidence4 221 "Chemie"
   void . insert $ StudyTermCandidate incidence4 221 "Biologie"
@@ -285,6 +289,26 @@ fillDb = do
   void . insert $ StudyTermCandidate incidence8 121 "Medieninformatik"
   incidence9 <- liftIO getRandom
   void . insert $ StudyTermCandidate incidence9 79 "Informatik"
+  incidence10 <- liftIO getRandom
+  void . insert $ StudyTermCandidate incidence10 103  "Deutsch"
+  void . insert $ StudyTermCandidate incidence10 103  "Betriebswirtschafslehre"
+  void . insert $ StudyTermCandidate incidence10 21   "Deutsch"
+  void . insert $ StudyTermCandidate incidence10 21   "Betriebswirtschafslehre"
+  incidence11 <- liftIO getRandom
+  void . insert $ StudyTermCandidate incidence11 221 "Bioinformatik"
+  void . insert $ StudyTermCandidate incidence11 221 "Chemie"
+  void . insert $ StudyTermCandidate incidence11 221 "Biologie"
+  void . insert $ StudyTermCandidate incidence11 61  "Bioinformatik"
+  void . insert $ StudyTermCandidate incidence11 61  "Chemie"
+  void . insert $ StudyTermCandidate incidence11 61  "Biologie"
+  void . insert $ StudyTermCandidate incidence11 26  "Bioinformatik"
+  void . insert $ StudyTermCandidate incidence11 26  "Chemie"
+  void . insert $ StudyTermCandidate incidence11 26  "Biologie"
+  incidence12 <- liftIO getRandom
+  void . insert $ StudyTermCandidate incidence12 103  "Deutsch"
+  void . insert $ StudyTermCandidate incidence12 103  "Betriebswirtschafslehre"
+  void . insert $ StudyTermCandidate incidence12 21   "Deutsch"
+  void . insert $ StudyTermCandidate incidence12 21   "Betriebswirtschafslehre"
 
   sfMMp <- insert $ StudyFeatures -- keyword type prevents record syntax here
     maxMuster