diff --git a/src/Codd/Representations/Disk.hs b/src/Codd/Representations/Disk.hs index 98594299..0d8e8330 100644 --- a/src/Codd/Representations/Disk.hs +++ b/src/Codd/Representations/Disk.hs @@ -9,28 +9,26 @@ import Codd.Representations.Types import Codd.Types (PgMajorVersion (..)) import Control.DeepSeq (force) import Control.Monad - ( forM, - forM_, + ( forM_, join, - void, when, ) -import Control.Monad.Identity (runIdentity) import Data.Aeson ( Value, decode, ) import Data.Bifunctor (first) -import qualified Data.ByteString.Lazy as BS import qualified Data.ByteString.Lazy as LBS import Data.List (sortOn) +import qualified Data.List.NonEmpty as NE import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import Data.Maybe (fromMaybe) import GHC.IO.Exception (IOErrorType (..), ioe_type) import GHC.Stack (HasCallStack) import System.FilePath - ( takeFileName, + ( takeDirectory, + takeFileName, (), ) import System.IO.Error (isDoesNotExistError) @@ -39,6 +37,7 @@ import UnliftIO MonadUnliftIO, evaluate, handle, + pooledMapConcurrentlyN_, throwIO, tryJust, ) @@ -56,125 +55,46 @@ import Prelude hiding writeFile, ) -{- -This module contains functions and data models to write and read schema representations -from disk. It has been rewritten a few times in attempt to find a model -that is sufficiently clean and robust. - -The unsolved challenge at this point is to unify both reading and writing -under a unique model that represents how files and directories are laid out -on disk, but it doesn't seem worth the trouble, honestly. --} - --- | This class is equivalent to some form of monadic unfolding, mixed --- with a mockable "writeFile" function. It allows us to derive both --- `toFiles` and `persistRepsToDisk`, for example, but isn't a very elegant --- model otherwise. -class DbDiskObj a where - appr :: - (Monad m) => - a -> - -- | When recursing into a new sub-structure, this function - -- will be called with a relative folder where that substructure's - -- root belongs to. - (forall b. (DbDiskObj b) => FilePath -> b -> m d) -> - -- | This function will be called when writing to a file. A relative path - -- to this structure's root will be passed. - (FilePath -> Value -> m d) -> - m [d] - simpleDbDiskObj :: - (Functor m) => ObjName -> Value -> (FilePath -> Value -> m d) -> m [d] -simpleDbDiskObj oname orep ffile = (: []) <$> ffile (mkPathFrag oname) orep - -instance DbDiskObj DbRep where - appr (DbRep dbSettingsRep schemas roles) frec ffile = do - x <- ffile "db-settings" dbSettingsRep - rs <- forM (Map.elems roles) $ frec "roles" - ss <- forM (Map.toList schemas) $ \(schemaName, schema) -> - frec ("schemas" mkPathFrag schemaName) schema - pure $ x : rs ++ ss - -instance DbDiskObj RoleRep where - appr (RoleRep roleName roleRep) _ = simpleDbDiskObj roleName roleRep - -instance DbDiskObj SchemaRep where - appr (SchemaRep _schemaName namespaceRep tables views routines seqs colls types) frec ffile = - do - x <- ffile "objrep" namespaceRep - tbls <- forM (Map.toList tables) $ \(tableName, table) -> - frec ("tables" mkPathFrag tableName) table - vs <- forM (Map.elems views) $ frec "views" - rs <- forM (Map.elems routines) $ frec "routines" - ss <- forM (Map.elems seqs) $ frec "sequences" - cs <- forM (Map.elems colls) $ frec "collations" - ts <- forM (Map.elems types) $ frec "types" - pure $ x : tbls ++ vs ++ rs ++ ss ++ cs ++ ts - -instance DbDiskObj TableRep where - appr (TableRep _tblName tblRep columns constraints triggers policies indexes statistics) frec ffile = - do - let mkpath p = p - x <- ffile (mkpath "objrep") tblRep - cols <- forM (Map.elems columns) $ frec (mkpath "cols") - constrs <- - forM (Map.elems constraints) $ - frec (mkpath "constraints") - trgrs <- forM (Map.elems triggers) $ frec (mkpath "triggers") - pols <- forM (Map.elems policies) $ frec (mkpath "policies") - idxs <- forM (Map.elems indexes) $ frec (mkpath "indexes") - stats <- forM (Map.elems statistics) $ frec (mkpath "statistics") - pure $ x : cols ++ constrs ++ trgrs ++ pols ++ idxs ++ stats - -instance DbDiskObj TableColumnRep where - appr (TableColumnRep colName colRep) _ = simpleDbDiskObj colName colRep - -instance DbDiskObj TableConstraintRep where - appr (TableConstraintRep constrName constrRep) _ = - simpleDbDiskObj constrName constrRep - -instance DbDiskObj TableTriggerRep where - appr (TableTriggerRep triggerName triggerRep) _ = - simpleDbDiskObj triggerName triggerRep - -instance DbDiskObj TablePolicyRep where - appr (TablePolicyRep polName polRep) _ = simpleDbDiskObj polName polRep - -instance DbDiskObj TableStatisticsRep where - appr (TableStatisticsRep stxName stxRep) _ = simpleDbDiskObj stxName stxRep - -instance DbDiskObj TableIndexRep where - appr (TableIndexRep idxName indexRep) _ = simpleDbDiskObj idxName indexRep - -instance DbDiskObj ViewRep where - appr (ViewRep viewName viewRep) _ = simpleDbDiskObj viewName viewRep - -instance DbDiskObj RoutineRep where - appr (RoutineRep routineName routineRep) _ = - simpleDbDiskObj routineName routineRep - -instance DbDiskObj SequenceRep where - appr (SequenceRep seqName seqRep) _ = simpleDbDiskObj seqName seqRep - -instance DbDiskObj CollationRep where - appr (CollationRep collName collationRep) _ = - simpleDbDiskObj collName collationRep - -instance DbDiskObj TypeRep where - appr (TypeRep typeName typeRep) _ = simpleDbDiskObj typeName typeRep + ObjName -> Value -> (FilePath, Value) +simpleDbDiskObj oname orep = (mkPathFrag oname, orep) + +apprDbRep :: DbRep -> [(FilePath, Value)] +apprDbRep (DbRep dbSettingsRep schemas roles) = + let x = ("db-settings", dbSettingsRep) + rs = prependDir "roles" $ flip map (Map.elems roles) $ \(RoleRep n v) -> simpleDbDiskObj n v + ss = prependDir "schemas" $ mconcat $ flip map (Map.toList schemas) $ \(schemaName, schema) -> + prependDir (mkPathFrag schemaName) (apprSchemaRep schema) + in x : rs ++ ss + +apprSchemaRep :: SchemaRep -> [(FilePath, Value)] +apprSchemaRep (SchemaRep _schemaName namespaceRep tables views routines seqs colls types) = + let x = ("objrep", namespaceRep) + tbls = prependDir "tables" $ mconcat $ flip map (Map.toList tables) $ \(tableName, table) -> + prependDir (mkPathFrag tableName) (apprTableRep table) + vs = prependDir "views" $ map (\(ViewRep n v) -> simpleDbDiskObj n v) $ Map.elems views + rs = prependDir "routines" $ map (\(RoutineRep n v) -> simpleDbDiskObj n v) (Map.elems routines) + ss = prependDir "sequences" $ map (\(SequenceRep n v) -> simpleDbDiskObj n v) (Map.elems seqs) + cs = prependDir "collations" $ map (\(CollationRep n v) -> simpleDbDiskObj n v) (Map.elems colls) + ts = prependDir "types" $ map (\(TypeRep n v) -> simpleDbDiskObj n v) (Map.elems types) + in x : tbls ++ vs ++ rs ++ ss ++ cs ++ ts + +apprTableRep :: TableRep -> [(FilePath, Value)] +apprTableRep (TableRep _tblName tblRep columns constraints triggers policies indexes statistics) = + let x = ("objrep", tblRep) + cols = prependDir "cols" $ flip map (Map.elems columns) $ \(TableColumnRep colName colRep) -> simpleDbDiskObj colName colRep + constrs = prependDir "constraints" $ flip map (Map.elems constraints) $ \(TableConstraintRep n v) -> simpleDbDiskObj n v + trgrs = prependDir "triggers" $ flip map (Map.elems triggers) $ \(TableTriggerRep n v) -> simpleDbDiskObj n v + pols = prependDir "policies" $ flip map (Map.elems policies) $ \(TablePolicyRep n v) -> simpleDbDiskObj n v + idxs = prependDir "indexes" $ flip map (Map.elems indexes) $ \(TableIndexRep n v) -> simpleDbDiskObj n v + stats = prependDir "statistics" $ flip map (Map.elems statistics) $ \(TableStatisticsRep n v) -> simpleDbDiskObj n v + in x : cols ++ constrs ++ trgrs ++ pols ++ idxs ++ stats + +prependDir :: FilePath -> [(FilePath, Value)] -> [(FilePath, Value)] +prependDir dir = map (first (dir )) toFiles :: DbRep -> [(FilePath, Value)] -toFiles = sortOn fst . frec - where - frec :: (DbDiskObj a) => a -> [(FilePath, Value)] - frec sobj = - concat $ - runIdentity $ - appr - sobj - (\parentDir obj -> pure $ prependDir parentDir $ frec obj) - (\fn h -> pure [(fn, h)]) - prependDir dir = map (first (dir )) +toFiles = sortOn fst . apprDbRep -- | Wipes out completely the supplied folder and writes the representations of the Database's structures to it again. persistRepsToDisk :: forall m. (HasCallStack, MonadUnliftIO m) => PgMajorVersion -> DbRep -> FilePath -> m () @@ -196,7 +116,7 @@ persistRepsToDisk pgVersion dbSchema schemaDirBeforeVersions = -- We want this function to be fast and as atomic as it can be, but: -- 1. `renameFile` throws if the target path is in a different partition from the source path -- 2. The user might not have permissions to delete the target folder but does have permissions to modify its contents - -- 3. Different operating systems can have different behaviours, like Windows, where renameDirectory fails on Windows if the target directory exists: https://hackage.haskell.org/package/directory-1.3.9.0/docs/System-Directory.html#v:renameDirectory + -- 3. Different operating systems can have different behaviours, like Windows, where renameDirectory fails if the target directory exists: https://hackage.haskell.org/package/directory-1.3.9.0/docs/System-Directory.html#v:renameDirectory -- 4. Windows and I think even Linux can have ACLs that have more than just a binary "can-write" privilege, with "can-delete" being -- separated from "can-create", IIRC. errBestScenario <- tryJust (\(e :: IOError) -> if ioe_type e `elem` [NoSuchThing, UnsatisfiedConstraints, PermissionDenied, IllegalOperation, UnsupportedOperation] then Just () else Nothing) $ do @@ -236,16 +156,17 @@ persistRepsToDisk pgVersion dbSchema schemaDirBeforeVersions = forM_ entries $ \x -> removePathForcibly (path x) else createDirectoryIfMissing True path - writeRec :: (DbDiskObj a) => FilePath -> a -> IO () - writeRec dir obj = - void $ - appr - obj - ( \parentDir sobj -> do - createDirectoryIfMissing True (dir parentDir) - writeRec (dir parentDir) sobj - ) - (\fn jsonRep -> BS.writeFile (dir fn) (detEncodeJSONByteString jsonRep)) + writeRec :: FilePath -> DbRep -> IO () + writeRec dir obj = do + createDirectoryIfMissing True dir + -- Limit number of open file descriptors even if in the future we + -- increase codd's capabilities beyond the current 2 + pooledMapConcurrentlyN_ 2 (writeFolder dir) $ NE.groupBy (\(f1, _) (f2, _) -> takeDirectory f1 == takeDirectory f2) (sortOn fst $ toFiles obj) + writeFolder :: FilePath -> NE.NonEmpty (FilePath, Value) -> IO () + writeFolder dir filesPerFolder = do + let relFolderToCreate = takeDirectory $ fst $ NE.head filesPerFolder + createDirectoryIfMissing True (dir relFolderToCreate) + forM_ filesPerFolder (\(fn, jsonRep) -> LBS.writeFile (dir fn) (detEncodeJSONByteString jsonRep)) readNamespaceRep :: (MonadUnliftIO m) => FilePath -> m SchemaRep readNamespaceRep dir = diff --git a/test/TypesGen.hs b/test/TypesGen.hs index 02b72745..e30af45e 100644 --- a/test/TypesGen.hs +++ b/test/TypesGen.hs @@ -1,7 +1,10 @@ +{-# LANGUAGE CPP #-} + module TypesGen where import Codd.Representations import Codd.Types (PgMajorVersion (..)) +import qualified Data.Aeson as Aeson import Data.Function (on) import Data.List (nubBy) import Data.Map.Strict (Map) @@ -15,7 +18,7 @@ instance Arbitrary DbRepsGen where arbitrary = let repsGen = DbRep - <$> arbitrary + <$> arbJson <*> uniqueMapOf 3 schemaHashGen objName <*> uniqueMapOf 2 roleHashGen objName versionGen = PgMajorVersion <$> arbitrary @@ -24,39 +27,42 @@ instance Arbitrary DbRepsGen where schemaHashGen = SchemaRep <$> genObjName - <*> arbitrary + <*> arbJson <*> uniqueMapOf 20 tableGen objName <*> uniqueMapOf 5 viewGen objName <*> uniqueMapOf 10 routineGen objName <*> uniqueMapOf 15 sequenceGen objName <*> uniqueMapOf 2 collationGen objName <*> uniqueMapOf 5 typeGen objName - roleHashGen = RoleRep <$> genObjName <*> arbitrary + roleHashGen = RoleRep <$> genObjName <*> arbJson -- Per-schema object generators tableGen = TableRep <$> genObjName - <*> arbitrary + <*> arbJson <*> uniqueMapOf 20 colGen objName <*> uniqueMapOf 5 constraintGen objName <*> uniqueMapOf 1 triggerGen objName <*> uniqueMapOf 2 policyGen objName <*> uniqueMapOf 3 indexGen objName <*> uniqueMapOf 1 stxGen objName - viewGen = ViewRep <$> genObjName <*> arbitrary - routineGen = RoutineRep <$> genObjName <*> arbitrary - sequenceGen = SequenceRep <$> genObjName <*> arbitrary - collationGen = CollationRep <$> genObjName <*> arbitrary - typeGen = TypeRep <$> genObjName <*> arbitrary + viewGen = ViewRep <$> genObjName <*> arbJson + routineGen = RoutineRep <$> genObjName <*> arbJson + sequenceGen = SequenceRep <$> genObjName <*> arbJson + collationGen = CollationRep <$> genObjName <*> arbJson + typeGen = TypeRep <$> genObjName <*> arbJson -- Per-table object generators - colGen = TableColumnRep <$> genObjName <*> arbitrary - constraintGen = TableConstraintRep <$> genObjName <*> arbitrary - triggerGen = TableTriggerRep <$> genObjName <*> arbitrary - policyGen = TablePolicyRep <$> genObjName <*> arbitrary - indexGen = TableIndexRep <$> genObjName <*> arbitrary - stxGen = TableStatisticsRep <$> genObjName <*> arbitrary + colGen = TableColumnRep <$> genObjName <*> arbJson + constraintGen = TableConstraintRep <$> genObjName <*> arbJson + triggerGen = TableTriggerRep <$> genObjName <*> arbJson + policyGen = TablePolicyRep <$> genObjName <*> arbJson + indexGen = TableIndexRep <$> genObjName <*> arbJson + stxGen = TableStatisticsRep <$> genObjName <*> arbJson + +arbJson :: Gen Aeson.Value +arbJson = arbitrary uniqueListOf :: (Eq b) => Int -> Gen a -> (a -> b) -> Gen [a] uniqueListOf size gen uniqBy = @@ -66,17 +72,36 @@ uniqueMapOf :: (Ord k) => Int -> Gen a -> (a -> k) -> Gen (Map k a) uniqueMapOf size gen uniqBy = Map.fromList . map (\v -> (uniqBy v, v)) <$> resize size (listOf gen) +-- | Valid first characters for database object names. +validLowerFirstChars, validUpperFirstChars :: [Char] +#ifdef darwin_HOST_OS +-- Tests fail intermittenly on Darwin, and ñ seems to be the offending character +-- in filesystem operations. See https://github.com/mzabani/codd/pull/220 for +-- a long attempt at fixing it nicely. +-- I tried using 'OsPath' and encoding and decoding carefully, but it did not work. +-- My best hypothesis still is some weird encoding that does not roundtrip, +-- despite github actions seemingly using UTF8 encoding in their filesystem. +-- Of course, ideally we would fix this, but without MacOS to test with, +-- pushing to CI and checking what happens there is too terrible a feedback loop. +-- https://eclecticlight.co/2021/05/08/explainer-unicode-normalization-and-apfs/ +-- https://hasufell.github.io/posts/2022-06-29-fixing-haskell-filepaths.html +-- https://www.postgresql.org/docs/12/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS +validLowerFirstChars = "abcdefghijklmnopqrstuvxwyzçáéíóú_" +validUpperFirstChars = "ABCDEFGHIJKLMNOPQRSTUVXWYZÇÁÉÍÓÚ_" +#else +validLowerFirstChars = "abcdefghijklmnopqrstuvxwyzçáéíóúñ_" +validUpperFirstChars = "ABCDEFGHIJKLMNOPQRSTUVXWYZÇÁÉÍÓÚñ_" +#endif + genObjName :: Gen ObjName genObjName = ObjName . Text.pack <$> frequency [(100, genLower), (5, genMixed)] where - -- Docs: https://www.postgresql.org/docs/12/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS - validLowerFirstChars = "abcdefghijklmnopqrstuvxwyzçáéíóúñ_" - validUpperFirstChars = "ABCDEFGHIJKLMNOPQRSTUVXWYZÇÁÉÍÓÚñ_" validLowerOtherChars = validLowerFirstChars ++ "0123456789$" validUpperOtherChars = validUpperFirstChars ++ "0123456789$" + genLower = do c <- elements validLowerFirstChars -- Max Length 63 bytes of UTF8-Encoded name diff --git a/test/WritingReadingRepresentationsSpec.hs b/test/WritingReadingRepresentationsSpec.hs index 3ee1916c..c65ae3ee 100644 --- a/test/WritingReadingRepresentationsSpec.hs +++ b/test/WritingReadingRepresentationsSpec.hs @@ -27,9 +27,7 @@ spec = do describe "Writing and reading representations" $ do it "persistRepsToDisk is inverse of readRepsFromDisk" $ do property $ \(DbRepsGen dbHashes pgVersion) -> do - -- /dev/shm is shared memory so should be faster, if it exists (MacOS doesn't have it) - shmExists <- doesDirectoryExist "/dev/shm" - baseFolder :: FilePath <- if shmExists then pure "/dev/shm" else getEmptyTempDir + baseFolder <- getEmptyTempDir writeSchemaAndReadSchemaRoundtrip pgVersion dbHashes (baseFolder "inverse-test-sql-folder") modifyMaxSuccess (const 1) $ it