Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
bbd1be1
fsync to fix intermittent failure in MacOS
mzabani Aug 16, 2025
2efeff2
Write schema to disk as a function of `toFiles`
mzabani Aug 17, 2025
e2b994d
Use parallelism to improve the performance of `codd write-schema`
mzabani Aug 17, 2025
2650f5d
Import from different namespace to avoid MacOS compilation error
mzabani Aug 17, 2025
336d702
Run MacOS tests a large number of times
mzabani Aug 17, 2025
6197eb7
Fsync every file too
mzabani Aug 17, 2025
7f875f5
Try `sync` in tests
mzabani Aug 17, 2025
2ad1f53
In-memory cache of existing folders
mzabani Aug 17, 2025
9009e86
Parallelism for the whole process -> some performance improvements
mzabani Aug 17, 2025
2ad94e6
STM is not worth it
mzabani Aug 17, 2025
a4b6b51
Smaller STM transactions don't help much
mzabani Aug 17, 2025
85c8e15
Revert "Smaller STM transactions don't help much"
mzabani Aug 17, 2025
d8d4ebf
Revert "STM is not worth it"
mzabani Aug 17, 2025
d75d747
Tidy up
mzabani Aug 17, 2025
5b6dbd7
Try to threadDelay before reading again
mzabani Aug 17, 2025
373248c
Call sync in addition to waiting 50ms
mzabani Aug 18, 2025
83ecedd
Two syncs and 100ms
mzabani Aug 18, 2025
11261ea
Wait 200ms
mzabani Aug 19, 2025
d40945e
Wait 200 ms more
mzabani Aug 19, 2025
637f3f9
Run sync in many threads
mzabani Aug 19, 2025
b9b8f40
Why is MacOS so difficult
mzabani Aug 21, 2025
7d23195
Print exceptions
mzabani Aug 21, 2025
43dc288
Try strict ByteString
mzabani Aug 21, 2025
f1b44ff
Test macos filesystem capabilities
mzabani Aug 21, 2025
d05e225
Start binary search of bad characters
mzabani Aug 22, 2025
6e50fb3
Binary search to find error - part 2
mzabani Aug 23, 2025
cf2dfca
Remove `traceShowId`
mzabani Aug 23, 2025
90e014a
Binary search - more
mzabani Aug 23, 2025
7107c82
Binary search - remove more chars
mzabani Aug 23, 2025
961c7c4
Binary search - Remove `$` character
mzabani Aug 23, 2025
fc306e4
Confirm `ñ` is the problem by removing it
mzabani Aug 24, 2025
24b91c8
Bring back `ñ` and print filesystem encoding
mzabani Aug 24, 2025
0efdf4e
Try to use `OsPath`
mzabani Aug 25, 2025
7e178a9
Remove `ñ` for Darwin only
mzabani Aug 29, 2025
3857974
Undo `OsPath`
mzabani Aug 29, 2025
ab47600
Simplify write-schema code significantly
mzabani Aug 30, 2025
03794aa
Undo CI changes
mzabani Aug 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
187 changes: 54 additions & 133 deletions src/Codd/Representations/Disk.hs
Original file line number Diff line number Diff line change
Expand Up @@ -9,28 +9,26 @@ import Codd.Representations.Types
import Codd.Types (PgMajorVersion (..))
import Control.DeepSeq (force)
import Control.Monad
( forM,
forM_,
( forM_,
join,
void,
when,
)
import Control.Monad.Identity (runIdentity)
import Data.Aeson
( Value,
decode,
)
import Data.Bifunctor (first)
import qualified Data.ByteString.Lazy as BS
import qualified Data.ByteString.Lazy as LBS
import Data.List (sortOn)
import qualified Data.List.NonEmpty as NE
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Data.Maybe (fromMaybe)
import GHC.IO.Exception (IOErrorType (..), ioe_type)
import GHC.Stack (HasCallStack)
import System.FilePath
( takeFileName,
( takeDirectory,
takeFileName,
(</>),
)
import System.IO.Error (isDoesNotExistError)
Expand All @@ -39,6 +37,7 @@ import UnliftIO
MonadUnliftIO,
evaluate,
handle,
pooledMapConcurrentlyN_,
throwIO,
tryJust,
)
Expand All @@ -56,125 +55,46 @@ import Prelude hiding
writeFile,
)

{-
This module contains functions and data models to write and read schema representations
from disk. It has been rewritten a few times in an attempt to find a model
that is sufficiently clean and robust.

The unsolved challenge at this point is to unify both reading and writing
under a unique model that represents how files and directories are laid out
on disk, but it doesn't seem worth the trouble, honestly.
-}

-- | This class is equivalent to some form of monadic unfolding, mixed
-- with a mockable "writeFile" function. It allows us to derive both
-- `toFiles` and `persistRepsToDisk`, for example, but isn't a very elegant
-- model otherwise.
class DbDiskObj a where
appr ::
(Monad m) =>
a ->
-- | When recursing into a new sub-structure, this function
-- will be called with a relative folder where that substructure's
-- root belongs to.
(forall b. (DbDiskObj b) => FilePath -> b -> m d) ->
-- | This function will be called when writing to a file. A relative path
-- to this structure's root will be passed.
(FilePath -> Value -> m d) ->
m [d]

simpleDbDiskObj ::
(Functor m) => ObjName -> Value -> (FilePath -> Value -> m d) -> m [d]
simpleDbDiskObj oname orep ffile = (: []) <$> ffile (mkPathFrag oname) orep

instance DbDiskObj DbRep where
appr (DbRep dbSettingsRep schemas roles) frec ffile = do
x <- ffile "db-settings" dbSettingsRep
rs <- forM (Map.elems roles) $ frec "roles"
ss <- forM (Map.toList schemas) $ \(schemaName, schema) ->
frec ("schemas" </> mkPathFrag schemaName) schema
pure $ x : rs ++ ss

instance DbDiskObj RoleRep where
appr (RoleRep roleName roleRep) _ = simpleDbDiskObj roleName roleRep

instance DbDiskObj SchemaRep where
appr (SchemaRep _schemaName namespaceRep tables views routines seqs colls types) frec ffile =
do
x <- ffile "objrep" namespaceRep
tbls <- forM (Map.toList tables) $ \(tableName, table) ->
frec ("tables" </> mkPathFrag tableName) table
vs <- forM (Map.elems views) $ frec "views"
rs <- forM (Map.elems routines) $ frec "routines"
ss <- forM (Map.elems seqs) $ frec "sequences"
cs <- forM (Map.elems colls) $ frec "collations"
ts <- forM (Map.elems types) $ frec "types"
pure $ x : tbls ++ vs ++ rs ++ ss ++ cs ++ ts

instance DbDiskObj TableRep where
appr (TableRep _tblName tblRep columns constraints triggers policies indexes statistics) frec ffile =
do
let mkpath p = p
x <- ffile (mkpath "objrep") tblRep
cols <- forM (Map.elems columns) $ frec (mkpath "cols")
constrs <-
forM (Map.elems constraints) $
frec (mkpath "constraints")
trgrs <- forM (Map.elems triggers) $ frec (mkpath "triggers")
pols <- forM (Map.elems policies) $ frec (mkpath "policies")
idxs <- forM (Map.elems indexes) $ frec (mkpath "indexes")
stats <- forM (Map.elems statistics) $ frec (mkpath "statistics")
pure $ x : cols ++ constrs ++ trgrs ++ pols ++ idxs ++ stats

instance DbDiskObj TableColumnRep where
appr (TableColumnRep colName colRep) _ = simpleDbDiskObj colName colRep

instance DbDiskObj TableConstraintRep where
appr (TableConstraintRep constrName constrRep) _ =
simpleDbDiskObj constrName constrRep

instance DbDiskObj TableTriggerRep where
appr (TableTriggerRep triggerName triggerRep) _ =
simpleDbDiskObj triggerName triggerRep

instance DbDiskObj TablePolicyRep where
appr (TablePolicyRep polName polRep) _ = simpleDbDiskObj polName polRep

instance DbDiskObj TableStatisticsRep where
appr (TableStatisticsRep stxName stxRep) _ = simpleDbDiskObj stxName stxRep

instance DbDiskObj TableIndexRep where
appr (TableIndexRep idxName indexRep) _ = simpleDbDiskObj idxName indexRep

instance DbDiskObj ViewRep where
appr (ViewRep viewName viewRep) _ = simpleDbDiskObj viewName viewRep

instance DbDiskObj RoutineRep where
appr (RoutineRep routineName routineRep) _ =
simpleDbDiskObj routineName routineRep

instance DbDiskObj SequenceRep where
appr (SequenceRep seqName seqRep) _ = simpleDbDiskObj seqName seqRep

instance DbDiskObj CollationRep where
appr (CollationRep collName collationRep) _ =
simpleDbDiskObj collName collationRep

instance DbDiskObj TypeRep where
appr (TypeRep typeName typeRep) _ = simpleDbDiskObj typeName typeRep
ObjName -> Value -> (FilePath, Value)
simpleDbDiskObj oname orep = (mkPathFrag oname, orep)

-- | Lays the whole database representation out as a flat list of
-- relative file paths paired with the JSON value to write at each path.
-- Roles come first (under "roles/"), then each schema's files under
-- "schemas/<schema-name>/".
apprDbRep :: DbRep -> [(FilePath, Value)]
apprDbRep (DbRep dbSettingsRep schemas roles) =
  ("db-settings", dbSettingsRep) : roleFiles ++ schemaFiles
  where
    roleFiles =
      prependDir "roles" [simpleDbDiskObj n v | RoleRep n v <- Map.elems roles]
    schemaFiles =
      prependDir "schemas" $
        concatMap
          (\(schemaName, schema) -> prependDir (mkPathFrag schemaName) (apprSchemaRep schema))
          (Map.toList schemas)

-- | Flattens one schema into (relative path, JSON value) pairs,
-- recursing into each table's own file layout. The schema's own
-- representation lives in an "objrep" file at the schema root.
apprSchemaRep :: SchemaRep -> [(FilePath, Value)]
apprSchemaRep (SchemaRep _schemaName namespaceRep tables views routines seqs colls types) =
  ("objrep", namespaceRep)
    : concat
      [ tableFiles,
        viewFiles,
        routineFiles,
        seqFiles,
        collFiles,
        typeFiles
      ]
  where
    tableFiles =
      prependDir "tables" $
        concatMap
          (\(tableName, table) -> prependDir (mkPathFrag tableName) (apprTableRep table))
          (Map.toList tables)
    viewFiles = prependDir "views" [simpleDbDiskObj n v | ViewRep n v <- Map.elems views]
    routineFiles = prependDir "routines" [simpleDbDiskObj n v | RoutineRep n v <- Map.elems routines]
    seqFiles = prependDir "sequences" [simpleDbDiskObj n v | SequenceRep n v <- Map.elems seqs]
    collFiles = prependDir "collations" [simpleDbDiskObj n v | CollationRep n v <- Map.elems colls]
    typeFiles = prependDir "types" [simpleDbDiskObj n v | TypeRep n v <- Map.elems types]

-- | Flattens one table into (relative path, JSON value) pairs.
-- The table's own representation lives in an "objrep" file at the
-- table root; each kind of child object gets its own sub-folder.
apprTableRep :: TableRep -> [(FilePath, Value)]
apprTableRep (TableRep _tblName tblRep columns constraints triggers policies indexes statistics) =
  ("objrep", tblRep)
    : concat
      [ prependDir "cols" [simpleDbDiskObj n v | TableColumnRep n v <- Map.elems columns],
        prependDir "constraints" [simpleDbDiskObj n v | TableConstraintRep n v <- Map.elems constraints],
        prependDir "triggers" [simpleDbDiskObj n v | TableTriggerRep n v <- Map.elems triggers],
        prependDir "policies" [simpleDbDiskObj n v | TablePolicyRep n v <- Map.elems policies],
        prependDir "indexes" [simpleDbDiskObj n v | TableIndexRep n v <- Map.elems indexes],
        prependDir "statistics" [simpleDbDiskObj n v | TableStatisticsRep n v <- Map.elems statistics]
      ]

-- | Prefixes every relative path in the list with @dir@ using
-- platform-appropriate path joining. Generalized over the payload
-- type (backward-compatible: @Value@ payloads still instantiate @a@);
-- only the path component of each pair is touched.
prependDir :: FilePath -> [(FilePath, a)] -> [(FilePath, a)]
prependDir dir = map (first (dir </>))

toFiles :: DbRep -> [(FilePath, Value)]
toFiles = sortOn fst . frec
where
frec :: (DbDiskObj a) => a -> [(FilePath, Value)]
frec sobj =
concat $
runIdentity $
appr
sobj
(\parentDir obj -> pure $ prependDir parentDir $ frec obj)
(\fn h -> pure [(fn, h)])
prependDir dir = map (first (dir </>))
toFiles = sortOn fst . apprDbRep

-- | Wipes out completely the supplied folder and writes the representations of the Database's structures to it again.
persistRepsToDisk :: forall m. (HasCallStack, MonadUnliftIO m) => PgMajorVersion -> DbRep -> FilePath -> m ()
Expand All @@ -196,7 +116,7 @@ persistRepsToDisk pgVersion dbSchema schemaDirBeforeVersions =
-- We want this function to be fast and as atomic as it can be, but:
-- 1. `renameFile` throws if the target path is in a different partition from the source path
-- 2. The user might not have permissions to delete the target folder but does have permissions to modify its contents
-- 3. Different operating systems can have different behaviours, like Windows, where renameDirectory fails on Windows if the target directory exists: https://hackage.haskell.org/package/directory-1.3.9.0/docs/System-Directory.html#v:renameDirectory
-- 3. Different operating systems can have different behaviours, like Windows, where renameDirectory fails if the target directory exists: https://hackage.haskell.org/package/directory-1.3.9.0/docs/System-Directory.html#v:renameDirectory
-- 4. Windows and I think even Linux can have ACLs that have more than just a binary "can-write" privilege, with "can-delete" being
-- separated from "can-create", IIRC.
errBestScenario <- tryJust (\(e :: IOError) -> if ioe_type e `elem` [NoSuchThing, UnsatisfiedConstraints, PermissionDenied, IllegalOperation, UnsupportedOperation] then Just () else Nothing) $ do
Expand Down Expand Up @@ -236,16 +156,17 @@ persistRepsToDisk pgVersion dbSchema schemaDirBeforeVersions =
forM_ entries $ \x -> removePathForcibly (path </> x)
else createDirectoryIfMissing True path

writeRec :: (DbDiskObj a) => FilePath -> a -> IO ()
writeRec dir obj =
void $
appr
obj
( \parentDir sobj -> do
createDirectoryIfMissing True (dir </> parentDir)
writeRec (dir </> parentDir) sobj
)
(\fn jsonRep -> BS.writeFile (dir </> fn) (detEncodeJSONByteString jsonRep))
-- Writes every file of the database representation under `dir`,
-- creating folders as needed and writing files of different folders
-- concurrently.
writeRec :: FilePath -> DbRep -> IO ()
writeRec dir obj = do
createDirectoryIfMissing True dir
-- Limit number of open file descriptors even if in the future we
-- increase codd's capabilities beyond the current 2
-- NOTE(review): `toFiles` already sorts by path (see its definition),
-- so the `sortOn fst` here looks redundant — harmless, but confirm
-- before removing. Sorting guarantees files of the same folder are
-- adjacent, which the `groupBy` below relies on.
pooledMapConcurrentlyN_ 2 (writeFolder dir) $ NE.groupBy (\(f1, _) (f2, _) -> takeDirectory f1 == takeDirectory f2) (sortOn fst $ toFiles obj)
-- Creates one folder (all paths in the group share the same
-- `takeDirectory`) and then writes each of its files sequentially.
writeFolder :: FilePath -> NE.NonEmpty (FilePath, Value) -> IO ()
writeFolder dir filesPerFolder = do
let relFolderToCreate = takeDirectory $ fst $ NE.head filesPerFolder
createDirectoryIfMissing True (dir </> relFolderToCreate)
forM_ filesPerFolder (\(fn, jsonRep) -> LBS.writeFile (dir </> fn) (detEncodeJSONByteString jsonRep))

readNamespaceRep :: (MonadUnliftIO m) => FilePath -> m SchemaRep
readNamespaceRep dir =
Expand Down
61 changes: 43 additions & 18 deletions test/TypesGen.hs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
{-# LANGUAGE CPP #-}

module TypesGen where

import Codd.Representations
import Codd.Types (PgMajorVersion (..))
import qualified Data.Aeson as Aeson
import Data.Function (on)
import Data.List (nubBy)
import Data.Map.Strict (Map)
Expand All @@ -15,7 +18,7 @@ instance Arbitrary DbRepsGen where
arbitrary =
let repsGen =
DbRep
<$> arbitrary
<$> arbJson
<*> uniqueMapOf 3 schemaHashGen objName
<*> uniqueMapOf 2 roleHashGen objName
versionGen = PgMajorVersion <$> arbitrary
Expand All @@ -24,39 +27,42 @@ instance Arbitrary DbRepsGen where
schemaHashGen =
SchemaRep
<$> genObjName
<*> arbitrary
<*> arbJson
<*> uniqueMapOf 20 tableGen objName
<*> uniqueMapOf 5 viewGen objName
<*> uniqueMapOf 10 routineGen objName
<*> uniqueMapOf 15 sequenceGen objName
<*> uniqueMapOf 2 collationGen objName
<*> uniqueMapOf 5 typeGen objName
roleHashGen = RoleRep <$> genObjName <*> arbitrary
roleHashGen = RoleRep <$> genObjName <*> arbJson

-- Per-schema object generators
tableGen =
TableRep
<$> genObjName
<*> arbitrary
<*> arbJson
<*> uniqueMapOf 20 colGen objName
<*> uniqueMapOf 5 constraintGen objName
<*> uniqueMapOf 1 triggerGen objName
<*> uniqueMapOf 2 policyGen objName
<*> uniqueMapOf 3 indexGen objName
<*> uniqueMapOf 1 stxGen objName
viewGen = ViewRep <$> genObjName <*> arbitrary
routineGen = RoutineRep <$> genObjName <*> arbitrary
sequenceGen = SequenceRep <$> genObjName <*> arbitrary
collationGen = CollationRep <$> genObjName <*> arbitrary
typeGen = TypeRep <$> genObjName <*> arbitrary
viewGen = ViewRep <$> genObjName <*> arbJson
routineGen = RoutineRep <$> genObjName <*> arbJson
sequenceGen = SequenceRep <$> genObjName <*> arbJson
collationGen = CollationRep <$> genObjName <*> arbJson
typeGen = TypeRep <$> genObjName <*> arbJson

-- Per-table object generators
colGen = TableColumnRep <$> genObjName <*> arbitrary
constraintGen = TableConstraintRep <$> genObjName <*> arbitrary
triggerGen = TableTriggerRep <$> genObjName <*> arbitrary
policyGen = TablePolicyRep <$> genObjName <*> arbitrary
indexGen = TableIndexRep <$> genObjName <*> arbitrary
stxGen = TableStatisticsRep <$> genObjName <*> arbitrary
colGen = TableColumnRep <$> genObjName <*> arbJson
constraintGen = TableConstraintRep <$> genObjName <*> arbJson
triggerGen = TableTriggerRep <$> genObjName <*> arbJson
policyGen = TablePolicyRep <$> genObjName <*> arbJson
indexGen = TableIndexRep <$> genObjName <*> arbJson
stxGen = TableStatisticsRep <$> genObjName <*> arbJson

-- | Monomorphic alias for 'arbitrary' pinned to 'Aeson.Value', so the
-- generators above don't need per-call-site type annotations to pick
-- the JSON 'Value' instance.
arbJson :: Gen Aeson.Value
arbJson = arbitrary

uniqueListOf :: (Eq b) => Int -> Gen a -> (a -> b) -> Gen [a]
uniqueListOf size gen uniqBy =
Expand All @@ -66,17 +72,36 @@ uniqueMapOf :: (Ord k) => Int -> Gen a -> (a -> k) -> Gen (Map k a)
uniqueMapOf size gen uniqBy =
Map.fromList . map (\v -> (uniqBy v, v)) <$> resize size (listOf gen)

-- | Valid first characters for database object names.
validLowerFirstChars, validUpperFirstChars :: [Char]
#ifdef darwin_HOST_OS
-- Tests fail intermittently on Darwin, and ñ seems to be the offending character
-- in filesystem operations. See https://github.com/mzabani/codd/pull/220 for
-- a long attempt at fixing it nicely.
-- I tried using 'OsPath' and encoding and decoding carefully, but it did not work.
-- My best hypothesis still is some weird encoding that does not roundtrip,
-- despite github actions seemingly using UTF8 encoding in their filesystem.
-- Of course, ideally we would fix this, but without MacOS to test with,
-- pushing to CI and checking what happens there is too terrible a feedback loop.
-- https://eclecticlight.co/2021/05/08/explainer-unicode-normalization-and-apfs/
-- https://hasufell.github.io/posts/2022-06-29-fixing-haskell-filepaths.html
-- https://www.postgresql.org/docs/12/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
validLowerFirstChars = "abcdefghijklmnopqrstuvxwyzçáéíóú_"
validUpperFirstChars = "ABCDEFGHIJKLMNOPQRSTUVXWYZÇÁÉÍÓÚ_"
#else
-- NOTE(review): the "upper" set contains a lowercase ñ (and no Ñ) —
-- possibly intentional given the Darwin issue above, but confirm.
validLowerFirstChars = "abcdefghijklmnopqrstuvxwyzçáéíóúñ_"
validUpperFirstChars = "ABCDEFGHIJKLMNOPQRSTUVXWYZÇÁÉÍÓÚñ_"
#endif

genObjName :: Gen ObjName
genObjName =
ObjName . Text.pack
<$> frequency
[(100, genLower), (5, genMixed)]
where
-- Docs: https://www.postgresql.org/docs/12/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
validLowerFirstChars = "abcdefghijklmnopqrstuvxwyzçáéíóúñ_"
validUpperFirstChars = "ABCDEFGHIJKLMNOPQRSTUVXWYZÇÁÉÍÓÚñ_"
validLowerOtherChars = validLowerFirstChars ++ "0123456789$"
validUpperOtherChars = validUpperFirstChars ++ "0123456789$"

genLower = do
c <- elements validLowerFirstChars
-- Max Length 63 bytes of UTF8-Encoded name
Expand Down
4 changes: 1 addition & 3 deletions test/WritingReadingRepresentationsSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,7 @@ spec = do
describe "Writing and reading representations" $ do
it "persistRepsToDisk is inverse of readRepsFromDisk" $ do
property $ \(DbRepsGen dbHashes pgVersion) -> do
-- /dev/shm is shared memory so should be faster, if it exists (MacOS doesn't have it)
shmExists <- doesDirectoryExist "/dev/shm"
baseFolder :: FilePath <- if shmExists then pure "/dev/shm" else getEmptyTempDir
baseFolder <- getEmptyTempDir
writeSchemaAndReadSchemaRoundtrip pgVersion dbHashes (baseFolder </> "inverse-test-sql-folder")
modifyMaxSuccess (const 1)
$ it
Expand Down