near · barakeinav1 · May 26, 2026 · Jun 8, 2026 · Jun 8, 2026 · Jun 8, 2026
diff --git a/crates/e2e-tests/src/cluster.rs b/crates/e2e-tests/src/cluster.rs
@@ -1034,6 +1034,13 @@ impl MpcNodeState {
         }
     }
 
+    pub fn home_dir(&self) -> &Path {
+        match self {
+            MpcNodeState::Running(n) => n.setup().home_dir(),
+            MpcNodeState::Stopped(s) => s.home_dir(),
+        }
+    }
+
     pub fn p2p_public_key(&self) -> Ed25519PublicKey {
         match self {
             MpcNodeState::Running(n) => n.setup().p2p_public_key(),

diff --git a/crates/e2e-tests/src/mpc_node.rs b/crates/e2e-tests/src/mpc_node.rs
@@ -137,7 +137,10 @@ impl MpcNode {
     }
 }
 
+pub const STDOUT_LOG: &str = "stdout.log";
+pub const STDOUT_LOG_PREVIOUS: &str = "stdout.log.previous";
 pub const STDERR_LOG: &str = "stderr.log";
+pub const STDERR_LOG_PREVIOUS: &str = "stderr.log.previous";
 
 /// Guard that kills the child process on drop.
 struct ProcessGuard(Child);
@@ -352,10 +355,25 @@ impl MpcNodeSetup {
             "starting mpc-node"
         );
 
-        let stdout_file = std::fs::File::create(self.home_dir.join("stdout.log"))
-            .context("failed to create stdout log")?;
-        let stderr_file = std::fs::File::create(self.home_dir.join("stderr.log"))
-            .context("failed to create stderr log")?;
+        // Rotate stdout.log / stderr.log to .previous on restart so that on
+        // a post-restart test failure the diagnostic can dump both the
+        // pre-kill mpc-node tracing (stdout) and any crash output (stderr)
+        // from BEFORE the restart, alongside whatever the restarted process
+        // produced. Without this rotation, `File::create` truncates and the
+        // pre-restart context is lost — leaving us blind to the upstream
+        // nearcore panic stack that lives in the pre-restart stderr.
+        let stdout_path = self.home_dir.join(STDOUT_LOG);
+        if stdout_path.exists() {
+            let _ = std::fs::rename(&stdout_path, self.home_dir.join(STDOUT_LOG_PREVIOUS));
+        }
+        let stdout_file =
+            std::fs::File::create(&stdout_path).context("failed to create stdout log")?;
+        let stderr_path = self.home_dir.join(STDERR_LOG);
+        if stderr_path.exists() {
+            let _ = std::fs::rename(&stderr_path, self.home_dir.join(STDERR_LOG_PREVIOUS));
+        }
+        let stderr_file =
+            std::fs::File::create(&stderr_path).context("failed to create stderr log")?;
 
         let child = Command::new(&self.binary_path)
             .arg("start-with-config-file")

diff --git a/crates/e2e-tests/tests/common.rs b/crates/e2e-tests/tests/common.rs
@@ -1,3 +1,4 @@
+use std::io::{Read, Seek, SeekFrom};
 use std::path::{Path, PathBuf};
 use std::time::Duration;
 
@@ -206,7 +207,28 @@ pub async fn wait_for_node_indexer_height_above(
     )
     .await
     .with_context(|| {
-        format!("node {idx} indexer did not advance past height {min_height} within {timeout:?}")
+        // On failure, dump the tails of the node's stdout/stderr log files —
+        // both the pre-restart copy (rotated to `.previous` by `MpcNodeSetup::start`)
+        // and the current copy. mpc-node's `tracing` output goes to stdout;
+        // panic backtraces and `eprintln!` go to stderr. The post-restart
+        // `stderr.log` is where the upstream nearcore panic stack lives
+        // (see `docs/investigation/nearcore-indexer-sigkill-restart-panic.md`).
+        let home = cluster.nodes[idx].home_dir();
+        let stdout_previous = read_log_tail(&home.join("stdout.log.previous"), 16_384);
+        let stderr_previous = read_log_tail(&home.join("stderr.log.previous"), 16_384);
+        let stdout_current = read_log_tail(&home.join("stdout.log"), 16_384);
+        let stderr_current = read_log_tail(&home.join("stderr.log"), 16_384);
+        format!(
+            "node {idx} indexer did not advance past height {min_height} within {timeout:?}\n\
+             --- last 16KB of node {idx} stdout.log.previous (pre-restart mpc-node tracing) ---\n{stdout_previous}\n\
+             --- end stdout.log.previous ---\n\
+             --- last 16KB of node {idx} stderr.log.previous (pre-restart stderr; panic from pre-restart process if any) ---\n{stderr_previous}\n\
+             --- end stderr.log.previous ---\n\
+             --- last 16KB of node {idx} stdout.log (post-restart mpc-node tracing) ---\n{stdout_current}\n\
+             --- end stdout.log ---\n\
+             --- last 16KB of node {idx} stderr.log (post-restart stderr; upstream nearcore panic stack typically here) ---\n{stderr_current}\n\
+             --- end stderr.log ---"
+        )
     })?;
     let elapsed = start.elapsed();
     tracing::info!(
@@ -220,6 +242,27 @@ pub async fn wait_for_node_indexer_height_above(
     Ok(())
 }
 
+/// Best-effort read of the last `max_bytes` of a log file. Returns a synthetic
+/// placeholder string if the file can't be opened/read. Used to inline a
+/// node's log tail into the test panic message when a kill+restart wait
+/// helper times out, so CI logs surface the upstream panic stack right next
+/// to the test failure rather than leaving us to dig through saved artifacts.
+fn read_log_tail(path: &Path, max_bytes: usize) -> String {
+    let Ok(mut f) = std::fs::File::open(path) else {
+        return format!("(could not open {})", path.display());
+    };
+    let len = f.metadata().map(|m| m.len()).unwrap_or(0);
+    let skip = len.saturating_sub(max_bytes as u64);
+    if f.seek(SeekFrom::Start(skip)).is_err() {
+        return format!("(seek failed on {})", path.display());
+    }
+    let mut buf = Vec::with_capacity(max_bytes);
+    if f.read_to_end(&mut buf).is_err() {
+        return format!("(read failed on {})", path.display());
+    }
+    String::from_utf8_lossy(&buf).into_owned()
+}
+
 /// Read node `idx`'s current indexer block height. Returns `Ok(None)` if
 /// the node is not running or the HTTP scrape can't connect (process
 /// down); returns `Err` if a metrics body read fails partway through.

diff --git a/crates/node/src/migration_service.rs b/crates/node/src/migration_service.rs
@@ -2,7 +2,9 @@ use std::{net::SocketAddr, sync::Arc};
 
 use ed25519_dalek::SigningKey;
 use near_account_id::AccountId;
+use near_mpc_contract_interface::types::Ed25519PublicKey;
 use onboarding::onboard;
+use tee_authority::tee_authority::TeeAuthority;
 use tokio::sync::{RwLock, watch};
 use types::MigrationInfo;
 
@@ -30,6 +32,7 @@ impl From<&SecretsConfig> for MigrationSecrets {
     }
 }
 
+#[expect(clippy::too_many_arguments)]
 pub async fn spawn_recovery_server_and_run_onboarding(
     migration_web_ui: SocketAddr,
     migration_secrets: MigrationSecrets,
@@ -38,6 +41,8 @@ pub async fn spawn_recovery_server_and_run_onboarding(
     my_migration_info_receiver: watch::Receiver<MigrationInfo>,
     contract_state_receiver: watch::Receiver<ContractState>,
     tx_sender: impl TransactionSender,
+    tee_authority: TeeAuthority,
+    account_public_key: Ed25519PublicKey,
 ) -> anyhow::Result<()> {
     let (import_keyshares_sender, import_keyshares_receiver) = tokio::sync::watch::channel(vec![]);
     let web_server_state = web::types::WebServerState {
@@ -61,6 +66,8 @@ pub async fn spawn_recovery_server_and_run_onboarding(
         tx_sender,
         keyshare_storage.clone(),
         import_keyshares_receiver,
+        tee_authority,
+        account_public_key,
     )
     .await?;
     Ok(())

diff --git a/crates/node/src/migration_service/onboarding.rs b/crates/node/src/migration_service/onboarding.rs
@@ -5,7 +5,9 @@ use backon::{ExponentialBuilder, Retryable};
 use ed25519_dalek::VerifyingKey;
 use futures::TryFutureExt;
 use near_account_id::AccountId;
+use near_mpc_contract_interface::types::Ed25519PublicKey;
 use near_mpc_crypto_types::Keyset;
+use tee_authority::tee_authority::TeeAuthority;
 use tokio::sync::{RwLock, watch};
 use tokio_util::sync::CancellationToken;
 
@@ -17,6 +19,7 @@ use crate::{
     },
     keyshare::{Keyshare, KeyshareStorage},
     migration_service::types::{MigrationInfo, OnboardingJob, OnboardingTask},
+    tee::remote_attestation::submit_attestation_before_concluding_migration,
 };
 
 /// Waits until the node becomes an active participant in the current epoch or
@@ -25,6 +28,7 @@ use crate::{
 /// runs onboarding tasks as needed.
 ///
 /// Returns `Ok(())` when this node is an active participant in the current epoch.
+#[expect(clippy::too_many_arguments)]
 pub(crate) async fn onboard(
     contract_state_receiver: watch::Receiver<ContractState>,
     my_migration_info_receiver: watch::Receiver<MigrationInfo>,
@@ -33,6 +37,8 @@ pub(crate) async fn onboard(
     tx_sender: impl TransactionSender,
     keyshare_storage: Arc<RwLock<KeyshareStorage>>,
     keyshare_receiver: watch::Receiver<Vec<Keyshare>>,
+    tee_authority: TeeAuthority,
+    account_public_key: Ed25519PublicKey,
 ) -> anyhow::Result<()> {
     tracing::info!(?my_near_account_id, "starting onboarding");
     let (cancel_monitoring_task, mut onboarding_job_receiver) = start_onboarding_monitoring_task(
@@ -68,6 +74,9 @@ pub(crate) async fn onboard(
                     tx_sender.clone(),
                     my_migration_info_receiver.clone(),
                     cancellation_token.clone(),
+                    tee_authority.clone(),
+                    (&tls_public_key).into(),
+                    account_public_key.clone(),
                 )
                 .await;
                 if cancellation_token.is_cancelled() {
@@ -230,13 +239,17 @@ async fn wait_for_active_migration_to_clear(
 /// This function returns Ok(()) if it is cancelled or succeeds.
 ///
 /// **Not cancellation-safe!** Needs to be cancelled via `cancel_import_token`
+#[expect(clippy::too_many_arguments)]
 async fn execute_onboarding(
     importing_keyset: Keyset,
     keyshare_storage: Arc<RwLock<KeyshareStorage>>,
     keyshare_receiver: watch::Receiver<Vec<Keyshare>>,
     tx_sender: impl TransactionSender,
     my_migration_info_receiver: watch::Receiver<MigrationInfo>,
     cancel_import_token: CancellationToken,
+    tee_authority: TeeAuthority,
+    tls_public_key: Ed25519PublicKey,
+    account_public_key: Ed25519PublicKey,
 ) -> anyhow::Result<()> {
     if keyshare_storage
         .read()
@@ -254,6 +267,34 @@ async fn execute_onboarding(
         .await?;
     }
 
+    // Submit a fresh attestation before concluding. Back-migration can
+    // otherwise hit the case where the destination's stored on-chain
+    // attestation is past expiry by the contract's `current_time_seconds`,
+    // and `reverify_participants` rejects the conclude (see #2121). Failure
+    // here is logged but non-fatal: in the non-back-migration path the
+    // existing on-chain attestation is typically still valid, so we fall
+    // through to `retry_conclude_onboarding`.
+    tokio::select! {
+        result = submit_attestation_before_concluding_migration(
+            tee_authority,
+            tx_sender.clone(),
+            tls_public_key,
+            account_public_key,
+        ) => {
+            if let Err(err) = result {
+                tracing::warn!(
+                    ?err,
+                    "failed to refresh attestation before concluding migration; \
+                     proceeding with existing on-chain attestation"
+                );
+            }
+        }
+        _ = cancel_import_token.cancelled() => {
+            tracing::info!("attestation refresh cancelled");
+            return Ok(());
+        }
+    }
+
     tokio::select! {
         _ = retry_conclude_onboarding(
             importing_keyset,

diff --git a/crates/node/src/run.rs b/crates/node/src/run.rs
@@ -394,13 +394,16 @@ where
     let allowed_docker_images_receiver_clone = indexer_api.allowed_docker_images_receiver.clone();
     let allowed_launcher_compose_receiver_clone =
         indexer_api.allowed_launcher_compose_receiver.clone();
+    let tee_authority_for_monitor = tee_authority.clone();
+    let tls_public_key_for_monitor = tls_public_key.clone();
+    let account_public_key_for_monitor = account_public_key.clone();
     tokio::spawn(async move {
         if let Err(e) = monitor_attestation_removal(
             account_id_clone,
-            tee_authority,
+            tee_authority_for_monitor,
             tx_sender_clone,
-            tls_public_key,
-            account_public_key,
+            tls_public_key_for_monitor,
+            account_public_key_for_monitor,
             allowed_docker_images_receiver_clone,
             allowed_launcher_compose_receiver_clone,
             tee_accounts_receiver,
@@ -425,6 +428,8 @@ where
         indexer_api.my_migration_info_receiver.clone(),
         indexer_api.contract_state_receiver.clone(),
         indexer_api.txn_sender.clone(),
+        tee_authority,
+        account_public_key,
     )
     .await?;
 

diff --git a/crates/node/src/tee/remote_attestation.rs b/crates/node/src/tee/remote_attestation.rs
@@ -22,6 +22,16 @@ use near_account_id::AccountId;
 use near_mpc_contract_interface::types::NodeId;
 use tokio::sync::watch;
 
+/// Upper bound on how long
+/// `submit_attestation_before_concluding_migration` will keep retrying its
+/// underlying `submit_remote_attestation` call. The helper is documented as
+/// best-effort and non-fatal (the caller swallows its errors); bounding here
+/// prevents an unresponsive `TransactionSender` (e.g. the test mock) from
+/// hanging the onboarding flow indefinitely. `MAX_RETRY_DURATION` (12 h)
+/// would be the long-running production behavior for the other callers of
+/// `submit_remote_attestation`, which we keep unchanged.
+const SUBMIT_ATTESTATION_BEFORE_CONCLUDE_TIMEOUT: Duration = Duration::from_secs(30);
+
 const MIN_BACKOFF_DURATION: Duration = Duration::from_millis(100);
 const MAX_BACKOFF_DURATION: Duration = Duration::from_secs(60);
 const MAX_RETRY_DURATION: Duration = Duration::from_secs(60 * 60 * 12); // 12 hours.
@@ -89,6 +99,44 @@ pub async fn submit_remote_attestation(
         .context("failed to submit attestation after multiple retry attempts")?
 }
 
+/// Generates a fresh attestation and submits it on-chain before this node
+/// concludes a back-migration.
+///
+/// Without this, back-migration can hit a case where the destination's
+/// stored attestation is already past expiry by the contract's
+/// `current_time_seconds` at conclude time, so `reverify_participants`
+/// rejects with `InvalidTeeRemoteAttestation` and `retry_conclude_onboarding`
+/// loops forever on the same stale state (see near/mpc#2121).
+///
+/// `submit_remote_attestation` waits for `TransactionStatus::Executed`, so
+/// by the time this function returns Ok the contract holds a fresh
+/// attestation under `tls_public_key`.
+pub async fn submit_attestation_before_concluding_migration(
+    tee_authority: TeeAuthority,
+    tx_sender: impl TransactionSender,
+    tls_public_key: Ed25519PublicKey,
+    account_public_key: Ed25519PublicKey,
+) -> anyhow::Result<()> {
+    let report_data: ReportData =
+        ReportDataV1::new(*tls_public_key.as_bytes(), *account_public_key.as_bytes()).into();
+    let attestation = tee_authority
+        .generate_attestation(report_data)
+        .await
+        .map_err(|e| anyhow::anyhow!(e))
+        .context("generate fresh attestation for conclude_node_migration")?;
+    tokio::time::timeout(
+        SUBMIT_ATTESTATION_BEFORE_CONCLUDE_TIMEOUT,
+        submit_remote_attestation(tx_sender, attestation, tls_public_key),
+    )
+    .await
+    .map_err(|_| {
+        anyhow::anyhow!(
+            "submit_remote_attestation did not complete within {SUBMIT_ATTESTATION_BEFORE_CONCLUDE_TIMEOUT:?}; \
+             best-effort attestation refresh giving up"
+        )
+    })?
+}
+
 fn validate_remote_attestation(
     attestation: &Attestation,
     tls_public_key: Ed25519PublicKey,
@@ -536,6 +584,42 @@ mod tests {
         );
     }
 
+    #[tokio::test]
+    #[expect(non_snake_case)]
+    async fn submit_attestation_before_concluding_migration__should_submit_one_fresh_attestation() {
+        // Given
+        let mut rng = rand::rngs::StdRng::seed_from_u64(42);
+        let tls_public_key: Ed25519PublicKey =
+            (&SigningKey::generate(&mut rng).verifying_key()).into();
+        let account_public_key: Ed25519PublicKey =
+            (&SigningKey::generate(&mut rng).verifying_key()).into();
+        let tee_authority = TeeAuthority::from(LocalTeeAuthorityConfig::default());
+        let node_id = NodeId {
+            account_id: "dummy.near".parse().unwrap(),
+            tls_public_key: tls_public_key.clone(),
+            account_public_key: account_public_key.clone(),
+        };
+        let (tee_accounts_sender, _) = watch::channel(vec![]);
+        let sender = MockSender::new(tee_accounts_sender, node_id);
+
+        // When
+        submit_attestation_before_concluding_migration(
+            tee_authority,
+            sender.clone(),
+            tls_public_key,
+            account_public_key,
+        )
+        .await
+        .expect("refresh-before-conclude should succeed against a valid TeeAuthority");
+
+        // Then
+        assert_eq!(
+            sender.count(),
+            1,
+            "expected exactly one SubmitParticipantInfo tx before conclude_node_migration"
+        );
+    }
+
     #[tokio::test]
     async fn test_validate_remote_attestation_valid() {
         let mut rng = rand::rngs::StdRng::seed_from_u64(42);