Patch Mpt

# Smoke test for the rotary cache fix: asking for a different seq_len must
# yield cos/sin tables of the newly requested length.
rotary = PatchedRotaryEmbedding(dim=64, max_seq_len=512)
x = torch.randn(1, 10, 64)
cos1, sin1 = rotary(x, seq_len=10)
cos2, sin2 = rotary(x, seq_len=20)  # seq_len changes → cache is recalculated
assert cos1.shape[0] == 10
assert cos2.shape[0] == 20
print("Rotary cache patch: OK")

These MPT patches are bonded to pipes or plates, where they detect structural defects using ultrasonic waves.

# Case: (batch, 1, key_len) elif attention_mask.dim() == 3 and attention_mask.size(1) == 1: mask = attention_mask[:, :, None, :] else: raise ValueError(f"Unexpected mask shape: attention_mask.shape") max_seq_len=512) x = torch.randn(1

def _update_cache(self, seq_len: int, device: torch.device, dtype: torch.dtype):
    """Rebuild the cached rotary cos/sin tables when the request changes.

    The tables are stored on ``self._cached_cos`` / ``self._cached_sin`` and
    the length they were built for on ``self._cached_seq_len``. Each table
    has ``seq_len`` rows; its columns are the ``len(range(0, self.dim, 2))``
    frequencies repeated twice (so ``self.dim`` columns when ``self.dim`` is
    even).

    Args:
        seq_len: Number of positions the tables must cover.
        device: Device the tables are created on.
        dtype: Dtype the finished tables are cast to.

    Returns:
        None. The result is written to the ``_cached_*`` attributes.
    """
    device = torch.device(device)
    cached = getattr(self, "_cached_cos", None)

    # A cache hit needs more than a matching length: tables built for another
    # dtype or device would otherwise be handed back unchanged. A request
    # without a device index (e.g. "cuda") matches any index of that type.
    if (
        cached is not None
        and seq_len == self._cached_seq_len
        and cached.dtype == dtype
        and cached.device.type == device.type
        and (device.index is None or cached.device.index == device.index)
    ):
        return

    # Build the inverse frequencies directly on the target device so the
    # outer product below never mixes CPU and accelerator tensors.
    exponents = (
        torch.arange(0, self.dim, 2, device=device, dtype=torch.float32)
        / self.dim
    )
    inv_freq = 1.0 / (self.base ** exponents)

    # Angles are computed in float32 and only cast to ``dtype`` at the end.
    t = torch.arange(seq_len, device=device, dtype=inv_freq.dtype)
    freqs = torch.outer(t, inv_freq)
    emb = torch.cat((freqs, freqs), dim=-1)

    self._cached_cos = emb.cos().to(dtype)
    self._cached_sin = emb.sin().to(dtype)
    self._cached_seq_len = seq_len