Refactor code for consistency and readability

mdvillagra · mdvillagra · commit 747b96022d7d · 2025-11-18T20:50:37.000-03:00
- Switched string literals from single quotes to double quotes in several files for uniformity.
- Added blank lines between top-level definitions and wrapped long statements across multiple lines for readability in several modules.
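- Reformatted long error messages and print statements by wrapping them across lines for source readability.
- Removed the `from peft import PeftModel` import and the function-local `import json` in mpi_lora_onnx_exporter.py.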
diff --git a/src/zklora/__init__.py b/src/zklora/__init__.py
@@ -1,4 +1,4 @@
-__version__ = '0.1.2'
+__version__ = "0.1.2"
 
 from .zk_proof_generator import batch_verify_proofs
 from .lora_contributor_mpi import LoRAServer, LoRAServerSocket
@@ -7,11 +7,11 @@
 
 
 __all__ = [
-    'batch_verify_proofs',
-    'LoRAServer',
-    'LoRAServerSocket',
-    'BaseModelClient',
-    'commit_activations',
-    'verify_commitment',
-    '__version__',
-]
+    "batch_verify_proofs",
+    "LoRAServer",
+    "LoRAServerSocket",
+    "BaseModelClient",
+    "commit_activations",
+    "verify_commitment",
+    "__version__",
+]
diff --git a/src/zklora/activations_commit.py b/src/zklora/activations_commit.py
@@ -2,26 +2,28 @@
 import json
 import numpy as np
 
+
 def get_merkle_root(activations_path: str) -> str:
     """
     Calculate the Merkle root hash of model activations stored in a JSON file.
-    
+
     Args:
         activations_path: Path to JSON file containing model activations under "input_data" key
-        
+
     Returns:
         str: Hexadecimal string of the Merkle root hash, prefixed with "0x"
     """
     # Load the intermediate activations from JSON file
-    with open(activations_path, 'r') as f:
+    with open(activations_path, "r") as f:
         activations = json.load(f)
 
     # Convert nested data to numpy array and flatten
     flattened_np = np.array(activations["input_data"]).reshape(-1)
-    
+
     # Get and return the Merkle root hash
     return merkle.insert_values(flattened_np.tolist())
 
+
 if __name__ == "__main__":
     activations_path = "intermediate_activations/base_model_model_lm_head.json"
     merkle_root = get_merkle_root(activations_path)
diff --git a/src/zklora/base_model_user_mpi/__init__.py b/src/zklora/base_model_user_mpi/__init__.py
@@ -8,6 +8,7 @@
 import torch.nn as nn
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
+
 class BaseModelToLoRAComm:
     def __init__(self, host_a="127.0.0.1", port_a=30000):
         self.host_a = host_a
@@ -20,16 +21,18 @@ def init_request(self):
 
     def lora_forward(self, sub_name, arr):
         req = {
-            "request_type":"lora_forward",
+            "request_type": "lora_forward",
             "submodule_name": sub_name,
-            "input_array": arr
+            "input_array": arr,
         }
         resp = self.send_and_recv(req)
         return resp.get("output_array", None)
 
     def end_inference(self):
         req = {"request_type": "end_inference"}
-        resp = self.send_and_recv(req)#, timeout=600.0)  # might be slower if proof gen is big
+        resp = self.send_and_recv(
+            req
+        )  # , timeout=600.0)  # might be slower if proof gen is big
         return resp
 
     def send_and_recv(self, data_dict):
@@ -52,13 +55,18 @@ def send_and_recv(self, data_dict):
         s.close()
 
         if not buffer:
-            raise RuntimeError("[B] No data from A (EOF). Possibly A took too long or closed early.")
+            raise RuntimeError(
+                "[B] No data from A (EOF). Possibly A took too long or closed early."
+            )
 
         resp = pickle.loads(buffer)
         return resp
 
+
 class RemoteLoRAWrappedModule(nn.Module):
-    def __init__(self, sub_name, local_sub, comm: BaseModelToLoRAComm, combine_mode="replace"):
+    def __init__(
+        self, sub_name, local_sub, comm: BaseModelToLoRAComm, combine_mode="replace"
+    ):
         super().__init__()
         self.sub_name = sub_name
         self.local_sub = local_sub
@@ -77,6 +85,7 @@ def forward(self, x: torch.Tensor):
             return base_out + out_t
         return out_t
 
+
 class BaseModelClient:
     def __init__(
         self,
@@ -127,9 +136,13 @@ def init_and_patch(self):
                     *parents, child = path_parts
                     m = self._navigate(self.model, parents)
                     orig_sub = getattr(m, child)
-                    wrapped = RemoteLoRAWrappedModule(full_name, orig_sub, comm, self.combine_mode)
+                    wrapped = RemoteLoRAWrappedModule(
+                        full_name, orig_sub, comm, self.combine_mode
+                    )
                     setattr(m, child, wrapped)
-                    print(f"[B] Patched submodule '{full_name}' from {comm.host_a}:{comm.port_a}.")
+                    print(
+                        f"[B] Patched submodule '{full_name}' from {comm.host_a}:{comm.port_a}."
+                    )
                 except Exception as e:
                     print(f"[B] Could not patch '{full_name}': {e}")
 
@@ -144,4 +157,6 @@ def end_inference(self):
         """Notify all contributors that inference is finished."""
         for comm in self.comms:
             resp = comm.end_inference()
-            print("[B] end_inference => got ack from", comm.host_a, comm.port_a, ":", resp)
+            print(
+                "[B] end_inference => got ack from", comm.host_a, comm.port_a, ":", resp
+            )
diff --git a/src/zklora/lora_contributor_mpi/__init__.py b/src/zklora/lora_contributor_mpi/__init__.py
@@ -17,10 +17,12 @@
 from ..zk_proof_generator import generate_proofs, resolve_proof_paths
 from ..mpi_lora_onnx_exporter import export_lora_onnx_json_mpi
 
+
 def read_file_as_bytes(path: str) -> bytes:
     with open(path, "rb") as f:
         return f.read()
 
+
 def strip_prefix(raw_name: str) -> str:
     """
     Remove 'base_model.model.', 'base_model.', 'model.' from the submodule name.
@@ -30,9 +32,10 @@ def strip_prefix(raw_name: str) -> str:
     name2 = raw_name
     for pfx in ["base_model.model.", "base_model.", "model."]:
         if name2.startswith(pfx):
-            name2 = name2[len(pfx):]
+            name2 = name2[len(pfx) :]
     return name2.strip()
 
+
 class LoRAServer:
     def __init__(self, base_model_name: str, lora_model_id: str, out_dir: str):
         self.out_dir = out_dir
@@ -91,18 +94,20 @@ def finalize_proofs_and_collect(self):
                 x_data=last_in,
                 submodule=mod,
                 output_dir=self.out_dir,
-                verbose=True
+                verbose=True,
             )
         self.session_data.clear()
 
         # generate proofs synchronously
-        print("[A] Running generate_proofs(...) via asyncio.run(...) in the same thread.")
+        print(
+            "[A] Running generate_proofs(...) via asyncio.run(...) in the same thread."
+        )
         proof_res = asyncio.run(
             generate_proofs(
                 onnx_dir=self.out_dir,
                 json_dir=self.out_dir,
                 output_dir=self.out_dir,
-                verbose=True
+                verbose=True,
             )
         )
 
@@ -113,6 +118,7 @@ def finalize_proofs_and_collect(self):
 
         return
 
+
 class LoRAServerSocket(threading.Thread):
     def __init__(self, host, port, lora_server: LoRAServer, stop_event):
         super().__init__()
@@ -123,13 +129,16 @@ def __init__(self, host, port, lora_server: LoRAServer, stop_event):
 
     def run(self):
         import socket
+
         print(f"[A-Server] listening on {self.host}:{self.port}")
         srv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
         srv.bind((self.host, self.port))
         srv.listen(5)
         srv.settimeout(1200.0)
 
-        print(f"[A-Server] Running on {self.host}:{self.port}, local artifacts in '{self.lora_server.out_dir}'")
+        print(
+            f"[A-Server] Running on {self.host}:{self.port}, local artifacts in '{self.lora_server.out_dir}'"
+        )
         try:
             while not self.stop_event.is_set():
                 try:
@@ -147,28 +156,28 @@ def handle_conn(self, conn, addr):
             if not data:
                 return
             req = pickle.loads(data)
-            rtype = req.get("request_type","lora_forward")
+            rtype = req.get("request_type", "lora_forward")
 
             if rtype == "init_request":
                 submods = self.lora_server.list_lora_injection_points()
-                resp = {"response_type":"init_response","injection_points": submods}
+                resp = {"response_type": "init_response", "injection_points": submods}
 
             elif rtype == "lora_forward":
                 sname = req["submodule_name"]
                 arr = req["input_array"]
                 tin = torch.tensor(arr, dtype=torch.float32)
                 out = self.lora_server.apply_lora(sname, tin)
                 resp = {
-                    "response_type":"lora_forward_response",
-                    "output_array": out.cpu().numpy()
+                    "response_type": "lora_forward_response",
+                    "output_array": out.cpu().numpy(),
                 }
 
             elif rtype == "end_inference":
                 # generate proofs locally
                 self.lora_server.finalize_proofs_and_collect()
                 resp = {
                     "response_type": "end_inference_ack",
-                    "message": "A finished proof generation locally. B can close."
+                    "message": "A finished proof generation locally. B can close.",
                 }
 
             else:
@@ -191,4 +200,4 @@ def recv_all(self, conn, chunk_size=4096):
             if not chunk:
                 break
             buffer += chunk
-        return buffer
+        return buffer
diff --git a/src/zklora/mpi_lora_onnx_exporter.py b/src/zklora/mpi_lora_onnx_exporter.py
@@ -1,6 +1,6 @@
 # zklora/mpi_lora_onnx_exporter.py
 """
-New code specifically for 'split inference' (MPI) scenario, 
+New code specifically for 'split inference' (MPI) scenario,
 similar to lora_onnx_exporter but with different approach or naming to avoid collisions.
 """
 
@@ -9,14 +9,13 @@
 import torch
 import numpy as np
 import torch.nn as nn
-from peft import PeftModel
 
 
 def normalize_lora_matrices_mpi(
     A: torch.Tensor, B: torch.Tensor, x_data: np.ndarray
 ) -> tuple[torch.Tensor, torch.Tensor, int, int, int]:
     """
-    Same shape logic as the older function, but with a new name 
+    Same shape logic as the older function, but with a new name
     to avoid collisions with the old version.
     x_data => (batch, seq_len, hidden_dim).
     """
@@ -43,7 +42,7 @@ def normalize_lora_matrices_mpi(
 
 class LoraShapeTransformerMPI(nn.Module):
     """
-    Variation of LoraShapeTransformer used specifically for 
+    Variation of LoraShapeTransformer used specifically for
     the split-inference approach, with a new class name to avoid collisions.
     """
 
@@ -72,7 +71,7 @@ def export_lora_onnx_json_mpi(
     verbose: bool = False,
 ):
     """
-    The 'split inference' version of the ONNX+JSON exporter. 
+    The 'split inference' version of the ONNX+JSON exporter.
     Similar logic but a different name to avoid collisions with the old function.
     """
     import torch.onnx
@@ -85,13 +84,17 @@ def export_lora_onnx_json_mpi(
     # If the submodule doesn't have lora_A/lora_B, skip
     if not (hasattr(submodule, "lora_A") and hasattr(submodule, "lora_B")):
         if verbose:
-            print(f"[export_lora_onnx_json_mpi] No lora_A/B in submodule '{sub_name}', skipping.")
+            print(
+                f"[export_lora_onnx_json_mpi] No lora_A/B in submodule '{sub_name}', skipping."
+            )
         return
 
     a_keys = list(submodule.lora_A.keys()) if hasattr(submodule.lora_A, "keys") else []
     if not a_keys:
         if verbose:
-            print(f"[export_lora_onnx_json_mpi] No adapter keys in submodule.lora_A for '{sub_name}'.")
+            print(
+                f"[export_lora_onnx_json_mpi] No adapter keys in submodule.lora_A for '{sub_name}'."
+            )
         return
 
     A_mod = submodule.lora_A[a_keys[0]]
@@ -102,14 +105,19 @@ def export_lora_onnx_json_mpi(
 
     try:
         from .mpi_lora_onnx_exporter import normalize_lora_matrices_mpi
-        A_fixed, B_fixed, in_dim, rank, out_dim = normalize_lora_matrices_mpi(A, B, x_data)
+
+        A_fixed, B_fixed, in_dim, rank, out_dim = normalize_lora_matrices_mpi(
+            A, B, x_data
+        )
     except ValueError as e:
         if verbose:
             print(f"Shape fix error for '{sub_name}': {e}")
         return
 
     # Build the shape-transformer
-    lora_transformer = LoraShapeTransformerMPI(A_fixed, B_fixed, batch_size, seq_len, hidden_dim).eval()
+    lora_transformer = LoraShapeTransformerMPI(
+        A_fixed, B_fixed, batch_size, seq_len, hidden_dim
+    ).eval()
 
     safe_name = sub_name.replace(".", "_").replace("/", "_")
     os.makedirs(output_dir, exist_ok=True)
@@ -135,7 +143,6 @@ def export_lora_onnx_json_mpi(
             print(f"Export error for '{sub_name}': {e}")
 
     # Save JSON
-    import json
     json_path = os.path.join(output_dir, f"{safe_name}.json")
     with open(json_path, "w") as f:
         row_data = x_1d.numpy().tolist()
diff --git a/src/zklora/polynomial_commit.py b/src/zklora/polynomial_commit.py