gc scripts

1 year ago · b50847486c
6 changed files with 508 additions and 15 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,5 @@
 __pycache__/
 .DS_Store
 *.egg-info/
+shap_e/examples/shap_e_model_cache/
+shap_e/examples/gc/shap_e_model_cache/
--- a/shap_e/examples/.ipynb_checkpoints/sample_text_to_3d-checkpoint.ipynb
+++ b/shap_e/examples/.ipynb_checkpoints/sample_text_to_3d-checkpoint.ipynb
@ -0,0 +1,172 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "964ccced",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "\n",
+    "from shap_e.diffusion.sample import sample_latents\n",
+    "from shap_e.diffusion.gaussian_diffusion import diffusion_from_config\n",
+    "from shap_e.models.download import load_model, load_config\n",
+    "from shap_e.util.notebooks import create_pan_cameras, decode_latent_images, gif_widget"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "8eed3a76",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "4127249f-da93-4da9-a15e-47fc1d918758",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "NVIDIA GeForce RTX 3090\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(torch.cuda.get_device_name())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2d922637",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "xm = load_model('transmitter', device=device)\n",
+    "model = load_model('text300M', device=device)\n",
+    "diffusion = diffusion_from_config(load_config('diffusion'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "53d329d0",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "f76a8f93c93e4b77af91f03645eb5011",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/64 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "batch_size = 1\n",
+    "guidance_scale = 30.0\n",
+    "prompt = \"bin bag\"\n",
+    "\n",
+    "latents = sample_latents(\n",
+    "    batch_size=batch_size,\n",
+    "    model=model,\n",
+    "    diffusion=diffusion,\n",
+    "    guidance_scale=guidance_scale,\n",
+    "    model_kwargs=dict(texts=[prompt] * batch_size),\n",
+    "    progress=True,\n",
+    "    clip_denoised=True,\n",
+    "    use_fp16=True,\n",
+    "    use_karras=True,\n",
+    "    karras_steps=64,\n",
+    "    sigma_min=1e-3,\n",
+    "    sigma_max=160,\n",
+    "    s_churn=0,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "633da2ec",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7c8b8946a49847dd9aa5376f9568775f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HTML(value='<img src=\"data:image/gif;base64,R0lGODlhQABAAIcAAJOYl4+UlY6TlI2TlI2Tk46Sko2Sk42SkoySk4ySko2RkYyRko…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "render_mode = 'nerf' # you can change this to 'stf'\n",
+    "size = 64 # this is the size of the renders; higher values take longer to render.\n",
+    "\n",
+    "cameras = create_pan_cameras(size, device)\n",
+    "for i, latent in enumerate(latents):\n",
+    "    images = decode_latent_images(xm, latent, cameras, rendering_mode=render_mode)\n",
+    "    display(gif_widget(images))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "85a4dce4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Example of saving the latents as meshes.\n",
+    "from shap_e.util.notebooks import decode_latent_mesh\n",
+    "\n",
+    "for i, latent in enumerate(latents):\n",
+    "    t = decode_latent_mesh(xm, latent).tri_mesh()\n",
+    "    with open(f'example_mesh_{i}.ply', 'wb') as f:\n",
+    "        t.write_ply(f)\n",
+    "    with open(f'example_mesh_{i}.obj', 'w') as f:\n",
+    "        t.write_obj(f)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/shap_e/examples/gc/ShapeGenerator.py
+++ b/shap_e/examples/gc/ShapeGenerator.py
@ -0,0 +1,96 @@
+import torch
+from shap_e.diffusion.sample import sample_latents
+from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
+from shap_e.models.download import load_model, load_config
+from shap_e.util.notebooks import decode_latent_mesh
+from tqdm import tqdm
+import pygltflib
+from pygltflib import GLTF2
+import trimesh
+import open3d as o3d
+import os
+import datetime
+
+class ShapeGenerator:
+    def __init__(self, output_path, batch_size, step_size, guidance):
+        self.device = None
+        self.xm = None
+        self.model = None
+        self.diffusion = None
+        self.iterations = 0
+        self.latents = None
+        self.output_path = output_path
+        self.batch_size = batch_size
+        self.step_size = step_size
+        self.guidance = guidance
+
+    def run(self):
+        print("Loading Models..")
+        self.load_models()
+        print("Finished Loading Models!")
+
+    def load_models(self):
+        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        self.xm = load_model('transmitter', device=self.device)
+        self.model = load_model('text300M', device=self.device)
+        self.diffusion = diffusion_from_config(load_config('diffusion'))
+       
+    
+    def generate_object(self, prompt):
+        batch_size = 2
+        # Create random latents
+        latent_dim = self.model.d_latent
+        random_latents = torch.randn(batch_size, latent_dim).to(self.model.device)
+        print(random_latents.shape)
+        model_kwargs = {}
+
+
+        self.latents = sample_latents(
+            batch_size=self.batch_size,
+            model=self.model,
+            diffusion=self.diffusion,
+            guidance_scale=self.guidance,
+            model_kwargs=model_kwargs,
+            progress=True,  # This should already show progress
+            clip_denoised=True,
+            use_fp16=True,
+            use_karras=True,
+            karras_steps=self.step_size,
+            sigma_min=1e-3,
+            sigma_max=160,
+            s_churn=0,
+            device = self.model.device,
+        )
+
+        self.export_model(prompt)
+
+    def export_model(self, prompt):
+        timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
+        obj_filepath = f'{prompt}-{self.iterations}.obj'
+        output_filepath = f'{self.output_path}/{prompt}-{timestamp}.gltf'
+        print(output_filepath)
+        for i, latent in enumerate(self.latents):
+            t = decode_latent_mesh(self.xm, latent).tri_mesh()
+            with open(obj_filepath, 'w') as f:
+                t.write_obj(f)
+        final_mesh = self.construct_mesh(obj_filepath)
+        o3d.io.write_triangle_mesh(output_filepath, final_mesh)
+        self.iterations += 1
+
+    def construct_mesh(self, obj_fp):
+        mesh = o3d.io.read_triangle_mesh(obj_fp)
+
+        if os.path.exists(obj_fp):
+            os.remove(obj_fp)
+
+        original_triangle_count = len(mesh.triangles)
+        target_triangle_count = original_triangle_count // 3
+        decimated_mesh = mesh.simplify_quadric_decimation(
+            target_number_of_triangles=target_triangle_count)
+        filtered_mesh = decimated_mesh.filter_smooth_simple(number_of_iterations=5)
+        filtered_mesh.compute_vertex_normals()
+        return filtered_mesh
+
+
+
+        
--- a/shap_e/examples/gc/TextGenerator.py
+++ b/shap_e/examples/gc/TextGenerator.py
@ -0,0 +1,51 @@
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, set_seed
+import random
+import torch
+
+class TextGenerator:
+    def __init__(self):
+        self.model = None
+        self.tokenizer = None
+        #self.load_models()
+
+    def load_models(self):
+        print('Loading Models...')
+        self.tokenizer = AutoTokenizer.from_pretrained("distilbert/distilgpt2")
+        self.model = AutoModelForCausalLM.from_pretrained("distilbert/distilgpt2")   
+        print('Models Loaded!')
+
+    def generate_text(self):
+        
+        model = AutoModelForCausalLM.from_pretrained( 
+            "microsoft/Phi-3-mini-4k-instruct",  
+            device_map="cuda",  
+            torch_dtype="auto",  
+            trust_remote_code=True,  
+        ) 
+        tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct") 
+
+        messages = [ 
+            {"role": "system", "content": "You are a helpful AI assistant, that generates two nouns and returns one sentence in the format of: a (noun) with a (noun).\n You can descirbe a random object typically found in a bin"}, 
+            {"role": "user", "content": "Can you provide me with a sentence"}, 
+        ] 
+
+        pipe = pipeline( 
+            "text-generation", 
+            model=model, 
+            tokenizer=tokenizer, 
+        ) 
+
+        generation_args = { 
+            "max_new_tokens": 50,  # Reduced to focus on concise output
+            "return_full_text": False,
+            "temperature": 0.7,  # Adjusted for more randomness
+            "do_sample": True,
+            "top_k": 100,  # Top-k sampling
+            "top_p": 1,  # Nucleus sampling
+        } 
+
+        output = pipe(messages, **generation_args) 
+        return output[0]['generated_text']
+        
+
+ 
--- a/shap_e/examples/gc/app.py
+++ b/shap_e/examples/gc/app.py
@ -0,0 +1,105 @@
+import argparse
+import threading
+import time
+import random
+from ShapeGenerator import ShapeGenerator
+from TextGenerator import TextGenerator
+
+# command example
+# python app.py --output_dir /mnt/c/Users/caile/Desktop/output
+
+class GCApp:
+    def __init__(self, output_dir, batch_size, step_size, guidance_scale):
+        self.output_dir = output_dir
+        self.obj_gen = ShapeGenerator(self.output_dir, batch_size, step_size, guidance_scale)
+        self.running = False
+        self.stop_event = threading.Event()
+        self.thread = None
+        self.waste_items = [
+            "Plastic bottle", "Aluminum can", "Glass bottle", "Food wrapper", 
+            "Cardboard box", "Paper bag", "Plastic bag", "Electronics", 
+            "Old smartphone", "Broken TV", "Computer parts", "Batteries", 
+            "Light bulbs", "Old furniture", "Styrofoam cup", "Food container", 
+            "Takeout box", "Cigarette butts", "Plastic utensils", "Straws", 
+            "Bottle caps", "Rubber tires", "Broken toys", "Old clothes", 
+            "Shoes", "Wooden pallets", "Paint cans", "Cleaning products", 
+            "Old appliances", "Wires", "Cables", "Extension cords", 
+            "Old magazines", "Newspapers", "Scrap metal", "Construction debris", 
+            "Yard waste", "Grass clippings", "Leaves", "Old mattresses", 
+            "Carpeting", "Food scraps", "Pet waste", "Diapers", 
+            "Sanitary products", "Receipts", "Plastic wrap", "Packing peanuts", 
+            "Ice cream containers", "Fast food containers", "Takeaway cups", 
+            "Clamshell packaging", "Plastic film", "Broken glass", 
+            "Old books", "VCR tapes", "CDs", "DVDs", 
+            "Game consoles", "Remote controls", "Ink cartridges", 
+            "Toner cartridges", "Old tools", "Gardening tools", 
+            "Bike parts", "Fishing gear", "Beach toys", "Pool floats", 
+            "Old bicycles", "Skateboards", "Surfboards", "Helmets", 
+            "Used batteries", "Old jewelry", "Keyboards", "Mice (computer)", 
+            "Speakers", "Old cameras", "Projectors", "Printers", 
+            "Scanners", "Shredded paper", "Bubble wrap", "Plastic sheeting", 
+            "Tarps", "Old car parts", "Motor oil containers", 
+            "Propane tanks", "Oil filters", "Windshield wipers", 
+            "Car batteries", "Antifreeze containers", "Used tires", 
+            "Old propane tanks", "Scrap wood", "Broken furniture", 
+            "Old carpets", "Leather scraps", "Textile waste", 
+            "Compostable waste"
+        ]
+
+    def start_generation(self):
+        self.running = True
+        self.stop_event.clear()
+        self.thread = threading.Thread(target=self._generate_objects)
+        self.thread.start()
+    
+    def stop_generation(self):
+        self.stop_event.set()
+        self.running = False
+        if self.thread:
+            self.thread.join()
+
+    def get_random_item_prompt(self):
+        return random.choice(self.waste_items)
+    
+    def _generate_objects(self):
+        while not self.stop_event.is_set():
+            self.obj_gen.generate_object(self.get_random_item_prompt())
+            time.sleep(1)
+    
+    def run(self):
+        self.obj_gen.run()
+
+        while True:
+            command = input("Enter a command, <start> <stop> <generate (prompt)>: ")
+            if command.lower() == 'exit':
+                print("Exiting the program.")
+                self.stop_generation()
+                break
+            elif command.lower() == 'start':
+                if not self.running:
+                    print("Starting continuous generation.")
+                    self.start_generation()
+                else:
+                    print("Generation already running.")
+            elif command.lower() == 'stop':
+                print("Stopping continuous generation.")
+                self.stop_generation()
+            else:
+                print("Unknown command.")
+
+def main(output_dir, batch_size, step_size, guidance_scale):
+    app = GCApp(output_dir, batch_size, step_size, guidance_scale)
+    app.run()
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Generate shapes with the ShapeGenerator.")
+    parser.add_argument("--output_dir", type=str, required=True, help="The directory to save generated shapes.")
+    parser.add_argument("--batch_size", type=int, default=2, help="The number of batches for shap-e. the higher the batch size the longer it will take to process but will output a more refined mesh.")
+    parser.add_argument("--step_size", type=int, default=64, help="The number of steps/iterations for shap-e. the higher the step size the longer it will take to process but will output a more refined mesh.")
+    parser.add_argument("--guidance_scale", type=int, default=30, help="The guidance scale in context to the text prompt. The higher this value, the model will generate something closer to the text description (CLIP).")
+
+    args = parser.parse_args()
+    
+    main(args.output_dir, args.batch_size, args.step_size, args.guidance_scale)
+
+    
--- a/shap_e/examples/sample_text_to_3d.ipynb
+++ b/shap_e/examples/sample_text_to_3d.ipynb
@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
   "id": "964ccced",
   "metadata": {},
   "outputs": [],
@ -17,7 +17,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
   "id": "8eed3a76",
   "metadata": {},
   "outputs": [],
@ -27,7 +27,25 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
+   "id": "4127249f-da93-4da9-a15e-47fc1d918758",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "NVIDIA GeForce RTX 3090\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(torch.cuda.get_device_name())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
   "id": "2d922637",
   "metadata": {},
   "outputs": [],
@ -39,14 +57,29 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
   "id": "53d329d0",
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "299c91406ddc4a368d1c80ed81c20a84",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/64 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
   "source": [
-    "batch_size = 4\n",
-    "guidance_scale = 15.0\n",
-    "prompt = \"a shark\"\n",
+    "batch_size = 1\n",
+    "guidance_scale = 30.0\n",
+    "prompt = \"road sign\"\n",
    "\n",
    "latents = sample_latents(\n",
    "    batch_size=batch_size,\n",
@ -67,10 +100,25 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 20,
   "id": "633da2ec",
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7c8b8946a49847dd9aa5376f9568775f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HTML(value='<img src=\"data:image/gif;base64,R0lGODlhQABAAIcAAJOYl4+UlY6TlI2TlI2Tk46Sko2Sk42SkoySk4ySko2RkYyRko…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
   "source": [
    "render_mode = 'nerf' # you can change this to 'stf'\n",
    "size = 64 # this is the size of the renders; higher values take longer to render.\n",
@ -83,21 +131,40 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
   "id": "85a4dce4",
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/cailean/shap-e/shap_e/models/stf/renderer.py:286: UserWarning: exception rendering with PyTorch3D: No module named 'pytorch3d'\n",
+      "  warnings.warn(f\"exception rendering with PyTorch3D: {exc}\")\n",
+      "/home/cailean/shap-e/shap_e/models/stf/renderer.py:287: UserWarning: falling back on native PyTorch renderer, which does not support full gradients\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
   "source": [
    "# Example of saving the latents as meshes.\n",
    "from shap_e.util.notebooks import decode_latent_mesh\n",
    "\n",
    "for i, latent in enumerate(latents):\n",
    "    t = decode_latent_mesh(xm, latent).tri_mesh()\n",
-    "    with open(f'example_mesh_{i}.ply', 'wb') as f:\n",
+    "    with open(f'road_example_mesh_{i}.ply', 'wb') as f:\n",
    "        t.write_ply(f)\n",
-    "    with open(f'example_mesh_{i}.obj', 'w') as f:\n",
+    "    with open(f'road_example_mesh_{i}.obj', 'w') as f:\n",
    "        t.write_obj(f)"
   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "71b5ace4-b449-4a7e-b4e3-66ee6a5d03c3",
+   "metadata": {},
+   "outputs": [],
+   "source": []
  }
 ],
 "metadata": {
@ -116,7 +183,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.11.3"
+   "version": "3.10.12"
  }
 },
 "nbformat": 4,