gc scripts

11 months ago · b50847486c
6 changed files with 508 additions and 15 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,5 @@
 __pycache__/
 .DS_Store
 *.egg-info/
 shap_e/examples/shap_e_model_cache/
 shap_e/examples/gc/shap_e_model_cache/
--- a/shap_e/examples/.ipynb_checkpoints/sample_text_to_3d-checkpoint.ipynb
+++ b/shap_e/examples/.ipynb_checkpoints/sample_text_to_3d-checkpoint.ipynb
@ -0,0 +1,172 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "964ccced",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "\n",
    "from shap_e.diffusion.sample import sample_latents\n",
    "from shap_e.diffusion.gaussian_diffusion import diffusion_from_config\n",
    "from shap_e.models.download import load_model, load_config\n",
    "from shap_e.util.notebooks import create_pan_cameras, decode_latent_images, gif_widget"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "8eed3a76",
   "metadata": {},
   "outputs": [],
   "source": [
    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "4127249f-da93-4da9-a15e-47fc1d918758",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NVIDIA GeForce RTX 3090\n"
     ]
    }
   ],
   "source": [
    "print(torch.cuda.get_device_name())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2d922637",
   "metadata": {},
   "outputs": [],
   "source": [
    "xm = load_model('transmitter', device=device)\n",
    "model = load_model('text300M', device=device)\n",
    "diffusion = diffusion_from_config(load_config('diffusion'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "53d329d0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f76a8f93c93e4b77af91f03645eb5011",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/64 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "batch_size = 1\n",
    "guidance_scale = 30.0\n",
    "prompt = \"bin bag\"\n",
    "\n",
    "latents = sample_latents(\n",
    "    batch_size=batch_size,\n",
    "    model=model,\n",
    "    diffusion=diffusion,\n",
    "    guidance_scale=guidance_scale,\n",
    "    model_kwargs=dict(texts=[prompt] * batch_size),\n",
    "    progress=True,\n",
    "    clip_denoised=True,\n",
    "    use_fp16=True,\n",
    "    use_karras=True,\n",
    "    karras_steps=64,\n",
    "    sigma_min=1e-3,\n",
    "    sigma_max=160,\n",
    "    s_churn=0,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "633da2ec",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7c8b8946a49847dd9aa5376f9568775f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HTML(value='<img src=\"data:image/gif;base64,R0lGODlhQABAAIcAAJOYl4+UlY6TlI2TlI2Tk46Sko2Sk42SkoySk4ySko2RkYyRko…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "render_mode = 'nerf' # you can change this to 'stf'\n",
    "size = 64 # this is the size of the renders; higher values take longer to render.\n",
    "\n",
    "cameras = create_pan_cameras(size, device)\n",
    "for i, latent in enumerate(latents):\n",
    "    images = decode_latent_images(xm, latent, cameras, rendering_mode=render_mode)\n",
    "    display(gif_widget(images))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "85a4dce4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Example of saving the latents as meshes.\n",
    "from shap_e.util.notebooks import decode_latent_mesh\n",
    "\n",
    "for i, latent in enumerate(latents):\n",
    "    t = decode_latent_mesh(xm, latent).tri_mesh()\n",
    "    with open(f'example_mesh_{i}.ply', 'wb') as f:\n",
    "        t.write_ply(f)\n",
    "    with open(f'example_mesh_{i}.obj', 'w') as f:\n",
    "        t.write_obj(f)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
--- a/shap_e/examples/gc/ShapeGenerator.py
+++ b/shap_e/examples/gc/ShapeGenerator.py
@ -0,0 +1,96 @@
 import torch
 from shap_e.diffusion.sample import sample_latents
 from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
 from shap_e.models.download import load_model, load_config
 from shap_e.util.notebooks import decode_latent_mesh
 from tqdm import tqdm
 import pygltflib
 from pygltflib import GLTF2
 import trimesh
 import open3d as o3d
 import os
 import datetime
 class ShapeGenerator:
    """Generate 3D meshes from text prompts with Shap-E and export them as glTF.

    Models are not loaded by the constructor; call ``run()`` (or
    ``load_models()``) once before ``generate_object()``.
    """

    def __init__(self, output_path, batch_size, step_size, guidance):
        """Store the generation settings; no models are loaded here.

        Args:
            output_path: Directory the exported ``.gltf`` files are written to.
            batch_size: Number of latents sampled per prompt.
            step_size: Passed to ``sample_latents`` as ``karras_steps``.
            guidance: Passed to ``sample_latents`` as ``guidance_scale``.
        """
        self.device = None
        self.xm = None
        self.model = None
        self.diffusion = None
        # Number of completed generate/export cycles; used in temp file names.
        self.iterations = 0
        self.latents = None
        self.output_path = output_path
        self.batch_size = batch_size
        self.step_size = step_size
        self.guidance = guidance

    def run(self):
        """Load the models, printing progress messages around the load."""
        print("Loading Models..")
        self.load_models()
        print("Finished Loading Models!")

    def load_models(self):
        """Pick CUDA when available (else CPU) and load the Shap-E models."""
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.xm = load_model('transmitter', device=self.device)
        self.model = load_model('text300M', device=self.device)
        self.diffusion = diffusion_from_config(load_config('diffusion'))

    def generate_object(self, prompt):
        """Sample latents conditioned on ``prompt`` and export them.

        Args:
            prompt: Text description handed to the model as its ``texts``
                conditioning and also used in the output file names.

        Raises:
            RuntimeError: If the models have not been loaded yet.
        """
        if self.model is None or self.xm is None or self.diffusion is None:
            raise RuntimeError(
                "Models are not loaded; call run() or load_models() first.")
        self.latents = sample_latents(
            batch_size=self.batch_size,
            model=self.model,
            diffusion=self.diffusion,
            guidance_scale=self.guidance,
            # One copy of the prompt per sample; without this the prompt
            # would never reach the model.
            model_kwargs=dict(texts=[prompt] * self.batch_size),
            progress=True,
            clip_denoised=True,
            use_fp16=True,
            use_karras=True,
            karras_steps=self.step_size,
            sigma_min=1e-3,
            sigma_max=160,
            s_churn=0,
            device=self.device,
        )
        self.export_model(prompt)

    def export_model(self, prompt):
        """Decode every latent in ``self.latents`` and write it as glTF.

        Each latent is decoded to a triangle mesh, written to a temporary OBJ
        file in the current working directory, post-processed by
        ``construct_mesh`` (which deletes the OBJ) and saved under
        ``self.output_path``. A single latent is saved as
        ``<prompt>-<timestamp>.gltf``; with several latents an index suffix
        (``-0``, ``-1``, ...) keeps them from overwriting one another.
        """
        timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        if self.output_path:
            os.makedirs(self.output_path, exist_ok=True)
        latent_count = len(self.latents)
        for index, latent in enumerate(self.latents):
            suffix = f'-{index}' if latent_count > 1 else ''
            obj_filepath = f'{prompt}-{self.iterations}{suffix}.obj'
            output_filepath = os.path.join(
                self.output_path, f'{prompt}-{timestamp}{suffix}.gltf')
            print(output_filepath)
            tri_mesh = decode_latent_mesh(self.xm, latent).tri_mesh()
            with open(obj_filepath, 'w') as f:
                tri_mesh.write_obj(f)
            final_mesh = self.construct_mesh(obj_filepath)
            # write_triangle_mesh reports failure through its return value.
            if not o3d.io.write_triangle_mesh(output_filepath, final_mesh):
                print(f"Failed to write {output_filepath}")
        self.iterations += 1

    def construct_mesh(self, obj_fp):
        """Load an OBJ file, delete it, and return a simplified, smoothed mesh.

        The mesh is decimated to one third of its triangle count, smoothed
        with five simple-filter iterations, and given vertex normals.

        Args:
            obj_fp: Path of the temporary OBJ file to read and remove.

        Returns:
            The post-processed Open3D triangle mesh.
        """
        mesh = o3d.io.read_triangle_mesh(obj_fp)
        # The OBJ is only an intermediate; drop it once it is in memory.
        if os.path.exists(obj_fp):
            os.remove(obj_fp)
        original_triangle_count = len(mesh.triangles)
        target_triangle_count = original_triangle_count // 3
        decimated_mesh = mesh.simplify_quadric_decimation(
            target_number_of_triangles=target_triangle_count)
        filtered_mesh = decimated_mesh.filter_smooth_simple(number_of_iterations=5)
        filtered_mesh.compute_vertex_normals()
        return filtered_mesh
--- a/shap_e/examples/gc/TextGenerator.py
+++ b/shap_e/examples/gc/TextGenerator.py
@ -0,0 +1,51 @@
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, set_seed
 import random
 import torch
 class TextGenerator:
    """Produce short "a (noun) with a (noun)" sentences with a chat LLM.

    ``load_models()`` is not called by the constructor and loads a separate
    distilgpt2 model into ``self.model`` / ``self.tokenizer``;
    ``generate_text()`` does not use those and builds its own Phi-3 pipeline.
    """

    def __init__(self):
        """Create the generator without loading any model."""
        self.model = None
        self.tokenizer = None
        # Phi-3 text-generation pipeline, built on first generate_text() call.
        self._pipeline = None

    def load_models(self):
        """Load the distilgpt2 tokenizer and model into the instance."""
        print('Loading Models...')
        self.tokenizer = AutoTokenizer.from_pretrained("distilbert/distilgpt2")
        self.model = AutoModelForCausalLM.from_pretrained("distilbert/distilgpt2")
        print('Models Loaded!')

    def generate_text(self):
        """Return one generated sentence from the Phi-3 instruct model.

        The model, tokenizer and pipeline are created on the first call and
        reused afterwards, so repeated calls do not reload the weights.

        Returns:
            The ``generated_text`` of the first pipeline result.
        """
        if self._pipeline is None:
            # Same CUDA-or-CPU choice ShapeGenerator makes for its models.
            device = "cuda" if torch.cuda.is_available() else "cpu"
            model = AutoModelForCausalLM.from_pretrained(
                "microsoft/Phi-3-mini-4k-instruct",
                device_map=device,
                torch_dtype="auto",
                trust_remote_code=True,
            )
            tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
            self._pipeline = pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
            )
        messages = [
            {"role": "system", "content": "You are a helpful AI assistant, that generates two nouns and returns one sentence in the format of: a (noun) with a (noun).\n You can descirbe a random object typically found in a bin"},
            {"role": "user", "content": "Can you provide me with a sentence"},
        ]
        generation_args = {
            "max_new_tokens": 50,  # keep the reply short
            "return_full_text": False,  # only the newly generated text
            "temperature": 0.7,
            "do_sample": True,
            "top_k": 100,
            "top_p": 1,
        }
        output = self._pipeline(messages, **generation_args)
        return output[0]['generated_text']
--- a/shap_e/examples/gc/app.py
+++ b/shap_e/examples/gc/app.py
@ -0,0 +1,105 @@
 import argparse
 import threading
 import time
 import random
 from ShapeGenerator import ShapeGenerator
 from TextGenerator import TextGenerator
 # command example
 # python app.py --output_dir /mnt/c/Users/caile/Desktop/output
 class GCApp:
    """Console front end that drives a ShapeGenerator with waste-item prompts.

    Supports continuous generation on a background thread (``start`` /
    ``stop``), one-off generation (``generate <prompt>``) and ``exit``.
    """

    def __init__(self, output_dir, batch_size, step_size, guidance_scale):
        """Create the shape generator and the pool of random prompts.

        Args:
            output_dir: Directory handed to ShapeGenerator for its output.
            batch_size: Forwarded to ShapeGenerator.
            step_size: Forwarded to ShapeGenerator.
            guidance_scale: Forwarded to ShapeGenerator.
        """
        self.output_dir = output_dir
        self.obj_gen = ShapeGenerator(self.output_dir, batch_size, step_size, guidance_scale)
        self.running = False
        self.stop_event = threading.Event()
        self.thread = None
        # Prompts drawn at random during continuous generation.
        self.waste_items = [
            "Plastic bottle", "Aluminum can", "Glass bottle", "Food wrapper",
            "Cardboard box", "Paper bag", "Plastic bag", "Electronics",
            "Old smartphone", "Broken TV", "Computer parts", "Batteries",
            "Light bulbs", "Old furniture", "Styrofoam cup", "Food container",
            "Takeout box", "Cigarette butts", "Plastic utensils", "Straws",
            "Bottle caps", "Rubber tires", "Broken toys", "Old clothes",
            "Shoes", "Wooden pallets", "Paint cans", "Cleaning products",
            "Old appliances", "Wires", "Cables", "Extension cords",
            "Old magazines", "Newspapers", "Scrap metal", "Construction debris",
            "Yard waste", "Grass clippings", "Leaves", "Old mattresses",
            "Carpeting", "Food scraps", "Pet waste", "Diapers",
            "Sanitary products", "Receipts", "Plastic wrap", "Packing peanuts",
            "Ice cream containers", "Fast food containers", "Takeaway cups",
            "Clamshell packaging", "Plastic film", "Broken glass",
            "Old books", "VCR tapes", "CDs", "DVDs",
            "Game consoles", "Remote controls", "Ink cartridges",
            "Toner cartridges", "Old tools", "Gardening tools",
            "Bike parts", "Fishing gear", "Beach toys", "Pool floats",
            "Old bicycles", "Skateboards", "Surfboards", "Helmets",
            "Used batteries", "Old jewelry", "Keyboards", "Mice (computer)",
            "Speakers", "Old cameras", "Projectors", "Printers",
            "Scanners", "Shredded paper", "Bubble wrap", "Plastic sheeting",
            "Tarps", "Old car parts", "Motor oil containers",
            "Propane tanks", "Oil filters", "Windshield wipers",
            "Car batteries", "Antifreeze containers", "Used tires",
            "Old propane tanks", "Scrap wood", "Broken furniture",
            "Old carpets", "Leather scraps", "Textile waste",
            "Compostable waste"
        ]

    def start_generation(self):
        """Start the background thread that generates objects continuously."""
        self.running = True
        self.stop_event.clear()
        self.thread = threading.Thread(target=self._generate_objects)
        self.thread.start()

    def stop_generation(self):
        """Signal the worker to stop and wait for it to finish.

        Safe to call when no worker was ever started.
        """
        self.stop_event.set()
        self.running = False
        if self.thread:
            self.thread.join()
            self.thread = None

    def get_random_item_prompt(self):
        """Return a random entry from ``self.waste_items``."""
        return random.choice(self.waste_items)

    def _generate_objects(self):
        """Worker loop: generate one random object, pause, repeat until stopped."""
        try:
            while not self.stop_event.is_set():
                self.obj_gen.generate_object(self.get_random_item_prompt())
                # Pause up to a second, but wake immediately on a stop request.
                self.stop_event.wait(1)
        finally:
            # Also reached when generation raises, so a dead worker does not
            # leave the app claiming that generation is still running.
            self.running = False

    def run(self):
        """Load the models, then process console commands until exit.

        Commands (case-insensitive): ``start``, ``stop``,
        ``generate <prompt>`` and ``exit``. End-of-input or Ctrl-C at the
        prompt is treated like ``exit``.
        """
        self.obj_gen.run()
        while True:
            try:
                raw = input("Enter a command, <start> <stop> <generate (prompt)> <exit>: ")
            except (EOFError, KeyboardInterrupt):
                # The worker is a non-daemon thread; stop it so the process
                # can actually terminate.
                print()
                print("Exiting the program.")
                self.stop_generation()
                break
            action, _, argument = raw.strip().partition(' ')
            action = action.lower()
            argument = argument.strip()
            if action == 'exit':
                print("Exiting the program.")
                self.stop_generation()
                break
            elif action == 'start':
                if not self.running:
                    print("Starting continuous generation.")
                    self.start_generation()
                else:
                    print("Generation already running.")
            elif action == 'stop':
                print("Stopping continuous generation.")
                self.stop_generation()
            elif action == 'generate':
                if self.running:
                    # Avoid using the models from two threads at once.
                    print("Stop continuous generation before generating a single object.")
                elif not argument:
                    print("Usage: generate <prompt>")
                else:
                    self.obj_gen.generate_object(argument)
            else:
                print("Unknown command.")
 def main(output_dir, batch_size, step_size, guidance_scale):
    """Build a GCApp from the given settings and enter its command loop."""
    GCApp(output_dir, batch_size, step_size, guidance_scale).run()
 if __name__ == "__main__":
    # Parse the command-line options and hand them to main().
    parser = argparse.ArgumentParser(description="Generate shapes with the ShapeGenerator.")
    parser.add_argument("--output_dir", type=str, required=True, help="The directory to save generated shapes.")
    parser.add_argument("--batch_size", type=int, default=2, help="The number of latents shap-e samples per prompt. The higher the batch size the longer it will take to process.")
    parser.add_argument("--step_size", type=int, default=64, help="The number of steps/iterations for shap-e. the higher the step size the longer it will take to process but will output a more refined mesh.")
    # float rather than int so fractional scales such as 7.5 are accepted.
    parser.add_argument("--guidance_scale", type=float, default=30.0, help="The guidance scale in context to the text prompt. The higher this value, the model will generate something closer to the text description (CLIP).")
    args = parser.parse_args()
    main(args.output_dir, args.batch_size, args.step_size, args.guidance_scale)
--- a/shap_e/examples/sample_text_to_3d.ipynb
+++ b/shap_e/examples/sample_text_to_3d.ipynb
@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
   "id": "964ccced",
   "metadata": {},
   "outputs": [],
@ -17,7 +17,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
   "id": "8eed3a76",
   "metadata": {},
   "outputs": [],
@ -27,7 +27,25 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
   "id": "4127249f-da93-4da9-a15e-47fc1d918758",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NVIDIA GeForce RTX 3090\n"
     ]
    }
   ],
   "source": [
    "print(torch.cuda.get_device_name())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "2d922637",
   "metadata": {},
   "outputs": [],
@ -39,14 +57,29 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
   "id": "53d329d0",
   "metadata": {},
-   "outputs": [],
+   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "299c91406ddc4a368d1c80ed81c20a84",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/64 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
-    "batch_size = 4\n",
+    "batch_size = 1\n",
-    "guidance_scale = 15.0\n",
+    "guidance_scale = 30.0\n",
-    "prompt = \"a shark\"\n",
+    "prompt = \"road sign\"\n",
    "\n",
    "latents = sample_latents(\n",
    "    batch_size=batch_size,\n",
@ -67,10 +100,25 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 20,
   "id": "633da2ec",
   "metadata": {},
-   "outputs": [],
+   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7c8b8946a49847dd9aa5376f9568775f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HTML(value='<img src=\"data:image/gif;base64,R0lGODlhQABAAIcAAJOYl4+UlY6TlI2TlI2Tk46Sko2Sk42SkoySk4ySko2RkYyRko…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "render_mode = 'nerf' # you can change this to 'stf'\n",
    "size = 64 # this is the size of the renders; higher values take longer to render.\n",
@ -83,21 +131,40 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
   "id": "85a4dce4",
   "metadata": {},
-   "outputs": [],
+   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/cailean/shap-e/shap_e/models/stf/renderer.py:286: UserWarning: exception rendering with PyTorch3D: No module named 'pytorch3d'\n",
      "  warnings.warn(f\"exception rendering with PyTorch3D: {exc}\")\n",
      "/home/cailean/shap-e/shap_e/models/stf/renderer.py:287: UserWarning: falling back on native PyTorch renderer, which does not support full gradients\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "# Example of saving the latents as meshes.\n",
    "from shap_e.util.notebooks import decode_latent_mesh\n",
    "\n",
    "for i, latent in enumerate(latents):\n",
    "    t = decode_latent_mesh(xm, latent).tri_mesh()\n",
-    "    with open(f'example_mesh_{i}.ply', 'wb') as f:\n",
+    "    with open(f'road_example_mesh_{i}.ply', 'wb') as f:\n",
    "        t.write_ply(f)\n",
-    "    with open(f'example_mesh_{i}.obj', 'w') as f:\n",
+    "    with open(f'road_example_mesh_{i}.obj', 'w') as f:\n",
    "        t.write_obj(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "71b5ace4-b449-4a7e-b4e3-66ee6a5d03c3",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
@ -116,7 +183,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.11.3"
+   "version": "3.10.12"
  }
 },
 "nbformat": 4,