Spaces:
Runtime error
Runtime error
import gradio as gr | |
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer | |
import datasets | |
import asyncio | |
import numpy as np | |
def make_script(shader_code): | |
# code copied and fixed(escaping single quotes to double quotes!!!) from https://webglfundamentals.org/webgl/webgl-shadertoy.html | |
script = (""" | |
<!-- Licensed under a BSD license. See license.html for license --> | |
<!DOCTYPE html> | |
<html> | |
<head> | |
<meta charset="utf-8"> | |
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes"> | |
<title>WebGL - Shadertoy</title> | |
<link type="text/css" href="https://webglfundamentals.org/webgl/resources/webgl-tutorials.css" rel="stylesheet" /> | |
<style> | |
.divcanvas { | |
position: relative; | |
display: inline-block; | |
} | |
canvas { | |
display: block; | |
} | |
.playpause { | |
position: absolute; | |
left: 10px; | |
top: 10px; | |
width: 100%; | |
height: 100%; | |
font-size: 60px; | |
justify-content: center; | |
align-items: center; | |
color: rgba(255, 255, 255, 0.3); | |
transition: opacity 0.2s ease-in-out; | |
} | |
.playpausehide, | |
.playpause:hover { | |
opacity: 0; | |
} | |
.iframe .divcanvas { | |
display: block; | |
} | |
</style> | |
</head> | |
<body> | |
<div class="divcanvas"> | |
<canvas id="canvas"></canvas> | |
<div class="playpause">▶</div> | |
</div> | |
\nblank canvas here indicates that some of the shadertoy specific functions are not yet supported with this implementation (like #define I believe). you can always copy and paste the code into a shadertoy.com window to try. | |
</body> | |
<!-- | |
for most samples webgl-utils only provides shader compiling/linking and | |
canvas resizing because why clutter the examples with code thats the same in every sample. | |
See https://webglfundamentals.org/webgl/lessons/webgl-boilerplate.html | |
and https://webglfundamentals.org/webgl/lessons/webgl-resizing-the-canvas.html | |
for webgl-utils, m3, m4, and webgl-lessons-ui. | |
--> | |
<script src="https://webglfundamentals.org/webgl/resources/webgl-utils.js"></script> | |
<script> | |
"use strict"; | |
function main() { | |
// Get A WebGL context | |
/** @type {HTMLCanvasElement} */ | |
const canvas = document.querySelector("#canvas"); | |
const gl = canvas.getContext("webgl"); | |
if (!gl) { | |
return; | |
} | |
const vs = ` | |
// an attribute will receive data from a buffer | |
attribute vec4 a_position; | |
// all shaders have a main function | |
void main() { | |
// gl_Position is a special variable a vertex shader | |
// is responsible for setting | |
gl_Position = a_position; | |
} | |
`; | |
const fs = ` | |
precision highp float; | |
uniform vec2 iResolution; | |
uniform vec2 iMouse; | |
uniform float iTime; | |
""" + shader_code + """ | |
void main() { | |
mainImage(gl_FragColor, gl_FragCoord.xy); | |
} | |
`; | |
// setup GLSL program | |
const program = webglUtils.createProgramFromSources(gl, [vs, fs]); | |
// look up where the vertex data needs to go. | |
const positionAttributeLocation = gl.getAttribLocation(program, "a_position"); | |
// look up uniform locations | |
const resolutionLocation = gl.getUniformLocation(program, "iResolution"); | |
const mouseLocation = gl.getUniformLocation(program, "iMouse"); | |
const timeLocation = gl.getUniformLocation(program, "iTime"); | |
// Create a buffer to put three 2d clip space points in | |
const positionBuffer = gl.createBuffer(); | |
// Bind it to ARRAY_BUFFER (think of it as ARRAY_BUFFER = positionBuffer) | |
gl.bindBuffer(gl.ARRAY_BUFFER, positionBuffer); | |
// fill it with a 2 triangles that cover clipspace | |
gl.bufferData(gl.ARRAY_BUFFER, new Float32Array([ | |
-1, -1, // first triangle | |
1, -1, | |
-1, 1, | |
-1, 1, // second triangle | |
1, -1, | |
1, 1, | |
]), gl.STATIC_DRAW); | |
const playpauseElem = document.querySelector(".playpause"); | |
const inputElem = document.querySelector(".divcanvas"); | |
inputElem.addEventListener("mouseover", requestFrame); | |
inputElem.addEventListener("mouseout", cancelFrame); | |
let mouseX = 0; | |
let mouseY = 0; | |
function setMousePosition(e) { | |
const rect = inputElem.getBoundingClientRect(); | |
mouseX = e.clientX - rect.left; | |
mouseY = rect.height - (e.clientY - rect.top) - 1; // bottom is 0 in WebGL | |
} | |
inputElem.addEventListener("mousemove", setMousePosition); | |
inputElem.addEventListener("touchstart", (e) => { | |
e.preventDefault(); | |
playpauseElem.classList.add("playpausehide"); | |
requestFrame(); | |
}, {passive: false}); | |
inputElem.addEventListener("touchmove", (e) => { | |
e.preventDefault(); | |
setMousePosition(e.touches[0]); | |
}, {passive: false}); | |
inputElem.addEventListener("touchend", (e) => { | |
e.preventDefault(); | |
playpauseElem.classList.remove("playpausehide"); | |
cancelFrame(); | |
}, {passive: false}); | |
let requestId; | |
function requestFrame() { | |
if (!requestId) { | |
requestId = requestAnimationFrame(render); | |
} | |
} | |
function cancelFrame() { | |
if (requestId) { | |
cancelAnimationFrame(requestId); | |
requestId = undefined; | |
} | |
} | |
let then = 0; | |
let time = 0; | |
function render(now) { | |
requestId = undefined; | |
now *= 0.001; // convert to seconds | |
const elapsedTime = Math.min(now - then, 0.1); | |
time += elapsedTime; | |
then = now; | |
webglUtils.resizeCanvasToDisplaySize(gl.canvas); | |
// Tell WebGL how to convert from clip space to pixels | |
gl.viewport(0, 0, gl.canvas.width, gl.canvas.height); | |
// Tell it to use our program (pair of shaders) | |
gl.useProgram(program); | |
// Turn on the attribute | |
gl.enableVertexAttribArray(positionAttributeLocation); | |
// Bind the position buffer. | |
gl.bindBuffer(gl.ARRAY_BUFFER, positionBuffer); | |
// Tell the attribute how to get data out of positionBuffer (ARRAY_BUFFER) | |
gl.vertexAttribPointer( | |
positionAttributeLocation, | |
2, // 2 components per iteration | |
gl.FLOAT, // the data is 32bit floats | |
false, // dont normalize the data | |
0, // 0 = move forward size * sizeof(type) each iteration to get the next position | |
0, // start at the beginning of the buffer | |
); | |
gl.uniform2f(resolutionLocation, gl.canvas.width, gl.canvas.height); | |
gl.uniform2f(mouseLocation, mouseX, mouseY); | |
gl.uniform1f(timeLocation, time); | |
gl.drawArrays( | |
gl.TRIANGLES, | |
0, // offset | |
6, // num vertices to process | |
); | |
requestFrame(); | |
} | |
requestFrame(); | |
requestAnimationFrame(cancelFrame); | |
} | |
main(); | |
</script> | |
</html> | |
""") | |
return script | |
def make_iframe(shader_code): #keep a single function? | |
script = make_script(shader_code) | |
return f"""<iframe width="640" height="420" srcdoc=\'{script}\' allowfullscreen></iframe>""" | |
intro_text = """ | |
# Welcome to the interactive shadercoding demo. | |
This gives you access to a filtered version of the [Shadertoys](https://huggingface.co/datasets/Vipitis/Shadertoys) dataset, only shaders that consist of a single pass are available. | |
And then lets you use code generation models to make alterations to part of the shadercode. | |
## How To Use: | |
1. Load any Model for [`text-generation`](https://huggingface.co/models?pipeline_tag=text-generation) and hit ENTER. | |
2. Use the slider to sample a shader from the dataset. | |
- The original shader will be embedding on the left, click on title to get to the source. | |
- The shadercode will be displayed on the right, this is interactive. | |
- A preview of the currently displayed shadercode will be displayed on the lower left. (hover to advance time) | |
3. use the dropdown to select a function to modify. | |
4. press either button to make modifications to that function | |
5. you can also edit the code manually. | |
""" | |
outro_text =""" | |
## Models to try (look at [ShaderEval](https://huggingface.co/spaces/Vipitis/ShaderEval) for an indication of how helpful they will be): | |
- [gpt2](https://huggingface.co/gpt2) baseline for language models, really struggles with shadercode. | |
- [bigscience/bloom-1b1](https://huggingface.co/bigscience/bloom-1b1) a newer and larger freely available model. Does understand a big of code. | |
- [codeparrot/codeparrot-small](https://huggingface.co/codeparrot/codeparrot-small) a model trained on code, but not on shadercode. Manages to graps the patterns. | |
- [salesforce/codegen-2B-multi](https://huggingface.co/salesforce/codegen-2B-multi) a larger model that indicates some potential. | |
- [bigcode/santacoder](https://huggingface.co/bigcode/santacoder) a model trained on subset of [TheStack](https://huggingface.co/datasets/bigcode/the-stack), struggles with shadercode. | |
- [Vipitis/santacoder-finetuned-the-stack-glsl](https://huggingface.co/Vipitis/santacoder-finetuned-the-stack-glsl) fine-tuned by me on the glsl subset of [TheStack](https://huggingface.co/datasets/bigcode/the-stack), is an improvement. | |
- [Vipitis/santacoder-finetuned-Shadertoys](https://huggingface.co/Vipitis/santacoder-finetuned-Shadertoys) fine-tuned by me on whole shaders from [Shadertoys](https://huggingface.co/datasets/Vipitis/Shadertoys). Does overfit quite a bit with greedy decoding. | |
- [Vipitis/santacoder-finetuned-Shadertoys-fine](https://huggingface.co/Vipitis/santacoder-finetuned-Shadertoys-fine) fine-tuned by me just functions from [Shadertoys-fine](https://huggingface.co/datasets/Vipitis/Shadertoys-fine). Memorizes the exact function about half the time. | |
- [bigcode/starcoder](https://huggingface.co/bigcode/starcoder) a very large model which I haven't tried yet. | |
- **any other model you want to** | |
## TODO (feel free to contribute with a [Pull-Request](https://huggingface.co/Vipitis/santacoder-finetuned-the-stack-glsl/discussions?status=open&type=pull_request)): | |
- [x] use embedded Shadertoy for reference/attribution (done, but some errors) | |
- [~] working render implementation on CPU only space (as webgl via webglfundamentals, ccs needs fixing for iframe (or hijack Shadertoy iframe)) | |
- [~] generate variations of return statements [ShaderEval task1](https://huggingface.co/spaces/Vipitis/ShaderEval) (needs to be reworked using the other parts) | |
- [x] generate whole functions (seems to work quite well) | |
- [] dropdown for model selection (from curated list or all supported models?) | |
- [] generation history stating which function and orig/generated returns. (use State ??). do it as comments in the code? | |
- [~] display errros/issues to the user (raise gr.Error could be one idea, but highlighting in the code would be awesome) currently adds a comment to the code. | |
- [] generate whole shaders (via prompts guidance, recursive from errors) | |
- [] accordion with generation parameters (as pipeline_kwargs?) look up starcoder playround and take "inspiration" from there | |
- [] support FIM task for better model context | |
- [~] include some context for prompt (title, comments before a functions) - now works with the first comment inside a function body (has to be first) | |
- [] gradio examples | |
### Notes: | |
- this is meant as a resource to show code generation for a "creative" task. | |
- the goal is not to not replace shader artists, but aims to be an assistant instead. | |
- the space still lacks quite a lot of features, but will continue to evolve. | |
- this demo can be useful to sannity check evaluation results, where the academic numbers are made. | |
- If you create a remix with these tools, please attribute the original creator of your starting point when sharing the results. (And perhaps share in the [discussion tab](https://huggingface.co/Vipitis/santacoder-finetuned-the-stack-glsl/discussions?status=open&type=discussion) too) | |
""" | |
passes_dataset = datasets.load_dataset("Vipitis/Shadertoys") | |
single_passes = passes_dataset.filter(lambda x: not x["has_inputs"] and x["num_passes"] == 1) #could also include shaders with no extra functions. | |
all_single_passes = datasets.concatenate_datasets([single_passes["train"], single_passes["test"]]) | |
num_samples = len(all_single_passes) | |
import tree_sitter | |
from tree_sitter import Language, Parser | |
Language.build_library("./build/my-languages.so", ['tree-sitter-glsl']) | |
GLSL_LANGUAGE = Language('./build/my-languages.so', 'glsl') | |
parser = Parser() | |
parser.set_language(GLSL_LANGUAGE) | |
def grab_sample(sample_idx): | |
sample_pass = all_single_passes[sample_idx] | |
sample_code = sample_pass["code"] | |
sample_source = sample_pass["source"] | |
sample_title = sample_pass["title"] | |
sample_auhtor = sample_pass["author"] | |
source_iframe = construct_embed(sample_source) | |
print(f"{source_iframe=}") | |
# sample_funcs = _parse_functions(sample_code) | |
# funcs = _parse_functions(sample_code) | |
# func_identifiers = [f"{idx:2d}: {n.child_by_field_name('declarator').text.decode()}" for idx, n in enumerate(funcs)] | |
# print(f"updating drop down to:{func_identifiers}") | |
return sample_pass, sample_code, source_iframe, funcs#, gr.Dropdown.update(choices=func_identifiers) #, sample_title, sample_auhtor | |
def _parse_functions(in_code): | |
""" | |
returns all functions in the code as their actual nodes. | |
includes any comment made directly after the function definition or diretly after #copilot trigger | |
""" | |
tree = parser.parse(bytes(in_code, "utf8")) | |
funcs = [n for n in tree.root_node.children if n.type == "function_definition"] | |
return funcs | |
PIPE = None | |
def _make_pipeline(model_cp = "Vipitis/santacoder-finetuned-Shadertoys-fine"): #bad default model for testing | |
tokenizer = AutoTokenizer.from_pretrained(model_cp, trust_remote_code=True) | |
model = AutoModelForCausalLM.from_pretrained(model_cp, trust_remote_code=True) | |
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, trust_remote_code=True) | |
PIPE = pipe # set the global? | |
print(f"loaded model {model_cp} as a pipline") | |
return pipe | |
def process_retn(retn): | |
return retn.split(";")[0].strip() | |
def get_full_replacement(orig_code, retn_start_idx, retn_end_idx, prediction) -> str: | |
""" | |
Batches the generated return statement into the code and returns the full altered code. | |
""" | |
print(f"{orig_code[retn_start_idx:retn_end_idx]=}") | |
generated = process_retn(prediction) | |
print(f"{generated=}") | |
variation = orig_code[:retn_start_idx] + generated + orig_code[retn_end_idx:] | |
return variation | |
def alter_return(orig_code, func_idx="0:", pipeline=PIPE): #default pipeline can't be passed as gloabl? | |
""" | |
Replaces the return statement of a function with a generated one. | |
Args: | |
orig_code (str): The original code. | |
func_idx (int): The index of the function to replace the return statement of. | |
pipeline (Pipeline): The pipeline to use for generation. | |
Returns: | |
str: The altered code. | |
""" | |
if pipeline is None: | |
print("no pipeline found, loading default one") | |
pipeline = _make_pipeline() | |
print(f"{func_idx=}") | |
func_idx = int(func_idx.split(":")[0].strip()) | |
retrns = [] | |
retrn_start_idx = orig_code.find("return") | |
while retrn_start_idx != -1: | |
retrn_end_idx = orig_code.find(";", retrn_start_idx) | |
retrns.append((retrn_start_idx, retrn_end_idx)) | |
retrn_start_idx = orig_code.find("return", retrn_end_idx) | |
num_returns = len(retrns) | |
if num_returns == 0: | |
print("no return statement found, returning original code") | |
return orig_code | |
func_idx = int(max(0, min(func_idx, num_returns - 1))) #clamp to valid range, cast to int as a bodge. | |
retrn_start_idx, retrn_end_idx = retrns[func_idx] | |
model_context = orig_code[:retrn_start_idx] #TODO: maximal context? | |
model_inp = model_context + "return" | |
new_toks = (retrn_end_idx - retrn_start_idx) * 2 #TODO: approximation, we do have early stopping? maybe also use a number instead? | |
pipe_generation = pipeline(model_inp, max_new_tokens=new_toks, return_full_text=False)[0]["generated_text"] #pipeline kwargs are missing?! | |
altered_code = get_full_replacement(orig_code, retrn_start_idx+7, retrn_end_idx, pipe_generation) | |
return altered_code | |
def _line_chr2char(text, line_idx, chr_idx): | |
""" | |
returns the character index at the given line and character index. | |
""" | |
lines = text.split("\n") | |
char_idx = 0 | |
for i in range(line_idx): | |
char_idx += len(lines[i]) + 1 | |
char_idx += chr_idx | |
return char_idx | |
def alter_body(old_code, func_id: str, funcs_list: list, pipeline=PIPE): | |
""" | |
Replaces the body of a function with a generated one. | |
Args: | |
old_code (str): The original code. | |
func_node (Node): The node of the function to replace the body of. | |
pipeline (Pipeline): The pipeline to use for generation. | |
Returns: | |
str: The altered code. | |
""" | |
print(f"{func_id=}") | |
func_id = int(func_id.split(":")[0].strip()) #undo their string casting? | |
func_node = funcs_list[func_id] | |
print(f"using for generation: {func_node=}") | |
print(f"{pipeline=}") # check if default even loaded | |
if pipeline is None: | |
print("no pipeline found, loading default one") | |
pipeline = _make_pipeline("Vipitis/santacoder-finetuned-Shadertoys-fine") | |
func_start_idx = _line_chr2char(old_code, func_node.start_point[0], func_node.start_point[1]) | |
identifier_str = func_node.child_by_field_name("type").text.decode() + " " + func_node.child_by_field_name("declarator").text.decode() #func_start_idx:body_start_idx? | |
body_node = func_node.child_by_field_name("body") | |
body_start_idx = _line_chr2char(old_code, body_node.start_point[0], body_node.start_point[1]) | |
body_end_idx = _line_chr2char(old_code, body_node.end_point[0], body_node.end_point[1]) | |
print(f"{old_code[body_start_idx:body_end_idx]=}") | |
model_context = identifier_str # base case | |
# add any comments at the beginning of the function to the model_context | |
second_child = func_node.child_by_field_name("body").children[1] #might error out? | |
if second_child.type == "comment": | |
# print(second_child.text.decode()) | |
model_context += " { \n " + second_child.text.decode() | |
print(f"{model_context=}") | |
num_new_tokens = max(160,(body_end_idx - body_start_idx) + 10) #TODO: approximation, we do have early stopping? maybe also use a number instead? HARD MAX for performance limits. | |
print(f"generating up to {num_new_tokens} after {model_context!r}") | |
generation = pipeline(model_context, max_new_tokens=num_new_tokens, return_full_text=False)[0]["generated_text"] | |
print(f"{generation=}") | |
ctx_with_generation = model_context + generation | |
print(f"{ctx_with_generation=}") | |
try: | |
#strip the body | |
first_gened_func = _parse_functions(ctx_with_generation)[0] # truncate generation to a single function? | |
except IndexError: | |
print("generation wasn't a full function.") | |
altered_code = old_code[:func_start_idx] + model_context + generation + "//the generation didn't complete the function!\n" + old_code[body_end_idx:] #needs a newline to break out of the comment. | |
return altered_code, pipeline | |
# raise gr.Error(f"didn't generate a full function: {generation!r}]") | |
print(f"{first_gened_func=}") | |
generated_body = first_gened_func.child_by_field_name("body").text.decode() | |
print(f"{generated_body=}") | |
altered_code = old_code[:func_start_idx] + identifier_str + generated_body + old_code[body_end_idx:] | |
return altered_code, pipeline | |
def add_history(func_id, orig_rtn, gened_rtn, history): | |
# is this a list? or a JSON dict? | |
history[func_id] = (orig_rtn, gened_rtn) | |
return history, history | |
def list_dropdown(in_code): #only used for auto update, not on sample pick? | |
funcs = _parse_functions(in_code) | |
# print(f"updating drop down to:{func_identifiers=}") | |
func_identifiers = [f"{idx:2d}: {n.child_by_field_name('declarator').text.decode()}" for idx, n in enumerate(funcs)] | |
# funcs = [n for n in funcs] #wrapped as set to avoid json issues? | |
print(f"updating drop down to:{func_identifiers}") | |
return funcs, gr.Dropdown.update(choices=func_identifiers) | |
def construct_embed(source_url): | |
shader_id = source_url.split("/")[-1] | |
return f'<iframe width="640" height="360" frameborder="0" src="https://www.shadertoy.com/embed/{shader_id}?gui=true&t=0&paused=true&muted=true" allowfullscreen></iframe>' | |
with gr.Blocks() as site: | |
top_md = gr.Markdown(intro_text) | |
model_cp = gr.Textbox(value="Vipitis/santacoder-finetuned-Shadertoys-fine", label="Model Checkpoint (Enter to load!)", interactive=True) | |
sample_idx = gr.Slider(minimum=0, maximum=num_samples, value=3211, label="pick sample from dataset", step=1.0) | |
func_dropdown = gr.Dropdown(label="chose a function to modify") #breaks if I add a string in before that? | |
with gr.Row(): | |
gen_return_button = gr.Button("generate a alternate return statement", label="generate return") | |
gen_func_button = gr.Button("generate an alternate function body", label="generate function") | |
# update_funcs_button = gr.Button("update functions", label="update functions") | |
with gr.Row(): | |
with gr.Column(): | |
source_embed = gr.HTML('<iframe width="640" height="360" frameborder="0" src="" allowfullscreen></iframe>', label="How this shader originally renders") | |
our_embed = gr.HTML(label="glsl render of the current code") | |
sample_code = gr.Code("// touch the slider to select a shader", label="Current Code (will update changes you generate)", language=None) | |
bot_md = gr.Markdown(outro_text) | |
sample_pass = gr.State(value={}) | |
pipe = gr.State(value=PIPE) | |
pipe.value=_make_pipeline("Vipitis/santacoder-finetuned-Shadertoys-fine") # set a default like this? | |
funcs = gr.State(value=[]) | |
# hist_state = gr.State(Value={}) | |
# history_table = gr.JSON() | |
model_cp.submit(fn=_make_pipeline, inputs=[model_cp], outputs=[pipe]) # how can we trigger this on load? | |
sample_idx.release(fn=grab_sample, inputs=[sample_idx], outputs=[sample_pass, sample_code, source_embed]) | |
gen_return_button.click(fn=alter_return, inputs=[sample_code, func_dropdown, pipe], outputs=[sample_code]) | |
gen_func_button.click(fn=alter_body, inputs=[sample_code, func_dropdown, funcs, pipe], outputs=[sample_code, pipe]) | |
sample_code.change(fn=list_dropdown, inputs=[sample_code], outputs=[funcs, func_dropdown]) # to update this after generation, so spans aren't messed up | |
sample_code.change(fn=make_iframe, inputs=[sample_code], outputs=[our_embed]) #twice could cause issues, find better ways. | |
site.launch() | |