Make a Web Video Filter Using TensorFlow and Three.js

Last Updated On 23 Apr 2023 by Michael Iriarte aka Mika

This tutorial guides you step by step through creating an HTML5 video filter that works on mobile and desktop. All of this is made possible by TensorFlow and Three.js.


We will use the following libraries:

TensorFlow.js: a machine learning library that can be used to train and deploy machine learning models in the browser.

Three.js: a JavaScript library that can be used to create interactive 3D graphics.

Getting all the dependencies

TensorFlow models can be quite large, so we will rely on CDN delivery instead of hosting them in our project, by adding the following in the <head> tag of our page:

<script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@1.2"></script>
<script src="https://cdn.jsdelivr.net/npm/@tensorflow-models/body-pix@2.0"></script>

As for Three.js, we want to use some of the code shipped in its examples, so we install the dependency using npm:

npm i three

From there we will be able to load the utilities we need directly from the node_modules directory.
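
For reference, here is a minimal sketch of the imports used later in this tutorial (assuming a bundler or dev server that resolves bare module specifiers from node_modules):

// Three.js core plus the example utilities we need later.
import * as THREE from 'three';
import { GLTFLoader } from 'three/examples/jsm/loaders/GLTFLoader.js';
import { OrbitControls } from 'three/examples/jsm/controls/OrbitControls.js';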

Loading the user camera

First we set up the camera so that we can get a stream from the user's webcam:

function initVideoStream() {
  const videoElement = document.createElement('video');
  console.log('Initializing Video Stream...');
  return new Promise(ready => {
    navigator.mediaDevices.getUserMedia({ video: true, audio: false })
      .then(stream => {
        videoElement.srcObject = stream;
        videoElement.play();
        // Wait for playback to start so the real video dimensions are known,
        // then propagate them to the helper canvases.
        videoElement.addEventListener('playing', () => {
          canvasTexture.width = canvasMask.width = videoElement.width = videoElement.videoWidth;
          canvasTexture.height = canvasMask.height = videoElement.height = videoElement.videoHeight;
          ready(videoElement);
        });
      })
      .catch(err => {
        alert(`[Error occurred]: ${err}`);
      });
  });
}
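
The snippet above writes the video dimensions into two helper canvases, canvasTexture and canvasMask, which the rest of the code uses as module-level globals. Here is a minimal sketch of how they, and the other shared variables used below, might be declared (assumed setup, not shown in the original source):

// Shared state used throughout the tutorial; declare once at module level.
const canvasMask = document.createElement('canvas');    // BodyPix draws the body mask here
const canvasTexture = document.createElement('canvas'); // backing canvas for the sticker texture
const contextMask = canvasMask.getContext('2d');
const textureContext = canvasTexture.getContext('2d');
const container = document.body;                        // the renderer canvas is appended here

let videoElement, net;                        // webcam <video> element and the BodyPix network
let clock, counter, sliceCounter;             // render-loop timing state
let width, height, maxSide;                   // video dimensions
let scene, camera, renderer, backgroundMesh;  // Three.js scene objects
let stickerMesh, stickerTexture, fallAction, mixer;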

Initialize TensorFlow with the BodyPix model

Next we load the BodyPix model:

function loadBodyPix() {
  console.log('Initializing BodyPix Library...');
  return new Promise(ready => {
    const options = {
      multiplier: 0.5,   // smaller MobileNet: faster, slightly less accurate
      outputStride: 32,  // coarser output resolution: faster inference
      quantBytes: 4,     // full-precision weights
    };
    bodyPix.load(options)
      .then(net => ready(net))
      .catch(err => console.log(err));
  });
}
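
These options trade accuracy for speed: the 0.5 multiplier and an output stride of 32 select a smaller, coarser MobileNet that runs comfortably on mobile, while quantBytes: 4 keeps full-precision weights for the best segmentation the reduced network can produce.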

Prepare the 3D scene

Finally we just need to set up the 3D scene.

For the sticker effect we create a small animated model in Blender. With just a single plane and a few bones we get the desired effect. We don't need to worry too much about the scale or animation speed, as both can be adjusted later from Three.js.


Once the 3D model is exported to glTF, we use the appropriate Three.js loader to load it into our small application, as below:

function init3D() {
  console.log('Initializing Three...');

  clock = new THREE.Clock();
  counter = 0;
  sliceCounter = 0;
  width = videoElement.videoWidth;
  height = videoElement.videoHeight;
  maxSide = Math.max(width, height);
  camera = new THREE.OrthographicCamera(width / - 2, width / 2, height / 2, height / - 2, 1, 1000);
  camera.position.set(0, 0, 500);

  scene = new THREE.Scene();

  backgroundMesh = new THREE.Mesh(new THREE.PlaneGeometry(width, height), new THREE.MeshBasicMaterial({
    side: THREE.DoubleSide,
    map: new THREE.VideoTexture(videoElement)
  }));
  backgroundMesh.rotation.y = -Math.PI;
  backgroundMesh.position.set(0, 0, 110);
  scene.add(backgroundMesh);

  const loader = new GLTFLoader();
  loader.load('./../models/sticker.glb', function (gltf) {
    scene.add(gltf.scene);
    gltf.scene.traverse(function (child) {
      if (child.name === 'StickerPlaneMesh') {
        stickerTexture = new THREE.CanvasTexture(canvasTexture);
        stickerTexture.wrapT = stickerTexture.wrapS = THREE.RepeatWrapping;
        stickerTexture.repeat.x = - 1;
        stickerTexture.repeat.y = - 1;
        child.material = new THREE.MeshBasicMaterial({
          color: 0xffffff,
          transparent: true,
          side: THREE.DoubleSide,
          map: stickerTexture,
        });
        child.geometry.scale(width / 2, height / 2, 1);
        stickerMesh = child;
      } else if (child.name === 'StickerPlane') {
        child.position.set(0, -height * 0.5, 115);
      }
    });

    fallAction = gltf.animations[0]; // the AnimationClip exported from Blender
    mixer = new THREE.AnimationMixer(gltf.scene);
    const action = mixer.clipAction(fallAction);
    action.clampWhenFinished = true; // hold the last frame instead of snapping back
    action.play();
    mixer.timeScale = 2; // the Blender animation is a bit slow, so play it at double speed

  });


  renderer = new THREE.WebGLRenderer({ antialias: true, alpha: true });
  renderer.setClearColor(0x000000, 0);
  renderer.setPixelRatio(window.devicePixelRatio);
  renderer.setSize(width, height);
  container.appendChild(renderer.domElement);

  const controls = new OrbitControls(camera, renderer.domElement);
  controls.enablePan = false;
  controls.minDistance = 0.5;
  controls.maxDistance = 50000;
}
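
Note the orthographic camera: its frustum matches the video dimensions exactly, so one scene unit corresponds to one video pixel and the background plane fills the frame with no perspective distortion.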

Generating the mask data

Before we can use frames from the webcam as textures in Three.js, they need to be processed by TensorFlow to determine the mask area and extract the body shapes from the video stream:

async function drawMask() {
  // Run person segmentation on the current video frame,
  // then draw the resulting mask onto canvasMask.
  const segmentation = await net.segmentPerson(videoElement);
  const coloredPartImage = bodyPix.toMask(segmentation);
  const opacity = 1;
  const flipHorizontal = false;
  const maskBlurAmount = 0;
  bodyPix.drawMask(
    canvasMask, videoElement, coloredPartImage, opacity, maskBlurAmount,
    flipHorizontal
  );
}
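
With the default colors, toMask returns an image where the detected person is transparent and the background is opaque black, so drawMask leaves us with the person visible on a solid black background. The next step turns that black into transparency.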

Cut a sticker slice

Now that all the pieces are in place, we are just missing a function to cut a sticker slice every now and then.

This will grab a webcam frame, pass it to TensorFlow to extract the body shape, then send the result as a texture to Three.js/WebGL to render it in the 3D model animation.

async function sliceOne() {
  await drawMask(); // refresh canvasMask with the latest segmented frame
  const { width, height } = canvasTexture;
  const numPerSide = 1; // with 1, the sticker uses the whole texture
  const x = (sliceCounter % numPerSide) * (width / numPerSide);
  const y = (Math.floor(sliceCounter / numPerSide) % numPerSide) * (height / numPerSide);

  // Make the black background pixels fully transparent.
  const imgData = contextMask.getImageData(0, 0, canvasMask.width, canvasMask.height);
  const data = imgData.data;
  for (let i4 = 0; i4 < data.length; i4 += 4) {
    const red = data[i4];
    const green = data[i4 + 1];
    const blue = data[i4 + 2];
    if (!red && !green && !blue) {
      data[i4 + 3] = 0;
    }
  }
  contextMask.putImageData(imgData, 0, 0);
  textureContext.clearRect(x, y, width / numPerSide, height / numPerSide);
  // Outline: stacked drop-shadows in all four directions draw a white
  // border around the cut-out silhouette, giving the sticker look.
  const borderSize = 7;
  textureContext.filter = `
    drop-shadow(${borderSize}px 0px 0 white)
    drop-shadow(-${borderSize}px 0px 0 white)
    drop-shadow(0px -${borderSize}px 0 white)
    drop-shadow(0px ${borderSize}px 0 white)
  `;
  textureContext.drawImage(canvasMask, x, y, width / numPerSide, height / numPerSide);
  if (stickerMesh) {
    stickerMesh.material.map.needsUpdate = true; // tell Three.js the texture changed
  }
}

Mashing it all together

And that’s all it takes! We’re just missing a render loop function to sync it all together:

async function init() {
  videoElement = await initVideoStream();
  net = await loadBodyPix();
  init3D();
  animate();
}

function animate() {
  const delta = clock.getDelta();
  requestAnimationFrame(animate);
  counter += delta;
  // Every half second: recolor the sticker, restart the fall
  // animation, and cut a new slice from the webcam.
  if (counter > 0.5) {
    counter = 0;
    if (stickerMesh) {
      stickerMesh.material.color.setHSL(Math.random(), 0.75, 0.75);
    }
    if (mixer) {
      mixer.clipAction(fallAction).reset();
    }
    sliceOne();
  }
  if (mixer) mixer.update(delta);
  renderer.render(scene, camera);
}
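
Then we just call init() once the page is ready; a minimal sketch, assuming the script runs after the DOM is parsed:

// Kick everything off once the DOM is ready.
window.addEventListener('DOMContentLoaded', () => {
  init();
});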

Check out the final result demo here.

Also find the full source code on GitHub.

And of course, make sure to allow webcam access when opening the page :-)

About The Author

Headshot of Michael Iriarte aka Mika

Hi, I'm Michael, aka Mika. I'm a software engineer with years of experience in frontend development. Thank you for visiting tips4devs.com. I hope you learned something fun today! You can follow me on Twitter, see some of my work on GitHub, or read more about me on my website.