Initial Slow Version of Blurring Backgrounds with Bodypix

This commit is contained in:
Brandon Rozek 2020-05-03 12:12:26 -04:00
commit c3f2738e78
7 changed files with 215 additions and 0 deletions

4
.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
node_modules/
package-lock.json
.mypy_cache/
.vscode/

22
decode_image/LICENSE Normal file
View file

@ -0,0 +1,22 @@
MIT License
Copyright (c) 2020 Daniel Llewellyn
Copyright (c) 2020 Benjamin Elder
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

84
decode_image/index.js Normal file
View file

@ -0,0 +1,84 @@
const {Tensor3D, tensor3d, util} = require('@tensorflow/tfjs-core');
const jpeg = require('jpeg-js');
/**
 * Decode a JPEG-encoded image into a 3D Tensor of dtype `int32`.
 *
 * ```js
 * const image = require('path/to/img.jpg');
 * const imageAssetPath = Image.resolveAssetSource(image);
 * const response = await fetch(imageAssetPath.uri, {}, { isBinary: true });
 * const rawImageData = await response.arrayBuffer();
 * const imageTensor = decodeJpeg(rawImageData);
 * ```
 *
 * @param contents The JPEG-encoded image in an Uint8Array.
 * @param channels Optional channel count; only 3 (RGB) is accepted today.
 * @returns A 3D Tensor of dtype `int32` with shape [height, width, 3].
 */
/** @doc {heading: 'Media', subheading: 'Images'} */
function decodeJpeg(
    contents, channels = 3) {
  util.assert(
      getImageType(contents) === 'jpeg',
      () => 'The passed contents are not a valid JPEG image');
  util.assert(
      channels === 3, () => 'Only 3 channels is supported at this time');
  // `true` asks jpeg-js for a typed array; the decoded pixels are RGBA
  // with the alpha byte always set to 255.
  const {width, height, data} = jpeg.decode(contents, true);
  // Repack RGBA -> RGB, discarding the constant alpha channel.
  const rgb = new Uint8Array(width * height * 3);
  let src = 0;
  let dst = 0;
  while (dst < rgb.length) {
    rgb[dst++] = data[src];
    rgb[dst++] = data[src + 1];
    rgb[dst++] = data[src + 2];
    src += 4;
  }
  return tensor3d(rgb, [height, width, channels]);
}
/**
 * Classify image bytes by their leading magic number.
 *
 * Signatures follow
 * https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files
 * and mirror TensorFlow Core:
 * https://github.com/tensorflow/tensorflow/blob/4213d5c1bd921f8d5b7b2dc4bbf1eea78d0b5258/tensorflow/core/kernels/decode_image_op.cc#L44
 *
 * @param content Byte array (e.g. Uint8Array) holding the encoded image.
 * @returns One of 'jpeg', 'gif', 'png', 'bmp'.
 * @throws Error when no known signature matches.
 */
function getImageType(content) {
  // True when `content` has more than `minLen` bytes and starts with `sig`.
  // `minLen` values are kept exactly as the original thresholds.
  const startsWith = (sig, minLen) =>
      content.length > minLen && sig.every((byte, i) => content[i] === byte);

  if (startsWith([255, 216, 255], 3)) {
    return 'jpeg';  // ff d8 ff
  }
  if (startsWith([71, 73, 70, 56], 4)) {
    return 'gif';  // 47 49 46 38 ("GIF8")
  }
  if (startsWith([137, 80, 78, 71, 13, 10, 26, 10], 8)) {
    return 'png';  // 89 50 4e 47 0d 0a 1a 0a
  }
  if (startsWith([66, 77], 3)) {
    return 'bmp';  // 42 4d ("BM"); length gate of >3 preserved from original
  }
  throw new Error(
      'Expected image (JPEG, PNG, or GIF), but got unsupported image type');
}
// Public API of this module.
module.exports = {decodeJpeg};

42
main.py Normal file
View file

@ -0,0 +1,42 @@
import zmq
import cv2
import numpy as np

# REQ socket: send one JPEG-encoded frame, receive one segmentation mask
# per round-trip from the bodypix server (server.js).
ctx = zmq.Context()
sock = ctx.socket(zmq.REQ)
sock.connect('ipc:///tmp/bodypix')

cap = cv2.VideoCapture(0)
while True:
    _, frame = cap.read()
    _, image = cv2.imencode('.jpg', frame)
    # .tobytes() replaces the deprecated ndarray.tostring().
    payload = image.tobytes()
    print("LENGTH", len(payload), flush=True)
    sock.send(payload)

    # The reply is the raw bytes of the server's segmentation buffer.
    # Each pixel is assumed to occupy 4 bytes, so reshape to (H, W, 4)
    # and keep the low byte as the 0/1 person mask.
    # NOTE(review): confirm this layout matches the server's typed array.
    converted_img = sock.recv()
    mask = np.frombuffer(converted_img, dtype=np.uint8)
    mask = mask.reshape((frame.shape[0], frame.shape[1], 4))
    mask = mask[:, :, 0]

    # Post-process mask: dilate to close small holes, then blur so the
    # foreground/background boundary feathers smoothly.
    mask = cv2.UMat(mask)
    mask = cv2.dilate(mask, np.ones((10, 10), np.uint8), iterations=1)
    mask = cv2.blur(cv2.UMat(mask.get().astype(np.float32)), (30, 30))
    frame = cv2.UMat(frame)
    background = cv2.GaussianBlur(frame, (221, 221), sigmaX=20, sigmaY=20)

    # Composite foreground over blurred background. The vectorized
    # expression performs the same per-channel math (with the same
    # float -> uint8 truncation) as the original per-channel loop.
    frame = frame.get().astype(np.uint8)
    mask = mask.get().astype(np.float32)
    background = background.get().astype(np.uint8)
    inv_mask = 1 - mask
    frame = (frame * mask[:, :, None]
             + background * inv_mask[:, :, None]).astype(np.uint8)

    cv2.imshow('frame', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()

19
package.json Normal file
View file

@ -0,0 +1,19 @@
{
"name": "zbody",
"version": "1.0.0",
"description": "ZeroMQ Bodypix 2.0 Server",
"main": "server.js",
"dependencies": {
"@tensorflow-models/body-pix": "^2.0.5",
"@tensorflow/tfjs-node": "^1.7.4",
"canvas": "^2.6.1",
"jpeg-js": "^0.4.0",
"zeromq": "^6.0.0-beta.6"
},
"devDependencies": {},
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC"
}

32
server.js Normal file
View file

@ -0,0 +1,32 @@
const zmq = require('zeromq');
const tf = require('@tensorflow/tfjs-node');
const bodyPix = require('@tensorflow-models/body-pix');
const {decodeJpeg} = require('./decode_image');
// BodyPix model instance; populated asynchronously by load().
let net = null;
/**
 * Load the BodyPix model into the module-level `net`.
 * Uses the fastest preset: MobileNetV1 backbone, 0.5 width multiplier,
 * 2-byte weight quantization.
 */
async function load() {
  const config = {
    architecture: 'MobileNetV1',
    outputStride: 16,
    multiplier: 0.5,
    quantBytes: 2,
  };
  net = await bodyPix.load(config);
}
/**
 * Serve segmentation requests over a ZeroMQ reply socket: for every
 * incoming JPEG frame, decode it, run person segmentation, and send
 * the raw segmentation buffer back.
 */
async function run() {
  const socket = new zmq.Reply();
  await socket.bind('ipc:///tmp/bodypix');
  console.log("Bounded to ipc:///tmp/bodypix");
  for await (const [msg] of socket) {
    console.log("Received RAW Message");
    const image = decodeJpeg(msg);
    const segmentation = await net.segmentPerson(image);
    await socket.send(segmentation.data);
  }
}
// Load the model BEFORE serving. The original fire-and-forget
// `load(); run();` let the socket accept requests while the model was
// still loading, so an early message would hit a null `net`. Chaining
// also gives both promises a rejection handler instead of leaving them
// floating.
load()
    .then(run)
    .catch((err) => {
      console.error(err);
      process.exit(1);
    });

12
setup.py Normal file
View file

@ -0,0 +1,12 @@
from setuptools import setup, find_packages
setup(name="bodypix-background",
version="0.1",
author="Brandon Rozek",
packages=find_packages(),
install_requires=[
"opencv-python~=4.2.0.34",
"numpy~=1.18.3",
"pyzmq~=19.0.0"
]
)