non vorrei lavorare

昔はおもにプログラミングやガジェット系、今は?

俺が雑にAtom ShellをWebCLに対応させた件

はじめに

この記事は、@kjunichiの2014年パーソナルアドベントカレンダーの12日目の記事です。

 

2014/12/18追記

Node.jsのv0.11.14以上で以下を実行

git clone https://github.com/kjunichi/node-webcl.git
cd node-webcl
npm install
git checkout 4db992272393fa267f4811cfcea84760b792ad6f

を実行した後に各モジュールにてAtom Shell向けのリビルドを実行してお使いください。

git checkoutで怪しげなID指定なのはオリジナルのnode-webclのバージョン0.9.0がイマイチ不安定だったからです。。。

必要なもの

Atom Shell向けに各モジュールをビルド

以下はNode.jsの0.11系で作業

node-gypがない場合

npm install -g node-gyp

node-webclの依存モジュールも含め、一旦以下のコマンドでインストールする

cd node-webcl
npm install

その後、各ネイティブモジュールのディレクトリで以下を実行

HOME=~/.atom-shell-gyp node-gyp rebuild --target=0.20.0 --arch=x64 --dist-url=https://gh-contractor-zcbenz.s3.amazonaws.com/atom-shell/dist

--target=0.20.0は現時点での最新のAtom Shellが0.20.0だからなので、新しいものがリリースされたら、そのバージョンに変更する必要がある。

Atom Shellのサンプルコード

  • node_modules直下のnode-webclの階層にnode-imageのリンクを張っておくこと
  • node-webclのlibフォルダをアプリのルートディレクトリ直下にもコピーもしくはリンクしておくこと
cp -r node-webcl/lib .

各ファイルの配置

├── BoxFilter.cl
├── index.html
├── lenaRGB.jpg
├── lib
├── main.js
├── node_modules
└── package.json

モジュールの配置

node_modules/
├── node-image -> node-webcl/node_modules/node-image
└── node-webcl
    ├── node_modules
    │   ├── node-image
    │   └── node-webgl
    │       ├── node_modules
    │           └── node-glfw

package.json

{
  "name"    : "your-app",
  "version" : "0.1.0",
  "main"    : "main.js"
}

main.js

// Entry point of the Atom Shell application (main process): configures
// Chromium, opens a single browser window and loads index.html into it.
var app = require('app'); // Module to control application life.
var BrowserWindow = require('browser-window'); // Module to create native browser window.
var dialog = require('dialog');
var ipc = require('ipc');
var path = require('path');

// Report crashes to our server.
require('crash-reporter').start();

// Chromium command line switches have to be appended before the 'ready'
// event is emitted; appending them inside the 'ready' handler is too late
// for them to be taken into account.
app.commandLine.appendSwitch("js-flags", "--harmony");

// Keep a global reference of the window object, if you don't, the window will
// be closed automatically when the javascript object is GCed.
var mainWindow = null;

// Quit when all windows are closed (except on OS X, identified by 'darwin').
app.on('window-all-closed', function() {
  if (process.platform !== 'darwin') {
    app.quit();
  }
});

// This method will be called when atom-shell has done everything
// initialization and ready for creating browser windows.
app.on('ready', function() {
  // Create the browser window.
  mainWindow = new BrowserWindow({
    width: 800,
    height: 600
  });

  // and load the index.html of the app.
  var targetPath = path.resolve(__dirname, 'index.html');
  mainWindow.loadUrl('file://'+targetPath);

  // Emitted when the window is closed.
  mainWindow.on('closed', function() {
    // Dereference the window object, usually you would store windows
    // in an array if your app supports multi windows, this is the time
    // when you should delete the corresponding element.
    mainWindow = null;
  });
});

index.html

<!DOCTYPE html>
<html>
<!-- The charset declaration belongs inside the root element (it ends up in
     the implied <head>), so it is placed after the <html> start tag. -->
<meta charset="UTF-8">
<title>Hello,Cocoa</title>
<h2>Hello NodObjC</h2>
We are using node.js
<script>
  document.write(process.version)
</script>.
<div id="hello"></div>
<!-- The filtered image is drawn into this canvas by the script below. -->
<canvas id="result"></canvas>
<script>
// Copyright (c) 2011-2012, Motorola Mobility, Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of the Motorola Mobility, Inc. nor the names of its
//    contributors may be used to endorse or promote products derived from this
//    software without specific prior written permission.
//
//  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


  // Node.js modules loaded inside the page. They are declared with `var` so
  // that they are explicit globals: ResetKernelArgs() and BoxFilterGPU(),
  // defined later in this script, read several of them.
  var WebCL = require('node-webcl');
  console.log(WebCL);
  var clu = require('./lib/clUtils');
  var util = require('util');
  var fs = require('fs');
  // NOTE: this shadows the DOM `Image` constructor for the rest of the page.
  var Image = require('node-image').Image;
  // Bound so that calling log(...) works even where console.log needs its
  // `console` receiver.
  var log = console.log.bind(console);


// First check if the webcl extension is installed at all.
// (`== undefined` intentionally matches both null and undefined.)
if (WebCL == undefined) {
  alert("Unfortunately your system does not support WebCL. " +
  "Make sure that you have the WebCL extension installed.");
  // Top-level script code cannot `return`; throw so that the code below does
  // not go on to dereference the missing module.
  throw new Error("node-webcl is not available");
}

process.on('exit',function() {
  log('Exiting app');
  log(util.inspect(process.memoryUsage()));
});


// Box processing params
var uiNumOutputPix = 64;                    // Default output pix per workgroup... may be modified depending HW/OpenCl caps
var iRadius = 10;                           // initial radius of 2D box filter mask
var fScale = 1/(2 * iRadius + 1);  // precalculated GV rescaling value

// OpenCL variables
var ckBoxRowsTex;             // OpenCL Kernel for row sum (using 2d Image/texture)
var ckBoxColumns;             // OpenCL for column sum and normalize
var cmDevBufIn;               // OpenCL device memory object (buffer or 2d Image) for input data
var cmDevBufTemp;             // OpenCL device memory temp buffer object
var cmDevBufOut;              // OpenCL device memory output buffer object
var szBuffBytes;              // Size of main image buffers
var szGlobalWorkSize=[0,0];      // global # of work items
var szLocalWorkSize= [0,0];       // work group # of work items
var szMaxWorkgroupSize = 512; // initial max # of work items
var uiTargetDevice = 0;       // index of the preferred device in the device list

// load image
var file = __dirname+'/lenaRGB.jpg';
console.log('Loading image '+file);
var img=Image.load(file);
var image=img.convertTo32Bits();
szBuffBytes = image.height*image.pitch;
//img.unload();
console.log('Image '+file+': \n'+util.inspect(image));

//Pick platform
var platformList=WebCL.getPlatforms();
var platform=platformList[0];

//Query the set of devices on this platform
var devices = platform.getDevices(WebCL.DEVICE_TYPE_ALL);
console.log("  # of Devices Available = "+devices.length);
if (devices.length === 0) {
  throw new Error("No OpenCL device available on the selected platform");
}
// Keep the preferred index inside the list of devices actually found.
// (The index has to hold a number before it is clamped; clamping an
// undefined value does not yield a usable index.)
uiTargetDevice = clu.clamp(uiTargetDevice, 0, (devices.length - 1));
var device=devices[uiTargetDevice];
console.log("  Using Device "+ uiTargetDevice+": "+device.getInfo(WebCL.DEVICE_NAME));

// Loose comparison kept on purpose: the type returned by getInfo() (boolean
// or number) is not visible from this file.
var hasImageSupport=device.getInfo(WebCL.DEVICE_IMAGE_SUPPORT);
if(hasImageSupport != WebCL.TRUE) {
  // Only reported; execution continues as in the original sample.
  log("No image support");
}

var numComputeUnits=device.getInfo(WebCL.DEVICE_MAX_COMPUTE_UNITS);
console.log('  # of Compute Units = '+numComputeUnits);

console.log('  createContext...');
var context=WebCL.createContext({
  devices: device,
  platform: platform
});

// Create a command-queue
var queue=context.createCommandQueue(device, 0);

// Allocate OpenCL object for the source data
var InputFormat= {
  order : WebCL.RGBA,
  data_type : WebCL.UNSIGNED_INT8,
  size : [ image.width, image.height ],
  rowPitch : image.pitch
};

//2D Image (Texture) on device
cmDevBufIn = context.createImage(WebCL.MEM_READ_ONLY | WebCL.MEM_USE_HOST_PTR, InputFormat, image.buffer);

var RowSampler = context.createSampler(false, WebCL.ADDRESS_CLAMP, WebCL.FILTER_NEAREST);

// Allocate the OpenCL intermediate and result buffer memory objects on the device GMEM
cmDevBufTemp = context.createBuffer(WebCL.MEM_READ_WRITE, szBuffBytes);
cmDevBufOut = context.createBuffer(WebCL.MEM_WRITE_ONLY, szBuffBytes);

//Create the program
var sourceCL = fs.readFileSync(__dirname+'/BoxFilter.cl','ascii');
var cpProgram = context.createProgram(sourceCL);

var sBuildOpts = "-cl-fast-relaxed-math";
var ciErrNum = cpProgram.build(device, sBuildOpts);

// Create kernels
ckBoxRowsTex = cpProgram.createKernel("BoxRowsTex");
ckBoxColumns = cpProgram.createKernel("BoxColumns");

// set the kernel args
ResetKernelArgs(image.width, image.height, iRadius, fScale);

// Warmup call to assure OpenCL driver is awake
BoxFilterGPU (image, cmDevBufOut, iRadius, fScale);
queue.finish();

// launch processing on the GPU
BoxFilterGPU (image, cmDevBufOut, iRadius, fScale);
queue.finish();

// Copy results back to host memory, block until complete
var uiOutput=new Uint8Array(szBuffBytes);
queue.enqueueReadBuffer(cmDevBufOut, WebCL.TRUE, 0, szBuffBytes, uiOutput);

// PNG uses 32-bit images, JPG can only work on 24-bit images
if(!Image.save('out_'+iRadius+'.png',uiOutput, image.width,image.height, image.pitch, image.bpp, 0xFF0000, 0x00FF00, 0xFF)) {
  log("Error saving image");
}

console.log(util.inspect(process.memoryUsage()));

// Show the filtered image in the page. The canvas is resized to the image
// and the pixels read back from the device are copied into the ImageData;
// drawing a freshly created (empty) ImageData would leave the canvas blank.
var outCs = document.getElementById("result");
outCs.width = image.width;
outCs.height = image.height;
var ctx = outCs.getContext("2d");
var resultPixels = ctx.createImageData(image.width, image.height);
// Source rows are image.pitch bytes apart (4 bytes per pixel after
// convertTo32Bits); ImageData rows are tightly packed RGBA.
// Assumes blue,green,red byte order in uiOutput, as implied by the channel
// masks passed to Image.save above (red = 0xFF0000) - TODO confirm.
// NOTE(review): whether node-image stores rows top-down or bottom-up is not
// visible here; if the picture shows upside down, read rows in reverse.
for (var y = 0; y < image.height; y++) {
  var src = y * image.pitch;
  var dst = y * image.width * 4;
  for (var x = 0; x < image.width; x++, src += 4, dst += 4) {
    resultPixels.data[dst]     = uiOutput[src + 2]; // red
    resultPixels.data[dst + 1] = uiOutput[src + 1]; // green
    resultPixels.data[dst + 2] = uiOutput[src];     // blue
    resultPixels.data[dst + 3] = 255;               // force opaque for display
  }
}
ctx.putImageData(resultPixels, 0, 0);
/**
 * Binds every argument of the two box-filter kernels (ckBoxRowsTex and
 * ckBoxColumns). Both kernels take their memory/sampler objects first and
 * then the same four typed scalars: width, height, r and fScale.
 *
 * @param {number} width  - passed to both kernels as WebCL.type.UINT
 * @param {number} height - passed to both kernels as WebCL.type.UINT
 * @param {number} r      - passed to both kernels as WebCL.type.INT
 * @param {number} fScale - passed to both kernels as WebCL.type.FLOAT
 */
function ResetKernelArgs(width, height, r, fScale)
{
  var scalarTypes = WebCL.type;
  // Typed scalar arguments shared by both kernels, in binding order.
  var typedScalars = [
    [width, scalarTypes.UINT],
    [height, scalarTypes.UINT],
    [r, scalarTypes.INT],
    [fScale, scalarTypes.FLOAT]
  ];

  // Binds `objects` at indices 0..n-1 (two-argument setArg form), followed
  // by the typed scalars (three-argument setArg form).
  function bindAll(kernel, objects) {
    var slot = 0;
    objects.forEach(function(object) {
      kernel.setArg(slot, object);
      slot += 1;
    });
    typedScalars.forEach(function(scalar) {
      kernel.setArg(slot, scalar[0], scalar[1]);
      slot += 1;
    });
  }

  // Row kernel (image/texture version): input image, temp buffer, sampler.
  bindAll(ckBoxRowsTex, [cmDevBufIn, cmDevBufTemp, RowSampler]);

  // Column kernel: temp buffer in, output buffer out.
  bindAll(ckBoxColumns, [cmDevBufTemp, cmDevBufOut]);
}

//OpenCL computation function for GPU:
//Copies input data to the device, runs kernel, copies output data back to host
//*****************************************************************************
/**
 * Runs one box-filter pass on the device: uploads the image, launches the
 * row kernel, then the column kernel, and waits for the queue to drain.
 * Uses the shared queue, kernels and work-size arrays of this script.
 *
 * @param {Object} image - its width, height and buffer properties are read
 * @param {Object} cmOutputBuffer - bound as argument 1 of the column kernel
 * @param {number} r - not used by this function
 * @param {number} fScale - not used by this function
 */
function BoxFilterGPU(image, cmOutputBuffer, r, fScale)
{
  // Fills the shared work-size arrays with a one-dimensional launch
  // configuration: `groupSize` items per work group and a global size of
  // groupSize * clu.DivUp(extent, groupSize).
  function configureWorkSizes(groupSize, extent) {
    szLocalWorkSize[0] = groupSize;
    szLocalWorkSize[1] = 1;
    var itemsPerGroup = szLocalWorkSize[0];
    szGlobalWorkSize[0] = itemsPerGroup * clu.DivUp(extent, itemsPerGroup);
    szGlobalWorkSize[1] = 1;
  }

  // The column kernel writes into the caller-supplied output buffer.
  ckBoxColumns.setArg(1, cmOutputBuffer);

  // Upload the whole host image into the input texture (blocking write).
  var origin = [0, 0, 0];
  var region = [image.width, image.height, 1];
  console.log('enqueue image: origin=' + origin + ", region=" + region);
  queue.enqueueWriteImage(cmDevBufIn, WebCL.TRUE, origin, region, 0, 0, image.buffer);

  // Row pass: the launch size is derived from the image height.
  configureWorkSizes(uiNumOutputPix, image.height);
  console.log("row kernel work sizes: global=" + szGlobalWorkSize + " local=" + szLocalWorkSize);
  queue.finish(); // sync host before launching
  queue.enqueueNDRangeKernel(ckBoxRowsTex, null, szGlobalWorkSize, szLocalWorkSize);

  // Column pass: fixed group size of 64, launch size derived from the width.
  configureWorkSizes(64, image.width);
  console.log("column kernel work sizes: global=" + szGlobalWorkSize + " local=" + szLocalWorkSize);
  queue.enqueueNDRangeKernel(ckBoxColumns, null, szGlobalWorkSize, szLocalWorkSize);

  // Wait until both kernels have completed.
  queue.finish();
}

</script>

</html>

関連記事

10年前の記事

3年前の記事

1年前の記事

2年後の記事

3年後の記事