function [C, comptime] = parallel_mthread

%===============================================================================
% MATLAB EXAMPLE: PARALLEL HELLO WORLD USING MULTITHREADING
%                 -> matrix-matrix multiplication C = A*B
%
% INPUT (environment variables)
%   DIM_A ............ size of A, e.g. "[1000 500]"; a scalar n means n-by-n
%   DIM_B ............ size of B, same format; rows of B must equal the
%                      columns of A
%   SLURM_CLUSTER_NAME, SLURM_JOB_CPUS_PER_NODE, SLURM_CPUS_PER_TASK
%                      used to choose the number of computational threads
%
% OUTPUT
%   C ................ result
%   comptime ......... computation time (matrix product only)
%
% ERRORS
%   Raised if DIM_A/DIM_B are missing, are not positive integer sizes, or if
%   the inner dimensions of A and B do not match.
%===============================================================================

%===============================================================================
% Get input from environment variables
%===============================================================================
size_A = read_dims('DIM_A');
size_B = read_dims('DIM_B');
if size_A(2) ~= size_B(1)
    % error() formats the message itself; no sprintf wrapper is needed.
    error('parallel_mthread:dimensionMismatch', ...
          'Dimension mismatch of A (%d columns) and B (%d rows)!', ...
          size_A(2), size_B(1));
end

%===============================================================================
% Manage multithreading
%===============================================================================
% Get number of threads depending on job type (batch job or interactive job).
% In batch jobs 1 MATLAB task will use "nw" threads.
%
% obtain number of threads from Slurm environment variables
cluster = getenv('SLURM_CLUSTER_NAME');
if strcmp(cluster, 'inter')
    % interactive job
    nw = read_thread_count('SLURM_JOB_CPUS_PER_NODE');
elseif strcmp(cluster, 'cm4') || ...
       strcmp(cluster, 'serial')
    % batch job
    nw = read_thread_count('SLURM_CPUS_PER_TASK');
else
    % default
    nw = 1;
end
% set threads
maxNumCompThreads(nw);

%===============================================================================
% Work
%===============================================================================
% Query the host name through the two-output form of system(), which returns
% the command output instead of echoing it (no evalc needed).
[~, hostname] = system('hostname');
fprintf('Hello from MATLAB process PID=%d running on node %s!\n',...
        feature('getpid'),...
        strtrim(hostname));

% generate well-defined matrices
NA = prod(size_A);
NB = prod(size_B);
A = reshape( linspace( 1,NA, NA), size_A );
B = reshape( linspace(NB, 1, NB), size_B );

% compute (only the product itself is timed)
tic;
C = A*B;
comptime = toc;
fprintf('parallel computation (multithreading) of matrix-matrix product:\n');
fprintf('\tnumber of threads = %d\n', nw);
fprintf('\ttime = %.2f s\n', comptime);


function dims = read_dims(name)
%===============================================================================
% Read a matrix size from the environment variable "name".
% Returns a 1-by-2 row vector of positive integers; a scalar n is expanded
% to [n n]. Errors if the variable is unset/empty or not a valid size.
%===============================================================================
% NOTE(review): str2num evaluates its argument as MATLAB code. It is kept so
% that existing job scripts (e.g. DIM_A="[1000 1000]") continue to work; the
% environment must therefore be trusted.
dims = str2num(getenv(name)); %#ok<ST2NM>
if isempty(dims)
    error('parallel_mthread:missingInput', 'Missing input arguments!');
end
if ~isnumeric(dims) || ~isreal(dims) || numel(dims) > 2 || ...
   any(~isfinite(dims(:))) || any(dims(:) < 1) || ...
   any(dims(:) ~= fix(dims(:)))
    error('parallel_mthread:invalidInput', ...
          ['%s must be a positive integer or a pair of positive ' ...
           'integers, e.g. "[1000 500]"!'], name);
end
dims = double(dims(:).');
if isscalar(dims)
    % scalar n means a square n-by-n matrix
    dims = [dims dims];
end


function nw = read_thread_count(name)
%===============================================================================
% Read a thread count from the environment variable "name".
% Only the leading integer is used, so Slurm's compressed notation such as
% "28(x2)" or "28,14" yields 28. Falls back to 1 thread if the variable is
% unset or does not start with a positive integer.
%===============================================================================
token = regexp(getenv(name), '^\s*(\d+)', 'tokens', 'once');
if isempty(token)
    nw = 1;
else
    nw = max(1, str2double(token{1}));
end