Parallele Computing - 2 vs. 4 processor speed












3












$begingroup$


I am evaluating a code which ends with Table having ParallelEvaluate of a function XXXX[phi, theta, si]. For a grid of 225 points, a normal 2 processor laptop is taking 7 h as compared to 8.30 h by a high end Xeon 4 processor computer. CPU and memory usage for laptop and computer are about 66% vs 99% and 700MB vs 900 MB respectively. Will be thankful for any suggestion on how to improve the evaluation speed on computer. Thanks



Ea = 500000;
R[0] = {f[0, 1] -> 1/6, f[0, 2] -> 1/6, f[0, 3] -> 1/6,
f[0, 4] -> 1/6 , f[0, 5] -> 1/6, f[0, 6] -> 1/6};
DF[m_] := Table[f[m, n], {n, 1, 6}];
W90 = Sqrt[2]*P*Ec;
W180 = 2*P*Ec;
P = 0.26;
Ec = 100000;
epsilonApplied[phi_, theta_, si_,
m_] = {{0}, {0}, {0}, {0}, {0}, {0}, {Ea*Sin[theta]*Sin[phi]}, {Ea*
Sin[theta]*Cos[phi]}, {Ea*Cos[theta]}};
SigmaApplied[phi_, theta_, si_,
m_] = {{0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}};
EigenStrain[phi_, theta_, si_,
m_] = {{-0.00333333 (f[m, 1] + f[m, 2]) +
0.00666667 (f[m, 3] + f[m, 4]) -
0.00333333 (f[m, 5] + f[m, 6])}, {-0.00333333 (f[m, 1] +
f[m, 2]) - 0.00333333 (f[m, 3] + f[m, 4]) +
0.00666667 (f[m, 5] + f[m, 6])}, {0.00666667 (f[m, 1] +
f[m, 2]) - 0.00333333 (f[m, 3] + f[m, 4]) -
0.00333333 (f[m, 5] + f[m, 6])}, {0.}, {0.}, {0.}, {0}, {0}, {0}};
ES1 = {{0.768576, 0.00232016, 0.120616, 0, 0, 0, 0,
0, -2.62804*10^-11}, {0.00232016, 0.768576, 0.120616, 0, 0, 0, 0,
0, -2.62804*10^-11}, {0.02064, 0.02064, 0.503286, 0, 0, 0, 0, 0,
0}, {0, 0, 0, -1.14762, 0, 0, 0, -3.52489*10^-10, 0}, {0, 0, 0,
0, -1.14762, 0, -3.52489*10^-10, 0, 0}, {0, 0, 0, 0, 0, 0.191583,
0, 0, 0}, {0, 0, 0, 0, -6.08402*10^7, 0, 0.315028, 0, 0}, {0, 0,
0, -6.08402*10^7, 0, 0, 0, 0.315028, 0}, {2.44999*10^8,
2.44999*10^8, 1.2168*10^8, 0, 0, 0, 0, 0, 0.369943}};
ES = Rationalize[ES1, 10^-16];

E99 = {{1, 0, 0, 0, 0, 0, 0, 0, 0}, {0, 1, 0, 0, 0, 0, 0, 0, 0}, {0,
0, 1, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 1/2, 0, 0, 0, 0, 0}, {0, 0, 0,
0, 1/2, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 1/2, 0, 0, 0}, {0, 0, 0, 0,
0, 0, 1, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 1, 0}, {0, 0, 0, 0, 0, 0, 0,
0, 1}};
Lmatrix = {{166000000000, 77000000000, 78000000000, 0, 0, 0, 0,
0, -4.4}, {77000000000, 166000000000, 78000000000, 0, 0, 0, 0,
0, -4.4}, {78000000000, 78000000000, 162000000000, 0, 0, 0, 0, 0,
18.6}, {0, 0, 0, 43000000000, 0, 0, 0, 11.6, 0}, {0, 0, 0, 0,
43000000000, 0, 11.6, 0, 0}, {0, 0, 0, 0, 0, 44500000000, 0, 0,
0}, {0, 0, 0, 0, 11.6, 0, -1000*8.85*10^-12, 0, 0}, {0, 0, 0,
11.6, 0, 0, 0, -1000*8.85*10^-12, 0}, {-4.4, -4.4, 18.6, 0, 0, 0,
0, 0, -910*8.85*10^-12}};
a[1, 1, phi_, theta_, si_] =
Cos[phi]*Cos[si] - Cos[theta]*Sin[phi]*Sin[si];
a[1, 2, phi_, theta_, si_] =
Cos[si]*Sin[phi] + Cos[theta]*Cos[phi]*Sin[si];
a[1, 3, phi_, theta_, si_] = Sin[theta]*Sin[si];
a[2, 1, phi_, theta_, si_] = -Cos[theta]*Cos[si]*Sin[phi] -
Cos[phi]*Sin[si];
a[2, 2, phi_, theta_, si_] = -Sin[phi]*Sin[si] +
Cos[theta]*Cos[phi]*Cos[si];
a[2, 3, phi_, theta_, si_] = Cos[si]*Sin[theta];
a[3, 1, phi_, theta_, si_] = Sin[theta]*Sin[phi];
a[3, 2, phi_, theta_, si_] = -Cos[phi]*Sin[theta];
a[3, 3, phi_, theta_, si_] = Cos[theta];
CC[1, 1] = CC[2, 2] = 222*10^9;
CC[1, 2] = 108*10^9;
CC[1, 3] = CC[2, 3] = 111*10^9;
CC[3, 3] = 151*10^9;
CC[4, 4] = CC[5, 5] = 61*10^9;
CC[6, 6] = 134*10^9;
CC[1, 4] =
CC[1, 5] =
CC[1, 6] =
CC[2, 4] =
CC[2, 5] =
CC[2, 6] =
CC[3, 4] =
CC[3, 5] = CC[3, 6] = CC[4, 5] = CC[4, 6] = CC[5, 6] = 0;
A = {{1, 1}, {2, 2}, {3, 3}, {2, 3}, {1, 3}, {1, 2}};

B[i_, j_, k_, l_, phi_, theta_, si_] :=
Module[{aA = {A[[i]], A[[j]], A[[k]], A[[l]]} }, {a[Part[aA, 1, 1],
Part[aA, 3, 1], phi, theta, si],
a[Part[aA, 1, 2], Part[aA, 3, 2], phi, theta, si],
a[Part[aA, 2, 1], Part[aA, 4, 1], phi, theta, si],
a[Part[aA, 2, 2], Part[aA, 4, 2], phi, theta, si]}];
F[i_, j_, k_, l_, phi_, theta_, si_] :=
Module[{bB = B[i, j, k, l, phi, theta, si]},
Part[bB, 1]*Part[bB, 2]*Part[bB, 3]*Part[bB, 4]];
cfmat[phi_, theta_, si_, m_] :=
Table[Sum[
F[i, j, k, l, phi, theta, si]*If[k > l, CC[l, k], CC[k, l]], {k,
1, 6}, {l, 1, 6}], {i, 1, 6}, {j, 1, 6}]

Lgrain[phi_, theta_, si_, m_] =
Module[{ rule1 =
Flatten[Table[
Subscript[cf, i, j] -> cfmat[phi, theta, si, m][[i, j]], {i, 1,
6}, {j, 1, 6}]]}, {{Subscript[cf, 1, 1], Subscript[cf, 1, 2],
Subscript[cf, 1, 3], 0, 0, 0, 0, 0, -4.4}, {Subscript[cf, 2,
1], Subscript[cf, 2, 2], Subscript[cf, 2, 3], 0, 0, 0, 0,
0, -4.4}, {Subscript[cf, 3, 1], Subscript[cf, 3, 2], Subscript[
cf, 3, 3], 0, 0, 0, 0, 0, 18.6}, {0, 0, 0, Subscript[cf, 4, 4],
0, 0, 0, 11.6, 0}, {0, 0, 0, 0, Subscript[cf, 5, 5], 0, 11.6, 0,
0}, {0, 0, 0, 0, 0, Subscript[cf, 6, 6], 0, 0, 0}, {0, 0, 0, 0,
11.6, 0, -2200*8.85*10^-12, 0, 0}, {0, 0, 0, 11.6, 0, 0,
0, -2200*8.85*10^-12, 0}, {-4.4, -4.4, 18.6, 0, 0, 0, 0,
0, -56*8.85*10^-12}} /. rule1];

H = Lmatrix.(Inverse[ES] - E99);

TrueStrain[phi_, theta_, si_, m_] =
epsilonApplied[phi, theta, si,
m] + (Inverse[
H + Lgrain[phi, theta, si, m]].(Lgrain[phi, theta, si,
m].EigenStrain[phi, theta, si, m]));

TrueStress[phi_, theta_, si_, m_] =
SigmaApplied[phi, theta, si, m] -
H.(TrueStrain[phi, theta, si, m] -
epsilonApplied[phi, theta, si, m]);
Et[phi_, theta_, si_, m_] =
If[m == 1, Ea,
Sqrt[Part[TrueStrain[phi, theta, si, m - 1], 7]*
Part[TrueStrain[phi, theta, si, m - 1], 7] +
Part[TrueStrain[phi, theta, si, m - 1], 8]*
Part[TrueStrain[phi, theta, si, m - 1], 8] +
Part[TrueStrain[phi, theta, si, m - 1], 9]*
Part[TrueStrain[phi, theta, si, m - 1], 9]]];

PE[phi_, theta_, si_, m_, 1] = -P*Cos[theta]*Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 2] = P*Cos[theta]*Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 3] = -P*Sin[theta]*Cos[phi]*
Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 4] =
P*Sin[theta]*Cos[phi]*Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 5] = -P*Sin[theta]*Sin[phi]*
Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 6] =
P*Sin[theta]*Sin[phi]*Et[phi, theta, si, m];

SE[phi_, theta_, si_,
m_] = -0.5*(Transpose[TrueStress[phi, theta, si, m]].TrueStrain[
phi, theta, si, m]);

SEM[phi_, theta_, si_, m_] = Tr[SE[phi, theta, si, m]];
DC2 := DeleteCases[DeleteCases[{1, 2, 3, 4, 5, 6}, i], j];

DSC[phi_, theta_, si_, m_] := Module[{diff, tabs, tab36, rules},
rules =
Flatten[Table[
Subscript[cf, i, j] -> cfmat[phi, theta, si, m][[i, j]], {i, 1,
6}, {j, 1, 6}]];
diff =
2*(SEM[phi, theta, si, m] /. rules) -
2*(SEM[phi, theta, si, m - 1] /. rules);
tabs =
Flatten[MapIndexed[Flatten@*List,
Table[2*{PE[phi, theta, si, m, j] -
PE[phi, theta, si, m, i]}, {i, 1, 6, 1}, {j, 1, 6,
1}], {2}], 1];
tab36 =
ArrayReshape[
Table[{If[i == j, 0,
diff /. {f[m, i] -> 0,
f[m, j] -> f[m - 1, j] + f[m - 1, i],
f[m, DC2[[1]]] -> f[m - 1, DC2[[1]]],
f[m, DC2[[4]]] -> f[m - 1, DC2[[4]]],
f[m, DC2[[2]]] -> f[m - 1, DC2[[2]]],
f[m, DC2[[3]]] -> f[m - 1, DC2[[3]]]}] +If[i == j, 0,
If[i == 1 && j == 2 || i == 2 && j == 1 || i == 3 && j == 4 ||
i == 4 && j == 3 || i == 5 && j == 6 || i == 6 && j == 5,
2 W180, 2 W90]], i, j}, {i, 1, 6}, {j, 1, 6}], {36, 3}];
(tabs + tab36)/2
];

XXXX[phi_, theta_, si_] :=
Last[Last[
Last[Reap[X = 1;
Do[Do[dsc = DSC[phi, theta, si, m];
If[(SortBy[dsc /. R[m - 1], First][[k]][[1]]) < 0,
DC[phi, theta, si, m] =
DeleteCases[
DeleteCases[{1, 2, 3, 4, 5, 6},
SortBy[dsc /. R[m - 1], First][[k]][[3]]],
SortBy[dsc /. R[m - 1], First][[k]][[2]]];
R[m] = {f[m, SortBy[dsc /. R[m - 1], First][[k]][[3]]] ->
f[m - 1, SortBy[dsc /. R[m - 1], First][[k]][[3]]] +
f[m - 1, SortBy[dsc /. R[m - 1], First][[k]][[2]]],
f[m, SortBy[dsc /. R[m - 1], First][[k]][[2]]] -> 0,
f[m, DC[phi, theta, si, m][[1]]] ->
f[m - 1, DC[phi, theta, si, m][[1]]],
f[m, DC[phi, theta, si, m][[2]]] ->
f[m - 1, DC[phi, theta, si, m][[2]]],
f[m, DC[phi, theta, si, m][[3]]] ->
f[m - 1, DC[phi, theta, si, m][[3]]],
f[m, DC[phi, theta, si, m][[4]]] ->
f[m - 1, DC[phi, theta, si, m][[4]]]} /. R[m - 1];
X = k + 1; Sow[Sort[R[m]]];
If[Sort[R[m]][[All, 2]] != Sort[R[m - 1]][[All, 2]],
Break], R[m] = R[m - 1]; Break], {k, X, 15}], {m, 1,
4}]]]]][[All, 2]]
XX1 = Table[{ParallelEvaluate[XXXX[phi, theta, si]], phi, theta,
si}, {phi, 0, Pi/4, Pi/56}, {theta, 0, ArcCot[Cos[phi]],
ArcCot[Cos[phi]]/14}, {si, 0 Pi, 0 Pi, 0}]









share|improve this question











$endgroup$

















    3












    $begingroup$


    I am evaluating a code which ends with Table having ParallelEvaluate of a function XXXX[phi, theta, si]. For a grid of 225 points, a normal 2 processor laptop is taking 7 h as compared to 8.30 h by a high end Xeon 4 processor computer. CPU and memory usage for laptop and computer are about 66% vs 99% and 700MB vs 900 MB respectively. Will be thankful for any suggestion on how to improve the evaluation speed on computer. Thanks



    Ea = 500000;
    R[0] = {f[0, 1] -> 1/6, f[0, 2] -> 1/6, f[0, 3] -> 1/6,
    f[0, 4] -> 1/6 , f[0, 5] -> 1/6, f[0, 6] -> 1/6};
    DF[m_] := Table[f[m, n], {n, 1, 6}];
    W90 = Sqrt[2]*P*Ec;
    W180 = 2*P*Ec;
    P = 0.26;
    Ec = 100000;
    epsilonApplied[phi_, theta_, si_,
    m_] = {{0}, {0}, {0}, {0}, {0}, {0}, {Ea*Sin[theta]*Sin[phi]}, {Ea*
    Sin[theta]*Cos[phi]}, {Ea*Cos[theta]}};
    SigmaApplied[phi_, theta_, si_,
    m_] = {{0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}};
    EigenStrain[phi_, theta_, si_,
    m_] = {{-0.00333333 (f[m, 1] + f[m, 2]) +
    0.00666667 (f[m, 3] + f[m, 4]) -
    0.00333333 (f[m, 5] + f[m, 6])}, {-0.00333333 (f[m, 1] +
    f[m, 2]) - 0.00333333 (f[m, 3] + f[m, 4]) +
    0.00666667 (f[m, 5] + f[m, 6])}, {0.00666667 (f[m, 1] +
    f[m, 2]) - 0.00333333 (f[m, 3] + f[m, 4]) -
    0.00333333 (f[m, 5] + f[m, 6])}, {0.}, {0.}, {0.}, {0}, {0}, {0}};
    ES1 = {{0.768576, 0.00232016, 0.120616, 0, 0, 0, 0,
    0, -2.62804*10^-11}, {0.00232016, 0.768576, 0.120616, 0, 0, 0, 0,
    0, -2.62804*10^-11}, {0.02064, 0.02064, 0.503286, 0, 0, 0, 0, 0,
    0}, {0, 0, 0, -1.14762, 0, 0, 0, -3.52489*10^-10, 0}, {0, 0, 0,
    0, -1.14762, 0, -3.52489*10^-10, 0, 0}, {0, 0, 0, 0, 0, 0.191583,
    0, 0, 0}, {0, 0, 0, 0, -6.08402*10^7, 0, 0.315028, 0, 0}, {0, 0,
    0, -6.08402*10^7, 0, 0, 0, 0.315028, 0}, {2.44999*10^8,
    2.44999*10^8, 1.2168*10^8, 0, 0, 0, 0, 0, 0.369943}};
    ES = Rationalize[ES1, 10^-16];

    E99 = {{1, 0, 0, 0, 0, 0, 0, 0, 0}, {0, 1, 0, 0, 0, 0, 0, 0, 0}, {0,
    0, 1, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 1/2, 0, 0, 0, 0, 0}, {0, 0, 0,
    0, 1/2, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 1/2, 0, 0, 0}, {0, 0, 0, 0,
    0, 0, 1, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 1, 0}, {0, 0, 0, 0, 0, 0, 0,
    0, 1}};
    Lmatrix = {{166000000000, 77000000000, 78000000000, 0, 0, 0, 0,
    0, -4.4}, {77000000000, 166000000000, 78000000000, 0, 0, 0, 0,
    0, -4.4}, {78000000000, 78000000000, 162000000000, 0, 0, 0, 0, 0,
    18.6}, {0, 0, 0, 43000000000, 0, 0, 0, 11.6, 0}, {0, 0, 0, 0,
    43000000000, 0, 11.6, 0, 0}, {0, 0, 0, 0, 0, 44500000000, 0, 0,
    0}, {0, 0, 0, 0, 11.6, 0, -1000*8.85*10^-12, 0, 0}, {0, 0, 0,
    11.6, 0, 0, 0, -1000*8.85*10^-12, 0}, {-4.4, -4.4, 18.6, 0, 0, 0,
    0, 0, -910*8.85*10^-12}};
    a[1, 1, phi_, theta_, si_] =
    Cos[phi]*Cos[si] - Cos[theta]*Sin[phi]*Sin[si];
    a[1, 2, phi_, theta_, si_] =
    Cos[si]*Sin[phi] + Cos[theta]*Cos[phi]*Sin[si];
    a[1, 3, phi_, theta_, si_] = Sin[theta]*Sin[si];
    a[2, 1, phi_, theta_, si_] = -Cos[theta]*Cos[si]*Sin[phi] -
    Cos[phi]*Sin[si];
    a[2, 2, phi_, theta_, si_] = -Sin[phi]*Sin[si] +
    Cos[theta]*Cos[phi]*Cos[si];
    a[2, 3, phi_, theta_, si_] = Cos[si]*Sin[theta];
    a[3, 1, phi_, theta_, si_] = Sin[theta]*Sin[phi];
    a[3, 2, phi_, theta_, si_] = -Cos[phi]*Sin[theta];
    a[3, 3, phi_, theta_, si_] = Cos[theta];
    CC[1, 1] = CC[2, 2] = 222*10^9;
    CC[1, 2] = 108*10^9;
    CC[1, 3] = CC[2, 3] = 111*10^9;
    CC[3, 3] = 151*10^9;
    CC[4, 4] = CC[5, 5] = 61*10^9;
    CC[6, 6] = 134*10^9;
    CC[1, 4] =
    CC[1, 5] =
    CC[1, 6] =
    CC[2, 4] =
    CC[2, 5] =
    CC[2, 6] =
    CC[3, 4] =
    CC[3, 5] = CC[3, 6] = CC[4, 5] = CC[4, 6] = CC[5, 6] = 0;
    A = {{1, 1}, {2, 2}, {3, 3}, {2, 3}, {1, 3}, {1, 2}};

    B[i_, j_, k_, l_, phi_, theta_, si_] :=
    Module[{aA = {A[[i]], A[[j]], A[[k]], A[[l]]} }, {a[Part[aA, 1, 1],
    Part[aA, 3, 1], phi, theta, si],
    a[Part[aA, 1, 2], Part[aA, 3, 2], phi, theta, si],
    a[Part[aA, 2, 1], Part[aA, 4, 1], phi, theta, si],
    a[Part[aA, 2, 2], Part[aA, 4, 2], phi, theta, si]}];
    F[i_, j_, k_, l_, phi_, theta_, si_] :=
    Module[{bB = B[i, j, k, l, phi, theta, si]},
    Part[bB, 1]*Part[bB, 2]*Part[bB, 3]*Part[bB, 4]];
    cfmat[phi_, theta_, si_, m_] :=
    Table[Sum[
    F[i, j, k, l, phi, theta, si]*If[k > l, CC[l, k], CC[k, l]], {k,
    1, 6}, {l, 1, 6}], {i, 1, 6}, {j, 1, 6}]

    Lgrain[phi_, theta_, si_, m_] =
    Module[{ rule1 =
    Flatten[Table[
    Subscript[cf, i, j] -> cfmat[phi, theta, si, m][[i, j]], {i, 1,
    6}, {j, 1, 6}]]}, {{Subscript[cf, 1, 1], Subscript[cf, 1, 2],
    Subscript[cf, 1, 3], 0, 0, 0, 0, 0, -4.4}, {Subscript[cf, 2,
    1], Subscript[cf, 2, 2], Subscript[cf, 2, 3], 0, 0, 0, 0,
    0, -4.4}, {Subscript[cf, 3, 1], Subscript[cf, 3, 2], Subscript[
    cf, 3, 3], 0, 0, 0, 0, 0, 18.6}, {0, 0, 0, Subscript[cf, 4, 4],
    0, 0, 0, 11.6, 0}, {0, 0, 0, 0, Subscript[cf, 5, 5], 0, 11.6, 0,
    0}, {0, 0, 0, 0, 0, Subscript[cf, 6, 6], 0, 0, 0}, {0, 0, 0, 0,
    11.6, 0, -2200*8.85*10^-12, 0, 0}, {0, 0, 0, 11.6, 0, 0,
    0, -2200*8.85*10^-12, 0}, {-4.4, -4.4, 18.6, 0, 0, 0, 0,
    0, -56*8.85*10^-12}} /. rule1];

    H = Lmatrix.(Inverse[ES] - E99);

    TrueStrain[phi_, theta_, si_, m_] =
    epsilonApplied[phi, theta, si,
    m] + (Inverse[
    H + Lgrain[phi, theta, si, m]].(Lgrain[phi, theta, si,
    m].EigenStrain[phi, theta, si, m]));

    TrueStress[phi_, theta_, si_, m_] =
    SigmaApplied[phi, theta, si, m] -
    H.(TrueStrain[phi, theta, si, m] -
    epsilonApplied[phi, theta, si, m]);
    Et[phi_, theta_, si_, m_] =
    If[m == 1, Ea,
    Sqrt[Part[TrueStrain[phi, theta, si, m - 1], 7]*
    Part[TrueStrain[phi, theta, si, m - 1], 7] +
    Part[TrueStrain[phi, theta, si, m - 1], 8]*
    Part[TrueStrain[phi, theta, si, m - 1], 8] +
    Part[TrueStrain[phi, theta, si, m - 1], 9]*
    Part[TrueStrain[phi, theta, si, m - 1], 9]]];

    PE[phi_, theta_, si_, m_, 1] = -P*Cos[theta]*Et[phi, theta, si, m];
    PE[phi_, theta_, si_, m_, 2] = P*Cos[theta]*Et[phi, theta, si, m];
    PE[phi_, theta_, si_, m_, 3] = -P*Sin[theta]*Cos[phi]*
    Et[phi, theta, si, m];
    PE[phi_, theta_, si_, m_, 4] =
    P*Sin[theta]*Cos[phi]*Et[phi, theta, si, m];
    PE[phi_, theta_, si_, m_, 5] = -P*Sin[theta]*Sin[phi]*
    Et[phi, theta, si, m];
    PE[phi_, theta_, si_, m_, 6] =
    P*Sin[theta]*Sin[phi]*Et[phi, theta, si, m];

    SE[phi_, theta_, si_,
    m_] = -0.5*(Transpose[TrueStress[phi, theta, si, m]].TrueStrain[
    phi, theta, si, m]);

    SEM[phi_, theta_, si_, m_] = Tr[SE[phi, theta, si, m]];
    DC2 := DeleteCases[DeleteCases[{1, 2, 3, 4, 5, 6}, i], j];

    DSC[phi_, theta_, si_, m_] := Module[{diff, tabs, tab36, rules},
    rules =
    Flatten[Table[
    Subscript[cf, i, j] -> cfmat[phi, theta, si, m][[i, j]], {i, 1,
    6}, {j, 1, 6}]];
    diff =
    2*(SEM[phi, theta, si, m] /. rules) -
    2*(SEM[phi, theta, si, m - 1] /. rules);
    tabs =
    Flatten[MapIndexed[Flatten@*List,
    Table[2*{PE[phi, theta, si, m, j] -
    PE[phi, theta, si, m, i]}, {i, 1, 6, 1}, {j, 1, 6,
    1}], {2}], 1];
    tab36 =
    ArrayReshape[
    Table[{If[i == j, 0,
    diff /. {f[m, i] -> 0,
    f[m, j] -> f[m - 1, j] + f[m - 1, i],
    f[m, DC2[[1]]] -> f[m - 1, DC2[[1]]],
    f[m, DC2[[4]]] -> f[m - 1, DC2[[4]]],
    f[m, DC2[[2]]] -> f[m - 1, DC2[[2]]],
    f[m, DC2[[3]]] -> f[m - 1, DC2[[3]]]}] +If[i == j, 0,
    If[i == 1 && j == 2 || i == 2 && j == 1 || i == 3 && j == 4 ||
    i == 4 && j == 3 || i == 5 && j == 6 || i == 6 && j == 5,
    2 W180, 2 W90]], i, j}, {i, 1, 6}, {j, 1, 6}], {36, 3}];
    (tabs + tab36)/2
    ];

    XXXX[phi_, theta_, si_] :=
    Last[Last[
    Last[Reap[X = 1;
    Do[Do[dsc = DSC[phi, theta, si, m];
    If[(SortBy[dsc /. R[m - 1], First][[k]][[1]]) < 0,
    DC[phi, theta, si, m] =
    DeleteCases[
    DeleteCases[{1, 2, 3, 4, 5, 6},
    SortBy[dsc /. R[m - 1], First][[k]][[3]]],
    SortBy[dsc /. R[m - 1], First][[k]][[2]]];
    R[m] = {f[m, SortBy[dsc /. R[m - 1], First][[k]][[3]]] ->
    f[m - 1, SortBy[dsc /. R[m - 1], First][[k]][[3]]] +
    f[m - 1, SortBy[dsc /. R[m - 1], First][[k]][[2]]],
    f[m, SortBy[dsc /. R[m - 1], First][[k]][[2]]] -> 0,
    f[m, DC[phi, theta, si, m][[1]]] ->
    f[m - 1, DC[phi, theta, si, m][[1]]],
    f[m, DC[phi, theta, si, m][[2]]] ->
    f[m - 1, DC[phi, theta, si, m][[2]]],
    f[m, DC[phi, theta, si, m][[3]]] ->
    f[m - 1, DC[phi, theta, si, m][[3]]],
    f[m, DC[phi, theta, si, m][[4]]] ->
    f[m - 1, DC[phi, theta, si, m][[4]]]} /. R[m - 1];
    X = k + 1; Sow[Sort[R[m]]];
    If[Sort[R[m]][[All, 2]] != Sort[R[m - 1]][[All, 2]],
    Break], R[m] = R[m - 1]; Break], {k, X, 15}], {m, 1,
    4}]]]]][[All, 2]]
    XX1 = Table[{ParallelEvaluate[XXXX[phi, theta, si]], phi, theta,
    si}, {phi, 0, Pi/4, Pi/56}, {theta, 0, ArcCot[Cos[phi]],
    ArcCot[Cos[phi]]/14}, {si, 0 Pi, 0 Pi, 0}]









    share|improve this question











    $endgroup$















      3












      3








      3


      1



      $begingroup$


      I am evaluating a code which ends with Table having ParallelEvaluate of a function XXXX[phi, theta, si]. For a grid of 225 points, a normal 2 processor laptop is taking 7 h as compared to 8.30 h by a high end Xeon 4 processor computer. CPU and memory usage for laptop and computer are about 66% vs 99% and 700MB vs 900 MB respectively. Will be thankful for any suggestion on how to improve the evaluation speed on computer. Thanks



      Ea = 500000;
      R[0] = {f[0, 1] -> 1/6, f[0, 2] -> 1/6, f[0, 3] -> 1/6,
      f[0, 4] -> 1/6 , f[0, 5] -> 1/6, f[0, 6] -> 1/6};
      DF[m_] := Table[f[m, n], {n, 1, 6}];
      W90 = Sqrt[2]*P*Ec;
      W180 = 2*P*Ec;
      P = 0.26;
      Ec = 100000;
      epsilonApplied[phi_, theta_, si_,
      m_] = {{0}, {0}, {0}, {0}, {0}, {0}, {Ea*Sin[theta]*Sin[phi]}, {Ea*
      Sin[theta]*Cos[phi]}, {Ea*Cos[theta]}};
      SigmaApplied[phi_, theta_, si_,
      m_] = {{0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}};
      EigenStrain[phi_, theta_, si_,
      m_] = {{-0.00333333 (f[m, 1] + f[m, 2]) +
      0.00666667 (f[m, 3] + f[m, 4]) -
      0.00333333 (f[m, 5] + f[m, 6])}, {-0.00333333 (f[m, 1] +
      f[m, 2]) - 0.00333333 (f[m, 3] + f[m, 4]) +
      0.00666667 (f[m, 5] + f[m, 6])}, {0.00666667 (f[m, 1] +
      f[m, 2]) - 0.00333333 (f[m, 3] + f[m, 4]) -
      0.00333333 (f[m, 5] + f[m, 6])}, {0.}, {0.}, {0.}, {0}, {0}, {0}};
      ES1 = {{0.768576, 0.00232016, 0.120616, 0, 0, 0, 0,
      0, -2.62804*10^-11}, {0.00232016, 0.768576, 0.120616, 0, 0, 0, 0,
      0, -2.62804*10^-11}, {0.02064, 0.02064, 0.503286, 0, 0, 0, 0, 0,
      0}, {0, 0, 0, -1.14762, 0, 0, 0, -3.52489*10^-10, 0}, {0, 0, 0,
      0, -1.14762, 0, -3.52489*10^-10, 0, 0}, {0, 0, 0, 0, 0, 0.191583,
      0, 0, 0}, {0, 0, 0, 0, -6.08402*10^7, 0, 0.315028, 0, 0}, {0, 0,
      0, -6.08402*10^7, 0, 0, 0, 0.315028, 0}, {2.44999*10^8,
      2.44999*10^8, 1.2168*10^8, 0, 0, 0, 0, 0, 0.369943}};
      ES = Rationalize[ES1, 10^-16];

      E99 = {{1, 0, 0, 0, 0, 0, 0, 0, 0}, {0, 1, 0, 0, 0, 0, 0, 0, 0}, {0,
      0, 1, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 1/2, 0, 0, 0, 0, 0}, {0, 0, 0,
      0, 1/2, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 1/2, 0, 0, 0}, {0, 0, 0, 0,
      0, 0, 1, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 1, 0}, {0, 0, 0, 0, 0, 0, 0,
      0, 1}};
      Lmatrix = {{166000000000, 77000000000, 78000000000, 0, 0, 0, 0,
      0, -4.4}, {77000000000, 166000000000, 78000000000, 0, 0, 0, 0,
      0, -4.4}, {78000000000, 78000000000, 162000000000, 0, 0, 0, 0, 0,
      18.6}, {0, 0, 0, 43000000000, 0, 0, 0, 11.6, 0}, {0, 0, 0, 0,
      43000000000, 0, 11.6, 0, 0}, {0, 0, 0, 0, 0, 44500000000, 0, 0,
      0}, {0, 0, 0, 0, 11.6, 0, -1000*8.85*10^-12, 0, 0}, {0, 0, 0,
      11.6, 0, 0, 0, -1000*8.85*10^-12, 0}, {-4.4, -4.4, 18.6, 0, 0, 0,
      0, 0, -910*8.85*10^-12}};
      a[1, 1, phi_, theta_, si_] =
      Cos[phi]*Cos[si] - Cos[theta]*Sin[phi]*Sin[si];
      a[1, 2, phi_, theta_, si_] =
      Cos[si]*Sin[phi] + Cos[theta]*Cos[phi]*Sin[si];
      a[1, 3, phi_, theta_, si_] = Sin[theta]*Sin[si];
      a[2, 1, phi_, theta_, si_] = -Cos[theta]*Cos[si]*Sin[phi] -
      Cos[phi]*Sin[si];
      a[2, 2, phi_, theta_, si_] = -Sin[phi]*Sin[si] +
      Cos[theta]*Cos[phi]*Cos[si];
      a[2, 3, phi_, theta_, si_] = Cos[si]*Sin[theta];
      a[3, 1, phi_, theta_, si_] = Sin[theta]*Sin[phi];
      a[3, 2, phi_, theta_, si_] = -Cos[phi]*Sin[theta];
      a[3, 3, phi_, theta_, si_] = Cos[theta];
      CC[1, 1] = CC[2, 2] = 222*10^9;
      CC[1, 2] = 108*10^9;
      CC[1, 3] = CC[2, 3] = 111*10^9;
      CC[3, 3] = 151*10^9;
      CC[4, 4] = CC[5, 5] = 61*10^9;
      CC[6, 6] = 134*10^9;
      CC[1, 4] =
      CC[1, 5] =
      CC[1, 6] =
      CC[2, 4] =
      CC[2, 5] =
      CC[2, 6] =
      CC[3, 4] =
      CC[3, 5] = CC[3, 6] = CC[4, 5] = CC[4, 6] = CC[5, 6] = 0;
      A = {{1, 1}, {2, 2}, {3, 3}, {2, 3}, {1, 3}, {1, 2}};

      B[i_, j_, k_, l_, phi_, theta_, si_] :=
      Module[{aA = {A[[i]], A[[j]], A[[k]], A[[l]]} }, {a[Part[aA, 1, 1],
      Part[aA, 3, 1], phi, theta, si],
      a[Part[aA, 1, 2], Part[aA, 3, 2], phi, theta, si],
      a[Part[aA, 2, 1], Part[aA, 4, 1], phi, theta, si],
      a[Part[aA, 2, 2], Part[aA, 4, 2], phi, theta, si]}];
      F[i_, j_, k_, l_, phi_, theta_, si_] :=
      Module[{bB = B[i, j, k, l, phi, theta, si]},
      Part[bB, 1]*Part[bB, 2]*Part[bB, 3]*Part[bB, 4]];
      cfmat[phi_, theta_, si_, m_] :=
      Table[Sum[
      F[i, j, k, l, phi, theta, si]*If[k > l, CC[l, k], CC[k, l]], {k,
      1, 6}, {l, 1, 6}], {i, 1, 6}, {j, 1, 6}]

      Lgrain[phi_, theta_, si_, m_] =
      Module[{ rule1 =
      Flatten[Table[
      Subscript[cf, i, j] -> cfmat[phi, theta, si, m][[i, j]], {i, 1,
      6}, {j, 1, 6}]]}, {{Subscript[cf, 1, 1], Subscript[cf, 1, 2],
      Subscript[cf, 1, 3], 0, 0, 0, 0, 0, -4.4}, {Subscript[cf, 2,
      1], Subscript[cf, 2, 2], Subscript[cf, 2, 3], 0, 0, 0, 0,
      0, -4.4}, {Subscript[cf, 3, 1], Subscript[cf, 3, 2], Subscript[
      cf, 3, 3], 0, 0, 0, 0, 0, 18.6}, {0, 0, 0, Subscript[cf, 4, 4],
      0, 0, 0, 11.6, 0}, {0, 0, 0, 0, Subscript[cf, 5, 5], 0, 11.6, 0,
      0}, {0, 0, 0, 0, 0, Subscript[cf, 6, 6], 0, 0, 0}, {0, 0, 0, 0,
      11.6, 0, -2200*8.85*10^-12, 0, 0}, {0, 0, 0, 11.6, 0, 0,
      0, -2200*8.85*10^-12, 0}, {-4.4, -4.4, 18.6, 0, 0, 0, 0,
      0, -56*8.85*10^-12}} /. rule1];

      H = Lmatrix.(Inverse[ES] - E99);

      TrueStrain[phi_, theta_, si_, m_] =
      epsilonApplied[phi, theta, si,
      m] + (Inverse[
      H + Lgrain[phi, theta, si, m]].(Lgrain[phi, theta, si,
      m].EigenStrain[phi, theta, si, m]));

      TrueStress[phi_, theta_, si_, m_] =
      SigmaApplied[phi, theta, si, m] -
      H.(TrueStrain[phi, theta, si, m] -
      epsilonApplied[phi, theta, si, m]);
      Et[phi_, theta_, si_, m_] =
      If[m == 1, Ea,
      Sqrt[Part[TrueStrain[phi, theta, si, m - 1], 7]*
      Part[TrueStrain[phi, theta, si, m - 1], 7] +
      Part[TrueStrain[phi, theta, si, m - 1], 8]*
      Part[TrueStrain[phi, theta, si, m - 1], 8] +
      Part[TrueStrain[phi, theta, si, m - 1], 9]*
      Part[TrueStrain[phi, theta, si, m - 1], 9]]];

      PE[phi_, theta_, si_, m_, 1] = -P*Cos[theta]*Et[phi, theta, si, m];
      PE[phi_, theta_, si_, m_, 2] = P*Cos[theta]*Et[phi, theta, si, m];
      PE[phi_, theta_, si_, m_, 3] = -P*Sin[theta]*Cos[phi]*
      Et[phi, theta, si, m];
      PE[phi_, theta_, si_, m_, 4] =
      P*Sin[theta]*Cos[phi]*Et[phi, theta, si, m];
      PE[phi_, theta_, si_, m_, 5] = -P*Sin[theta]*Sin[phi]*
      Et[phi, theta, si, m];
      PE[phi_, theta_, si_, m_, 6] =
      P*Sin[theta]*Sin[phi]*Et[phi, theta, si, m];

      SE[phi_, theta_, si_,
      m_] = -0.5*(Transpose[TrueStress[phi, theta, si, m]].TrueStrain[
      phi, theta, si, m]);

      SEM[phi_, theta_, si_, m_] = Tr[SE[phi, theta, si, m]];
      DC2 := DeleteCases[DeleteCases[{1, 2, 3, 4, 5, 6}, i], j];

      DSC[phi_, theta_, si_, m_] := Module[{diff, tabs, tab36, rules},
      rules =
      Flatten[Table[
      Subscript[cf, i, j] -> cfmat[phi, theta, si, m][[i, j]], {i, 1,
      6}, {j, 1, 6}]];
      diff =
      2*(SEM[phi, theta, si, m] /. rules) -
      2*(SEM[phi, theta, si, m - 1] /. rules);
      tabs =
      Flatten[MapIndexed[Flatten@*List,
      Table[2*{PE[phi, theta, si, m, j] -
      PE[phi, theta, si, m, i]}, {i, 1, 6, 1}, {j, 1, 6,
      1}], {2}], 1];
      tab36 =
      ArrayReshape[
      Table[{If[i == j, 0,
      diff /. {f[m, i] -> 0,
      f[m, j] -> f[m - 1, j] + f[m - 1, i],
      f[m, DC2[[1]]] -> f[m - 1, DC2[[1]]],
      f[m, DC2[[4]]] -> f[m - 1, DC2[[4]]],
      f[m, DC2[[2]]] -> f[m - 1, DC2[[2]]],
      f[m, DC2[[3]]] -> f[m - 1, DC2[[3]]]}] +If[i == j, 0,
      If[i == 1 && j == 2 || i == 2 && j == 1 || i == 3 && j == 4 ||
      i == 4 && j == 3 || i == 5 && j == 6 || i == 6 && j == 5,
      2 W180, 2 W90]], i, j}, {i, 1, 6}, {j, 1, 6}], {36, 3}];
      (tabs + tab36)/2
      ];

      XXXX[phi_, theta_, si_] :=
      Last[Last[
      Last[Reap[X = 1;
      Do[Do[dsc = DSC[phi, theta, si, m];
      If[(SortBy[dsc /. R[m - 1], First][[k]][[1]]) < 0,
      DC[phi, theta, si, m] =
      DeleteCases[
      DeleteCases[{1, 2, 3, 4, 5, 6},
      SortBy[dsc /. R[m - 1], First][[k]][[3]]],
      SortBy[dsc /. R[m - 1], First][[k]][[2]]];
      R[m] = {f[m, SortBy[dsc /. R[m - 1], First][[k]][[3]]] ->
      f[m - 1, SortBy[dsc /. R[m - 1], First][[k]][[3]]] +
      f[m - 1, SortBy[dsc /. R[m - 1], First][[k]][[2]]],
      f[m, SortBy[dsc /. R[m - 1], First][[k]][[2]]] -> 0,
      f[m, DC[phi, theta, si, m][[1]]] ->
      f[m - 1, DC[phi, theta, si, m][[1]]],
      f[m, DC[phi, theta, si, m][[2]]] ->
      f[m - 1, DC[phi, theta, si, m][[2]]],
      f[m, DC[phi, theta, si, m][[3]]] ->
      f[m - 1, DC[phi, theta, si, m][[3]]],
      f[m, DC[phi, theta, si, m][[4]]] ->
      f[m - 1, DC[phi, theta, si, m][[4]]]} /. R[m - 1];
      X = k + 1; Sow[Sort[R[m]]];
      If[Sort[R[m]][[All, 2]] != Sort[R[m - 1]][[All, 2]],
      Break], R[m] = R[m - 1]; Break], {k, X, 15}], {m, 1,
      4}]]]]][[All, 2]]
      XX1 = Table[{ParallelEvaluate[XXXX[phi, theta, si]], phi, theta,
      si}, {phi, 0, Pi/4, Pi/56}, {theta, 0, ArcCot[Cos[phi]],
      ArcCot[Cos[phi]]/14}, {si, 0 Pi, 0 Pi, 0}]









      share|improve this question











      $endgroup$




      I am evaluating a code which ends with Table having ParallelEvaluate of a function XXXX[phi, theta, si]. For a grid of 225 points, a normal 2 processor laptop is taking 7 h as compared to 8.30 h by a high end Xeon 4 processor computer. CPU and memory usage for laptop and computer are about 66% vs 99% and 700MB vs 900 MB respectively. Will be thankful for any suggestion on how to improve the evaluation speed on computer. Thanks



      Ea = 500000;
      R[0] = {f[0, 1] -> 1/6, f[0, 2] -> 1/6, f[0, 3] -> 1/6,
      f[0, 4] -> 1/6 , f[0, 5] -> 1/6, f[0, 6] -> 1/6};
      DF[m_] := Table[f[m, n], {n, 1, 6}];
      W90 = Sqrt[2]*P*Ec;
      W180 = 2*P*Ec;
      P = 0.26;
      Ec = 100000;
      epsilonApplied[phi_, theta_, si_,
      m_] = {{0}, {0}, {0}, {0}, {0}, {0}, {Ea*Sin[theta]*Sin[phi]}, {Ea*
      Sin[theta]*Cos[phi]}, {Ea*Cos[theta]}};
      SigmaApplied[phi_, theta_, si_,
      m_] = {{0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}};
      EigenStrain[phi_, theta_, si_,
      m_] = {{-0.00333333 (f[m, 1] + f[m, 2]) +
      0.00666667 (f[m, 3] + f[m, 4]) -
      0.00333333 (f[m, 5] + f[m, 6])}, {-0.00333333 (f[m, 1] +
      f[m, 2]) - 0.00333333 (f[m, 3] + f[m, 4]) +
      0.00666667 (f[m, 5] + f[m, 6])}, {0.00666667 (f[m, 1] +
      f[m, 2]) - 0.00333333 (f[m, 3] + f[m, 4]) -
      0.00333333 (f[m, 5] + f[m, 6])}, {0.}, {0.}, {0.}, {0}, {0}, {0}};
      ES1 = {{0.768576, 0.00232016, 0.120616, 0, 0, 0, 0,
      0, -2.62804*10^-11}, {0.00232016, 0.768576, 0.120616, 0, 0, 0, 0,
      0, -2.62804*10^-11}, {0.02064, 0.02064, 0.503286, 0, 0, 0, 0, 0,
      0}, {0, 0, 0, -1.14762, 0, 0, 0, -3.52489*10^-10, 0}, {0, 0, 0,
      0, -1.14762, 0, -3.52489*10^-10, 0, 0}, {0, 0, 0, 0, 0, 0.191583,
      0, 0, 0}, {0, 0, 0, 0, -6.08402*10^7, 0, 0.315028, 0, 0}, {0, 0,
      0, -6.08402*10^7, 0, 0, 0, 0.315028, 0}, {2.44999*10^8,
      2.44999*10^8, 1.2168*10^8, 0, 0, 0, 0, 0, 0.369943}};
      ES = Rationalize[ES1, 10^-16];

      E99 = {{1, 0, 0, 0, 0, 0, 0, 0, 0}, {0, 1, 0, 0, 0, 0, 0, 0, 0}, {0,
      0, 1, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 1/2, 0, 0, 0, 0, 0}, {0, 0, 0,
      0, 1/2, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 1/2, 0, 0, 0}, {0, 0, 0, 0,
      0, 0, 1, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 1, 0}, {0, 0, 0, 0, 0, 0, 0,
      0, 1}};
      Lmatrix = {{166000000000, 77000000000, 78000000000, 0, 0, 0, 0,
      0, -4.4}, {77000000000, 166000000000, 78000000000, 0, 0, 0, 0,
      0, -4.4}, {78000000000, 78000000000, 162000000000, 0, 0, 0, 0, 0,
      18.6}, {0, 0, 0, 43000000000, 0, 0, 0, 11.6, 0}, {0, 0, 0, 0,
      43000000000, 0, 11.6, 0, 0}, {0, 0, 0, 0, 0, 44500000000, 0, 0,
      0}, {0, 0, 0, 0, 11.6, 0, -1000*8.85*10^-12, 0, 0}, {0, 0, 0,
      11.6, 0, 0, 0, -1000*8.85*10^-12, 0}, {-4.4, -4.4, 18.6, 0, 0, 0,
      0, 0, -910*8.85*10^-12}};
      a[1, 1, phi_, theta_, si_] =
      Cos[phi]*Cos[si] - Cos[theta]*Sin[phi]*Sin[si];
      a[1, 2, phi_, theta_, si_] =
      Cos[si]*Sin[phi] + Cos[theta]*Cos[phi]*Sin[si];
      a[1, 3, phi_, theta_, si_] = Sin[theta]*Sin[si];
      a[2, 1, phi_, theta_, si_] = -Cos[theta]*Cos[si]*Sin[phi] -
      Cos[phi]*Sin[si];
      a[2, 2, phi_, theta_, si_] = -Sin[phi]*Sin[si] +
      Cos[theta]*Cos[phi]*Cos[si];
      a[2, 3, phi_, theta_, si_] = Cos[si]*Sin[theta];
      a[3, 1, phi_, theta_, si_] = Sin[theta]*Sin[phi];
      a[3, 2, phi_, theta_, si_] = -Cos[phi]*Sin[theta];
      a[3, 3, phi_, theta_, si_] = Cos[theta];
      CC[1, 1] = CC[2, 2] = 222*10^9;
      CC[1, 2] = 108*10^9;
      CC[1, 3] = CC[2, 3] = 111*10^9;
      CC[3, 3] = 151*10^9;
      CC[4, 4] = CC[5, 5] = 61*10^9;
      CC[6, 6] = 134*10^9;
      CC[1, 4] =
      CC[1, 5] =
      CC[1, 6] =
      CC[2, 4] =
      CC[2, 5] =
      CC[2, 6] =
      CC[3, 4] =
      CC[3, 5] = CC[3, 6] = CC[4, 5] = CC[4, 6] = CC[5, 6] = 0;
      A = {{1, 1}, {2, 2}, {3, 3}, {2, 3}, {1, 3}, {1, 2}};

      B[i_, j_, k_, l_, phi_, theta_, si_] :=
      Module[{aA = {A[[i]], A[[j]], A[[k]], A[[l]]} }, {a[Part[aA, 1, 1],
      Part[aA, 3, 1], phi, theta, si],
      a[Part[aA, 1, 2], Part[aA, 3, 2], phi, theta, si],
      a[Part[aA, 2, 1], Part[aA, 4, 1], phi, theta, si],
      a[Part[aA, 2, 2], Part[aA, 4, 2], phi, theta, si]}];
      F[i_, j_, k_, l_, phi_, theta_, si_] :=
      Module[{bB = B[i, j, k, l, phi, theta, si]},
      Part[bB, 1]*Part[bB, 2]*Part[bB, 3]*Part[bB, 4]];
      cfmat[phi_, theta_, si_, m_] :=
      Table[Sum[
      F[i, j, k, l, phi, theta, si]*If[k > l, CC[l, k], CC[k, l]], {k,
      1, 6}, {l, 1, 6}], {i, 1, 6}, {j, 1, 6}]

      Lgrain[phi_, theta_, si_, m_] =
      Module[{ rule1 =
      Flatten[Table[
      Subscript[cf, i, j] -> cfmat[phi, theta, si, m][[i, j]], {i, 1,
      6}, {j, 1, 6}]]}, {{Subscript[cf, 1, 1], Subscript[cf, 1, 2],
      Subscript[cf, 1, 3], 0, 0, 0, 0, 0, -4.4}, {Subscript[cf, 2,
      1], Subscript[cf, 2, 2], Subscript[cf, 2, 3], 0, 0, 0, 0,
      0, -4.4}, {Subscript[cf, 3, 1], Subscript[cf, 3, 2], Subscript[
      cf, 3, 3], 0, 0, 0, 0, 0, 18.6}, {0, 0, 0, Subscript[cf, 4, 4],
      0, 0, 0, 11.6, 0}, {0, 0, 0, 0, Subscript[cf, 5, 5], 0, 11.6, 0,
      0}, {0, 0, 0, 0, 0, Subscript[cf, 6, 6], 0, 0, 0}, {0, 0, 0, 0,
      11.6, 0, -2200*8.85*10^-12, 0, 0}, {0, 0, 0, 11.6, 0, 0,
      0, -2200*8.85*10^-12, 0}, {-4.4, -4.4, 18.6, 0, 0, 0, 0,
      0, -56*8.85*10^-12}} /. rule1];

      H = Lmatrix.(Inverse[ES] - E99);

      TrueStrain[phi_, theta_, si_, m_] =
      epsilonApplied[phi, theta, si,
      m] + (Inverse[
      H + Lgrain[phi, theta, si, m]].(Lgrain[phi, theta, si,
      m].EigenStrain[phi, theta, si, m]));

      TrueStress[phi_, theta_, si_, m_] =
      SigmaApplied[phi, theta, si, m] -
      H.(TrueStrain[phi, theta, si, m] -
      epsilonApplied[phi, theta, si, m]);
      Et[phi_, theta_, si_, m_] =
      If[m == 1, Ea,
      Sqrt[Part[TrueStrain[phi, theta, si, m - 1], 7]*
      Part[TrueStrain[phi, theta, si, m - 1], 7] +
      Part[TrueStrain[phi, theta, si, m - 1], 8]*
      Part[TrueStrain[phi, theta, si, m - 1], 8] +
      Part[TrueStrain[phi, theta, si, m - 1], 9]*
      Part[TrueStrain[phi, theta, si, m - 1], 9]]];

      PE[phi_, theta_, si_, m_, 1] = -P*Cos[theta]*Et[phi, theta, si, m];
      PE[phi_, theta_, si_, m_, 2] = P*Cos[theta]*Et[phi, theta, si, m];
      PE[phi_, theta_, si_, m_, 3] = -P*Sin[theta]*Cos[phi]*
      Et[phi, theta, si, m];
      PE[phi_, theta_, si_, m_, 4] =
      P*Sin[theta]*Cos[phi]*Et[phi, theta, si, m];
      PE[phi_, theta_, si_, m_, 5] = -P*Sin[theta]*Sin[phi]*
      Et[phi, theta, si, m];
      PE[phi_, theta_, si_, m_, 6] =
      P*Sin[theta]*Sin[phi]*Et[phi, theta, si, m];

      SE[phi_, theta_, si_,
      m_] = -0.5*(Transpose[TrueStress[phi, theta, si, m]].TrueStrain[
      phi, theta, si, m]);

      SEM[phi_, theta_, si_, m_] = Tr[SE[phi, theta, si, m]];
      DC2 := DeleteCases[DeleteCases[{1, 2, 3, 4, 5, 6}, i], j];

      DSC[phi_, theta_, si_, m_] := Module[{diff, tabs, tab36, rules},
      rules =
      Flatten[Table[
      Subscript[cf, i, j] -> cfmat[phi, theta, si, m][[i, j]], {i, 1,
      6}, {j, 1, 6}]];
      diff =
      2*(SEM[phi, theta, si, m] /. rules) -
      2*(SEM[phi, theta, si, m - 1] /. rules);
      tabs =
      Flatten[MapIndexed[Flatten@*List,
      Table[2*{PE[phi, theta, si, m, j] -
      PE[phi, theta, si, m, i]}, {i, 1, 6, 1}, {j, 1, 6,
      1}], {2}], 1];
      tab36 =
      ArrayReshape[
      Table[{If[i == j, 0,
      diff /. {f[m, i] -> 0,
      f[m, j] -> f[m - 1, j] + f[m - 1, i],
      f[m, DC2[[1]]] -> f[m - 1, DC2[[1]]],
      f[m, DC2[[4]]] -> f[m - 1, DC2[[4]]],
      f[m, DC2[[2]]] -> f[m - 1, DC2[[2]]],
      f[m, DC2[[3]]] -> f[m - 1, DC2[[3]]]}] +If[i == j, 0,
      If[i == 1 && j == 2 || i == 2 && j == 1 || i == 3 && j == 4 ||
      i == 4 && j == 3 || i == 5 && j == 6 || i == 6 && j == 5,
      2 W180, 2 W90]], i, j}, {i, 1, 6}, {j, 1, 6}], {36, 3}];
      (tabs + tab36)/2
      ];

      XXXX[phi_, theta_, si_] :=
      Last[Last[
      Last[Reap[X = 1;
      Do[Do[dsc = DSC[phi, theta, si, m];
      If[(SortBy[dsc /. R[m - 1], First][[k]][[1]]) < 0,
      DC[phi, theta, si, m] =
      DeleteCases[
      DeleteCases[{1, 2, 3, 4, 5, 6},
      SortBy[dsc /. R[m - 1], First][[k]][[3]]],
      SortBy[dsc /. R[m - 1], First][[k]][[2]]];
      R[m] = {f[m, SortBy[dsc /. R[m - 1], First][[k]][[3]]] ->
      f[m - 1, SortBy[dsc /. R[m - 1], First][[k]][[3]]] +
      f[m - 1, SortBy[dsc /. R[m - 1], First][[k]][[2]]],
      f[m, SortBy[dsc /. R[m - 1], First][[k]][[2]]] -> 0,
      f[m, DC[phi, theta, si, m][[1]]] ->
      f[m - 1, DC[phi, theta, si, m][[1]]],
      f[m, DC[phi, theta, si, m][[2]]] ->
      f[m - 1, DC[phi, theta, si, m][[2]]],
      f[m, DC[phi, theta, si, m][[3]]] ->
      f[m - 1, DC[phi, theta, si, m][[3]]],
      f[m, DC[phi, theta, si, m][[4]]] ->
      f[m - 1, DC[phi, theta, si, m][[4]]]} /. R[m - 1];
      X = k + 1; Sow[Sort[R[m]]];
      If[Sort[R[m]][[All, 2]] != Sort[R[m - 1]][[All, 2]],
      Break], R[m] = R[m - 1]; Break], {k, X, 15}], {m, 1,
      4}]]]]][[All, 2]]
      XX1 = Table[{ParallelEvaluate[XXXX[phi, theta, si]], phi, theta,
      si}, {phi, 0, Pi/4, Pi/56}, {theta, 0, ArcCot[Cos[phi]],
      ArcCot[Cos[phi]]/14}, {si, 0 Pi, 0 Pi, 0}]






      performance-tuning parallelization






      share|improve this question















      share|improve this question













      share|improve this question




      share|improve this question








      edited 12 hours ago







      user49535

















      asked 17 hours ago









      user49535user49535

      1465




      1465






















          1 Answer
          1






          active

          oldest

          votes


















          6












          $begingroup$

          Without knowing the exact function (I assume it's something fairly long, possibly involving integrals or differential equations), I can only make the following suggestions:



          It looks like you're using exact numbers. If this is necessary for your application, then there's probably not a lot you can do, but exact numbers usually slow things down substantially. If you can, use Real numbers (just place a dot after the numbers like {phi, 0., Pi/4., Pi/56.}. If you need more precision than that but don't necessarily require the infinite precision of exact numbers, you can also do this: {phi, 0`50, Pi/4`50, Pi/56`50}. This will give you 50 digits of precision to work with which should make your final answer pretty close to the exact answer.



          The other thing I would try is:



          XX1 = ParallelTable[
          {XXXX[phi, theta, si]], phi, theta, si},
          {phi, 0, Pi/4, Pi/56},
          {theta, 0, ArcCot[Cos[phi]], ArcCot[Cos[phi]]/14},
          {si, 0 Pi, 0 Pi, 0}
          ]


          I think that ParallelTable is a better way to handle this than ParallelEvaluate. On a trial function, I see about a 100x speedup. ParallelEvaluate is simply evaluating your exact same function 4 times at each data point rather than splitting the task into multiple threads.



          If you can, combine both things for the best speedup.



          I hope this helps a bit! There are some people on here that are amazing at optimizing, perhaps they will be able to improve the speed even more. If it's possible, I would recommend posting your XXXX function unless it's insanely long.






          share|improve this answer











          $endgroup$













          • $begingroup$
            Thanks @LukasLang ! How do you type grave accents without it interpreting them as the inline code markers? I tried backslashes before them, but that didn’t help.
            $endgroup$
            – MassDefect
            16 hours ago








          • 1




            $begingroup$
            You have to increase the amount of enclosing accents: ``` `` Codewithaccents`` ```. If you need double accents, you enclose the code with three, and so on (edit: for some reason, it doesn't work in the comment section - but you can edit your answer to see how it's done)
            $endgroup$
            – Lukas Lang
            16 hours ago












          • $begingroup$
            @LukasLang Oh, I see! Thanks!
            $endgroup$
            – MassDefect
            15 hours ago










          • $begingroup$
            Thanks both of you. three points 1. I do not necessary need to use exact values of (theta, phi) if it can speed up, can use ".". 2. I tried to use ParalleleTable first, but in contrast to your experience, it took 30h/48h for 4/2 processor computer as compared to 8h/7h for ParallelEvaluate. 4 - 8 times slower. 3. How can I combine both...you mean ParallelTable[ParallelEvaluate[. ??
            $endgroup$
            – user49535
            14 hours ago






          • 1




            $begingroup$
            @user49535 As MassDefect already pointed out, using ParallelEvaluate here does not make sense at all. It enforces that the same value is computed on each of your CPU cores which is why you won't gain any speedup. It really depends on your actual function XXXX whether ParallelTable can help at all. If it is a pure function then ParallelTable should help.But if XXXX has side effects (like modifying data that has to be used by another thread) then it is hard to parallelize the execution. In a nutshell, we cannot give any further suggestions without knowing XXXX.
            $endgroup$
            – Henrik Schumacher
            13 hours ago













          Your Answer





          StackExchange.ifUsing("editor", function () {
          return StackExchange.using("mathjaxEditing", function () {
          StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
          StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["$", "$"], ["\\(","\\)"]]);
          });
          });
          }, "mathjax-editing");

          StackExchange.ready(function() {
          var channelOptions = {
          tags: "".split(" "),
          id: "387"
          };
          initTagRenderer("".split(" "), "".split(" "), channelOptions);

          StackExchange.using("externalEditor", function() {
          // Have to fire editor after snippets, if snippets enabled
          if (StackExchange.settings.snippets.snippetsEnabled) {
          StackExchange.using("snippets", function() {
          createEditor();
          });
          }
          else {
          createEditor();
          }
          });

          function createEditor() {
          StackExchange.prepareEditor({
          heartbeatType: 'answer',
          autoActivateHeartbeat: false,
          convertImagesToLinks: false,
          noModals: true,
          showLowRepImageUploadWarning: true,
          reputationToPostImages: null,
          bindNavPrevention: true,
          postfix: "",
          imageUploader: {
          brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
          contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
          allowUrls: true
          },
          onDemand: true,
          discardSelector: ".discard-answer"
          ,immediatelyShowMarkdownHelp:true
          });


          }
          });














          draft saved

          draft discarded


















          StackExchange.ready(
          function () {
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fmathematica.stackexchange.com%2fquestions%2f189731%2fparallele-computing-2-vs-4-processor-speed%23new-answer', 'question_page');
          }
          );

          Post as a guest















          Required, but never shown

























          1 Answer
          1






          active

          oldest

          votes








          1 Answer
          1






          active

          oldest

          votes









          active

          oldest

          votes






          active

          oldest

          votes









          6












          $begingroup$

          Without knowing the exact function (I assume it's something fairly long, possibly involving integrals or differential equations), I can only make the following suggestions:



          It looks like you're using exact numbers. If this is necessary for your application, then there's probably not a lot you can do, but exact numbers usually slow things down substantially. If you can, use Real numbers (just place a dot after the numbers like {phi, 0., Pi/4., Pi/56.}. If you need more precision than that but don't necessarily require the infinite precision of exact numbers, you can also do this: {phi, 0`50, Pi/4`50, Pi/56`50}. This will give you 50 digits of precision to work with which should make your final answer pretty close to the exact answer.



          The other thing I would try is:



          XX1 = ParallelTable[
          {XXXX[phi, theta, si]], phi, theta, si},
          {phi, 0, Pi/4, Pi/56},
          {theta, 0, ArcCot[Cos[phi]], ArcCot[Cos[phi]]/14},
          {si, 0 Pi, 0 Pi, 0}
          ]


          I think that ParallelTable is a better way to handle this than ParallelEvaluate. On a trial function, I see about a 100x speedup. ParallelEvaluate is simply evaluating your exact same function 4 times at each data point rather than splitting the task into multiple threads.



          If you can, combine both things for the best speedup.



          I hope this helps a bit! There are some people on here that are amazing at optimizing, perhaps they will be able to improve the speed even more. If it's possible, I would recommend posting your XXXX function unless it's insanely long.






          share|improve this answer











          $endgroup$













          • $begingroup$
            Thanks @LukasLang ! How do you type grave accents without it interpreting them as the inline code markers? I tried backslashes before them, but that didn’t help.
            $endgroup$
            – MassDefect
            16 hours ago








          • 1




            $begingroup$
            You have to increase the amount of enclosing accents: ``` `` Codewithaccents`` ```. If you need double accents, you enclose the code with three, and so on (edit: for some reason, it doesn't work in the comment section - but you can edit your answer to see how it's done)
            $endgroup$
            – Lukas Lang
            16 hours ago












          • $begingroup$
            @LukasLang Oh, I see! Thanks!
            $endgroup$
            – MassDefect
            15 hours ago










          • $begingroup$
            Thanks both of you. three points 1. I do not necessary need to use exact values of (theta, phi) if it can speed up, can use ".". 2. I tried to use ParalleleTable first, but in contrast to your experience, it took 30h/48h for 4/2 processor computer as compared to 8h/7h for ParallelEvaluate. 4 - 8 times slower. 3. How can I combine both...you mean ParallelTable[ParallelEvaluate[. ??
            $endgroup$
            – user49535
            14 hours ago






          • 1




            $begingroup$
            @user49535 As MassDefect already pointed out, using ParallelEvaluate here does not make sense at all. It enforces that the same value is computed on each of your CPU cores which is why you won't gain any speedup. It really depends on your actual function XXXX whether ParallelTable can help at all. If it is a pure function then ParallelTable should help.But if XXXX has side effects (like modifying data that has to be used by another thread) then it is hard to parallelize the execution. In a nutshell, we cannot give any further suggestions without knowing XXXX.
            $endgroup$
            – Henrik Schumacher
            13 hours ago


















          6












          $begingroup$

          Without knowing the exact function (I assume it's something fairly long, possibly involving integrals or differential equations), I can only make the following suggestions:



          It looks like you're using exact numbers. If this is necessary for your application, then there's probably not a lot you can do, but exact numbers usually slow things down substantially. If you can, use Real numbers (just place a dot after the numbers like {phi, 0., Pi/4., Pi/56.}. If you need more precision than that but don't necessarily require the infinite precision of exact numbers, you can also do this: {phi, 0`50, Pi/4`50, Pi/56`50}. This will give you 50 digits of precision to work with which should make your final answer pretty close to the exact answer.



          The other thing I would try is:



          XX1 = ParallelTable[
          {XXXX[phi, theta, si]], phi, theta, si},
          {phi, 0, Pi/4, Pi/56},
          {theta, 0, ArcCot[Cos[phi]], ArcCot[Cos[phi]]/14},
          {si, 0 Pi, 0 Pi, 0}
          ]


          I think that ParallelTable is a better way to handle this than ParallelEvaluate. On a trial function, I see about a 100x speedup. ParallelEvaluate is simply evaluating your exact same function 4 times at each data point rather than splitting the task into multiple threads.



          If you can, combine both things for the best speedup.



          I hope this helps a bit! There are some people on here that are amazing at optimizing, perhaps they will be able to improve the speed even more. If it's possible, I would recommend posting your XXXX function unless it's insanely long.






          share|improve this answer











          $endgroup$













          • $begingroup$
            Thanks @LukasLang ! How do you type grave accents without it interpreting them as the inline code markers? I tried backslashes before them, but that didn’t help.
            $endgroup$
            – MassDefect
            16 hours ago








          • 1




            $begingroup$
            You have to increase the amount of enclosing accents: ``` `` Codewithaccents`` ```. If you need double accents, you enclose the code with three, and so on (edit: for some reason, it doesn't work in the comment section - but you can edit your answer to see how it's done)
            $endgroup$
            – Lukas Lang
            16 hours ago












          • $begingroup$
            @LukasLang Oh, I see! Thanks!
            $endgroup$
            – MassDefect
            15 hours ago










          • $begingroup$
            Thanks both of you. three points 1. I do not necessary need to use exact values of (theta, phi) if it can speed up, can use ".". 2. I tried to use ParalleleTable first, but in contrast to your experience, it took 30h/48h for 4/2 processor computer as compared to 8h/7h for ParallelEvaluate. 4 - 8 times slower. 3. How can I combine both...you mean ParallelTable[ParallelEvaluate[. ??
            $endgroup$
            – user49535
            14 hours ago






          • 1




            $begingroup$
            @user49535 As MassDefect already pointed out, using ParallelEvaluate here does not make sense at all. It enforces that the same value is computed on each of your CPU cores which is why you won't gain any speedup. It really depends on your actual function XXXX whether ParallelTable can help at all. If it is a pure function then ParallelTable should help.But if XXXX has side effects (like modifying data that has to be used by another thread) then it is hard to parallelize the execution. In a nutshell, we cannot give any further suggestions without knowing XXXX.
            $endgroup$
            – Henrik Schumacher
            13 hours ago
















          6












          6








          6





          $begingroup$

          Without knowing the exact function (I assume it's something fairly long, possibly involving integrals or differential equations), I can only make the following suggestions:



          It looks like you're using exact numbers. If this is necessary for your application, then there's probably not a lot you can do, but exact numbers usually slow things down substantially. If you can, use Real numbers (just place a dot after the numbers like {phi, 0., Pi/4., Pi/56.}. If you need more precision than that but don't necessarily require the infinite precision of exact numbers, you can also do this: {phi, 0`50, Pi/4`50, Pi/56`50}. This will give you 50 digits of precision to work with which should make your final answer pretty close to the exact answer.



          The other thing I would try is:



          XX1 = ParallelTable[
          {XXXX[phi, theta, si]], phi, theta, si},
          {phi, 0, Pi/4, Pi/56},
          {theta, 0, ArcCot[Cos[phi]], ArcCot[Cos[phi]]/14},
          {si, 0 Pi, 0 Pi, 0}
          ]


          I think that ParallelTable is a better way to handle this than ParallelEvaluate. On a trial function, I see about a 100x speedup. ParallelEvaluate is simply evaluating your exact same function 4 times at each data point rather than splitting the task into multiple threads.



          If you can, combine both things for the best speedup.



          I hope this helps a bit! There are some people on here that are amazing at optimizing, perhaps they will be able to improve the speed even more. If it's possible, I would recommend posting your XXXX function unless it's insanely long.






          share|improve this answer











          $endgroup$



          Without knowing the exact function (I assume it's something fairly long, possibly involving integrals or differential equations), I can only make the following suggestions:



          It looks like you're using exact numbers. If this is necessary for your application, then there's probably not a lot you can do, but exact numbers usually slow things down substantially. If you can, use Real numbers (just place a dot after the numbers like {phi, 0., Pi/4., Pi/56.}. If you need more precision than that but don't necessarily require the infinite precision of exact numbers, you can also do this: {phi, 0`50, Pi/4`50, Pi/56`50}. This will give you 50 digits of precision to work with which should make your final answer pretty close to the exact answer.



          The other thing I would try is:



          XX1 = ParallelTable[
          {XXXX[phi, theta, si]], phi, theta, si},
          {phi, 0, Pi/4, Pi/56},
          {theta, 0, ArcCot[Cos[phi]], ArcCot[Cos[phi]]/14},
          {si, 0 Pi, 0 Pi, 0}
          ]


          I think that ParallelTable is a better way to handle this than ParallelEvaluate. On a trial function, I see about a 100x speedup. ParallelEvaluate is simply evaluating your exact same function 4 times at each data point rather than splitting the task into multiple threads.



          If you can, combine both things for the best speedup.



          I hope this helps a bit! There are some people on here that are amazing at optimizing, perhaps they will be able to improve the speed even more. If it's possible, I would recommend posting your XXXX function unless it's insanely long.







          share|improve this answer














          share|improve this answer



          share|improve this answer








          edited 16 hours ago









          Lukas Lang

          6,6651930




          6,6651930










          answered 17 hours ago









          MassDefectMassDefect

          86628




          86628












          • $begingroup$
            Thanks @LukasLang ! How do you type grave accents without it interpreting them as the inline code markers? I tried backslashes before them, but that didn’t help.
            $endgroup$
            – MassDefect
            16 hours ago








          • 1




            $begingroup$
            You have to increase the amount of enclosing accents: ``` `` Codewithaccents`` ```. If you need double accents, you enclose the code with three, and so on (edit: for some reason, it doesn't work in the comment section - but you can edit your answer to see how it's done)
            $endgroup$
            – Lukas Lang
            16 hours ago












          • $begingroup$
            @LukasLang Oh, I see! Thanks!
            $endgroup$
            – MassDefect
            15 hours ago










          • $begingroup$
            Thanks both of you. three points 1. I do not necessary need to use exact values of (theta, phi) if it can speed up, can use ".". 2. I tried to use ParalleleTable first, but in contrast to your experience, it took 30h/48h for 4/2 processor computer as compared to 8h/7h for ParallelEvaluate. 4 - 8 times slower. 3. How can I combine both...you mean ParallelTable[ParallelEvaluate[. ??
            $endgroup$
            – user49535
            14 hours ago






          • 1




            $begingroup$
            @user49535 As MassDefect already pointed out, using ParallelEvaluate here does not make sense at all. It enforces that the same value is computed on each of your CPU cores which is why you won't gain any speedup. It really depends on your actual function XXXX whether ParallelTable can help at all. If it is a pure function then ParallelTable should help.But if XXXX has side effects (like modifying data that has to be used by another thread) then it is hard to parallelize the execution. In a nutshell, we cannot give any further suggestions without knowing XXXX.
            $endgroup$
            – Henrik Schumacher
            13 hours ago




















          • $begingroup$
            Thanks @LukasLang ! How do you type grave accents without it interpreting them as the inline code markers? I tried backslashes before them, but that didn’t help.
            $endgroup$
            – MassDefect
            16 hours ago








          • 1




            $begingroup$
            You have to increase the amount of enclosing accents: ``` `` Codewithaccents`` ```. If you need double accents, you enclose the code with three, and so on (edit: for some reason, it doesn't work in the comment section - but you can edit your answer to see how it's done)
            $endgroup$
            – Lukas Lang
            16 hours ago












          • $begingroup$
            @LukasLang Oh, I see! Thanks!
            $endgroup$
            – MassDefect
            15 hours ago










          • $begingroup$
            Thanks both of you. three points 1. I do not necessary need to use exact values of (theta, phi) if it can speed up, can use ".". 2. I tried to use ParalleleTable first, but in contrast to your experience, it took 30h/48h for 4/2 processor computer as compared to 8h/7h for ParallelEvaluate. 4 - 8 times slower. 3. How can I combine both...you mean ParallelTable[ParallelEvaluate[. ??
            $endgroup$
            – user49535
            14 hours ago






          • 1




            $begingroup$
            @user49535 As MassDefect already pointed out, using ParallelEvaluate here does not make sense at all. It enforces that the same value is computed on each of your CPU cores which is why you won't gain any speedup. It really depends on your actual function XXXX whether ParallelTable can help at all. If it is a pure function then ParallelTable should help.But if XXXX has side effects (like modifying data that has to be used by another thread) then it is hard to parallelize the execution. In a nutshell, we cannot give any further suggestions without knowing XXXX.
            $endgroup$
            – Henrik Schumacher
            13 hours ago


















          $begingroup$
          Thanks @LukasLang ! How do you type grave accents without it interpreting them as the inline code markers? I tried backslashes before them, but that didn’t help.
          $endgroup$
          – MassDefect
          16 hours ago






          $begingroup$
          Thanks @LukasLang ! How do you type grave accents without it interpreting them as the inline code markers? I tried backslashes before them, but that didn’t help.
          $endgroup$
          – MassDefect
          16 hours ago






          1




          1




          $begingroup$
          You have to increase the amount of enclosing accents: ``` `` Codewithaccents`` ```. If you need double accents, you enclose the code with three, and so on (edit: for some reason, it doesn't work in the comment section - but you can edit your answer to see how it's done)
          $endgroup$
          – Lukas Lang
          16 hours ago






          $begingroup$
          You have to increase the amount of enclosing accents: ``` `` Codewithaccents`` ```. If you need double accents, you enclose the code with three, and so on (edit: for some reason, it doesn't work in the comment section - but you can edit your answer to see how it's done)
          $endgroup$
          – Lukas Lang
          16 hours ago














          $begingroup$
          @LukasLang Oh, I see! Thanks!
          $endgroup$
          – MassDefect
          15 hours ago




          $begingroup$
          @LukasLang Oh, I see! Thanks!
          $endgroup$
          – MassDefect
          15 hours ago












          $begingroup$
          Thanks both of you. three points 1. I do not necessary need to use exact values of (theta, phi) if it can speed up, can use ".". 2. I tried to use ParalleleTable first, but in contrast to your experience, it took 30h/48h for 4/2 processor computer as compared to 8h/7h for ParallelEvaluate. 4 - 8 times slower. 3. How can I combine both...you mean ParallelTable[ParallelEvaluate[. ??
          $endgroup$
          – user49535
          14 hours ago




          $begingroup$
          Thanks both of you. three points 1. I do not necessary need to use exact values of (theta, phi) if it can speed up, can use ".". 2. I tried to use ParalleleTable first, but in contrast to your experience, it took 30h/48h for 4/2 processor computer as compared to 8h/7h for ParallelEvaluate. 4 - 8 times slower. 3. How can I combine both...you mean ParallelTable[ParallelEvaluate[. ??
          $endgroup$
          – user49535
          14 hours ago




          1




          1




          $begingroup$
          @user49535 As MassDefect already pointed out, using ParallelEvaluate here does not make sense at all. It enforces that the same value is computed on each of your CPU cores which is why you won't gain any speedup. It really depends on your actual function XXXX whether ParallelTable can help at all. If it is a pure function then ParallelTable should help.But if XXXX has side effects (like modifying data that has to be used by another thread) then it is hard to parallelize the execution. In a nutshell, we cannot give any further suggestions without knowing XXXX.
          $endgroup$
          – Henrik Schumacher
          13 hours ago






          $begingroup$
          @user49535 As MassDefect already pointed out, using ParallelEvaluate here does not make sense at all. It enforces that the same value is computed on each of your CPU cores which is why you won't gain any speedup. It really depends on your actual function XXXX whether ParallelTable can help at all. If it is a pure function then ParallelTable should help.But if XXXX has side effects (like modifying data that has to be used by another thread) then it is hard to parallelize the execution. In a nutshell, we cannot give any further suggestions without knowing XXXX.
          $endgroup$
          – Henrik Schumacher
          13 hours ago




















          draft saved

          draft discarded




















































          Thanks for contributing an answer to Mathematica Stack Exchange!


          • Please be sure to answer the question. Provide details and share your research!

          But avoid



          • Asking for help, clarification, or responding to other answers.

          • Making statements based on opinion; back them up with references or personal experience.


          Use MathJax to format equations. MathJax reference.


          To learn more, see our tips on writing great answers.




          draft saved


          draft discarded














          StackExchange.ready(
          function () {
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fmathematica.stackexchange.com%2fquestions%2f189731%2fparallele-computing-2-vs-4-processor-speed%23new-answer', 'question_page');
          }
          );

          Post as a guest















          Required, but never shown





















































          Required, but never shown














          Required, but never shown












          Required, but never shown







          Required, but never shown

































          Required, but never shown














          Required, but never shown












          Required, but never shown







          Required, but never shown







          Popular posts from this blog

          How to label and detect the document text images

          Tabula Rosettana

          Aureus (color)