Parallel Computing - 2 vs. 4 processor speed
$begingroup$
I am evaluating code that ends with a Table wrapping ParallelEvaluate of a function XXXX[phi, theta, si]. For a grid of 225 points, an ordinary 2-processor laptop takes 7 h, compared with 8.30 h on a high-end 4-processor Xeon computer. CPU and memory usage for the laptop and the computer are about 66% vs 99% and 700 MB vs 900 MB, respectively. I would be thankful for any suggestions on how to improve the evaluation speed on the computer. Thanks
(* Scalar parameter: amplitude of rows 7-9 of epsilonApplied and the m == 1
   value returned by Et. *)
Ea = 500000;
(* Substitution rules for step 0: each of the six symbols f[0, n] maps to 1/6. *)
R[0] = {f[0, 1] -> 1/6, f[0, 2] -> 1/6, f[0, 3] -> 1/6,
f[0, 4] -> 1/6 , f[0, 5] -> 1/6, f[0, 6] -> 1/6};
(* List of the six symbols f[m, 1] ... f[m, 6] for a given step m.
   Not referenced by the other definitions visible in this listing. *)
DF[m_] := Table[f[m, n], {n, 1, 6}];
(* W90 and W180 are the two constant terms added (doubled) to each off-diagonal
   entry in DSC. P and Ec are only assigned on the following lines, so in a
   fresh kernel these two values are stored symbolically in P and Ec and become
   numeric when they are later evaluated. *)
W90 = Sqrt[2]*P*Ec;
W180 = 2*P*Ec;
(* P also scales every PE[..., n] term. *)
P = 0.26;
Ec = 100000;
(* Applied term as a 9x1 column: rows 1-6 are zero and rows 7-9 are Ea times
   {Sin[theta] Sin[phi], Sin[theta] Cos[phi], Cos[theta]}. The arguments si and
   m are accepted but unused. Defined with = (Set), so the right-hand side is
   evaluated once, when the definition is made. *)
epsilonApplied[phi_, theta_, si_,
m_] = {{0}, {0}, {0}, {0}, {0}, {0}, {Ea*Sin[theta]*Sin[phi]}, {Ea*
Sin[theta]*Cos[phi]}, {Ea*Cos[theta]}};
(* 9x1 column of zeros; none of the four arguments is used. *)
SigmaApplied[phi_, theta_, si_,
m_] = {{0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}};
(* 9x1 column whose first three rows are linear combinations of the pairs
   f[m, 1] + f[m, 2], f[m, 3] + f[m, 4], f[m, 5] + f[m, 6]; rows 4-9 are zero.
   Depends only on m (through the symbols f[m, n]); phi, theta, si are unused. *)
EigenStrain[phi_, theta_, si_,
m_] = {{-0.00333333 (f[m, 1] + f[m, 2]) +
0.00666667 (f[m, 3] + f[m, 4]) -
0.00333333 (f[m, 5] + f[m, 6])}, {-0.00333333 (f[m, 1] +
f[m, 2]) - 0.00333333 (f[m, 3] + f[m, 4]) +
0.00666667 (f[m, 5] + f[m, 6])}, {0.00666667 (f[m, 1] +
f[m, 2]) - 0.00333333 (f[m, 3] + f[m, 4]) -
0.00333333 (f[m, 5] + f[m, 6])}, {0.}, {0.}, {0.}, {0}, {0}, {0}};
(* 9x9 numeric matrix given with machine-precision entries.
   NOTE(review): the meaning of this tensor is not stated in the listing
   (presumably an Eshelby-type tensor) - confirm with the author. *)
ES1 = {{0.768576, 0.00232016, 0.120616, 0, 0, 0, 0,
0, -2.62804*10^-11}, {0.00232016, 0.768576, 0.120616, 0, 0, 0, 0,
0, -2.62804*10^-11}, {0.02064, 0.02064, 0.503286, 0, 0, 0, 0, 0,
0}, {0, 0, 0, -1.14762, 0, 0, 0, -3.52489*10^-10, 0}, {0, 0, 0,
0, -1.14762, 0, -3.52489*10^-10, 0, 0}, {0, 0, 0, 0, 0, 0.191583,
0, 0, 0}, {0, 0, 0, 0, -6.08402*10^7, 0, 0.315028, 0, 0}, {0, 0,
0, -6.08402*10^7, 0, 0, 0, 0.315028, 0}, {2.44999*10^8,
2.44999*10^8, 1.2168*10^8, 0, 0, 0, 0, 0, 0.369943}};
(* ES1 converted to rational numbers within a tolerance of 10^-16; this is the
   matrix that is inverted in the definition of H. *)
ES = Rationalize[ES1, 10^-16];
(* 9x9 diagonal matrix with diagonal {1, 1, 1, 1/2, 1/2, 1/2, 1, 1, 1};
   subtracted from Inverse[ES] in the definition of H. *)
E99 = {{1, 0, 0, 0, 0, 0, 0, 0, 0}, {0, 1, 0, 0, 0, 0, 0, 0, 0}, {0,
0, 1, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 1/2, 0, 0, 0, 0, 0}, {0, 0, 0,
0, 1/2, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 1/2, 0, 0, 0}, {0, 0, 0, 0,
0, 0, 1, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 1, 0}, {0, 0, 0, 0, 0, 0, 0,
0, 1}};
(* Constant 9x9 matrix that left-multiplies (Inverse[ES] - E99) in H. It has the
   same sparsity layout as the matrix built in Lgrain, with fixed numbers in
   place of the cfmat-derived entries. *)
Lmatrix = {{166000000000, 77000000000, 78000000000, 0, 0, 0, 0,
0, -4.4}, {77000000000, 166000000000, 78000000000, 0, 0, 0, 0,
0, -4.4}, {78000000000, 78000000000, 162000000000, 0, 0, 0, 0, 0,
18.6}, {0, 0, 0, 43000000000, 0, 0, 0, 11.6, 0}, {0, 0, 0, 0,
43000000000, 0, 11.6, 0, 0}, {0, 0, 0, 0, 0, 44500000000, 0, 0,
0}, {0, 0, 0, 0, 11.6, 0, -1000*8.85*10^-12, 0, 0}, {0, 0, 0,
11.6, 0, 0, 0, -1000*8.85*10^-12, 0}, {-4.4, -4.4, 18.6, 0, 0, 0,
0, 0, -910*8.85*10^-12}};
(* a[r, c, phi, theta, si] gives entry (r, c) of a 3x3 matrix of trigonometric
   products in the three angles; the entries are consumed by B.
   NOTE(review): this looks like an Euler-angle rotation matrix - confirm the
   convention (and the signs of individual entries) against the intended one.
   All nine definitions use = (Set), so each right-hand side is stored once
   with phi, theta, si as pattern variables. *)
a[1, 1, phi_, theta_, si_] =
Cos[phi]*Cos[si] - Cos[theta]*Sin[phi]*Sin[si];
a[1, 2, phi_, theta_, si_] =
Cos[si]*Sin[phi] + Cos[theta]*Cos[phi]*Sin[si];
a[1, 3, phi_, theta_, si_] = Sin[theta]*Sin[si];
a[2, 1, phi_, theta_, si_] = -Cos[theta]*Cos[si]*Sin[phi] -
Cos[phi]*Sin[si];
a[2, 2, phi_, theta_, si_] = -Sin[phi]*Sin[si] +
Cos[theta]*Cos[phi]*Cos[si];
a[2, 3, phi_, theta_, si_] = Cos[si]*Sin[theta];
a[3, 1, phi_, theta_, si_] = Sin[theta]*Sin[phi];
a[3, 2, phi_, theta_, si_] = -Cos[phi]*Sin[theta];
a[3, 3, phi_, theta_, si_] = Cos[theta];
(* Constants CC[k, l], defined only for k <= l (diagonal and upper triangle of
   a 6x6 table). cfmat swaps the indices when k > l, so the table is used as a
   symmetric one. *)
CC[1, 1] = CC[2, 2] = 222*10^9;
CC[1, 2] = 108*10^9;
CC[1, 3] = CC[2, 3] = 111*10^9;
CC[3, 3] = 151*10^9;
CC[4, 4] = CC[5, 5] = 61*10^9;
CC[6, 6] = 134*10^9;
(* All remaining upper-triangle entries are zero (one chained assignment). *)
CC[1, 4] =
CC[1, 5] =
CC[1, 6] =
CC[2, 4] =
CC[2, 5] =
CC[2, 6] =
CC[3, 4] =
CC[3, 5] = CC[3, 6] = CC[4, 5] = CC[4, 6] = CC[5, 6] = 0;
(* Maps a single index 1..6 to a pair of indices in 1..3; B uses the pair
   components as the row/column arguments of a[...]. *)
A = {{1, 1}, {2, 2}, {3, 3}, {2, 3}, {1, 3}, {1, 2}};
(* B returns the four a[...] factors for the index combination (i, j, k, l):
   the two components of A[[i]] are paired with those of A[[k]], and the two
   components of A[[j]] with those of A[[l]]. *)
B[i_, j_, k_, l_, phi_, theta_, si_] :=
  With[{pairI = A[[i]], pairJ = A[[j]], pairK = A[[k]], pairL = A[[l]]},
   {a[pairI[[1]], pairK[[1]], phi, theta, si],
    a[pairI[[2]], pairK[[2]], phi, theta, si],
    a[pairJ[[1]], pairL[[1]], phi, theta, si],
    a[pairJ[[2]], pairL[[2]], phi, theta, si]}];
(* F is the product of the four factors returned by B. *)
F[i_, j_, k_, l_, phi_, theta_, si_] :=
  Times @@ B[i, j, k, l, phi, theta, si];
(* cfmat builds the 6x6 table whose (i, j) entry is the sum over k, l = 1..6 of
   F[i, j, k, l, ...] times CC taken with its indices in ascending order
   (CC is only defined for first index <= second index). The argument m is
   accepted but not used. *)
cfmat[phi_, theta_, si_, m_] :=
  Table[
   Sum[F[i, j, k, l, phi, theta, si]*CC[Min[k, l], Max[k, l]],
    {k, 1, 6}, {l, 1, 6}],
   {i, 1, 6}, {j, 1, 6}]
(* 9x9 matrix for one orientation. The entries written as Subscript[cf, i, j]
   (the 3x3 upper-left block and the diagonal positions (4,4), (5,5), (6,6))
   are replaced by the corresponding entries of cfmat[phi, theta, si, m] via
   rule1; all other entries are fixed numbers.
   Because the definition uses = (Set), the Module, the 36 cfmat evaluations in
   rule1 and the replacement all run once at definition time with phi, theta,
   si, m as symbols (assuming they have no global values), and the stored
   right-hand side no longer contains any Subscript[cf, ...] placeholder. *)
Lgrain[phi_, theta_, si_, m_] =
Module[{ rule1 =
Flatten[Table[
Subscript[cf, i, j] -> cfmat[phi, theta, si, m][[i, j]], {i, 1,
6}, {j, 1, 6}]]}, {{Subscript[cf, 1, 1], Subscript[cf, 1, 2],
Subscript[cf, 1, 3], 0, 0, 0, 0, 0, -4.4}, {Subscript[cf, 2,
1], Subscript[cf, 2, 2], Subscript[cf, 2, 3], 0, 0, 0, 0,
0, -4.4}, {Subscript[cf, 3, 1], Subscript[cf, 3, 2], Subscript[
cf, 3, 3], 0, 0, 0, 0, 0, 18.6}, {0, 0, 0, Subscript[cf, 4, 4],
0, 0, 0, 11.6, 0}, {0, 0, 0, 0, Subscript[cf, 5, 5], 0, 11.6, 0,
0}, {0, 0, 0, 0, 0, Subscript[cf, 6, 6], 0, 0, 0}, {0, 0, 0, 0,
11.6, 0, -2200*8.85*10^-12, 0, 0}, {0, 0, 0, 11.6, 0, 0,
0, -2200*8.85*10^-12, 0}, {-4.4, -4.4, 18.6, 0, 0, 0, 0,
0, -56*8.85*10^-12}} /. rule1];
(* Constant 9x9 matrix built from Lmatrix, the inverse of ES, and E99. *)
H = Lmatrix.(Inverse[ES] - E99);
(* 9x1 column: epsilonApplied + Inverse[H + Lgrain].(Lgrain.EigenStrain).
   Defined with = (Set), so the 9x9 Inverse is carried out at definition time
   with phi, theta, si, m as symbols (assuming they have no global values); the
   stored right-hand side is then evaluated numerically on each call. *)
TrueStrain[phi_, theta_, si_, m_] =
epsilonApplied[phi, theta, si,
m] + (Inverse[
H + Lgrain[phi, theta, si, m]].(Lgrain[phi, theta, si,
m].EigenStrain[phi, theta, si, m]));
(* 9x1 column: SigmaApplied - H.(TrueStrain - epsilonApplied). *)
TrueStress[phi_, theta_, si_, m_] =
SigmaApplied[phi, theta, si, m] -
H.(TrueStrain[phi, theta, si, m] -
epsilonApplied[phi, theta, si, m]);
(* Ea when m == 1; otherwise the square root of the sum of squares of rows 7-9
   of TrueStrain at step m - 1. If holds its branches, so the TrueStrain calls
   stay unevaluated until m is a number.
   NOTE(review): Part[..., 7] of a 9x1 column is a one-element list, so the
   m > 1 branch appears to yield a one-element list rather than a scalar; DSC
   flattens this away - confirm that is intended. *)
Et[phi_, theta_, si_, m_] =
If[m == 1, Ea,
Sqrt[Part[TrueStrain[phi, theta, si, m - 1], 7]*
Part[TrueStrain[phi, theta, si, m - 1], 7] +
Part[TrueStrain[phi, theta, si, m - 1], 8]*
Part[TrueStrain[phi, theta, si, m - 1], 8] +
Part[TrueStrain[phi, theta, si, m - 1], 9]*
Part[TrueStrain[phi, theta, si, m - 1], 9]]];
(* PE[..., n] for n = 1..6: -/+ P Et times Cos[theta] (n = 1, 2),
   Sin[theta] Cos[phi] (n = 3, 4) and Sin[theta] Sin[phi] (n = 5, 6). *)
PE[phi_, theta_, si_, m_, 1] = -P*Cos[theta]*Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 2] = P*Cos[theta]*Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 3] = -P*Sin[theta]*Cos[phi]*
Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 4] =
P*Sin[theta]*Cos[phi]*Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 5] = -P*Sin[theta]*Sin[phi]*
Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 6] =
P*Sin[theta]*Sin[phi]*Et[phi, theta, si, m];
(* 1x1 matrix: -0.5 * Transpose[TrueStress].TrueStrain. *)
SE[phi_, theta_, si_,
m_] = -0.5*(Transpose[TrueStress[phi, theta, si, m]].TrueStrain[
phi, theta, si, m]);
(* Scalar: the trace (i.e. the single entry) of the 1x1 matrix SE. *)
SEM[phi_, theta_, si_, m_] = Tr[SE[phi, theta, si, m]];
(* The four indices of {1..6} that remain after removing i and j. Evaluated
   lazily (:=), so it uses whatever values i and j have where it is evaluated;
   inside DSC those are the iterators of the enclosing Table. *)
DC2 := DeleteCases[DeleteCases[{1, 2, 3, 4, 5, 6}, i], j];
(* DSC[phi, theta, si, m] returns a 36x3 list with one row {value, i, j} per
   ordered pair (i, j), i, j = 1..6. For i != j the value is half the sum of
     - 2 (PE[..., j] - PE[..., i]),
     - 2 (SEM at step m - SEM at step m - 1), with f[m, i] -> 0,
       f[m, j] -> f[m - 1, j] + f[m - 1, i] and the other four f[m, n] set to
       f[m - 1, n],
     - 2 W180 for the pairs {1,2}, {3,4}, {5,6} (either order), else 2 W90.
   For i == j only the PE difference contributes. The f[m - 1, n] symbols are
   left in the result; the caller substitutes them.
   Change from the original: cfmat[phi, theta, si, m] is evaluated once and
   indexed, instead of being rebuilt for each of the 36 rule entries. The
   resulting rules are identical. *)
DSC[phi_, theta_, si_, m_] := Module[{diff, tabs, tab36, rules, cfNow},
  (* Loop-invariant: build the 6x6 table once. *)
  cfNow = cfmat[phi, theta, si, m];
  rules =
   Flatten[Table[
     Subscript[cf, i, j] -> cfNow[[i, j]], {i, 1, 6}, {j, 1, 6}]];
  diff =
   2*(SEM[phi, theta, si, m] /. rules) -
    2*(SEM[phi, theta, si, m - 1] /. rules);
  (* Rows {2 (PE_j - PE_i), i, j}; Flatten@*List also removes the extra list
     level that PE can carry. *)
  tabs =
   Flatten[MapIndexed[Flatten@*List,
     Table[2*{PE[phi, theta, si, m, j] -
         PE[phi, theta, si, m, i]}, {i, 1, 6, 1}, {j, 1, 6,
       1}], {2}], 1];
  tab36 =
   ArrayReshape[
    Table[{If[i == j, 0,
        diff /. {f[m, i] -> 0,
          f[m, j] -> f[m - 1, j] + f[m - 1, i],
          f[m, DC2[[1]]] -> f[m - 1, DC2[[1]]],
          f[m, DC2[[4]]] -> f[m - 1, DC2[[4]]],
          f[m, DC2[[2]]] -> f[m - 1, DC2[[2]]],
          f[m, DC2[[3]]] -> f[m - 1, DC2[[3]]]}] + If[i == j, 0,
        If[i == 1 && j == 2 || i == 2 && j == 1 || i == 3 && j == 4 ||
          i == 4 && j == 3 || i == 5 && j == 6 || i == 6 && j == 5,
         2 W180, 2 W90]], i, j}, {i, 1, 6}, {j, 1, 6}], {36, 3}];
  (* The index columns are (i + i)/2 = i and (j + j)/2 = j. *)
  (tabs + tab36)/2
  ];
(* XXXX[phi, theta, si] runs up to four update steps (m = 1..4). In each step
   the rows of DSC[phi, theta, si, m], with the f[m - 1, n] values from
   R[m - 1] substituted, are sorted by their first entry; rows k = X..15 are
   scanned, and for a row {value, src, dst} with value < 0 the rule set R[m] is
   built (f[m, dst] gets f[m - 1, dst] + f[m - 1, src], f[m, src] becomes 0,
   the other four are carried over), X is advanced to k + 1 and the sorted
   rules are sown. The result is the list of right-hand sides of the last rule
   set sown.
   Side effects (kept from the original): assigns the globals X, dsc, R[m] and
   DC[phi, theta, si, m].
   Fixes relative to the original:
     - Break was written as a bare symbol, which does nothing; it is now
       Break[], so the k loop really exits where the code asks it to.
     - DSC[...] and the SortBy do not depend on k (R[m - 1] is not modified
       inside the k loop), so they are computed once per m instead of on every
       k and at every use.
   NOTE(review): in the non-negative branch R[m] = R[m - 1] copies rules whose
   left-hand sides are f[m - 1, n] (or older), not f[m, n]; confirm that the
   following step is meant to see those keys. *)
XXXX[phi_, theta_, si_] :=
 Last[Last[
    Last[Reap[X = 1;
      Do[
       (* The original evaluated nothing when the k range was empty. *)
       If[X <= 15,
        dsc = DSC[phi, theta, si, m];
        Module[{ranked = SortBy[dsc /. R[m - 1], First], row, src, dst,
          rest},
         Do[
          row = ranked[[k]];
          If[row[[1]] < 0,
           src = row[[2]]; dst = row[[3]];
           rest = DC[phi, theta, si, m] =
             DeleteCases[DeleteCases[{1, 2, 3, 4, 5, 6}, dst], src];
           R[m] = {f[m, dst] -> f[m - 1, dst] + f[m - 1, src],
              f[m, src] -> 0,
              f[m, rest[[1]]] -> f[m - 1, rest[[1]]],
              f[m, rest[[2]]] -> f[m - 1, rest[[2]]],
              f[m, rest[[3]]] -> f[m - 1, rest[[3]]],
              f[m, rest[[4]]] -> f[m - 1, rest[[4]]]} /. R[m - 1];
           X = k + 1; Sow[Sort[R[m]]];
           (* Stop scanning once the update changed the values. *)
           If[Sort[R[m]][[All, 2]] != Sort[R[m - 1]][[All, 2]],
            Break[]],
           R[m] = R[m - 1]; Break[]],
          {k, X, 15}]]],
       {m, 1, 4}]]]]][[All, 2]]
(* Evaluates XXXX on a grid: phi from 0 to Pi/4 in steps of Pi/56, theta from 0
   to ArcCot[Cos[phi]] in 14 equal steps, and si with start and end both 0.
   Each grid point is stored as {result, phi, theta, si}.
   NOTE(review): per the Wolfram documentation, ParallelEvaluate[expr]
   evaluates expr on every parallel kernel and returns the list of per-kernel
   results, so this does not divide the grid points among the kernels.
   ParallelTable over the same iterators is the usual way to distribute them;
   before switching, confirm that XXXX's global assignments (X, dsc, R[m],
   DC[...]) are acceptable per kernel and that consumers of XX1 can take a
   single result per point instead of a list. *)
XX1 = Table[{ParallelEvaluate[XXXX[phi, theta, si]], phi, theta,
si}, {phi, 0, Pi/4, Pi/56}, {theta, 0, ArcCot[Cos[phi]],
ArcCot[Cos[phi]]/14}, {si, 0 Pi, 0 Pi, 0}]
performance-tuning parallelization
$endgroup$
add a comment |
$begingroup$
I am evaluating code that ends with a Table wrapping ParallelEvaluate of a function XXXX[phi, theta, si]. For a grid of 225 points, an ordinary 2-processor laptop takes 7 h, compared with 8.30 h on a high-end 4-processor Xeon computer. CPU and memory usage for the laptop and the computer are about 66% vs 99% and 700 MB vs 900 MB, respectively. I would be thankful for any suggestions on how to improve the evaluation speed on the computer. Thanks
Ea = 500000;
R[0] = {f[0, 1] -> 1/6, f[0, 2] -> 1/6, f[0, 3] -> 1/6,
f[0, 4] -> 1/6 , f[0, 5] -> 1/6, f[0, 6] -> 1/6};
DF[m_] := Table[f[m, n], {n, 1, 6}];
W90 = Sqrt[2]*P*Ec;
W180 = 2*P*Ec;
P = 0.26;
Ec = 100000;
epsilonApplied[phi_, theta_, si_,
m_] = {{0}, {0}, {0}, {0}, {0}, {0}, {Ea*Sin[theta]*Sin[phi]}, {Ea*
Sin[theta]*Cos[phi]}, {Ea*Cos[theta]}};
SigmaApplied[phi_, theta_, si_,
m_] = {{0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}};
EigenStrain[phi_, theta_, si_,
m_] = {{-0.00333333 (f[m, 1] + f[m, 2]) +
0.00666667 (f[m, 3] + f[m, 4]) -
0.00333333 (f[m, 5] + f[m, 6])}, {-0.00333333 (f[m, 1] +
f[m, 2]) - 0.00333333 (f[m, 3] + f[m, 4]) +
0.00666667 (f[m, 5] + f[m, 6])}, {0.00666667 (f[m, 1] +
f[m, 2]) - 0.00333333 (f[m, 3] + f[m, 4]) -
0.00333333 (f[m, 5] + f[m, 6])}, {0.}, {0.}, {0.}, {0}, {0}, {0}};
ES1 = {{0.768576, 0.00232016, 0.120616, 0, 0, 0, 0,
0, -2.62804*10^-11}, {0.00232016, 0.768576, 0.120616, 0, 0, 0, 0,
0, -2.62804*10^-11}, {0.02064, 0.02064, 0.503286, 0, 0, 0, 0, 0,
0}, {0, 0, 0, -1.14762, 0, 0, 0, -3.52489*10^-10, 0}, {0, 0, 0,
0, -1.14762, 0, -3.52489*10^-10, 0, 0}, {0, 0, 0, 0, 0, 0.191583,
0, 0, 0}, {0, 0, 0, 0, -6.08402*10^7, 0, 0.315028, 0, 0}, {0, 0,
0, -6.08402*10^7, 0, 0, 0, 0.315028, 0}, {2.44999*10^8,
2.44999*10^8, 1.2168*10^8, 0, 0, 0, 0, 0, 0.369943}};
ES = Rationalize[ES1, 10^-16];
E99 = {{1, 0, 0, 0, 0, 0, 0, 0, 0}, {0, 1, 0, 0, 0, 0, 0, 0, 0}, {0,
0, 1, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 1/2, 0, 0, 0, 0, 0}, {0, 0, 0,
0, 1/2, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 1/2, 0, 0, 0}, {0, 0, 0, 0,
0, 0, 1, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 1, 0}, {0, 0, 0, 0, 0, 0, 0,
0, 1}};
Lmatrix = {{166000000000, 77000000000, 78000000000, 0, 0, 0, 0,
0, -4.4}, {77000000000, 166000000000, 78000000000, 0, 0, 0, 0,
0, -4.4}, {78000000000, 78000000000, 162000000000, 0, 0, 0, 0, 0,
18.6}, {0, 0, 0, 43000000000, 0, 0, 0, 11.6, 0}, {0, 0, 0, 0,
43000000000, 0, 11.6, 0, 0}, {0, 0, 0, 0, 0, 44500000000, 0, 0,
0}, {0, 0, 0, 0, 11.6, 0, -1000*8.85*10^-12, 0, 0}, {0, 0, 0,
11.6, 0, 0, 0, -1000*8.85*10^-12, 0}, {-4.4, -4.4, 18.6, 0, 0, 0,
0, 0, -910*8.85*10^-12}};
a[1, 1, phi_, theta_, si_] =
Cos[phi]*Cos[si] - Cos[theta]*Sin[phi]*Sin[si];
a[1, 2, phi_, theta_, si_] =
Cos[si]*Sin[phi] + Cos[theta]*Cos[phi]*Sin[si];
a[1, 3, phi_, theta_, si_] = Sin[theta]*Sin[si];
a[2, 1, phi_, theta_, si_] = -Cos[theta]*Cos[si]*Sin[phi] -
Cos[phi]*Sin[si];
a[2, 2, phi_, theta_, si_] = -Sin[phi]*Sin[si] +
Cos[theta]*Cos[phi]*Cos[si];
a[2, 3, phi_, theta_, si_] = Cos[si]*Sin[theta];
a[3, 1, phi_, theta_, si_] = Sin[theta]*Sin[phi];
a[3, 2, phi_, theta_, si_] = -Cos[phi]*Sin[theta];
a[3, 3, phi_, theta_, si_] = Cos[theta];
CC[1, 1] = CC[2, 2] = 222*10^9;
CC[1, 2] = 108*10^9;
CC[1, 3] = CC[2, 3] = 111*10^9;
CC[3, 3] = 151*10^9;
CC[4, 4] = CC[5, 5] = 61*10^9;
CC[6, 6] = 134*10^9;
CC[1, 4] =
CC[1, 5] =
CC[1, 6] =
CC[2, 4] =
CC[2, 5] =
CC[2, 6] =
CC[3, 4] =
CC[3, 5] = CC[3, 6] = CC[4, 5] = CC[4, 6] = CC[5, 6] = 0;
A = {{1, 1}, {2, 2}, {3, 3}, {2, 3}, {1, 3}, {1, 2}};
B[i_, j_, k_, l_, phi_, theta_, si_] :=
Module[{aA = {A[[i]], A[[j]], A[[k]], A[[l]]} }, {a[Part[aA, 1, 1],
Part[aA, 3, 1], phi, theta, si],
a[Part[aA, 1, 2], Part[aA, 3, 2], phi, theta, si],
a[Part[aA, 2, 1], Part[aA, 4, 1], phi, theta, si],
a[Part[aA, 2, 2], Part[aA, 4, 2], phi, theta, si]}];
F[i_, j_, k_, l_, phi_, theta_, si_] :=
Module[{bB = B[i, j, k, l, phi, theta, si]},
Part[bB, 1]*Part[bB, 2]*Part[bB, 3]*Part[bB, 4]];
cfmat[phi_, theta_, si_, m_] :=
Table[Sum[
F[i, j, k, l, phi, theta, si]*If[k > l, CC[l, k], CC[k, l]], {k,
1, 6}, {l, 1, 6}], {i, 1, 6}, {j, 1, 6}]
Lgrain[phi_, theta_, si_, m_] =
Module[{ rule1 =
Flatten[Table[
Subscript[cf, i, j] -> cfmat[phi, theta, si, m][[i, j]], {i, 1,
6}, {j, 1, 6}]]}, {{Subscript[cf, 1, 1], Subscript[cf, 1, 2],
Subscript[cf, 1, 3], 0, 0, 0, 0, 0, -4.4}, {Subscript[cf, 2,
1], Subscript[cf, 2, 2], Subscript[cf, 2, 3], 0, 0, 0, 0,
0, -4.4}, {Subscript[cf, 3, 1], Subscript[cf, 3, 2], Subscript[
cf, 3, 3], 0, 0, 0, 0, 0, 18.6}, {0, 0, 0, Subscript[cf, 4, 4],
0, 0, 0, 11.6, 0}, {0, 0, 0, 0, Subscript[cf, 5, 5], 0, 11.6, 0,
0}, {0, 0, 0, 0, 0, Subscript[cf, 6, 6], 0, 0, 0}, {0, 0, 0, 0,
11.6, 0, -2200*8.85*10^-12, 0, 0}, {0, 0, 0, 11.6, 0, 0,
0, -2200*8.85*10^-12, 0}, {-4.4, -4.4, 18.6, 0, 0, 0, 0,
0, -56*8.85*10^-12}} /. rule1];
H = Lmatrix.(Inverse[ES] - E99);
TrueStrain[phi_, theta_, si_, m_] =
epsilonApplied[phi, theta, si,
m] + (Inverse[
H + Lgrain[phi, theta, si, m]].(Lgrain[phi, theta, si,
m].EigenStrain[phi, theta, si, m]));
TrueStress[phi_, theta_, si_, m_] =
SigmaApplied[phi, theta, si, m] -
H.(TrueStrain[phi, theta, si, m] -
epsilonApplied[phi, theta, si, m]);
Et[phi_, theta_, si_, m_] =
If[m == 1, Ea,
Sqrt[Part[TrueStrain[phi, theta, si, m - 1], 7]*
Part[TrueStrain[phi, theta, si, m - 1], 7] +
Part[TrueStrain[phi, theta, si, m - 1], 8]*
Part[TrueStrain[phi, theta, si, m - 1], 8] +
Part[TrueStrain[phi, theta, si, m - 1], 9]*
Part[TrueStrain[phi, theta, si, m - 1], 9]]];
PE[phi_, theta_, si_, m_, 1] = -P*Cos[theta]*Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 2] = P*Cos[theta]*Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 3] = -P*Sin[theta]*Cos[phi]*
Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 4] =
P*Sin[theta]*Cos[phi]*Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 5] = -P*Sin[theta]*Sin[phi]*
Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 6] =
P*Sin[theta]*Sin[phi]*Et[phi, theta, si, m];
SE[phi_, theta_, si_,
m_] = -0.5*(Transpose[TrueStress[phi, theta, si, m]].TrueStrain[
phi, theta, si, m]);
SEM[phi_, theta_, si_, m_] = Tr[SE[phi, theta, si, m]];
(* The four indices of {1..6} that remain after removing i and j. Evaluated
   lazily (:=), so it uses whatever values i and j have where it is evaluated;
   inside DSC those are the iterators of the enclosing Table. *)
DC2 := DeleteCases[DeleteCases[{1, 2, 3, 4, 5, 6}, i], j];
(* DSC[phi, theta, si, m] returns a 36x3 list with one row {value, i, j} per
   ordered pair (i, j), i, j = 1..6. For i != j the value is half the sum of
     - 2 (PE[..., j] - PE[..., i]),
     - 2 (SEM at step m - SEM at step m - 1), with f[m, i] -> 0,
       f[m, j] -> f[m - 1, j] + f[m - 1, i] and the other four f[m, n] set to
       f[m - 1, n],
     - 2 W180 for the pairs {1,2}, {3,4}, {5,6} (either order), else 2 W90.
   For i == j only the PE difference contributes. The f[m - 1, n] symbols are
   left in the result; the caller substitutes them.
   Change from the original: cfmat[phi, theta, si, m] is evaluated once and
   indexed, instead of being rebuilt for each of the 36 rule entries. The
   resulting rules are identical. *)
DSC[phi_, theta_, si_, m_] := Module[{diff, tabs, tab36, rules, cfNow},
  (* Loop-invariant: build the 6x6 table once. *)
  cfNow = cfmat[phi, theta, si, m];
  rules =
   Flatten[Table[
     Subscript[cf, i, j] -> cfNow[[i, j]], {i, 1, 6}, {j, 1, 6}]];
  diff =
   2*(SEM[phi, theta, si, m] /. rules) -
    2*(SEM[phi, theta, si, m - 1] /. rules);
  (* Rows {2 (PE_j - PE_i), i, j}; Flatten@*List also removes the extra list
     level that PE can carry. *)
  tabs =
   Flatten[MapIndexed[Flatten@*List,
     Table[2*{PE[phi, theta, si, m, j] -
         PE[phi, theta, si, m, i]}, {i, 1, 6, 1}, {j, 1, 6,
       1}], {2}], 1];
  tab36 =
   ArrayReshape[
    Table[{If[i == j, 0,
        diff /. {f[m, i] -> 0,
          f[m, j] -> f[m - 1, j] + f[m - 1, i],
          f[m, DC2[[1]]] -> f[m - 1, DC2[[1]]],
          f[m, DC2[[4]]] -> f[m - 1, DC2[[4]]],
          f[m, DC2[[2]]] -> f[m - 1, DC2[[2]]],
          f[m, DC2[[3]]] -> f[m - 1, DC2[[3]]]}] + If[i == j, 0,
        If[i == 1 && j == 2 || i == 2 && j == 1 || i == 3 && j == 4 ||
          i == 4 && j == 3 || i == 5 && j == 6 || i == 6 && j == 5,
         2 W180, 2 W90]], i, j}, {i, 1, 6}, {j, 1, 6}], {36, 3}];
  (* The index columns are (i + i)/2 = i and (j + j)/2 = j. *)
  (tabs + tab36)/2
  ];
(* XXXX[phi, theta, si] runs up to four update steps (m = 1..4). In each step
   the rows of DSC[phi, theta, si, m], with the f[m - 1, n] values from
   R[m - 1] substituted, are sorted by their first entry; rows k = X..15 are
   scanned, and for a row {value, src, dst} with value < 0 the rule set R[m] is
   built (f[m, dst] gets f[m - 1, dst] + f[m - 1, src], f[m, src] becomes 0,
   the other four are carried over), X is advanced to k + 1 and the sorted
   rules are sown. The result is the list of right-hand sides of the last rule
   set sown.
   Side effects (kept from the original): assigns the globals X, dsc, R[m] and
   DC[phi, theta, si, m].
   Fixes relative to the original:
     - Break was written as a bare symbol, which does nothing; it is now
       Break[], so the k loop really exits where the code asks it to.
     - DSC[...] and the SortBy do not depend on k (R[m - 1] is not modified
       inside the k loop), so they are computed once per m instead of on every
       k and at every use.
   NOTE(review): in the non-negative branch R[m] = R[m - 1] copies rules whose
   left-hand sides are f[m - 1, n] (or older), not f[m, n]; confirm that the
   following step is meant to see those keys. *)
XXXX[phi_, theta_, si_] :=
 Last[Last[
    Last[Reap[X = 1;
      Do[
       (* The original evaluated nothing when the k range was empty. *)
       If[X <= 15,
        dsc = DSC[phi, theta, si, m];
        Module[{ranked = SortBy[dsc /. R[m - 1], First], row, src, dst,
          rest},
         Do[
          row = ranked[[k]];
          If[row[[1]] < 0,
           src = row[[2]]; dst = row[[3]];
           rest = DC[phi, theta, si, m] =
             DeleteCases[DeleteCases[{1, 2, 3, 4, 5, 6}, dst], src];
           R[m] = {f[m, dst] -> f[m - 1, dst] + f[m - 1, src],
              f[m, src] -> 0,
              f[m, rest[[1]]] -> f[m - 1, rest[[1]]],
              f[m, rest[[2]]] -> f[m - 1, rest[[2]]],
              f[m, rest[[3]]] -> f[m - 1, rest[[3]]],
              f[m, rest[[4]]] -> f[m - 1, rest[[4]]]} /. R[m - 1];
           X = k + 1; Sow[Sort[R[m]]];
           (* Stop scanning once the update changed the values. *)
           If[Sort[R[m]][[All, 2]] != Sort[R[m - 1]][[All, 2]],
            Break[]],
           R[m] = R[m - 1]; Break[]],
          {k, X, 15}]]],
       {m, 1, 4}]]]]][[All, 2]]
XX1 = Table[{ParallelEvaluate[XXXX[phi, theta, si]], phi, theta,
si}, {phi, 0, Pi/4, Pi/56}, {theta, 0, ArcCot[Cos[phi]],
ArcCot[Cos[phi]]/14}, {si, 0 Pi, 0 Pi, 0}]
performance-tuning parallelization
$endgroup$
add a comment |
$begingroup$
I am evaluating code that ends with a Table wrapping ParallelEvaluate of a function XXXX[phi, theta, si]. For a grid of 225 points, an ordinary 2-processor laptop takes 7 h, compared with 8.30 h on a high-end 4-processor Xeon computer. CPU and memory usage for the laptop and the computer are about 66% vs 99% and 700 MB vs 900 MB, respectively. I would be thankful for any suggestions on how to improve the evaluation speed on the computer. Thanks
Ea = 500000;
R[0] = {f[0, 1] -> 1/6, f[0, 2] -> 1/6, f[0, 3] -> 1/6,
f[0, 4] -> 1/6 , f[0, 5] -> 1/6, f[0, 6] -> 1/6};
DF[m_] := Table[f[m, n], {n, 1, 6}];
W90 = Sqrt[2]*P*Ec;
W180 = 2*P*Ec;
P = 0.26;
Ec = 100000;
epsilonApplied[phi_, theta_, si_,
m_] = {{0}, {0}, {0}, {0}, {0}, {0}, {Ea*Sin[theta]*Sin[phi]}, {Ea*
Sin[theta]*Cos[phi]}, {Ea*Cos[theta]}};
SigmaApplied[phi_, theta_, si_,
m_] = {{0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}};
EigenStrain[phi_, theta_, si_,
m_] = {{-0.00333333 (f[m, 1] + f[m, 2]) +
0.00666667 (f[m, 3] + f[m, 4]) -
0.00333333 (f[m, 5] + f[m, 6])}, {-0.00333333 (f[m, 1] +
f[m, 2]) - 0.00333333 (f[m, 3] + f[m, 4]) +
0.00666667 (f[m, 5] + f[m, 6])}, {0.00666667 (f[m, 1] +
f[m, 2]) - 0.00333333 (f[m, 3] + f[m, 4]) -
0.00333333 (f[m, 5] + f[m, 6])}, {0.}, {0.}, {0.}, {0}, {0}, {0}};
ES1 = {{0.768576, 0.00232016, 0.120616, 0, 0, 0, 0,
0, -2.62804*10^-11}, {0.00232016, 0.768576, 0.120616, 0, 0, 0, 0,
0, -2.62804*10^-11}, {0.02064, 0.02064, 0.503286, 0, 0, 0, 0, 0,
0}, {0, 0, 0, -1.14762, 0, 0, 0, -3.52489*10^-10, 0}, {0, 0, 0,
0, -1.14762, 0, -3.52489*10^-10, 0, 0}, {0, 0, 0, 0, 0, 0.191583,
0, 0, 0}, {0, 0, 0, 0, -6.08402*10^7, 0, 0.315028, 0, 0}, {0, 0,
0, -6.08402*10^7, 0, 0, 0, 0.315028, 0}, {2.44999*10^8,
2.44999*10^8, 1.2168*10^8, 0, 0, 0, 0, 0, 0.369943}};
ES = Rationalize[ES1, 10^-16];
E99 = {{1, 0, 0, 0, 0, 0, 0, 0, 0}, {0, 1, 0, 0, 0, 0, 0, 0, 0}, {0,
0, 1, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 1/2, 0, 0, 0, 0, 0}, {0, 0, 0,
0, 1/2, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 1/2, 0, 0, 0}, {0, 0, 0, 0,
0, 0, 1, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 1, 0}, {0, 0, 0, 0, 0, 0, 0,
0, 1}};
Lmatrix = {{166000000000, 77000000000, 78000000000, 0, 0, 0, 0,
0, -4.4}, {77000000000, 166000000000, 78000000000, 0, 0, 0, 0,
0, -4.4}, {78000000000, 78000000000, 162000000000, 0, 0, 0, 0, 0,
18.6}, {0, 0, 0, 43000000000, 0, 0, 0, 11.6, 0}, {0, 0, 0, 0,
43000000000, 0, 11.6, 0, 0}, {0, 0, 0, 0, 0, 44500000000, 0, 0,
0}, {0, 0, 0, 0, 11.6, 0, -1000*8.85*10^-12, 0, 0}, {0, 0, 0,
11.6, 0, 0, 0, -1000*8.85*10^-12, 0}, {-4.4, -4.4, 18.6, 0, 0, 0,
0, 0, -910*8.85*10^-12}};
a[1, 1, phi_, theta_, si_] =
Cos[phi]*Cos[si] - Cos[theta]*Sin[phi]*Sin[si];
a[1, 2, phi_, theta_, si_] =
Cos[si]*Sin[phi] + Cos[theta]*Cos[phi]*Sin[si];
a[1, 3, phi_, theta_, si_] = Sin[theta]*Sin[si];
a[2, 1, phi_, theta_, si_] = -Cos[theta]*Cos[si]*Sin[phi] -
Cos[phi]*Sin[si];
a[2, 2, phi_, theta_, si_] = -Sin[phi]*Sin[si] +
Cos[theta]*Cos[phi]*Cos[si];
a[2, 3, phi_, theta_, si_] = Cos[si]*Sin[theta];
a[3, 1, phi_, theta_, si_] = Sin[theta]*Sin[phi];
a[3, 2, phi_, theta_, si_] = -Cos[phi]*Sin[theta];
a[3, 3, phi_, theta_, si_] = Cos[theta];
CC[1, 1] = CC[2, 2] = 222*10^9;
CC[1, 2] = 108*10^9;
CC[1, 3] = CC[2, 3] = 111*10^9;
CC[3, 3] = 151*10^9;
CC[4, 4] = CC[5, 5] = 61*10^9;
CC[6, 6] = 134*10^9;
CC[1, 4] =
CC[1, 5] =
CC[1, 6] =
CC[2, 4] =
CC[2, 5] =
CC[2, 6] =
CC[3, 4] =
CC[3, 5] = CC[3, 6] = CC[4, 5] = CC[4, 6] = CC[5, 6] = 0;
A = {{1, 1}, {2, 2}, {3, 3}, {2, 3}, {1, 3}, {1, 2}};
B[i_, j_, k_, l_, phi_, theta_, si_] :=
Module[{aA = {A[[i]], A[[j]], A[[k]], A[[l]]} }, {a[Part[aA, 1, 1],
Part[aA, 3, 1], phi, theta, si],
a[Part[aA, 1, 2], Part[aA, 3, 2], phi, theta, si],
a[Part[aA, 2, 1], Part[aA, 4, 1], phi, theta, si],
a[Part[aA, 2, 2], Part[aA, 4, 2], phi, theta, si]}];
F[i_, j_, k_, l_, phi_, theta_, si_] :=
Module[{bB = B[i, j, k, l, phi, theta, si]},
Part[bB, 1]*Part[bB, 2]*Part[bB, 3]*Part[bB, 4]];
cfmat[phi_, theta_, si_, m_] :=
Table[Sum[
F[i, j, k, l, phi, theta, si]*If[k > l, CC[l, k], CC[k, l]], {k,
1, 6}, {l, 1, 6}], {i, 1, 6}, {j, 1, 6}]
Lgrain[phi_, theta_, si_, m_] =
Module[{ rule1 =
Flatten[Table[
Subscript[cf, i, j] -> cfmat[phi, theta, si, m][[i, j]], {i, 1,
6}, {j, 1, 6}]]}, {{Subscript[cf, 1, 1], Subscript[cf, 1, 2],
Subscript[cf, 1, 3], 0, 0, 0, 0, 0, -4.4}, {Subscript[cf, 2,
1], Subscript[cf, 2, 2], Subscript[cf, 2, 3], 0, 0, 0, 0,
0, -4.4}, {Subscript[cf, 3, 1], Subscript[cf, 3, 2], Subscript[
cf, 3, 3], 0, 0, 0, 0, 0, 18.6}, {0, 0, 0, Subscript[cf, 4, 4],
0, 0, 0, 11.6, 0}, {0, 0, 0, 0, Subscript[cf, 5, 5], 0, 11.6, 0,
0}, {0, 0, 0, 0, 0, Subscript[cf, 6, 6], 0, 0, 0}, {0, 0, 0, 0,
11.6, 0, -2200*8.85*10^-12, 0, 0}, {0, 0, 0, 11.6, 0, 0,
0, -2200*8.85*10^-12, 0}, {-4.4, -4.4, 18.6, 0, 0, 0, 0,
0, -56*8.85*10^-12}} /. rule1];
H = Lmatrix.(Inverse[ES] - E99);
TrueStrain[phi_, theta_, si_, m_] =
epsilonApplied[phi, theta, si,
m] + (Inverse[
H + Lgrain[phi, theta, si, m]].(Lgrain[phi, theta, si,
m].EigenStrain[phi, theta, si, m]));
TrueStress[phi_, theta_, si_, m_] =
SigmaApplied[phi, theta, si, m] -
H.(TrueStrain[phi, theta, si, m] -
epsilonApplied[phi, theta, si, m]);
Et[phi_, theta_, si_, m_] =
If[m == 1, Ea,
Sqrt[Part[TrueStrain[phi, theta, si, m - 1], 7]*
Part[TrueStrain[phi, theta, si, m - 1], 7] +
Part[TrueStrain[phi, theta, si, m - 1], 8]*
Part[TrueStrain[phi, theta, si, m - 1], 8] +
Part[TrueStrain[phi, theta, si, m - 1], 9]*
Part[TrueStrain[phi, theta, si, m - 1], 9]]];
PE[phi_, theta_, si_, m_, 1] = -P*Cos[theta]*Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 2] = P*Cos[theta]*Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 3] = -P*Sin[theta]*Cos[phi]*
Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 4] =
P*Sin[theta]*Cos[phi]*Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 5] = -P*Sin[theta]*Sin[phi]*
Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 6] =
P*Sin[theta]*Sin[phi]*Et[phi, theta, si, m];
SE[phi_, theta_, si_,
m_] = -0.5*(Transpose[TrueStress[phi, theta, si, m]].TrueStrain[
phi, theta, si, m]);
SEM[phi_, theta_, si_, m_] = Tr[SE[phi, theta, si, m]];
(* The four indices of {1..6} that remain after removing i and j. Evaluated
   lazily (:=), so it uses whatever values i and j have where it is evaluated;
   inside DSC those are the iterators of the enclosing Table. *)
DC2 := DeleteCases[DeleteCases[{1, 2, 3, 4, 5, 6}, i], j];
(* DSC[phi, theta, si, m] returns a 36x3 list with one row {value, i, j} per
   ordered pair (i, j), i, j = 1..6. For i != j the value is half the sum of
     - 2 (PE[..., j] - PE[..., i]),
     - 2 (SEM at step m - SEM at step m - 1), with f[m, i] -> 0,
       f[m, j] -> f[m - 1, j] + f[m - 1, i] and the other four f[m, n] set to
       f[m - 1, n],
     - 2 W180 for the pairs {1,2}, {3,4}, {5,6} (either order), else 2 W90.
   For i == j only the PE difference contributes. The f[m - 1, n] symbols are
   left in the result; the caller substitutes them.
   Change from the original: cfmat[phi, theta, si, m] is evaluated once and
   indexed, instead of being rebuilt for each of the 36 rule entries. The
   resulting rules are identical. *)
DSC[phi_, theta_, si_, m_] := Module[{diff, tabs, tab36, rules, cfNow},
  (* Loop-invariant: build the 6x6 table once. *)
  cfNow = cfmat[phi, theta, si, m];
  rules =
   Flatten[Table[
     Subscript[cf, i, j] -> cfNow[[i, j]], {i, 1, 6}, {j, 1, 6}]];
  diff =
   2*(SEM[phi, theta, si, m] /. rules) -
    2*(SEM[phi, theta, si, m - 1] /. rules);
  (* Rows {2 (PE_j - PE_i), i, j}; Flatten@*List also removes the extra list
     level that PE can carry. *)
  tabs =
   Flatten[MapIndexed[Flatten@*List,
     Table[2*{PE[phi, theta, si, m, j] -
         PE[phi, theta, si, m, i]}, {i, 1, 6, 1}, {j, 1, 6,
       1}], {2}], 1];
  tab36 =
   ArrayReshape[
    Table[{If[i == j, 0,
        diff /. {f[m, i] -> 0,
          f[m, j] -> f[m - 1, j] + f[m - 1, i],
          f[m, DC2[[1]]] -> f[m - 1, DC2[[1]]],
          f[m, DC2[[4]]] -> f[m - 1, DC2[[4]]],
          f[m, DC2[[2]]] -> f[m - 1, DC2[[2]]],
          f[m, DC2[[3]]] -> f[m - 1, DC2[[3]]]}] + If[i == j, 0,
        If[i == 1 && j == 2 || i == 2 && j == 1 || i == 3 && j == 4 ||
          i == 4 && j == 3 || i == 5 && j == 6 || i == 6 && j == 5,
         2 W180, 2 W90]], i, j}, {i, 1, 6}, {j, 1, 6}], {36, 3}];
  (* The index columns are (i + i)/2 = i and (j + j)/2 = j. *)
  (tabs + tab36)/2
  ];
(* XXXX[phi, theta, si] runs up to four update steps (m = 1..4). In each step
   the rows of DSC[phi, theta, si, m], with the f[m - 1, n] values from
   R[m - 1] substituted, are sorted by their first entry; rows k = X..15 are
   scanned, and for a row {value, src, dst} with value < 0 the rule set R[m] is
   built (f[m, dst] gets f[m - 1, dst] + f[m - 1, src], f[m, src] becomes 0,
   the other four are carried over), X is advanced to k + 1 and the sorted
   rules are sown. The result is the list of right-hand sides of the last rule
   set sown.
   Side effects (kept from the original): assigns the globals X, dsc, R[m] and
   DC[phi, theta, si, m].
   Fixes relative to the original:
     - Break was written as a bare symbol, which does nothing; it is now
       Break[], so the k loop really exits where the code asks it to.
     - DSC[...] and the SortBy do not depend on k (R[m - 1] is not modified
       inside the k loop), so they are computed once per m instead of on every
       k and at every use.
   NOTE(review): in the non-negative branch R[m] = R[m - 1] copies rules whose
   left-hand sides are f[m - 1, n] (or older), not f[m, n]; confirm that the
   following step is meant to see those keys. *)
XXXX[phi_, theta_, si_] :=
 Last[Last[
    Last[Reap[X = 1;
      Do[
       (* The original evaluated nothing when the k range was empty. *)
       If[X <= 15,
        dsc = DSC[phi, theta, si, m];
        Module[{ranked = SortBy[dsc /. R[m - 1], First], row, src, dst,
          rest},
         Do[
          row = ranked[[k]];
          If[row[[1]] < 0,
           src = row[[2]]; dst = row[[3]];
           rest = DC[phi, theta, si, m] =
             DeleteCases[DeleteCases[{1, 2, 3, 4, 5, 6}, dst], src];
           R[m] = {f[m, dst] -> f[m - 1, dst] + f[m - 1, src],
              f[m, src] -> 0,
              f[m, rest[[1]]] -> f[m - 1, rest[[1]]],
              f[m, rest[[2]]] -> f[m - 1, rest[[2]]],
              f[m, rest[[3]]] -> f[m - 1, rest[[3]]],
              f[m, rest[[4]]] -> f[m - 1, rest[[4]]]} /. R[m - 1];
           X = k + 1; Sow[Sort[R[m]]];
           (* Stop scanning once the update changed the values. *)
           If[Sort[R[m]][[All, 2]] != Sort[R[m - 1]][[All, 2]],
            Break[]],
           R[m] = R[m - 1]; Break[]],
          {k, X, 15}]]],
       {m, 1, 4}]]]]][[All, 2]]
XX1 = Table[{ParallelEvaluate[XXXX[phi, theta, si]], phi, theta,
si}, {phi, 0, Pi/4, Pi/56}, {theta, 0, ArcCot[Cos[phi]],
ArcCot[Cos[phi]]/14}, {si, 0 Pi, 0 Pi, 0}]
performance-tuning parallelization
$endgroup$
I am evaluating code that ends with a Table wrapping ParallelEvaluate of a function XXXX[phi, theta, si]. For a grid of 225 points, an ordinary 2-processor laptop takes 7 h, compared with 8.30 h on a high-end 4-processor Xeon computer. CPU and memory usage for the laptop and the computer are about 66% vs 99% and 700 MB vs 900 MB, respectively. I would be thankful for any suggestions on how to improve the evaluation speed on the computer. Thanks
Ea = 500000;
R[0] = {f[0, 1] -> 1/6, f[0, 2] -> 1/6, f[0, 3] -> 1/6,
f[0, 4] -> 1/6 , f[0, 5] -> 1/6, f[0, 6] -> 1/6};
DF[m_] := Table[f[m, n], {n, 1, 6}];
W90 = Sqrt[2]*P*Ec;
W180 = 2*P*Ec;
P = 0.26;
Ec = 100000;
epsilonApplied[phi_, theta_, si_,
m_] = {{0}, {0}, {0}, {0}, {0}, {0}, {Ea*Sin[theta]*Sin[phi]}, {Ea*
Sin[theta]*Cos[phi]}, {Ea*Cos[theta]}};
SigmaApplied[phi_, theta_, si_,
m_] = {{0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}};
EigenStrain[phi_, theta_, si_,
m_] = {{-0.00333333 (f[m, 1] + f[m, 2]) +
0.00666667 (f[m, 3] + f[m, 4]) -
0.00333333 (f[m, 5] + f[m, 6])}, {-0.00333333 (f[m, 1] +
f[m, 2]) - 0.00333333 (f[m, 3] + f[m, 4]) +
0.00666667 (f[m, 5] + f[m, 6])}, {0.00666667 (f[m, 1] +
f[m, 2]) - 0.00333333 (f[m, 3] + f[m, 4]) -
0.00333333 (f[m, 5] + f[m, 6])}, {0.}, {0.}, {0.}, {0}, {0}, {0}};
ES1 = {{0.768576, 0.00232016, 0.120616, 0, 0, 0, 0,
0, -2.62804*10^-11}, {0.00232016, 0.768576, 0.120616, 0, 0, 0, 0,
0, -2.62804*10^-11}, {0.02064, 0.02064, 0.503286, 0, 0, 0, 0, 0,
0}, {0, 0, 0, -1.14762, 0, 0, 0, -3.52489*10^-10, 0}, {0, 0, 0,
0, -1.14762, 0, -3.52489*10^-10, 0, 0}, {0, 0, 0, 0, 0, 0.191583,
0, 0, 0}, {0, 0, 0, 0, -6.08402*10^7, 0, 0.315028, 0, 0}, {0, 0,
0, -6.08402*10^7, 0, 0, 0, 0.315028, 0}, {2.44999*10^8,
2.44999*10^8, 1.2168*10^8, 0, 0, 0, 0, 0, 0.369943}};
ES = Rationalize[ES1, 10^-16];
E99 = {{1, 0, 0, 0, 0, 0, 0, 0, 0}, {0, 1, 0, 0, 0, 0, 0, 0, 0}, {0,
0, 1, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 1/2, 0, 0, 0, 0, 0}, {0, 0, 0,
0, 1/2, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 1/2, 0, 0, 0}, {0, 0, 0, 0,
0, 0, 1, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 1, 0}, {0, 0, 0, 0, 0, 0, 0,
0, 1}};
Lmatrix = {{166000000000, 77000000000, 78000000000, 0, 0, 0, 0,
0, -4.4}, {77000000000, 166000000000, 78000000000, 0, 0, 0, 0,
0, -4.4}, {78000000000, 78000000000, 162000000000, 0, 0, 0, 0, 0,
18.6}, {0, 0, 0, 43000000000, 0, 0, 0, 11.6, 0}, {0, 0, 0, 0,
43000000000, 0, 11.6, 0, 0}, {0, 0, 0, 0, 0, 44500000000, 0, 0,
0}, {0, 0, 0, 0, 11.6, 0, -1000*8.85*10^-12, 0, 0}, {0, 0, 0,
11.6, 0, 0, 0, -1000*8.85*10^-12, 0}, {-4.4, -4.4, 18.6, 0, 0, 0,
0, 0, -910*8.85*10^-12}};
a[1, 1, phi_, theta_, si_] =
Cos[phi]*Cos[si] - Cos[theta]*Sin[phi]*Sin[si];
a[1, 2, phi_, theta_, si_] =
Cos[si]*Sin[phi] + Cos[theta]*Cos[phi]*Sin[si];
a[1, 3, phi_, theta_, si_] = Sin[theta]*Sin[si];
a[2, 1, phi_, theta_, si_] = -Cos[theta]*Cos[si]*Sin[phi] -
Cos[phi]*Sin[si];
a[2, 2, phi_, theta_, si_] = -Sin[phi]*Sin[si] +
Cos[theta]*Cos[phi]*Cos[si];
a[2, 3, phi_, theta_, si_] = Cos[si]*Sin[theta];
a[3, 1, phi_, theta_, si_] = Sin[theta]*Sin[phi];
a[3, 2, phi_, theta_, si_] = -Cos[phi]*Sin[theta];
a[3, 3, phi_, theta_, si_] = Cos[theta];
CC[1, 1] = CC[2, 2] = 222*10^9;
CC[1, 2] = 108*10^9;
CC[1, 3] = CC[2, 3] = 111*10^9;
CC[3, 3] = 151*10^9;
CC[4, 4] = CC[5, 5] = 61*10^9;
CC[6, 6] = 134*10^9;
CC[1, 4] =
CC[1, 5] =
CC[1, 6] =
CC[2, 4] =
CC[2, 5] =
CC[2, 6] =
CC[3, 4] =
CC[3, 5] = CC[3, 6] = CC[4, 5] = CC[4, 6] = CC[5, 6] = 0;
A = {{1, 1}, {2, 2}, {3, 3}, {2, 3}, {1, 3}, {1, 2}};
B[i_, j_, k_, l_, phi_, theta_, si_] :=
Module[{aA = {A[[i]], A[[j]], A[[k]], A[[l]]} }, {a[Part[aA, 1, 1],
Part[aA, 3, 1], phi, theta, si],
a[Part[aA, 1, 2], Part[aA, 3, 2], phi, theta, si],
a[Part[aA, 2, 1], Part[aA, 4, 1], phi, theta, si],
a[Part[aA, 2, 2], Part[aA, 4, 2], phi, theta, si]}];
F[i_, j_, k_, l_, phi_, theta_, si_] :=
Module[{bB = B[i, j, k, l, phi, theta, si]},
Part[bB, 1]*Part[bB, 2]*Part[bB, 3]*Part[bB, 4]];
cfmat[phi_, theta_, si_, m_] :=
Table[Sum[
F[i, j, k, l, phi, theta, si]*If[k > l, CC[l, k], CC[k, l]], {k,
1, 6}, {l, 1, 6}], {i, 1, 6}, {j, 1, 6}]
Lgrain[phi_, theta_, si_, m_] =
Module[{ rule1 =
Flatten[Table[
Subscript[cf, i, j] -> cfmat[phi, theta, si, m][[i, j]], {i, 1,
6}, {j, 1, 6}]]}, {{Subscript[cf, 1, 1], Subscript[cf, 1, 2],
Subscript[cf, 1, 3], 0, 0, 0, 0, 0, -4.4}, {Subscript[cf, 2,
1], Subscript[cf, 2, 2], Subscript[cf, 2, 3], 0, 0, 0, 0,
0, -4.4}, {Subscript[cf, 3, 1], Subscript[cf, 3, 2], Subscript[
cf, 3, 3], 0, 0, 0, 0, 0, 18.6}, {0, 0, 0, Subscript[cf, 4, 4],
0, 0, 0, 11.6, 0}, {0, 0, 0, 0, Subscript[cf, 5, 5], 0, 11.6, 0,
0}, {0, 0, 0, 0, 0, Subscript[cf, 6, 6], 0, 0, 0}, {0, 0, 0, 0,
11.6, 0, -2200*8.85*10^-12, 0, 0}, {0, 0, 0, 11.6, 0, 0,
0, -2200*8.85*10^-12, 0}, {-4.4, -4.4, 18.6, 0, 0, 0, 0,
0, -56*8.85*10^-12}} /. rule1];
H = Lmatrix.(Inverse[ES] - E99);
TrueStrain[phi_, theta_, si_, m_] =
epsilonApplied[phi, theta, si,
m] + (Inverse[
H + Lgrain[phi, theta, si, m]].(Lgrain[phi, theta, si,
m].EigenStrain[phi, theta, si, m]));
TrueStress[phi_, theta_, si_, m_] =
SigmaApplied[phi, theta, si, m] -
H.(TrueStrain[phi, theta, si, m] -
epsilonApplied[phi, theta, si, m]);
Et[phi_, theta_, si_, m_] =
If[m == 1, Ea,
Sqrt[Part[TrueStrain[phi, theta, si, m - 1], 7]*
Part[TrueStrain[phi, theta, si, m - 1], 7] +
Part[TrueStrain[phi, theta, si, m - 1], 8]*
Part[TrueStrain[phi, theta, si, m - 1], 8] +
Part[TrueStrain[phi, theta, si, m - 1], 9]*
Part[TrueStrain[phi, theta, si, m - 1], 9]]];
PE[phi_, theta_, si_, m_, 1] = -P*Cos[theta]*Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 2] = P*Cos[theta]*Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 3] = -P*Sin[theta]*Cos[phi]*
Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 4] =
P*Sin[theta]*Cos[phi]*Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 5] = -P*Sin[theta]*Sin[phi]*
Et[phi, theta, si, m];
PE[phi_, theta_, si_, m_, 6] =
P*Sin[theta]*Sin[phi]*Et[phi, theta, si, m];
SE[phi_, theta_, si_,
m_] = -0.5*(Transpose[TrueStress[phi, theta, si, m]].TrueStrain[
phi, theta, si, m]);
SEM[phi_, theta_, si_, m_] = Tr[SE[phi, theta, si, m]];
(* The four indices of {1..6} that remain after removing i and j. Evaluated
   lazily (:=), so it uses whatever values i and j have where it is evaluated;
   inside DSC those are the iterators of the enclosing Table. *)
DC2 := DeleteCases[DeleteCases[{1, 2, 3, 4, 5, 6}, i], j];
(* DSC[phi, theta, si, m] returns a 36x3 list with one row {value, i, j} per
   ordered pair (i, j), i, j = 1..6. For i != j the value is half the sum of
     - 2 (PE[..., j] - PE[..., i]),
     - 2 (SEM at step m - SEM at step m - 1), with f[m, i] -> 0,
       f[m, j] -> f[m - 1, j] + f[m - 1, i] and the other four f[m, n] set to
       f[m - 1, n],
     - 2 W180 for the pairs {1,2}, {3,4}, {5,6} (either order), else 2 W90.
   For i == j only the PE difference contributes. The f[m - 1, n] symbols are
   left in the result; the caller substitutes them.
   Change from the original: cfmat[phi, theta, si, m] is evaluated once and
   indexed, instead of being rebuilt for each of the 36 rule entries. The
   resulting rules are identical. *)
DSC[phi_, theta_, si_, m_] := Module[{diff, tabs, tab36, rules, cfNow},
  (* Loop-invariant: build the 6x6 table once. *)
  cfNow = cfmat[phi, theta, si, m];
  rules =
   Flatten[Table[
     Subscript[cf, i, j] -> cfNow[[i, j]], {i, 1, 6}, {j, 1, 6}]];
  diff =
   2*(SEM[phi, theta, si, m] /. rules) -
    2*(SEM[phi, theta, si, m - 1] /. rules);
  (* Rows {2 (PE_j - PE_i), i, j}; Flatten@*List also removes the extra list
     level that PE can carry. *)
  tabs =
   Flatten[MapIndexed[Flatten@*List,
     Table[2*{PE[phi, theta, si, m, j] -
         PE[phi, theta, si, m, i]}, {i, 1, 6, 1}, {j, 1, 6,
       1}], {2}], 1];
  tab36 =
   ArrayReshape[
    Table[{If[i == j, 0,
        diff /. {f[m, i] -> 0,
          f[m, j] -> f[m - 1, j] + f[m - 1, i],
          f[m, DC2[[1]]] -> f[m - 1, DC2[[1]]],
          f[m, DC2[[4]]] -> f[m - 1, DC2[[4]]],
          f[m, DC2[[2]]] -> f[m - 1, DC2[[2]]],
          f[m, DC2[[3]]] -> f[m - 1, DC2[[3]]]}] + If[i == j, 0,
        If[i == 1 && j == 2 || i == 2 && j == 1 || i == 3 && j == 4 ||
          i == 4 && j == 3 || i == 5 && j == 6 || i == 6 && j == 5,
         2 W180, 2 W90]], i, j}, {i, 1, 6}, {j, 1, 6}], {36, 3}];
  (* The index columns are (i + i)/2 = i and (j + j)/2 = j. *)
  (tabs + tab36)/2
  ];
(* XXXX[phi, theta, si] runs up to four update steps (m = 1..4). In each step
   the rows of DSC[phi, theta, si, m], with the f[m - 1, n] values from
   R[m - 1] substituted, are sorted by their first entry; rows k = X..15 are
   scanned, and for a row {value, src, dst} with value < 0 the rule set R[m] is
   built (f[m, dst] gets f[m - 1, dst] + f[m - 1, src], f[m, src] becomes 0,
   the other four are carried over), X is advanced to k + 1 and the sorted
   rules are sown. The result is the list of right-hand sides of the last rule
   set sown.
   Side effects (kept from the original): assigns the globals X, dsc, R[m] and
   DC[phi, theta, si, m].
   Fixes relative to the original:
     - Break was written as a bare symbol, which does nothing; it is now
       Break[], so the k loop really exits where the code asks it to.
     - DSC[...] and the SortBy do not depend on k (R[m - 1] is not modified
       inside the k loop), so they are computed once per m instead of on every
       k and at every use.
   NOTE(review): in the non-negative branch R[m] = R[m - 1] copies rules whose
   left-hand sides are f[m - 1, n] (or older), not f[m, n]; confirm that the
   following step is meant to see those keys. *)
XXXX[phi_, theta_, si_] :=
 Last[Last[
    Last[Reap[X = 1;
      Do[
       (* The original evaluated nothing when the k range was empty. *)
       If[X <= 15,
        dsc = DSC[phi, theta, si, m];
        Module[{ranked = SortBy[dsc /. R[m - 1], First], row, src, dst,
          rest},
         Do[
          row = ranked[[k]];
          If[row[[1]] < 0,
           src = row[[2]]; dst = row[[3]];
           rest = DC[phi, theta, si, m] =
             DeleteCases[DeleteCases[{1, 2, 3, 4, 5, 6}, dst], src];
           R[m] = {f[m, dst] -> f[m - 1, dst] + f[m - 1, src],
              f[m, src] -> 0,
              f[m, rest[[1]]] -> f[m - 1, rest[[1]]],
              f[m, rest[[2]]] -> f[m - 1, rest[[2]]],
              f[m, rest[[3]]] -> f[m - 1, rest[[3]]],
              f[m, rest[[4]]] -> f[m - 1, rest[[4]]]} /. R[m - 1];
           X = k + 1; Sow[Sort[R[m]]];
           (* Stop scanning once the update changed the values. *)
           If[Sort[R[m]][[All, 2]] != Sort[R[m - 1]][[All, 2]],
            Break[]],
           R[m] = R[m - 1]; Break[]],
          {k, X, 15}]]],
       {m, 1, 4}]]]]][[All, 2]]
XX1 = Table[{ParallelEvaluate[XXXX[phi, theta, si]], phi, theta,
si}, {phi, 0, Pi/4, Pi/56}, {theta, 0, ArcCot[Cos[phi]],
ArcCot[Cos[phi]]/14}, {si, 0 Pi, 0 Pi, 0}]
performance-tuning parallelization
performance-tuning parallelization
edited 12 hours ago
user49535
asked 17 hours ago
user49535user49535
1465
1465
add a comment |
add a comment |
1 Answer
1
active
oldest
votes
$begingroup$
Without knowing the exact function (I assume it's something fairly long, possibly involving integrals or differential equations), I can only make the following suggestions:
It looks like you're using exact numbers. If this is necessary for your application, then there's probably not a lot you can do, but exact numbers usually slow things down substantially. If you can, use Real numbers (just place a dot after the numbers, like {phi, 0., Pi/4., Pi/56.}). If you need more precision than that but don't necessarily require the infinite precision of exact numbers, you can also do this: {phi, 0`50, Pi/4`50, Pi/56`50}. This will give you 50 digits of precision to work with, which should make your final answer pretty close to the exact answer.
The other thing I would try is:
(* Distributes the grid points across the parallel kernels; each entry is
   {XXXX[phi, theta, si], phi, theta, si}. (A stray "]" after the XXXX call,
   which made the snippet a syntax error, has been removed.) *)
XX1 = ParallelTable[
{XXXX[phi, theta, si], phi, theta, si},
{phi, 0, Pi/4, Pi/56},
{theta, 0, ArcCot[Cos[phi]], ArcCot[Cos[phi]]/14},
{si, 0 Pi, 0 Pi, 0}
]
I think that ParallelTable is a better way to handle this than ParallelEvaluate. On a trial function, I see about a 100x speedup. ParallelEvaluate is simply evaluating your exact same function 4 times at each data point rather than splitting the task into multiple threads.
If you can, combine both things for the best speedup.
I hope this helps a bit! There are some people on here who are amazing at optimizing; perhaps they will be able to improve the speed even more. If it's possible, I would recommend posting your XXXX function unless it's insanely long.
$endgroup$
$begingroup$
Thanks @LukasLang ! How do you type grave accents without it interpreting them as the inline code markers? I tried backslashes before them, but that didn’t help.
$endgroup$
– MassDefect
16 hours ago
1
$begingroup$
You have to increase the amount of enclosing accents: ``` `` Code `with` accents`` ```. If you need double accents, you enclose the code with three, and so on (edit: for some reason, it doesn't work in the comment section - but you can edit your answer to see how it's done)
$endgroup$
– Lukas Lang
16 hours ago
$begingroup$
@LukasLang Oh, I see! Thanks!
$endgroup$
– MassDefect
15 hours ago
$begingroup$
Thanks both of you. Three points: 1. I do not necessarily need to use exact values of (theta, phi); if it speeds things up, I can use ".". 2. I tried to use ParallelTable first, but in contrast to your experience, it took 30h/48h for the 4/2 processor computer as compared to 8h/7h for ParallelEvaluate, i.e. 4 - 8 times slower. 3. How can I combine both... you mean ParallelTable[ParallelEvaluate[...?
$endgroup$
– user49535
14 hours ago
1
$begingroup$
@user49535 As MassDefect already pointed out, using ParallelEvaluate
here does not make sense at all. It enforces that the same value is computed on each of your CPU cores which is why you won't gain any speedup. It really depends on your actual function XXXX
whether ParallelTable
can help at all. If it is a pure function then ParallelTable
should help. But if XXXX
has side effects (like modifying data that has to be used by another thread) then it is hard to parallelize the execution. In a nutshell, we cannot give any further suggestions without knowing XXXX
.
$endgroup$
– Henrik Schumacher
13 hours ago
|
show 2 more comments
Your Answer
// When the "editor" module is in use, load "mathjaxEditing" and register a
// creation callback that passes each new Markdown editor to
// prepareWmdForMathJax together with the "$...$" and "\(...\)" delimiter pairs.
StackExchange.ifUsing("editor", function () {
    var registerMathJaxHook = function () {
        var prepareForMathJax = function (editor, postfix) {
            var delimiters = [["$", "$"], ["\\(","\\)"]];
            StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, delimiters);
        };
        StackExchange.MarkdownEditor.creationCallbacks.add(prepareForMathJax);
    };
    return StackExchange.using("mathjaxEditing", registerMathJaxHook);
}, "mathjax-editing");
// Once the page is ready: initialise the tag renderer, then create the answer
// editor after the "externalEditor" module (and, if enabled, "snippets") loads.
StackExchange.ready(function() {
    var channelOptions = {
        tags: "".split(" "),
        id: "387"
    };
    initTagRenderer("".split(" "), "".split(" "), channelOptions);

    StackExchange.using("externalEditor", function() {
        // Have to fire editor after snippets, if snippets enabled
        if (StackExchange.settings.snippets.snippetsEnabled) {
            StackExchange.using("snippets", function() {
                createEditor();
            });
        }
        else {
            createEditor();
        }
    });

    // Passes the answer-editor configuration to StackExchange.prepareEditor.
    function createEditor() {
        StackExchange.prepareEditor({
            heartbeatType: 'answer',
            autoActivateHeartbeat: false,
            convertImagesToLinks: false,
            noModals: true,
            showLowRepImageUploadWarning: true,
            reputationToPostImages: null,
            bindNavPrevention: true,
            postfix: "",
            imageUploader: {
                // Fix: the embedded HTML needs \" and \u003c / \u003e escapes. Without the
                // backslashes the inner quotes end the string literal early and the
                // whole script fails to parse.
                brandingHtml: "Powered by \u003ca class=\"icon-imgur-white\" href=\"https://imgur.com/\"\u003e\u003c/a\u003e",
                contentPolicyHtml: "User contributions licensed under \u003ca href=\"https://creativecommons.org/licenses/by-sa/3.0/\"\u003ecc by-sa 3.0 with attribution required\u003c/a\u003e \u003ca href=\"https://stackoverflow.com/legal/content-policy\"\u003e(content policy)\u003c/a\u003e",
                allowUrls: true
            },
            onDemand: true,
            discardSelector: ".discard-answer",
            immediatelyShowMarkdownHelp: true
        });
    }
});
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fmathematica.stackexchange.com%2fquestions%2f189731%2fparallele-computing-2-vs-4-processor-speed%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
1 Answer
1
active
oldest
votes
1 Answer
1
active
oldest
votes
active
oldest
votes
active
oldest
votes
$begingroup$
Without knowing the exact function (I assume it's something fairly long, possibly involving integrals or differential equations), I can only make the following suggestions:
It looks like you're using exact numbers. If this is necessary for your application, then there's probably not a lot you can do, but exact numbers usually slow things down substantially. If you can, use Real
numbers (just place a dot after the numbers like {phi, 0., Pi/4., Pi/56.})
. If you need more precision than that but don't necessarily require the infinite precision of exact numbers, you can also do this: {phi, 0`50, Pi/4`50, Pi/56`50}
. This will give you 50 digits of precision to work with which should make your final answer pretty close to the exact answer.
The other thing I would try is:
(* Let ParallelTable iterate over the grid itself; each entry pairs the value of
   XXXX with the (phi, theta, si) values it was evaluated at.
   Fix: removed the stray "]" after XXXX[phi, theta, si], which made the
   expression unparseable. *)
XX1 = ParallelTable[
  {XXXX[phi, theta, si], phi, theta, si},
  {phi, 0, Pi/4, Pi/56},
  {theta, 0, ArcCot[Cos[phi]], ArcCot[Cos[phi]]/14},
  {si, 0 Pi, 0 Pi, 0}
]
I think that ParallelTable
is a better way to handle this than ParallelEvaluate
. On a trial function, I see about a 100x speedup. ParallelEvaluate
is simply evaluating your exact same function 4 times at each data point rather than splitting the task into multiple threads.
If you can, combine both things for the best speedup.
I hope this helps a bit! There are some people on here that are amazing at optimizing, perhaps they will be able to improve the speed even more. If it's possible, I would recommend posting your XXXX
function unless it's insanely long.
$endgroup$
$begingroup$
Thanks @LukasLang ! How do you type grave accents without it interpreting them as the inline code markers? I tried backslashes before them, but that didn’t help.
$endgroup$
– MassDefect
16 hours ago
1
$begingroup$
You have to increase the amount of enclosing accents: ``` `` Code `with` accents`` ```. If you need double accents, you enclose the code with three, and so on (edit: for some reason, it doesn't work in the comment section - but you can edit your answer to see how it's done)
$endgroup$
– Lukas Lang
16 hours ago
$begingroup$
@LukasLang Oh, I see! Thanks!
$endgroup$
– MassDefect
15 hours ago
$begingroup$
Thanks both of you. Three points: 1. I do not necessarily need to use exact values of (theta, phi); if it speeds things up, I can use ".". 2. I tried to use ParallelTable first, but in contrast to your experience, it took 30h/48h for the 4/2 processor computer as compared to 8h/7h for ParallelEvaluate, i.e. 4 - 8 times slower. 3. How can I combine both... you mean ParallelTable[ParallelEvaluate[...?
$endgroup$
– user49535
14 hours ago
1
$begingroup$
@user49535 As MassDefect already pointed out, using ParallelEvaluate
here does not make sense at all. It enforces that the same value is computed on each of your CPU cores which is why you won't gain any speedup. It really depends on your actual function XXXX
whether ParallelTable
can help at all. If it is a pure function then ParallelTable
should help. But if XXXX
has side effects (like modifying data that has to be used by another thread) then it is hard to parallelize the execution. In a nutshell, we cannot give any further suggestions without knowing XXXX
.
$endgroup$
– Henrik Schumacher
13 hours ago
|
show 2 more comments
$begingroup$
Without knowing the exact function (I assume it's something fairly long, possibly involving integrals or differential equations), I can only make the following suggestions:
It looks like you're using exact numbers. If this is necessary for your application, then there's probably not a lot you can do, but exact numbers usually slow things down substantially. If you can, use Real
numbers (just place a dot after the numbers like {phi, 0., Pi/4., Pi/56.})
. If you need more precision than that but don't necessarily require the infinite precision of exact numbers, you can also do this: {phi, 0`50, Pi/4`50, Pi/56`50}
. This will give you 50 digits of precision to work with which should make your final answer pretty close to the exact answer.
The other thing I would try is:
(* Let ParallelTable iterate over the grid itself; each entry pairs the value of
   XXXX with the (phi, theta, si) values it was evaluated at.
   Fix: removed the stray "]" after XXXX[phi, theta, si], which made the
   expression unparseable. *)
XX1 = ParallelTable[
  {XXXX[phi, theta, si], phi, theta, si},
  {phi, 0, Pi/4, Pi/56},
  {theta, 0, ArcCot[Cos[phi]], ArcCot[Cos[phi]]/14},
  {si, 0 Pi, 0 Pi, 0}
]
I think that ParallelTable
is a better way to handle this than ParallelEvaluate
. On a trial function, I see about a 100x speedup. ParallelEvaluate
is simply evaluating your exact same function 4 times at each data point rather than splitting the task into multiple threads.
If you can, combine both things for the best speedup.
I hope this helps a bit! There are some people on here that are amazing at optimizing, perhaps they will be able to improve the speed even more. If it's possible, I would recommend posting your XXXX
function unless it's insanely long.
$endgroup$
$begingroup$
Thanks @LukasLang ! How do you type grave accents without it interpreting them as the inline code markers? I tried backslashes before them, but that didn’t help.
$endgroup$
– MassDefect
16 hours ago
1
$begingroup$
You have to increase the amount of enclosing accents: ``` `` Code `with` accents`` ```. If you need double accents, you enclose the code with three, and so on (edit: for some reason, it doesn't work in the comment section - but you can edit your answer to see how it's done)
$endgroup$
– Lukas Lang
16 hours ago
$begingroup$
@LukasLang Oh, I see! Thanks!
$endgroup$
– MassDefect
15 hours ago
$begingroup$
Thanks both of you. Three points: 1. I do not necessarily need to use exact values of (theta, phi); if it speeds things up, I can use ".". 2. I tried to use ParallelTable first, but in contrast to your experience, it took 30h/48h for the 4/2 processor computer as compared to 8h/7h for ParallelEvaluate, i.e. 4 - 8 times slower. 3. How can I combine both... you mean ParallelTable[ParallelEvaluate[...?
$endgroup$
– user49535
14 hours ago
1
$begingroup$
@user49535 As MassDefect already pointed out, using ParallelEvaluate
here does not make sense at all. It enforces that the same value is computed on each of your CPU cores which is why you won't gain any speedup. It really depends on your actual function XXXX
whether ParallelTable
can help at all. If it is a pure function then ParallelTable
should help. But if XXXX
has side effects (like modifying data that has to be used by another thread) then it is hard to parallelize the execution. In a nutshell, we cannot give any further suggestions without knowing XXXX
.
$endgroup$
– Henrik Schumacher
13 hours ago
|
show 2 more comments
$begingroup$
Without knowing the exact function (I assume it's something fairly long, possibly involving integrals or differential equations), I can only make the following suggestions:
It looks like you're using exact numbers. If this is necessary for your application, then there's probably not a lot you can do, but exact numbers usually slow things down substantially. If you can, use Real
numbers (just place a dot after the numbers like {phi, 0., Pi/4., Pi/56.})
. If you need more precision than that but don't necessarily require the infinite precision of exact numbers, you can also do this: {phi, 0`50, Pi/4`50, Pi/56`50}
. This will give you 50 digits of precision to work with which should make your final answer pretty close to the exact answer.
The other thing I would try is:
(* Let ParallelTable iterate over the grid itself; each entry pairs the value of
   XXXX with the (phi, theta, si) values it was evaluated at.
   Fix: removed the stray "]" after XXXX[phi, theta, si], which made the
   expression unparseable. *)
XX1 = ParallelTable[
  {XXXX[phi, theta, si], phi, theta, si},
  {phi, 0, Pi/4, Pi/56},
  {theta, 0, ArcCot[Cos[phi]], ArcCot[Cos[phi]]/14},
  {si, 0 Pi, 0 Pi, 0}
]
I think that ParallelTable
is a better way to handle this than ParallelEvaluate
. On a trial function, I see about a 100x speedup. ParallelEvaluate
is simply evaluating your exact same function 4 times at each data point rather than splitting the task into multiple threads.
If you can, combine both things for the best speedup.
I hope this helps a bit! There are some people on here that are amazing at optimizing, perhaps they will be able to improve the speed even more. If it's possible, I would recommend posting your XXXX
function unless it's insanely long.
$endgroup$
Without knowing the exact function (I assume it's something fairly long, possibly involving integrals or differential equations), I can only make the following suggestions:
It looks like you're using exact numbers. If this is necessary for your application, then there's probably not a lot you can do, but exact numbers usually slow things down substantially. If you can, use Real
numbers (just place a dot after the numbers like {phi, 0., Pi/4., Pi/56.})
. If you need more precision than that but don't necessarily require the infinite precision of exact numbers, you can also do this: {phi, 0`50, Pi/4`50, Pi/56`50}
. This will give you 50 digits of precision to work with which should make your final answer pretty close to the exact answer.
The other thing I would try is:
(* Let ParallelTable iterate over the grid itself; each entry pairs the value of
   XXXX with the (phi, theta, si) values it was evaluated at.
   Fix: removed the stray "]" after XXXX[phi, theta, si], which made the
   expression unparseable. *)
XX1 = ParallelTable[
  {XXXX[phi, theta, si], phi, theta, si},
  {phi, 0, Pi/4, Pi/56},
  {theta, 0, ArcCot[Cos[phi]], ArcCot[Cos[phi]]/14},
  {si, 0 Pi, 0 Pi, 0}
]
I think that ParallelTable
is a better way to handle this than ParallelEvaluate
. On a trial function, I see about a 100x speedup. ParallelEvaluate
is simply evaluating your exact same function 4 times at each data point rather than splitting the task into multiple threads.
If you can, combine both things for the best speedup.
I hope this helps a bit! There are some people on here that are amazing at optimizing, perhaps they will be able to improve the speed even more. If it's possible, I would recommend posting your XXXX
function unless it's insanely long.
edited 16 hours ago
Lukas Lang
6,6651930
6,6651930
answered 17 hours ago
MassDefectMassDefect
86628
86628
$begingroup$
Thanks @LukasLang ! How do you type grave accents without it interpreting them as the inline code markers? I tried backslashes before them, but that didn’t help.
$endgroup$
– MassDefect
16 hours ago
1
$begingroup$
You have to increase the amount of enclosing accents: ``` `` Code `with` accents`` ```. If you need double accents, you enclose the code with three, and so on (edit: for some reason, it doesn't work in the comment section - but you can edit your answer to see how it's done)
$endgroup$
– Lukas Lang
16 hours ago
$begingroup$
@LukasLang Oh, I see! Thanks!
$endgroup$
– MassDefect
15 hours ago
$begingroup$
Thanks both of you. Three points: 1. I do not necessarily need to use exact values of (theta, phi); if it speeds things up, I can use ".". 2. I tried to use ParallelTable first, but in contrast to your experience, it took 30h/48h for the 4/2 processor computer as compared to 8h/7h for ParallelEvaluate, i.e. 4 - 8 times slower. 3. How can I combine both... you mean ParallelTable[ParallelEvaluate[...?
$endgroup$
– user49535
14 hours ago
1
$begingroup$
@user49535 As MassDefect already pointed out, using ParallelEvaluate
here does not make sense at all. It enforces that the same value is computed on each of your CPU cores which is why you won't gain any speedup. It really depends on your actual function XXXX
whether ParallelTable
can help at all. If it is a pure function then ParallelTable
should help. But if XXXX
has side effects (like modifying data that has to be used by another thread) then it is hard to parallelize the execution. In a nutshell, we cannot give any further suggestions without knowing XXXX
.
$endgroup$
– Henrik Schumacher
13 hours ago
|
show 2 more comments
$begingroup$
Thanks @LukasLang ! How do you type grave accents without it interpreting them as the inline code markers? I tried backslashes before them, but that didn’t help.
$endgroup$
– MassDefect
16 hours ago
1
$begingroup$
You have to increase the amount of enclosing accents: ``` `` Code `with` accents`` ```. If you need double accents, you enclose the code with three, and so on (edit: for some reason, it doesn't work in the comment section - but you can edit your answer to see how it's done)
$endgroup$
– Lukas Lang
16 hours ago
$begingroup$
@LukasLang Oh, I see! Thanks!
$endgroup$
– MassDefect
15 hours ago
$begingroup$
Thanks both of you. Three points: 1. I do not necessarily need to use exact values of (theta, phi); if it speeds things up, I can use ".". 2. I tried to use ParallelTable first, but in contrast to your experience, it took 30h/48h for the 4/2 processor computer as compared to 8h/7h for ParallelEvaluate, i.e. 4 - 8 times slower. 3. How can I combine both... you mean ParallelTable[ParallelEvaluate[...?
$endgroup$
– user49535
14 hours ago
1
$begingroup$
@user49535 As MassDefect already pointed out, using ParallelEvaluate
here does not make sense at all. It enforces that the same value is computed on each of your CPU cores which is why you won't gain any speedup. It really depends on your actual function XXXX
whether ParallelTable
can help at all. If it is a pure function then ParallelTable
should help. But if XXXX
has side effects (like modifying data that has to be used by another thread) then it is hard to parallelize the execution. In a nutshell, we cannot give any further suggestions without knowing XXXX
.
$endgroup$
– Henrik Schumacher
13 hours ago
$begingroup$
Thanks @LukasLang ! How do you type grave accents without it interpreting them as the inline code markers? I tried backslashes before them, but that didn’t help.
$endgroup$
– MassDefect
16 hours ago
$begingroup$
Thanks @LukasLang ! How do you type grave accents without it interpreting them as the inline code markers? I tried backslashes before them, but that didn’t help.
$endgroup$
– MassDefect
16 hours ago
1
1
$begingroup$
You have to increase the amount of enclosing accents: ``` `` Code
with
accents`` ```. If you need double accents, you enclose the code with three, and so on (edit: for some reason, it doesn't work in the comment section - but you can edit your answer to see how it's done)$endgroup$
– Lukas Lang
16 hours ago
$begingroup$
You have to increase the amount of enclosing accents: ``` `` Code
with
accents`` ```. If you need double accents, you enclose the code with three, and so on (edit: for some reason, it doesn't work in the comment section - but you can edit your answer to see how it's done)$endgroup$
– Lukas Lang
16 hours ago
$begingroup$
@LukasLang Oh, I see! Thanks!
$endgroup$
– MassDefect
15 hours ago
$begingroup$
@LukasLang Oh, I see! Thanks!
$endgroup$
– MassDefect
15 hours ago
$begingroup$
Thanks both of you. Three points: 1. I do not necessarily need to use exact values of (theta, phi); if it speeds things up, I can use ".". 2. I tried to use ParallelTable first, but in contrast to your experience, it took 30h/48h for the 4/2 processor computer as compared to 8h/7h for ParallelEvaluate, i.e. 4 - 8 times slower. 3. How can I combine both... you mean ParallelTable[ParallelEvaluate[...?
$endgroup$
– user49535
14 hours ago
$begingroup$
Thanks both of you. Three points: 1. I do not necessarily need to use exact values of (theta, phi); if it speeds things up, I can use ".". 2. I tried to use ParallelTable first, but in contrast to your experience, it took 30h/48h for the 4/2 processor computer as compared to 8h/7h for ParallelEvaluate, i.e. 4 - 8 times slower. 3. How can I combine both... you mean ParallelTable[ParallelEvaluate[...?
$endgroup$
– user49535
14 hours ago
1
1
$begingroup$
@user49535 As MassDefect already pointed out, using
ParallelEvaluate
here does not make sense at all. It enforces that the same value is computed on each of your CPU cores which is why you won't gain any speedup. It really depends on your actual function XXXX
whether ParallelTable
can help at all. If it is a pure function then ParallelTable
should help. But if XXXX
has side effects (like modifying data that has to be used by another thread) then it is hard to parallelize the execution. In a nutshell, we cannot give any further suggestions without knowing XXXX
.$endgroup$
– Henrik Schumacher
13 hours ago
$begingroup$
@user49535 As MassDefect already pointed out, using
ParallelEvaluate
here does not make sense at all. It enforces that the same value is computed on each of your CPU cores which is why you won't gain any speedup. It really depends on your actual function XXXX
whether ParallelTable
can help at all. If it is a pure function then ParallelTable
should help. But if XXXX
has side effects (like modifying data that has to be used by another thread) then it is hard to parallelize the execution. In a nutshell, we cannot give any further suggestions without knowing XXXX
.$endgroup$
– Henrik Schumacher
13 hours ago
|
show 2 more comments
Thanks for contributing an answer to Mathematica Stack Exchange!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
Use MathJax to format equations. MathJax reference.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fmathematica.stackexchange.com%2fquestions%2f189731%2fparallele-computing-2-vs-4-processor-speed%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown