下面是C++版本,其中每个节点的数据收集到一个结构中,并使用该结构的单个向量:
#include <vector>
#include <cmath>
#include <iostream>
class FloodIsolation {
public:
FloodIsolation() :
numberOfCells(20000),
data(numberOfCells)
{
}
~FloodIsolation(){
}
void isUpdateNeeded() {
for (int i = 0; i < numberOfCells; ++i) {
data[i].h = data[i].h + 1;
data[i].floodedCells = !data[i].floodedCells;
data[i].floodedCellsTimeInterval = !data[i].floodedCellsTimeInterval;
data[i].qInflow = data[i].qInflow + 1;
data[i].qStartTime = data[i].qStartTime + 1;
data[i].qEndTime = data[i].qEndTime + 1;
data[i].lowerFloorCells = data[i].lowerFloorCells + 1;
data[i].cellLocationX = data[i].cellLocationX + 1;
data[i].cellLocationY = data[i].cellLocationY + 1;
data[i].cellLocationZ = data[i].cellLocationZ + 1;
data[i].levelOfCell = data[i].levelOfCell + 1;
data[i].valueOfCellIds = data[i].valueOfCellIds + 1;
data[i].h0 = data[i].h0 + 1;
data[i].vU = data[i].vU + 1;
data[i].vV = data[i].vV + 1;
data[i].vUh = data[i].vUh + 1;
data[i].vVh = data[i].vVh + 1;
data[i].vUh0 = data[i].vUh0 + 1;
data[i].vVh0 = data[i].vVh0 + 1;
data[i].ghh = data[i].ghh + 1;
data[i].sfx = data[i].sfx + 1;
data[i].sfy = data[i].sfy + 1;
data[i].qIn = data[i].qIn + 1;
for(int j = 0; j < nEdges; ++j) {
data[i].flagInterface[j] = !data[i].flagInterface[j];
data[i].typeInterface[j] = data[i].typeInterface[j] + 1;
data[i].neighborIds[j] = data[i].neighborIds[j] + 1;
}
}
}
private:
const int numberOfCells;
static const int nEdges = 6;
struct data_t {
bool floodedCells = 0;
bool floodedCellsTimeInterval = 0;
double valueOfCellIds = 0;
double h = 0;
double h0 = 0;
double vU = 0;
double vV = 0;
double vUh = 0;
double vVh = 0;
double vUh0 = 0;
double vVh0 = 0;
double ghh = 0;
double sfx = 0;
double sfy = 0;
double qInflow = 0;
double qStartTime = 0;
double qEndTime = 0;
double qIn = 0;
double nx = 0;
double ny = 0;
double floorLevels = 0;
int lowerFloorCells = 0;
bool floorCompleteleyFilled = 0;
double cellLocationX = 0;
double cellLocationY = 0;
double cellLocationZ = 0;
int levelOfCell = 0;
bool flagInterface[nEdges] = {};
int typeInterface[nEdges] = {};
int neighborIds[nEdges] = {};
};
std::vector<data_t> data;
};
int main() {
std::ios_base::sync_with_stdio(false);
FloodIsolation isolation;
clock_t start = clock();
for (int i = 0; i < 400; ++i) {
if(i % 100 == 0) {
std::cout << i << "\n";
}
isolation.isUpdateNeeded();
}
clock_t stop = clock();
std::cout << "Time: " << difftime(stop, start) / 1000 << "\n";
}
现场示例
现在,时间的速度是 Java 版本的 2 倍。(846与1631)。
JIT注意到到处都是访问数据的缓存刻录,并将您的代码转换为逻辑上相似但更有效的顺序。
我还关闭了 stdio 同步,因为只有当您将 / 与 C++ 和 .碰巧的是,您只打印出几个值,但C++的默认打印行为过于偏执且效率低下。printf
scanf
std::cout
std::cin
如果不是实际的常量值,则必须将 3 个“数组”值从 中剥离出来。这不应该造成巨大的性能损失。nEdges
struct
您可以通过减小大小对值进行排序来获得另一个性能提升,从而减少内存占用(并在无关紧要时进行排序访问)。但我不确定。struct
经验法则是,单个缓存未命中比指令贵 100 倍。安排数据以使其具有缓存一致性具有很大的价值。
如果将数据重新排列为 不可行,则可以将迭代更改为依次在每个容器上。struct
顺便说一句,请注意,Java和C++版本有一些细微的差异。我发现的是,Java版本在“for each edge”循环中有3个变量,而C++一个只有2个变量。我让我的Java匹配。我不知道是否有其他人。