6 #ifndef _UTILS_MICRODATASET_H_
7 #define _UTILS_MICRODATASET_H_
46 std::random_device rd;
47 std::default_random_engine e1;
76 template<
class dType=u
int32_t>
78 vector<dType> ru(len);
80 for (
size_t i = 0; i < len; i++) {
93 template<
class tsType=
size_t>
94 vector<tsType>
genZipfInt(
size_t len, tsType maxV,
double fac) {
95 vector<tsType> ret(len);
96 vector<tsType> alphabet = genIncrementalAlphabet<tsType>(maxV);
99 gen = std::mt19937_64(rd());
101 gen = std::mt19937_64(seed);
104 std::uniform_real_distribution<> dis(0, 1);
105 vector<double> lut = genZipfLut<double>(maxV, fac);
106 for (
size_t i = 0; i < len; i++) {
111 size_t right = maxV - 1;
118 while (right - left > 1) {
119 m = (left + right) / 2;
129 ret[i] = alphabet[pos];
147 template<
class tsType=u
int32_t,
class genType=std::mt19937>
148 vector<tsType>
genRandInt(
size_t len, tsType maxV, tsType minV = 0) {
155 std::uniform_int_distribution<> dis(minV, maxV);
156 vector<tsType> ret(len);
157 for (
size_t i = 0; i < len; i++) {
158 ret[i] = (tsType) dis(gen);
169 template<
class dType=
double>
171 dType scaling_factor;
173 vector<dType> lut(len);
180 scaling_factor = 0.0;
181 for (
size_t i = 1; i <= len; i++) { scaling_factor += 1.0 / pow(i, fac); }
186 for (
size_t i = 1; i <= len; i++) {
187 sum += 1.0 / std::pow(i, fac);
188 lut[i - 1] = sum / scaling_factor;
209 template<
class tsType=
size_t>
211 vector<tsType> ret(len);
213 for (
auto i = 0; i < len; i++) {
214 if (i % (step) == 0) {
230 template<
class tsType=
size_t>
232 vector<tsType> ret = genZipfInt<tsType>(len, maxTime, fac);
233 std::sort(ret.begin(), ret.end());
The all-in-one class for the Micro dataset.
Definition: MicroDataSet.hpp:44
MicroDataSet(uint64_t _seed)
Constructor that takes a seed.
Definition: MicroDataSet.hpp:61
MicroDataSet()
Default constructor, with an automatic random generator.
Definition: MicroDataSet.hpp:54
vector< tsType > genZipfInt(size_t len, tsType maxV, double fac)
Generates a vector of integers that follow a Zipf distribution.
Definition: MicroDataSet.hpp:94
vector< tsType > genRandInt(size_t len, tsType maxV, tsType minV=0)
Generates a vector of random integers.
Definition: MicroDataSet.hpp:148
vector< dType > genZipfLut(size_t len, dType fac)
Generates the Zipf lookup table (LUT).
Definition: MicroDataSet.hpp:170
vector< dType > genIncrementalAlphabet(size_t len)
Generates an incremental alphabet, starting from 0 and ending at len.
Definition: MicroDataSet.hpp:77
vector< tsType > genSmoothTimeStamp(size_t len, size_t step, size_t interval)
Generates a vector of timestamps that grow smoothly.
Definition: MicroDataSet.hpp:210
vector< tsType > genZipfTimeStamp(size_t len, tsType maxTime, double fac)
Generates a vector of timestamps that follow a Zipf distribution.
Definition: MicroDataSet.hpp:231
Definition: DatasetTool.h:10