1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950 |
- import argparse
- import numpy as np
- import os
- from xgboost_ray.tests.utils import create_parquet
def build_parser():
    """Build the command-line parser for the fake-data generator.

    Arguments defined:
        filename: optional positional output path; falls back to
            ``/data/parted.parquet/`` when omitted.
        -r / --num-rows: number of rows (int, default 10**8).
        -p / --num-partitions: number of partitions (int, default 100).
        -c / --num-cols: number of feature columns (int, default 4).
        -C / --num-classes: number of classes (int, default 2).
        -s / --seed: seed passed to ``np.random.seed`` (int, default 1234).

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(description="Create fake data.")
    # nargs="?" is what makes the default reachable: without it argparse
    # treats a positional as required and silently ignores ``default``.
    parser.add_argument(
        "filename",
        type=str,
        nargs="?",
        default="/data/parted.parquet/",
        help="output path of the parquet dataset",
    )
    # The default must already be an int: argparse only runs ``type`` over
    # string defaults, so a float literal such as 1e8 would leak through as
    # a float.
    parser.add_argument(
        "-r", "--num-rows", required=False, type=int, default=10**8, help="num rows"
    )
    parser.add_argument(
        "-p",
        "--num-partitions",
        required=False,
        type=int,
        default=100,
        help="num partitions",
    )
    parser.add_argument(
        "-c",
        "--num-cols",
        required=False,
        type=int,
        default=4,
        help="num columns (features)",
    )
    parser.add_argument(
        "-C", "--num-classes", required=False, type=int, default=2, help="num classes"
    )
    parser.add_argument(
        "-s", "--seed", required=False, type=int, default=1234, help="random seed"
    )
    return parser


if __name__ == "__main__":
    # Remove OMP_NUM_THREADS from this process's environment if it is set.
    # NOTE(review): presumably so an inherited thread cap does not limit the
    # data generation below -- confirm.
    os.environ.pop("OMP_NUM_THREADS", None)

    args = build_parser().parse_args()

    # Seed NumPy's global RNG before generating the data.
    np.random.seed(args.seed)

    create_parquet(
        args.filename,
        num_rows=args.num_rows,
        num_partitions=args.num_partitions,
        num_features=args.num_cols,
        num_classes=args.num_classes,
    )
|