I wrote the piece of code below to generate dummy data and load it into MongoDB.
Two issues:
1. The Customer class contains a Subscriber class, so it is nested one level: Customer --> Subscriber. I don't think I'm doing this right:
self.subscriber = Subscriber().returnJson()
It does work, but what if I need to add another class under Subscriber? It will become messy.
2. Performance - very bad:
10 documents:
real 0m20.371s
user 0m2.691s
sys 0m0.736s
for 1000 documents - very bad:
real 5m8.299s
user 1m40.819s
sys 0m44.380s
The generation of documents is the bottleneck - not the actual load.
Clearly the multiprocessing didn't work well, as performance is really bad. Second, I collect all the documents in a list (see mongo_docs) and at the end use insert_many. I think that is right, but it doesn't show a meaningful improvement compared to insert_one.
Would appreciate any help.
from faker import Faker
import json
from multiprocessing.pool import ThreadPool as Pool
import concurrent.futures
import random
from motor.motor_asyncio import AsyncIOMotorClient
from config.config import DB, CONF,trace_request,redis_client
# Number of worker threads for the (legacy) thread pool used in __main__.
pool_size = 10
# Shared accumulator for generated documents; bulk-inserted at the end.
mongo_docs = []
# Closed sets of dummy values for the generated customer fields.
customer_type_options = ['Individual', 'Organization']
# Fixed typo: 'Suspened' -> 'Suspended'.
customer_status_options = ['Active', 'Cancelled', 'Suspended', 'Collection']
class Subscriber():
    """Dummy subscriber record: a single random 8-digit subscriber number."""

    def __init__(self):
        # NOTE: the original built a Faker() instance here but never used it.
        # Faker construction is expensive (it loads all providers), so the
        # unused instance was a pure per-document cost and has been removed.
        self.subscriber_no = random.randint(10000000, 99999999)

    def returnJson(self):
        # Serialize to a plain dict suitable for JSON / MongoDB embedding.
        return dict(subscriber_no=self.subscriber_no)
class Customer():
    """Dummy customer document with one nested subscriber sub-document."""

    # One shared Faker instance for all customers. Constructing Faker() in
    # every __init__ (as the original did) loads all locale providers per
    # document and was the dominant generation cost.
    _fake = Faker()

    def __init__(self):
        self.customer_no = random.randint(10000000, 99999999)
        self.customer_type = random.choice(customer_type_options)
        self.customer_status = random.choice(customer_status_options)
        self.address = Customer._fake.address()
        # Nested sub-document, stored already serialized so returnJson()
        # can embed it directly.
        self.subscriber = Subscriber().returnJson()

    def returnJson(self):
        """Return the customer as a plain dict ready for MongoDB insertion."""
        return {'customer_no': self.customer_no,
                'customer_type': self.customer_type,
                'customer_status': self.customer_status,
                'address': self.address,
                'subscriber': self.subscriber
                }
class ComplexEncoder(json.JSONEncoder):
    """JSON encoder that serializes any object exposing a returnJson() method."""

    def default(self, obj):
        # Objects that know how to serialize themselves do so; everything
        # else falls through to the base encoder (which raises TypeError).
        render = getattr(obj, 'returnJson', None)
        if render is not None:
            return render()
        return super().default(obj)
def LoadOToMongo(Customer):
    """Serialize one customer and stage it in the shared mongo_docs buffer."""
    doc = Customer.returnJson()
    mongo_docs.append(doc)
if __name__ == "__main__":
    no_of_input = 2
    # Document generation is CPU-bound (Faker/random under the GIL), so a
    # thread pool only adds scheduling overhead and gives no parallelism;
    # apply_async also made the insertion order nondeterministic. A plain
    # comprehension is both faster and deterministic. For true parallelism
    # a process pool would be needed, but the real win is sharing a single
    # Faker instance instead of building one per document.
    docs = [Customer().returnJson() for _ in range(no_of_input)]
    # One bulk insert: a single round-trip instead of one per document.
    DB.customer.insert_many(docs)