From cb2203997643e591fad354adba6e3b12a074c1c8 Mon Sep 17 00:00:00 2001 From: Miguel Barão Date: Thu, 1 Feb 2018 00:22:34 +0000 Subject: [PATCH] - much faster (6.5x) initdb by using threads for bcrypt. --- BUGS.md | 8 +++++--- initdb.py | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------------------- 2 files changed, 91 insertions(+), 55 deletions(-) diff --git a/BUGS.md b/BUGS.md index 8b55138..03ec903 100644 --- a/BUGS.md +++ b/BUGS.md @@ -12,11 +12,12 @@ # TODO -- radio e checkboxes, aceitar numeros como seleccao das opcoes. -- each topic only loads a sample of K questions (max) in random order. +- adicionar codigo para radio e checkboxes onde em vez de se dar uma lista de opcoes, dão-se 2 listas uma de opcoes correctas e outra de erradas. - servir imagens/ficheiros. +- each topic only loads a sample of K questions (max) in random order. +- radio e checkboxes, aceitar numeros como seleccao das opcoes. +- reload das perguntas enquanto online. ver signal em http://stackabuse.com/python-async-await-tutorial/ - pertuntas tipo tristate: (sim, não, não sei -- reload das perguntas enquanto online. - tabela de progresso de todos os alunos por topico. - tabela com perguntas / quantidade de respostas certas/erradas. - tabela com topicos / quantidade de estrelas. @@ -30,6 +31,7 @@ # FIXED +- async/threadpool no bcrypt do initdb. - numero de estrelas depende da proporcao entre certas e erradas. - image brand da universidade está esbatida. - reportar comentarios após submeter. 
diff --git a/initdb.py b/initdb.py index 9bffd43..a111375 100755 --- a/initdb.py +++ b/initdb.py @@ -1,17 +1,38 @@ #!/usr/bin/env python3 +# base import csv import argparse import re import string from sys import exit +from concurrent.futures import ThreadPoolExecutor +from multiprocessing import cpu_count +import asyncio +# installed packages import bcrypt -from sqlalchemy import create_engine -from sqlalchemy.orm import sessionmaker +import sqlalchemy as sa +# this project from models import Base, Student +pool = ThreadPoolExecutor() #cpu_count() + +# replace password by hash for a single student dict +def hashpw(student): + pw = student.get('pw', student['uid']).encode('utf-8') + print('.', end='', flush=True) + hashed_pw = bcrypt.hashpw(pw, bcrypt.gensalt()) + student['pw'] = hashed_pw + + +async def hash_all_passwords(executor, students): + loop = asyncio.get_event_loop() + tasks = [loop.run_in_executor(executor, hashpw, s) for s in students] + await asyncio.wait(tasks) # block until all tasks are done + print() + # SIIUE names have alien strings like "(TE)" and are sometimes capitalized # We remove them so that students dont keep asking what it means def fix(name): @@ -19,74 +40,83 @@ def fix(name): # =========================================================================== # Parse command line options -argparser = argparse.ArgumentParser( - description='Create new database from a CSV file (SIIUE format)') +def parse_commandline_arguments(): + argparser = argparse.ArgumentParser( + description='Create new database from a CSV file (SIIUE format)') -argparser.add_argument('--db', - default='students.db', - type=str, - help='database filename') + argparser.add_argument('--db', + default='students.db', + type=str, + help='database filename') -argparser.add_argument('--demo', - action='store_true', - help='initialize database with a few fake students') + argparser.add_argument('--demo', + action='store_true', + help='initialize database with a few fake 
students') -argparser.add_argument('--pw', - default='', - type=str, - help='default password') + # FIXME + # argparser.add_argument('--pw', + # default='', + # type=str, + # help='default password') -argparser.add_argument('csvfile', - nargs='?', - type=str, - default='', - help='CSV filename') + argparser.add_argument('csvfile', + nargs='?', + type=str, + default='', + help='CSV filename') -args = argparser.parse_args() + return argparser.parse_args() -# =======================================================x==================== -engine = create_engine(f'sqlite:///{args.db}', echo=False) -Base.metadata.create_all(engine) # Criate schema if needed -Session = sessionmaker(bind=engine) - -# add administrator -students = {'0': 'Professor'} - -if args.csvfile: - # add students from csv file if available +# =========================================================================== +def get_students_from_csv(filename): try: - csvreader = csv.DictReader(open(args.csvfile, encoding='iso-8859-1'), delimiter=';', quotechar='"', skipinitialspace=True) + csvreader = csv.DictReader(open(filename, encoding='iso-8859-1'), delimiter=';', quotechar='"', skipinitialspace=True) except EnvironmentError: - print(f'Error: CSV file "{args.csvfile}" not found.') + print(f'Error: CSV file "{filename}" not found.') exit(1) - students.update({s['N.º']: fix(s['Nome']) for s in csvreader}) + students = [{ + 'uid': s['N.º'], + 'name': fix(s['Nome']) + } for s in csvreader] + + return students +# =========================================================================== +args = parse_commandline_arguments() + +if args.csvfile: + students = get_students_from_csv(args.csvfile) elif args.demo: - # add a few fake students - students.update({ - '1915': 'Alan Turing', - '1938': 'Donald Knuth', - '1815': 'Ada Lovelace', - '1969': 'Linus Torvalds', - '1955': 'Tim Burners-Lee', - '1916': 'Claude Shannon', - '1903': 'John von Neumann', - }) - -print(f'Generating {len(students)} bcrypt password hashes. 
This will take some time...') + students = [ + {'uid': '1915', 'name': 'Alan Turing'}, + {'uid': '1938', 'name': 'Donald Knuth'}, + {'uid': '1815', 'name': 'Ada Lovelace'}, + {'uid': '1969', 'name': 'Linus Torvalds'}, + {'uid': '1955', 'name': 'Tim Burners-Lee'}, + {'uid': '1916', 'name': 'Claude Shannon'}, + {'uid': '1903', 'name': 'John von Neumann'}] +students.append({'uid': '0', 'name': 'Admin'}) + +print(f'Generating {len(students)} bcrypt password hashes.') +executor = ThreadPoolExecutor(cpu_count()) +event_loop = asyncio.get_event_loop() +event_loop.run_until_complete(hash_all_passwords(executor, students)) +event_loop.close() + +print(f'Creating database: {args.db}') +engine = sa.create_engine(f'sqlite:///{args.db}', echo=False) +Base.metadata.create_all(engine) # Create schema if needed +Session = sa.orm.sessionmaker(bind=engine) try: # --- start db session --- session = Session() - for num, name in students.items(): - print('.', end='', flush=True) - pw = (args.pw or num).encode('utf-8') - session.add(Student(id=num, name=name, password=bcrypt.hashpw(pw, bcrypt.gensalt()))) - print() + session.add_all([Student(id=s['uid'], name=s['name'], password=s['pw']) + for s in students]) n = session.query(Student).count() - print(f'New database created: {args.db}\n{n} user(s) inserted:') + print(f'{n} user(s):') users = session.query(Student).order_by(Student.id).all() print(f' {users[0].id:8} - {users[0].name} (administrator)') @@ -97,6 +127,10 @@ try: if n > 2: print(f' {users[-1].id:8} - {users[-1].name}') +except sa.exc.IntegrityError: + print('!!! Integrity error !!!') + session.rollback() + except Exception as e: print(f'Error: Database "{args.db}" already exists?') session.rollback() -- libgit2 0.21.2