Commit cb2203997643e591fad354adba6e3b12a074c1c8

Authored by Miguel Barão
1 parent dd4d2655
Exists in master and in 1 other branch dev

- much faster (6.5x) initdb by using threads for bcrypt.

Showing 2 changed files with 91 additions and 55 deletions   Show diff stats
@@ -12,11 +12,12 @@ @@ -12,11 +12,12 @@
12 12
13 # TODO 13 # TODO
14 14
15 -- radio e checkboxes, aceitar numeros como seleccao das opcoes.  
16 -- each topic only loads a sample of K questions (max) in random order. 15 +- adicionar codigo para radio e checkboxes onde em vez de se dar uma lista de opcoes, dão-se 2 listas uma de opcoes correctas e outra de erradas.
17 - servir imagens/ficheiros. 16 - servir imagens/ficheiros.
  17 +- each topic only loads a sample of K questions (max) in random order.
  18 +- radio e checkboxes, aceitar numeros como seleccao das opcoes.
  19 +- reload das perguntas enquanto online. ver signal em http://stackabuse.com/python-async-await-tutorial/
18 - perguntas tipo tristate: (sim, não, não sei) 20 - perguntas tipo tristate: (sim, não, não sei)
19 -- reload das perguntas enquanto online.  
20 - tabela de progresso de todos os alunos por topico. 21 - tabela de progresso de todos os alunos por topico.
21 - tabela com perguntas / quantidade de respostas certas/erradas. 22 - tabela com perguntas / quantidade de respostas certas/erradas.
22 - tabela com topicos / quantidade de estrelas. 23 - tabela com topicos / quantidade de estrelas.
@@ -30,6 +31,7 @@ @@ -30,6 +31,7 @@
30 31
31 # FIXED 32 # FIXED
32 33
  34 +- async/threadpool no bcrypt do initdb.
33 - numero de estrelas depende da proporcao entre certas e erradas. 35 - numero de estrelas depende da proporcao entre certas e erradas.
34 - image brand da universidade está esbatida. 36 - image brand da universidade está esbatida.
35 - reportar comentarios após submeter. 37 - reportar comentarios após submeter.
1 #!/usr/bin/env python3 1 #!/usr/bin/env python3
2 2
  3 +# base
3 import csv 4 import csv
4 import argparse 5 import argparse
5 import re 6 import re
6 import string 7 import string
7 from sys import exit 8 from sys import exit
  9 +from concurrent.futures import ThreadPoolExecutor
  10 +from multiprocessing import cpu_count
  11 +import asyncio
8 12
  13 +# installed packages
9 import bcrypt 14 import bcrypt
10 -from sqlalchemy import create_engine  
11 -from sqlalchemy.orm import sessionmaker 15 +import sqlalchemy as sa
12 16
  17 +# this project
13 from models import Base, Student 18 from models import Base, Student
14 19
  20 +pool = ThreadPoolExecutor() #cpu_count()
  21 +
  22 +# replace password by hash for a single student dict
  23 +def hashpw(student):
  24 + pw = student.get('pw', student['uid']).encode('utf-8')
  25 + print('.', end='', flush=True)
  26 + hashed_pw = bcrypt.hashpw(pw, bcrypt.gensalt())
  27 + student['pw'] = hashed_pw
  28 +
  29 +
  30 +async def hash_all_passwords(executor, students):
  31 + loop = asyncio.get_event_loop()
  32 + tasks = [loop.run_in_executor(executor, hashpw, s) for s in students]
  33 + await asyncio.wait(tasks) # block until all tasks are done
  34 + print()
  35 +
15 # SIIUE names have alien strings like "(TE)" and are sometimes capitalized 36 # SIIUE names have alien strings like "(TE)" and are sometimes capitalized
16 # We remove them so that students don't keep asking what they mean 37 # We remove them so that students don't keep asking what they mean
17 def fix(name): 38 def fix(name):
@@ -19,74 +40,83 @@ def fix(name): @@ -19,74 +40,83 @@ def fix(name):
19 40
20 # =========================================================================== 41 # ===========================================================================
21 # Parse command line options 42 # Parse command line options
22 -argparser = argparse.ArgumentParser(  
23 - description='Create new database from a CSV file (SIIUE format)') 43 +def parse_commandline_arguments():
  44 + argparser = argparse.ArgumentParser(
  45 + description='Create new database from a CSV file (SIIUE format)')
24 46
25 -argparser.add_argument('--db',  
26 - default='students.db',  
27 - type=str,  
28 - help='database filename') 47 + argparser.add_argument('--db',
  48 + default='students.db',
  49 + type=str,
  50 + help='database filename')
29 51
30 -argparser.add_argument('--demo',  
31 - action='store_true',  
32 - help='initialize database with a few fake students') 52 + argparser.add_argument('--demo',
  53 + action='store_true',
  54 + help='initialize database with a few fake students')
33 55
34 -argparser.add_argument('--pw',  
35 - default='',  
36 - type=str,  
37 - help='default password') 56 + # FIXME: --pw is disabled; hashpw() falls back to the student's uid as the password
  57 + # argparser.add_argument('--pw',
  58 + # default='',
  59 + # type=str,
  60 + # help='default password')
38 61
39 -argparser.add_argument('csvfile',  
40 - nargs='?',  
41 - type=str,  
42 - default='',  
43 - help='CSV filename') 62 + argparser.add_argument('csvfile',
  63 + nargs='?',
  64 + type=str,
  65 + default='',
  66 + help='CSV filename')
44 67
45 -args = argparser.parse_args() 68 + return argparser.parse_args()
46 69
47 -# =======================================================x====================  
48 -engine = create_engine(f'sqlite:///{args.db}', echo=False)  
49 -Base.metadata.create_all(engine) # Criate schema if needed  
50 -Session = sessionmaker(bind=engine)  
51 -  
52 -# add administrator  
53 -students = {'0': 'Professor'}  
54 -  
55 -if args.csvfile:  
56 - # add students from csv file if available 70 +# ===========================================================================
  71 +def get_students_from_csv(filename):
57 try: 72 try:
58 - csvreader = csv.DictReader(open(args.csvfile, encoding='iso-8859-1'), delimiter=';', quotechar='"', skipinitialspace=True) 73 + csvreader = csv.DictReader(open(filename, encoding='iso-8859-1'), delimiter=';', quotechar='"', skipinitialspace=True)
59 except EnvironmentError: 74 except EnvironmentError:
60 - print(f'Error: CSV file "{args.csvfile}" not found.') 75 + print(f'Error: CSV file "{filename}" not found.')
61 exit(1) 76 exit(1)
62 - students.update({s['N.º']: fix(s['Nome']) for s in csvreader}) 77 + students = [{
  78 + 'uid': s['N.º'],
  79 + 'name': fix(s['Nome'])
  80 + } for s in csvreader]
  81 +
  82 + return students
63 83
  84 +# ===========================================================================
  85 +args = parse_commandline_arguments()
  86 +
  87 +if args.csvfile:
  88 + students = get_students_from_csv(args.csvfile)
64 elif args.demo: 89 elif args.demo:
65 - # add a few fake students  
66 - students.update({  
67 - '1915': 'Alan Turing',  
68 - '1938': 'Donald Knuth',  
69 - '1815': 'Ada Lovelace',  
70 - '1969': 'Linus Torvalds',  
71 - '1955': 'Tim Burners-Lee',  
72 - '1916': 'Claude Shannon',  
73 - '1903': 'John von Neumann',  
74 - })  
75 -  
76 -print(f'Generating {len(students)} bcrypt password hashes. This will take some time...') 90 + students = [
  91 + {'uid': '1915', 'name': 'Alan Turing'},
  92 + {'uid': '1938', 'name': 'Donald Knuth'},
  93 + {'uid': '1815', 'name': 'Ada Lovelace'},
  94 + {'uid': '1969', 'name': 'Linus Torvalds'},
  95 + {'uid': '1955', 'name': 'Tim Burners-Lee'},
  96 + {'uid': '1916', 'name': 'Claude Shannon'},
  97 + {'uid': '1903', 'name': 'John von Neumann'}]
  98 +students.append({'uid': '0', 'name': 'Admin'})
  99 +
  100 +print(f'Generating {len(students)} bcrypt password hashes.')
  101 +executor = ThreadPoolExecutor(cpu_count())
  102 +event_loop = asyncio.get_event_loop()
  103 +event_loop.run_until_complete(hash_all_passwords(executor, students))
  104 +event_loop.close()
  105 +
  106 +print(f'Creating database: {args.db}')
  107 +engine = sa.create_engine(f'sqlite:///{args.db}', echo=False)
  108 +Base.metadata.create_all(engine) # Create schema if needed
  109 +Session = sa.orm.sessionmaker(bind=engine)
77 110
78 try: 111 try:
79 # --- start db session --- 112 # --- start db session ---
80 session = Session() 113 session = Session()
81 114
82 - for num, name in students.items():  
83 - print('.', end='', flush=True)  
84 - pw = (args.pw or num).encode('utf-8')  
85 - session.add(Student(id=num, name=name, password=bcrypt.hashpw(pw, bcrypt.gensalt())))  
86 - print() 115 + session.add_all([Student(id=s['uid'], name=s['name'], password=s['pw'])
  116 + for s in students])
87 117
88 n = session.query(Student).count() 118 n = session.query(Student).count()
89 - print(f'New database created: {args.db}\n{n} user(s) inserted:') 119 + print(f'{n} user(s):')
90 120
91 users = session.query(Student).order_by(Student.id).all() 121 users = session.query(Student).order_by(Student.id).all()
92 print(f' {users[0].id:8} - {users[0].name} (administrator)') 122 print(f' {users[0].id:8} - {users[0].name} (administrator)')
@@ -97,6 +127,10 @@ try: @@ -97,6 +127,10 @@ try:
97 if n > 2: 127 if n > 2:
98 print(f' {users[-1].id:8} - {users[-1].name}') 128 print(f' {users[-1].id:8} - {users[-1].name}')
99 129
  130 +except sa.exc.IntegrityError:
  131 + print('!!! Integrity error !!!')
  132 + session.rollback()
  133 +
100 except Exception as e: 134 except Exception as e:
101 print(f'Error: Database "{args.db}" already exists?') 135 print(f'Error: Database "{args.db}" already exists?')
102 session.rollback() 136 session.rollback()