Commit cb2203997643e591fad354adba6e3b12a074c1c8
1 parent
dd4d2655
Exists in
master
and in
1 other branch
- much faster (6.5x) initdb by using threads for bcrypt.
Showing
2 changed files
with
91 additions
and
55 deletions
Show diff stats
BUGS.md
@@ -12,11 +12,12 @@ | @@ -12,11 +12,12 @@ | ||
12 | 12 | ||
13 | # TODO | 13 | # TODO |
14 | 14 | ||
15 | -- radio e checkboxes, aceitar numeros como seleccao das opcoes. | ||
16 | -- each topic only loads a sample of K questions (max) in random order. | 15 | +- adicionar codigo para radio e checkboxes onde em vez de se dar uma lista de opcoes, dão-se 2 listas uma de opcoes correctas e outra de erradas. |
17 | - servir imagens/ficheiros. | 16 | - servir imagens/ficheiros. |
17 | +- each topic only loads a sample of K questions (max) in random order. | ||
18 | +- radio e checkboxes, aceitar numeros como seleccao das opcoes. | ||
19 | +- reload das perguntas enquanto online. ver signal em http://stackabuse.com/python-async-await-tutorial/ | ||
18 | - perguntas tipo tristate: (sim, não, não sei) | 20 | - perguntas tipo tristate: (sim, não, não sei) |
19 | -- reload das perguntas enquanto online. | ||
20 | - tabela de progresso de todos os alunos por topico. | 21 | - tabela de progresso de todos os alunos por topico. |
21 | - tabela com perguntas / quantidade de respostas certas/erradas. | 22 | - tabela com perguntas / quantidade de respostas certas/erradas. |
22 | - tabela com topicos / quantidade de estrelas. | 23 | - tabela com topicos / quantidade de estrelas. |
@@ -30,6 +31,7 @@ | @@ -30,6 +31,7 @@ | ||
30 | 31 | ||
31 | # FIXED | 32 | # FIXED |
32 | 33 | ||
34 | +- async/threadpool no bcrypt do initdb. | ||
33 | - numero de estrelas depende da proporcao entre certas e erradas. | 35 | - numero de estrelas depende da proporcao entre certas e erradas. |
34 | - image brand da universidade está esbatida. | 36 | - image brand da universidade está esbatida. |
35 | - reportar comentarios após submeter. | 37 | - reportar comentarios após submeter. |
initdb.py
1 | #!/usr/bin/env python3 | 1 | #!/usr/bin/env python3 |
2 | 2 | ||
3 | +# base | ||
3 | import csv | 4 | import csv |
4 | import argparse | 5 | import argparse |
5 | import re | 6 | import re |
6 | import string | 7 | import string |
7 | from sys import exit | 8 | from sys import exit |
9 | +from concurrent.futures import ThreadPoolExecutor | ||
10 | +from multiprocessing import cpu_count | ||
11 | +import asyncio | ||
8 | 12 | ||
13 | +# installed packages | ||
9 | import bcrypt | 14 | import bcrypt |
10 | -from sqlalchemy import create_engine | ||
11 | -from sqlalchemy.orm import sessionmaker | 15 | +import sqlalchemy as sa |
12 | 16 | ||
17 | +# this project | ||
13 | from models import Base, Student | 18 | from models import Base, Student |
14 | 19 | ||
20 | +pool = ThreadPoolExecutor() #cpu_count() | ||
21 | + | ||
22 | +# replace password by hash for a single student dict | ||
23 | +def hashpw(student): | ||
24 | + pw = student.get('pw', student['uid']).encode('utf-8') | ||
25 | + print('.', end='', flush=True) | ||
26 | + hashed_pw = bcrypt.hashpw(pw, bcrypt.gensalt()) | ||
27 | + student['pw'] = hashed_pw | ||
28 | + | ||
29 | + | ||
30 | +async def hash_all_passwords(executor, students): | ||
31 | + loop = asyncio.get_event_loop() | ||
32 | + tasks = [loop.run_in_executor(executor, hashpw, s) for s in students] | ||
33 | + await asyncio.wait(tasks) # block until all tasks are done | ||
34 | + print() | ||
35 | + | ||
15 | # SIIUE names have alien strings like "(TE)" and are sometimes capitalized | 36 | # SIIUE names have alien strings like "(TE)" and are sometimes capitalized |
16 | # We remove them so that students don't keep asking what they mean | 37 | # We remove them so that students don't keep asking what they mean |
17 | def fix(name): | 38 | def fix(name): |
@@ -19,74 +40,83 @@ def fix(name): | @@ -19,74 +40,83 @@ def fix(name): | ||
19 | 40 | ||
20 | # =========================================================================== | 41 | # =========================================================================== |
21 | # Parse command line options | 42 | # Parse command line options |
22 | -argparser = argparse.ArgumentParser( | ||
23 | - description='Create new database from a CSV file (SIIUE format)') | 43 | +def parse_commandline_arguments(): |
44 | + argparser = argparse.ArgumentParser( | ||
45 | + description='Create new database from a CSV file (SIIUE format)') | ||
24 | 46 | ||
25 | -argparser.add_argument('--db', | ||
26 | - default='students.db', | ||
27 | - type=str, | ||
28 | - help='database filename') | 47 | + argparser.add_argument('--db', |
48 | + default='students.db', | ||
49 | + type=str, | ||
50 | + help='database filename') | ||
29 | 51 | ||
30 | -argparser.add_argument('--demo', | ||
31 | - action='store_true', | ||
32 | - help='initialize database with a few fake students') | 52 | + argparser.add_argument('--demo', |
53 | + action='store_true', | ||
54 | + help='initialize database with a few fake students') | ||
33 | 55 | ||
34 | -argparser.add_argument('--pw', | ||
35 | - default='', | ||
36 | - type=str, | ||
37 | - help='default password') | 56 | + # FIXME |
57 | + # argparser.add_argument('--pw', | ||
58 | + # default='', | ||
59 | + # type=str, | ||
60 | + # help='default password') | ||
38 | 61 | ||
39 | -argparser.add_argument('csvfile', | ||
40 | - nargs='?', | ||
41 | - type=str, | ||
42 | - default='', | ||
43 | - help='CSV filename') | 62 | + argparser.add_argument('csvfile', |
63 | + nargs='?', | ||
64 | + type=str, | ||
65 | + default='', | ||
66 | + help='CSV filename') | ||
44 | 67 | ||
45 | -args = argparser.parse_args() | 68 | + return argparser.parse_args() |
46 | 69 | ||
47 | -# =======================================================x==================== | ||
48 | -engine = create_engine(f'sqlite:///{args.db}', echo=False) | ||
49 | -Base.metadata.create_all(engine) # Criate schema if needed | ||
50 | -Session = sessionmaker(bind=engine) | ||
51 | - | ||
52 | -# add administrator | ||
53 | -students = {'0': 'Professor'} | ||
54 | - | ||
55 | -if args.csvfile: | ||
56 | - # add students from csv file if available | 70 | +# =========================================================================== |
71 | +def get_students_from_csv(filename): | ||
57 | try: | 72 | try: |
58 | - csvreader = csv.DictReader(open(args.csvfile, encoding='iso-8859-1'), delimiter=';', quotechar='"', skipinitialspace=True) | 73 | + csvreader = csv.DictReader(open(filename, encoding='iso-8859-1'), delimiter=';', quotechar='"', skipinitialspace=True) |
59 | except EnvironmentError: | 74 | except EnvironmentError: |
60 | - print(f'Error: CSV file "{args.csvfile}" not found.') | 75 | + print(f'Error: CSV file "{filename}" not found.') |
61 | exit(1) | 76 | exit(1) |
62 | - students.update({s['N.º']: fix(s['Nome']) for s in csvreader}) | 77 | + students = [{ |
78 | + 'uid': s['N.º'], | ||
79 | + 'name': fix(s['Nome']) | ||
80 | + } for s in csvreader] | ||
81 | + | ||
82 | + return students | ||
63 | 83 | ||
84 | +# =========================================================================== | ||
85 | +args = parse_commandline_arguments() | ||
86 | + | ||
87 | +if args.csvfile: | ||
88 | + students = get_students_from_csv(args.csvfile) | ||
64 | elif args.demo: | 89 | elif args.demo: |
65 | - # add a few fake students | ||
66 | - students.update({ | ||
67 | - '1915': 'Alan Turing', | ||
68 | - '1938': 'Donald Knuth', | ||
69 | - '1815': 'Ada Lovelace', | ||
70 | - '1969': 'Linus Torvalds', | ||
71 | - '1955': 'Tim Burners-Lee', | ||
72 | - '1916': 'Claude Shannon', | ||
73 | - '1903': 'John von Neumann', | ||
74 | - }) | ||
75 | - | ||
76 | -print(f'Generating {len(students)} bcrypt password hashes. This will take some time...') | 90 | + students = [ |
91 | + {'uid': '1915', 'name': 'Alan Turing'}, | ||
92 | + {'uid': '1938', 'name': 'Donald Knuth'}, | ||
93 | + {'uid': '1815', 'name': 'Ada Lovelace'}, | ||
94 | + {'uid': '1969', 'name': 'Linus Torvalds'}, | ||
95 | + {'uid': '1955', 'name': 'Tim Burners-Lee'}, | ||
96 | + {'uid': '1916', 'name': 'Claude Shannon'}, | ||
97 | + {'uid': '1903', 'name': 'John von Neumann'}] | ||
98 | +students.append({'uid': '0', 'name': 'Admin'}) | ||
99 | + | ||
100 | +print(f'Generating {len(students)} bcrypt password hashes.') | ||
101 | +executor = ThreadPoolExecutor(cpu_count()) | ||
102 | +event_loop = asyncio.get_event_loop() | ||
103 | +event_loop.run_until_complete(hash_all_passwords(executor, students)) | ||
104 | +event_loop.close() | ||
105 | + | ||
106 | +print(f'Creating database: {args.db}') | ||
107 | +engine = sa.create_engine(f'sqlite:///{args.db}', echo=False) | ||
108 | + Base.metadata.create_all(engine) # Create schema if needed | ||
109 | +Session = sa.orm.sessionmaker(bind=engine) | ||
77 | 110 | ||
78 | try: | 111 | try: |
79 | # --- start db session --- | 112 | # --- start db session --- |
80 | session = Session() | 113 | session = Session() |
81 | 114 | ||
82 | - for num, name in students.items(): | ||
83 | - print('.', end='', flush=True) | ||
84 | - pw = (args.pw or num).encode('utf-8') | ||
85 | - session.add(Student(id=num, name=name, password=bcrypt.hashpw(pw, bcrypt.gensalt()))) | ||
86 | - print() | 115 | + session.add_all([Student(id=s['uid'], name=s['name'], password=s['pw']) |
116 | + for s in students]) | ||
87 | 117 | ||
88 | n = session.query(Student).count() | 118 | n = session.query(Student).count() |
89 | - print(f'New database created: {args.db}\n{n} user(s) inserted:') | 119 | + print(f'{n} user(s):') |
90 | 120 | ||
91 | users = session.query(Student).order_by(Student.id).all() | 121 | users = session.query(Student).order_by(Student.id).all() |
92 | print(f' {users[0].id:8} - {users[0].name} (administrator)') | 122 | print(f' {users[0].id:8} - {users[0].name} (administrator)') |
@@ -97,6 +127,10 @@ try: | @@ -97,6 +127,10 @@ try: | ||
97 | if n > 2: | 127 | if n > 2: |
98 | print(f' {users[-1].id:8} - {users[-1].name}') | 128 | print(f' {users[-1].id:8} - {users[-1].name}') |
99 | 129 | ||
130 | +except sa.exc.IntegrityError: | ||
131 | + print('!!! Integrity error !!!') | ||
132 | + session.rollback() | ||
133 | + | ||
100 | except Exception as e: | 134 | except Exception as e: |
101 | print(f'Error: Database "{args.db}" already exists?') | 135 | print(f'Error: Database "{args.db}" already exists?') |
102 | session.rollback() | 136 | session.rollback() |