Coverage for /home/ubuntu/hidebound/python/hidebound/core/validators.py: 100%

1from typing import Any, Callable, List, Union # noqa F401

2from schematics.models import Model # noqa F401

4from collections import Counter

5from itertools import product

6from pathlib import Path

7import os

8import re

10from pandas import DataFrame

11from pyparsing import ParseException

12from schematics.exceptions import DataError, ValidationError

13import wrapt

15from hidebound.core.parser import AssetNameParser

16# ------------------------------------------------------------------------------

19'''

20The validators module is a function library for validating singular traits given

21to a specification.

23Validators are linked with traits via the validators kwarg of a

24specification class attribute. They succeed silently and raise DataError when

25the trait they validate fails. Schematics captures these error messages and

26pipes them to an error call.

27'''

def validate(message):
    # type: (str) -> Callable
    '''
    A decorator for predicate functions that raises a ValidationError
    if the predicate returns a falsy value.

    Positional and keyword arguments are both forwarded to the predicate.
    Their string forms, positional first and keyword values after, are used
    as the positional fields of the error message.

    Args:
        message (str): Error message if predicate returns False. It is
            formatted with str.format.

    Raises:
        ValidationError: If predicate returns False.

    Returns:
        function: Decorator for a predicate. The decorated predicate returns
            None when validation succeeds.
    '''
    @wrapt.decorator
    def wrapper(wrapped, instance, args, kwargs):
        # forward keyword arguments too, otherwise a keyword call reaches the
        # predicate with missing parameters
        if not wrapped(*args, **kwargs):
            values = list(args) + list(kwargs.values())
            # repeat the values so that a message may reference them through
            # several consecutive "{}" fields
            fields = [str(x) for x in values] * 10
            raise ValidationError(message.format(*fields))
        return
    return wrapper

def validate_each(message, list_first_arg=False):
    # type: (str, bool) -> Callable
    '''
    A decorator for predicate functions that raises a ValidationError
    if it returns False when applied to each argument individually.

    The first positional argument is the value under validation. When it is a
    list (and list_first_arg is False) the predicate is called once per
    element. Remaining positional and keyword arguments are forwarded to every
    call. The first argument must be passed positionally.

    Args:
        message (str): Error message if predicate returns False. It is
            formatted with the failing element, then the extra positional
            arguments, then the keyword argument values.
        list_first_arg (bool, optional): Set to True if first argument is a
            list that should be validated as a single value.
            Default: False.

    Raises:
        ValidationError: If predicate returns False.

    Returns:
        function: Decorator for a predicate. The decorated predicate returns
            None when validation succeeds.
    '''
    @wrapt.decorator
    def wrapper(wrapped, instance, args, kwargs):
        first = args[0]
        extra_args = args[1:]

        # a non-list value, or a list that is itself the value, is validated
        # as one element
        if list_first_arg or not isinstance(first, list):
            candidates = [first]
        else:
            candidates = first

        for candidate in candidates:
            # forward keyword arguments too, otherwise they are silently lost
            if not wrapped(candidate, *extra_args, **kwargs):
                msg = message.format(
                    candidate, *extra_args, *kwargs.values()
                )
                raise ValidationError(msg)
        return
    return wrapper

89# VALIDATORS--------------------------------------------------------------------

@validate_each('"{}" is not a valid project name.')
def is_project(item):
    # type: (str) -> bool
    '''
    Validates a project name.

    A project name must be parsable by AssetNameParser as a project field and
    consist solely of lowercase letters and digits.

    Args:
        item (str): Project name.

    Raises:
        ValidationError: If project name is invalid.

    Returns:
        bool: Validity of project name.
    '''
    indicator = AssetNameParser.PROJECT_INDICATOR
    try:
        AssetNameParser(['project']).parse(indicator + item)
    except ParseException:
        return False  # pragma: no cover

    # fullmatch, unlike search with "$", also rejects a trailing newline
    return re.fullmatch('[a-z0-9]+', item) is not None

115

116

@validate_each('"{}" is not a valid descriptor.')
def is_descriptor(item):
    # type: (str) -> bool
    '''
    Validates a descriptor.

    A descriptor must be parsable by AssetNameParser as a descriptor field,
    consist solely of lowercase letters, digits and hyphens, and not begin
    with master, final or last.

    Args:
        item (str): Descriptor.

    Raises:
        ValidationError: If descriptor is invalid.

    Returns:
        bool: Validity of descriptor.
    '''
    indicator = AssetNameParser.DESCRIPTOR_INDICATOR
    try:
        AssetNameParser(['descriptor']).parse(indicator + item)
    except ParseException:
        return False  # pragma: no cover

    # fullmatch requires at least one character, so no separate length check
    # is needed, and unlike search with "$" it rejects a trailing newline
    if re.fullmatch('[a-z0-9-]+', item) is None:
        return False  # pragma: no cover

    # the master/final/last asset is never actually that
    # asset should only ever be thought of in terms of latest version
    if item.startswith(('master', 'final', 'last')):
        return False  # pragma: no cover

    return True

150

151

@validate_each('{} is not a valid version. 0 < version < 1000.')
def is_version(item):
    # type: (int) -> bool
    '''
    Checks that a version is positive and fits within the version padding
    defined by AssetNameParser.

    Args:
        item (int): Version.

    Raises:
        ValidationError: If version is invalid.

    Returns:
        bool: Validity of version.
    '''
    return 0 < item < 10 ** AssetNameParser.VERSION_PADDING

168

169

@validate_each('{} is not a valid frame. -1 < frame < 10000.')
def is_frame(item):
    # type: (int) -> bool
    '''
    Checks that a frame is non-negative and fits within the frame padding
    defined by AssetNameParser.

    Args:
        item (int): Frame.

    Raises:
        ValidationError: If frame is invalid.

    Returns:
        bool: Validity of frame.
    '''
    return 0 <= item < 10 ** AssetNameParser.FRAME_PADDING

186

187

@validate_each(
    '{} is not a valid coordinate. -1 < coordinate < 1000.',
    list_first_arg=True
)
def is_coordinate(item):
    # type: (List[int]) -> bool
    '''
    Checks that a coordinate has one to three components, each of which is
    non-negative and fits within the coordinate padding defined by
    AssetNameParser.

    Args:
        item (list[int]): Coordinate.

    Raises:
        ValidationError: If coordinate is invalid.

    Returns:
        bool: Validity of coordinate.
    '''
    size = len(item)
    if size == 0 or size > 3:
        return False  # pragma: no cover

    # every component must lie in [0, limit)
    if min(item) < 0:
        return False  # pragma: no cover
    limit = 10 ** AssetNameParser.COORDINATE_PADDING
    return not max(item) >= limit

219

220

@validate_each('"{}" is not a valid extension.')
def is_extension(item):
    # type: (str) -> bool
    '''
    Validates a file extension.

    An extension must consist solely of lowercase letters and digits.

    Args:
        item (str): File extension.

    Raises:
        ValidationError: If extension is invalid.

    Returns:
        bool: Validity of extension.
    '''
    # fullmatch, unlike search with "$", also rejects a trailing newline
    return re.fullmatch('[a-z0-9]+', item) is not None

239

240

@validate_each('{} != {}.')
def is_eq(a, b):
    # type: (Any, Any) -> bool
    '''
    Checks a against b for equality.

    Args:
        a (object): Left operand.
        b (object): Right operand.

    Raises:
        ValidationError: If a does not equal b.

    Returns:
        bool: Result of a == b.
    '''
    outcome = a == b
    return outcome

258

259

@validate_each('{} !< {}.')
def is_lt(a, b):
    # type: (Any, Any) -> bool
    '''
    Checks that a is strictly less than b.

    Args:
        a (object): Left operand.
        b (object): Right operand.

    Raises:
        ValidationError: If a is not less than b.

    Returns:
        bool: Result of a < b.
    '''
    outcome = a < b
    return outcome

277

278

@validate_each('{} !> {}.')
def is_gt(a, b):
    # type: (Any, Any) -> bool
    '''
    Checks that a is strictly greater than b.

    Args:
        a (object): Left operand.
        b (object): Right operand.

    Raises:
        ValidationError: If a is not greater than b.

    Returns:
        bool: Result of a > b.
    '''
    outcome = a > b
    return outcome

296

297

@validate_each('{} !<= {}.')
def is_lte(a, b):
    # type: (Any, Any) -> bool
    '''
    Checks that a is less than or equal to b.

    Args:
        a (object): Left operand.
        b (object): Right operand.

    Raises:
        ValidationError: If a is not less than or equal to b.

    Returns:
        bool: Result of a <= b.
    '''
    outcome = a <= b
    return outcome

315

316

@validate_each('{} !>= {}.')
def is_gte(a, b):
    # type: (Any, Any) -> bool
    '''
    Checks that a is greater than or equal to b.

    Args:
        a (object): Left operand.
        b (object): Right operand.

    Raises:
        ValidationError: If a is not greater than or equal to b.

    Returns:
        bool: Result of a >= b.
    '''
    outcome = a >= b
    return outcome

334

335

@validate('{} is not homogenous.')
def is_homogenous(items):
    # type: (List[Any]) -> bool
    '''
    Checks that every item equals the first item.

    Lists with fewer than two items are considered homogenous.

    Args:
        items (list): List of items.

    Raises:
        ValidationError: If items are not all the same.

    Returns:
        bool: Homogeneity of items.
    '''
    if len(items) < 2:
        return True

    head, rest = items[0], items[1:]
    return not any(other != head for other in rest)

359

360

@validate_each('{} is not in {}.')
def is_in(a, b):
    # type: (Any, Any) -> bool
    '''
    Checks that a is a member of b.

    Args:
        a (object): Candidate member.
        b (object): Container tested with the "in" operator.

    Raises:
        ValidationError: If a is not in b.

    Returns:
        bool: Result of a in b.
    '''
    found = a in b
    return found

378

379

@validate_each('{} is not an attribute of {}.')
def is_attribute_of(name, object):
    # type: (str, Any) -> bool
    '''
    Checks that the given object has an attribute with the given name.

    Args:
        name (str): Attribute name.
        object (object): Object to inspect.

    Raises:
        ValidationError: If name is not an attribute of given object.

    Returns:
        bool: Result of hasattr(object, name).
    '''
    present = hasattr(object, name)
    return present

397

398

@validate('{} is not a directory or does not exist.')
def is_directory(item):
    # type: (Union[str, Path]) -> bool
    '''
    Checks that item is a path to an existing directory.

    Args:
        item (str): Directory path.

    Raises:
        ValidationError: If item is not a directory or does not exist.

    Returns:
        bool: True if item is an existing directory.
    '''
    return True if os.path.isdir(item) else False

417

418

@validate('{} is not a file or does not exist.')
def is_file(item):
    # type: (Union[str, Path]) -> bool
    '''
    Checks that item is a path to an existing file.

    Args:
        item (str): Filepath.

    Raises:
        ValidationError: If item is not a file or does not exist.

    Returns:
        bool: True if item is an existing file.
    '''
    return True if os.path.isfile(item) else False

437

438

def is_not_missing_values(items):
    # type: (List[int]) -> bool
    '''
    Validates that sequence of integers is not missing any values.

    The sequence must contain every integer between its minimum and maximum
    exactly once, in any order. An empty sequence has nothing missing and is
    considered valid.

    Args:
        items (list[int]): Integers.

    Raises:
        ValidationError: If items is missing values.

    Returns:
        bool: True if no values are missing.
    '''
    # min and max raise ValueError on an empty sequence, so settle it here
    if len(items) == 0:
        return True

    expected = list(range(min(items), max(items) + 1))
    if sorted(items) == expected:
        return True

    diff = sorted(set(expected).difference(items))
    msg = f'Missing values: {diff}.'
    raise ValidationError(msg)

460

461

def has_uniform_coordinate_count(items):
    # type: (List[List[int]]) -> bool
    '''
    Validates that non-unique list of coordinates has a uniform count per
    coordinate.

    Every distinct coordinate must occur the same number of times. The
    coordinates that occur less often than the most frequent one are reported
    as missing.

    Args:
        items (list[list[int]]): List of coordinates.

    Raises:
        ValidationError: If coordinate count is non-uniform.

    Returns:
        bool: Uniformity of coordinates.
    '''
    # count hashable tuples directly rather than stringifying each coordinate
    # and rebuilding it with eval
    count = Counter(map(tuple, items))
    if len(set(count.values())) > 1:
        max_ = max(count.values())
        missing = sorted(list(k) for k, v in count.items() if v < max_)
        msg = f'Non-uniform coordinate count. Missing coordinates: {missing}.'
        raise ValidationError(msg)
    return True

486

487

def has_dense_coordinates(items):
    # type: (List[List[int]]) -> bool
    '''
    Validates that list of coordinates is dense (every point is filled).

    The dense set is the cartesian product of the inclusive minimum to
    maximum range of each coordinate axis. An empty list has no points to
    fill and is considered dense.

    Args:
        items (list[list[int]]): List of coordinates.

    Raises:
        ValidationError: If coordinates are not dense.

    Returns:
        bool: Density of coordinates.
    '''
    # an empty frame has no axes to span
    if len(items) == 0:
        return True

    # build dense cartesian coordinates, one inclusive range per axis
    frame = DataFrame(items)
    axes = [
        range(frame[col].min(), frame[col].max() + 1)
        for col in frame.columns
    ]
    dense = set(product(*axes))

    # find difference between given coords and dense, comparing tuples
    # directly rather than stringified coordinates rebuilt with eval
    coords = set(map(tuple, items))
    diff = sorted(list(coord) for coord in dense.difference(coords))
    if len(diff) > 0:
        msg = f'Non-dense coordinates. Missing coordinates: {diff}.'
        raise ValidationError(msg)
    return True

518

519

def coordinates_begin_at(items, origin):
    # type: (List[List[int]], List[int]) -> bool
    '''
    Checks that the smallest coordinate in items, as ordered by min, equals
    the given origin.

    Args:
        items (list[list[int]]): List of coordinates.
        origin (list[int]): Origin coordinate.

    Raises:
        ValidationError: If coordinates do not begin at origin.

    Returns:
        bool: True if the coordinates begin at origin.
    '''
    lowest = min(items)
    if lowest != origin:
        raise ValidationError(f'Coordinates do not begin at {origin}.')
    return True

539

540

@validate('''{} is not a valid bucket name. Bucket names must:
    - be between 3 and 63 characters
    - only consist of lowercase letters, numbers, periods and hyphens
    - begin and end with a letter or number''')
def is_bucket_name(item):
    # type: (str) -> bool
    '''
    Validates a bucket name.

    A bucket name must be 3 to 63 characters long, consist solely of
    lowercase letters, digits, periods and hyphens, and begin and end with a
    letter or digit.

    Args:
        item (str): bucket name.

    Raises:
        ValidationError: If bucket name is invalid.

    Returns:
        bool: Validity of bucket name.
    '''
    if not 3 <= len(item) <= 63:
        return False

    # The pattern alone enforces lowercase. A separate str.islower check is
    # avoided because it is False for names without letters, such as "123",
    # which satisfy every rule above. fullmatch, unlike search with "$", also
    # rejects a trailing newline.
    pattern = '[a-z0-9][a-z0-9.-]*[a-z0-9]'
    return re.fullmatch(pattern, item) is not None

566

567

@validate('{} is not a valid AWS region.')
def is_aws_region(item):
    # type: (str) -> bool
    '''
    Checks that item is one of the AWS region names listed in this function.

    Args:
        item (str): AWS region name.

    Raises:
        ValidationError: If region name is invalid.

    Returns:
        bool: Validity of region name.
    '''
    # list derived from boto.session.Session().get_available_regions('s3')
    known_regions = (
        'af-south-1',
        'ap-east-1',
        'ap-northeast-1', 'ap-northeast-2', 'ap-northeast-3',
        'ap-south-1',
        'ap-southeast-1', 'ap-southeast-2',
        'ca-central-1',
        'eu-central-1',
        'eu-north-1',
        'eu-south-1',
        'eu-west-1', 'eu-west-2', 'eu-west-3',
        'me-south-1',
        'sa-east-1',
        'us-east-1', 'us-east-2',
        'us-west-1', 'us-west-2',
    )
    return item in known_regions

608

609

@validate('''{} is not a legal directory path.
Legal directory paths must:
    - Begin with /
    - Not end with /
    * Contain only the characters: /, a-z, A-Z, 0-9, _, -''')
def is_legal_directory(item):
    # type: (str) -> bool
    '''
    Validates that directory path is legal.
    Legal directory paths must:

        * Begin with /
        * Not end with /
        * Contain only the characters: /, a-z, A-Z, 0-9, _, -

    Args:
        item (str): Directory path.

    Raises:
        ValidationError: If directory path is invalid.

    Returns:
        bool: Validity of directory path.
    '''
    if not item.startswith('/') or item.endswith('/'):
        return False

    # fullmatch, unlike search with "$", also rejects a trailing newline,
    # which is not among the legal characters
    return re.fullmatch(r'[/a-z0-9_\-]+', item, re.I) is not None

641

642

@validate('''{} is not a legal metadata type.
Legal metadata types: [asset, file, asset-chunk, file-chunk]''')
def is_metadata_type(item):
    # type: (str) -> bool
    '''
    Checks that item names a legal metadata type.
    Legal types include:

        * asset
        * file
        * asset-chunk
        * file-chunk

    Args:
        item (str): Metadata type.

    Raises:
        ValidationError: If metadata type is illegal.

    Returns:
        bool: Validity of metadata type.
    '''
    legal_types = ('asset', 'file', 'asset-chunk', 'file-chunk')
    return item in legal_types

666

667

def is_hidebound_directory(directory):
    # type: (Union[str, Path]) -> None
    '''
    Checks that the final component of the directory path is "hidebound".

    Args:
        directory (str or Path): Hidebound directory.

    Raises:
        ValidationError: If directory is not named "hidebound".
    '''
    name = Path(directory).name
    if name == 'hidebound':
        return
    raise ValidationError(f'{directory} directory is not named hidebound.')

682

683

def is_http_method(method):
    # type: (str) -> None
    '''
    Checks that method is one of the legal HTTP methods.
    Legal methods include:

        * get
        * put
        * post
        * delete
        * patch

    Args:
        method (str): HTTP method.

    Raises:
        ValidationError: If method is not a legal HTTP method.
    '''
    methods = ['get', 'put', 'post', 'delete', 'patch']
    if method in methods:
        return
    raise ValidationError(
        f'{method} is not a legal HTTP method. Legal methods: {methods}.'
    )

706

707

def is_workflow(steps):
    # type: (List[str]) -> None
    '''
    Checks that every given workflow step is legal.
    Legal workflow steps include:

        * delete
        * update
        * create
        * export

    Args:
        steps (list[str]): List of workflow steps.

    Raises:
        ValidationError: If any step is not a legal workflow step.
    '''
    legal = ['delete', 'update', 'create', 'export']
    illegal = set(steps).difference(legal)
    if not illegal:
        return

    # report the offending steps in sorted order
    diff = sorted(illegal)
    raise ValidationError(
        f'{diff} are not legal workflow steps. Legal steps: {legal}.'
    )

730

731

def is_one_of(item, models):
    # type: (dict, List[Model]) -> None
    '''
    Validates whether given item matches at least one given model.

    Models are tried in order and validation succeeds on the first model that
    accepts the item. An empty list of models accepts any item.

    Args:
        item (dict): Item to be validated.
        models (list[Model]): List schematics Models.

    Raises:
        ValidationError: If no valid model could be found for given item.
            The message holds the distinct DataError messages, one per line,
            in the order the models were tried.
    '''
    if len(models) == 0:
        return

    # a list keeps first-seen order, so the combined message is the same on
    # every run; a set would order the messages arbitrarily
    errors = []  # type: List[str]
    for model in models:
        try:
            model(item).validate()
        except DataError as e:
            text = str(e)
            if text not in errors:
                errors.append(text)
        else:
            return
    raise ValidationError('\n'.join(errors))

756

757

@validate('''{} is not a legal cluster option type.
Legal cluster option types: [bool, float, int, mapping, select, string]''')
def is_cluster_option_type(item):
    # type: (str) -> bool
    '''
    Checks that item names a legal cluster option type.
    Legal types include:

        * bool
        * float
        * int
        * mapping
        * select
        * string

    Args:
        item (str): Cluster option type.

    Raises:
        ValidationError: If cluster option type is illegal.

    Returns:
        bool: Validity of cluster option type.
    '''
    legal_types = ('bool', 'float', 'int', 'mapping', 'select', 'string')
    return item in legal_types

782 return item in ['bool', 'float', 'int', 'mapping', 'select', 'string']