Coverage for /home/ubuntu/hidebound/python/hidebound/core/validators.py: 100%

201 statements  

« prev     ^ index     » next       coverage.py v7.5.4, created at 2024-07-05 23:50 +0000

1from typing import Any, Callable, List, Union # noqa F401 

2from schematics.models import Model # noqa F401 

3 

4from collections import Counter 

5from itertools import product 

6from pathlib import Path 

7import os 

8import re 

9 

10from pandas import DataFrame 

11from pyparsing import ParseException 

12from schematics.exceptions import DataError, ValidationError 

13import wrapt 

14 

15from hidebound.core.parser import AssetNameParser 

16# ------------------------------------------------------------------------------ 

17 

18 

19''' 

20The validators module is a function library for validating singular traits given

21to a specification. 

22 

23Validators are linked with traits via the validators kwarg of a 

24specification class attribute. They succeed silently and raise DataError when 

25the trait they validate fails. Schematics captures these error messages and 

26pipes them to an error call. 

27''' 

28 

29 

def validate(message):
    # type: (str) -> Callable
    '''
    A decorator for predicate functions that raises a ValidationError
    if the predicate returns a falsy value.

    The message is formatted with the stringified call arguments (positional
    arguments first, then keyword argument values), so it may contain "{}"
    placeholders.

    Args:
        message (str): Error message if predicate returns False.

    Raises:
        ValidationError: If predicate returns False.

    Returns:
        function: Decorator. The decorated predicate returns None on success.
    '''
    @wrapt.decorator
    def wrapper(wrapped, instance, args, kwargs):
        # keyword arguments are forwarded as well, otherwise a predicate
        # called with keywords would never receive them
        if not wrapped(*args, **kwargs):
            fmt_args = [str(x) for x in args]
            fmt_args.extend(str(x) for x in kwargs.values())
            # arguments are repeated so that messages containing more
            # placeholders than arguments can still be formatted
            msg = message.format(*(fmt_args * 10))
            raise ValidationError(msg)
        return
    return wrapper

53 

54 

def validate_each(message, list_first_arg=False):
    # type: (str, bool) -> Callable
    '''
    A decorator for predicate functions that raises a ValidationError
    if it returns False when applied to each argument individually.

    If the first positional argument is a list (and list_first_arg is False),
    the predicate is applied to each of its elements in turn. Any remaining
    positional and keyword arguments are passed to every call.

    Args:
        message (str): Error message if predicate returns False. Formatted
            with the failing element followed by the extra arguments.
        list_first_arg (bool, optional): Set to True if first argument is a
            list that should be validated as a single item. Default: False.

    Raises:
        ValidationError: If predicate returns False.

    Returns:
        function: Decorator. The decorated predicate returns None on success.
    '''
    @wrapt.decorator
    def wrapper(wrapped, instance, args, kwargs):
        extra_args = args[1:]

        items = args[0]
        if list_first_arg or not isinstance(items, list):
            # treat the first argument as one item rather than iterating it
            items = [items]

        # keyword values take part in message formatting after positionals
        extra_fmt = list(extra_args) + list(kwargs.values())
        for item in items:
            # keyword arguments are forwarded too, they used to be dropped
            if not wrapped(item, *extra_args, **kwargs):
                msg = message.format(item, *extra_fmt)
                raise ValidationError(msg)
        return
    return wrapper

87 

88 

89# VALIDATORS-------------------------------------------------------------------- 

@validate_each('"{}" is not a valid project name.')
def is_project(item):
    # type: (str) -> bool
    '''
    Validates a project name.

    A project name must be parsable by AssetNameParser as a project field and
    consist solely of lowercase letters and digits.

    Args:
        item (str): Project name.

    Raises:
        ValidationError: If project name is invalid.

    Returns:
        bool: Validity of project name.
    '''
    try:
        indicator = AssetNameParser.PROJECT_INDICATOR
        AssetNameParser(['project']).parse(indicator + item)
    except ParseException:
        return False  # pragma: no cover

    # fullmatch is used because '$' also matches before a trailing newline,
    # which would let names like 'proj\n' through
    return re.fullmatch('[a-z0-9]+', item) is not None

115 

116 

@validate_each('"{}" is not a valid descriptor.')
def is_descriptor(item):
    # type: (str) -> bool
    '''
    Validates a descriptor.

    A descriptor must be parsable by AssetNameParser as a descriptor field,
    consist solely of lowercase letters, digits and hyphens, and must not
    begin with "master", "final" or "last".

    Args:
        item (str): Descriptor.

    Raises:
        ValidationError: If descriptor is invalid.

    Returns:
        bool: Validity of descriptor.
    '''
    try:
        indicator = AssetNameParser.DESCRIPTOR_INDICATOR
        AssetNameParser(['descriptor']).parse(indicator + item)
    except ParseException:
        return False  # pragma: no cover

    # fullmatch is used because '$' also matches before a trailing newline.
    # it also guarantees at least one character, so no separate length
    # check is needed
    if re.fullmatch('[a-z0-9-]+', item) is None:
        return False  # pragma: no cover

    # the master/final/last asset is never actually that
    # asset should only ever be thought of in terms of latest version
    if item.startswith(('master', 'final', 'last')):
        return False  # pragma: no cover

    return True

150 

151 

@validate_each('{} is not a valid version. 0 < version < 1000.')
def is_version(item):
    # type: (int) -> bool
    '''
    Checks that a version number lies within the legal range.

    The version must be greater than 0 and less than 10 raised to
    AssetNameParser.VERSION_PADDING.

    Args:
        item (int): Version.

    Raises:
        ValidationError: If version is invalid.

    Returns:
        bool: Validity of version.
    '''
    upper = 10**AssetNameParser.VERSION_PADDING
    if item > 0:
        return item < upper
    return False

168 

169 

@validate_each('{} is not a valid frame. -1 < frame < 10000.')
def is_frame(item):
    # type: (int) -> bool
    '''
    Checks that a frame number lies within the legal range.

    The frame must be at least 0 and less than 10 raised to
    AssetNameParser.FRAME_PADDING.

    Args:
        item (int): Frame.

    Raises:
        ValidationError: If frame is invalid.

    Returns:
        bool: Validity of frame.
    '''
    upper = 10**AssetNameParser.FRAME_PADDING
    if item >= 0:
        return item < upper
    return False

186 

187 

@validate_each(
    '{} is not a valid coordinate. -1 < coordinate < 1000.',
    list_first_arg=True
)
def is_coordinate(item):
    # type: (List[int]) -> bool
    '''
    Checks that a coordinate has 1 to 3 components, each within legal bounds.

    Every component must be at least 0 and less than 10 raised to
    AssetNameParser.COORDINATE_PADDING.

    Args:
        item (list[int]): Coordinate.

    Raises:
        ValidationError: If coordinate is invalid.

    Returns:
        bool: Validity of coordinate.
    '''
    size = len(item)
    if size == 0 or size > 3:
        return False  # pragma: no cover

    upper = 10**AssetNameParser.COORDINATE_PADDING
    out_of_bounds = min(item) < 0 or max(item) >= upper
    return not out_of_bounds

219 

220 

@validate_each('"{}" is not a valid extension.')
def is_extension(item):
    # type: (str) -> bool
    '''
    Validates a file extension.

    An extension must consist solely of lowercase letters and digits and be at
    least one character long.

    Args:
        item (str): File extension.

    Raises:
        ValidationError: If extension is invalid.

    Returns:
        bool: Validity of extension.
    '''
    # fullmatch is used because '$' also matches before a trailing newline,
    # which would let extensions like 'png\n' through
    return re.fullmatch('[a-z0-9]+', item) is not None

239 

240 

@validate_each('{} != {}.')
def is_eq(a, b):
    # type: (Any, Any) -> bool
    '''
    Checks a and b for equality.

    Args:
        a (object): Left operand.
        b (object): Right operand.

    Raises:
        ValidationError: If a does not equal b.

    Returns:
        bool: Result of a == b.
    '''
    outcome = a == b
    return outcome

258 

259 

@validate_each('{} !< {}.')
def is_lt(a, b):
    # type: (Any, Any) -> bool
    '''
    Checks that a is strictly less than b.

    Args:
        a (object): Left operand.
        b (object): Right operand.

    Raises:
        ValidationError: If a is not less than b.

    Returns:
        bool: Result of a < b.
    '''
    outcome = a < b
    return outcome

277 

278 

@validate_each('{} !> {}.')
def is_gt(a, b):
    # type: (Any, Any) -> bool
    '''
    Checks that a is strictly greater than b.

    Args:
        a (object): Left operand.
        b (object): Right operand.

    Raises:
        ValidationError: If a is not greater than b.

    Returns:
        bool: Result of a > b.
    '''
    outcome = a > b
    return outcome

296 

297 

@validate_each('{} !<= {}.')
def is_lte(a, b):
    # type: (Any, Any) -> bool
    '''
    Checks that a is less than or equal to b.

    Args:
        a (object): Left operand.
        b (object): Right operand.

    Raises:
        ValidationError: If a is not less than or equal to b.

    Returns:
        bool: Result of a <= b.
    '''
    outcome = a <= b
    return outcome

315 

316 

@validate_each('{} !>= {}.')
def is_gte(a, b):
    # type: (Any, Any) -> bool
    '''
    Checks that a is greater than or equal to b.

    Args:
        a (object): Left operand.
        b (object): Right operand.

    Raises:
        ValidationError: If a is not greater than or equal to b.

    Returns:
        bool: Result of a >= b.
    '''
    outcome = a >= b
    return outcome

334 

335 

@validate('{} is not homogenous.')
def is_homogenous(items):
    # type: (List[Any]) -> bool
    '''
    Checks that every item in a list equals the first one.

    Lists with fewer than two items are always homogenous.

    Args:
        items (list): List of items.

    Raises:
        ValidationError: If items are not all the same.

    Returns:
        bool: Homogeneity of items.
    '''
    if len(items) < 2:
        return True

    head, tail = items[0], items[1:]
    return not any(x != head for x in tail)

359 

360 

@validate_each('{} is not in {}.')
def is_in(a, b):
    # type: (Any, Any) -> bool
    '''
    Checks that a is a member of b.

    Args:
        a (object): Candidate member.
        b (object): Container tested with the "in" operator.

    Raises:
        ValidationError: If a is not in b.

    Returns:
        bool: Result of a in b.
    '''
    found = a in b
    return found

378 

379 

@validate_each('{} is not an attribute of {}.')
def is_attribute_of(name, object):
    # type: (str, Any) -> bool
    '''
    Checks that the given object has an attribute with the given name.

    Args:
        name (str): Attribute name.
        object (object): Object to inspect.

    Raises:
        ValidationError: If name is not an attribute of given object.

    Returns:
        bool: Result of hasattr(object, name).
    '''
    present = hasattr(object, name)
    return present

397 

398 

@validate('{} is not a directory or does not exist.')
def is_directory(item):
    # type: (Union[str, Path]) -> bool
    '''
    Checks that a path points to an existing directory.

    Args:
        item (str or Path): Directory path.

    Raises:
        ValidationError: If item is not a directory or does not exist.

    Returns:
        bool: True if item is an existing directory.
    '''
    return os.path.isdir(item)

417 

418 

@validate('{} is not a file or does not exist.')
def is_file(item):
    # type: (Union[str, Path]) -> bool
    '''
    Checks that a path points to an existing file.

    Args:
        item (str or Path): Filepath.

    Raises:
        ValidationError: If item is not a file or does not exist.

    Returns:
        bool: True if item is an existing file.
    '''
    return os.path.isfile(item)

437 

438 

def is_not_missing_values(items):
    # type: (List[int]) -> bool
    '''
    Validates that sequence of integers is not missing any values.

    Every integer between the smallest and largest item must be present.
    Order and duplicates are irrelevant. An empty sequence has nothing
    missing and is considered valid.

    Args:
        items (list[int]): Integers.

    Raises:
        ValidationError: If items is missing values.

    Returns:
        bool: True if no values are missing.
    '''
    # min/max of an empty sequence would raise ValueError
    if len(items) == 0:
        return True

    # compare as sets so that duplicates are not reported as an empty list
    # of missing values
    present = set(items)
    expected = set(range(min(present), max(present) + 1))
    missing = sorted(expected - present)
    if not missing:
        return True

    msg = f'Missing values: {missing}.'
    raise ValidationError(msg)

460 

461 

def has_uniform_coordinate_count(items):
    # type: (List[List[int]]) -> bool
    '''
    Validates that non-unique list of coordinates has a uniform count per
    coordinate.

    Each distinct coordinate must occur the same number of times. Coordinates
    occurring less often than the most frequent one are reported as missing.

    Args:
        items (list[list[int]]): List of coordinates.

    Raises:
        ValidationError: If coordinate count is non-uniform.

    Returns:
        bool: Uniformity of coordinates.
    '''
    # tuples are hashable, so coordinates can be counted directly instead of
    # being converted to strings and evaluated back
    counts = Counter(tuple(x) for x in items)
    if len(set(counts.values())) > 1:
        most = max(counts.values())
        sparse = sorted(list(k) for k, v in counts.items() if v < most)
        msg = f'Non-uniform coordinate count. Missing coordinates: {sparse}.'
        raise ValidationError(msg)
    return True

486 

487 

def has_dense_coordinates(items):
    # type: (List[List[int]]) -> bool
    '''
    Validates that list of coordinates is dense (every point is filled).

    The dense set is the cartesian product of the per-axis ranges spanning
    the minimum to the maximum value found on each axis. An empty list has no
    points to fill and is considered dense.

    Args:
        items (list[list[int]]): List of coordinates.

    Raises:
        ValidationError: If coordinates are not dense.

    Returns:
        bool: Density of coordinates.
    '''
    if len(items) == 0:
        return True

    # build dense cartesian coordinates from the span of each axis
    axes = [range(min(axis), max(axis) + 1) for axis in zip(*items)]
    dense = set(product(*axes))

    # find difference between given coords and dense
    coords = {tuple(x) for x in items}
    missing = sorted(dense - coords)
    if len(missing) > 0:
        diff = [list(x) for x in missing]
        msg = f'Non-dense coordinates. Missing coordinates: {diff}.'
        raise ValidationError(msg)
    return True

518 

519 

def coordinates_begin_at(items, origin):
    # type: (List[List[int]], List[int]) -> bool
    '''
    Checks that the smallest coordinate in a list equals the given origin.

    The smallest coordinate is determined with the builtin min, i.e. by
    ordinary sequence comparison of the coordinates.

    Args:
        items (list[list[int]]): List of coordinates.
        origin (list[int]): Origin coordinate.

    Raises:
        ValidationError: If coordinates do not begin at origin.

    Returns:
        bool: True if coordinates begin at origin.
    '''
    smallest = min(items)
    if not smallest == origin:
        msg = f'Coordinates do not begin at {origin}.'
        raise ValidationError(msg)
    return True

539 

540 

@validate('''{} is not a valid bucket name. Bucket names must:
    - be between 3 and 63 characters
    - only consist of lowercase letters, numbers, periods and hyphens
    - begin and end with a letter or number''')
def is_bucket_name(item):
    # type: (str) -> bool
    '''
    Validates a bucket name.

    Bucket names must:

        * be between 3 and 63 characters
        * only consist of lowercase letters, numbers, periods and hyphens
        * begin and end with a letter or number

    Args:
        item (str): bucket name.

    Raises:
        ValidationError: If bucket name is invalid.

    Returns:
        bool: Validity of bucket name.
    '''
    if not 3 <= len(item) <= 63:
        return False

    # the pattern alone enforces lowercase. a separate str.islower check
    # would wrongly reject names without letters, such as '123'.
    # fullmatch is used because '$' also matches before a trailing newline
    pattern = '[a-z0-9][a-z0-9.-]*[a-z0-9]'
    return re.fullmatch(pattern, item) is not None

566 

567 

@validate('{} is not a valid AWS region.')
def is_aws_region(item):
    # type: (str) -> bool
    '''
    Checks that a name is one of the known AWS region names.

    Args:
        item (str): AWS region name.

    Raises:
        ValidationError: If region name is invalid.

    Returns:
        bool: Validity of region name.
    '''
    # list derived from boto.session.Session().get_available_regions('s3')
    known_regions = (
        'af-south-1',
        'ap-east-1',
        'ap-northeast-1',
        'ap-northeast-2',
        'ap-northeast-3',
        'ap-south-1',
        'ap-southeast-1',
        'ap-southeast-2',
        'ca-central-1',
        'eu-central-1',
        'eu-north-1',
        'eu-south-1',
        'eu-west-1',
        'eu-west-2',
        'eu-west-3',
        'me-south-1',
        'sa-east-1',
        'us-east-1',
        'us-east-2',
        'us-west-1',
        'us-west-2',
    )
    is_known = item in known_regions
    return is_known

608 

609 

@validate('''{} is not a legal directory path.
Legal directory paths must:
    - Begin with /
    - Not end with /
    - Contain only the characters: /, a-z, A-Z, 0-9, _, -''')
def is_legal_directory(item):
    # type: (str) -> bool
    '''
    Validates that directory path is legal.
    Legal directory paths must:

        * Begin with /
        * Not end with /
        * Contain only the characters: /, a-z, A-Z, 0-9, _, -

    Args:
        item (str): Directory path.

    Raises:
        ValidationError: If directory path is invalid.

    Returns:
        bool: Validity of directory path.
    '''
    if not item.startswith('/') or item.endswith('/'):
        return False

    # fullmatch is used because '$' also matches before a trailing newline.
    # both letter cases are listed explicitly so that only ASCII letters
    # match, which case-insensitive matching does not guarantee
    return re.fullmatch(r'[/a-zA-Z0-9_\-]+', item) is not None

641 

642 

@validate('''{} is not a legal metadata type.
Legal metadata types: [asset, file, asset-chunk, file-chunk]''')
def is_metadata_type(item):
    # type: (str) -> bool
    '''
    Checks that a metadata type is one of the legal types.
    Legal types include:

        * asset
        * file
        * asset-chunk
        * file-chunk

    Args:
        item (str): Metadata type.

    Raises:
        ValidationError: If metadata type is illegal.

    Returns:
        bool: Validity of metadata type.
    '''
    legal_types = ('asset', 'file', 'asset-chunk', 'file-chunk')
    is_legal = item in legal_types
    return is_legal

666 

667 

def is_hidebound_directory(directory):
    # type: (Union[str, Path]) -> None
    '''
    Checks that the final component of a directory path is "hidebound".

    Args:
        directory (str or Path): Hidebound directory.

    Raises:
        ValidationError: If directory is not named "hidebound".
    '''
    dirname = Path(directory).name
    if dirname == 'hidebound':
        return
    msg = f'{directory} directory is not named hidebound.'
    raise ValidationError(msg)

682 

683 

def is_http_method(method):
    # type: (str) -> None
    '''
    Checks that a method name is a legal HTTP method.
    Legal methods include:

        * get
        * put
        * post
        * delete
        * patch

    Args:
        method (str): HTTP method.

    Raises:
        ValidationError: If method is not a legal HTTP method.
    '''
    methods = ['get', 'put', 'post', 'delete', 'patch']
    if method in methods:
        return
    msg = f'{method} is not a legal HTTP method. Legal methods: {methods}.'
    raise ValidationError(msg)

706 

707 

def is_workflow(steps):
    # type: (List[str]) -> None
    '''
    Checks that every given workflow step is legal.
    Legal workflows steps include:

        * delete
        * update
        * create
        * export

    Args:
        steps (list[str]): List of workflow steps.

    Raises:
        ValidationError: If any step is not a legal workflow step.
    '''
    legal = ['delete', 'update', 'create', 'export']
    illegal = set(steps) - set(legal)
    if not illegal:
        return
    diff = sorted(illegal)
    msg = f'{diff} are not legal workflow steps. Legal steps: {legal}.'
    raise ValidationError(msg)

730 

731 

def is_one_of(item, models):
    # type: (dict, List[Model]) -> None
    '''
    Validates whether given item matches at least one given model.

    Models are tried in order and validation succeeds on the first model that
    accepts the item. If no models are given, the item is considered valid.

    Args:
        item (dict): Item to be validated.
        models (list[Model]): List schematics Models.

    Raises:
        ValidationError: If no valid model could be found for given item. The
            message contains the distinct error of each model, in model order.
    '''
    if len(models) == 0:
        return

    # a list keeps the errors in model order. a set would make the order of
    # the joined message vary between runs
    errors = []  # type: List[str]
    for model in models:
        try:
            model(item).validate()
            return
        except DataError as e:
            text = str(e)
            if text not in errors:
                errors.append(text)
    error = '\n'.join(errors)
    raise ValidationError(error)

756 

757 

@validate('''{} is not a legal cluster option type.
Legal cluster option types: [bool, float, int, mapping, select, string]''')
def is_cluster_option_type(item):
    # type: (str) -> bool
    '''
    Checks that a cluster option type is one of the legal types.
    Legal types include:

        * bool
        * float
        * int
        * mapping
        * select
        * string

    Args:
        item (str): Cluster option type.

    Raises:
        ValidationError: If cluster option type is illegal.

    Returns:
        bool: Validity of cluster option type.
    '''
    legal_types = ('bool', 'float', 'int', 'mapping', 'select', 'string')
    is_legal = item in legal_types
    return is_legal