From fa264780e28308c5f3b983dfc36fe61401bd8799 Mon Sep 17 00:00:00 2001 From: Max Date: Fri, 29 Jun 2018 08:10:49 -0500 Subject: [PATCH 1/3] Allow multi-GPU training (removes inadvertent restrictions in the code) Fix: restore compatibility with single-GPU configs Remove extraneous code (model is reassigned to CUDA device in __init__) Restore GPU device check in the command shell --- allennlp/commands/train.py | 7 ++++++- allennlp/training/trainer.py | 4 +--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/allennlp/commands/train.py b/allennlp/commands/train.py index dd67f907404..13a18af9a06 100644 --- a/allennlp/commands/train.py +++ b/allennlp/commands/train.py @@ -253,7 +253,12 @@ def train_model(params: Params, create_serialization_dir(params, serialization_dir, recover) prepare_global_logging(serialization_dir, file_friendly_logging) - check_for_gpu(params.params.get('trainer').get('cuda_device', -1)) + cuda_device = params.params.get('trainer').get('cuda_device', -1) + if isinstance(cuda_device, list): + for device in cuda_device: + check_for_gpu(device) + else: + check_for_gpu(cuda_device) serialization_params = deepcopy(params).as_dict(quiet=True) with open(os.path.join(serialization_dir, CONFIG_NAME), "w") as param_file: diff --git a/allennlp/training/trainer.py b/allennlp/training/trainer.py index 8dcbd7e3d91..2a9dbd84acd 100644 --- a/allennlp/training/trainer.py +++ b/allennlp/training/trainer.py @@ -926,13 +926,11 @@ def from_params(cls, patience = params.pop_int("patience", None) validation_metric = params.pop("validation_metric", "-loss") num_epochs = params.pop_int("num_epochs", 20) - cuda_device = params.pop_int("cuda_device", -1) + cuda_device = params.pop("cuda_device", -1) grad_norm = params.pop_float("grad_norm", None) grad_clipping = params.pop_float("grad_clipping", None) lr_scheduler_params = params.pop("learning_rate_scheduler", None) - if cuda_device >= 0: - model = model.cuda(cuda_device) parameters = [[n, p] for n, p in 
model.named_parameters() if p.requires_grad] optimizer = Optimizer.from_params(parameters, params.pop("optimizer")) From 82b0d8daee2b62345d824d644d06ce2f29027498 Mon Sep 17 00:00:00 2001 From: Max Date: Fri, 29 Jun 2018 11:45:06 -0500 Subject: [PATCH 2/3] Chunk metadata batches when training multiple GPUs (#1439) --- allennlp/training/trainer.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/allennlp/training/trainer.py b/allennlp/training/trainer.py index 2a9dbd84acd..7d5ff1194e0 100644 --- a/allennlp/training/trainer.py +++ b/allennlp/training/trainer.py @@ -378,7 +378,20 @@ def _data_parallel(self, batch): of torch.nn.parallel.data_parallel to support the allennlp model interface. """ + metadata_batch_size = len(batch['metadata']) if 'metadata' in batch and isinstance(batch['metadata'],list) else None + inputs, module_kwargs = scatter_kwargs((), batch, self._cuda_devices, 0) + + if metadata_batch_size is not None: + # Metadata batches also have to be chunked as PyTorch is unaware of them. + # Follows chunking implementation by ATen.native.TensorShape functions. + chunk_size = 1 + (metadata_batch_size - 1)//len(self._cuda_devices) + chunk_offset = 0 + for instance in module_kwargs: + if 'metadata' in instance: + instance['metadata'] = instance['metadata'][chunk_offset:chunk_size+chunk_offset] + chunk_offset += chunk_size + used_device_ids = self._cuda_devices[:len(inputs)] replicas = replicate(self._model, used_device_ids) outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids) From d3cadd936a132d6f0e75adee6a163c54058de546 Mon Sep 17 00:00:00 2001 From: "paul.murphy" Date: Mon, 2 Jul 2018 12:02:58 +0100 Subject: [PATCH 3/3] multi para predictor. 
--- .../reading_comprehension/triviaqa.py | 2 +- allennlp/predictors/__init__.py | 1 + allennlp/predictors/multi_para.py | 42 +++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 allennlp/predictors/multi_para.py diff --git a/allennlp/data/dataset_readers/reading_comprehension/triviaqa.py b/allennlp/data/dataset_readers/reading_comprehension/triviaqa.py index e8335c6ae52..c37682b4117 100644 --- a/allennlp/data/dataset_readers/reading_comprehension/triviaqa.py +++ b/allennlp/data/dataset_readers/reading_comprehension/triviaqa.py @@ -499,7 +499,7 @@ def text_to_instance(self, # type: ignore paragraph_tokens = [[truncate_token(token, self._max_token_length) for token in tokens] for tokens in paragraph_tokens] - if token_spans is None: + if token_spans is None and answer_texts is not None: token_spans = [util.find_valid_answer_spans(paragraph_tokens_i, answer_texts) for paragraph_tokens_i in paragraph_tokens] if question_tokens is None: diff --git a/allennlp/predictors/__init__.py b/allennlp/predictors/__init__.py index 0be6fc4ee6e..598dd914678 100644 --- a/allennlp/predictors/__init__.py +++ b/allennlp/predictors/__init__.py @@ -16,3 +16,4 @@ from allennlp.predictors.simple_seq2seq import SimpleSeq2SeqPredictor from allennlp.predictors.wikitables_parser import WikiTablesParserPredictor from allennlp.predictors.nlvr_parser import NlvrParserPredictor +from allennlp.predictors.multi_para import MultiParaPredictor diff --git a/allennlp/predictors/multi_para.py b/allennlp/predictors/multi_para.py new file mode 100644 index 00000000000..2a9422506e7 --- /dev/null +++ b/allennlp/predictors/multi_para.py @@ -0,0 +1,42 @@ +from typing import Tuple +from overrides import overrides + +from allennlp.common.util import JsonDict +from allennlp.data import Instance +from allennlp.predictors.predictor import Predictor + + +@Predictor.register('multi-para') +class MultiParaPredictor(Predictor): + """ + Predictor for the 
:class:`~allennlp.models.bidaf.BidirectionalAttentionFlow` model. + """ + + def predict(self, question: str, passage: str) -> JsonDict: + """ + Make a machine comprehension prediction on the supplied input. + See https://rajpurkar.github.io/SQuAD-explorer/ for more information about the machine comprehension task. + + Parameters + ---------- + question : ``str`` + A question about the content in the supplied paragraph. The question must be answerable by a + span in the paragraph. + passage : ``str`` + A paragraph of information relevant to the question. + + Returns + ------- + A dictionary that represents the prediction made by the system. The answer string will be under the + "best_span_str" key. + """ + return self.predict_json({"passages": passage, "question": question}) + + @overrides + def _json_to_instance(self, json_dict: JsonDict) -> Tuple[Instance, JsonDict]: + """ + Expects JSON that looks like ``{"question": "...", "passages": "..."}``. + """ + question_text = json_dict["question"] + passage_texts = json_dict["passages"] + return self._dataset_reader.text_to_instance(question_text, passage_texts), {}