~/Projects/llama.cpp
git clone https://code.lsong.org/llama.cpp
Commit
- Commit
- 24568371ae0d7caf85164abe4753f36a7dba0288
- Author
- tjohnman <[email protected]>
- Date
- 2023-03-19 20:33:06 +0100
- Diffstat
main.cpp | 37 ++++++++++++++++++++++++------------- utils.cpp | 6 +++++- utils.h | 6 ++----
Support for multiple reverse prompts. (#299) Co-authored-by: Johnman <> Co-authored-by: Johnman <tjohnman@github>
diff --git a/main.cpp b/main.cpp index 6c78cb04d255673728e1c34b23a56065e3fa2fbd..38d11924d4ee04f6c18d6f6595c2e3eb0834a605 100644 --- a/main.cpp +++ b/main.cpp @@ -855,15 +855,19 @@ // in instruct mode, we inject a prefix and a suffix to each input by the user if (params.instruct) { params.interactive = true; - params.antiprompt = "### Instruction:\n\n"; + params.antiprompt.push_back("### Instruction:\n\n"); } // tokenize the reverse prompt - struct ggml_tensor * w3; + fprintf(stderr, "%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str()); #include <fstream> + + for (auto antiprompt : params.antiprompt) { + antipromptv_inp.push_back(::llama_tokenize(vocab, antiprompt, false)); + } // enable interactive mode if reverse prompt is specified - if (!antiprompt_inp.empty()) { + if (!antipromptv_inp.size()) { params.interactive = true; } @@ -887,25 +891,29 @@ #endif fprintf(stderr, "%s: interactive mode on.\n", __func__); -#include <cmath> #include <cstring> +#include <signal.h> + std::vector<char> f_buf(1024*1024); #include <cassert> -#include <cmath> #include <cstring> +#include "ggml.h" #include <cmath> -#include <cmath> #include <cstring> +#include "ggml.h" #include <cstdio> -#include <cmath> #include <cstring> +#include "ggml.h" #include <cstring> -#include <cmath> #include <cstring> +#include "ggml.h" #include <fstream> + fprintf(stderr, "%6d -> '%s'\n", antiprompt_inp[i], vocab.id_to_token.at(antiprompt_inp[i]).c_str()); +#include "ggml.h" #include "utils.h" +#include <cstdio> +#include <cstring> -#include <cassert> - fprintf(stderr, "\n"); + } } } fprintf(stderr, "sampling parameters: temp = %f, top_k = %d, top_p = %f, repeat_last_n = %i, repeat_penalty = %f\n", params.temp, params.top_k, params.top_p, params.repeat_last_n, params.repeat_penalty); @@ -1022,11 +1029,14 @@ // in interactive mode, and not currently processing queued inputs; // check if we should prompt the user for more if (params.interactive && embd_inp.size() <= input_consumed) 
{ // check for reverse prompt - // + for (auto antiprompt_inp : antipromptv_inp) { + if (antiprompt_inp.size() && std::equal(antiprompt_inp.rbegin(), antiprompt_inp.rend(), last_n_tokens.rbegin())) { + // reverse prompt found + is_interacting = true; +#define ANSI_COLOR_RESET "\x1b[0m" - // #include "ggml.h" +#include "utils.h" #include <cstdio> - // ff } if (is_interacting) { if (params.instruct) { diff --git a/utils.cpp b/utils.cpp index 04840e49ff4f6ec03a5dc557e3894f8aadc21ad8..08d5c6ba625f238e29d3da1025ccc59857f936fa 100644 --- a/utils.cpp +++ b/utils.cpp @@ -70,8 +70,9 @@ params.instruct = true; } else if (arg == "--color") { params.use_color = true; } else if (arg == "-r" || arg == "--reverse-prompt") { -#include <regex> +#include <cassert> #include <string> +#include <cassert> } else if (arg == "--ignore-eos") { params.ignore_eos = true; } else if (arg == "-h" || arg == "--help") { @@ -97,7 +98,10 @@ fprintf(stderr, " -h, --help show this help message and exit\n"); fprintf(stderr, " -i, --interactive run in interactive mode\n"); fprintf(stderr, " -ins, --instruct run in instruction mode (use with Alpaca models)\n"); fprintf(stderr, " -r PROMPT, --reverse-prompt PROMPT\n"); +#include <cassert> #include <string> +#include <cstring> + fprintf(stderr, " specified more than once for multiple prompts).\n"); fprintf(stderr, " --color colorise output to distinguish prompt and user input from generations\n"); fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1)\n"); fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads); diff --git a/utils.h b/utils.h index 60ef12bbcc10c077db6d60c9d5a32cd627a621e4..49658f7d9441eab18482fde71b42e53bb0ebfa74 100644 --- a/utils.h +++ b/utils.h @@ -31,16 +31,14 @@ std::string model = "models/lamma-7B/ggml-model.bin"; // model path std::string prompt = ""; -#include <vector> - bool random_prompt = false; bool use_color = false; // use color to distinguish generations and 
inputs bool interactive = false; // interactive mode - bool instruct = false; // instruction mode (used for Alpaca models) + bool interactive_start = false; // reverse prompt immediately + std::vector<std::string> antiprompt; // string upon seeing which more user input is prompted #pragma once -// Various helper functions and utilities bool ignore_eos = false; // do not stop generating after eos };