{"id":54,"date":"2023-12-08T08:48:00","date_gmt":"2023-12-08T08:48:00","guid":{"rendered":"https:\/\/tensor.agenthub.uk\/?p=54"},"modified":"2024-05-13T03:46:48","modified_gmt":"2024-05-13T03:46:48","slug":"llama","status":"publish","type":"post","link":"https:\/\/tensorzen.blog\/?p=54","title":{"rendered":"Llama \u91cd\u5199\u65e5\u5fd7[\u672a\u5b8c&#8230;]"},"content":{"rendered":"\n<p>\u6267\u884c\u5982\u4e0b\u547d\u4ee4\u540e\u62a5\u9519<\/p>\n\n\n\n<div class=\"wp-block-kevinbatdorf-code-block-pro\" data-code-block-pro-font-family=\"Code-Pro-JetBrains-Mono\" style=\"font-size:.875rem;font-family:Code-Pro-JetBrains-Mono,ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,monospace;line-height:1.25rem;--cbp-tab-width:2;tab-size:var(--cbp-tab-width, 2)\"><span style=\"display:block;padding:16px 0 0 16px;margin-bottom:-1px;width:100%;text-align:left;background-color:#2e3440ff\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"54\" height=\"14\" viewBox=\"0 0 54 14\"><g fill=\"none\" fill-rule=\"evenodd\" transform=\"translate(1 1)\"><circle cx=\"6\" cy=\"6\" r=\"6\" fill=\"#FF5F56\" stroke=\"#E0443E\" stroke-width=\".5\"><\/circle><circle cx=\"26\" cy=\"6\" r=\"6\" fill=\"#FFBD2E\" stroke=\"#DEA123\" stroke-width=\".5\"><\/circle><circle cx=\"46\" cy=\"6\" r=\"6\" fill=\"#27C93F\" stroke=\"#1AAB29\" stroke-width=\".5\"><\/circle><\/g><\/svg><\/span><span role=\"button\" tabindex=\"0\" data-code=\"from llama import tokenizer, Llama, Dialog\ncheckpoint_dir = &quot;\/training-data\/pakcages\/llama\/llama-2-7b-chat&quot;\ntokenizer_path = &quot;\/training-data\/pakcages\/llama\/tokenizer.model&quot;\ntemperature = 0.75\ntop_p = 0.9\nmax_seq_len = 128\nmax_gen_len = 64\nmax_batch_size = 4\n\ngenerator = Llama.build(\n    ckpt_dir=checkpoint_dir,\n    tokenizer_path=tokenizer_path,\n    max_seq_len=max_seq_len,\n    max_batch_size=max_batch_size)\" style=\"color:#d8dee9ff;display:none\" aria-label=\"Copy\" class=\"code-block-pro-copy-button\"><svg 
xmlns=\"http:\/\/www.w3.org\/2000\/svg\" style=\"width:24px;height:24px\" fill=\"none\" viewBox=\"0 0 24 24\" stroke=\"currentColor\" stroke-width=\"2\"><path class=\"with-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2m-6 9l2 2 4-4\"><\/path><path class=\"without-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2\"><\/path><\/svg><\/span><pre class=\"shiki nord\" style=\"background-color: #2e3440ff\" tabindex=\"0\"><code><span class=\"line\"><span style=\"color: #81A1C1\">from<\/span><span style=\"color: #D8DEE9FF\"> llama <\/span><span style=\"color: #81A1C1\">import<\/span><span style=\"color: #D8DEE9FF\"> tokenizer<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9FF\"> Llama<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9FF\"> Dialog<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">checkpoint_dir <\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #ECEFF4\">&quot;<\/span><span style=\"color: #A3BE8C\">\/training-data\/pakcages\/llama\/llama-2-7b-chat<\/span><span style=\"color: #ECEFF4\">&quot;<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">tokenizer_path <\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #ECEFF4\">&quot;<\/span><span style=\"color: #A3BE8C\">\/training-data\/pakcages\/llama\/tokenizer.model<\/span><span style=\"color: #ECEFF4\">&quot;<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">temperature <\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: 
#B48EAD\">0.75<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">top_p <\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #B48EAD\">0.9<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">max_seq_len <\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #B48EAD\">128<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">max_gen_len <\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #B48EAD\">64<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">max_batch_size <\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #B48EAD\">4<\/span><\/span>\n<span class=\"line\"><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">generator <\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\"> Llama<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #88C0D0\">build<\/span><span style=\"color: #ECEFF4\">(<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">    <\/span><span style=\"color: #D8DEE9\">ckpt_dir<\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\">checkpoint_dir<\/span><span style=\"color: #ECEFF4\">,<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">    <\/span><span style=\"color: #D8DEE9\">tokenizer_path<\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\">tokenizer_path<\/span><span style=\"color: #ECEFF4\">,<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">    <\/span><span style=\"color: #D8DEE9\">max_seq_len<\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\">max_seq_len<\/span><span style=\"color: #ECEFF4\">,<\/span><\/span>\n<span class=\"line\"><span 
style=\"color: #D8DEE9FF\">    <\/span><span style=\"color: #D8DEE9\">max_batch_size<\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\">max_batch_size<\/span><span style=\"color: #ECEFF4\">)<\/span><\/span><\/code><\/pre><\/div>\n\n\n\n<p>ValueError: Error initializing torch.distributed using env:\/\/ rendezvous: environment variable RANK expected, but not set<\/p>\n\n\n\n<p>\u662f\u6e90\u7801\u91cc\u9762\u8fd9\u4e00\u6bb5\u5f15\u8d77\u7684\uff1a<\/p>\n\n\n\n<div class=\"wp-block-kevinbatdorf-code-block-pro\" data-code-block-pro-font-family=\"Code-Pro-JetBrains-Mono\" style=\"font-size:.875rem;font-family:Code-Pro-JetBrains-Mono,ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,monospace;line-height:1.25rem;--cbp-tab-width:2;tab-size:var(--cbp-tab-width, 2)\"><span style=\"display:block;padding:16px 0 0 16px;margin-bottom:-1px;width:100%;text-align:left;background-color:#2e3440ff\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"54\" height=\"14\" viewBox=\"0 0 54 14\"><g fill=\"none\" fill-rule=\"evenodd\" transform=\"translate(1 1)\"><circle cx=\"6\" cy=\"6\" r=\"6\" fill=\"#FF5F56\" stroke=\"#E0443E\" stroke-width=\".5\"><\/circle><circle cx=\"26\" cy=\"6\" r=\"6\" fill=\"#FFBD2E\" stroke=\"#DEA123\" stroke-width=\".5\"><\/circle><circle cx=\"46\" cy=\"6\" r=\"6\" fill=\"#27C93F\" stroke=\"#1AAB29\" stroke-width=\".5\"><\/circle><\/g><\/svg><\/span><span role=\"button\" tabindex=\"0\" data-code=\"if not torch.distributed.is_initialized():\n    torch.distributed.init_process_group(&quot;nccl&quot;)\" style=\"color:#d8dee9ff;display:none\" aria-label=\"Copy\" class=\"code-block-pro-copy-button\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" style=\"width:24px;height:24px\" fill=\"none\" viewBox=\"0 0 24 24\" stroke=\"currentColor\" stroke-width=\"2\"><path class=\"with-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 
5a2 2 0 012-2h2a2 2 0 012 2m-6 9l2 2 4-4\"><\/path><path class=\"without-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2\"><\/path><\/svg><\/span><pre class=\"shiki nord\" style=\"background-color: #2e3440ff\" tabindex=\"0\"><code><span class=\"line\"><span style=\"color: #81A1C1\">if<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #81A1C1\">not<\/span><span style=\"color: #D8DEE9FF\"> torch<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #D8DEE9FF\">distributed<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #88C0D0\">is_initialized<\/span><span style=\"color: #ECEFF4\">():<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">    torch<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #D8DEE9FF\">distributed<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #88C0D0\">init_process_group<\/span><span style=\"color: #ECEFF4\">(<\/span><span style=\"color: #ECEFF4\">&quot;<\/span><span style=\"color: #A3BE8C\">nccl<\/span><span style=\"color: #ECEFF4\">&quot;<\/span><span style=\"color: #ECEFF4\">)<\/span><\/span><\/code><\/pre><\/div>\n\n\n\n<p>\u542f\u52a8\u4e0d\u8d77\u6765\u770b\u6837\u5b50\u662f\u56e0\u4e3a\u5206\u5e03\u5f0f\u7684\u95ee\u9898\u3002\u6211\u5c1d\u8bd5\u7ed5\u5f00\u5206\u5e03\u5f0f\uff0c\u4ece\u5b83\u7684build\u51fd\u6570\u5f00\u59cb\u770b\uff1a<\/p>\n\n\n\n<div class=\"wp-block-kevinbatdorf-code-block-pro\" data-code-block-pro-font-family=\"Code-Pro-JetBrains-Mono\" style=\"font-size:.875rem;font-family:Code-Pro-JetBrains-Mono,ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,monospace;line-height:1.25rem;--cbp-tab-width:2;tab-size:var(--cbp-tab-width, 2)\"><span style=\"display:block;padding:16px 0 0 16px;margin-bottom:-1px;width:100%;text-align:left;background-color:#2e3440ff\"><svg 
xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"54\" height=\"14\" viewBox=\"0 0 54 14\"><g fill=\"none\" fill-rule=\"evenodd\" transform=\"translate(1 1)\"><circle cx=\"6\" cy=\"6\" r=\"6\" fill=\"#FF5F56\" stroke=\"#E0443E\" stroke-width=\".5\"><\/circle><circle cx=\"26\" cy=\"6\" r=\"6\" fill=\"#FFBD2E\" stroke=\"#DEA123\" stroke-width=\".5\"><\/circle><circle cx=\"46\" cy=\"6\" r=\"6\" fill=\"#27C93F\" stroke=\"#1AAB29\" stroke-width=\".5\"><\/circle><\/g><\/svg><\/span><span role=\"button\" tabindex=\"0\" data-code=\"checkpoints = sorted(Path(ckpt_dir).glob(&quot;*.pth&quot;))\nckpt_path = checkpoints[get_model_parallel_rank()]\ncheckpoint = torch.load(ckpt_path, map_location=&quot;cpu&quot;)\" style=\"color:#d8dee9ff;display:none\" aria-label=\"Copy\" class=\"code-block-pro-copy-button\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" style=\"width:24px;height:24px\" fill=\"none\" viewBox=\"0 0 24 24\" stroke=\"currentColor\" stroke-width=\"2\"><path class=\"with-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2m-6 9l2 2 4-4\"><\/path><path class=\"without-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2\"><\/path><\/svg><\/span><pre class=\"shiki nord\" style=\"background-color: #2e3440ff\" tabindex=\"0\"><code><span class=\"line\"><span style=\"color: #D8DEE9FF\">checkpoints <\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #88C0D0\">sorted<\/span><span style=\"color: #ECEFF4\">(<\/span><span style=\"color: #88C0D0\">Path<\/span><span style=\"color: #ECEFF4\">(<\/span><span style=\"color: #D8DEE9FF\">ckpt_dir<\/span><span style=\"color: #ECEFF4\">).<\/span><span style=\"color: #88C0D0\">glob<\/span><span style=\"color: 
#ECEFF4\">(<\/span><span style=\"color: #ECEFF4\">&quot;<\/span><span style=\"color: #A3BE8C\">*.pth<\/span><span style=\"color: #ECEFF4\">&quot;<\/span><span style=\"color: #ECEFF4\">))<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">ckpt_path <\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\"> checkpoints<\/span><span style=\"color: #ECEFF4\">[<\/span><span style=\"color: #88C0D0\">get_model_parallel_rank<\/span><span style=\"color: #ECEFF4\">()]<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">checkpoint <\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\"> torch<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #88C0D0\">load<\/span><span style=\"color: #ECEFF4\">(<\/span><span style=\"color: #D8DEE9FF\">ckpt_path<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #D8DEE9\">map_location<\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #ECEFF4\">&quot;<\/span><span style=\"color: #A3BE8C\">cpu<\/span><span style=\"color: #ECEFF4\">&quot;<\/span><span style=\"color: #ECEFF4\">)<\/span><\/span><\/code><\/pre><\/div>\n\n\n\n<p>\u68c0\u67e5\u7ed9\u5b9a\u7684checkpoint_dir\u662f\u5426\u5305\u542bpth\u6587\u4ef6\uff0c7B\u7684\u6a21\u578b\u53ea\u6709\u4e00\u4e2apth\u6587\u4ef6\uff0c\u6240\u4ee5\u4e00\u4e2a\u8fdb\u7a0b\u5c31\u53ef\u4ee5\u4e86\uff0c\u6211\u60f3get_model_parallel_rank()\u5927\u6982\u610f\u601d\u5373\u662f\u6709\u51e0\u4e2a\u6587\u4ef6\u5c31\u542f\u52a8\u591a\u5c11\u4e2a\u8fdb\u7a0b\uff0c\u4ee3\u7801\u6765\u81eafacebook\u56e2\u961f\u5f00\u53d1\u7684\u5e76\u884c\u8bad\u7ec3\u5305fairscale:<\/p>\n\n\n\n<div class=\"wp-block-kevinbatdorf-code-block-pro\" data-code-block-pro-font-family=\"Code-Pro-JetBrains-Mono\" 
style=\"font-size:.875rem;font-family:Code-Pro-JetBrains-Mono,ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,monospace;line-height:1.25rem;--cbp-tab-width:2;tab-size:var(--cbp-tab-width, 2)\"><span style=\"display:block;padding:16px 0 0 16px;margin-bottom:-1px;width:100%;text-align:left;background-color:#2e3440ff\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"54\" height=\"14\" viewBox=\"0 0 54 14\"><g fill=\"none\" fill-rule=\"evenodd\" transform=\"translate(1 1)\"><circle cx=\"6\" cy=\"6\" r=\"6\" fill=\"#FF5F56\" stroke=\"#E0443E\" stroke-width=\".5\"><\/circle><circle cx=\"26\" cy=\"6\" r=\"6\" fill=\"#FFBD2E\" stroke=\"#DEA123\" stroke-width=\".5\"><\/circle><circle cx=\"46\" cy=\"6\" r=\"6\" fill=\"#27C93F\" stroke=\"#1AAB29\" stroke-width=\".5\"><\/circle><\/g><\/svg><\/span><span role=\"button\" tabindex=\"0\" data-code=\"def get_model_parallel_rank() -&gt; int:\n    &quot;&quot;&quot;Return my rank for the model parallel group.&quot;&quot;&quot;\n    return torch.distributed.get_rank(group=get_model_parallel_group())\n\ndef get_model_parallel_group() -&gt; torch.distributed.ProcessGroup:\n    &quot;&quot;&quot;Get the model parallel group the caller rank belongs to.&quot;&quot;&quot;\n    assert _MODEL_PARALLEL_GROUP is not None, &quot;model parallel group is not initialized&quot;\n    return _MODEL_PARALLEL_GROUP\" style=\"color:#d8dee9ff;display:none\" aria-label=\"Copy\" class=\"code-block-pro-copy-button\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" style=\"width:24px;height:24px\" fill=\"none\" viewBox=\"0 0 24 24\" stroke=\"currentColor\" stroke-width=\"2\"><path class=\"with-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2m-6 9l2 2 4-4\"><\/path><path class=\"without-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 
00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2\"><\/path><\/svg><\/span><pre class=\"shiki nord\" style=\"background-color: #2e3440ff\" tabindex=\"0\"><code><span class=\"line\"><span style=\"color: #81A1C1\">def<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #88C0D0\">get_model_parallel_rank<\/span><span style=\"color: #ECEFF4\">()<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #ECEFF4\">-&gt;<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #88C0D0\">int<\/span><span style=\"color: #ECEFF4\">:<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">    <\/span><span style=\"color: #ECEFF4\">&quot;&quot;&quot;<\/span><span style=\"color: #A3BE8C\">Return my rank for the model parallel group.<\/span><span style=\"color: #ECEFF4\">&quot;&quot;&quot;<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">    <\/span><span style=\"color: #81A1C1\">return<\/span><span style=\"color: #D8DEE9FF\"> torch<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #D8DEE9FF\">distributed<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #88C0D0\">get_rank<\/span><span style=\"color: #ECEFF4\">(<\/span><span style=\"color: #D8DEE9\">group<\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #88C0D0\">get_model_parallel_group<\/span><span style=\"color: #ECEFF4\">())<\/span><\/span>\n<span class=\"line\"><\/span>\n<span class=\"line\"><span style=\"color: #81A1C1\">def<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #88C0D0\">get_model_parallel_group<\/span><span style=\"color: #ECEFF4\">()<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #ECEFF4\">-&gt;<\/span><span style=\"color: #D8DEE9FF\"> torch<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #D8DEE9FF\">distributed<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: 
#D8DEE9FF\">ProcessGroup<\/span><span style=\"color: #ECEFF4\">:<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">    <\/span><span style=\"color: #ECEFF4\">&quot;&quot;&quot;<\/span><span style=\"color: #A3BE8C\">Get the model parallel group the caller rank belongs to.<\/span><span style=\"color: #ECEFF4\">&quot;&quot;&quot;<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">    <\/span><span style=\"color: #81A1C1\">assert<\/span><span style=\"color: #D8DEE9FF\"> _MODEL_PARALLEL_GROUP <\/span><span style=\"color: #81A1C1\">is<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #81A1C1\">not<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #81A1C1\">None<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #ECEFF4\">&quot;<\/span><span style=\"color: #A3BE8C\">model parallel group is not initialized<\/span><span style=\"color: #ECEFF4\">&quot;<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">    <\/span><span style=\"color: #81A1C1\">return<\/span><span style=\"color: #D8DEE9FF\"> _MODEL_PARALLEL_GROUP<\/span><\/span><\/code><\/pre><\/div>\n\n\n\n<p>\u54b1\u4e0d\u7ba1\u8fd9\u4e9b\uff0c7B\u53cd\u6b63\u5c31\u4e00\u4e2a\u53c2\u6570\u6587\u4ef6\uff0c\u76f4\u63a5\u4ece\u6587\u4ef6\u5939\u52a0\u8f7d\uff1a<\/p>\n\n\n\n<div class=\"wp-block-kevinbatdorf-code-block-pro\" data-code-block-pro-font-family=\"Code-Pro-JetBrains-Mono\" style=\"font-size:.875rem;font-family:Code-Pro-JetBrains-Mono,ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,monospace;line-height:1.25rem;--cbp-tab-width:2;tab-size:var(--cbp-tab-width, 2)\"><span style=\"display:block;padding:16px 0 0 16px;margin-bottom:-1px;width:100%;text-align:left;background-color:#2e3440ff\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"54\" height=\"14\" viewBox=\"0 0 54 14\"><g fill=\"none\" fill-rule=\"evenodd\" transform=\"translate(1 1)\"><circle 
cx=\"6\" cy=\"6\" r=\"6\" fill=\"#FF5F56\" stroke=\"#E0443E\" stroke-width=\".5\"><\/circle><circle cx=\"26\" cy=\"6\" r=\"6\" fill=\"#FFBD2E\" stroke=\"#DEA123\" stroke-width=\".5\"><\/circle><circle cx=\"46\" cy=\"6\" r=\"6\" fill=\"#27C93F\" stroke=\"#1AAB29\" stroke-width=\".5\"><\/circle><\/g><\/svg><\/span><span role=\"button\" tabindex=\"0\" data-code=\"from pathlib import Path\ncheckpoints = sorted(Path(checkpoint_dir).glob(&quot;*.pth&quot;))\n# Llama-2-7b model weights are distributed in a single file.\ncheckpoint = checkpoints[0]\ndevice = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\ncheckpoint = torch.load(checkpoint, map_location=device)\" style=\"color:#d8dee9ff;display:none\" aria-label=\"Copy\" class=\"code-block-pro-copy-button\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" style=\"width:24px;height:24px\" fill=\"none\" viewBox=\"0 0 24 24\" stroke=\"currentColor\" stroke-width=\"2\"><path class=\"with-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2m-6 9l2 2 4-4\"><\/path><path class=\"without-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2\"><\/path><\/svg><\/span><pre class=\"shiki nord\" style=\"background-color: #2e3440ff\" tabindex=\"0\"><code><span class=\"line\"><span style=\"color: #81A1C1\">from<\/span><span style=\"color: #D8DEE9FF\"> pathlib <\/span><span style=\"color: #81A1C1\">import<\/span><span style=\"color: #D8DEE9FF\"> Path<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">checkpoints <\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #88C0D0\">sorted<\/span><span style=\"color: #ECEFF4\">(<\/span><span style=\"color: #88C0D0\">Path<\/span><span 
style=\"color: #ECEFF4\">(<\/span><span style=\"color: #D8DEE9FF\">checkpoint_dir<\/span><span style=\"color: #ECEFF4\">).<\/span><span style=\"color: #88C0D0\">glob<\/span><span style=\"color: #ECEFF4\">(<\/span><span style=\"color: #ECEFF4\">&quot;<\/span><span style=\"color: #A3BE8C\">*.pth<\/span><span style=\"color: #ECEFF4\">&quot;<\/span><span style=\"color: #ECEFF4\">))<\/span><\/span>\n<span class=\"line\"><span style=\"color: #616E88\"># Llama-2-7b model weights are distributed in a single file.<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">checkpoint <\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\"> checkpoints<\/span><span style=\"color: #ECEFF4\">[<\/span><span style=\"color: #B48EAD\">0<\/span><span style=\"color: #ECEFF4\">]<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">device <\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\"> torch<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #88C0D0\">device<\/span><span style=\"color: #ECEFF4\">(<\/span><span style=\"color: #ECEFF4\">&#39;<\/span><span style=\"color: #A3BE8C\">cuda<\/span><span style=\"color: #ECEFF4\">&#39;<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #81A1C1\">if<\/span><span style=\"color: #D8DEE9FF\"> torch<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #D8DEE9FF\">cuda<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #88C0D0\">is_available<\/span><span style=\"color: #ECEFF4\">()<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #81A1C1\">else<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #ECEFF4\">&#39;<\/span><span style=\"color: #A3BE8C\">cpu<\/span><span style=\"color: #ECEFF4\">&#39;<\/span><span style=\"color: #ECEFF4\">)<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">checkpoint <\/span><span style=\"color: 
#81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\"> torch<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #88C0D0\">load<\/span><span style=\"color: #ECEFF4\">(<\/span><span style=\"color: #D8DEE9FF\">checkpoint<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #D8DEE9\">map_location<\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\">device<\/span><span style=\"color: #ECEFF4\">)<\/span><\/span><\/code><\/pre><\/div>\n\n\n\n<p>7b\u6a21\u578b\u76f4\u63a5\u52a0\u8f7d\u8fdb\u663e\u5361\u5360\u7528\u4e8613671MB\u7684\u663e\u5b58\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"772\" height=\"382\" src=\"https:\/\/tensor.agenthub.uk\/wp-content\/uploads\/2023\/12\/image-6.png\" alt=\"\" class=\"wp-image-55\" srcset=\"https:\/\/tensorzen.blog\/wp-content\/uploads\/2023\/12\/image-6.png 772w, https:\/\/tensorzen.blog\/wp-content\/uploads\/2023\/12\/image-6-300x148.png 300w, https:\/\/tensorzen.blog\/wp-content\/uploads\/2023\/12\/image-6-768x380.png 768w\" sizes=\"auto, (max-width: 772px) 100vw, 772px\" \/><\/figure>\n\n\n\n<h2 class=\"wp-block-heading\">huggingface\u8f6c\u6362<\/h2>\n\n\n\n<p>\u7b97\u4e86\uff0c\u8fd8\u662f\u5148\u4e0d\u91cd\u5199\u4e86\uff0c\u5148\u7528huggingface\u8f6c\u6362\u5427\uff0c\u5b89\u88c5\u4e00\u4e0btransformers\uff0c\u5b83\u7684\u8f6c\u6362\u51fd\u6570\u5728src\/transformers\/models\/llama\/convert_llama_weights_to_hf.py <a href=\"https:\/\/github.com\/huggingface\/transformers\/blob\/main\/src\/transformers\/models\/llama\/convert_llama_weights_to_hf.py\">\u8fd9\u91cc<\/a>\u53ef\u4ee5\u770b\u6e90\u6587\u4ef6\u3002<\/p>\n\n\n\n<p>\u672c\u8eab\u5b89\u88c5transformers\u7684\u65f6\u5019\u5df2\u7ecf\u5b89\u88c5\u4e86\u8fd9\u4e2a\u6a21\u5757\uff0c\u5199\u4e2a\u811a\u672c\uff1a<\/p>\n\n\n\n<div class=\"wp-block-kevinbatdorf-code-block-pro\" 
data-code-block-pro-font-family=\"Code-Pro-JetBrains-Mono\" style=\"font-size:.875rem;font-family:Code-Pro-JetBrains-Mono,ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,monospace;line-height:1.25rem;--cbp-tab-width:2;tab-size:var(--cbp-tab-width, 2)\"><span style=\"display:block;padding:16px 0 0 16px;margin-bottom:-1px;width:100%;text-align:left;background-color:#2e3440ff\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"54\" height=\"14\" viewBox=\"0 0 54 14\"><g fill=\"none\" fill-rule=\"evenodd\" transform=\"translate(1 1)\"><circle cx=\"6\" cy=\"6\" r=\"6\" fill=\"#FF5F56\" stroke=\"#E0443E\" stroke-width=\".5\"><\/circle><circle cx=\"26\" cy=\"6\" r=\"6\" fill=\"#FFBD2E\" stroke=\"#DEA123\" stroke-width=\".5\"><\/circle><circle cx=\"46\" cy=\"6\" r=\"6\" fill=\"#27C93F\" stroke=\"#1AAB29\" stroke-width=\".5\"><\/circle><\/g><\/svg><\/span><span role=\"button\" tabindex=\"0\" data-code=\"from transformers.models.llama.convert_llama_weights_to_hf import main\n\nif __name__ == &quot;__main__&quot;:\n    main()\" style=\"color:#d8dee9ff;display:none\" aria-label=\"Copy\" class=\"code-block-pro-copy-button\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" style=\"width:24px;height:24px\" fill=\"none\" viewBox=\"0 0 24 24\" stroke=\"currentColor\" stroke-width=\"2\"><path class=\"with-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2m-6 9l2 2 4-4\"><\/path><path class=\"without-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2\"><\/path><\/svg><\/span><pre class=\"shiki nord\" style=\"background-color: #2e3440ff\" tabindex=\"0\"><code><span class=\"line\"><span style=\"color: #81A1C1\">from<\/span><span style=\"color: #D8DEE9FF\"> transformers<\/span><span style=\"color: 
#ECEFF4\">.<\/span><span style=\"color: #D8DEE9FF\">models<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #D8DEE9FF\">llama<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #D8DEE9FF\">convert_llama_weights_to_hf <\/span><span style=\"color: #81A1C1\">import<\/span><span style=\"color: #D8DEE9FF\"> main<\/span><\/span>\n<span class=\"line\"><\/span>\n<span class=\"line\"><span style=\"color: #81A1C1\">if<\/span><span style=\"color: #D8DEE9FF\"> __name__ <\/span><span style=\"color: #81A1C1\">==<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #ECEFF4\">&quot;<\/span><span style=\"color: #A3BE8C\">__main__<\/span><span style=\"color: #ECEFF4\">&quot;<\/span><span style=\"color: #ECEFF4\">:<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">    <\/span><span style=\"color: #88C0D0\">main<\/span><span style=\"color: #ECEFF4\">()<\/span><\/span><\/code><\/pre><\/div>\n\n\n\n<p>\u76f4\u63a5\u7528\u8fd9\u4e2a\u811a\u672c\u5c31\u884c\u4e86\uff0c\u6267\u884c\u4e00\u4e0b\uff1a<\/p>\n\n\n\n<div class=\"wp-block-kevinbatdorf-code-block-pro\" data-code-block-pro-font-family=\"Code-Pro-JetBrains-Mono\" style=\"font-size:.875rem;font-family:Code-Pro-JetBrains-Mono,ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,monospace;line-height:1.25rem;--cbp-tab-width:2;tab-size:var(--cbp-tab-width, 2)\"><span style=\"display:block;padding:16px 0 0 16px;margin-bottom:-1px;width:100%;text-align:left;background-color:#2e3440ff\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"54\" height=\"14\" viewBox=\"0 0 54 14\"><g fill=\"none\" fill-rule=\"evenodd\" transform=\"translate(1 1)\"><circle cx=\"6\" cy=\"6\" r=\"6\" fill=\"#FF5F56\" stroke=\"#E0443E\" stroke-width=\".5\"><\/circle><circle cx=\"26\" cy=\"6\" r=\"6\" fill=\"#FFBD2E\" stroke=\"#DEA123\" stroke-width=\".5\"><\/circle><circle cx=\"46\" cy=\"6\" r=\"6\" fill=\"#27C93F\" stroke=\"#1AAB29\" stroke-width=\".5\"><\/circle><\/g><\/svg><\/span><span 
role=\"button\" tabindex=\"0\" data-code=\"# python convert.py --help\nusage: convert.py [-h] [--input_dir INPUT_DIR] [--model_size {7B,7Bf,13B,13Bf,30B,34B,65B,70B,70Bf,tokenizer_only}] [--output_dir OUTPUT_DIR] [--safe_serialization SAFE_SERIALIZATION]\n\noptions:\n  -h, --help            show this help message and exit\n  --input_dir INPUT_DIR\n                        Location of LLaMA weights, which contains tokenizer.model and model folders\n  --model_size {7B,7Bf,13B,13Bf,30B,34B,65B,70B,70Bf,tokenizer_only}\n                        'f' models correspond to the finetuned versions, and are specific to the Llama2 official release. For more details on Llama2, checkout the original repo: https:\/\/huggingface.co\/meta-llama\n  --output_dir OUTPUT_DIR\n                        Location to write HF model and tokenizer\n  --safe_serialization SAFE_SERIALIZATION\n                        Whether or not to save using `safetensors`.\" style=\"color:#d8dee9ff;display:none\" aria-label=\"Copy\" class=\"code-block-pro-copy-button\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" style=\"width:24px;height:24px\" fill=\"none\" viewBox=\"0 0 24 24\" stroke=\"currentColor\" stroke-width=\"2\"><path class=\"with-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2m-6 9l2 2 4-4\"><\/path><path class=\"without-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2\"><\/path><\/svg><\/span><pre class=\"shiki nord\" style=\"background-color: #2e3440ff\" tabindex=\"0\"><code><span class=\"line\"><span style=\"color: #616E88\"># python convert.py --help<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">usage<\/span><span style=\"color: #ECEFF4\">:<\/span><span style=\"color: #D8DEE9FF\"> 
convert<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #D8DEE9FF\">py <\/span><span style=\"color: #ECEFF4\">[<\/span><span style=\"color: #81A1C1\">-<\/span><span style=\"color: #D8DEE9FF\">h<\/span><span style=\"color: #ECEFF4\">]<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #ECEFF4\">[<\/span><span style=\"color: #D8DEE9\">--<\/span><span style=\"color: #D8DEE9FF\">input_dir INPUT_DIR<\/span><span style=\"color: #ECEFF4\">]<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #ECEFF4\">[<\/span><span style=\"color: #D8DEE9\">--<\/span><span style=\"color: #D8DEE9FF\">model_size <\/span><span style=\"color: #ECEFF4\">{<\/span><span style=\"color: #D8DEE9\">7B<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9\">7Bf<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9\">13B<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9\">13Bf<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9\">30B<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9\">34B<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9\">65B<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9\">70B<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9\">70Bf<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9FF\">tokenizer_only<\/span><span style=\"color: #ECEFF4\">}]<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #ECEFF4\">[<\/span><span style=\"color: #D8DEE9\">--<\/span><span style=\"color: #D8DEE9FF\">output_dir OUTPUT_DIR<\/span><span style=\"color: #ECEFF4\">]<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #ECEFF4\">[<\/span><span style=\"color: #D8DEE9\">--<\/span><span style=\"color: #D8DEE9FF\">safe_serialization SAFE_SERIALIZATION<\/span><span style=\"color: 
#ECEFF4\">]<\/span><\/span>\n<span class=\"line\"><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">options<\/span><span style=\"color: #ECEFF4\">:<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">  <\/span><span style=\"color: #81A1C1\">-<\/span><span style=\"color: #D8DEE9FF\">h<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #D8DEE9\">--<\/span><span style=\"color: #88C0D0\">help<\/span><span style=\"color: #D8DEE9FF\">            show this <\/span><span style=\"color: #88C0D0\">help<\/span><span style=\"color: #D8DEE9FF\"> message <\/span><span style=\"color: #81A1C1\">and<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #88C0D0\">exit<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">  <\/span><span style=\"color: #D8DEE9\">--<\/span><span style=\"color: #D8DEE9FF\">input_dir INPUT_DIR<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">                        Location of LLaMA weights<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9FF\"> which contains tokenizer<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #D8DEE9FF\">model <\/span><span style=\"color: #81A1C1\">and<\/span><span style=\"color: #D8DEE9FF\"> model folders<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">  <\/span><span style=\"color: #D8DEE9\">--<\/span><span style=\"color: #D8DEE9FF\">model_size <\/span><span style=\"color: #ECEFF4\">{<\/span><span style=\"color: #D8DEE9\">7B<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9\">7Bf<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9\">13B<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9\">13Bf<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9\">30B<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: 
#D8DEE9\">34B<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9\">65B<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9\">70B<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9\">70Bf<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9FF\">tokenizer_only<\/span><span style=\"color: #ECEFF4\">}<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">                        <\/span><span style=\"color: #ECEFF4\">&#39;<\/span><span style=\"color: #A3BE8C\">f<\/span><span style=\"color: #ECEFF4\">&#39;<\/span><span style=\"color: #D8DEE9FF\"> models correspond to the finetuned versions<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #81A1C1\">and<\/span><span style=\"color: #D8DEE9FF\"> are specific to the Llama2 official release<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #D8DEE9FF\"> For more details on Llama2<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9FF\"> checkout the original repo<\/span><span style=\"color: #ECEFF4\">:<\/span><span style=\"color: #D8DEE9FF\"> https<\/span><span style=\"color: #ECEFF4\">:<\/span><span style=\"color: #81A1C1\">\/\/<\/span><span style=\"color: #D8DEE9FF\">huggingface<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #D8DEE9FF\">co<\/span><span style=\"color: #81A1C1\">\/<\/span><span style=\"color: #D8DEE9FF\">meta<\/span><span style=\"color: #81A1C1\">-<\/span><span style=\"color: #D8DEE9FF\">llama<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">  <\/span><span style=\"color: #D8DEE9\">--<\/span><span style=\"color: #D8DEE9FF\">output_dir OUTPUT_DIR<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">                        Location to write HF model <\/span><span style=\"color: #81A1C1\">and<\/span><span style=\"color: #D8DEE9FF\"> 
tokenizer<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">  <\/span><span style=\"color: #D8DEE9\">--<\/span><span style=\"color: #D8DEE9FF\">safe_serialization SAFE_SERIALIZATION<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">                        Whether <\/span><span style=\"color: #81A1C1\">or<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #81A1C1\">not<\/span><span style=\"color: #D8DEE9FF\"> to save using <\/span><span style=\"color: #D8DEE9\">`safetensors`<\/span><span style=\"color: #ECEFF4\">.<\/span><\/span><\/code><\/pre><\/div>\n\n\n\n<p>&#8211;input_dir \u5199\u7684\u662fllama\u7684\u6839\u76ee\u5f55<\/p>\n\n\n\n<p>&#8211;model_size \u9009\u62e9\u8981\u8f6c\u6362\u7684\u6a21\u578b\u53c2\u6570\u91cf\uff0c\u8fd9\u4e2a\u6709\u4e2abug\uff0c\u4f60\u53ea\u80fd\u586b\u63d0\u4f9b\u7684\u90a3\u51e0\u4e2a\u540d\u5b57\uff0c\u95ee\u9898\u662fllama\u7684\u76ee\u5f55\u4e0b\u5bf9\u5e94\u7684\u6a21\u578b\u6587\u4ef6\u540d\u662f&#8221;llama-2-*b&#8221;\u8fd9\u79cd\uff0c\u8f6c\u6362\u811a\u672c\u4f1a\u53bb&#8221;input_dir\/*B&#8221;\u4e0b\u9762\u53bb\u627e\u6a21\u578b\u6587\u4ef6\uff0c\u6240\u4ee5\u9700\u8981\u7ed9&#8221;llama-2-*b&#8221;\u6539\u6210&#8221;*B&#8221;\u540e\u518d\u6267\u884c\u811a\u672c\u3002<\/p>\n\n\n\n<p>\u8f6c\u6362\u8fc7\u7a0b\u4e2d\u5e76\u4e0d\u9700\u8981GPU\uff0c\u5b8c\u6210\u4e4b\u540e\u7528transformers\u52a0\u8f7d\u5c31\u884c\u4e86\uff0c7B\u7684\u6a21\u578b\u52a0\u8f7d\u5b8c\u540e\u5212\u5206\u4e8626G\u663e\u5b58<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"721\" height=\"93\" src=\"https:\/\/tensor.agenthub.uk\/wp-content\/uploads\/2023\/12\/image-8.png\" alt=\"\" class=\"wp-image-58\" srcset=\"https:\/\/tensorzen.blog\/wp-content\/uploads\/2023\/12\/image-8.png 721w, https:\/\/tensorzen.blog\/wp-content\/uploads\/2023\/12\/image-8-300x39.png 300w\" sizes=\"auto, (max-width: 721px) 100vw, 721px\" 
\/><\/figure>\n\n\n\n<p>\u6267\u884c\uff1a<\/p>\n\n\n\n<div class=\"wp-block-kevinbatdorf-code-block-pro\" data-code-block-pro-font-family=\"Code-Pro-JetBrains-Mono\" style=\"font-size:.875rem;font-family:Code-Pro-JetBrains-Mono,ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,monospace;line-height:1.25rem;--cbp-tab-width:2;tab-size:var(--cbp-tab-width, 2)\"><span style=\"display:block;padding:16px 0 0 16px;margin-bottom:-1px;width:100%;text-align:left;background-color:#2e3440ff\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"54\" height=\"14\" viewBox=\"0 0 54 14\"><g fill=\"none\" fill-rule=\"evenodd\" transform=\"translate(1 1)\"><circle cx=\"6\" cy=\"6\" r=\"6\" fill=\"#FF5F56\" stroke=\"#E0443E\" stroke-width=\".5\"><\/circle><circle cx=\"26\" cy=\"6\" r=\"6\" fill=\"#FFBD2E\" stroke=\"#DEA123\" stroke-width=\".5\"><\/circle><circle cx=\"46\" cy=\"6\" r=\"6\" fill=\"#27C93F\" stroke=\"#1AAB29\" stroke-width=\".5\"><\/circle><\/g><\/svg><\/span><span role=\"button\" tabindex=\"0\" data-code=\"total_params = sum(p.numel() for p in model.parameters())\nprint(f&quot;Total number of parameters: {total_params}&quot;)\n&gt;&gt;&gt; Total number of parameters: 6607343616\" style=\"color:#d8dee9ff;display:none\" aria-label=\"Copy\" class=\"code-block-pro-copy-button\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" style=\"width:24px;height:24px\" fill=\"none\" viewBox=\"0 0 24 24\" stroke=\"currentColor\" stroke-width=\"2\"><path class=\"with-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2m-6 9l2 2 4-4\"><\/path><path class=\"without-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2\"><\/path><\/svg><\/span><pre class=\"shiki nord\" style=\"background-color: #2e3440ff\" 
tabindex=\"0\"><code><span class=\"line\"><span style=\"color: #D8DEE9FF\">total_params <\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #88C0D0\">sum<\/span><span style=\"color: #ECEFF4\">(<\/span><span style=\"color: #D8DEE9FF\">p<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #88C0D0\">numel<\/span><span style=\"color: #ECEFF4\">()<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #81A1C1\">for<\/span><span style=\"color: #D8DEE9FF\"> p <\/span><span style=\"color: #81A1C1\">in<\/span><span style=\"color: #D8DEE9FF\"> model<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #88C0D0\">parameters<\/span><span style=\"color: #ECEFF4\">())<\/span><\/span>\n<span class=\"line\"><span style=\"color: #88C0D0\">print<\/span><span style=\"color: #ECEFF4\">(<\/span><span style=\"color: #81A1C1\">f<\/span><span style=\"color: #A3BE8C\">&quot;Total number of parameters: <\/span><span style=\"color: #EBCB8B\">{<\/span><span style=\"color: #D8DEE9FF\">total_params<\/span><span style=\"color: #EBCB8B\">}<\/span><span style=\"color: #A3BE8C\">&quot;<\/span><span style=\"color: #ECEFF4\">)<\/span><\/span>\n<span class=\"line\"><span style=\"color: #81A1C1\">&gt;&gt;&gt;<\/span><span style=\"color: #D8DEE9FF\"> Total number of parameters<\/span><span style=\"color: #ECEFF4\">:<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #B48EAD\">6607343616<\/span><\/span><\/code><\/pre><\/div>\n\n\n\n<p>\u6a21\u578b\u7684\u603b\u53c2\u6570\u662f6,607,343,616<\/p>\n\n\n\n<p>\u7c97\u7565\u8ba1\u7b97\u4e00\u4e0b\uff0c\u91c7\u7528\u5355\u7cbe\u5ea6(single-precision floating-point format)\u5b58\u50a8\u8fd9\u4e9b\u53c2\u6570\u7684\u8bdd\u603b\u5171\u8981\u75286607343616 * 32 \/ 8 \/ 1024 \/ 1024 \/ 1024 = 24.6143G\uff0c\u5982\u679c\u91c7\u7528\u534a\u7cbe\u5ea6\u5b58\u50a8\u770b\u4e0b\uff1a<\/p>\n\n\n\n<div class=\"wp-block-kevinbatdorf-code-block-pro\" 
data-code-block-pro-font-family=\"Code-Pro-JetBrains-Mono\" style=\"font-size:.875rem;font-family:Code-Pro-JetBrains-Mono,ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,monospace;line-height:1.25rem;--cbp-tab-width:2;tab-size:var(--cbp-tab-width, 2)\"><span style=\"display:block;padding:16px 0 0 16px;margin-bottom:-1px;width:100%;text-align:left;background-color:#2e3440ff\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"54\" height=\"14\" viewBox=\"0 0 54 14\"><g fill=\"none\" fill-rule=\"evenodd\" transform=\"translate(1 1)\"><circle cx=\"6\" cy=\"6\" r=\"6\" fill=\"#FF5F56\" stroke=\"#E0443E\" stroke-width=\".5\"><\/circle><circle cx=\"26\" cy=\"6\" r=\"6\" fill=\"#FFBD2E\" stroke=\"#DEA123\" stroke-width=\".5\"><\/circle><circle cx=\"46\" cy=\"6\" r=\"6\" fill=\"#27C93F\" stroke=\"#1AAB29\" stroke-width=\".5\"><\/circle><\/g><\/svg><\/span><span role=\"button\" tabindex=\"0\" data-code=\"tokenizer = LlamaTokenizer.from_pretrained('llama_hf\/7Bf', torch_dtype=torch.float16)\nmodel = LlamaModel.from_pretrained('llama_hf\/7Bf', torch_dtype=torch.float16)\" style=\"color:#d8dee9ff;display:none\" aria-label=\"Copy\" class=\"code-block-pro-copy-button\"><svg xmlns=\"http:\/\/www.w3.org\/2000\/svg\" style=\"width:24px;height:24px\" fill=\"none\" viewBox=\"0 0 24 24\" stroke=\"currentColor\" stroke-width=\"2\"><path class=\"with-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2m-6 9l2 2 4-4\"><\/path><path class=\"without-check\" stroke-linecap=\"round\" stroke-linejoin=\"round\" d=\"M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2\"><\/path><\/svg><\/span><pre class=\"shiki nord\" style=\"background-color: #2e3440ff\" tabindex=\"0\"><code><span class=\"line\"><span style=\"color: #D8DEE9FF\">tokenizer <\/span><span style=\"color: 
#81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\"> LlamaTokenizer<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #88C0D0\">from_pretrained<\/span><span style=\"color: #ECEFF4\">(<\/span><span style=\"color: #ECEFF4\">&#39;<\/span><span style=\"color: #A3BE8C\">llama_hf\/7Bf<\/span><span style=\"color: #ECEFF4\">&#39;<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #D8DEE9\">torch_dtype<\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\">torch<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #D8DEE9FF\">float16<\/span><span style=\"color: #ECEFF4\">)<\/span><\/span>\n<span class=\"line\"><span style=\"color: #D8DEE9FF\">model <\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\"> LlamaModel<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #88C0D0\">from_pretrained<\/span><span style=\"color: #ECEFF4\">(<\/span><span style=\"color: #ECEFF4\">&#39;<\/span><span style=\"color: #A3BE8C\">llama_hf\/7Bf<\/span><span style=\"color: #ECEFF4\">&#39;<\/span><span style=\"color: #ECEFF4\">,<\/span><span style=\"color: #D8DEE9FF\"> <\/span><span style=\"color: #D8DEE9\">torch_dtype<\/span><span style=\"color: #81A1C1\">=<\/span><span style=\"color: #D8DEE9FF\">torch<\/span><span style=\"color: #ECEFF4\">.<\/span><span style=\"color: #D8DEE9FF\">float16<\/span><span style=\"color: #ECEFF4\">)<\/span><\/span><\/code><\/pre><\/div>\n\n\n\n<p>\u653e\u8fdb\u663e\u5361\u7684\u8bdd\u5212\u5206\u4e8613G\u663e\u5b58<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" decoding=\"async\" width=\"718\" height=\"94\" src=\"https:\/\/tensor.agenthub.uk\/wp-content\/uploads\/2023\/12\/image-9.png\" alt=\"\" class=\"wp-image-61\" srcset=\"https:\/\/tensorzen.blog\/wp-content\/uploads\/2023\/12\/image-9.png 718w, https:\/\/tensorzen.blog\/wp-content\/uploads\/2023\/12\/image-9-300x39.png 300w\" 
sizes=\"auto, (max-width: 718px) 100vw, 718px\" \/><\/figure>\n\n\n\n<p>\u8fd8\u60f3\u518d\u964d\u4f4e\u663e\u5b58\u5360\u7528\u5c31\u5f97\u4f7f\u7528quantization\u4e86\uff0c\u540e\u9762\u6574\u7406\u4e0b\u3002<\/p>\n\n\n\n<p>LlamaForCausalLM\u53ef\u4ee5\u7528\u6765\u751f\u6210\u56de\u7b54\uff0c\u9ed8\u8ba4\u7684LlamaModel\u6ca1\u6709\u8fd9\u529f\u80fd<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u5f88\u9057\u61be\u91cd\u5199\u5931\u8d25\u4e86\uff0c\u5b98\u65b9inference\u4f7f\u7528\u4e86meta\u4e4b\u524d\u7684\u4e00\u4e2a\u5305fairscale\uff0c\u5f88\u9ebb\u70e6\uff0c\u540e\u9762\u6709\u5927\u6bb5\u7684\u7a7a\u95f2\u65f6\u95f4\u7684\u8bdd\u518d\u6361\u8d77\u6765\u3002<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[5,12],"tags":[],"class_list":["post-54","post","type-post","status-publish","format-standard","hentry","category-coding","category-llm"],"_links":{"self":[{"href":"https:\/\/tensorzen.blog\/index.php?rest_route=\/wp\/v2\/posts\/54","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/tensorzen.blog\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/tensorzen.blog\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/tensorzen.blog\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/tensorzen.blog\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=54"}],"version-history":[{"count":11,"href":"https:\/\/tensorzen.blog\/index.php?rest_route=\/wp\/v2\/posts\/54\/revisions"}],"predecessor-version":[{"id":511,"href":"https:\/\/tensorzen.blog\/index.php?rest_route=\/wp\/v2\/posts\/54\/revisions\/511"}],"wp:attachment":[{"href":"https:\/\/tensorzen.blog\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=54"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/tensorzen.blog\/index.php?rest_route=
%2Fwp%2Fv2%2Fcategories&post=54"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/tensorzen.blog\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=54"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}